| /* |
| * Copy-on-read filter block driver |
| * |
| * Copyright (c) 2018 Red Hat, Inc. |
| * |
| * Author: |
| * Max Reitz <mreitz@redhat.com> |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public License as |
| * published by the Free Software Foundation; either version 2 or |
| * (at your option) version 3 of the License. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, see <http://www.gnu.org/licenses/>. |
| */ |
| |
| #include "qemu/osdep.h" |
| #include "block/block_int.h" |
| #include "qemu/module.h" |
| #include "qapi/error.h" |
| #include "qapi/qmp/qdict.h" |
| #include "block/copy-on-read.h" |
| |
| |
| typedef struct BDRVStateCOR { |
| bool active; |
| BlockDriverState *bottom_bs; |
| bool chain_frozen; |
| } BDRVStateCOR; |
| |
| |
| static int cor_open(BlockDriverState *bs, QDict *options, int flags, |
| Error **errp) |
| { |
| BlockDriverState *bottom_bs = NULL; |
| BDRVStateCOR *state = bs->opaque; |
| /* Find a bottom node name, if any */ |
| const char *bottom_node = qdict_get_try_str(options, "bottom"); |
| |
| bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, |
| BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, |
| false, errp); |
| if (!bs->file) { |
| return -EINVAL; |
| } |
| |
| bs->supported_read_flags = BDRV_REQ_PREFETCH; |
| |
| bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | |
| (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); |
| |
| bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | |
| ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & |
| bs->file->bs->supported_zero_flags); |
| |
| if (bottom_node) { |
| bottom_bs = bdrv_find_node(bottom_node); |
| if (!bottom_bs) { |
| error_setg(errp, "Bottom node '%s' not found", bottom_node); |
| qdict_del(options, "bottom"); |
| return -EINVAL; |
| } |
| qdict_del(options, "bottom"); |
| |
| if (!bottom_bs->drv) { |
| error_setg(errp, "Bottom node '%s' not opened", bottom_node); |
| return -EINVAL; |
| } |
| |
| if (bottom_bs->drv->is_filter) { |
| error_setg(errp, "Bottom node '%s' is a filter", bottom_node); |
| return -EINVAL; |
| } |
| |
| if (bdrv_freeze_backing_chain(bs, bottom_bs, errp) < 0) { |
| return -EINVAL; |
| } |
| state->chain_frozen = true; |
| |
| /* |
| * We do freeze the chain, so it shouldn't be removed. Still, storing a |
| * pointer worth bdrv_ref(). |
| */ |
| bdrv_ref(bottom_bs); |
| } |
| state->active = true; |
| state->bottom_bs = bottom_bs; |
| |
| /* |
| * We don't need to call bdrv_child_refresh_perms() now as the permissions |
| * will be updated later when the filter node gets its parent. |
| */ |
| |
| return 0; |
| } |
| |
| |
| #define PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ |
| | BLK_PERM_WRITE \ |
| | BLK_PERM_RESIZE) |
| #define PERM_UNCHANGED (BLK_PERM_ALL & ~PERM_PASSTHROUGH) |
| |
| static void cor_child_perm(BlockDriverState *bs, BdrvChild *c, |
| BdrvChildRole role, |
| BlockReopenQueue *reopen_queue, |
| uint64_t perm, uint64_t shared, |
| uint64_t *nperm, uint64_t *nshared) |
| { |
| BDRVStateCOR *s = bs->opaque; |
| |
| if (!s->active) { |
| /* |
| * While the filter is being removed |
| */ |
| *nperm = 0; |
| *nshared = BLK_PERM_ALL; |
| return; |
| } |
| |
| *nperm = perm & PERM_PASSTHROUGH; |
| *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED; |
| |
| /* We must not request write permissions for an inactive node, the child |
| * cannot provide it. */ |
| if (!(bs->open_flags & BDRV_O_INACTIVE)) { |
| *nperm |= BLK_PERM_WRITE_UNCHANGED; |
| } |
| } |
| |
| |
| static int64_t cor_getlength(BlockDriverState *bs) |
| { |
| return bdrv_getlength(bs->file->bs); |
| } |
| |
| |
| static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs, |
| uint64_t offset, uint64_t bytes, |
| QEMUIOVector *qiov, |
| size_t qiov_offset, |
| int flags) |
| { |
| int64_t n; |
| int local_flags; |
| int ret; |
| BDRVStateCOR *state = bs->opaque; |
| |
| if (!state->bottom_bs) { |
| return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset, |
| flags | BDRV_REQ_COPY_ON_READ); |
| } |
| |
| while (bytes) { |
| local_flags = flags; |
| |
| /* In case of failure, try to copy-on-read anyway */ |
| ret = bdrv_is_allocated(bs->file->bs, offset, bytes, &n); |
| if (ret <= 0) { |
| ret = bdrv_is_allocated_above(bdrv_backing_chain_next(bs->file->bs), |
| state->bottom_bs, true, offset, |
| n, &n); |
| if (ret > 0 || ret < 0) { |
| local_flags |= BDRV_REQ_COPY_ON_READ; |
| } |
| /* Finish earlier if the end of a backing file has been reached */ |
| if (n == 0) { |
| break; |
| } |
| } |
| |
| /* Skip if neither read nor write are needed */ |
| if ((local_flags & (BDRV_REQ_PREFETCH | BDRV_REQ_COPY_ON_READ)) != |
| BDRV_REQ_PREFETCH) { |
| ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset, |
| local_flags); |
| if (ret < 0) { |
| return ret; |
| } |
| } |
| |
| offset += n; |
| qiov_offset += n; |
| bytes -= n; |
| } |
| |
| return 0; |
| } |
| |
| |
| static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs, |
| uint64_t offset, |
| uint64_t bytes, |
| QEMUIOVector *qiov, |
| size_t qiov_offset, int flags) |
| { |
| return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset, |
| flags); |
| } |
| |
| |
| static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs, |
| int64_t offset, int bytes, |
| BdrvRequestFlags flags) |
| { |
| return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); |
| } |
| |
| |
| static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs, |
| int64_t offset, int bytes) |
| { |
| return bdrv_co_pdiscard(bs->file, offset, bytes); |
| } |
| |
| |
| static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs, |
| uint64_t offset, |
| uint64_t bytes, |
| QEMUIOVector *qiov) |
| { |
| return bdrv_co_pwritev(bs->file, offset, bytes, qiov, |
| BDRV_REQ_WRITE_COMPRESSED); |
| } |
| |
| |
| static void cor_eject(BlockDriverState *bs, bool eject_flag) |
| { |
| bdrv_eject(bs->file->bs, eject_flag); |
| } |
| |
| |
| static void cor_lock_medium(BlockDriverState *bs, bool locked) |
| { |
| bdrv_lock_medium(bs->file->bs, locked); |
| } |
| |
| |
| static void cor_close(BlockDriverState *bs) |
| { |
| BDRVStateCOR *s = bs->opaque; |
| |
| if (s->chain_frozen) { |
| s->chain_frozen = false; |
| bdrv_unfreeze_backing_chain(bs, s->bottom_bs); |
| } |
| |
| bdrv_unref(s->bottom_bs); |
| } |
| |
| |
| static BlockDriver bdrv_copy_on_read = { |
| .format_name = "copy-on-read", |
| .instance_size = sizeof(BDRVStateCOR), |
| |
| .bdrv_open = cor_open, |
| .bdrv_close = cor_close, |
| .bdrv_child_perm = cor_child_perm, |
| |
| .bdrv_getlength = cor_getlength, |
| |
| .bdrv_co_preadv_part = cor_co_preadv_part, |
| .bdrv_co_pwritev_part = cor_co_pwritev_part, |
| .bdrv_co_pwrite_zeroes = cor_co_pwrite_zeroes, |
| .bdrv_co_pdiscard = cor_co_pdiscard, |
| .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed, |
| |
| .bdrv_eject = cor_eject, |
| .bdrv_lock_medium = cor_lock_medium, |
| |
| .has_variable_length = true, |
| .is_filter = true, |
| }; |
| |
| |
| void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs) |
| { |
| BdrvChild *child; |
| BlockDriverState *bs; |
| BDRVStateCOR *s = cor_filter_bs->opaque; |
| |
| child = bdrv_filter_child(cor_filter_bs); |
| if (!child) { |
| return; |
| } |
| bs = child->bs; |
| |
| /* Retain the BDS until we complete the graph change. */ |
| bdrv_ref(bs); |
| /* Hold a guest back from writing while permissions are being reset. */ |
| bdrv_drained_begin(bs); |
| /* Drop permissions before the graph change. */ |
| s->active = false; |
| /* unfreeze, as otherwise bdrv_replace_node() will fail */ |
| if (s->chain_frozen) { |
| s->chain_frozen = false; |
| bdrv_unfreeze_backing_chain(cor_filter_bs, s->bottom_bs); |
| } |
| bdrv_child_refresh_perms(cor_filter_bs, child, &error_abort); |
| bdrv_replace_node(cor_filter_bs, bs, &error_abort); |
| |
| bdrv_drained_end(bs); |
| bdrv_unref(bs); |
| bdrv_unref(cor_filter_bs); |
| } |
| |
| |
| static void bdrv_copy_on_read_init(void) |
| { |
| bdrv_register(&bdrv_copy_on_read); |
| } |
| |
| block_init(bdrv_copy_on_read_init); |