qapi: copy-on-read filter: add 'bottom' option

Add an option to limit copy-on-read operations to specified sub-chain
of backing-chain, to make copy-on-read filter useful for block-stream
job.

Suggested-by: Max Reitz <mreitz@redhat.com>
Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
  [vsementsov: change subject, modified to freeze the chain,
   do some fixes]
Message-Id: <20201216061703.70908-6-vsementsov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 618c4c4..7156098 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,18 +24,24 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
 #include "block/copy-on-read.h"
 
 
 typedef struct BDRVStateCOR {
     bool active;
+    BlockDriverState *bottom_bs;
+    bool chain_frozen;
 } BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
+    BlockDriverState *bottom_bs = NULL;
     BDRVStateCOR *state = bs->opaque;
+    /* Find a bottom node name, if any */
+    const char *bottom_node = qdict_get_try_str(options, "bottom");
 
     bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
                                BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -51,7 +57,38 @@
         ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
             bs->file->bs->supported_zero_flags);
 
+    if (bottom_node) {
+        bottom_bs = bdrv_find_node(bottom_node);
+        if (!bottom_bs) {
+            error_setg(errp, "Bottom node '%s' not found", bottom_node);
+            qdict_del(options, "bottom");
+            return -EINVAL;
+        }
+        qdict_del(options, "bottom");
+
+        if (!bottom_bs->drv) {
+            error_setg(errp, "Bottom node '%s' not opened", bottom_node);
+            return -EINVAL;
+        }
+
+        if (bottom_bs->drv->is_filter) {
+            error_setg(errp, "Bottom node '%s' is a filter", bottom_node);
+            return -EINVAL;
+        }
+
+        if (bdrv_freeze_backing_chain(bs, bottom_bs, errp) < 0) {
+            return -EINVAL;
+        }
+        state->chain_frozen = true;
+
+        /*
+         * We do freeze the chain, so it shouldn't be removed. Still, storing a
+         * pointer worth bdrv_ref().
+         */
+        bdrv_ref(bottom_bs);
+    }
     state->active = true;
+    state->bottom_bs = bottom_bs;
 
     /*
      * We don't need to call bdrv_child_refresh_perms() now as the permissions
@@ -107,8 +144,46 @@
                                            size_t qiov_offset,
                                            int flags)
 {
-    return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
-                               flags | BDRV_REQ_COPY_ON_READ);
+    int64_t n;
+    int local_flags;
+    int ret;
+    BDRVStateCOR *state = bs->opaque;
+
+    if (!state->bottom_bs) {
+        return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+                                   flags | BDRV_REQ_COPY_ON_READ);
+    }
+
+    while (bytes) {
+        local_flags = flags;
+
+        /* In case of failure, try to copy-on-read anyway */
+        ret = bdrv_is_allocated(bs->file->bs, offset, bytes, &n);
+        if (ret <= 0) {
+            ret = bdrv_is_allocated_above(bdrv_backing_chain_next(bs->file->bs),
+                                          state->bottom_bs, true, offset,
+                                          n, &n);
+            if (ret > 0 || ret < 0) {
+                local_flags |= BDRV_REQ_COPY_ON_READ;
+            }
+            /* Finish earlier if the end of a backing file has been reached */
+            if (n == 0) {
+                break;
+            }
+        }
+
+        ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+                                  local_flags);
+        if (ret < 0) {
+            return ret;
+        }
+
+        offset += n;
+        qiov_offset += n;
+        bytes -= n;
+    }
+
+    return 0;
 }
 
 
@@ -160,11 +235,25 @@
 }
 
 
+static void cor_close(BlockDriverState *bs)
+{
+    BDRVStateCOR *s = bs->opaque;
+
+    if (s->chain_frozen) {
+        s->chain_frozen = false;
+        bdrv_unfreeze_backing_chain(bs, s->bottom_bs);
+    }
+
+    bdrv_unref(s->bottom_bs);
+}
+
+
 static BlockDriver bdrv_copy_on_read = {
     .format_name                        = "copy-on-read",
     .instance_size                      = sizeof(BDRVStateCOR),
 
     .bdrv_open                          = cor_open,
+    .bdrv_close                         = cor_close,
     .bdrv_child_perm                    = cor_child_perm,
 
     .bdrv_getlength                     = cor_getlength,
@@ -201,6 +290,11 @@
     bdrv_drained_begin(bs);
     /* Drop permissions before the graph change. */
     s->active = false;
+    /* unfreeze, as otherwise bdrv_replace_node() will fail */
+    if (s->chain_frozen) {
+        s->chain_frozen = false;
+        bdrv_unfreeze_backing_chain(cor_filter_bs, s->bottom_bs);
+    }
     bdrv_child_refresh_perms(cor_filter_bs, child, &error_abort);
     bdrv_replace_node(cor_filter_bs, bs, &error_abort);
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index b55732d..65167eb 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3959,6 +3959,24 @@
   'data': { 'throttle-group': 'str',
             'file' : 'BlockdevRef'
              } }
+
+##
+# @BlockdevOptionsCor:
+#
+# Driver specific block device options for the copy-on-read driver.
+#
+# @bottom: The name of a non-filter node (allocation-bearing layer) that
+#          limits the COR operations in the backing chain (inclusive), so
+#          that no data below this node will be copied by this filter.
+#          If option is absent, the limit is not applied, so that data
+#          from all backing layers may be copied.
+#
+# Since: 6.0
+##
+{ 'struct': 'BlockdevOptionsCor',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { '*bottom': 'str' } }
+
 ##
 # @BlockdevOptions:
 #
@@ -4011,7 +4029,7 @@
       'bochs':      'BlockdevOptionsGenericFormat',
       'cloop':      'BlockdevOptionsGenericFormat',
       'compress':   'BlockdevOptionsGenericFormat',
-      'copy-on-read':'BlockdevOptionsGenericFormat',
+      'copy-on-read':'BlockdevOptionsCor',
       'dmg':        'BlockdevOptionsGenericFormat',
       'file':       'BlockdevOptionsFile',
       'ftp':        'BlockdevOptionsCurlFtp',