block: Let bdrv_open_inherit() return the snapshot

If bdrv_open_inherit() creates a snapshot BDS and *pbs is NULL, that
snapshot BDS should be returned instead of the BDS under it.

This has worked so far because (nearly) all users of BDRV_O_SNAPSHOT use
blk_new_open() to create the BDS tree. bdrv_append() (which is called by
bdrv_append_temp_snapshot()) redirects pointers from parents (i.e. the
BB in this case) to the newly appended child (i.e. the overlay),
therefore, while bdrv_open_inherit() did not return the root BDS, the BB
still pointed to it.

The only instance where BDRV_O_SNAPSHOT is used but blk_new_open() is
not is in blockdev_init() if no BDS tree is created, and instead
blk_new() is used and the flags are stored in the BB root state.
However, qmp_blockdev_change_medium() filters the BDRV_O_SNAPSHOT flag
before invoking bdrv_open(), so it will not have any effect.

In any case, it would be nicer if bdrv_open_inherit() could just always
return the root of the BDS tree that has been created.

To this end, bdrv_append_temp_snapshot() now returns the snapshot BDS
instead of just appending it on top of the snapshotted BDS. Also, it
calls bdrv_ref() before bdrv_append() (which bdrv_open_inherit() has to
undo if not returning the overlay).

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
diff --git a/block.c b/block.c
index f85c5a2..17ee088 100644
--- a/block.c
+++ b/block.c
@@ -1422,8 +1422,10 @@
     return c;
 }
 
-static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
-                                     QDict *snapshot_options, Error **errp)
+static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
+                                                   int flags,
+                                                   QDict *snapshot_options,
+                                                   Error **errp)
 {
     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
     char *tmp_filename = g_malloc0(PATH_MAX + 1);
@@ -1439,7 +1441,6 @@
     /* Get the required size from the image */
     total_size = bdrv_getlength(bs);
     if (total_size < 0) {
-        ret = total_size;
         error_setg_errno(errp, -total_size, "Could not get image size");
         goto out;
     }
@@ -1479,12 +1480,19 @@
         goto out;
     }
 
+    /* bdrv_append() consumes a strong reference to bs_snapshot (i.e. it will
+     * call bdrv_unref() on it), so in order to be able to return one, we have
+     * to increase bs_snapshot's refcount here */
+    bdrv_ref(bs_snapshot);
     bdrv_append(bs_snapshot, bs);
 
+    g_free(tmp_filename);
+    return bs_snapshot;
+
 out:
     QDECREF(snapshot_options);
     g_free(tmp_filename);
-    return ret;
+    return NULL;
 }
 
 /*
@@ -1704,17 +1712,42 @@
     }
 
     QDECREF(options);
-    *pbs = bs;
 
     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
      * temporary snapshot afterwards. */
     if (snapshot_flags) {
-        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
-                                        &local_err);
+        BlockDriverState *snapshot_bs;
+        snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags,
+                                                snapshot_options, &local_err);
         snapshot_options = NULL;
         if (local_err) {
+            ret = -EINVAL;
             goto close_and_fail;
         }
+        if (!*pbs) {
+            /* We are not going to return bs but the overlay on top of it
+             * (snapshot_bs); thus, we have to drop the strong reference to bs
+             * (which we obtained by calling bdrv_new()). bs will not be
+             * deleted, though, because the overlay still has a reference to it.
+             */
+            bdrv_unref(bs);
+            bs = snapshot_bs;
+        } else {
+            /* We are not going to return snapshot_bs, so we have to drop the
+             * strong reference to it (which was returned by
+             * bdrv_append_temp_snapshot()). snapshot_bs will not be deleted,
+             * though, because bdrv_append_temp_snapshot() made all parental
+             * references to bs (*pbs) point to snapshot_bs.
+             * In fact, if *pbs was not NULL, we are not going to return any new
+             * BDS. But we do not need to decrement bs's refcount here as is
+             * done above, because with a non-NULL *pbs this function never even
+             * had a strong reference to bs. */
+            bdrv_unref(snapshot_bs);
+        }
+    }
+
+    if (!*pbs) {
+        *pbs = bs;
     }
 
     return 0;