block: Implement bdrv_append() without bdrv_swap()
Remember all parent nodes and just change the pointers there instead of
swapping the contents of the BlockDriverState.
Handling of snapshot=on must be moved further down in bdrv_open()
because *pbs (which is the bs pointer in the BlockBackend) must already
be set before bdrv_append() is called. Otherwise bdrv_append() changes
the BB's pointer to the temporary snapshot, but bdrv_open() overwrites
it with the read-only original image.
We also need to be careful to update callers as the interface changes
(becomes less insane): Previously, the meaning of the two parameters was
inverted when bdrv_append() returns. Now any BDS pointers keep pointing
to the same node.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
diff --git a/block.c b/block.c
index 980437f..b4d2313 100644
--- a/block.c
+++ b/block.c
@@ -1516,15 +1516,6 @@
bdrv_refresh_filename(bs);
- /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
- * temporary snapshot afterwards. */
- if (snapshot_flags) {
- ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
- if (local_err) {
- goto close_and_fail;
- }
- }
-
/* Check if any unknown options were used */
if (options && (qdict_size(options) != 0)) {
const QDictEntry *entry = qdict_first(options);
@@ -1556,6 +1547,16 @@
QDECREF(options);
*pbs = bs;
+
+ /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
+ * temporary snapshot afterwards. */
+ if (snapshot_flags) {
+ ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
+ if (local_err) {
+ goto close_and_fail;
+ }
+ }
+
return 0;
fail:
@@ -2000,6 +2001,24 @@
bs_dest->enable_write_cache = bs_src->enable_write_cache;
+ /* r/w error */
+ bs_dest->on_read_error = bs_src->on_read_error;
+ bs_dest->on_write_error = bs_src->on_write_error;
+
+ /* i/o status */
+ bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
+ bs_dest->iostatus = bs_src->iostatus;
+
+ /* dirty bitmap */
+ bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
+}
+
+/* Fields that only need to be swapped if the contents of BDSes is swapped
+ * rather than pointers being changed in the parents, and throttling fields
+ * because only bdrv_swap() messes with internals of throttling. */
+static void bdrv_move_reference_fields(BlockDriverState *bs_dest,
+ BlockDriverState *bs_src)
+{
/* i/o throttled req */
bs_dest->throttle_state = bs_src->throttle_state,
bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
@@ -2014,23 +2033,6 @@
&bs_src->throttle_timers,
sizeof(ThrottleTimers));
- /* r/w error */
- bs_dest->on_read_error = bs_src->on_read_error;
- bs_dest->on_write_error = bs_src->on_write_error;
-
- /* i/o status */
- bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
- bs_dest->iostatus = bs_src->iostatus;
-
- /* dirty bitmap */
- bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
-}
-
-/* Fields that only need to be swapped if the contents of BDSes is swapped
- * rather than pointers being changed in the parents. */
-static void bdrv_move_reference_fields(BlockDriverState *bs_dest,
- BlockDriverState *bs_src)
-{
/* reference count */
bs_dest->refcnt = bs_src->refcnt;
@@ -2156,6 +2158,45 @@
bdrv_rebind(bs_old);
}
+static void change_parent_backing_link(BlockDriverState *from,
+ BlockDriverState *to)
+{
+ BdrvChild *c, *next;
+
+ QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
+ assert(c->role != &child_backing);
+ c->bs = to;
+ QLIST_REMOVE(c, next_parent);
+ QLIST_INSERT_HEAD(&to->parents, c, next_parent);
+ bdrv_ref(to);
+ bdrv_unref(from);
+ }
+ if (from->blk) {
+ blk_set_bs(from->blk, to);
+ if (!to->device_list.tqe_prev) {
+ QTAILQ_INSERT_BEFORE(from, to, device_list);
+ }
+ QTAILQ_REMOVE(&bdrv_states, from, device_list);
+ }
+}
+
+static void swap_feature_fields(BlockDriverState *bs_top,
+ BlockDriverState *bs_new)
+{
+ BlockDriverState tmp;
+
+ bdrv_move_feature_fields(&tmp, bs_top);
+ bdrv_move_feature_fields(bs_top, bs_new);
+ bdrv_move_feature_fields(bs_new, &tmp);
+
+ assert(!bs_new->throttle_state);
+ if (bs_top->throttle_state) {
+ assert(bs_top->io_limits_enabled);
+ bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
+ bdrv_io_limits_disable(bs_top);
+ }
+}
+
/*
* Add new bs contents at the top of an image chain while the chain is
* live, while keeping required fields on the top layer.
@@ -2166,14 +2207,29 @@
* bs_new must not be attached to a BlockBackend.
*
* This function does not create any image files.
+ *
+ * bdrv_append() takes ownership of a bs_new reference and unrefs it because
+ * that's what the callers commonly need. bs_new will be referenced by the old
+ * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
+ * reference of its own, it must call bdrv_ref().
*/
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
- bdrv_swap(bs_new, bs_top);
+ assert(!bdrv_requests_pending(bs_top));
+ assert(!bdrv_requests_pending(bs_new));
- /* The contents of 'tmp' will become bs_top, as we are
- * swapping bs_new and bs_top contents. */
- bdrv_set_backing_hd(bs_top, bs_new);
+ bdrv_ref(bs_top);
+ change_parent_backing_link(bs_top, bs_new);
+
+ /* Some fields always stay on top of the backing file chain */
+ swap_feature_fields(bs_top, bs_new);
+
+ bdrv_set_backing_hd(bs_new, bs_top);
+ bdrv_unref(bs_top);
+
+ /* bs_new is now referenced by its new parents, we don't need the
+ * additional reference any more. */
+ bdrv_unref(bs_new);
}
static void bdrv_delete(BlockDriverState *bs)
diff --git a/blockdev.c b/blockdev.c
index b633212..6c8cce4 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1546,7 +1546,7 @@
/* We don't need (or want) to use the transactional
* bdrv_reopen_multiple() across all the entries at once, because we
* don't want to abort all of them if one of them fails the reopen */
- bdrv_reopen(state->new_bs, state->new_bs->open_flags & ~BDRV_O_RDWR,
+ bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
NULL);
aio_context_release(state->aio_context);