Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches:
- file-posix: Fix O_DIRECT alignment detection
- Fixes for concurrent block jobs
- block-backend: Queue requests while drained (fix IDE vs. job crashes)
- qemu-img convert: Deprecate using -n and -o together
- iotests: Migration tests with filter nodes
- iotests: More media change tests
# gpg: Signature made Fri 16 Aug 2019 10:29:18 BST
# gpg: using RSA key 7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6
* remotes/kevin/tags/for-upstream:
file-posix: Handle undetectable alignment
qemu-img convert: Deprecate using -n and -o together
block-backend: Queue requests while drained
mirror: Keep mirror_top_bs drained after dropping permissions
block: Remove blk_pread_unthrottled()
iotests: Add test for concurrent stream/commit
tests: Test mid-drain bdrv_replace_child_noperm()
tests: Test polling in bdrv_drop_intermediate()
block: Reduce (un)drains when replacing a child
block: Keep subtree drained in drop_intermediate
block: Simplify bdrv_filter_default_perms()
iotests: Test migration with all kinds of filter nodes
iotests: Move migration helpers to iotests.py
iotests/118: Add -blockdev based tests
iotests/118: Create test classes dynamically
iotests/118: Test media change for scsi-cd
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/block.c b/block.c
index 1b67b49..2a2d069 100644
--- a/block.c
+++ b/block.c
@@ -2169,16 +2169,8 @@
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
- if (c == NULL) {
- *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
- *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
- return;
- }
-
- *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) |
- (c->perm & DEFAULT_PERM_UNCHANGED);
- *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) |
- (c->shared_perm & DEFAULT_PERM_UNCHANGED);
+ *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
+ *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
}
void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
@@ -2239,13 +2231,27 @@
BlockDriverState *new_bs)
{
BlockDriverState *old_bs = child->bs;
- int i;
+ int new_bs_quiesce_counter;
+ int drain_saldo;
assert(!child->frozen);
if (old_bs && new_bs) {
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
}
+
+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
+ drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
+
+ /*
+ * If the new child node is drained but the old one was not, flush
+ * all outstanding requests to the old child node.
+ */
+ while (drain_saldo > 0 && child->role->drained_begin) {
+ bdrv_parent_drained_begin_single(child, true);
+ drain_saldo--;
+ }
+
if (old_bs) {
/* Detach first so that the recursive drain sections coming from @child
* are already gone and we only end the drain sections that came from
@@ -2253,28 +2259,22 @@
if (child->role->detach) {
child->role->detach(child);
}
- while (child->parent_quiesce_counter) {
- bdrv_parent_drained_end_single(child);
- }
QLIST_REMOVE(child, next_parent);
- } else {
- assert(child->parent_quiesce_counter == 0);
}
child->bs = new_bs;
if (new_bs) {
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
- if (new_bs->quiesce_counter) {
- int num = new_bs->quiesce_counter;
- if (child->role->parent_is_bds) {
- num -= bdrv_drain_all_count;
- }
- assert(num >= 0);
- for (i = 0; i < num; i++) {
- bdrv_parent_drained_begin_single(child, true);
- }
- }
+
+ /*
+ * Detaching the old node may have led to the new node's
+ * quiesce_counter having been decreased. Not a problem, we
+ * just need to recognize this here and then invoke
+ * drained_end appropriately more often.
+ */
+ assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
+ drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
/* Attach only after starting new drained sections, so that recursive
* drain sections coming from @child don't get an extra .drained_begin
@@ -2283,6 +2283,15 @@
child->role->attach(child);
}
}
+
+ /*
+ * If the old child node was drained but the new one is not, allow
+ * requests to come in only after the new node has been attached.
+ */
+ while (drain_saldo < 0 && child->role->drained_end) {
+ bdrv_parent_drained_end_single(child);
+ drain_saldo++;
+ }
}
/*
@@ -4500,6 +4509,7 @@
int ret = -EIO;
bdrv_ref(top);
+ bdrv_subtree_drained_begin(top);
if (!top->drv || !base->drv) {
goto exit;
@@ -4571,6 +4581,7 @@
ret = 0;
exit:
+ bdrv_subtree_drained_end(top);
bdrv_unref(top);
return ret;
}
diff --git a/block/backup.c b/block/backup.c
index b26c22c..4743c8f 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -644,6 +644,7 @@
if (ret < 0) {
goto error;
}
+ blk_set_disable_request_queuing(job->target, true);
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
diff --git a/block/block-backend.c b/block/block-backend.c
index 84e76bf..1c605d5 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -81,6 +81,9 @@
QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
int quiesce_counter;
+ CoQueue queued_requests;
+ bool disable_request_queuing;
+
VMChangeStateEntry *vmsh;
bool force_allow_inactivate;
@@ -341,6 +344,7 @@
block_acct_init(&blk->stats);
+ qemu_co_queue_init(&blk->queued_requests);
notifier_list_init(&blk->remove_bs_notifiers);
notifier_list_init(&blk->insert_bs_notifiers);
QLIST_INIT(&blk->aio_notifiers);
@@ -1098,6 +1102,11 @@
blk->allow_aio_context_change = allow;
}
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
+{
+ blk->disable_request_queuing = disable;
+}
+
static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
size_t size)
{
@@ -1129,13 +1138,24 @@
return 0;
}
+static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
+{
+ if (blk->quiesce_counter && !blk->disable_request_queuing) {
+ qemu_co_queue_wait(&blk->queued_requests, NULL);
+ }
+}
+
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
int ret;
- BlockDriverState *bs = blk_bs(blk);
+ BlockDriverState *bs;
+ blk_wait_while_drained(blk);
+
+ /* Call blk_bs() only after waiting, the graph may have changed */
+ bs = blk_bs(blk);
trace_blk_co_preadv(blk, bs, offset, bytes, flags);
ret = blk_check_byte_request(blk, offset, bytes);
@@ -1161,8 +1181,12 @@
BdrvRequestFlags flags)
{
int ret;
- BlockDriverState *bs = blk_bs(blk);
+ BlockDriverState *bs;
+ blk_wait_while_drained(blk);
+
+ /* Call blk_bs() only after waiting, the graph may have changed */
+ bs = blk_bs(blk);
trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
ret = blk_check_byte_request(blk, offset, bytes);
@@ -1239,22 +1263,6 @@
return rwco.ret;
}
-int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
- int count)
-{
- int ret;
-
- ret = blk_check_byte_request(blk, offset, count);
- if (ret < 0) {
- return ret;
- }
-
- blk_root_drained_begin(blk->root);
- ret = blk_pread(blk, offset, buf, count);
- blk_root_drained_end(blk->root, NULL);
- return ret;
-}
-
int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int bytes, BdrvRequestFlags flags)
{
@@ -1367,6 +1375,12 @@
BlkRwCo *rwco = &acb->rwco;
QEMUIOVector *qiov = rwco->iobuf;
+ if (rwco->blk->quiesce_counter) {
+ blk_dec_in_flight(rwco->blk);
+ blk_wait_while_drained(rwco->blk);
+ blk_inc_in_flight(rwco->blk);
+ }
+
assert(qiov->size == acb->bytes);
rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
qiov, rwco->flags);
@@ -1379,6 +1393,12 @@
BlkRwCo *rwco = &acb->rwco;
QEMUIOVector *qiov = rwco->iobuf;
+ if (rwco->blk->quiesce_counter) {
+ blk_dec_in_flight(rwco->blk);
+ blk_wait_while_drained(rwco->blk);
+ blk_inc_in_flight(rwco->blk);
+ }
+
assert(!qiov || qiov->size == acb->bytes);
rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
qiov, rwco->flags);
@@ -1500,6 +1520,8 @@
int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
+ blk_wait_while_drained(blk);
+
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -1540,7 +1562,11 @@
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
{
- int ret = blk_check_byte_request(blk, offset, bytes);
+ int ret;
+
+ blk_wait_while_drained(blk);
+
+ ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
@@ -1550,6 +1576,8 @@
int blk_co_flush(BlockBackend *blk)
{
+ blk_wait_while_drained(blk);
+
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -2250,6 +2278,9 @@
if (blk->dev_ops && blk->dev_ops->drained_end) {
blk->dev_ops->drained_end(blk->dev_opaque);
}
+ while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
+ /* Resume all queued requests */
+ }
}
}
diff --git a/block/commit.c b/block/commit.c
index 2c5a6d4..408ae15 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -350,6 +350,7 @@
if (ret < 0) {
goto fail;
}
+ blk_set_disable_request_queuing(s->base, true);
s->base_bs = base;
/* Required permissions are already taken with block_job_add_bdrv() */
@@ -358,6 +359,7 @@
if (ret < 0) {
goto fail;
}
+ blk_set_disable_request_queuing(s->top, true);
s->backing_file_str = g_strdup(backing_file_str);
s->on_error = on_error;
diff --git a/block/file-posix.c b/block/file-posix.c
index 4479cc7..b8b4dad 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -323,6 +323,7 @@
BDRVRawState *s = bs->opaque;
char *buf;
size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize());
+ size_t alignments[] = {1, 512, 1024, 2048, 4096};
/* For SCSI generic devices the alignment is not really used.
With buffered I/O, we don't have any restrictions. */
@@ -349,25 +350,38 @@
}
#endif
- /* If we could not get the sizes so far, we can only guess them */
- if (!s->buf_align) {
+ /*
+ * If we could not get the sizes so far, we can only guess them. First try
+ * to detect request alignment, since it is more likely to succeed. Then
+ * try to detect buf_align, which cannot be detected in some cases (e.g.
+ * Gluster). If buf_align cannot be detected, we fallback to the value of
+ * request_alignment.
+ */
+
+ if (!bs->bl.request_alignment) {
+ int i;
size_t align;
- buf = qemu_memalign(max_align, 2 * max_align);
- for (align = 512; align <= max_align; align <<= 1) {
- if (raw_is_io_aligned(fd, buf + align, max_align)) {
- s->buf_align = align;
+ buf = qemu_memalign(max_align, max_align);
+ for (i = 0; i < ARRAY_SIZE(alignments); i++) {
+ align = alignments[i];
+ if (raw_is_io_aligned(fd, buf, align)) {
+ /* Fallback to safe value. */
+ bs->bl.request_alignment = (align != 1) ? align : max_align;
break;
}
}
qemu_vfree(buf);
}
- if (!bs->bl.request_alignment) {
+ if (!s->buf_align) {
+ int i;
size_t align;
- buf = qemu_memalign(s->buf_align, max_align);
- for (align = 512; align <= max_align; align <<= 1) {
- if (raw_is_io_aligned(fd, buf, align)) {
- bs->bl.request_alignment = align;
+ buf = qemu_memalign(max_align, 2 * max_align);
+ for (i = 0; i < ARRAY_SIZE(alignments); i++) {
+ align = alignments[i];
+ if (raw_is_io_aligned(fd, buf + align, max_align)) {
+ /* Fallback to request_aligment. */
+ s->buf_align = (align != 1) ? align : bs->bl.request_alignment;
break;
}
}
diff --git a/block/mirror.c b/block/mirror.c
index 9f5c59e..9b36391 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -656,7 +656,10 @@
s->target = NULL;
/* We don't access the source any more. Dropping any WRITE/RESIZE is
- * required before it could become a backing file of target_bs. */
+ * required before it could become a backing file of target_bs. Not having
+ * these permissions any more means that we can't allow any new requests on
+ * mirror_top_bs from now on, so keep it drained. */
+ bdrv_drained_begin(mirror_top_bs);
bs_opaque->stop = true;
bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
&error_abort);
@@ -724,6 +727,7 @@
bs_opaque->job = NULL;
bdrv_drained_end(src);
+ bdrv_drained_end(mirror_top_bs);
s->in_drain = false;
bdrv_unref(mirror_top_bs);
bdrv_unref(src);
@@ -1632,6 +1636,7 @@
blk_set_force_allow_inactivate(s->target);
}
blk_set_allow_aio_context_change(s->target, true);
+ blk_set_disable_request_queuing(s->target, true);
s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
diff --git a/blockjob.c b/blockjob.c
index 45877c8..6e32d1a 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -446,6 +446,9 @@
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
+ /* Disable request queuing in the BlockBackend to avoid deadlocks on drain:
+ * The job reports that it's busy until it reaches a pause point. */
+ blk_set_disable_request_queuing(blk, true);
blk_set_allow_aio_context_change(blk, true);
/* Only set speed when necessary to avoid NotSupported error */
diff --git a/hw/block/hd-geometry.c b/hw/block/hd-geometry.c
index 79384a2..dcbccee 100644
--- a/hw/block/hd-geometry.c
+++ b/hw/block/hd-geometry.c
@@ -63,12 +63,7 @@
blk_get_geometry(blk, &nb_sectors);
- /**
- * The function will be invoked during startup not only in sync I/O mode,
- * but also in async I/O mode. So the I/O throttling function has to
- * be disabled temporarily here, not permanently.
- */
- if (blk_pread_unthrottled(blk, 0, buf, BDRV_SECTOR_SIZE) < 0) {
+ if (blk_pread(blk, 0, buf, BDRV_SECTOR_SIZE) < 0) {
return -1;
}
/* test msdos magic */
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 733c495..368d53a 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -104,6 +104,7 @@
void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
void blk_iostatus_enable(BlockBackend *blk);
bool blk_iostatus_is_enabled(const BlockBackend *blk);
BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
@@ -117,8 +118,6 @@
BlockBackend *blk_by_dev(void *dev);
BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
-int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
- int bytes);
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi
index fff07bb..f7680c0 100644
--- a/qemu-deprecated.texi
+++ b/qemu-deprecated.texi
@@ -305,6 +305,13 @@
it is to reinvoke @command{qemu-nbd -c /dev/nbd0} limited to just a
subset of the image.
+@subsection qemu-img convert -n -o (since 4.2.0)
+
+All options specified in @option{-o} are image creation options, so
+they have no effect when used with @option{-n} to skip image creation.
+Silently ignored options can be confusing, so this combination of
+options will be made an error in future versions.
+
@section Build system
@subsection Python 2 support (since 4.1.0)
diff --git a/qemu-img.c b/qemu-img.c
index 31c72dc..c920e35 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2231,6 +2231,11 @@
goto fail_getopt;
}
+ if (skip_create && options) {
+ warn_report("-o has no effect when skipping image creation");
+ warn_report("This will become an error in future QEMU versions.");
+ }
+
s.src_num = argc - optind - 1;
out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
diff --git a/tests/qemu-iotests/118 b/tests/qemu-iotests/118
index 499c5f0..6f45779 100755
--- a/tests/qemu-iotests/118
+++ b/tests/qemu-iotests/118
@@ -33,6 +33,8 @@
return 'ide-cd'
elif interface == 'floppy':
return 'floppy'
+ elif interface == 'scsi':
+ return 'scsi-cd'
else:
return None
@@ -40,10 +42,14 @@
has_opened = False
has_closed = False
+ device_name = 'qdev0'
+ use_drive = False
+
def process_events(self):
for event in self.vm.get_qmp_events(wait=False):
if (event['event'] == 'DEVICE_TRAY_MOVED' and
- event['data']['device'] == 'drive0'):
+ (event['data']['device'] == 'drive0' or
+ event['data']['id'] == self.device_name)):
if event['data']['tray-open'] == False:
self.has_closed = True
else:
@@ -67,9 +73,11 @@
class GeneralChangeTestsBaseClass(ChangeBaseClass):
- device_name = 'qdev0'
-
def test_change(self):
+ # 'change' requires a drive name, so skip the test for blockdev
+ if not self.use_drive:
+ return
+
result = self.vm.qmp('change', device='drive0', target=new_img,
arg=iotests.imgfmt)
self.assert_qmp(result, 'return', {})
@@ -292,13 +300,21 @@
class TestInitiallyFilled(GeneralChangeTestsBaseClass):
was_empty = False
- def setUp(self, media, interface):
+ def setUp(self):
qemu_img('create', '-f', iotests.imgfmt, old_img, '1440k')
qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k')
self.vm = iotests.VM()
- self.vm.add_drive(old_img, 'media=%s' % media, 'none')
+ if self.use_drive:
+ self.vm.add_drive(old_img, 'media=%s' % self.media, 'none')
+ else:
+ self.vm.add_blockdev([ 'node-name=drive0',
+ 'driver=%s' % iotests.imgfmt,
+ 'file.driver=file',
+ 'file.filename=%s' % old_img ])
+ if self.interface == 'scsi':
+ self.vm.add_device('virtio-scsi-pci')
self.vm.add_device('%s,drive=drive0,id=%s' %
- (interface_to_device_name(interface),
+ (interface_to_device_name(self.interface),
self.device_name))
self.vm.launch()
@@ -327,11 +343,16 @@
class TestInitiallyEmpty(GeneralChangeTestsBaseClass):
was_empty = True
- def setUp(self, media, interface):
+ def setUp(self):
qemu_img('create', '-f', iotests.imgfmt, new_img, '1440k')
- self.vm = iotests.VM().add_drive(None, 'media=%s' % media, 'none')
- self.vm.add_device('%s,drive=drive0,id=%s' %
- (interface_to_device_name(interface),
+ self.vm = iotests.VM()
+ if self.use_drive:
+ self.vm.add_drive(None, 'media=%s' % self.media, 'none')
+ if self.interface == 'scsi':
+ self.vm.add_device('virtio-scsi-pci')
+ self.vm.add_device('%s,%sid=%s' %
+ (interface_to_device_name(self.interface),
+ 'drive=drive0,' if self.use_drive else '',
self.device_name))
self.vm.launch()
@@ -349,36 +370,25 @@
# Should be a no-op
self.assert_qmp(result, 'return', {})
-class TestCDInitiallyFilled(TestInitiallyFilled):
- TestInitiallyFilled = TestInitiallyFilled
- has_real_tray = True
+# Do this in a function to avoid leaking variables like case into the global
+# name space (otherwise tests would be run for the abstract base classes)
+def create_basic_test_classes():
+ for (media, interface, has_real_tray) in [ ('cdrom', 'ide', True),
+ ('cdrom', 'scsi', True),
+ ('disk', 'floppy', False) ]:
- def setUp(self):
- self.TestInitiallyFilled.setUp(self, 'cdrom', 'ide')
+ for case in [ TestInitiallyFilled, TestInitiallyEmpty ]:
+ for use_drive in [ True, False ]:
+ attr = { 'media': media,
+ 'interface': interface,
+ 'has_real_tray': has_real_tray,
+ 'use_drive': use_drive }
-class TestCDInitiallyEmpty(TestInitiallyEmpty):
- TestInitiallyEmpty = TestInitiallyEmpty
- has_real_tray = True
+ name = '%s_%s_%s_%s' % (case.__name__, media, interface,
+ 'drive' if use_drive else 'blockdev')
+ globals()[name] = type(name, (case, ), attr)
- def setUp(self):
- self.TestInitiallyEmpty.setUp(self, 'cdrom', 'ide')
-
-class TestFloppyInitiallyFilled(TestInitiallyFilled):
- TestInitiallyFilled = TestInitiallyFilled
- has_real_tray = False
-
- def setUp(self):
- self.TestInitiallyFilled.setUp(self, 'disk', 'floppy')
-
-class TestFloppyInitiallyEmpty(TestInitiallyEmpty):
- TestInitiallyEmpty = TestInitiallyEmpty
- has_real_tray = False
-
- def setUp(self):
- self.TestInitiallyEmpty.setUp(self, 'disk', 'floppy')
- # FDDs not having a real tray and there not being a medium inside the
- # tray at startup means the tray will be considered open
- self.has_opened = True
+create_basic_test_classes()
class TestChangeReadOnly(ChangeBaseClass):
device_name = 'qdev0'
diff --git a/tests/qemu-iotests/118.out b/tests/qemu-iotests/118.out
index 4823c11..bf5bfd5 100644
--- a/tests/qemu-iotests/118.out
+++ b/tests/qemu-iotests/118.out
@@ -1,5 +1,5 @@
-...............................................................
+.......................................................................................................................................................................
----------------------------------------------------------------------
-Ran 63 tests
+Ran 167 tests
OK
diff --git a/tests/qemu-iotests/234 b/tests/qemu-iotests/234
index c4c26bc..34c818c 100755
--- a/tests/qemu-iotests/234
+++ b/tests/qemu-iotests/234
@@ -26,22 +26,6 @@
iotests.verify_image_format(supported_fmts=['qcow2'])
iotests.verify_platform(['linux'])
-def enable_migration_events(vm, name):
- iotests.log('Enabling migration QMP events on %s...' % name)
- iotests.log(vm.qmp('migrate-set-capabilities', capabilities=[
- {
- 'capability': 'events',
- 'state': True
- }
- ]))
-
-def wait_migration(vm):
- while True:
- event = vm.event_wait('MIGRATION')
- iotests.log(event, filters=[iotests.filter_qmp_event])
- if event['data']['status'] == 'completed':
- break
-
with iotests.FilePath('img') as img_path, \
iotests.FilePath('backing') as backing_path, \
iotests.FilePath('mig_fifo_a') as fifo_a, \
@@ -62,7 +46,7 @@
.add_blockdev('%s,file=drive0-backing-file,node-name=drive0-backing' % (iotests.imgfmt))
.launch())
- enable_migration_events(vm_a, 'A')
+ vm_a.enable_migration_events('A')
iotests.log('Launching destination VM...')
(vm_b.add_blockdev('file,filename=%s,node-name=drive0-file' % (img_path))
@@ -72,7 +56,7 @@
.add_incoming("exec: cat '%s'" % (fifo_a))
.launch())
- enable_migration_events(vm_b, 'B')
+ vm_b.enable_migration_events('B')
# Add a child node that was created after the parent node. The reverse case
# is covered by the -blockdev options above.
@@ -85,9 +69,9 @@
iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo_a)))
with iotests.Timeout(3, 'Migration does not complete'):
# Wait for the source first (which includes setup=setup)
- wait_migration(vm_a)
+ vm_a.wait_migration()
# Wait for the destination second (which does not)
- wait_migration(vm_b)
+ vm_b.wait_migration()
iotests.log(vm_a.qmp('query-migrate')['return']['status'])
iotests.log(vm_b.qmp('query-migrate')['return']['status'])
@@ -105,7 +89,7 @@
.add_incoming("exec: cat '%s'" % (fifo_b))
.launch())
- enable_migration_events(vm_a, 'A')
+ vm_a.enable_migration_events('A')
iotests.log(vm_a.qmp('blockdev-snapshot', node='drive0-backing',
overlay='drive0'))
@@ -114,9 +98,9 @@
iotests.log(vm_b.qmp('migrate', uri='exec:cat >%s' % (fifo_b)))
with iotests.Timeout(3, 'Migration does not complete'):
# Wait for the source first (which includes setup=setup)
- wait_migration(vm_b)
+ vm_b.wait_migration()
# Wait for the destination second (which does not)
- wait_migration(vm_a)
+ vm_a.wait_migration()
iotests.log(vm_a.qmp('query-migrate')['return']['status'])
iotests.log(vm_b.qmp('query-migrate')['return']['status'])
diff --git a/tests/qemu-iotests/258 b/tests/qemu-iotests/258
new file mode 100755
index 0000000..b84cf02
--- /dev/null
+++ b/tests/qemu-iotests/258
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+#
+# Very specific tests for adjacent commit/stream block jobs
+#
+# Copyright (C) 2019 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Creator/Owner: Max Reitz <mreitz@redhat.com>
+
+import iotests
+from iotests import log, qemu_img, qemu_io_silent, \
+ filter_qmp_testfiles, filter_qmp_imgfmt
+
+# Need backing file and change-backing-file support
+iotests.verify_image_format(supported_fmts=['qcow2', 'qed'])
+iotests.verify_platform(['linux'])
+
+
+# Returns a node for blockdev-add
+def node(node_name, path, backing=None, fmt=None, throttle=None):
+ if fmt is None:
+ fmt = iotests.imgfmt
+
+ res = {
+ 'node-name': node_name,
+ 'driver': fmt,
+ 'file': {
+ 'driver': 'file',
+ 'filename': path
+ }
+ }
+
+ if backing is not None:
+ res['backing'] = backing
+
+ if throttle:
+ res['file'] = {
+ 'driver': 'throttle',
+ 'throttle-group': throttle,
+ 'file': res['file']
+ }
+
+ return res
+
+# Finds a node in the debug block graph
+def find_graph_node(graph, node_id):
+ return next(node for node in graph['nodes'] if node['id'] == node_id)
+
+
+def test_concurrent_finish(write_to_stream_node):
+ log('')
+ log('=== Commit and stream finish concurrently (letting %s write) ===' % \
+ ('stream' if write_to_stream_node else 'commit'))
+ log('')
+
+ # All chosen in such a way that when the commit job wants to
+ # finish, it polls and thus makes stream finish concurrently --
+ # and the other way around, depending on whether the commit job
+ # is finalized before stream completes or not.
+
+ with iotests.FilePath('node4.img') as node4_path, \
+ iotests.FilePath('node3.img') as node3_path, \
+ iotests.FilePath('node2.img') as node2_path, \
+ iotests.FilePath('node1.img') as node1_path, \
+ iotests.FilePath('node0.img') as node0_path, \
+ iotests.VM() as vm:
+
+ # It is important to use raw for the base layer (so that
+ # permissions are just handed through to the protocol layer)
+ assert qemu_img('create', '-f', 'raw', node0_path, '64M') == 0
+
+ stream_throttle=None
+ commit_throttle=None
+
+ for path in [node1_path, node2_path, node3_path, node4_path]:
+ assert qemu_img('create', '-f', iotests.imgfmt, path, '64M') == 0
+
+ if write_to_stream_node:
+ # This is what (most of the time) makes commit finish
+ # earlier and then pull in stream
+ assert qemu_io_silent(node2_path,
+ '-c', 'write %iK 64K' % (65536 - 192),
+ '-c', 'write %iK 64K' % (65536 - 64)) == 0
+
+ stream_throttle='tg'
+ else:
+ # And this makes stream finish earlier
+ assert qemu_io_silent(node1_path,
+ '-c', 'write %iK 64K' % (65536 - 64)) == 0
+
+ commit_throttle='tg'
+
+ vm.launch()
+
+ vm.qmp_log('object-add',
+ qom_type='throttle-group',
+ id='tg',
+ props={
+ 'x-iops-write': 1,
+ 'x-iops-write-max': 1
+ })
+
+ vm.qmp_log('blockdev-add',
+ filters=[filter_qmp_testfiles, filter_qmp_imgfmt],
+ **node('node4', node4_path, throttle=stream_throttle,
+ backing=node('node3', node3_path,
+ backing=node('node2', node2_path,
+ backing=node('node1', node1_path,
+ backing=node('node0', node0_path, throttle=commit_throttle,
+ fmt='raw'))))))
+
+ vm.qmp_log('block-commit',
+ job_id='commit',
+ device='node4',
+ filter_node_name='commit-filter',
+ top_node='node1',
+ base_node='node0',
+ auto_finalize=False)
+
+ vm.qmp_log('block-stream',
+ job_id='stream',
+ device='node3',
+ base_node='commit-filter')
+
+ if write_to_stream_node:
+ vm.run_job('commit', auto_finalize=False, auto_dismiss=True)
+ vm.run_job('stream', auto_finalize=True, auto_dismiss=True)
+ else:
+ # No, the jobs do not really finish concurrently here,
+ # the stream job does complete strictly before commit.
+ # But still, this is close enough for what we want to
+ # test.
+ vm.run_job('stream', auto_finalize=True, auto_dismiss=True)
+ vm.run_job('commit', auto_finalize=False, auto_dismiss=True)
+
+ # Assert that the backing node of node3 is node 0 now
+ graph = vm.qmp('x-debug-query-block-graph')['return']
+ for edge in graph['edges']:
+ if edge['name'] == 'backing' and \
+ find_graph_node(graph, edge['parent'])['name'] == 'node3':
+ assert find_graph_node(graph, edge['child'])['name'] == 'node0'
+ break
+
+
+def main():
+ log('Running tests:')
+ test_concurrent_finish(True)
+ test_concurrent_finish(False)
+
+if __name__ == '__main__':
+ main()
diff --git a/tests/qemu-iotests/258.out b/tests/qemu-iotests/258.out
new file mode 100644
index 0000000..ce6e9ba
--- /dev/null
+++ b/tests/qemu-iotests/258.out
@@ -0,0 +1,33 @@
+Running tests:
+
+=== Commit and stream finish concurrently (letting stream write) ===
+
+{"execute": "object-add", "arguments": {"id": "tg", "props": {"x-iops-write": 1, "x-iops-write-max": 1}, "qom-type": "throttle-group"}}
+{"return": {}}
+{"execute": "blockdev-add", "arguments": {"backing": {"backing": {"backing": {"backing": {"driver": "raw", "file": {"driver": "file", "filename": "TEST_DIR/PID-node0.img"}, "node-name": "node0"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node1.img"}, "node-name": "node1"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node2.img"}, "node-name": "node2"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node3.img"}, "node-name": "node3"}, "driver": "IMGFMT", "file": {"driver": "throttle", "file": {"driver": "file", "filename": "TEST_DIR/PID-node4.img"}, "throttle-group": "tg"}, "node-name": "node4"}}
+{"return": {}}
+{"execute": "block-commit", "arguments": {"auto-finalize": false, "base-node": "node0", "device": "node4", "filter-node-name": "commit-filter", "job-id": "commit", "top-node": "node1"}}
+{"return": {}}
+{"execute": "block-stream", "arguments": {"base-node": "commit-filter", "device": "node3", "job-id": "stream"}}
+{"return": {}}
+{"execute": "job-finalize", "arguments": {"id": "commit"}}
+{"return": {}}
+{"data": {"id": "commit", "type": "commit"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"device": "commit", "len": 67108864, "offset": 67108864, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"device": "stream", "len": 67108864, "offset": 67108864, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+
+=== Commit and stream finish concurrently (letting commit write) ===
+
+{"execute": "object-add", "arguments": {"id": "tg", "props": {"x-iops-write": 1, "x-iops-write-max": 1}, "qom-type": "throttle-group"}}
+{"return": {}}
+{"execute": "blockdev-add", "arguments": {"backing": {"backing": {"backing": {"backing": {"driver": "raw", "file": {"driver": "throttle", "file": {"driver": "file", "filename": "TEST_DIR/PID-node0.img"}, "throttle-group": "tg"}, "node-name": "node0"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node1.img"}, "node-name": "node1"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node2.img"}, "node-name": "node2"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node3.img"}, "node-name": "node3"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node4.img"}, "node-name": "node4"}}
+{"return": {}}
+{"execute": "block-commit", "arguments": {"auto-finalize": false, "base-node": "node0", "device": "node4", "filter-node-name": "commit-filter", "job-id": "commit", "top-node": "node1"}}
+{"return": {}}
+{"execute": "block-stream", "arguments": {"base-node": "commit-filter", "device": "node3", "job-id": "stream"}}
+{"return": {}}
+{"data": {"device": "stream", "len": 67108864, "offset": 67108864, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"execute": "job-finalize", "arguments": {"id": "commit"}}
+{"return": {}}
+{"data": {"id": "commit", "type": "commit"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"device": "commit", "len": 67108864, "offset": 67108864, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
diff --git a/tests/qemu-iotests/262 b/tests/qemu-iotests/262
new file mode 100755
index 0000000..398f635
--- /dev/null
+++ b/tests/qemu-iotests/262
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2019 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
+#
+# Test migration with filter drivers present. Keep everything in an
+# iothread just for fun.
+
+import iotests
+import os
+
+iotests.verify_image_format(supported_fmts=['qcow2'])
+iotests.verify_platform(['linux'])
+
+with iotests.FilePath('img') as img_path, \
+ iotests.FilePath('mig_fifo') as fifo, \
+ iotests.VM(path_suffix='a') as vm_a, \
+ iotests.VM(path_suffix='b') as vm_b:
+
+ def add_opts(vm):
+ vm.add_object('iothread,id=iothread0')
+ vm.add_object('throttle-group,id=tg0,x-bps-total=65536')
+ vm.add_blockdev('file,filename=%s,node-name=drive0-file' % (img_path))
+ vm.add_blockdev('%s,file=drive0-file,node-name=drive0-fmt' % (iotests.imgfmt))
+ vm.add_blockdev('copy-on-read,file=drive0-fmt,node-name=drive0-cor')
+ vm.add_blockdev('throttle,file=drive0-cor,node-name=drive0-throttle,throttle-group=tg0')
+ vm.add_blockdev('blkdebug,image=drive0-throttle,node-name=drive0-dbg')
+ vm.add_blockdev('null-co,node-name=null,read-zeroes=on')
+ vm.add_blockdev('blkverify,test=drive0-dbg,raw=null,node-name=drive0-verify')
+
+ if iotests.supports_quorum():
+ vm.add_blockdev('quorum,children.0=drive0-verify,vote-threshold=1,node-name=drive0-quorum')
+ root = "drive0-quorum"
+ else:
+ root = "drive0-verify"
+
+ vm.add_device('virtio-blk,drive=%s,iothread=iothread0' % root)
+
+ iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, img_path, '64M')
+
+ os.mkfifo(fifo)
+
+ iotests.log('Launching source VM...')
+ add_opts(vm_a)
+ vm_a.launch()
+
+ vm_a.enable_migration_events('A')
+
+ iotests.log('Launching destination VM...')
+ add_opts(vm_b)
+ vm_b.add_incoming("exec: cat '%s'" % (fifo))
+ vm_b.launch()
+
+ vm_b.enable_migration_events('B')
+
+ iotests.log('Starting migration to B...')
+ iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo)))
+ with iotests.Timeout(3, 'Migration does not complete'):
+ # Wait for the source first (which includes setup=setup)
+ vm_a.wait_migration()
+ # Wait for the destination second (which does not)
+ vm_b.wait_migration()
+
+ iotests.log(vm_a.qmp('query-migrate')['return']['status'])
+ iotests.log(vm_b.qmp('query-migrate')['return']['status'])
+
+ iotests.log(vm_a.qmp('query-status'))
+ iotests.log(vm_b.qmp('query-status'))
diff --git a/tests/qemu-iotests/262.out b/tests/qemu-iotests/262.out
new file mode 100644
index 0000000..5a58e5e
--- /dev/null
+++ b/tests/qemu-iotests/262.out
@@ -0,0 +1,17 @@
+Launching source VM...
+Enabling migration QMP events on A...
+{"return": {}}
+Launching destination VM...
+Enabling migration QMP events on B...
+{"return": {}}
+Starting migration to B...
+{"return": {}}
+{"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+completed
+completed
+{"return": {"running": false, "singlestep": false, "status": "postmigrate"}}
+{"return": {"running": true, "singlestep": false, "status": "running"}}
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index f13e5f2..5a37839 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -271,3 +271,5 @@
254 rw backing quick
255 rw quick
256 rw quick
+258 rw quick
+262 rw quick migration
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index ce74177..91172c3 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -583,6 +583,22 @@
elif status == 'null':
return error
+ def enable_migration_events(self, name):
+ log('Enabling migration QMP events on %s...' % name)
+ log(self.qmp('migrate-set-capabilities', capabilities=[
+ {
+ 'capability': 'events',
+ 'state': True
+ }
+ ]))
+
+ def wait_migration(self):
+ while True:
+ event = self.event_wait('MIGRATION')
+ log(event, filters=[filter_qmp_event])
+ if event['data']['status'] == 'completed':
+ break
+
def node_info(self, node_name):
nodes = self.qmp('query-named-block-nodes')
for x in nodes['return']:
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index 481b750..374bef6 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -101,6 +101,13 @@
nperm, nshared);
}
+static int bdrv_test_change_backing_file(BlockDriverState *bs,
+ const char *backing_file,
+ const char *backing_fmt)
+{
+ return 0;
+}
+
static BlockDriver bdrv_test = {
.format_name = "test",
.instance_size = sizeof(BDRVTestState),
@@ -112,6 +119,8 @@
.bdrv_co_drain_end = bdrv_test_co_drain_end,
.bdrv_child_perm = bdrv_test_child_perm,
+
+ .bdrv_change_backing_file = bdrv_test_change_backing_file,
};
static void aio_ret_cb(void *opaque, int ret)
@@ -678,6 +687,7 @@
&error_abort);
s = bs->opaque;
blk_insert_bs(blk, bs, &error_abort);
+ blk_set_disable_request_queuing(blk, true);
blk_set_aio_context(blk, ctx_a, &error_abort);
aio_context_acquire(ctx_a);
@@ -1672,6 +1682,466 @@
bdrv_unref(bs_child);
}
+
+typedef struct TestSimpleBlockJob {
+ BlockJob common;
+ bool should_complete;
+ bool *did_complete;
+} TestSimpleBlockJob;
+
+static int coroutine_fn test_simple_job_run(Job *job, Error **errp)
+{
+ TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job);
+
+ while (!s->should_complete) {
+ job_sleep_ns(job, 0);
+ }
+
+ return 0;
+}
+
+static void test_simple_job_clean(Job *job)
+{
+ TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job);
+ *s->did_complete = true;
+}
+
+static const BlockJobDriver test_simple_job_driver = {
+ .job_driver = {
+ .instance_size = sizeof(TestSimpleBlockJob),
+ .free = block_job_free,
+ .user_resume = block_job_user_resume,
+ .drain = block_job_drain,
+ .run = test_simple_job_run,
+ .clean = test_simple_job_clean,
+ },
+};
+
+static int drop_intermediate_poll_update_filename(BdrvChild *child,
+ BlockDriverState *new_base,
+ const char *filename,
+ Error **errp)
+{
+ /*
+ * We are free to poll here, which may change the block graph, if
+ * it is not drained.
+ */
+
+ /* If the job is not drained: Complete it, schedule job_exit() */
+ aio_poll(qemu_get_current_aio_context(), false);
+ /* If the job is not drained: Run job_exit(), finish the job */
+ aio_poll(qemu_get_current_aio_context(), false);
+
+ return 0;
+}
+
+/**
+ * Test a poll in the midst of bdrv_drop_intermediate().
+ *
+ * bdrv_drop_intermediate() calls BdrvChildRole.update_filename(),
+ * which can yield or poll. This may lead to graph changes, unless
+ * the whole subtree in question is drained.
+ *
+ * We test this on the following graph:
+ *
+ * Job
+ *
+ * |
+ * job-node
+ * |
+ * v
+ *
+ * job-node
+ *
+ * |
+ * backing
+ * |
+ * v
+ *
+ * node-2 --chain--> node-1 --chain--> node-0
+ *
+ * We drop node-1 with bdrv_drop_intermediate(top=node-1, base=node-0).
+ *
+ * This first updates node-2's backing filename by invoking
+ * drop_intermediate_poll_update_filename(), which polls twice. This
+ * causes the job to finish, which in turns causes the job-node to be
+ * deleted.
+ *
+ * bdrv_drop_intermediate() uses a QLIST_FOREACH_SAFE() loop, so it
+ * already has a pointer to the BdrvChild edge between job-node and
+ * node-1. When it tries to handle that edge, we probably get a
+ * segmentation fault because the object no longer exists.
+ *
+ *
+ * The solution is for bdrv_drop_intermediate() to drain top's
+ * subtree. This prevents graph changes from happening just because
+ * BdrvChildRole.update_filename() yields or polls. Thus, the block
+ * job is paused during that drained section and must finish before or
+ * after.
+ *
+ * (In addition, bdrv_replace_child() must keep the job paused.)
+ */
+static void test_drop_intermediate_poll(void)
+{
+ static BdrvChildRole chain_child_role;
+ BlockDriverState *chain[3];
+ TestSimpleBlockJob *job;
+ BlockDriverState *job_node;
+ bool job_has_completed = false;
+ int i;
+ int ret;
+
+ chain_child_role = child_backing;
+ chain_child_role.update_filename = drop_intermediate_poll_update_filename;
+
+ for (i = 0; i < 3; i++) {
+ char name[32];
+ snprintf(name, 32, "node-%i", i);
+
+ chain[i] = bdrv_new_open_driver(&bdrv_test, name, 0, &error_abort);
+ }
+
+ job_node = bdrv_new_open_driver(&bdrv_test, "job-node", BDRV_O_RDWR,
+ &error_abort);
+ bdrv_set_backing_hd(job_node, chain[1], &error_abort);
+
+ /*
+ * Establish the chain last, so the chain links are the first
+ * elements in the BDS.parents lists
+ */
+ for (i = 0; i < 3; i++) {
+ if (i) {
+ /* Takes the reference to chain[i - 1] */
+ chain[i]->backing = bdrv_attach_child(chain[i], chain[i - 1],
+ "chain", &chain_child_role,
+ &error_abort);
+ }
+ }
+
+ job = block_job_create("job", &test_simple_job_driver, NULL, job_node,
+ 0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort);
+
+ /* The job has a reference now */
+ bdrv_unref(job_node);
+
+ job->did_complete = &job_has_completed;
+
+ job_start(&job->common.job);
+ job->should_complete = true;
+
+ g_assert(!job_has_completed);
+ ret = bdrv_drop_intermediate(chain[1], chain[0], NULL);
+ g_assert(ret == 0);
+ g_assert(job_has_completed);
+
+ bdrv_unref(chain[2]);
+}
+
+
+typedef struct BDRVReplaceTestState {
+ bool was_drained;
+ bool was_undrained;
+ bool has_read;
+
+ int drain_count;
+
+ bool yield_before_read;
+ Coroutine *io_co;
+ Coroutine *drain_co;
+} BDRVReplaceTestState;
+
+static void bdrv_replace_test_close(BlockDriverState *bs)
+{
+}
+
+/**
+ * If @bs has a backing file:
+ * Yield if .yield_before_read is true (and wait for drain_begin to
+ * wake us up).
+ * Forward the read to bs->backing. Set .has_read to true.
+ * If drain_begin has woken us, wake it in turn.
+ *
+ * Otherwise:
+ * Set .has_read to true and return success.
+ */
+static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs,
+ uint64_t offset,
+ uint64_t bytes,
+ QEMUIOVector *qiov,
+ int flags)
+{
+ BDRVReplaceTestState *s = bs->opaque;
+
+ if (bs->backing) {
+ int ret;
+
+ g_assert(!s->drain_count);
+
+ s->io_co = qemu_coroutine_self();
+ if (s->yield_before_read) {
+ s->yield_before_read = false;
+ qemu_coroutine_yield();
+ }
+ s->io_co = NULL;
+
+ ret = bdrv_preadv(bs->backing, offset, qiov);
+ s->has_read = true;
+
+ /* Wake up drain_co if it runs */
+ if (s->drain_co) {
+ aio_co_wake(s->drain_co);
+ }
+
+ return ret;
+ }
+
+ s->has_read = true;
+ return 0;
+}
+
+/**
+ * If .drain_count is 0, wake up .io_co if there is one; and set
+ * .was_drained.
+ * Increment .drain_count.
+ */
+static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs)
+{
+ BDRVReplaceTestState *s = bs->opaque;
+
+ if (!s->drain_count) {
+ /* Keep waking io_co up until it is done */
+ s->drain_co = qemu_coroutine_self();
+ while (s->io_co) {
+ aio_co_wake(s->io_co);
+ s->io_co = NULL;
+ qemu_coroutine_yield();
+ }
+ s->drain_co = NULL;
+
+ s->was_drained = true;
+ }
+ s->drain_count++;
+}
+
+/**
+ * Reduce .drain_count, set .was_undrained once it reaches 0.
+ * If .drain_count reaches 0 and the node has a backing file, issue a
+ * read request.
+ */
+static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs)
+{
+ BDRVReplaceTestState *s = bs->opaque;
+
+ g_assert(s->drain_count > 0);
+ if (!--s->drain_count) {
+ int ret;
+
+ s->was_undrained = true;
+
+ if (bs->backing) {
+ char data;
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
+
+ /* Queue a read request post-drain */
+ ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
+ g_assert(ret >= 0);
+ }
+ }
+}
+
+static BlockDriver bdrv_replace_test = {
+ .format_name = "replace_test",
+ .instance_size = sizeof(BDRVReplaceTestState),
+
+ .bdrv_close = bdrv_replace_test_close,
+ .bdrv_co_preadv = bdrv_replace_test_co_preadv,
+
+ .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin,
+ .bdrv_co_drain_end = bdrv_replace_test_co_drain_end,
+
+ .bdrv_child_perm = bdrv_format_default_perms,
+};
+
+static void coroutine_fn test_replace_child_mid_drain_read_co(void *opaque)
+{
+ int ret;
+ char data;
+
+ ret = blk_co_pread(opaque, 0, 1, &data, 0);
+ g_assert(ret >= 0);
+}
+
+/**
+ * We test two things:
+ * (1) bdrv_replace_child_noperm() must not undrain the parent if both
+ * children are drained.
+ * (2) bdrv_replace_child_noperm() must never flush I/O requests to a
+ * drained child. If the old child is drained, it must flush I/O
+ * requests after the new one has been attached. If the new child
+ * is drained, it must flush I/O requests before the old one is
+ * detached.
+ *
+ * To do so, we create one parent node and two child nodes; then
+ * attach one of the children (old_child_bs) to the parent, then
+ * drain both old_child_bs and new_child_bs according to
+ * old_drain_count and new_drain_count, respectively, and finally
+ * we invoke bdrv_replace_node() to replace old_child_bs by
+ * new_child_bs.
+ *
+ * The test block driver we use here (bdrv_replace_test) has a read
+ * function that:
+ * - For the parent node, can optionally yield, and then forwards the
+ * read to bdrv_preadv(),
+ * - For the child node, just returns immediately.
+ *
+ * If the read yields, the drain_begin function will wake it up.
+ *
+ * The drain_end function issues a read on the parent once it is fully
+ * undrained (which simulates requests starting to come in again).
+ */
+static void do_test_replace_child_mid_drain(int old_drain_count,
+ int new_drain_count)
+{
+ BlockBackend *parent_blk;
+ BlockDriverState *parent_bs;
+ BlockDriverState *old_child_bs, *new_child_bs;
+ BDRVReplaceTestState *parent_s;
+ BDRVReplaceTestState *old_child_s, *new_child_s;
+ Coroutine *io_co;
+ int i;
+
+ parent_bs = bdrv_new_open_driver(&bdrv_replace_test, "parent", 0,
+ &error_abort);
+ parent_s = parent_bs->opaque;
+
+ parent_blk = blk_new(qemu_get_aio_context(),
+ BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
+ blk_insert_bs(parent_blk, parent_bs, &error_abort);
+
+ old_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "old-child", 0,
+ &error_abort);
+ new_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "new-child", 0,
+ &error_abort);
+ old_child_s = old_child_bs->opaque;
+ new_child_s = new_child_bs->opaque;
+
+ /* So that we can read something */
+ parent_bs->total_sectors = 1;
+ old_child_bs->total_sectors = 1;
+ new_child_bs->total_sectors = 1;
+
+ bdrv_ref(old_child_bs);
+ parent_bs->backing = bdrv_attach_child(parent_bs, old_child_bs, "child",
+ &child_backing, &error_abort);
+
+ for (i = 0; i < old_drain_count; i++) {
+ bdrv_drained_begin(old_child_bs);
+ }
+ for (i = 0; i < new_drain_count; i++) {
+ bdrv_drained_begin(new_child_bs);
+ }
+
+ if (!old_drain_count) {
+ /*
+ * Start a read operation that will yield, so it will not
+ * complete before the node is drained.
+ */
+ parent_s->yield_before_read = true;
+ io_co = qemu_coroutine_create(test_replace_child_mid_drain_read_co,
+ parent_blk);
+ qemu_coroutine_enter(io_co);
+ }
+
+ /* If we have started a read operation, it should have yielded */
+ g_assert(!parent_s->has_read);
+
+ /* Reset drained status so we can see what bdrv_replace_node() does */
+ parent_s->was_drained = false;
+ parent_s->was_undrained = false;
+
+ g_assert(parent_bs->quiesce_counter == old_drain_count);
+ bdrv_replace_node(old_child_bs, new_child_bs, &error_abort);
+ g_assert(parent_bs->quiesce_counter == new_drain_count);
+
+ if (!old_drain_count && !new_drain_count) {
+ /*
+ * From undrained to undrained drains and undrains the parent,
+ * because bdrv_replace_node() contains a drained section for
+ * @old_child_bs.
+ */
+ g_assert(parent_s->was_drained && parent_s->was_undrained);
+ } else if (!old_drain_count && new_drain_count) {
+ /*
+ * From undrained to drained should drain the parent and keep
+ * it that way.
+ */
+ g_assert(parent_s->was_drained && !parent_s->was_undrained);
+ } else if (old_drain_count && !new_drain_count) {
+ /*
+ * From drained to undrained should undrain the parent and
+ * keep it that way.
+ */
+ g_assert(!parent_s->was_drained && parent_s->was_undrained);
+ } else /* if (old_drain_count && new_drain_count) */ {
+ /*
+ * From drained to drained must not undrain the parent at any
+ * point
+ */
+ g_assert(!parent_s->was_drained && !parent_s->was_undrained);
+ }
+
+ if (!old_drain_count || !new_drain_count) {
+ /*
+ * If !old_drain_count, we have started a read request before
+ * bdrv_replace_node(). If !new_drain_count, the parent must
+ * have been undrained at some point, and
+ * bdrv_replace_test_co_drain_end() starts a read request
+ * then.
+ */
+ g_assert(parent_s->has_read);
+ } else {
+ /*
+ * If the parent was never undrained, there is no way to start
+ * a read request.
+ */
+ g_assert(!parent_s->has_read);
+ }
+
+ /* A drained child must have not received any request */
+ g_assert(!(old_drain_count && old_child_s->has_read));
+ g_assert(!(new_drain_count && new_child_s->has_read));
+
+ for (i = 0; i < new_drain_count; i++) {
+ bdrv_drained_end(new_child_bs);
+ }
+ for (i = 0; i < old_drain_count; i++) {
+ bdrv_drained_end(old_child_bs);
+ }
+
+ /*
+ * By now, bdrv_replace_test_co_drain_end() must have been called
+ * at some point while the new child was attached to the parent.
+ */
+ g_assert(parent_s->has_read);
+ g_assert(new_child_s->has_read);
+
+ blk_unref(parent_blk);
+ bdrv_unref(parent_bs);
+ bdrv_unref(old_child_bs);
+ bdrv_unref(new_child_bs);
+}
+
+static void test_replace_child_mid_drain(void)
+{
+ int old_drain_count, new_drain_count;
+
+ for (old_drain_count = 0; old_drain_count < 2; old_drain_count++) {
+ for (new_drain_count = 0; new_drain_count < 2; new_drain_count++) {
+ do_test_replace_child_mid_drain(old_drain_count, new_drain_count);
+ }
+ }
+}
+
int main(int argc, char **argv)
{
int ret;
@@ -1758,6 +2228,12 @@
g_test_add_func("/bdrv-drain/blockjob/commit_by_drained_end",
test_blockjob_commit_by_drained_end);
+ g_test_add_func("/bdrv-drain/bdrv_drop_intermediate/poll",
+ test_drop_intermediate_poll);
+
+ g_test_add_func("/bdrv-drain/replace_child/mid-drain",
+ test_replace_child_mid_drain);
+
ret = g_test_run();
qemu_event_destroy(&done_event);
return ret;