block: unify flush implementations
Add coroutine support for flush and apply the same emulation that
we already do for read/write. bdrv_aio_flush is simplified to always
go through a coroutine.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
diff --git a/block.c b/block.c
index 7184a0f..7b8b14d 100644
--- a/block.c
+++ b/block.c
@@ -53,17 +53,12 @@
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
@@ -203,9 +198,6 @@
}
}
- if (!bdrv->bdrv_aio_flush)
- bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
-
QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
@@ -1027,11 +1019,6 @@
nb_sectors * BDRV_SECTOR_SIZE);
}
-static inline bool bdrv_has_async_flush(BlockDriver *drv)
-{
- return drv->bdrv_aio_flush != bdrv_aio_flush_em;
-}
-
typedef struct RwCo {
BlockDriverState *bs;
int64_t sector_num;
@@ -1759,33 +1746,6 @@
return bs->device_name;
}
-int bdrv_flush(BlockDriverState *bs)
-{
- if (bs->open_flags & BDRV_O_NO_FLUSH) {
- return 0;
- }
-
- if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
- return bdrv_co_flush_em(bs);
- }
-
- if (bs->drv && bs->drv->bdrv_flush) {
- return bs->drv->bdrv_flush(bs);
- }
-
- /*
- * Some block drivers always operate in either writethrough or unsafe mode
- * and don't support bdrv_flush therefore. Usually qemu doesn't know how
- * the server works (because the behaviour is hardcoded or depends on
- * server-side configuration), so we can't ensure that everything is safe
- * on disk. Returning an error doesn't work because that would break guests
- * even if the server operates in writethrough mode.
- *
- * Let's hope the user knows what he's doing.
- */
- return 0;
-}
-
void bdrv_flush_all(void)
{
BlockDriverState *bs;
@@ -2610,22 +2570,6 @@
return -1;
}
-BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- BlockDriver *drv = bs->drv;
-
- trace_bdrv_aio_flush(bs, opaque);
-
- if (bs->open_flags & BDRV_O_NO_FLUSH) {
- return bdrv_aio_noop_em(bs, cb, opaque);
- }
-
- if (!drv)
- return NULL;
- return drv->bdrv_aio_flush(bs, cb, opaque);
-}
-
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
acb->pool->cancel(acb);
@@ -2785,41 +2729,28 @@
return &acb->common;
}
-static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque)
+static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
- BlockDriverAIOCBSync *acb;
+ BlockDriverAIOCBCoroutine *acb = opaque;
+ BlockDriverState *bs = acb->common.bs;
- acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
- acb->is_write = 1; /* don't bounce in the completion hadler */
- acb->qiov = NULL;
- acb->bounce = NULL;
- acb->ret = 0;
-
- if (!acb->bh)
- acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
-
- bdrv_flush(bs);
+ acb->req.error = bdrv_co_flush(bs);
+ acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
qemu_bh_schedule(acb->bh);
- return &acb->common;
}
-static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque)
{
- BlockDriverAIOCBSync *acb;
+ trace_bdrv_aio_flush(bs, opaque);
- acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
- acb->is_write = 1; /* don't bounce in the completion handler */
- acb->qiov = NULL;
- acb->bounce = NULL;
- acb->ret = 0;
+ Coroutine *co;
+ BlockDriverAIOCBCoroutine *acb;
- if (!acb->bh) {
- acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
- }
+ acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
+ co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
+ qemu_coroutine_enter(co, acb);
- qemu_bh_schedule(acb->bh);
return &acb->common;
}
@@ -2916,19 +2847,72 @@
return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}
-static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
+static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
- CoroutineIOCompletion co = {
- .coroutine = qemu_coroutine_self(),
- };
- BlockDriverAIOCB *acb;
+ RwCo *rwco = opaque;
- acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
- if (!acb) {
- return -EIO;
+ rwco->ret = bdrv_co_flush(rwco->bs);
+}
+
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
+{
+ if (bs->open_flags & BDRV_O_NO_FLUSH) {
+ return 0;
+ } else if (!bs->drv) {
+ return 0;
+ } else if (bs->drv->bdrv_co_flush) {
+ return bs->drv->bdrv_co_flush(bs);
+ } else if (bs->drv->bdrv_aio_flush) {
+ BlockDriverAIOCB *acb;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
+ if (acb == NULL) {
+ return -EIO;
+ } else {
+ qemu_coroutine_yield();
+ return co.ret;
+ }
+ } else if (bs->drv->bdrv_flush) {
+ return bs->drv->bdrv_flush(bs);
+ } else {
+ /*
+ * Some block drivers always operate in either writethrough or unsafe
+ * mode and don't support bdrv_flush therefore. Usually qemu doesn't
+ * know how the server works (because the behaviour is hardcoded or
+ * depends on server-side configuration), so we can't ensure that
+ * everything is safe on disk. Returning an error doesn't work because
+ * that would break guests even if the server operates in writethrough
+ * mode.
+ *
+ * Let's hope the user knows what he's doing.
+ */
+ return 0;
}
- qemu_coroutine_yield();
- return co.ret;
+}
+
+int bdrv_flush(BlockDriverState *bs)
+{
+ Coroutine *co;
+ RwCo rwco = {
+ .bs = bs,
+ .ret = NOT_DONE,
+ };
+
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ bdrv_flush_co_entry(&rwco);
+ } else {
+ co = qemu_coroutine_create(bdrv_flush_co_entry);
+ qemu_coroutine_enter(co, &rwco);
+ while (rwco.ret == NOT_DONE) {
+ qemu_aio_wait();
+ }
+ }
+
+ return rwco.ret;
}
/**************************************************************/
diff --git a/block.h b/block.h
index e77988e..65c5166 100644
--- a/block.h
+++ b/block.h
@@ -191,6 +191,7 @@
/* Ensure contents are flushed to disk. */
int bdrv_flush(BlockDriverState *bs);
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
void bdrv_flush_all(void);
void bdrv_close_all(void);
diff --git a/block_int.h b/block_int.h
index f2f4f2d..9cb536d 100644
--- a/block_int.h
+++ b/block_int.h
@@ -83,6 +83,7 @@
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
int (*bdrv_aio_multiwrite)(BlockDriverState *bs, BlockRequest *reqs,
int num_reqs);