push down vector linearization to posix-aio-compat.c (Christoph Hellwig)

Make all AIO requests vectored and defer linearization until the actual
I/O thread.  This prepares for using native preadv/pwritev.

Also enables asynchronous direct I/O by handling that case in the I/O thread.

Qcow and qcow2 probably want to be adapted to deal directly with multi-segment
requests, but that can be implemented later.


Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@7020 c046a42c-6fe2-441c-8c8c-71466251a162
diff --git a/block.c b/block.c
index 916f08e..8a78f14 100644
--- a/block.c
+++ b/block.c
@@ -47,25 +47,21 @@
 #define SECTOR_BITS 9
 #define SECTOR_SIZE (1 << SECTOR_BITS)
 
-static AIOPool vectored_aio_pool;
-
 typedef struct BlockDriverAIOCBSync {
     BlockDriverAIOCB common;
     QEMUBH *bh;
     int ret;
+    /* vector translation state */
+    QEMUIOVector *qiov;
+    uint8_t *bounce;
+    int is_write;
 } BlockDriverAIOCBSync;
 
-static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque);
 static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
@@ -144,10 +140,10 @@
 
 static void bdrv_register(BlockDriver *bdrv)
 {
-    if (!bdrv->bdrv_aio_read) {
+    if (!bdrv->bdrv_aio_readv) {
         /* add AIO emulation layer */
-        bdrv->bdrv_aio_read = bdrv_aio_read_em;
-        bdrv->bdrv_aio_write = bdrv_aio_write_em;
+        bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
+        bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
         bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
         bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
     } else if (!bdrv->bdrv_read) {
@@ -1295,91 +1291,10 @@
 /**************************************************************/
 /* async I/Os */
 
-typedef struct VectorTranslationAIOCB {
-    BlockDriverAIOCB common;
-    QEMUIOVector *iov;
-    uint8_t *bounce;
-    int is_write;
-    BlockDriverAIOCB *aiocb;
-} VectorTranslationAIOCB;
-
-static void bdrv_aio_cancel_vector(BlockDriverAIOCB *_acb)
-{
-    VectorTranslationAIOCB *acb
-        = container_of(_acb, VectorTranslationAIOCB, common);
-
-    bdrv_aio_cancel(acb->aiocb);
-}
-
-static void bdrv_aio_rw_vector_cb(void *opaque, int ret)
-{
-    VectorTranslationAIOCB *s = (VectorTranslationAIOCB *)opaque;
-
-    if (!s->is_write) {
-        qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size);
-    }
-    qemu_vfree(s->bounce);
-    s->common.cb(s->common.opaque, ret);
-    qemu_aio_release(s);
-}
-
-static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
-                                            int64_t sector_num,
-                                            QEMUIOVector *iov,
-                                            int nb_sectors,
-                                            BlockDriverCompletionFunc *cb,
-                                            void *opaque,
-                                            int is_write)
-
-{
-    VectorTranslationAIOCB *s = qemu_aio_get_pool(&vectored_aio_pool, bs,
-                                                  cb, opaque);
-
-    s->iov = iov;
-    s->bounce = qemu_memalign(512, nb_sectors * 512);
-    s->is_write = is_write;
-    if (is_write) {
-        qemu_iovec_to_buffer(s->iov, s->bounce);
-        s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors,
-                                  bdrv_aio_rw_vector_cb, s);
-    } else {
-        s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors,
-                                 bdrv_aio_rw_vector_cb, s);
-    }
-    if (!s->aiocb) {
-        qemu_vfree(s->bounce);
-        qemu_aio_release(s);
-        return NULL;
-    }
-    return &s->common;
-}
-
 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
-                                 QEMUIOVector *iov, int nb_sectors,
+                                 QEMUIOVector *qiov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
 {
-    if (bdrv_check_request(bs, sector_num, nb_sectors))
-        return NULL;
-
-    return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
-                              cb, opaque, 0);
-}
-
-BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
-                                  QEMUIOVector *iov, int nb_sectors,
-                                  BlockDriverCompletionFunc *cb, void *opaque)
-{
-    if (bdrv_check_request(bs, sector_num, nb_sectors))
-        return NULL;
-
-    return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
-                              cb, opaque, 1);
-}
-
-static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
-                                uint8_t *buf, int nb_sectors,
-                                BlockDriverCompletionFunc *cb, void *opaque)
-{
     BlockDriver *drv = bs->drv;
     BlockDriverAIOCB *ret;
 
@@ -1388,7 +1303,8 @@
     if (bdrv_check_request(bs, sector_num, nb_sectors))
         return NULL;
 
-    ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);
+    ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
+                              cb, opaque);
 
     if (ret) {
 	/* Update stats even though technically transfer has not happened. */
@@ -1399,9 +1315,9 @@
     return ret;
 }
 
-static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
-                                 const uint8_t *buf, int nb_sectors,
-                                 BlockDriverCompletionFunc *cb, void *opaque)
+BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+                                  QEMUIOVector *qiov, int nb_sectors,
+                                  BlockDriverCompletionFunc *cb, void *opaque)
 {
     BlockDriver *drv = bs->drv;
     BlockDriverAIOCB *ret;
@@ -1413,7 +1329,8 @@
     if (bdrv_check_request(bs, sector_num, nb_sectors))
         return NULL;
 
-    ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);
+    ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
+                               cb, opaque);
 
     if (ret) {
 	/* Update stats even though technically transfer has not happened. */
@@ -1436,42 +1353,62 @@
 static void bdrv_aio_bh_cb(void *opaque)
 {
     BlockDriverAIOCBSync *acb = opaque;
+
+    /* Copy read data out to the caller's iovec before freeing the bounce. */
+    if (!acb->is_write)
+        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
+    qemu_vfree(acb->bounce);
     acb->common.cb(acb->common.opaque, acb->ret);
+
     qemu_aio_release(acb);
 }
 
-static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
+                                            int64_t sector_num,
+                                            QEMUIOVector *qiov,
+                                            int nb_sectors,
+                                            BlockDriverCompletionFunc *cb,
+                                            void *opaque,
+                                            int is_write)
+
 {
     BlockDriverAIOCBSync *acb;
-    int ret;
 
     acb = qemu_aio_get(bs, cb, opaque);
+    acb->is_write = is_write;
+    acb->qiov = qiov;
+    acb->bounce = qemu_memalign(512, qiov->size);
+
     if (!acb->bh)
         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
-    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
-    acb->ret = ret;
+
+    if (is_write) {
+        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
+        acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
+    } else {
+        acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
+    }
+
     qemu_bh_schedule(acb->bh);
+
     return &acb->common;
 }
 
-static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
-    BlockDriverAIOCBSync *acb;
-    int ret;
-
-    acb = qemu_aio_get(bs, cb, opaque);
-    if (!acb->bh)
-        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
-    ret = bdrv_write(bs, sector_num, buf, nb_sectors);
-    acb->ret = ret;
-    qemu_bh_schedule(acb->bh);
-    return &acb->common;
+    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
 }
 
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+}
+
+
 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
 {
     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
@@ -1494,10 +1431,15 @@
 {
     int async_ret;
     BlockDriverAIOCB *acb;
+    struct iovec iov;
+    QEMUIOVector qiov;
 
     async_ret = NOT_DONE;
-    acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
-                        bdrv_rw_em_cb, &async_ret);
+    iov.iov_base = buf;
+    iov.iov_len = nb_sectors * 512;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
+        bdrv_rw_em_cb, &async_ret);
     if (acb == NULL)
         return -1;
 
@@ -1513,10 +1455,15 @@
 {
     int async_ret;
     BlockDriverAIOCB *acb;
+    struct iovec iov;
+    QEMUIOVector qiov;
 
     async_ret = NOT_DONE;
-    acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
-                         bdrv_rw_em_cb, &async_ret);
+    iov.iov_base = (void *)buf;
+    iov.iov_len = nb_sectors * 512;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
+        bdrv_rw_em_cb, &async_ret);
     if (acb == NULL)
         return -1;
     while (async_ret == NOT_DONE) {
@@ -1527,9 +1474,6 @@
 
 void bdrv_init(void)
 {
-    aio_pool_init(&vectored_aio_pool, sizeof(VectorTranslationAIOCB),
-                  bdrv_aio_cancel_vector);
-
     bdrv_register(&bdrv_raw);
     bdrv_register(&bdrv_host_device);
 #ifndef _WIN32