push down vector linearization to posix-aio-compat.c (Christoph Hellwig)

Make all AIO requests vectored and defer linearization until the actual
I/O thread.  This prepares for using native preadv/pwritev.

This also enables asynchronous direct I/O by handling that case in the I/O thread.
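
To illustrate the idea, linearization deferred to the I/O thread amounts to
gathering the scattered segments into a bounce buffer immediately before the
plain pread/pwrite syscall, or scattering the result back right after it.
The following is only a minimal sketch using a hypothetical request structure
and function name, not the actual posix-aio-compat.c code:

    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/types.h>
    #include <sys/uio.h>

    /* Hypothetical request layout, not the real posix-aio-compat.c struct. */
    struct vec_request {
        int fd;
        off_t offset;
        struct iovec *iov;      /* scattered guest memory segments */
        int niov;
        size_t size;            /* sum of all segment lengths */
        int is_write;
    };

    /* Runs in the I/O thread: the vector is only flattened here,
     * immediately before the syscall. */
    static ssize_t handle_vectored(struct vec_request *req)
    {
        char *bounce = malloc(req->size);
        size_t off = 0;
        ssize_t ret;
        int i;

        if (!bounce)
            return -1;

        if (req->is_write) {
            /* gather: copy the segments into one contiguous buffer */
            for (i = 0; i < req->niov; i++) {
                memcpy(bounce + off, req->iov[i].iov_base, req->iov[i].iov_len);
                off += req->iov[i].iov_len;
            }
            ret = pwrite(req->fd, bounce, req->size, req->offset);
        } else {
            ret = pread(req->fd, bounce, req->size, req->offset);
            if (ret >= 0) {
                /* scatter: copy the data read back into the segments */
                for (i = 0; i < req->niov; i++) {
                    memcpy(req->iov[i].iov_base, bounce + off, req->iov[i].iov_len);
                    off += req->iov[i].iov_len;
                }
            }
        }

        free(bounce);
        return ret;
    }

Once native preadv/pwritev can be used, the bounce buffer and the memcpy
loops disappear and the iovec is handed to the kernel directly.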

Qcow and qcow2 probably want to be adapted to deal directly with multi-segment
requests, but that can be implemented later.


Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@7020 c046a42c-6fe2-441c-8c8c-71466251a162
diff --git a/block-qcow2.c b/block-qcow2.c
index dd28c28..3bd38b0 100644
--- a/block-qcow2.c
+++ b/block-qcow2.c
@@ -1264,7 +1264,9 @@
 typedef struct QCowAIOCB {
     BlockDriverAIOCB common;
     int64_t sector_num;
+    QEMUIOVector *qiov;
     uint8_t *buf;
+    void *orig_buf;
     int nb_sectors;
     int n;
     uint64_t cluster_offset;
@@ -1307,12 +1309,8 @@
     int index_in_cluster, n1;
 
     acb->hd_aiocb = NULL;
-    if (ret < 0) {
-fail:
-        acb->common.cb(acb->common.opaque, ret);
-        qemu_aio_release(acb);
-        return;
-    }
+    if (ret < 0)
+        goto done;
 
     /* post process the read buffer */
     if (!acb->cluster_offset) {
@@ -1333,9 +1331,8 @@
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        acb->common.cb(acb->common.opaque, 0);
-        qemu_aio_release(acb);
-        return;
+        ret = 0;
+        goto done;
     }
 
     /* prepare next AIO request */
@@ -1356,32 +1353,32 @@
                                     &acb->hd_qiov, acb->n,
 				    qcow_aio_read_cb, acb);
                 if (acb->hd_aiocb == NULL)
-                    goto fail;
+                    goto done;
             } else {
                 ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
                 if (ret < 0)
-                    goto fail;
+                    goto done;
             }
         } else {
             /* Note: in this case, no need to wait */
             memset(acb->buf, 0, 512 * acb->n);
             ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
             if (ret < 0)
-                goto fail;
+                goto done;
         }
     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
         /* add AIO support for compressed blocks ? */
         if (decompress_cluster(s, acb->cluster_offset) < 0)
-            goto fail;
+            goto done;
         memcpy(acb->buf,
                s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
         ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
         if (ret < 0)
-            goto fail;
+            goto done;
     } else {
         if ((acb->cluster_offset & 511) != 0) {
             ret = -EIO;
-            goto fail;
+            goto done;
         }
 
         acb->hd_iov.iov_base = acb->buf;
@@ -1391,13 +1388,22 @@
                             (acb->cluster_offset >> 9) + index_in_cluster,
                             &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
         if (acb->hd_aiocb == NULL)
-            goto fail;
+            goto done;
     }
+
+    return;
+done:
+    if (acb->qiov->niov > 1) {
+        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
+        qemu_vfree(acb->orig_buf);
+    }
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
 }
 
 static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int is_write)
 {
     QCowAIOCB *acb;
 
@@ -1406,7 +1412,13 @@
         return NULL;
     acb->hd_aiocb = NULL;
     acb->sector_num = sector_num;
-    acb->buf = buf;
+    acb->qiov = qiov;
+    if (qiov->niov > 1) {
+        acb->buf = acb->orig_buf = qemu_memalign(512, qiov->size);
+        if (is_write)
+            qemu_iovec_to_buffer(qiov, acb->buf);
+    } else
+        acb->buf = qiov->iov->iov_base;
     acb->nb_sectors = nb_sectors;
     acb->n = 0;
     acb->cluster_offset = 0;
@@ -1414,13 +1426,13 @@
     return acb;
 }
 
-static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     QCowAIOCB *acb;
 
-    acb = qcow_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
+    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
     if (!acb)
         return NULL;
 
@@ -1439,16 +1451,12 @@
 
     acb->hd_aiocb = NULL;
 
-    if (ret < 0) {
-    fail:
-        acb->common.cb(acb->common.opaque, ret);
-        qemu_aio_release(acb);
-        return;
-    }
+    if (ret < 0)
+        goto done;
 
     if (alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) {
         free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters);
-        goto fail;
+        goto done;
     }
 
     acb->nb_sectors -= acb->n;
@@ -1457,9 +1465,8 @@
 
     if (acb->nb_sectors == 0) {
         /* request completed */
-        acb->common.cb(acb->common.opaque, 0);
-        qemu_aio_release(acb);
-        return;
+        ret = 0;
+        goto done;
     }
 
     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -1473,7 +1480,7 @@
                                           n_end, &acb->n, &acb->l2meta);
     if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) {
         ret = -EIO;
-        goto fail;
+        goto done;
     }
     if (s->crypt_method) {
         if (!acb->cluster_data) {
@@ -1494,11 +1501,19 @@
                                     &acb->hd_qiov, acb->n,
                                     qcow_aio_write_cb, acb);
     if (acb->hd_aiocb == NULL)
-        goto fail;
+        goto done;
+
+    return;
+
+done:
+    if (acb->qiov->niov > 1)
+        qemu_vfree(acb->orig_buf);
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
 }
 
-static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVQcowState *s = bs->opaque;
@@ -1506,7 +1521,7 @@
 
     s->cluster_cache_offset = -1; /* disable compressed cache */
 
-    acb = qcow_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
+    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
     if (!acb)
         return NULL;
 
@@ -2771,8 +2786,8 @@
     .bdrv_set_key	= qcow_set_key,
     .bdrv_make_empty	= qcow_make_empty,
 
-    .bdrv_aio_read	= qcow_aio_read,
-    .bdrv_aio_write	= qcow_aio_write,
+    .bdrv_aio_readv	= qcow_aio_readv,
+    .bdrv_aio_writev	= qcow_aio_writev,
     .bdrv_aio_cancel	= qcow_aio_cancel,
     .aiocb_size		= sizeof(QCowAIOCB),
     .bdrv_write_compressed = qcow_write_compressed,