qcow2: Add bdrv_discard support

This adds a bdrv_discard function to qcow2 that frees the discarded clusters.
It does not yet pass the discard on to the underlying file system driver, but
the space can be reused by future writes to the image.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 1c2003a..5fb8c66 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -888,3 +888,85 @@
     }
     return 0;
 }
+
+/*
+ * This discards as many clusters of nb_clusters as possible at once (i.e.
+ * all clusters in the same L2 table) and returns the number of discarded
+ * clusters.
+ */
+static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
+    unsigned int nb_clusters)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t l2_offset, *l2_table;
+    int l2_index;
+    int ret;
+    int i;
+
+    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* Limit nb_clusters to one L2 table */
+    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+    for (i = 0; i < nb_clusters; i++) {
+        uint64_t old_offset;
+
+        old_offset = be64_to_cpu(l2_table[l2_index + i]);
+        old_offset &= ~QCOW_OFLAG_COPIED;
+
+        if (old_offset == 0) {
+            continue;
+        }
+
+        /* First remove L2 entries */
+        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+        l2_table[l2_index + i] = cpu_to_be64(0);
+
+        /* Then decrease the refcount */
+        qcow2_free_any_clusters(bs, old_offset, 1);
+    }
+
+    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return nb_clusters;
+}
+
+int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
+    int nb_sectors)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t end_offset;
+    unsigned int nb_clusters;
+    int ret;
+
+    end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
+
+    /* Round start up and end down */
+    offset = align_offset(offset, s->cluster_size);
+    end_offset &= ~(s->cluster_size - 1);
+
+    if (offset > end_offset) {
+        return 0;
+    }
+
+    nb_clusters = size_to_clusters(s, end_offset - offset);
+
+    /* Each L2 table is handled by its own loop iteration */
+    while (nb_clusters > 0) {
+        ret = discard_single_l2(bs, offset, nb_clusters);
+        if (ret < 0) {
+            return ret;
+        }
+
+        nb_clusters -= ret;
+        offset += (ret * s->cluster_size);
+    }
+
+    return 0;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index 49bf7b9..dbe4fdd 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1084,6 +1084,13 @@
     return 0;
 }
 
+static int qcow2_discard(BlockDriverState *bs, int64_t sector_num,
+    int nb_sectors)
+{
+    return qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
+        nb_sectors);
+}
+
 static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
 {
     BDRVQcowState *s = bs->opaque;
@@ -1349,6 +1356,7 @@
     .bdrv_aio_writev    = qcow2_aio_writev,
     .bdrv_aio_flush     = qcow2_aio_flush,
 
+    .bdrv_discard           = qcow2_discard,
     .bdrv_truncate          = qcow2_truncate,
     .bdrv_write_compressed  = qcow2_write_compressed,
 
diff --git a/block/qcow2.h b/block/qcow2.h
index 6d80120..a019831 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -209,6 +209,8 @@
                                          int compressed_size);
 
 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
+int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
+    int nb_sectors);
 
 /* qcow2-snapshot.c functions */
 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);