qcow2: Version 3 images

This adds the basic infrastructure to qcow2 to handle version 3 images.
It includes code to create v3 images, allow header updates for v3 images
and checks feature bits.

It still misses support for zero clusters, so this is not a fully
compliant implementation of v3 yet.

The default for creating new images stays at v2 for now.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
diff --git a/block/qcow2.c b/block/qcow2.c
index 4c82adc..3e1482f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -61,7 +61,7 @@
 
     if (buf_size >= sizeof(QCowHeader) &&
         be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
-        be32_to_cpu(cow_header->version) >= QCOW_VERSION)
+        be32_to_cpu(cow_header->version) >= 2)
         return 100;
     else
         return 0;
@@ -169,6 +169,19 @@
     }
 }
 
+static void report_unsupported(BlockDriverState *bs, const char *fmt, ...)
+{
+    char msg[64];
+    va_list ap;
+
+    va_start(ap, fmt);
+    vsnprintf(msg, sizeof(msg), fmt, ap);
+    va_end(ap);
+
+    qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+        bs->device_name, "qcow2", msg);
+}
+
 static int qcow2_open(BlockDriverState *bs, int flags)
 {
     BDRVQcowState *s = bs->opaque;
@@ -199,14 +212,64 @@
         ret = -EINVAL;
         goto fail;
     }
-    if (header.version != QCOW_VERSION) {
-        char version[64];
-        snprintf(version, sizeof(version), "QCOW version %d", header.version);
-        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-            bs->device_name, "qcow2", version);
+    if (header.version < 2 || header.version > 3) {
+        report_unsupported(bs, "QCOW version %d", header.version);
         ret = -ENOTSUP;
         goto fail;
     }
+
+    s->qcow_version = header.version;
+
+    /* Initialise version 3 header fields */
+    if (header.version == 2) {
+        header.incompatible_features    = 0;
+        header.compatible_features      = 0;
+        header.autoclear_features       = 0;
+        header.refcount_order           = 4;
+        header.header_length            = 72;
+    } else {
+        be64_to_cpus(&header.incompatible_features);
+        be64_to_cpus(&header.compatible_features);
+        be64_to_cpus(&header.autoclear_features);
+        be32_to_cpus(&header.refcount_order);
+        be32_to_cpus(&header.header_length);
+    }
+
+    if (header.header_length > sizeof(header)) {
+        s->unknown_header_fields_size = header.header_length - sizeof(header);
+        s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
+        ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
+                         s->unknown_header_fields_size);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
+    /* Handle feature bits */
+    s->incompatible_features    = header.incompatible_features;
+    s->compatible_features      = header.compatible_features;
+    s->autoclear_features       = header.autoclear_features;
+
+    if (s->incompatible_features != 0) {
+        report_unsupported(bs, "incompatible features mask %" PRIx64,
+                           header.incompatible_features);
+        ret = -ENOTSUP;
+        goto fail;
+    }
+
+    if (!bs->read_only && s->autoclear_features != 0) {
+        s->autoclear_features = 0;
+        qcow2_update_header(bs);
+    }
+
+    /* Check support for various header values */
+    if (header.refcount_order != 4) {
+        report_unsupported(bs, "%d bit reference counts",
+                           1 << header.refcount_order);
+        ret = -ENOTSUP;
+        goto fail;
+    }
+
     if (header.cluster_bits < MIN_CLUSTER_BITS ||
         header.cluster_bits > MAX_CLUSTER_BITS) {
         ret = -EINVAL;
@@ -285,7 +348,7 @@
     } else {
         ext_end = s->cluster_size;
     }
-    if (qcow2_read_extensions(bs, sizeof(header), ext_end)) {
+    if (qcow2_read_extensions(bs, header.header_length, ext_end)) {
         ret = -EINVAL;
         goto fail;
     }
@@ -321,6 +384,7 @@
     return ret;
 
  fail:
+    g_free(s->unknown_header_fields);
     cleanup_unknown_header_ext(bs);
     qcow2_free_snapshots(bs);
     qcow2_refcount_close(bs);
@@ -682,7 +746,9 @@
     qcow2_cache_destroy(bs, s->l2_table_cache);
     qcow2_cache_destroy(bs, s->refcount_block_cache);
 
+    g_free(s->unknown_header_fields);
     cleanup_unknown_header_ext(bs);
+
     g_free(s->cluster_cache);
     qemu_vfree(s->cluster_data);
     qcow2_refcount_close(bs);
@@ -756,10 +822,10 @@
     int ret;
     uint64_t total_size;
     uint32_t refcount_table_clusters;
+    size_t header_length;
     Qcow2UnknownHeaderExtension *uext;
 
     buf = qemu_blockalign(bs, buflen);
-    memset(buf, 0, s->cluster_size);
 
     /* Header structure */
     header = (QCowHeader*) buf;
@@ -769,12 +835,14 @@
         goto fail;
     }
 
+    header_length = sizeof(*header) + s->unknown_header_fields_size;
     total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
     refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
 
     *header = (QCowHeader) {
+        /* Version 2 fields */
         .magic                  = cpu_to_be32(QCOW_MAGIC),
-        .version                = cpu_to_be32(QCOW_VERSION),
+        .version                = cpu_to_be32(s->qcow_version),
         .backing_file_offset    = 0,
         .backing_file_size      = 0,
         .cluster_bits           = cpu_to_be32(s->cluster_bits),
@@ -786,10 +854,42 @@
         .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
         .nb_snapshots           = cpu_to_be32(s->nb_snapshots),
         .snapshots_offset       = cpu_to_be64(s->snapshots_offset),
+
+        /* Version 3 fields */
+        .incompatible_features  = cpu_to_be64(s->incompatible_features),
+        .compatible_features    = cpu_to_be64(s->compatible_features),
+        .autoclear_features     = cpu_to_be64(s->autoclear_features),
+        .refcount_order         = cpu_to_be32(3 + REFCOUNT_SHIFT),
+        .header_length          = cpu_to_be32(header_length),
     };
 
-    buf += sizeof(*header);
-    buflen -= sizeof(*header);
+    /* For older versions, write a shorter header */
+    switch (s->qcow_version) {
+    case 2:
+        ret = offsetof(QCowHeader, incompatible_features);
+        break;
+    case 3:
+        ret = sizeof(*header);
+        break;
+    default:
+        return -EINVAL;
+    }
+
+    buf += ret;
+    buflen -= ret;
+    memset(buf, 0, buflen);
+
+    /* Preserve any unknown field in the header */
+    if (s->unknown_header_fields_size) {
+        if (buflen < s->unknown_header_fields_size) {
+            ret = -ENOSPC;
+            goto fail;
+        }
+
+        memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
+        buf += s->unknown_header_fields_size;
+        buflen -= s->unknown_header_fields_size;
+    }
 
     /* Backing file format header extension */
     if (*bs->backing_format) {
@@ -921,7 +1021,7 @@
 static int qcow2_create2(const char *filename, int64_t total_size,
                          const char *backing_file, const char *backing_format,
                          int flags, size_t cluster_size, int prealloc,
-                         QEMUOptionParameter *options)
+                         QEMUOptionParameter *options, int version)
 {
     /* Calculate cluster_bits */
     int cluster_bits;
@@ -965,13 +1065,15 @@
     /* Write the header */
     memset(&header, 0, sizeof(header));
     header.magic = cpu_to_be32(QCOW_MAGIC);
-    header.version = cpu_to_be32(QCOW_VERSION);
+    header.version = cpu_to_be32(version);
     header.cluster_bits = cpu_to_be32(cluster_bits);
     header.size = cpu_to_be64(0);
     header.l1_table_offset = cpu_to_be64(0);
     header.l1_size = cpu_to_be32(0);
     header.refcount_table_offset = cpu_to_be64(cluster_size);
     header.refcount_table_clusters = cpu_to_be32(1);
+    header.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT);
+    header.header_length = cpu_to_be32(sizeof(header));
 
     if (flags & BLOCK_FLAG_ENCRYPT) {
         header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
@@ -1053,6 +1155,7 @@
     int flags = 0;
     size_t cluster_size = DEFAULT_CLUSTER_SIZE;
     int prealloc = 0;
+    int version = 2;
 
     /* Read out options */
     while (options && options->name) {
@@ -1078,6 +1181,16 @@
                     options->value.s);
                 return -EINVAL;
             }
+        } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) {
+            if (!options->value.s || !strcmp(options->value.s, "0.10")) {
+                version = 2;
+            } else if (!strcmp(options->value.s, "1.1")) {
+                version = 3;
+            } else {
+                fprintf(stderr, "Invalid compatibility level: '%s'\n",
+                    options->value.s);
+                return -EINVAL;
+            }
         }
         options++;
     }
@@ -1089,7 +1202,7 @@
     }
 
     return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
-                         cluster_size, prealloc, options);
+                         cluster_size, prealloc, options, version);
 }
 
 static int qcow2_make_empty(BlockDriverState *bs)
@@ -1341,6 +1454,11 @@
         .help = "Virtual disk size"
     },
     {
+        .name = BLOCK_OPT_COMPAT_LEVEL,
+        .type = OPT_STRING,
+        .help = "Compatibility level (0.10 or 1.1)"
+    },
+    {
         .name = BLOCK_OPT_BACKING_FILE,
         .type = OPT_STRING,
         .help = "File name of a base image"