Merge remote-tracking branch 'mst/tags/for_upstream' into staging pci, virtio, vhost: fixes A bunch of fixes that missed the release. Most notably we are reverting shpc back to enabled by default state as guests uses that as an indicator that hotplug is supported (even though it's unused). Unfortunately we can't fix this on the stable branch since that would break migration. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # gpg: Signature made Wed 17 May 2017 10:42:06 PM BST # gpg: using RSA key 0x281F0DB8D28D5469 # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * mst/tags/for_upstream: exec: abstract address_space_do_translate() pci: deassert intx when pci device unrealize virtio: allow broken device to notify guest Revert "hw/pci: disable pci-bridge's shpc by default" acpi-defs: clean up open brace usage ACPI: don't call acpi_pcihp_device_plug_cb on xen iommu: Don't crash if machine is not PC_MACHINE pc: add 2.10 machine type pc/fwcfg: unbreak migration from qemu-2.5 and qemu-2.6 during firmware boot libvhost-user: fix crash when rings aren't ready hw/virtio: fix vhost user fails to startup when MQ hw/arm/virt: generate 64-bit addressable ACPI objects hw/acpi-defs: replace leading X with x_ in FADT field names Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

commit: adb354dd1e00aa6b8bd674f0e1f70008badded0f [log] [tgz]
author: Stefan Hajnoczi <stefanha@redhat.com> Thu May 18 10:01:00 2017 +0100
committer: Stefan Hajnoczi <stefanha@redhat.com> Thu May 18 10:01:08 2017 +0100
tree: 51c2422717a6421bdc36a54c22bcb1b292aa44b0
parent: 897eee242bc081d72ffbf84664c907dcba620ee3 [diff]
parent: a764040cc831cfe5b8bf1c80e8341b9bf2de3ce8 [diff]
diff --git a/.gitmodules b/.gitmodules
index ca323b4..5b0c212 100644
--- a/.gitmodules
+++ b/.gitmodules

@@ -34,3 +34,6 @@
 [submodule "roms/skiboot"]
 	path = roms/skiboot
 	url = git://git.qemu.org/skiboot.git
+[submodule "roms/QemuMacDrivers"]
+	path = roms/QemuMacDrivers
+	url = git://git.qemu.org/QemuMacDrivers.git

diff --git a/.travis.yml b/.travis.yml
index 9008a79..27a2d9c 100644
--- a/.travis.yml
+++ b/.travis.yml

@@ -86,9 +86,6 @@
     - env: CONFIG="--enable-trace-backends=ust"
            TEST_CMD=""
       compiler: gcc
-    - env: CONFIG="--with-coroutine=gthread"
-           TEST_CMD=""
-      compiler: gcc
     - env: CONFIG=""
       os: osx
       compiler: clang
@@ -191,7 +188,7 @@
       compiler: none
       env:
         - COMPILER_NAME=gcc CXX=g++-5 CC=gcc-5
-        - CONFIG="--cc=gcc-5 --cxx=g++-5 --disable-pie --disable-linux-user --with-coroutine=gthread"
+        - CONFIG="--cc=gcc-5 --cxx=g++-5 --disable-pie --disable-linux-user"
         - TEST_CMD=""
       before_script:
         - ./configure ${CONFIG} --extra-cflags="-g3 -O0 -fsanitize=thread -fuse-ld=gold" || cat config.log

diff --git a/MAINTAINERS b/MAINTAINERS
index 8224be0..ef2ec58 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS

@@ -354,6 +354,12 @@
 S: Maintained
 F: *posix*
 
+NETBSD
+L: qemu-devel@nongnu.org
+M: Kamil Rytarowski <kamil@netbsd.org>
+S: Maintained
+K: (?i)NetBSD
+
 W32, W64
 L: qemu-devel@nongnu.org
 M: Stefan Weil <sw@weilnetz.de>
@@ -1170,6 +1176,7 @@
 F: qemu-img*
 F: qemu-io*
 F: tests/qemu-iotests/
+F: util/qemu-progress.c
 T: git git://repo.or.cz/qemu/kevin.git block
 
 Block I/O path
@@ -1177,8 +1184,8 @@
 M: Fam Zheng <famz@redhat.com>
 L: qemu-block@nongnu.org
 S: Supported
-F: async.c
-F: aio-*.c
+F: util/async.c
+F: util/aio-*.c
 F: block/io.c
 F: migration/block*
 F: include/block/aio.h
@@ -1307,8 +1314,8 @@
 M: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
 F: cpus.c
-F: main-loop.c
-F: qemu-timer.c
+F: util/main-loop.c
+F: util/qemu-timer.c
 F: vl.c
 
 Human Monitor (HMP)
@@ -1487,6 +1494,7 @@
 F: crypto/
 F: include/crypto/
 F: tests/test-crypto-*
+F: qemu.sasl
 
 Coroutines
 M: Stefan Hajnoczi <stefanha@redhat.com>
@@ -1577,6 +1585,7 @@
 
 Linux user
 M: Riku Voipio <riku.voipio@iki.fi>
+R: Laurent Vivier <laurent@vivier.eu>
 S: Maintained
 F: linux-user/
 F: default-configs/*-linux-user.mak

diff --git a/Makefile b/Makefile
index 31d41a7..c830d7a 100644
--- a/Makefile
+++ b/Makefile

@@ -552,7 +552,8 @@
 s390-ccw.img \
 spapr-rtas.bin slof.bin skiboot.lid \
 palcode-clipper \
-u-boot.e500
+u-boot.e500 \
+qemu_vga.ndrv
 else
 BLOBS=
 endif

diff --git a/block.c b/block.c
index a45b9b5..50ba264 100644
--- a/block.c
+++ b/block.c

@@ -192,11 +192,20 @@
     }
 }
 
+/* Returns whether the image file is opened as read-only. Note that this can
+ * return false and writing to the image file is still not possible because the
+ * image is inactivated. */
 bool bdrv_is_read_only(BlockDriverState *bs)
 {
     return bs->read_only;
 }
 
+/* Returns whether the image file can be written to right now */
+bool bdrv_is_writable(BlockDriverState *bs)
+{
+    return !bdrv_is_read_only(bs) && !(bs->open_flags & BDRV_O_INACTIVE);
+}
+
 int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
 {
     /* Do not set read_only if copy_on_read is enabled */
@@ -762,6 +771,13 @@
     bdrv_drained_end(bs);
 }
 
+static int bdrv_child_cb_inactivate(BdrvChild *child)
+{
+    BlockDriverState *bs = child->opaque;
+    assert(bs->open_flags & BDRV_O_INACTIVE);
+    return 0;
+}
+
 /*
  * Returns the options and flags that a temporary snapshot should get, based on
  * the originally requested flags (the originally requested image will have
@@ -800,6 +816,7 @@
      * the parent. */
     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
+    qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);
 
     /* Inherit the read-only option from the parent if it's not set */
     qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
@@ -821,6 +838,7 @@
     .inherit_options = bdrv_inherited_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
     .drained_end     = bdrv_child_cb_drained_end,
+    .inactivate      = bdrv_child_cb_inactivate,
 };
 
 /*
@@ -842,6 +860,7 @@
     .inherit_options = bdrv_inherited_fmt_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
     .drained_end     = bdrv_child_cb_drained_end,
+    .inactivate      = bdrv_child_cb_inactivate,
 };
 
 static void bdrv_backing_attach(BdrvChild *c)
@@ -908,6 +927,7 @@
      * which is only applied on the top level (BlockBackend) */
     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
+    qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);
 
     /* backing files always opened read-only */
     qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on");
@@ -926,6 +946,7 @@
     .inherit_options = bdrv_backing_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
     .drained_end     = bdrv_child_cb_drained_end,
+    .inactivate      = bdrv_child_cb_inactivate,
 };
 
 static int bdrv_open_flags(BlockDriverState *bs, int flags)
@@ -1150,6 +1171,11 @@
             .type = QEMU_OPT_STRING,
             .help = "discard operation (ignore/off, unmap/on)",
         },
+        {
+            .name = BDRV_OPT_FORCE_SHARE,
+            .type = QEMU_OPT_BOOL,
+            .help = "always accept other writers (default: off)",
+        },
         { /* end of list */ }
     },
 };
@@ -1189,6 +1215,16 @@
     drv = bdrv_find_format(driver_name);
     assert(drv != NULL);
 
+    bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false);
+
+    if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) {
+        error_setg(errp,
+                   BDRV_OPT_FORCE_SHARE
+                   "=on can only be used with read-only images");
+        ret = -EINVAL;
+        goto fail_opts;
+    }
+
     if (file != NULL) {
         filename = blk_bs(file)->filename;
     } else {
@@ -1448,6 +1484,22 @@
 static void bdrv_child_abort_perm_update(BdrvChild *c);
 static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared);
 
+static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
+                            BdrvChild *c,
+                            const BdrvChildRole *role,
+                            uint64_t parent_perm, uint64_t parent_shared,
+                            uint64_t *nperm, uint64_t *nshared)
+{
+    if (bs->drv && bs->drv->bdrv_child_perm) {
+        bs->drv->bdrv_child_perm(bs, c, role,
+                                 parent_perm, parent_shared,
+                                 nperm, nshared);
+    }
+    if (child_bs && child_bs->force_share) {
+        *nshared = BLK_PERM_ALL;
+    }
+}
+
 /*
  * Check whether permissions on this node can be changed in a way that
  * @cumulative_perms and @cumulative_shared_perms are the new cumulative
@@ -1467,7 +1519,7 @@
 
     /* Write permissions never work with read-only images */
     if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
-        bdrv_is_read_only(bs))
+        !bdrv_is_writable(bs))
     {
         error_setg(errp, "Block node is read-only");
         return -EPERM;
@@ -1492,9 +1544,9 @@
     /* Check all children */
     QLIST_FOREACH(c, &bs->children, next) {
         uint64_t cur_perm, cur_shared;
-        drv->bdrv_child_perm(bs, c, c->role,
-                             cumulative_perms, cumulative_shared_perms,
-                             &cur_perm, &cur_shared);
+        bdrv_child_perm(bs, c->bs, c, c->role,
+                        cumulative_perms, cumulative_shared_perms,
+                        &cur_perm, &cur_shared);
         ret = bdrv_child_check_perm(c, cur_perm, cur_shared, ignore_children,
                                     errp);
         if (ret < 0) {
@@ -1554,9 +1606,9 @@
     /* Update all children */
     QLIST_FOREACH(c, &bs->children, next) {
         uint64_t cur_perm, cur_shared;
-        drv->bdrv_child_perm(bs, c, c->role,
-                             cumulative_perms, cumulative_shared_perms,
-                             &cur_perm, &cur_shared);
+        bdrv_child_perm(bs, c->bs, c, c->role,
+                        cumulative_perms, cumulative_shared_perms,
+                        &cur_perm, &cur_shared);
         bdrv_child_set_perm(c, cur_perm, cur_shared);
     }
 }
@@ -1586,7 +1638,7 @@
     return g_strdup("another user");
 }
 
-static char *bdrv_perm_names(uint64_t perm)
+char *bdrv_perm_names(uint64_t perm)
 {
     struct perm_name {
         uint64_t perm;
@@ -1752,7 +1804,7 @@
         bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared);
 
         /* Format drivers may touch metadata even if the guest doesn't write */
-        if (!bdrv_is_read_only(bs)) {
+        if (bdrv_is_writable(bs)) {
             perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
         }
 
@@ -1778,6 +1830,10 @@
                   BLK_PERM_WRITE_UNCHANGED;
     }
 
+    if (bs->open_flags & BDRV_O_INACTIVE) {
+        shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
+    }
+
     *nperm = perm;
     *nshared = shared;
 }
@@ -1891,8 +1947,8 @@
 
     assert(parent_bs->drv);
     assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs));
-    parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role,
-                                    perm, shared_perm, &perm, &shared_perm);
+    bdrv_child_perm(parent_bs, child_bs, NULL, child_role,
+                    perm, shared_perm, &perm, &shared_perm);
 
     child = bdrv_root_attach_child(child_bs, child_name, child_role,
                                    perm, shared_perm, parent_bs, errp);
@@ -3916,7 +3972,8 @@
 
 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
 {
-    BdrvChild *child;
+    BdrvChild *child, *parent;
+    uint64_t perm, shared_perm;
     Error *local_err = NULL;
     int ret;
 
@@ -3952,6 +4009,26 @@
         error_setg_errno(errp, -ret, "Could not refresh total sector count");
         return;
     }
+
+    /* Update permissions, they may differ for inactive nodes */
+    bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
+    ret = bdrv_check_perm(bs, perm, shared_perm, NULL, &local_err);
+    if (ret < 0) {
+        bs->open_flags |= BDRV_O_INACTIVE;
+        error_propagate(errp, local_err);
+        return;
+    }
+    bdrv_set_perm(bs, perm, shared_perm);
+
+    QLIST_FOREACH(parent, &bs->parents, next_parent) {
+        if (parent->role->activate) {
+            parent->role->activate(parent, &local_err);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                return;
+            }
+        }
+    }
 }
 
 void bdrv_invalidate_cache_all(Error **errp)
@@ -3976,7 +4053,7 @@
 static int bdrv_inactivate_recurse(BlockDriverState *bs,
                                    bool setting_flag)
 {
-    BdrvChild *child;
+    BdrvChild *child, *parent;
     int ret;
 
     if (!setting_flag && bs->drv->bdrv_inactivate) {
@@ -3986,6 +4063,27 @@
         }
     }
 
+    if (setting_flag) {
+        uint64_t perm, shared_perm;
+
+        bs->open_flags |= BDRV_O_INACTIVE;
+
+        QLIST_FOREACH(parent, &bs->parents, next_parent) {
+            if (parent->role->inactivate) {
+                ret = parent->role->inactivate(parent);
+                if (ret < 0) {
+                    bs->open_flags &= ~BDRV_O_INACTIVE;
+                    return ret;
+                }
+            }
+        }
+
+        /* Update permissions, they may differ for inactive nodes */
+        bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
+        bdrv_check_perm(bs, perm, shared_perm, NULL, &error_abort);
+        bdrv_set_perm(bs, perm, shared_perm);
+    }
+
     QLIST_FOREACH(child, &bs->children, next) {
         ret = bdrv_inactivate_recurse(child->bs, setting_flag);
         if (ret < 0) {
@@ -3993,9 +4091,6 @@
         }
     }
 
-    if (setting_flag) {
-        bs->open_flags |= BDRV_O_INACTIVE;
-    }
     return 0;
 }
 

diff --git a/block/blkdebug.c b/block/blkdebug.c
index 3c08893..a5196e8 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c

@@ -1,6 +1,7 @@
 /*
  * Block protocol for I/O error injection
  *
+ * Copyright (C) 2016-2017 Red Hat, Inc.
  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -37,7 +38,12 @@
 typedef struct BDRVBlkdebugState {
     int state;
     int new_state;
-    int align;
+    uint64_t align;
+    uint64_t max_transfer;
+    uint64_t opt_write_zero;
+    uint64_t max_write_zero;
+    uint64_t opt_discard;
+    uint64_t max_discard;
 
     /* For blkdebug_refresh_filename() */
     char *config_file;
@@ -342,6 +348,31 @@
             .type = QEMU_OPT_SIZE,
             .help = "Required alignment in bytes",
         },
+        {
+            .name = "max-transfer",
+            .type = QEMU_OPT_SIZE,
+            .help = "Maximum transfer size in bytes",
+        },
+        {
+            .name = "opt-write-zero",
+            .type = QEMU_OPT_SIZE,
+            .help = "Optimum write zero alignment in bytes",
+        },
+        {
+            .name = "max-write-zero",
+            .type = QEMU_OPT_SIZE,
+            .help = "Maximum write zero size in bytes",
+        },
+        {
+            .name = "opt-discard",
+            .type = QEMU_OPT_SIZE,
+            .help = "Optimum discard alignment in bytes",
+        },
+        {
+            .name = "max-discard",
+            .type = QEMU_OPT_SIZE,
+            .help = "Maximum discard size in bytes",
+        },
         { /* end of list */ }
     },
 };
@@ -352,8 +383,8 @@
     BDRVBlkdebugState *s = bs->opaque;
     QemuOpts *opts;
     Error *local_err = NULL;
-    uint64_t align;
     int ret;
+    uint64_t align;
 
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -382,19 +413,69 @@
         goto out;
     }
 
-    /* Set request alignment */
-    align = qemu_opt_get_size(opts, "align", 0);
-    if (align < INT_MAX && is_power_of_2(align)) {
-        s->align = align;
-    } else if (align) {
-        error_setg(errp, "Invalid alignment");
-        ret = -EINVAL;
+    bs->supported_write_flags = BDRV_REQ_FUA &
+        bs->file->bs->supported_write_flags;
+    bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+        bs->file->bs->supported_zero_flags;
+    ret = -EINVAL;
+
+    /* Set alignment overrides */
+    s->align = qemu_opt_get_size(opts, "align", 0);
+    if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
+        error_setg(errp, "Cannot meet constraints with align %" PRIu64,
+                   s->align);
+        goto out;
+    }
+    align = MAX(s->align, bs->file->bs->bl.request_alignment);
+
+    s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
+    if (s->max_transfer &&
+        (s->max_transfer >= INT_MAX ||
+         !QEMU_IS_ALIGNED(s->max_transfer, align))) {
+        error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
+                   s->max_transfer);
+        goto out;
+    }
+
+    s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
+    if (s->opt_write_zero &&
+        (s->opt_write_zero >= INT_MAX ||
+         !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
+        error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
+                   s->opt_write_zero);
+        goto out;
+    }
+
+    s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
+    if (s->max_write_zero &&
+        (s->max_write_zero >= INT_MAX ||
+         !QEMU_IS_ALIGNED(s->max_write_zero,
+                          MAX(s->opt_write_zero, align)))) {
+        error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
+                   s->max_write_zero);
+        goto out;
+    }
+
+    s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
+    if (s->opt_discard &&
+        (s->opt_discard >= INT_MAX ||
+         !QEMU_IS_ALIGNED(s->opt_discard, align))) {
+        error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
+                   s->opt_discard);
+        goto out;
+    }
+
+    s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
+    if (s->max_discard &&
+        (s->max_discard >= INT_MAX ||
+         !QEMU_IS_ALIGNED(s->max_discard,
+                          MAX(s->opt_discard, align)))) {
+        error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
+                   s->max_discard);
         goto out;
     }
 
     ret = 0;
-    goto out;
-
 out:
     if (ret < 0) {
         g_free(s->config_file);
@@ -403,11 +484,30 @@
     return ret;
 }
 
-static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
+static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes)
 {
     BDRVBlkdebugState *s = bs->opaque;
-    int error = rule->options.inject.error;
-    bool immediately = rule->options.inject.immediately;
+    BlkdebugRule *rule = NULL;
+    int error;
+    bool immediately;
+
+    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
+        uint64_t inject_offset = rule->options.inject.offset;
+
+        if (inject_offset == -1 ||
+            (bytes && inject_offset >= offset &&
+             inject_offset < offset + bytes))
+        {
+            break;
+        }
+    }
+
+    if (!rule || !rule->options.inject.error) {
+        return 0;
+    }
+
+    immediately = rule->options.inject.immediately;
+    error = rule->options.inject.error;
 
     if (rule->options.inject.once) {
         QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
@@ -426,21 +526,18 @@
 blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                    QEMUIOVector *qiov, int flags)
 {
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
+    int err;
 
-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        uint64_t inject_offset = rule->options.inject.offset;
-
-        if (inject_offset == -1 ||
-            (inject_offset >= offset && inject_offset < offset + bytes))
-        {
-            break;
-        }
+    /* Sanity check block layer guarantees */
+    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
+    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
+    if (bs->bl.max_transfer) {
+        assert(bytes <= bs->bl.max_transfer);
     }
 
-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, rule);
+    err = rule_check(bs, offset, bytes);
+    if (err) {
+        return err;
     }
 
     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
@@ -450,21 +547,18 @@
 blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                     QEMUIOVector *qiov, int flags)
 {
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
+    int err;
 
-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        uint64_t inject_offset = rule->options.inject.offset;
-
-        if (inject_offset == -1 ||
-            (inject_offset >= offset && inject_offset < offset + bytes))
-        {
-            break;
-        }
+    /* Sanity check block layer guarantees */
+    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
+    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
+    if (bs->bl.max_transfer) {
+        assert(bytes <= bs->bl.max_transfer);
     }
 
-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, rule);
+    err = rule_check(bs, offset, bytes);
+    if (err) {
+        return err;
     }
 
     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
@@ -472,22 +566,81 @@
 
 static int blkdebug_co_flush(BlockDriverState *bs)
 {
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
+    int err = rule_check(bs, 0, 0);
 
-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.offset == -1) {
-            break;
-        }
-    }
-
-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, rule);
+    if (err) {
+        return err;
     }
 
     return bdrv_co_flush(bs->file->bs);
 }
 
+static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
+                                                  int64_t offset, int count,
+                                                  BdrvRequestFlags flags)
+{
+    uint32_t align = MAX(bs->bl.request_alignment,
+                         bs->bl.pwrite_zeroes_alignment);
+    int err;
+
+    /* Only pass through requests that are larger than requested
+     * preferred alignment (so that we test the fallback to writes on
+     * unaligned portions), and check that the block layer never hands
+     * us anything unaligned that crosses an alignment boundary.  */
+    if (count < align) {
+        assert(QEMU_IS_ALIGNED(offset, align) ||
+               QEMU_IS_ALIGNED(offset + count, align) ||
+               DIV_ROUND_UP(offset, align) ==
+               DIV_ROUND_UP(offset + count, align));
+        return -ENOTSUP;
+    }
+    assert(QEMU_IS_ALIGNED(offset, align));
+    assert(QEMU_IS_ALIGNED(count, align));
+    if (bs->bl.max_pwrite_zeroes) {
+        assert(count <= bs->bl.max_pwrite_zeroes);
+    }
+
+    err = rule_check(bs, offset, count);
+    if (err) {
+        return err;
+    }
+
+    return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
+}
+
+static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
+                                             int64_t offset, int count)
+{
+    uint32_t align = bs->bl.pdiscard_alignment;
+    int err;
+
+    /* Only pass through requests that are larger than requested
+     * minimum alignment, and ensure that unaligned requests do not
+     * cross optimum discard boundaries. */
+    if (count < bs->bl.request_alignment) {
+        assert(QEMU_IS_ALIGNED(offset, align) ||
+               QEMU_IS_ALIGNED(offset + count, align) ||
+               DIV_ROUND_UP(offset, align) ==
+               DIV_ROUND_UP(offset + count, align));
+        return -ENOTSUP;
+    }
+    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
+    assert(QEMU_IS_ALIGNED(count, bs->bl.request_alignment));
+    if (align && count >= align) {
+        assert(QEMU_IS_ALIGNED(offset, align));
+        assert(QEMU_IS_ALIGNED(count, align));
+    }
+    if (bs->bl.max_pdiscard) {
+        assert(count <= bs->bl.max_pdiscard);
+    }
+
+    err = rule_check(bs, offset, count);
+    if (err) {
+        return err;
+    }
+
+    return bdrv_co_pdiscard(bs->file->bs, offset, count);
+}
 
 static void blkdebug_close(BlockDriverState *bs)
 {
@@ -715,6 +868,21 @@
     if (s->align) {
         bs->bl.request_alignment = s->align;
     }
+    if (s->max_transfer) {
+        bs->bl.max_transfer = s->max_transfer;
+    }
+    if (s->opt_write_zero) {
+        bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
+    }
+    if (s->max_write_zero) {
+        bs->bl.max_pwrite_zeroes = s->max_write_zero;
+    }
+    if (s->opt_discard) {
+        bs->bl.pdiscard_alignment = s->opt_discard;
+    }
+    if (s->max_discard) {
+        bs->bl.max_pdiscard = s->max_discard;
+    }
 }
 
 static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
@@ -742,6 +910,8 @@
     .bdrv_co_preadv         = blkdebug_co_preadv,
     .bdrv_co_pwritev        = blkdebug_co_pwritev,
     .bdrv_co_flush_to_disk  = blkdebug_co_flush,
+    .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
+    .bdrv_co_pdiscard       = blkdebug_co_pdiscard,
 
     .bdrv_debug_event           = blkdebug_debug_event,
     .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,

diff --git a/block/block-backend.c b/block/block-backend.c
index f5bf13e..f3a6008 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c

@@ -130,6 +130,56 @@
     return blk_name(child->opaque);
 }
 
+/*
+ * Notifies the user of the BlockBackend that migration has completed. qdev
+ * devices can tighten their permissions in response (specifically revoke
+ * shared write permissions that we needed for storage migration).
+ *
+ * If an error is returned, the VM cannot be allowed to be resumed.
+ */
+static void blk_root_activate(BdrvChild *child, Error **errp)
+{
+    BlockBackend *blk = child->opaque;
+    Error *local_err = NULL;
+
+    if (!blk->disable_perm) {
+        return;
+    }
+
+    blk->disable_perm = false;
+
+    blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        blk->disable_perm = true;
+        return;
+    }
+}
+
+static int blk_root_inactivate(BdrvChild *child)
+{
+    BlockBackend *blk = child->opaque;
+
+    if (blk->disable_perm) {
+        return 0;
+    }
+
+    /* Only inactivate BlockBackends for guest devices (which are inactive at
+     * this point because the VM is stopped) and unattached monitor-owned
+     * BlockBackends. If there is still any other user like a block job, then
+     * we simply can't inactivate the image. */
+    if (!blk->dev && !blk->name[0]) {
+        return -EPERM;
+    }
+
+    blk->disable_perm = true;
+    if (blk->root) {
+        bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort);
+    }
+
+    return 0;
+}
+
 static const BdrvChildRole child_root = {
     .inherit_options    = blk_root_inherit_options,
 
@@ -140,6 +190,9 @@
 
     .drained_begin      = blk_root_drained_begin,
     .drained_end        = blk_root_drained_end,
+
+    .activate           = blk_root_activate,
+    .inactivate         = blk_root_inactivate,
 };
 
 /*
@@ -601,34 +654,6 @@
     *shared_perm = blk->shared_perm;
 }
 
-/*
- * Notifies the user of all BlockBackends that migration has completed. qdev
- * devices can tighten their permissions in response (specifically revoke
- * shared write permissions that we needed for storage migration).
- *
- * If an error is returned, the VM cannot be allowed to be resumed.
- */
-void blk_resume_after_migration(Error **errp)
-{
-    BlockBackend *blk;
-    Error *local_err = NULL;
-
-    for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
-        if (!blk->disable_perm) {
-            continue;
-        }
-
-        blk->disable_perm = false;
-
-        blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            blk->disable_perm = true;
-            return;
-        }
-    }
-}
-
 static int blk_do_attach_dev(BlockBackend *blk, void *dev)
 {
     if (blk->dev) {

diff --git a/block/crypto.c b/block/crypto.c
index 6828180..10e5ddc 100644
--- a/block/crypto.c
+++ b/block/crypto.c

@@ -56,10 +56,10 @@
 
 
 static ssize_t block_crypto_read_func(QCryptoBlock *block,
-                                      void *opaque,
                                       size_t offset,
                                       uint8_t *buf,
                                       size_t buflen,
+                                      void *opaque,
                                       Error **errp)
 {
     BlockDriverState *bs = opaque;
@@ -83,10 +83,10 @@
 
 
 static ssize_t block_crypto_write_func(QCryptoBlock *block,
-                                       void *opaque,
                                        size_t offset,
                                        const uint8_t *buf,
                                        size_t buflen,
+                                       void *opaque,
                                        Error **errp)
 {
     struct BlockCryptoCreateData *data = opaque;
@@ -102,8 +102,8 @@
 
 
 static ssize_t block_crypto_init_func(QCryptoBlock *block,
-                                      void *opaque,
                                       size_t headerlen,
+                                      void *opaque,
                                       Error **errp)
 {
     struct BlockCryptoCreateData *data = opaque;

diff --git a/block/curl.c b/block/curl.c
index aa6e8cc..2a244e2 100644
--- a/block/curl.c
+++ b/block/curl.c

@@ -76,15 +76,12 @@
 #define CURL_TIMEOUT_DEFAULT 5
 #define CURL_TIMEOUT_MAX 10000
 
-#define FIND_RET_NONE   0
-#define FIND_RET_OK     1
-#define FIND_RET_WAIT   2
-
 #define CURL_BLOCK_OPT_URL       "url"
 #define CURL_BLOCK_OPT_READAHEAD "readahead"
 #define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
 #define CURL_BLOCK_OPT_TIMEOUT "timeout"
 #define CURL_BLOCK_OPT_COOKIE    "cookie"
+#define CURL_BLOCK_OPT_COOKIE_SECRET "cookie-secret"
 #define CURL_BLOCK_OPT_USERNAME "username"
 #define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret"
 #define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username"
@@ -93,14 +90,17 @@
 struct BDRVCURLState;
 
 typedef struct CURLAIOCB {
-    BlockAIOCB common;
+    Coroutine *co;
     QEMUIOVector *qiov;
 
-    int64_t sector_num;
-    int nb_sectors;
+    uint64_t offset;
+    uint64_t bytes;
+    int ret;
 
     size_t start;
     size_t end;
+
+    QSIMPLEQ_ENTRY(CURLAIOCB) next;
 } CURLAIOCB;
 
 typedef struct CURLSocket {
@@ -115,7 +115,7 @@
     CURL *curl;
     QLIST_HEAD(, CURLSocket) sockets;
     char *orig_buf;
-    size_t buf_start;
+    uint64_t buf_start;
     size_t buf_off;
     size_t buf_len;
     char range[128];
@@ -126,7 +126,7 @@
 typedef struct BDRVCURLState {
     CURLM *multi;
     QEMUTimer timer;
-    size_t len;
+    uint64_t len;
     CURLState states[CURL_NUM_STATES];
     char *url;
     size_t readahead_size;
@@ -136,6 +136,7 @@
     bool accept_range;
     AioContext *aio_context;
     QemuMutex mutex;
+    QSIMPLEQ_HEAD(, CURLAIOCB) free_state_waitq;
     char *username;
     char *password;
     char *proxyusername;
@@ -147,6 +148,7 @@
 static void curl_multi_read(void *arg);
 
 #ifdef NEED_CURL_TIMER_CALLBACK
+/* Called from curl_multi_do_locked, with s->mutex held.  */
 static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
 {
     BDRVCURLState *s = opaque;
@@ -163,6 +165,7 @@
 }
 #endif
 
+/* Called from curl_multi_do_locked, with s->mutex held.  */
 static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
                         void *userp, void *sp)
 {
@@ -212,6 +215,7 @@
     return 0;
 }
 
+/* Called from curl_multi_do_locked, with s->mutex held.  */
 static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
 {
     BDRVCURLState *s = opaque;
@@ -226,6 +230,7 @@
     return realsize;
 }
 
+/* Called from curl_multi_do_locked, with s->mutex held.  */
 static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
 {
     CURLState *s = ((CURLState*)opaque);
@@ -253,7 +258,7 @@
             continue;
 
         if ((s->buf_off >= acb->end)) {
-            size_t request_length = acb->nb_sectors * BDRV_SECTOR_SIZE;
+            size_t request_length = acb->bytes;
 
             qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start,
                                 acb->end - acb->start);
@@ -264,9 +269,11 @@
                                   request_length - offset);
             }
 
-            acb->common.cb(acb->common.opaque, 0);
-            qemu_aio_unref(acb);
+            acb->ret = 0;
             s->acb[i] = NULL;
+            qemu_mutex_unlock(&s->s->mutex);
+            aio_co_wake(acb->co);
+            qemu_mutex_lock(&s->s->mutex);
         }
     }
 
@@ -275,18 +282,19 @@
     return size * nmemb;
 }
 
-static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
-                         CURLAIOCB *acb)
+/* Called with s->mutex held.  */
+static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len,
+                          CURLAIOCB *acb)
 {
     int i;
-    size_t end = start + len;
-    size_t clamped_end = MIN(end, s->len);
-    size_t clamped_len = clamped_end - start;
+    uint64_t end = start + len;
+    uint64_t clamped_end = MIN(end, s->len);
+    uint64_t clamped_len = clamped_end - start;
 
     for (i=0; i<CURL_NUM_STATES; i++) {
         CURLState *state = &s->states[i];
-        size_t buf_end = (state->buf_start + state->buf_off);
-        size_t buf_fend = (state->buf_start + state->buf_len);
+        uint64_t buf_end = (state->buf_start + state->buf_off);
+        uint64_t buf_fend = (state->buf_start + state->buf_len);
 
         if (!state->orig_buf)
             continue;
@@ -305,9 +313,8 @@
             if (clamped_len < len) {
                 qemu_iovec_memset(acb->qiov, clamped_len, 0, len - clamped_len);
             }
-            acb->common.cb(acb->common.opaque, 0);
-
-            return FIND_RET_OK;
+            acb->ret = 0;
+            return true;
         }
 
         // Wait for unfinished chunks
@@ -325,13 +332,13 @@
             for (j=0; j<CURL_NUM_ACB; j++) {
                 if (!state->acb[j]) {
                     state->acb[j] = acb;
-                    return FIND_RET_WAIT;
+                    return true;
                 }
             }
         }
     }
 
-    return FIND_RET_NONE;
+    return false;
 }
 
 /* Called with s->mutex held.  */
@@ -376,11 +383,11 @@
                         continue;
                     }
 
-                    qemu_mutex_unlock(&s->mutex);
-                    acb->common.cb(acb->common.opaque, -EIO);
-                    qemu_mutex_lock(&s->mutex);
-                    qemu_aio_unref(acb);
+                    acb->ret = -EIO;
                     state->acb[i] = NULL;
+                    qemu_mutex_unlock(&s->mutex);
+                    aio_co_wake(acb->co);
+                    qemu_mutex_lock(&s->mutex);
                 }
             }
 
@@ -449,32 +456,28 @@
 #endif
 }
 
-static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
+/* Called with s->mutex held.  */
+static CURLState *curl_find_state(BDRVCURLState *s)
 {
     CURLState *state = NULL;
-    int i, j;
+    int i;
 
-    do {
-        for (i=0; i<CURL_NUM_STATES; i++) {
-            for (j=0; j<CURL_NUM_ACB; j++)
-                if (s->states[i].acb[j])
-                    continue;
-            if (s->states[i].in_use)
-                continue;
-
+    for (i = 0; i < CURL_NUM_STATES; i++) {
+        if (!s->states[i].in_use) {
             state = &s->states[i];
             state->in_use = 1;
             break;
         }
-        if (!state) {
-            aio_poll(bdrv_get_aio_context(bs), true);
-        }
-    } while(!state);
+    }
+    return state;
+}
 
+static int curl_init_state(BDRVCURLState *s, CURLState *state)
+{
     if (!state->curl) {
         state->curl = curl_easy_init();
         if (!state->curl) {
-            return NULL;
+            return -EIO;
         }
         curl_easy_setopt(state->curl, CURLOPT_URL, s->url);
         curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
@@ -527,11 +530,18 @@
     QLIST_INIT(&state->sockets);
     state->s = s;
 
-    return state;
+    return 0;
 }
 
+/* Called with s->mutex held.  */
 static void curl_clean_state(CURLState *s)
 {
+    CURLAIOCB *next;
+    int j;
+    for (j = 0; j < CURL_NUM_ACB; j++) {
+        assert(!s->acb[j]);
+    }
+
     if (s->s->multi)
         curl_multi_remove_handle(s->s->multi, s->curl);
 
@@ -543,6 +553,14 @@
     }
 
     s->in_use = 0;
+
+    next = QSIMPLEQ_FIRST(&s->s->free_state_waitq);
+    if (next) {
+        QSIMPLEQ_REMOVE_HEAD(&s->s->free_state_waitq, next);
+        qemu_mutex_unlock(&s->s->mutex);
+        aio_co_wake(next->co);
+        qemu_mutex_lock(&s->s->mutex);
+    }
 }
 
 static void curl_parse_filename(const char *filename, QDict *options,
@@ -556,6 +574,7 @@
     BDRVCURLState *s = bs->opaque;
     int i;
 
+    qemu_mutex_lock(&s->mutex);
     for (i = 0; i < CURL_NUM_STATES; i++) {
         if (s->states[i].in_use) {
             curl_clean_state(&s->states[i]);
@@ -571,6 +590,7 @@
         curl_multi_cleanup(s->multi);
         s->multi = NULL;
     }
+    qemu_mutex_unlock(&s->mutex);
 
     timer_del(&s->timer);
 }
@@ -624,6 +644,11 @@
             .help = "Pass the cookie or list of cookies with each request"
         },
         {
+            .name = CURL_BLOCK_OPT_COOKIE_SECRET,
+            .type = QEMU_OPT_STRING,
+            .help = "ID of secret used as cookie passed with each request"
+        },
+        {
             .name = CURL_BLOCK_OPT_USERNAME,
             .type = QEMU_OPT_STRING,
             .help = "Username for HTTP auth"
@@ -657,6 +682,7 @@
     Error *local_err = NULL;
     const char *file;
     const char *cookie;
+    const char *cookie_secret;
     double d;
     const char *secretid;
     const char *protocol_delimiter;
@@ -668,6 +694,7 @@
         return -EROFS;
     }
 
+    qemu_mutex_init(&s->mutex);
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &local_err);
     if (local_err) {
@@ -693,7 +720,22 @@
     s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, true);
 
     cookie = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE);
-    s->cookie = g_strdup(cookie);
+    cookie_secret = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE_SECRET);
+
+    if (cookie && cookie_secret) {
+        error_setg(errp,
+                   "curl driver cannot handle both cookie and cookie secret");
+        goto out_noclean;
+    }
+
+    if (cookie_secret) {
+        s->cookie = qcrypto_secret_lookup_as_utf8(cookie_secret, errp);
+        if (!s->cookie) {
+            goto out_noclean;
+        }
+    } else {
+        s->cookie = g_strdup(cookie);
+    }
 
     file = qemu_opt_get(opts, CURL_BLOCK_OPT_URL);
     if (file == NULL) {
@@ -736,14 +778,22 @@
     }
 
     DPRINTF("CURL: Opening %s\n", file);
+    QSIMPLEQ_INIT(&s->free_state_waitq);
     s->aio_context = bdrv_get_aio_context(bs);
     s->url = g_strdup(file);
-    state = curl_init_state(bs, s);
-    if (!state)
+    qemu_mutex_lock(&s->mutex);
+    state = curl_find_state(s);
+    qemu_mutex_unlock(&s->mutex);
+    if (!state) {
         goto out_noclean;
+    }
 
     // Get file size
 
+    if (curl_init_state(s, state) < 0) {
+        goto out;
+    }
+
     s->accept_range = false;
     curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1);
     curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION,
@@ -771,7 +821,7 @@
     }
 #endif
 
-    s->len = (size_t)d;
+    s->len = d;
 
     if ((!strncasecmp(s->url, "http://", strlen("http://"))
         || !strncasecmp(s->url, "https://", strlen("https://")))
@@ -780,13 +830,14 @@
                 "Server does not support 'range' (byte ranges).");
         goto out;
     }
-    DPRINTF("CURL: Size = %zd\n", s->len);
+    DPRINTF("CURL: Size = %" PRIu64 "\n", s->len);
 
+    qemu_mutex_lock(&s->mutex);
     curl_clean_state(state);
+    qemu_mutex_unlock(&s->mutex);
     curl_easy_cleanup(state->curl);
     state->curl = NULL;
 
-    qemu_mutex_init(&s->mutex);
     curl_attach_aio_context(bs, bdrv_get_aio_context(bs));
 
     qemu_opts_del(opts);
@@ -797,53 +848,51 @@
     curl_easy_cleanup(state->curl);
     state->curl = NULL;
 out_noclean:
+    qemu_mutex_destroy(&s->mutex);
     g_free(s->cookie);
     g_free(s->url);
     qemu_opts_del(opts);
     return -EINVAL;
 }
 
-static const AIOCBInfo curl_aiocb_info = {
-    .aiocb_size         = sizeof(CURLAIOCB),
-};
-
-
-static void curl_readv_bh_cb(void *p)
+static void curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
 {
     CURLState *state;
     int running;
-    int ret = -EINPROGRESS;
 
-    CURLAIOCB *acb = p;
-    BlockDriverState *bs = acb->common.bs;
     BDRVCURLState *s = bs->opaque;
 
-    size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
-    size_t end;
+    uint64_t start = acb->offset;
+    uint64_t end;
 
     qemu_mutex_lock(&s->mutex);
 
     // In case we have the requested data already (e.g. read-ahead),
     // we can just call the callback and be done.
-    switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
-        case FIND_RET_OK:
-            qemu_aio_unref(acb);
-            // fall through
-        case FIND_RET_WAIT:
-            goto out;
-        default:
-            break;
+    if (curl_find_buf(s, start, acb->bytes, acb)) {
+        goto out;
     }
 
     // No cache found, so let's start a new request
-    state = curl_init_state(acb->common.bs, s);
-    if (!state) {
-        ret = -EIO;
+    for (;;) {
+        state = curl_find_state(s);
+        if (state) {
+            break;
+        }
+        QSIMPLEQ_INSERT_TAIL(&s->free_state_waitq, acb, next);
+        qemu_mutex_unlock(&s->mutex);
+        qemu_coroutine_yield();
+        qemu_mutex_lock(&s->mutex);
+    }
+
+    if (curl_init_state(s, state) < 0) {
+        curl_clean_state(state);
+        acb->ret = -EIO;
         goto out;
     }
 
     acb->start = 0;
-    acb->end = MIN(acb->nb_sectors * BDRV_SECTOR_SIZE, s->len - start);
+    acb->end = MIN(acb->bytes, s->len - start);
 
     state->buf_off = 0;
     g_free(state->orig_buf);
@@ -853,14 +902,14 @@
     state->orig_buf = g_try_malloc(state->buf_len);
     if (state->buf_len && state->orig_buf == NULL) {
         curl_clean_state(state);
-        ret = -ENOMEM;
+        acb->ret = -ENOMEM;
         goto out;
     }
     state->acb[0] = acb;
 
-    snprintf(state->range, 127, "%zd-%zd", start, end);
-    DPRINTF("CURL (AIO): Reading %llu at %zd (%s)\n",
-            (acb->nb_sectors * BDRV_SECTOR_SIZE), start, state->range);
+    snprintf(state->range, 127, "%" PRIu64 "-%" PRIu64, start, end);
+    DPRINTF("CURL (AIO): Reading %" PRIu64 " at %" PRIu64 " (%s)\n",
+            acb->bytes, start, state->range);
     curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range);
 
     curl_multi_add_handle(s->multi, state->curl);
@@ -870,26 +919,24 @@
 
 out:
     qemu_mutex_unlock(&s->mutex);
-    if (ret != -EINPROGRESS) {
-        acb->common.cb(acb->common.opaque, ret);
-        qemu_aio_unref(acb);
-    }
 }
 
-static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn curl_co_preadv(BlockDriverState *bs,
+        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
-    CURLAIOCB *acb;
+    CURLAIOCB acb = {
+        .co = qemu_coroutine_self(),
+        .ret = -EINPROGRESS,
+        .qiov = qiov,
+        .offset = offset,
+        .bytes = bytes
+    };
 
-    acb = qemu_aio_get(&curl_aiocb_info, bs, cb, opaque);
-
-    acb->qiov = qiov;
-    acb->sector_num = sector_num;
-    acb->nb_sectors = nb_sectors;
-
-    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb);
-    return &acb->common;
+    curl_setup_preadv(bs, &acb);
+    while (acb.ret == -EINPROGRESS) {
+        qemu_coroutine_yield();
+    }
+    return acb.ret;
 }
 
 static void curl_close(BlockDriverState *bs)
@@ -920,7 +967,7 @@
     .bdrv_close                 = curl_close,
     .bdrv_getlength             = curl_getlength,
 
-    .bdrv_aio_readv             = curl_aio_readv,
+    .bdrv_co_preadv             = curl_co_preadv,
 
     .bdrv_detach_aio_context    = curl_detach_aio_context,
     .bdrv_attach_aio_context    = curl_attach_aio_context,
@@ -936,7 +983,7 @@
     .bdrv_close                 = curl_close,
     .bdrv_getlength             = curl_getlength,
 
-    .bdrv_aio_readv             = curl_aio_readv,
+    .bdrv_co_preadv             = curl_co_preadv,
 
     .bdrv_detach_aio_context    = curl_detach_aio_context,
     .bdrv_attach_aio_context    = curl_attach_aio_context,
@@ -952,7 +999,7 @@
     .bdrv_close                 = curl_close,
     .bdrv_getlength             = curl_getlength,
 
-    .bdrv_aio_readv             = curl_aio_readv,
+    .bdrv_co_preadv             = curl_co_preadv,
 
     .bdrv_detach_aio_context    = curl_detach_aio_context,
     .bdrv_attach_aio_context    = curl_attach_aio_context,
@@ -968,7 +1015,7 @@
     .bdrv_close                 = curl_close,
     .bdrv_getlength             = curl_getlength,
 
-    .bdrv_aio_readv             = curl_aio_readv,
+    .bdrv_co_preadv             = curl_co_preadv,
 
     .bdrv_detach_aio_context    = curl_detach_aio_context,
     .bdrv_attach_aio_context    = curl_attach_aio_context,

diff --git a/block/file-posix.c b/block/file-posix.c
index 19c48a0..4354d49 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c

@@ -129,12 +129,23 @@
 
 #define MAX_BLOCKSIZE	4096
 
+/* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes,
+ * leaving a few more bytes for its future use. */
+#define RAW_LOCK_PERM_BASE             100
+#define RAW_LOCK_SHARED_BASE           200
+
 typedef struct BDRVRawState {
     int fd;
+    int lock_fd;
+    bool use_lock;
     int type;
     int open_flags;
     size_t buf_align;
 
+    /* The current permissions. */
+    uint64_t perm;
+    uint64_t shared_perm;
+
 #ifdef CONFIG_XFS
     bool is_xfs:1;
 #endif
@@ -392,6 +403,11 @@
             .type = QEMU_OPT_STRING,
             .help = "host AIO implementation (threads, native)",
         },
+        {
+            .name = "locking",
+            .type = QEMU_OPT_STRING,
+            .help = "file locking mode (on/off/auto, default: auto)",
+        },
         { /* end of list */ }
     },
 };
@@ -406,6 +422,7 @@
     BlockdevAioOptions aio, aio_default;
     int fd, ret;
     struct stat st;
+    OnOffAuto locking;
 
     opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -435,6 +452,37 @@
     }
     s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE);
 
+    locking = qapi_enum_parse(OnOffAuto_lookup, qemu_opt_get(opts, "locking"),
+                              ON_OFF_AUTO__MAX, ON_OFF_AUTO_AUTO, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto fail;
+    }
+    switch (locking) {
+    case ON_OFF_AUTO_ON:
+        s->use_lock = true;
+#ifndef F_OFD_SETLK
+        fprintf(stderr,
+                "File lock requested but OFD locking syscall is unavailable, "
+                "falling back to POSIX file locks.\n"
+                "Due to the implementation, locks can be lost unexpectedly.\n");
+#endif
+        break;
+    case ON_OFF_AUTO_OFF:
+        s->use_lock = false;
+        break;
+    case ON_OFF_AUTO_AUTO:
+#ifdef F_OFD_SETLK
+        s->use_lock = true;
+#else
+        s->use_lock = false;
+#endif
+        break;
+    default:
+        abort();
+    }
+
     s->open_flags = open_flags;
     raw_parse_flags(bdrv_flags, &s->open_flags);
 
@@ -450,6 +498,21 @@
     }
     s->fd = fd;
 
+    s->lock_fd = -1;
+    if (s->use_lock) {
+        fd = qemu_open(filename, s->open_flags);
+        if (fd < 0) {
+            ret = -errno;
+            error_setg_errno(errp, errno, "Could not open '%s' for locking",
+                             filename);
+            qemu_close(s->fd);
+            goto fail;
+        }
+        s->lock_fd = fd;
+    }
+    s->perm = 0;
+    s->shared_perm = BLK_PERM_ALL;
+
 #ifdef CONFIG_LINUX_AIO
      /* Currently Linux does AIO only for files opened with O_DIRECT */
     if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
@@ -537,6 +600,161 @@
     return raw_open_common(bs, options, flags, 0, errp);
 }
 
+typedef enum {
+    RAW_PL_PREPARE,
+    RAW_PL_COMMIT,
+    RAW_PL_ABORT,
+} RawPermLockOp;
+
+#define PERM_FOREACH(i) \
+    for ((i) = 0; (1ULL << (i)) <= BLK_PERM_ALL; i++)
+
+/* Lock bytes indicated by @perm_lock_bits and @shared_perm_lock_bits in the
+ * file; if @unlock == true, also unlock the unneeded bytes.
+ * @shared_perm_lock_bits is the mask of all permissions that are NOT shared.
+ */
+static int raw_apply_lock_bytes(BDRVRawState *s,
+                                uint64_t perm_lock_bits,
+                                uint64_t shared_perm_lock_bits,
+                                bool unlock, Error **errp)
+{
+    int ret;
+    int i;
+
+    PERM_FOREACH(i) {
+        int off = RAW_LOCK_PERM_BASE + i;
+        if (perm_lock_bits & (1ULL << i)) {
+            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
+            if (ret) {
+                error_setg(errp, "Failed to lock byte %d", off);
+                return ret;
+            }
+        } else if (unlock) {
+            ret = qemu_unlock_fd(s->lock_fd, off, 1);
+            if (ret) {
+                error_setg(errp, "Failed to unlock byte %d", off);
+                return ret;
+            }
+        }
+    }
+    PERM_FOREACH(i) {
+        int off = RAW_LOCK_SHARED_BASE + i;
+        if (shared_perm_lock_bits & (1ULL << i)) {
+            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
+            if (ret) {
+                error_setg(errp, "Failed to lock byte %d", off);
+                return ret;
+            }
+        } else if (unlock) {
+            ret = qemu_unlock_fd(s->lock_fd, off, 1);
+            if (ret) {
+                error_setg(errp, "Failed to unlock byte %d", off);
+                return ret;
+            }
+        }
+    }
+    return 0;
+}
+
+/* Check "unshared" bytes implied by @perm and ~@shared_perm in the file. */
+static int raw_check_lock_bytes(BDRVRawState *s,
+                                uint64_t perm, uint64_t shared_perm,
+                                Error **errp)
+{
+    int ret;
+    int i;
+
+    PERM_FOREACH(i) {
+        int off = RAW_LOCK_SHARED_BASE + i;
+        uint64_t p = 1ULL << i;
+        if (perm & p) {
+            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
+            if (ret) {
+                char *perm_name = bdrv_perm_names(p);
+                error_setg(errp,
+                           "Failed to get \"%s\" lock",
+                           perm_name);
+                g_free(perm_name);
+                error_append_hint(errp,
+                                  "Is another process using the image?\n");
+                return ret;
+            }
+        }
+    }
+    PERM_FOREACH(i) {
+        int off = RAW_LOCK_PERM_BASE + i;
+        uint64_t p = 1ULL << i;
+        if (!(shared_perm & p)) {
+            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
+            if (ret) {
+                char *perm_name = bdrv_perm_names(p);
+                error_setg(errp,
+                           "Failed to get shared \"%s\" lock",
+                           perm_name);
+                g_free(perm_name);
+                error_append_hint(errp,
+                                  "Is another process using the image?\n");
+                return ret;
+            }
+        }
+    }
+    return 0;
+}
+
+static int raw_handle_perm_lock(BlockDriverState *bs,
+                                RawPermLockOp op,
+                                uint64_t new_perm, uint64_t new_shared,
+                                Error **errp)
+{
+    BDRVRawState *s = bs->opaque;
+    int ret = 0;
+    Error *local_err = NULL;
+
+    if (!s->use_lock) {
+        return 0;
+    }
+
+    if (bdrv_get_flags(bs) & BDRV_O_INACTIVE) {
+        return 0;
+    }
+
+    assert(s->lock_fd > 0);
+
+    switch (op) {
+    case RAW_PL_PREPARE:
+        ret = raw_apply_lock_bytes(s, s->perm | new_perm,
+                                   ~s->shared_perm | ~new_shared,
+                                   false, errp);
+        if (!ret) {
+            ret = raw_check_lock_bytes(s, new_perm, new_shared, errp);
+            if (!ret) {
+                return 0;
+            }
+        }
+        op = RAW_PL_ABORT;
+        /* fall through to unlock bytes. */
+    case RAW_PL_ABORT:
+        raw_apply_lock_bytes(s, s->perm, ~s->shared_perm, true, &local_err);
+        if (local_err) {
+            /* Theoretically the above call only unlocks bytes and it cannot
+             * fail. Something weird happened, report it.
+             */
+            error_report_err(local_err);
+        }
+        break;
+    case RAW_PL_COMMIT:
+        raw_apply_lock_bytes(s, new_perm, ~new_shared, true, &local_err);
+        if (local_err) {
+            /* Theoretically the above call only unlocks bytes and it cannot
+             * fail. Something weird happened, report it.
+             */
+            error_report_err(local_err);
+        }
+        break;
+    }
+    return ret;
+}
+
 static int raw_reopen_prepare(BDRVReopenState *state,
                               BlockReopenQueue *queue, Error **errp)
 {
@@ -1405,6 +1623,10 @@
         qemu_close(s->fd);
         s->fd = -1;
     }
+    if (s->lock_fd >= 0) {
+        qemu_close(s->lock_fd);
+        s->lock_fd = -1;
+    }
 }
 
 static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
@@ -1949,6 +2171,25 @@
     }
 };
 
+static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
+                          Error **errp)
+{
+    return raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp);
+}
+
+static void raw_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared)
+{
+    BDRVRawState *s = bs->opaque;
+    raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL);
+    s->perm = perm;
+    s->shared_perm = shared;
+}
+
+static void raw_abort_perm_update(BlockDriverState *bs)
+{
+    raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
+}
+
 BlockDriver bdrv_file = {
     .format_name = "file",
     .protocol_name = "file",
@@ -1979,7 +2220,9 @@
     .bdrv_get_info = raw_get_info,
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
-
+    .bdrv_check_perm = raw_check_perm,
+    .bdrv_set_perm   = raw_set_perm,
+    .bdrv_abort_perm_update = raw_abort_perm_update,
     .create_opts = &raw_create_opts,
 };
 
@@ -2438,6 +2681,9 @@
     .bdrv_get_info = raw_get_info,
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
+    .bdrv_check_perm = raw_check_perm,
+    .bdrv_set_perm   = raw_set_perm,
+    .bdrv_abort_perm_update = raw_abort_perm_update,
     .bdrv_probe_blocksizes = hdev_probe_blocksizes,
     .bdrv_probe_geometry = hdev_probe_geometry,
 

diff --git a/block/file-win32.c b/block/file-win32.c
index d1eb0a1..8f14f0b 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c

@@ -344,6 +344,12 @@
         goto fail;
     }
 
+    if (qdict_get_try_bool(options, "locking", false)) {
+        error_setg(errp, "locking=on is not supported on Windows");
+        ret = -EINVAL;
+        goto fail;
+    }
+
     filename = qemu_opt_get(opts, "filename");
 
     use_aio = get_aio_option(opts, flags, &local_err);

diff --git a/block/io.c b/block/io.c
index 40bd94f..fdd7485 100644
--- a/block/io.c
+++ b/block/io.c

@@ -1784,8 +1784,8 @@
 
     if (ret & BDRV_BLOCK_RAW) {
         assert(ret & BDRV_BLOCK_OFFSET_VALID);
-        ret = bdrv_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
-                                    *pnum, pnum, file);
+        ret = bdrv_co_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
+                                       *pnum, pnum, file);
         goto out;
     }
 

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 100398c..347d94b 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c

@@ -309,14 +309,19 @@
         uint64_t *l2_table, uint64_t stop_flags)
 {
     int i;
+    QCow2ClusterType first_cluster_type;
     uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
     uint64_t first_entry = be64_to_cpu(l2_table[0]);
     uint64_t offset = first_entry & mask;
 
-    if (!offset)
+    if (!offset) {
         return 0;
+    }
 
-    assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL);
+    /* must be allocated */
+    first_cluster_type = qcow2_get_cluster_type(first_entry);
+    assert(first_cluster_type == QCOW2_CLUSTER_NORMAL ||
+           first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC);
 
     for (i = 0; i < nb_clusters; i++) {
         uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
@@ -328,14 +333,21 @@
 	return i;
 }
 
-static int count_contiguous_clusters_by_type(int nb_clusters,
-                                             uint64_t *l2_table,
-                                             int wanted_type)
+/*
+ * Checks how many consecutive unallocated clusters in a given L2
+ * table have the same cluster type.
+ */
+static int count_contiguous_clusters_unallocated(int nb_clusters,
+                                                 uint64_t *l2_table,
+                                                 QCow2ClusterType wanted_type)
 {
     int i;
 
+    assert(wanted_type == QCOW2_CLUSTER_ZERO_PLAIN ||
+           wanted_type == QCOW2_CLUSTER_UNALLOCATED);
     for (i = 0; i < nb_clusters; i++) {
-        int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));
+        uint64_t entry = be64_to_cpu(l2_table[i]);
+        QCow2ClusterType type = qcow2_get_cluster_type(entry);
 
         if (type != wanted_type) {
             break;
@@ -487,6 +499,7 @@
     int l1_bits, c;
     unsigned int offset_in_cluster;
     uint64_t bytes_available, bytes_needed, nb_clusters;
+    QCow2ClusterType type;
     int ret;
 
     offset_in_cluster = offset_into_cluster(s, offset);
@@ -509,13 +522,13 @@
 
     l1_index = offset >> l1_bits;
     if (l1_index >= s->l1_size) {
-        ret = QCOW2_CLUSTER_UNALLOCATED;
+        type = QCOW2_CLUSTER_UNALLOCATED;
         goto out;
     }
 
     l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
     if (!l2_offset) {
-        ret = QCOW2_CLUSTER_UNALLOCATED;
+        type = QCOW2_CLUSTER_UNALLOCATED;
         goto out;
     }
 
@@ -544,38 +557,37 @@
      * true */
     assert(nb_clusters <= INT_MAX);
 
-    ret = qcow2_get_cluster_type(*cluster_offset);
-    switch (ret) {
+    type = qcow2_get_cluster_type(*cluster_offset);
+    if (s->qcow_version < 3 && (type == QCOW2_CLUSTER_ZERO_PLAIN ||
+                                type == QCOW2_CLUSTER_ZERO_ALLOC)) {
+        qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
+                                " in pre-v3 image (L2 offset: %#" PRIx64
+                                ", L2 index: %#x)", l2_offset, l2_index);
+        ret = -EIO;
+        goto fail;
+    }
+    switch (type) {
     case QCOW2_CLUSTER_COMPRESSED:
         /* Compressed clusters can only be processed one by one */
         c = 1;
         *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
         break;
-    case QCOW2_CLUSTER_ZERO:
-        if (s->qcow_version < 3) {
-            qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
-                                    " in pre-v3 image (L2 offset: %#" PRIx64
-                                    ", L2 index: %#x)", l2_offset, l2_index);
-            ret = -EIO;
-            goto fail;
-        }
-        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
-                                              QCOW2_CLUSTER_ZERO);
-        *cluster_offset = 0;
-        break;
+    case QCOW2_CLUSTER_ZERO_PLAIN:
     case QCOW2_CLUSTER_UNALLOCATED:
         /* how many empty clusters ? */
-        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
-                                              QCOW2_CLUSTER_UNALLOCATED);
+        c = count_contiguous_clusters_unallocated(nb_clusters,
+                                                  &l2_table[l2_index], type);
         *cluster_offset = 0;
         break;
+    case QCOW2_CLUSTER_ZERO_ALLOC:
     case QCOW2_CLUSTER_NORMAL:
         /* how many allocated clusters ? */
         c = count_contiguous_clusters(nb_clusters, s->cluster_size,
-                &l2_table[l2_index], QCOW_OFLAG_ZERO);
+                                      &l2_table[l2_index], QCOW_OFLAG_ZERO);
         *cluster_offset &= L2E_OFFSET_MASK;
         if (offset_into_cluster(s, *cluster_offset)) {
-            qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset %#"
+            qcow2_signal_corruption(bs, true, -1, -1,
+                                    "Cluster allocation offset %#"
                                     PRIx64 " unaligned (L2 offset: %#" PRIx64
                                     ", L2 index: %#x)", *cluster_offset,
                                     l2_offset, l2_index);
@@ -602,7 +614,7 @@
     assert(bytes_available - offset_in_cluster <= UINT_MAX);
     *bytes = bytes_available - offset_in_cluster;
 
-    return ret;
+    return type;
 
 fail:
     qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
@@ -835,7 +847,7 @@
      * Don't discard clusters that reach a refcount of 0 (e.g. compressed
      * clusters), the next write will reuse them anyway.
      */
-    if (j != 0) {
+    if (!m->keep_old_clusters && j != 0) {
         for (i = 0; i < j; i++) {
             qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
                                     QCOW2_DISCARD_NEVER);
@@ -860,7 +872,7 @@
 
     for (i = 0; i < nb_clusters; i++) {
         uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
-        int cluster_type = qcow2_get_cluster_type(l2_entry);
+        QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
 
         switch(cluster_type) {
         case QCOW2_CLUSTER_NORMAL:
@@ -870,7 +882,8 @@
             break;
         case QCOW2_CLUSTER_UNALLOCATED:
         case QCOW2_CLUSTER_COMPRESSED:
-        case QCOW2_CLUSTER_ZERO:
+        case QCOW2_CLUSTER_ZERO_PLAIN:
+        case QCOW2_CLUSTER_ZERO_ALLOC:
             break;
         default:
             abort();
@@ -1132,8 +1145,9 @@
     uint64_t entry;
     uint64_t nb_clusters;
     int ret;
+    bool keep_old_clusters = false;
 
-    uint64_t alloc_cluster_offset;
+    uint64_t alloc_cluster_offset = 0;
 
     trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
                              *bytes);
@@ -1170,31 +1184,54 @@
      * wrong with our code. */
     assert(nb_clusters > 0);
 
+    if (qcow2_get_cluster_type(entry) == QCOW2_CLUSTER_ZERO_ALLOC &&
+        (entry & QCOW_OFLAG_COPIED) &&
+        (!*host_offset ||
+         start_of_cluster(s, *host_offset) == (entry & L2E_OFFSET_MASK)))
+    {
+        /* Try to reuse preallocated zero clusters; contiguous normal clusters
+         * would be fine, too, but count_cow_clusters() above has limited
+         * nb_clusters already to a range of COW clusters */
+        int preallocated_nb_clusters =
+            count_contiguous_clusters(nb_clusters, s->cluster_size,
+                                      &l2_table[l2_index], QCOW_OFLAG_COPIED);
+        assert(preallocated_nb_clusters > 0);
+
+        nb_clusters = preallocated_nb_clusters;
+        alloc_cluster_offset = entry & L2E_OFFSET_MASK;
+
+        /* We want to reuse these clusters, so qcow2_alloc_cluster_link_l2()
+         * should not free them. */
+        keep_old_clusters = true;
+    }
+
     qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
 
-    /* Allocate, if necessary at a given offset in the image file */
-    alloc_cluster_offset = start_of_cluster(s, *host_offset);
-    ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
-                                  &nb_clusters);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* Can't extend contiguous allocation */
-    if (nb_clusters == 0) {
-        *bytes = 0;
-        return 0;
-    }
-
-    /* !*host_offset would overwrite the image header and is reserved for "no
-     * host offset preferred". If 0 was a valid host offset, it'd trigger the
-     * following overlap check; do that now to avoid having an invalid value in
-     * *host_offset. */
     if (!alloc_cluster_offset) {
-        ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
-                                            nb_clusters * s->cluster_size);
-        assert(ret < 0);
-        goto fail;
+        /* Allocate, if necessary at a given offset in the image file */
+        alloc_cluster_offset = start_of_cluster(s, *host_offset);
+        ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
+                                      &nb_clusters);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        /* Can't extend contiguous allocation */
+        if (nb_clusters == 0) {
+            *bytes = 0;
+            return 0;
+        }
+
+        /* !*host_offset would overwrite the image header and is reserved for
+         * "no host offset preferred". If 0 was a valid host offset, it'd
+         * trigger the following overlap check; do that now to avoid having an
+         * invalid value in *host_offset. */
+        if (!alloc_cluster_offset) {
+            ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
+                                                nb_clusters * s->cluster_size);
+            assert(ret < 0);
+            goto fail;
+        }
     }
 
     /*
@@ -1225,6 +1262,8 @@
         .offset         = start_of_cluster(s, guest_offset),
         .nb_clusters    = nb_clusters,
 
+        .keep_old_clusters  = keep_old_clusters,
+
         .cow_start = {
             .offset     = 0,
             .nb_bytes   = offset_into_cluster(s, guest_offset),
@@ -1472,24 +1511,25 @@
          * but rather fall through to the backing file.
          */
         switch (qcow2_get_cluster_type(old_l2_entry)) {
-            case QCOW2_CLUSTER_UNALLOCATED:
-                if (full_discard || !bs->backing) {
-                    continue;
-                }
-                break;
+        case QCOW2_CLUSTER_UNALLOCATED:
+            if (full_discard || !bs->backing) {
+                continue;
+            }
+            break;
 
-            case QCOW2_CLUSTER_ZERO:
-                if (!full_discard) {
-                    continue;
-                }
-                break;
+        case QCOW2_CLUSTER_ZERO_PLAIN:
+            if (!full_discard) {
+                continue;
+            }
+            break;
 
-            case QCOW2_CLUSTER_NORMAL:
-            case QCOW2_CLUSTER_COMPRESSED:
-                break;
+        case QCOW2_CLUSTER_ZERO_ALLOC:
+        case QCOW2_CLUSTER_NORMAL:
+        case QCOW2_CLUSTER_COMPRESSED:
+            break;
 
-            default:
-                abort();
+        default:
+            abort();
         }
 
         /* First remove L2 entries */
@@ -1509,35 +1549,36 @@
     return nb_clusters;
 }
 
-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-    int nb_sectors, enum qcow2_discard_type type, bool full_discard)
+int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
+                          uint64_t bytes, enum qcow2_discard_type type,
+                          bool full_discard)
 {
     BDRVQcow2State *s = bs->opaque;
-    uint64_t end_offset;
+    uint64_t end_offset = offset + bytes;
     uint64_t nb_clusters;
+    int64_t cleared;
     int ret;
 
-    end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
-
-    /* The caller must cluster-align start; round end down except at EOF */
+    /* Caller must pass aligned values, except at image end */
     assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
-    if (end_offset != bs->total_sectors * BDRV_SECTOR_SIZE) {
-        end_offset = start_of_cluster(s, end_offset);
-    }
+    assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
+           end_offset == bs->total_sectors << BDRV_SECTOR_BITS);
 
-    nb_clusters = size_to_clusters(s, end_offset - offset);
+    nb_clusters = size_to_clusters(s, bytes);
 
     s->cache_discards = true;
 
     /* Each L2 table is handled by its own loop iteration */
     while (nb_clusters > 0) {
-        ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard);
-        if (ret < 0) {
+        cleared = discard_single_l2(bs, offset, nb_clusters, type,
+                                    full_discard);
+        if (cleared < 0) {
+            ret = cleared;
             goto fail;
         }
 
-        nb_clusters -= ret;
-        offset += (ret * s->cluster_size);
+        nb_clusters -= cleared;
+        offset += (cleared * s->cluster_size);
     }
 
     ret = 0;
@@ -1561,6 +1602,7 @@
     int l2_index;
     int ret;
     int i;
+    bool unmap = !!(flags & BDRV_REQ_MAY_UNMAP);
 
     ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
     if (ret < 0) {
@@ -1573,12 +1615,22 @@
 
     for (i = 0; i < nb_clusters; i++) {
         uint64_t old_offset;
+        QCow2ClusterType cluster_type;
 
         old_offset = be64_to_cpu(l2_table[l2_index + i]);
 
-        /* Update L2 entries */
+        /*
+         * Minimize L2 changes if the cluster already reads back as
+         * zeroes with correct allocation.
+         */
+        cluster_type = qcow2_get_cluster_type(old_offset);
+        if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN ||
+            (cluster_type == QCOW2_CLUSTER_ZERO_ALLOC && !unmap)) {
+            continue;
+        }
+
         qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
-        if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) {
+        if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) {
             l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
             qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
         } else {
@@ -1591,31 +1643,39 @@
     return nb_clusters;
 }
 
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
-                        int flags)
+int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
+                          uint64_t bytes, int flags)
 {
     BDRVQcow2State *s = bs->opaque;
+    uint64_t end_offset = offset + bytes;
     uint64_t nb_clusters;
+    int64_t cleared;
     int ret;
 
+    /* Caller must pass aligned values, except at image end */
+    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
+    assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
+           end_offset == bs->total_sectors << BDRV_SECTOR_BITS);
+
     /* The zero flag is only supported by version 3 and newer */
     if (s->qcow_version < 3) {
         return -ENOTSUP;
     }
 
     /* Each L2 table is handled by its own loop iteration */
-    nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
+    nb_clusters = size_to_clusters(s, bytes);
 
     s->cache_discards = true;
 
     while (nb_clusters > 0) {
-        ret = zero_single_l2(bs, offset, nb_clusters, flags);
-        if (ret < 0) {
+        cleared = zero_single_l2(bs, offset, nb_clusters, flags);
+        if (cleared < 0) {
+            ret = cleared;
             goto fail;
         }
 
-        nb_clusters -= ret;
-        offset += (ret * s->cluster_size);
+        nb_clusters -= cleared;
+        offset += (cleared * s->cluster_size);
     }
 
     ret = 0;
@@ -1699,14 +1759,14 @@
         for (j = 0; j < s->l2_size; j++) {
             uint64_t l2_entry = be64_to_cpu(l2_table[j]);
             int64_t offset = l2_entry & L2E_OFFSET_MASK;
-            int cluster_type = qcow2_get_cluster_type(l2_entry);
-            bool preallocated = offset != 0;
+            QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
 
-            if (cluster_type != QCOW2_CLUSTER_ZERO) {
+            if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN &&
+                cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) {
                 continue;
             }
 
-            if (!preallocated) {
+            if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                 if (!bs->backing) {
                     /* not backed; therefore we can simply deallocate the
                      * cluster */
@@ -1741,7 +1801,7 @@
                                         "%#" PRIx64 " unaligned (L2 offset: %#"
                                         PRIx64 ", L2 index: %#x)", offset,
                                         l2_offset, j);
-                if (!preallocated) {
+                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                     qcow2_free_clusters(bs, offset, s->cluster_size,
                                         QCOW2_DISCARD_ALWAYS);
                 }
@@ -1751,7 +1811,7 @@
 
             ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
             if (ret < 0) {
-                if (!preallocated) {
+                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                     qcow2_free_clusters(bs, offset, s->cluster_size,
                                         QCOW2_DISCARD_ALWAYS);
                 }
@@ -1760,7 +1820,7 @@
 
             ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
             if (ret < 0) {
-                if (!preallocated) {
+                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                     qcow2_free_clusters(bs, offset, s->cluster_size,
                                         QCOW2_DISCARD_ALWAYS);
                 }

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 4efca7e..7c06061 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c

@@ -1028,18 +1028,17 @@
         }
         break;
     case QCOW2_CLUSTER_NORMAL:
-    case QCOW2_CLUSTER_ZERO:
-        if (l2_entry & L2E_OFFSET_MASK) {
-            if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
-                qcow2_signal_corruption(bs, false, -1, -1,
-                                        "Cannot free unaligned cluster %#llx",
-                                        l2_entry & L2E_OFFSET_MASK);
-            } else {
-                qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
-                                    nb_clusters << s->cluster_bits, type);
-            }
+    case QCOW2_CLUSTER_ZERO_ALLOC:
+        if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
+            qcow2_signal_corruption(bs, false, -1, -1,
+                                    "Cannot free unaligned cluster %#llx",
+                                    l2_entry & L2E_OFFSET_MASK);
+        } else {
+            qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
+                                nb_clusters << s->cluster_bits, type);
         }
         break;
+    case QCOW2_CLUSTER_ZERO_PLAIN:
     case QCOW2_CLUSTER_UNALLOCATED:
         break;
     default:
@@ -1059,9 +1058,9 @@
     int64_t l1_table_offset, int l1_size, int addend)
 {
     BDRVQcow2State *s = bs->opaque;
-    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, refcount;
+    uint64_t *l1_table, *l2_table, l2_offset, entry, l1_size2, refcount;
     bool l1_allocated = false;
-    int64_t old_offset, old_l2_offset;
+    int64_t old_entry, old_l2_offset;
     int i, j, l1_modified = 0, nb_csectors;
     int ret;
 
@@ -1089,15 +1088,16 @@
             goto fail;
         }
 
-        for(i = 0;i < l1_size; i++)
+        for (i = 0; i < l1_size; i++) {
             be64_to_cpus(&l1_table[i]);
+        }
     } else {
         assert(l1_size == s->l1_size);
         l1_table = s->l1_table;
         l1_allocated = false;
     }
 
-    for(i = 0; i < l1_size; i++) {
+    for (i = 0; i < l1_size; i++) {
         l2_offset = l1_table[i];
         if (l2_offset) {
             old_l2_offset = l2_offset;
@@ -1117,81 +1117,79 @@
                 goto fail;
             }
 
-            for(j = 0; j < s->l2_size; j++) {
+            for (j = 0; j < s->l2_size; j++) {
                 uint64_t cluster_index;
+                uint64_t offset;
 
-                offset = be64_to_cpu(l2_table[j]);
-                old_offset = offset;
-                offset &= ~QCOW_OFLAG_COPIED;
+                entry = be64_to_cpu(l2_table[j]);
+                old_entry = entry;
+                entry &= ~QCOW_OFLAG_COPIED;
+                offset = entry & L2E_OFFSET_MASK;
 
-                switch (qcow2_get_cluster_type(offset)) {
-                    case QCOW2_CLUSTER_COMPRESSED:
-                        nb_csectors = ((offset >> s->csize_shift) &
-                                       s->csize_mask) + 1;
-                        if (addend != 0) {
-                            ret = update_refcount(bs,
-                                (offset & s->cluster_offset_mask) & ~511,
+                switch (qcow2_get_cluster_type(entry)) {
+                case QCOW2_CLUSTER_COMPRESSED:
+                    nb_csectors = ((entry >> s->csize_shift) &
+                                   s->csize_mask) + 1;
+                    if (addend != 0) {
+                        ret = update_refcount(bs,
+                                (entry & s->cluster_offset_mask) & ~511,
                                 nb_csectors * 512, abs(addend), addend < 0,
                                 QCOW2_DISCARD_SNAPSHOT);
-                            if (ret < 0) {
-                                goto fail;
-                            }
-                        }
-                        /* compressed clusters are never modified */
-                        refcount = 2;
-                        break;
-
-                    case QCOW2_CLUSTER_NORMAL:
-                    case QCOW2_CLUSTER_ZERO:
-                        if (offset_into_cluster(s, offset & L2E_OFFSET_MASK)) {
-                            qcow2_signal_corruption(bs, true, -1, -1, "Data "
-                                                    "cluster offset %#llx "
-                                                    "unaligned (L2 offset: %#"
-                                                    PRIx64 ", L2 index: %#x)",
-                                                    offset & L2E_OFFSET_MASK,
-                                                    l2_offset, j);
-                            ret = -EIO;
-                            goto fail;
-                        }
-
-                        cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
-                        if (!cluster_index) {
-                            /* unallocated */
-                            refcount = 0;
-                            break;
-                        }
-                        if (addend != 0) {
-                            ret = qcow2_update_cluster_refcount(bs,
-                                    cluster_index, abs(addend), addend < 0,
-                                    QCOW2_DISCARD_SNAPSHOT);
-                            if (ret < 0) {
-                                goto fail;
-                            }
-                        }
-
-                        ret = qcow2_get_refcount(bs, cluster_index, &refcount);
                         if (ret < 0) {
                             goto fail;
                         }
-                        break;
+                    }
+                    /* compressed clusters are never modified */
+                    refcount = 2;
+                    break;
 
-                    case QCOW2_CLUSTER_UNALLOCATED:
-                        refcount = 0;
-                        break;
+                case QCOW2_CLUSTER_NORMAL:
+                case QCOW2_CLUSTER_ZERO_ALLOC:
+                    if (offset_into_cluster(s, offset)) {
+                        qcow2_signal_corruption(bs, true, -1, -1, "Cluster "
+                                                "allocation offset %#" PRIx64
+                                                " unaligned (L2 offset: %#"
+                                                PRIx64 ", L2 index: %#x)",
+                                                offset, l2_offset, j);
+                        ret = -EIO;
+                        goto fail;
+                    }
 
-                    default:
-                        abort();
+                    cluster_index = offset >> s->cluster_bits;
+                    assert(cluster_index);
+                    if (addend != 0) {
+                        ret = qcow2_update_cluster_refcount(bs,
+                                    cluster_index, abs(addend), addend < 0,
+                                    QCOW2_DISCARD_SNAPSHOT);
+                        if (ret < 0) {
+                            goto fail;
+                        }
+                    }
+
+                    ret = qcow2_get_refcount(bs, cluster_index, &refcount);
+                    if (ret < 0) {
+                        goto fail;
+                    }
+                    break;
+
+                case QCOW2_CLUSTER_ZERO_PLAIN:
+                case QCOW2_CLUSTER_UNALLOCATED:
+                    refcount = 0;
+                    break;
+
+                default:
+                    abort();
                 }
 
                 if (refcount == 1) {
-                    offset |= QCOW_OFLAG_COPIED;
+                    entry |= QCOW_OFLAG_COPIED;
                 }
-                if (offset != old_offset) {
+                if (entry != old_entry) {
                     if (addend > 0) {
                         qcow2_cache_set_dependency(bs, s->l2_table_cache,
                             s->refcount_block_cache);
                     }
-                    l2_table[j] = cpu_to_be64(offset);
+                    l2_table[j] = cpu_to_be64(entry);
                     qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache,
                                                  l2_table);
                 }
@@ -1441,12 +1439,7 @@
             }
             break;
 
-        case QCOW2_CLUSTER_ZERO:
-            if ((l2_entry & L2E_OFFSET_MASK) == 0) {
-                break;
-            }
-            /* fall through */
-
+        case QCOW2_CLUSTER_ZERO_ALLOC:
         case QCOW2_CLUSTER_NORMAL:
         {
             uint64_t offset = l2_entry & L2E_OFFSET_MASK;
@@ -1476,6 +1469,7 @@
             break;
         }
 
+        case QCOW2_CLUSTER_ZERO_PLAIN:
         case QCOW2_CLUSTER_UNALLOCATED:
             break;
 
@@ -1638,10 +1632,10 @@
         for (j = 0; j < s->l2_size; j++) {
             uint64_t l2_entry = be64_to_cpu(l2_table[j]);
             uint64_t data_offset = l2_entry & L2E_OFFSET_MASK;
-            int cluster_type = qcow2_get_cluster_type(l2_entry);
+            QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
 
-            if ((cluster_type == QCOW2_CLUSTER_NORMAL) ||
-                ((cluster_type == QCOW2_CLUSTER_ZERO) && (data_offset != 0))) {
+            if (cluster_type == QCOW2_CLUSTER_NORMAL ||
+                cluster_type == QCOW2_CLUSTER_ZERO_ALLOC) {
                 ret = qcow2_get_refcount(bs,
                                          data_offset >> s->cluster_bits,
                                          &refcount);

diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 0324243..44243e0 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c

@@ -440,10 +440,9 @@
 
     /* The VM state isn't needed any more in the active L1 table; in fact, it
      * hurts by causing expensive COW for the next snapshot. */
-    qcow2_discard_clusters(bs, qcow2_vm_state_offset(s),
-                           align_offset(sn->vm_state_size, s->cluster_size)
-                                >> BDRV_SECTOR_BITS,
-                           QCOW2_DISCARD_NEVER, false);
+    qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
+                          align_offset(sn->vm_state_size, s->cluster_size),
+                          QCOW2_DISCARD_NEVER, false);
 
 #ifdef DEBUG_ALLOC
     {

diff --git a/block/qcow2.c b/block/qcow2.c
index 1c26977..a8d61f0 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c

@@ -1385,7 +1385,7 @@
         *file = bs->file->bs;
         status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset;
     }
-    if (ret == QCOW2_CLUSTER_ZERO) {
+    if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) {
         status |= BDRV_BLOCK_ZERO;
     } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
         status |= BDRV_BLOCK_DATA;
@@ -1482,7 +1482,8 @@
             }
             break;
 
-        case QCOW2_CLUSTER_ZERO:
+        case QCOW2_CLUSTER_ZERO_PLAIN:
+        case QCOW2_CLUSTER_ZERO_ALLOC:
             qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
             break;
 
@@ -2139,7 +2140,7 @@
          * too, as long as the bulk is allocated here). Therefore, using
          * floating point arithmetic is fine. */
         int64_t meta_size = 0;
-        uint64_t nreftablee, nrefblocke, nl1e, nl2e;
+        uint64_t nreftablee, nrefblocke, nl1e, nl2e, refblock_count;
         int64_t aligned_total_size = align_offset(total_size, cluster_size);
         int refblock_bits, refblock_size;
         /* refcount entry size in bytes */
@@ -2182,11 +2183,12 @@
         nrefblocke = (aligned_total_size + meta_size + cluster_size)
                    / (cluster_size - rces - rces * sizeof(uint64_t)
                                                  / cluster_size);
-        meta_size += DIV_ROUND_UP(nrefblocke, refblock_size) * cluster_size;
+        refblock_count = DIV_ROUND_UP(nrefblocke, refblock_size);
+        meta_size += refblock_count * cluster_size;
 
         /* total size of refcount tables */
-        nreftablee = nrefblocke / refblock_size;
-        nreftablee = align_offset(nreftablee, cluster_size / sizeof(uint64_t));
+        nreftablee = align_offset(refblock_count,
+                                  cluster_size / sizeof(uint64_t));
         meta_size += nreftablee * sizeof(uint64_t);
 
         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
@@ -2449,6 +2451,10 @@
     BlockDriverState *file;
     int64_t res;
 
+    if (start + count > bs->total_sectors) {
+        count = bs->total_sectors - start;
+    }
+
     if (!count) {
         return true;
     }
@@ -2467,6 +2473,9 @@
     uint32_t tail = (offset + count) % s->cluster_size;
 
     trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, count);
+    if (offset + count == bs->total_sectors * BDRV_SECTOR_SIZE) {
+        tail = 0;
+    }
 
     if (head || tail) {
         int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
@@ -2490,7 +2499,9 @@
         count = s->cluster_size;
         nr = s->cluster_size;
         ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
-        if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) {
+        if (ret != QCOW2_CLUSTER_UNALLOCATED &&
+            ret != QCOW2_CLUSTER_ZERO_PLAIN &&
+            ret != QCOW2_CLUSTER_ZERO_ALLOC) {
             qemu_co_mutex_unlock(&s->lock);
             return -ENOTSUP;
         }
@@ -2501,7 +2512,7 @@
     trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count);
 
     /* Whatever is left can use real zero clusters */
-    ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS, flags);
+    ret = qcow2_cluster_zeroize(bs, offset, count, flags);
     qemu_co_mutex_unlock(&s->lock);
 
     return ret;
@@ -2524,8 +2535,8 @@
     }
 
     qemu_co_mutex_lock(&s->lock);
-    ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS,
-                                 QCOW2_DISCARD_REQUEST, false);
+    ret = qcow2_cluster_discard(bs, offset, count, QCOW2_DISCARD_REQUEST,
+                                false);
     qemu_co_mutex_unlock(&s->lock);
     return ret;
 }
@@ -2832,9 +2843,8 @@
 static int qcow2_make_empty(BlockDriverState *bs)
 {
     BDRVQcow2State *s = bs->opaque;
-    uint64_t start_sector;
-    int sector_step = (QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size) /
-                       BDRV_SECTOR_SIZE);
+    uint64_t offset, end_offset;
+    int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size);
     int l1_clusters, ret = 0;
 
     l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
@@ -2851,18 +2861,15 @@
 
     /* This fallback code simply discards every active cluster; this is slow,
      * but works in all cases */
-    for (start_sector = 0; start_sector < bs->total_sectors;
-         start_sector += sector_step)
-    {
+    end_offset = bs->total_sectors * BDRV_SECTOR_SIZE;
+    for (offset = 0; offset < end_offset; offset += step) {
         /* As this function is generally used after committing an external
          * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the
          * default action for this kind of discard is to pass the discard,
          * which will ideally result in an actually smaller image file, as
          * is probably desired. */
-        ret = qcow2_discard_clusters(bs, start_sector * BDRV_SECTOR_SIZE,
-                                     MIN(sector_step,
-                                         bs->total_sectors - start_sector),
-                                     QCOW2_DISCARD_SNAPSHOT, true);
+        ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset),
+                                    QCOW2_DISCARD_SNAPSHOT, true);
         if (ret < 0) {
             break;
         }

diff --git a/block/qcow2.h b/block/qcow2.h
index f8aeb08..1801dc3 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h

@@ -322,6 +322,9 @@
     /** Number of newly allocated clusters */
     int nb_clusters;
 
+    /** Do not free the old clusters */
+    bool keep_old_clusters;
+
     /**
      * Requests that overlap with this allocation and wait to be restarted
      * when the allocating request has completed.
@@ -346,12 +349,13 @@
     QLIST_ENTRY(QCowL2Meta) next_in_flight;
 } QCowL2Meta;
 
-enum {
+typedef enum QCow2ClusterType {
     QCOW2_CLUSTER_UNALLOCATED,
+    QCOW2_CLUSTER_ZERO_PLAIN,
+    QCOW2_CLUSTER_ZERO_ALLOC,
     QCOW2_CLUSTER_NORMAL,
     QCOW2_CLUSTER_COMPRESSED,
-    QCOW2_CLUSTER_ZERO
-};
+} QCow2ClusterType;
 
 typedef enum QCow2MetadataOverlap {
     QCOW2_OL_MAIN_HEADER_BITNR    = 0,
@@ -440,12 +444,15 @@
     return QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits;
 }
 
-static inline int qcow2_get_cluster_type(uint64_t l2_entry)
+static inline QCow2ClusterType qcow2_get_cluster_type(uint64_t l2_entry)
 {
     if (l2_entry & QCOW_OFLAG_COMPRESSED) {
         return QCOW2_CLUSTER_COMPRESSED;
     } else if (l2_entry & QCOW_OFLAG_ZERO) {
-        return QCOW2_CLUSTER_ZERO;
+        if (l2_entry & L2E_OFFSET_MASK) {
+            return QCOW2_CLUSTER_ZERO_ALLOC;
+        }
+        return QCOW2_CLUSTER_ZERO_PLAIN;
     } else if (!(l2_entry & L2E_OFFSET_MASK)) {
         return QCOW2_CLUSTER_UNALLOCATED;
     } else {
@@ -544,10 +551,11 @@
                                          int compressed_size);
 
 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-    int nb_sectors, enum qcow2_discard_type type, bool full_discard);
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
-                        int flags);
+int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
+                          uint64_t bytes, enum qcow2_discard_type type,
+                          bool full_discard);
+int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
+                          uint64_t bytes, int flags);
 
 int qcow2_expand_zero_clusters(BlockDriverState *bs,
                                BlockDriverAmendStatusCB *status_cb,

diff --git a/block/replication.c b/block/replication.c
index d300c15..3885f04 100644
--- a/block/replication.c
+++ b/block/replication.c

@@ -22,9 +22,17 @@
 #include "qapi/error.h"
 #include "replication.h"
 
+typedef enum {
+    BLOCK_REPLICATION_NONE,             /* block replication is not started */
+    BLOCK_REPLICATION_RUNNING,          /* block replication is running */
+    BLOCK_REPLICATION_FAILOVER,         /* failover is running in background */
+    BLOCK_REPLICATION_FAILOVER_FAILED,  /* failover failed */
+    BLOCK_REPLICATION_DONE,             /* block replication is done */
+} ReplicationStage;
+
 typedef struct BDRVReplicationState {
     ReplicationMode mode;
-    int replication_state;
+    ReplicationStage stage;
     BdrvChild *active_disk;
     BdrvChild *hidden_disk;
     BdrvChild *secondary_disk;
@@ -36,14 +44,6 @@
     int error;
 } BDRVReplicationState;
 
-enum {
-    BLOCK_REPLICATION_NONE,             /* block replication is not started */
-    BLOCK_REPLICATION_RUNNING,          /* block replication is running */
-    BLOCK_REPLICATION_FAILOVER,         /* failover is running in background */
-    BLOCK_REPLICATION_FAILOVER_FAILED,  /* failover failed */
-    BLOCK_REPLICATION_DONE,             /* block replication is done */
-};
-
 static void replication_start(ReplicationState *rs, ReplicationMode mode,
                               Error **errp);
 static void replication_do_checkpoint(ReplicationState *rs, Error **errp);
@@ -141,10 +141,10 @@
 {
     BDRVReplicationState *s = bs->opaque;
 
-    if (s->replication_state == BLOCK_REPLICATION_RUNNING) {
+    if (s->stage == BLOCK_REPLICATION_RUNNING) {
         replication_stop(s->rs, false, NULL);
     }
-    if (s->replication_state == BLOCK_REPLICATION_FAILOVER) {
+    if (s->stage == BLOCK_REPLICATION_FAILOVER) {
         block_job_cancel_sync(s->active_disk->bs->job);
     }
 
@@ -174,7 +174,7 @@
 
 static int replication_get_io_status(BDRVReplicationState *s)
 {
-    switch (s->replication_state) {
+    switch (s->stage) {
     case BLOCK_REPLICATION_NONE:
         return -EIO;
     case BLOCK_REPLICATION_RUNNING:
@@ -403,7 +403,7 @@
     BlockDriverState *bs = opaque;
     BDRVReplicationState *s = bs->opaque;
 
-    if (s->replication_state != BLOCK_REPLICATION_FAILOVER) {
+    if (s->stage != BLOCK_REPLICATION_FAILOVER) {
         /* The backup job is cancelled unexpectedly */
         s->error = -EIO;
     }
@@ -445,7 +445,7 @@
     aio_context_acquire(aio_context);
     s = bs->opaque;
 
-    if (s->replication_state != BLOCK_REPLICATION_NONE) {
+    if (s->stage != BLOCK_REPLICATION_NONE) {
         error_setg(errp, "Block replication is running or done");
         aio_context_release(aio_context);
         return;
@@ -545,7 +545,7 @@
         abort();
     }
 
-    s->replication_state = BLOCK_REPLICATION_RUNNING;
+    s->stage = BLOCK_REPLICATION_RUNNING;
 
     if (s->mode == REPLICATION_MODE_SECONDARY) {
         secondary_do_checkpoint(s, errp);
@@ -581,7 +581,7 @@
     aio_context_acquire(aio_context);
     s = bs->opaque;
 
-    if (s->replication_state != BLOCK_REPLICATION_RUNNING) {
+    if (s->stage != BLOCK_REPLICATION_RUNNING) {
         error_setg(errp, "Block replication is not running");
         aio_context_release(aio_context);
         return;
@@ -601,7 +601,7 @@
     BDRVReplicationState *s = bs->opaque;
 
     if (ret == 0) {
-        s->replication_state = BLOCK_REPLICATION_DONE;
+        s->stage = BLOCK_REPLICATION_DONE;
 
         /* refresh top bs's filename */
         bdrv_refresh_filename(bs);
@@ -610,7 +610,7 @@
         s->hidden_disk = NULL;
         s->error = 0;
     } else {
-        s->replication_state = BLOCK_REPLICATION_FAILOVER_FAILED;
+        s->stage = BLOCK_REPLICATION_FAILOVER_FAILED;
         s->error = -EIO;
     }
 }
@@ -625,7 +625,7 @@
     aio_context_acquire(aio_context);
     s = bs->opaque;
 
-    if (s->replication_state != BLOCK_REPLICATION_RUNNING) {
+    if (s->stage != BLOCK_REPLICATION_RUNNING) {
         error_setg(errp, "Block replication is not running");
         aio_context_release(aio_context);
         return;
@@ -633,7 +633,7 @@
 
     switch (s->mode) {
     case REPLICATION_MODE_PRIMARY:
-        s->replication_state = BLOCK_REPLICATION_DONE;
+        s->stage = BLOCK_REPLICATION_DONE;
         s->error = 0;
         break;
     case REPLICATION_MODE_SECONDARY:
@@ -648,12 +648,12 @@
 
         if (!failover) {
             secondary_do_checkpoint(s, errp);
-            s->replication_state = BLOCK_REPLICATION_DONE;
+            s->stage = BLOCK_REPLICATION_DONE;
             aio_context_release(aio_context);
             return;
         }
 
-        s->replication_state = BLOCK_REPLICATION_FAILOVER;
+        s->stage = BLOCK_REPLICATION_FAILOVER;
         commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
                             BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
                             NULL, replication_done, bs, true, errp);

diff --git a/blockdev.c b/blockdev.c
index 0b38c3d..c63f4e8 100644
--- a/blockdev.c
+++ b/blockdev.c

@@ -2923,10 +2923,9 @@
         goto out;
     }
 
-    /* complete all in-flight operations before resizing the device */
-    bdrv_drain_all();
-
+    bdrv_drained_begin(bs);
     ret = blk_truncate(blk, size, errp);
+    bdrv_drained_end(bs);
 
 out:
     blk_unref(blk);
@@ -3151,6 +3150,7 @@
     Error *local_err = NULL;
     int flags;
     int64_t size;
+    bool set_backing_hd = false;
 
     if (!backup->has_speed) {
         backup->speed = 0;
@@ -3201,6 +3201,8 @@
     }
     if (backup->sync == MIRROR_SYNC_MODE_NONE) {
         source = bs;
+        flags |= BDRV_O_NO_BACKING;
+        set_backing_hd = true;
     }
 
     size = bdrv_getlength(bs);
@@ -3227,7 +3229,9 @@
     }
 
     if (backup->format) {
-        options = qdict_new();
+        if (!options) {
+            options = qdict_new();
+        }
         qdict_put_str(options, "driver", backup->format);
     }
 
@@ -3238,6 +3242,14 @@
 
     bdrv_set_aio_context(target_bs, aio_context);
 
+    if (set_backing_hd) {
+        bdrv_set_backing_hd(target_bs, source, &local_err);
+        if (local_err) {
+            bdrv_unref(target_bs);
+            goto out;
+        }
+    }
+
     if (backup->has_bitmap) {
         bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
         if (!bmap) {

diff --git a/bsd-user/main.c b/bsd-user/main.c
index 714a692..04f95dd 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c

@@ -744,10 +744,7 @@
     qemu_init_cpu_list();
     module_call_init(MODULE_INIT_QOM);
 
-    if ((envlist = envlist_create()) == NULL) {
-        (void) fprintf(stderr, "Unable to allocate envlist\n");
-        exit(1);
-    }
+    envlist = envlist_create();
 
     /* add current environment into the list */
     for (wrk = environ; *wrk != NULL; wrk++) {
@@ -785,10 +782,7 @@
                 usage();
         } else if (!strcmp(r, "ignore-environment")) {
             envlist_free(envlist);
-            if ((envlist = envlist_create()) == NULL) {
-                (void) fprintf(stderr, "Unable to allocate envlist\n");
-                exit(1);
-            }
+            envlist = envlist_create();
         } else if (!strcmp(r, "U")) {
             r = argv[optind++];
             if (envlist_unsetenv(envlist, r) != 0)
@@ -956,10 +950,10 @@
     }
 
     for (wrk = target_environ; *wrk; wrk++) {
-        free(*wrk);
+        g_free(*wrk);
     }
 
-    free(target_environ);
+    g_free(target_environ);
 
     if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
         qemu_log("guest_base  0x%lx\n", guest_base);

diff --git a/configure b/configure
index 7c020c0..139638e 100755
--- a/configure
+++ b/configure

@@ -611,6 +611,7 @@
   audio_possible_drivers="oss sdl"
   oss_lib="-lossaudio"
   HOST_VARIANT_DIR="netbsd"
+  supported_os="yes"
 ;;
 OpenBSD)
   bsd="yes"
@@ -1334,7 +1335,7 @@
   --oss-lib                path to OSS library
   --cpu=CPU                Build for host CPU [$cpu]
   --with-coroutine=BACKEND coroutine backend. Supported options:
-                           gthread, ucontext, sigaltstack, windows
+                           ucontext, sigaltstack, windows
   --enable-gcov            enable test coverage analysis with gcov
   --gcov=GCOV              use specified gcov [$gcov_tool]
   --disable-blobs          disable installing provided firmware blobs
@@ -2014,7 +2015,7 @@
   else
 
     xen_libs="-lxenstore -lxenctrl -lxenguest"
-    xen_stable_libs="-lxencall -lxenforeignmemory -lxengnttab -lxenevtchn"
+    xen_stable_libs="-lxenforeignmemory -lxengnttab -lxenevtchn"
 
     # First we test whether Xen headers and libraries are available.
     # If no, we are done and there is no Xen support.
@@ -4418,10 +4419,8 @@
 # check and set a backend for coroutine
 
 # We prefer ucontext, but it's not always possible. The fallback
-# is sigcontext. gthread is not selectable except explicitly, because
-# it is not functional enough to run QEMU proper. (It is occasionally
-# useful for debugging purposes.)  On Windows the only valid backend
-# is the Windows-specific one.
+# is sigcontext. On Windows the only valid backend is the Windows
+# specific one.
 
 ucontext_works=no
 if test "$darwin" != "yes"; then
@@ -4460,7 +4459,7 @@
       feature_not_found "ucontext"
     fi
     ;;
-  gthread|sigaltstack)
+  sigaltstack)
     if test "$mingw32" = "yes"; then
       error_exit "only the 'windows' coroutine backend is valid for Windows"
     fi
@@ -4472,14 +4471,7 @@
 fi
 
 if test "$coroutine_pool" = ""; then
-  if test "$coroutine" = "gthread"; then
-    coroutine_pool=no
-  else
-    coroutine_pool=yes
-  fi
-fi
-if test "$coroutine" = "gthread" -a "$coroutine_pool" = "yes"; then
-  error_exit "'gthread' coroutine backend does not support pool (use --disable-coroutine-pool)"
+  coroutine_pool=yes
 fi
 
 if test "$debug_stack_usage" = "yes"; then
@@ -6110,12 +6102,14 @@
   ppc64)
     TARGET_BASE_ARCH=ppc
     TARGET_ABI_DIR=ppc
+    mttcg=yes
     gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml power-vsx.xml"
   ;;
   ppc64le)
     TARGET_ARCH=ppc64
     TARGET_BASE_ARCH=ppc
     TARGET_ABI_DIR=ppc
+    mttcg=yes
     gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml power-vsx.xml"
   ;;
   ppc64abi32)

diff --git a/cpus.c b/cpus.c
index 740b8dc..516e5cb 100644
--- a/cpus.c
+++ b/cpus.c

@@ -50,6 +50,7 @@
 #include "qapi-event.h"
 #include "hw/nmi.h"
 #include "sysemu/replay.h"
+#include "hw/boards.h"
 
 #ifdef CONFIG_LINUX
 
@@ -1483,6 +1484,12 @@
                 /* Ignore everything else? */
                 break;
             }
+        } else if (cpu->unplug) {
+            qemu_tcg_destroy_vcpu(cpu);
+            cpu->created = false;
+            qemu_cond_signal(&qemu_cpu_cond);
+            qemu_mutex_unlock_iothread();
+            return NULL;
         }
 
         atomic_mb_set(&cpu->exit_request, 0);
@@ -1859,6 +1866,8 @@
 
 CpuInfoList *qmp_query_cpus(Error **errp)
 {
+    MachineState *ms = MACHINE(qdev_get_machine());
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
     CpuInfoList *head = NULL, *cur_item = NULL;
     CPUState *cpu;
 
@@ -1909,6 +1918,13 @@
 #else
         info->value->arch = CPU_INFO_ARCH_OTHER;
 #endif
+        info->value->has_props = !!mc->cpu_index_to_instance_props;
+        if (info->value->has_props) {
+            CpuInstanceProperties *props;
+            props = g_malloc0(sizeof(*props));
+            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
+            info->value->props = props;
+        }
 
         /* XXX: waiting for the qapi to support GSList */
         if (!cur_item) {

diff --git a/cputlb.c b/cputlb.c
index f5d056c..743776a 100644
--- a/cputlb.c
+++ b/cputlb.c

@@ -930,7 +930,13 @@
         tlb_addr = tlbe->addr_write;
     }
 
-    /* Notice an IO access, or a notdirty page.  */
+    /* Check notdirty */
+    if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
+        tlb_set_dirty(ENV_GET_CPU(env), addr);
+        tlb_addr = tlb_addr & ~TLB_NOTDIRTY;
+    }
+
+    /* Notice an IO access  */
     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
         /* There's really nothing that can be done to
            support this apart from stop-the-world.  */

diff --git a/crypto/block-luks.c b/crypto/block-luks.c
index d5a31bb..2b97d89 100644
--- a/crypto/block-luks.c
+++ b/crypto/block-luks.c

@@ -473,9 +473,9 @@
      * then encrypted.
      */
     rv = readfunc(block,
-                  opaque,
                   slot->key_offset * QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
                   splitkey, splitkeylen,
+                  opaque,
                   errp);
     if (rv < 0) {
         goto cleanup;
@@ -676,9 +676,10 @@
 
     /* Read the entire LUKS header, minus the key material from
      * the underlying device */
-    rv = readfunc(block, opaque, 0,
+    rv = readfunc(block, 0,
                   (uint8_t *)&luks->header,
                   sizeof(luks->header),
+                  opaque,
                   errp);
     if (rv < 0) {
         ret = rv;
@@ -1245,7 +1246,7 @@
         QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
 
     /* Reserve header space to match payload offset */
-    initfunc(block, opaque, block->payload_offset, &local_err);
+    initfunc(block, block->payload_offset, opaque, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         goto error;
@@ -1267,9 +1268,10 @@
 
 
     /* Write out the partition header and key slot headers */
-    writefunc(block, opaque, 0,
+    writefunc(block, 0,
               (const uint8_t *)&luks->header,
               sizeof(luks->header),
+              opaque,
               &local_err);
 
     /* Delay checking local_err until we've byte-swapped */
@@ -1295,10 +1297,11 @@
 
     /* Write out the master key material, starting at the
      * sector immediately following the partition header. */
-    if (writefunc(block, opaque,
+    if (writefunc(block,
                   luks->header.key_slots[0].key_offset *
                   QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
                   splitkey, splitkeylen,
+                  opaque,
                   errp) != splitkeylen) {
         goto error;
     }

diff --git a/crypto/init.c b/crypto/init.c
index f65207e..f131c42 100644
--- a/crypto/init.c
+++ b/crypto/init.c

@@ -32,6 +32,8 @@
 #include <gcrypt.h>
 #endif
 
+#include "crypto/random.h"
+
 /* #define DEBUG_GNUTLS */
 
 /*
@@ -146,5 +148,9 @@
     gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0);
 #endif
 
+    if (qcrypto_random_init(errp) < 0) {
+        return -1;
+    }
+
     return 0;
 }

diff --git a/crypto/random-gcrypt.c b/crypto/random-gcrypt.c
index 0de9a09..9f1c9ee 100644
--- a/crypto/random-gcrypt.c
+++ b/crypto/random-gcrypt.c

@@ -31,3 +31,5 @@
     gcry_randomize(buf, buflen, GCRY_STRONG_RANDOM);
     return 0;
 }
+
+int qcrypto_random_init(Error **errp G_GNUC_UNUSED) { return 0; }

diff --git a/crypto/random-gnutls.c b/crypto/random-gnutls.c
index 04b45a8..5350003 100644
--- a/crypto/random-gnutls.c
+++ b/crypto/random-gnutls.c

@@ -41,3 +41,6 @@
 
     return 0;
 }
+
+
+int qcrypto_random_init(Error **errp G_GNUC_UNUSED) { return 0; }

diff --git a/crypto/random-platform.c b/crypto/random-platform.c
index 82b755a..92eed0e 100644
--- a/crypto/random-platform.c
+++ b/crypto/random-platform.c

@@ -22,14 +22,16 @@
 
 #include "crypto/random.h"
 
-int qcrypto_random_bytes(uint8_t *buf G_GNUC_UNUSED,
-                         size_t buflen G_GNUC_UNUSED,
-                         Error **errp)
-{
-    int fd;
-    int ret = -1;
-    int got;
+#ifdef _WIN32
+#include <wincrypt.h>
+static HCRYPTPROV hCryptProv;
+#else
+static int fd; /* a file handle to either /dev/urandom or /dev/random */
+#endif
 
+int qcrypto_random_init(Error **errp)
+{
+#ifndef _WIN32
     /* TBD perhaps also add support for BSD getentropy / Linux
      * getrandom syscalls directly */
     fd = open("/dev/urandom", O_RDONLY);
@@ -41,6 +43,25 @@
         error_setg(errp, "No /dev/urandom or /dev/random found");
         return -1;
     }
+#else
+    if (!CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL,
+                             CRYPT_SILENT | CRYPT_VERIFYCONTEXT)) {
+        error_setg_win32(errp, GetLastError(),
+                         "Unable to create cryptographic provider");
+        return -1;
+    }
+#endif
+
+    return 0;
+}
+
+int qcrypto_random_bytes(uint8_t *buf G_GNUC_UNUSED,
+                         size_t buflen G_GNUC_UNUSED,
+                         Error **errp)
+{
+#ifndef _WIN32
+    int ret = -1;
+    int got;
 
     while (buflen > 0) {
         got = read(fd, buf, buflen);
@@ -59,6 +80,14 @@
 
     ret = 0;
  cleanup:
-    close(fd);
     return ret;
+#else
+    if (!CryptGenRandom(hCryptProv, buflen, buf)) {
+        error_setg_win32(errp, GetLastError(),
+                         "Unable to read random bytes");
+        return -1;
+    }
+
+    return 0;
+#endif
 }

diff --git a/device_tree.c b/device_tree.c
index 6e06320..a24ddff 100644
--- a/device_tree.c
+++ b/device_tree.c

@@ -148,6 +148,7 @@
     d = opendir(dirname);
     if (!d) {
         error_setg(&error_fatal, "%s cannot open %s", __func__, dirname);
+        return;
     }
 
     while ((de = readdir(d)) != NULL) {

diff --git a/docs/qdev-device-use.txt b/docs/qdev-device-use.txt
index b059405..4274fe9 100644
--- a/docs/qdev-device-use.txt
+++ b/docs/qdev-device-use.txt

@@ -182,15 +182,13 @@
 
   This lets you control I/O ports and IRQs.
 
-* -usbdevice serial:vendorid=VID,productid=PRID becomes
-  -device usb-serial,vendorid=VID,productid=PRID
+* -usbdevice serial::chardev becomes -device usb-serial,chardev=dev.
 
 * -usbdevice braille doesn't support LEGACY-CHARDEV syntax.  It always
   uses "braille".  With -device, this useful default is gone, so you
   have to use something like
 
-  -device usb-braille,chardev=braille,vendorid=VID,productid=PRID
-  -chardev braille,id=braille
+  -device usb-braille,chardev=braille -chardev braille,id=braille
 
 * -virtioconsole becomes
   -device virtio-serial-pci,class=C,vectors=V,ioeventfd=IOEVENTFD,max_ports=N

diff --git a/exec.c b/exec.c
index f942eb2..cdb01d3 100644
--- a/exec.c
+++ b/exec.c

@@ -2119,10 +2119,10 @@
          * In that case just map until the end of the page.
          */
         if (block->offset == 0) {
-            return xen_map_cache(addr, 0, 0);
+            return xen_map_cache(addr, 0, 0, false);
         }
 
-        block->host = xen_map_cache(block->offset, block->max_length, 1);
+        block->host = xen_map_cache(block->offset, block->max_length, 1, false);
     }
     return ramblock_ptr(block, addr);
 }
@@ -2152,10 +2152,10 @@
          * In that case just map the requested area.
          */
         if (block->offset == 0) {
-            return xen_map_cache(addr, *size, 1);
+            return xen_map_cache(addr, *size, 1, true);
         }
 
-        block->host = xen_map_cache(block->offset, block->max_length, 1);
+        block->host = xen_map_cache(block->offset, block->max_length, 1, true);
     }
 
     return ramblock_ptr(block, addr);

diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index f3ebca4..a248656 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c

@@ -452,6 +452,11 @@
     return telldir(fs->dir.stream);
 }
 
+static bool local_is_mapped_file_metadata(FsContext *fs_ctx, const char *name)
+{
+    return !strcmp(name, VIRTFS_META_DIR);
+}
+
 static struct dirent *local_readdir(FsContext *ctx, V9fsFidOpenState *fs)
 {
     struct dirent *entry;
@@ -465,8 +470,8 @@
     if (ctx->export_flags & V9FS_SM_MAPPED) {
         entry->d_type = DT_UNKNOWN;
     } else if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        if (!strcmp(entry->d_name, VIRTFS_META_DIR)) {
-            /* skp the meta data directory */
+        if (local_is_mapped_file_metadata(ctx, entry->d_name)) {
+            /* skip the meta data directory */
             goto again;
         }
         entry->d_type = DT_UNKNOWN;
@@ -559,6 +564,12 @@
     int err = -1;
     int dirfd;
 
+    if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(fs_ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
     dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
     if (dirfd == -1) {
         return -1;
@@ -605,6 +616,12 @@
     int err = -1;
     int dirfd;
 
+    if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(fs_ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
     dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
     if (dirfd == -1) {
         return -1;
@@ -694,6 +711,12 @@
     int err = -1;
     int dirfd;
 
+    if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(fs_ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
     /*
      * Mark all the open to not follow symlinks
      */
@@ -752,6 +775,12 @@
     int err = -1;
     int dirfd;
 
+    if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(fs_ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
     dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
     if (dirfd == -1) {
         return -1;
@@ -826,6 +855,12 @@
     int ret = -1;
     int odirfd, ndirfd;
 
+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
     odirfd = local_opendir_nofollow(ctx, odirpath);
     if (odirfd == -1) {
         goto out;
@@ -1096,6 +1131,12 @@
 static int local_name_to_path(FsContext *ctx, V9fsPath *dir_path,
                               const char *name, V9fsPath *target)
 {
+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
     if (dir_path) {
         v9fs_path_sprintf(target, "%s/%s", dir_path->data, name);
     } else if (strcmp(name, "/")) {
@@ -1116,6 +1157,13 @@
     int ret;
     int odirfd, ndirfd;
 
+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        (local_is_mapped_file_metadata(ctx, old_name) ||
+         local_is_mapped_file_metadata(ctx, new_name))) {
+        errno = EINVAL;
+        return -1;
+    }
+
     odirfd = local_opendir_nofollow(ctx, olddir->data);
     if (odirfd == -1) {
         return -1;
@@ -1206,6 +1254,12 @@
     int ret;
     int dirfd;
 
+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE &&
+        local_is_mapped_file_metadata(ctx, name)) {
+        errno = EINVAL;
+        return -1;
+    }
+
     dirfd = local_opendir_nofollow(ctx, dir->data);
     if (dirfd == -1) {
         return -1;

diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 9c7f41a..5df97c9 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c

@@ -332,12 +332,14 @@
         str = g_strdup_printf("ring-ref%u", i);
         if (xenstore_read_fe_int(&xen_9pdev->xendev, str,
                                  &xen_9pdev->rings[i].ref) == -1) {
+            g_free(str);
             goto out;
         }
         g_free(str);
         str = g_strdup_printf("event-channel-%u", i);
         if (xenstore_read_fe_int(&xen_9pdev->xendev, str,
                                  &xen_9pdev->rings[i].evtchn) == -1) {
+            g_free(str);
             goto out;
         }
         g_free(str);
@@ -378,7 +380,7 @@
         if (xen_9pdev->rings[i].evtchndev == NULL) {
             goto out;
         }
-        fcntl(xenevtchn_fd(xen_9pdev->rings[i].evtchndev), F_SETFD, FD_CLOEXEC);
+        qemu_set_cloexec(xenevtchn_fd(xen_9pdev->rings[i].evtchndev));
         xen_9pdev->rings[i].local_port = xenevtchn_bind_interdomain
                                             (xen_9pdev->rings[i].evtchndev,
                                              xendev->dom,

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 4ddfb68..36a6cc4 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c

@@ -24,6 +24,7 @@
 #include "hw/acpi/aml-build.h"
 #include "qemu/bswap.h"
 #include "qemu/bitops.h"
+#include "sysemu/numa.h"
 
 static GArray *build_alloc_array(void)
 {
@@ -1636,3 +1637,28 @@
     numamem->base_addr = cpu_to_le64(base);
     numamem->range_length = cpu_to_le64(len);
 }
+
+/*
+ * ACPI spec 5.2.17 System Locality Distance Information Table
+ * (Revision 2.0 or later)
+ */
+void build_slit(GArray *table_data, BIOSLinker *linker)
+{
+    int slit_start, i, j;
+    slit_start = table_data->len;
+
+    acpi_data_push(table_data, sizeof(AcpiTableHeader));
+
+    build_append_int_noprefix(table_data, nb_numa_nodes, 8);
+    for (i = 0; i < nb_numa_nodes; i++) {
+        for (j = 0; j < nb_numa_nodes; j++) {
+            assert(numa_info[i].distance[j]);
+            build_append_int_noprefix(table_data, numa_info[i].distance[j], 1);
+        }
+    }
+
+    build_header(linker, table_data,
+                 (void *)(table_data->data + slit_start),
+                 "SLIT",
+                 table_data->len - slit_start, 1, NULL, NULL);
+}

diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
index 8c719d3..a233fe1 100644
--- a/hw/acpi/cpu.c
+++ b/hw/acpi/cpu.c

@@ -503,7 +503,6 @@
 
         /* build Processor object for each processor */
         for (i = 0; i < arch_ids->len; i++) {
-            int j;
             Aml *dev;
             Aml *uid = aml_int(i);
             GArray *madt_buf = g_array_new(0, 1, 1);
@@ -557,9 +556,9 @@
              * as a result _PXM is required for all CPUs which might
              * be hot-plugged. For simplicity, add it for all CPUs.
              */
-            j = numa_get_node_for_cpu(i);
-            if (j < nb_numa_nodes) {
-                aml_append(dev, aml_name_decl("_PXM", aml_int(j)));
+            if (arch_ids->cpus[i].props.has_node_id) {
+                aml_append(dev, aml_name_decl("_PXM",
+                           aml_int(arch_ids->cpus[i].props.node_id)));
             }
 
             aml_append(cpus_dev, dev);

diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index c409374..f276967 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c

@@ -705,7 +705,7 @@
      * Reason: part of PIIX4 southbridge, needs to be wired up,
      * e.g. by mips_malta_init()
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
     dc->hotpluggable = false;
     hc->plug = piix4_device_plug_cb;
     hc->unplug_request = piix4_device_unplug_request_cb;

diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c
index fe2d5a7..3246268 100644
--- a/hw/arm/spitz.c
+++ b/hw/arm/spitz.c

@@ -1076,7 +1076,7 @@
     dc->vmsd = &vmstate_sl_nand_info;
     dc->props = sl_nand_properties;
     /* Reason: init() method uses drive_get() */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo sl_nand_info = {

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 6e5f339..e585206 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c

@@ -486,30 +486,25 @@
     AcpiSystemResourceAffinityTable *srat;
     AcpiSratProcessorGiccAffinity *core;
     AcpiSratMemoryAffinity *numamem;
-    int i, j, srat_start;
+    int i, srat_start;
     uint64_t mem_base;
-    uint32_t *cpu_node = g_malloc0(vms->smp_cpus * sizeof(uint32_t));
-
-    for (i = 0; i < vms->smp_cpus; i++) {
-        j = numa_get_node_for_cpu(i);
-        if (j < nb_numa_nodes) {
-                cpu_node[i] = j;
-        }
-    }
+    MachineClass *mc = MACHINE_GET_CLASS(vms);
+    const CPUArchIdList *cpu_list = mc->possible_cpu_arch_ids(MACHINE(vms));
 
     srat_start = table_data->len;
     srat = acpi_data_push(table_data, sizeof(*srat));
     srat->reserved1 = cpu_to_le32(1);
 
-    for (i = 0; i < vms->smp_cpus; ++i) {
+    for (i = 0; i < cpu_list->len; ++i) {
+        int node_id = cpu_list->cpus[i].props.has_node_id ?
+            cpu_list->cpus[i].props.node_id : 0;
         core = acpi_data_push(table_data, sizeof(*core));
         core->type = ACPI_SRAT_PROCESSOR_GICC;
         core->length = sizeof(*core);
-        core->proximity = cpu_to_le32(cpu_node[i]);
+        core->proximity = cpu_to_le32(node_id);
         core->acpi_processor_uid = cpu_to_le32(i);
         core->flags = cpu_to_le32(1);
     }
-    g_free(cpu_node);
 
     mem_base = vms->memmap[VIRT_MEM].base;
     for (i = 0; i < nb_numa_nodes; ++i) {

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 5f62a03..c7c8159 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c

@@ -338,7 +338,7 @@
 {
     int cpu;
     int addr_cells = 1;
-    unsigned int i;
+    const MachineState *ms = MACHINE(vms);
 
     /*
      * From Documentation/devicetree/bindings/arm/cpus.txt
@@ -369,6 +369,7 @@
     for (cpu = vms->smp_cpus - 1; cpu >= 0; cpu--) {
         char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu);
         ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
+        CPUState *cs = CPU(armcpu);
 
         qemu_fdt_add_subnode(vms->fdt, nodename);
         qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "cpu");
@@ -389,9 +390,9 @@
                                   armcpu->mp_affinity);
         }
 
-        i = numa_get_node_for_cpu(cpu);
-        if (i < nb_numa_nodes) {
-            qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id", i);
+        if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
+            qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id",
+                ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
         }
 
         g_free(nodename);
@@ -1194,10 +1195,35 @@
     virt_build_smbios(vms);
 }
 
+static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
+{
+    uint8_t clustersz = ARM_DEFAULT_CPUS_PER_CLUSTER;
+    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
+
+    if (!vmc->disallow_affinity_adjustment) {
+        /* Adjust MPIDR like 64-bit KVM hosts, which incorporate the
+         * GIC's target-list limitations. 32-bit KVM hosts currently
+         * always create clusters of 4 CPUs, but that is expected to
+         * change when they gain support for gicv3. When KVM is enabled
+         * it will override the changes we make here, therefore our
+         * purposes are to make TCG consistent (with 64-bit KVM hosts)
+         * and to improve SGI efficiency.
+         */
+        if (vms->gic_version == 3) {
+            clustersz = GICV3_TARGETLIST_BITS;
+        } else {
+            clustersz = GIC_TARGETLIST_BITS;
+        }
+    }
+    return arm_cpu_mp_affinity(idx, clustersz);
+}
+
 static void machvirt_init(MachineState *machine)
 {
     VirtMachineState *vms = VIRT_MACHINE(machine);
     VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine);
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
+    const CPUArchIdList *possible_cpus;
     qemu_irq pic[NUM_IRQS];
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *secure_sysmem = NULL;
@@ -1210,7 +1236,6 @@
     CPUClass *cc;
     Error *err = NULL;
     bool firmware_loaded = bios_name || drive_get(IF_PFLASH, 0, 0);
-    uint8_t clustersz;
 
     if (!cpu_model) {
         cpu_model = "cortex-a15";
@@ -1263,10 +1288,8 @@
      */
     if (vms->gic_version == 3) {
         virt_max_cpus = vms->memmap[VIRT_GIC_REDIST].size / 0x20000;
-        clustersz = GICV3_TARGETLIST_BITS;
     } else {
         virt_max_cpus = GIC_NCPU;
-        clustersz = GIC_TARGETLIST_BITS;
     }
 
     if (max_cpus > virt_max_cpus) {
@@ -1324,21 +1347,35 @@
         exit(1);
     }
 
-    for (n = 0; n < smp_cpus; n++) {
-        Object *cpuobj = object_new(typename);
-        if (!vmc->disallow_affinity_adjustment) {
-            /* Adjust MPIDR like 64-bit KVM hosts, which incorporate the
-             * GIC's target-list limitations. 32-bit KVM hosts currently
-             * always create clusters of 4 CPUs, but that is expected to
-             * change when they gain support for gicv3. When KVM is enabled
-             * it will override the changes we make here, therefore our
-             * purposes are to make TCG consistent (with 64-bit KVM hosts)
-             * and to improve SGI efficiency.
-             */
-            uint8_t aff1 = n / clustersz;
-            uint8_t aff0 = n % clustersz;
-            object_property_set_int(cpuobj, (aff1 << ARM_AFF1_SHIFT) | aff0,
-                                    "mp-affinity", NULL);
+    possible_cpus = mc->possible_cpu_arch_ids(machine);
+    for (n = 0; n < possible_cpus->len; n++) {
+        Object *cpuobj;
+        CPUState *cs;
+        int node_id;
+
+        if (n >= smp_cpus) {
+            break;
+        }
+
+        cpuobj = object_new(typename);
+        object_property_set_int(cpuobj, possible_cpus->cpus[n].arch_id,
+                                "mp-affinity", NULL);
+
+        cs = CPU(cpuobj);
+        cs->cpu_index = n;
+
+        node_id = possible_cpus->cpus[cs->cpu_index].props.node_id;
+        if (!possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
+            /* by default CPUState::numa_node was 0 if it's not set via CLI
+             * keep it this way for now but in future we probably should
+             * refuse to start up with incomplete numa mapping */
+             node_id = 0;
+        }
+        if (cs->numa_node == CPU_UNSET_NUMA_NODE_ID) {
+            cs->numa_node = node_id;
+        } else {
+            /* CPU isn't device_add compatible yet, this shouldn't happen */
+            error_setg(&error_abort, "user set node-id not implemented");
         }
 
         if (!vms->secure) {
@@ -1518,6 +1555,46 @@
     }
 }
 
+static CpuInstanceProperties
+virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
+{
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
+
+    assert(cpu_index < possible_cpus->len);
+    return possible_cpus->cpus[cpu_index].props;
+}
+
+static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
+{
+    int n;
+    VirtMachineState *vms = VIRT_MACHINE(ms);
+
+    if (ms->possible_cpus) {
+        assert(ms->possible_cpus->len == max_cpus);
+        return ms->possible_cpus;
+    }
+
+    ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
+                                  sizeof(CPUArchId) * max_cpus);
+    ms->possible_cpus->len = max_cpus;
+    for (n = 0; n < ms->possible_cpus->len; n++) {
+        ms->possible_cpus->cpus[n].arch_id =
+            virt_cpu_mp_affinity(vms, n);
+        ms->possible_cpus->cpus[n].props.has_thread_id = true;
+        ms->possible_cpus->cpus[n].props.thread_id = n;
+
+        /* default distribution of CPUs over NUMA nodes */
+        if (nb_numa_nodes) {
+            /* preset values but do not enable them i.e. 'has_node_id = false',
+             * numa init code will enable them later if manual mapping wasn't
+             * present on CLI */
+            ms->possible_cpus->cpus[n].props.node_id = n % nb_numa_nodes;
+        }
+    }
+    return ms->possible_cpus;
+}
+
 static void virt_machine_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -1534,6 +1611,8 @@
     mc->pci_allow_0_address = true;
     /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */
     mc->minimum_page_bits = 12;
+    mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
+    mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
 }
 
 static const TypeInfo virt_machine_info = {

diff --git a/hw/audio/marvell_88w8618.c b/hw/audio/marvell_88w8618.c
index 511b004..4f65f8c 100644
--- a/hw/audio/marvell_88w8618.c
+++ b/hw/audio/marvell_88w8618.c

@@ -292,7 +292,7 @@
     dc->vmsd = &mv88w8618_audio_vmsd;
     dc->props = mv88w8618_audio_properties;
     /* Reason: pointer property "wm8750" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo mv88w8618_audio_info = {

diff --git a/hw/audio/pcspk.c b/hw/audio/pcspk.c
index 7980022..9b99358 100644
--- a/hw/audio/pcspk.c
+++ b/hw/audio/pcspk.c

@@ -223,7 +223,7 @@
     dc->vmsd = &vmstate_spk;
     dc->props = pcspk_properties;
     /* Reason: realize sets global pcspk_state */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo pcspk_info = {

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index ae303d4..7428db9 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c

@@ -227,6 +227,29 @@
     return NVME_NO_COMPLETE;
 }
 
+static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
+    NvmeRequest *req)
+{
+    NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
+    const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
+    const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
+    uint64_t slba = le64_to_cpu(rw->slba);
+    uint32_t nlb  = le16_to_cpu(rw->nlb) + 1;
+    uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS);
+    uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS);
+
+    if (slba + nlb > ns->id_ns.nsze) {
+        return NVME_LBA_RANGE | NVME_DNR;
+    }
+
+    req->has_sg = false;
+    block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
+                     BLOCK_ACCT_WRITE);
+    req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, aio_slba, aio_nlb,
+                                        BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req);
+    return NVME_NO_COMPLETE;
+}
+
 static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     NvmeRequest *req)
 {
@@ -279,6 +302,8 @@
     switch (cmd->opcode) {
     case NVME_CMD_FLUSH:
         return nvme_flush(n, ns, cmd, req);
+    case NVME_CMD_WRITE_ZEROS:
+        return nvme_write_zeros(n, ns, cmd, req);
     case NVME_CMD_WRITE:
     case NVME_CMD_READ:
         return nvme_rw(n, ns, cmd, req);
@@ -895,6 +920,7 @@
     id->sqes = (0x6 << 4) | 0x6;
     id->cqes = (0x4 << 4) | 0x4;
     id->nn = cpu_to_le32(n->num_namespaces);
+    id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS);
     id->psd[0].mp = cpu_to_le16(0x9c4);
     id->psd[0].enlat = cpu_to_le32(0x10);
     id->psd[0].exlat = cpu_to_le32(0x4);

diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 8fb0c10..a0d1564 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h

@@ -179,6 +179,7 @@
     NVME_CMD_READ               = 0x02,
     NVME_CMD_WRITE_UNCOR        = 0x04,
     NVME_CMD_COMPARE            = 0x05,
+    NVME_CMD_WRITE_ZEROS        = 0x08,
     NVME_CMD_DSM                = 0x09,
 };
 

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 98c16a7..604d37d 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c

@@ -42,9 +42,7 @@
 
 static void virtio_blk_free_request(VirtIOBlockReq *req)
 {
-    if (req) {
-        g_free(req);
-    }
+    g_free(req);
 }
 
 static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)

diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c
index 58f1f02..4601267 100644
--- a/hw/core/generic-loader.c
+++ b/hw/core/generic-loader.c

@@ -137,20 +137,21 @@
 #endif
 
     if (s->file) {
+        AddressSpace *as = s->cpu ? s->cpu->as :  NULL;
+
         if (!s->force_raw) {
             size = load_elf_as(s->file, NULL, NULL, &entry, NULL, NULL,
-                               big_endian, 0, 0, 0, s->cpu->as);
+                               big_endian, 0, 0, 0, as);
 
             if (size < 0) {
                 size = load_uimage_as(s->file, &entry, NULL, NULL, NULL, NULL,
-                                      s->cpu->as);
+                                      as);
             }
         }
 
         if (size < 0 || s->force_raw) {
             /* Default to the maximum size being the machine's ram size */
-            size = load_image_targphys_as(s->file, s->addr, ram_size,
-                                          s->cpu->as);
+            size = load_image_targphys_as(s->file, s->addr, ram_size, as);
         } else {
             s->addr = entry;
         }

diff --git a/hw/core/machine.c b/hw/core/machine.c
index ada9eea..fd6a436 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c

@@ -17,8 +17,10 @@
 #include "qapi/visitor.h"
 #include "hw/sysbus.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/numa.h"
 #include "qemu/error-report.h"
 #include "qemu/cutils.h"
+#include "sysemu/numa.h"
 
 static char *machine_get_accel(Object *obj, Error **errp)
 {
@@ -388,6 +390,102 @@
     return head;
 }
 
+/**
+ * machine_set_cpu_numa_node:
+ * @machine: machine object to modify
+ * @props: specifies which cpu objects to assign to
+ *         numa node specified by @props.node_id
+ * @errp: if an error occurs, a pointer to an area to store the error
+ *
+ * Associate NUMA node specified by @props.node_id with cpu slots that
+ * match socket/core/thread-ids specified by @props. It's recommended to use
+ * query-hotpluggable-cpus.props values to specify affected cpu slots,
+ * which would lead to exact 1:1 mapping of cpu slots to NUMA node.
+ *
+ * However for CLI convenience it's possible to pass in subset of properties,
+ * which would affect all cpu slots that match it.
+ * Ex for pc machine:
+ *    -smp 4,cores=2,sockets=2 -numa node,nodeid=0 -numa node,nodeid=1 \
+ *    -numa cpu,node-id=0,socket_id=0 \
+ *    -numa cpu,node-id=1,socket_id=1
+ * will assign all child cores of socket 0 to node 0 and
+ * of socket 1 to node 1.
+ *
+ * On attempt of reassigning (already assigned) cpu slot to another NUMA node,
+ * return error.
+ * Empty subset is disallowed and function will return with error in this case.
+ */
+void machine_set_cpu_numa_node(MachineState *machine,
+                               const CpuInstanceProperties *props, Error **errp)
+{
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
+    bool match = false;
+    int i;
+
+    if (!mc->possible_cpu_arch_ids) {
+        error_setg(errp, "mapping of CPUs to NUMA node is not supported");
+        return;
+    }
+
+    /* disabling node mapping is not supported, forbid it */
+    assert(props->has_node_id);
+
+    /* force board to initialize possible_cpus if it hasn't been done yet */
+    mc->possible_cpu_arch_ids(machine);
+
+    for (i = 0; i < machine->possible_cpus->len; i++) {
+        CPUArchId *slot = &machine->possible_cpus->cpus[i];
+
+        /* reject unsupported by board properties */
+        if (props->has_thread_id && !slot->props.has_thread_id) {
+            error_setg(errp, "thread-id is not supported");
+            return;
+        }
+
+        if (props->has_core_id && !slot->props.has_core_id) {
+            error_setg(errp, "core-id is not supported");
+            return;
+        }
+
+        if (props->has_socket_id && !slot->props.has_socket_id) {
+            error_setg(errp, "socket-id is not supported");
+            return;
+        }
+
+        /* skip slots with explicit mismatch */
+        if (props->has_thread_id && props->thread_id != slot->props.thread_id) {
+                continue;
+        }
+
+        if (props->has_core_id && props->core_id != slot->props.core_id) {
+                continue;
+        }
+
+        if (props->has_socket_id && props->socket_id != slot->props.socket_id) {
+                continue;
+        }
+
+        /* reject assignment if slot is already assigned, for compatibility
+         * of legacy cpu_index mapping with SPAPR core based mapping do not
+         * error out if cpu thread and matched core have the same node-id */
+        if (slot->props.has_node_id &&
+            slot->props.node_id != props->node_id) {
+            error_setg(errp, "CPU is already assigned to node-id: %" PRId64,
+                       slot->props.node_id);
+            return;
+        }
+
+        /* assign slot to node as it's matched '-numa cpu' key */
+        match = true;
+        slot->props.node_id = props->node_id;
+        slot->props.has_node_id = props->has_node_id;
+    }
+
+    if (!match) {
+        error_setg(errp, "no match found");
+    }
+}
+
 static void machine_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -400,6 +498,7 @@
      * On Linux, each node's border has to be 8MB aligned
      */
     mc->numa_mem_align_shift = 23;
+    mc->numa_auto_assign_ram = numa_default_auto_assign_ram;
 
     object_class_property_add_str(oc, "accel",
         machine_get_accel, machine_set_accel, &error_abort);
@@ -580,6 +679,69 @@
     return machine->mem_merge;
 }
 
+static char *cpu_slot_to_string(const CPUArchId *cpu)
+{
+    GString *s = g_string_new(NULL);
+    if (cpu->props.has_socket_id) {
+        g_string_append_printf(s, "socket-id: %"PRId64, cpu->props.socket_id);
+    }
+    if (cpu->props.has_core_id) {
+        if (s->len) {
+            g_string_append_printf(s, ", ");
+        }
+        g_string_append_printf(s, "core-id: %"PRId64, cpu->props.core_id);
+    }
+    if (cpu->props.has_thread_id) {
+        if (s->len) {
+            g_string_append_printf(s, ", ");
+        }
+        g_string_append_printf(s, "thread-id: %"PRId64, cpu->props.thread_id);
+    }
+    return g_string_free(s, false);
+}
+
+static void machine_numa_validate(MachineState *machine)
+{
+    int i;
+    GString *s = g_string_new(NULL);
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
+    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(machine);
+
+    assert(nb_numa_nodes);
+    for (i = 0; i < possible_cpus->len; i++) {
+        const CPUArchId *cpu_slot = &possible_cpus->cpus[i];
+
+        /* at this point numa mappings are initilized by CLI options
+         * or with default mappings so it's sufficient to list
+         * all not yet mapped CPUs here */
+        /* TODO: make it hard error in future */
+        if (!cpu_slot->props.has_node_id) {
+            char *cpu_str = cpu_slot_to_string(cpu_slot);
+            g_string_append_printf(s, "%sCPU %d [%s]", s->len ? ", " : "", i,
+                                   cpu_str);
+            g_free(cpu_str);
+        }
+    }
+    if (s->len) {
+        error_report("warning: CPU(s) not present in any NUMA nodes: %s",
+                     s->str);
+        error_report("warning: All CPU(s) up to maxcpus should be described "
+                     "in NUMA config, ability to start up with partial NUMA "
+                     "mappings is obsoleted and will be removed in future");
+    }
+    g_string_free(s, true);
+}
+
+void machine_run_board_init(MachineState *machine)
+{
+    MachineClass *machine_class = MACHINE_GET_CLASS(machine);
+
+    if (nb_numa_nodes) {
+        machine_numa_validate(machine);
+    }
+    machine_class->init(machine);
+}
+
 static void machine_class_finalize(ObjectClass *klass, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(klass);

diff --git a/hw/core/or-irq.c b/hw/core/or-irq.c
index 1485d5b..f9d76c4 100644
--- a/hw/core/or-irq.c
+++ b/hw/core/or-irq.c

@@ -91,7 +91,7 @@
     dc->vmsd = &vmstate_or_irq;
 
     /* Reason: Needs to be wired up to work, e.g. see stm32f205_soc.c */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo or_irq_type_info = {

diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 02b632f..1863db9 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c

@@ -1118,6 +1118,7 @@
      * should override it in their class_init()
      */
     dc->hotpluggable = true;
+    dc->user_creatable = true;
 }
 
 void device_reset(DeviceState *dev)

diff --git a/hw/core/register.c b/hw/core/register.c
index dc335a7..da38ef3 100644
--- a/hw/core/register.c
+++ b/hw/core/register.c

@@ -288,7 +288,7 @@
     DeviceClass *dc = DEVICE_CLASS(oc);
 
     /* Reason: needs to be wired up to work */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo register_info = {

diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index c0f560b..5d0887f 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c

@@ -326,6 +326,17 @@
     DeviceClass *k = DEVICE_CLASS(klass);
     k->init = sysbus_device_init;
     k->bus_type = TYPE_SYSTEM_BUS;
+    /*
+     * device_add plugs devices into a suitable bus.  For "real" buses,
+     * that actually connects the device.  For sysbus, the connections
+     * need to be made separately, and device_add can't do that.  The
+     * device would be left unconnected, and will probably not work
+     *
+     * However, a few machines can handle device_add/-device with
+     * a few specific sysbus devices. In those cases, the device
+     * subclass needs to override it and set user_creatable=true.
+     */
+    k->user_creatable = false;
 }
 
 static const TypeInfo sysbus_device_type_info = {

diff --git a/hw/display/cg3.c b/hw/display/cg3.c
index 7ef8a96..1de15a1 100644
--- a/hw/display/cg3.c
+++ b/hw/display/cg3.c

@@ -94,7 +94,8 @@
     uint32_t dval;
     int x, y, y_start;
     unsigned int width, height;
-    ram_addr_t page, page_min, page_max;
+    ram_addr_t page;
+    DirtyBitmapSnapshot *snap = NULL;
 
     if (surface_bits_per_pixel(surface) != 32) {
         return;
@@ -103,29 +104,32 @@
     height = s->height;
 
     y_start = -1;
-    page_min = -1;
-    page_max = 0;
-    page = 0;
     pix = memory_region_get_ram_ptr(&s->vram_mem);
     data = (uint32_t *)surface_data(surface);
 
-    memory_region_sync_dirty_bitmap(&s->vram_mem);
+    if (!s->full_update) {
+        memory_region_sync_dirty_bitmap(&s->vram_mem);
+        snap = memory_region_snapshot_and_clear_dirty(&s->vram_mem, 0x0,
+                                              memory_region_size(&s->vram_mem),
+                                              DIRTY_MEMORY_VGA);
+    }
+
     for (y = 0; y < height; y++) {
-        int update = s->full_update;
+        int update;
 
         page = (ram_addr_t)y * width;
-        update |= memory_region_get_dirty(&s->vram_mem, page, width,
-                                          DIRTY_MEMORY_VGA);
+
+        if (s->full_update) {
+            update = 1;
+        } else {
+            update = memory_region_snapshot_get_dirty(&s->vram_mem, snap, page,
+                                                      width);
+        }
+
         if (update) {
             if (y_start < 0) {
                 y_start = y;
             }
-            if (page < page_min) {
-                page_min = page;
-            }
-            if (page > page_max) {
-                page_max = page;
-            }
 
             for (x = 0; x < width; x++) {
                 dval = *pix++;
@@ -134,7 +138,7 @@
             }
         } else {
             if (y_start >= 0) {
-                dpy_gfx_update(s->con, 0, y_start, s->width, y - y_start);
+                dpy_gfx_update(s->con, 0, y_start, width, y - y_start);
                 y_start = -1;
             }
             pix += width;
@@ -143,17 +147,14 @@
     }
     s->full_update = 0;
     if (y_start >= 0) {
-        dpy_gfx_update(s->con, 0, y_start, s->width, y - y_start);
-    }
-    if (page_max >= page_min) {
-        memory_region_reset_dirty(&s->vram_mem,
-                              page_min, page_max - page_min, DIRTY_MEMORY_VGA);
+        dpy_gfx_update(s->con, 0, y_start, width, y - y_start);
     }
     /* vsync interrupt? */
     if (s->regs[0] & CG3_CR_ENABLE_INTS) {
         s->regs[1] |= CG3_SR_PENDING_INT;
         qemu_irq_raise(s->irq);
     }
+    g_free(snap);
 }
 
 static void cg3_invalidate_display(void *opaque)

diff --git a/hw/display/jazz_led.c b/hw/display/jazz_led.c
index b72fdb1..3c97d56 100644
--- a/hw/display/jazz_led.c
+++ b/hw/display/jazz_led.c

@@ -227,13 +227,13 @@
 static void jazz_led_text_update(void *opaque, console_ch_t *chardata)
 {
     LedState *s = opaque;
-    char buf[2];
+    char buf[3];
 
     dpy_text_cursor(s->con, -1, -1);
     qemu_console_resize(s->con, 2, 1);
 
     /* TODO: draw the segments */
-    snprintf(buf, 2, "%02hhx\n", s->segments);
+    snprintf(buf, 3, "%02hhx", s->segments);
     console_write_ch(chardata++, ATTR2CHTYPE(buf[0], QEMU_COLOR_BLUE,
                                              QEMU_COLOR_BLACK, 1));
     console_write_ch(chardata++, ATTR2CHTYPE(buf[1], QEMU_COLOR_BLUE,

diff --git a/hw/display/sm501.c b/hw/display/sm501.c
index 2094adb..9d254ef 100644
--- a/hw/display/sm501.c
+++ b/hw/display/sm501.c

@@ -1414,6 +1414,7 @@
 {
     SM501State *s = (SM501State *)opaque;
     DisplaySurface *surface = qemu_console_surface(s->con);
+    DirtyBitmapSnapshot *snap;
     int y, c_x = 0, c_y = 0;
     int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0;
     int width = get_width(s, crt);
@@ -1425,9 +1426,7 @@
     draw_hwc_line_func *draw_hwc_line = NULL;
     int full_update = 0;
     int y_start = -1;
-    ram_addr_t page_min = ~0l;
-    ram_addr_t page_max = 0l;
-    ram_addr_t offset;
+    ram_addr_t offset = 0;
     uint32_t *palette;
     uint8_t hwc_palette[3 * 3];
     uint8_t *hwc_src = NULL;
@@ -1479,17 +1478,17 @@
 
     /* draw each line according to conditions */
     memory_region_sync_dirty_bitmap(&s->local_mem_region);
+    snap = memory_region_snapshot_and_clear_dirty(&s->local_mem_region,
+              offset, width * height * src_bpp, DIRTY_MEMORY_VGA);
     for (y = 0, offset = 0; y < height; y++, offset += width * src_bpp) {
         int update, update_hwc;
-        ram_addr_t page0 = offset;
-        ram_addr_t page1 = offset + width * src_bpp - 1;
 
         /* check if hardware cursor is enabled and we're within its range */
         update_hwc = draw_hwc_line && c_y <= y && y < c_y + SM501_HWC_HEIGHT;
         update = full_update || update_hwc;
         /* check dirty flags for each line */
-        update |= memory_region_get_dirty(&s->local_mem_region, page0,
-                                          page1 - page0, DIRTY_MEMORY_VGA);
+        update |= memory_region_snapshot_get_dirty(&s->local_mem_region, snap,
+                                                   offset, width * src_bpp);
 
         /* draw line and change status */
         if (update) {
@@ -1507,12 +1506,6 @@
             if (y_start < 0) {
                 y_start = y;
             }
-            if (page0 < page_min) {
-                page_min = page0;
-            }
-            if (page1 > page_max) {
-                page_max = page1;
-            }
         } else {
             if (y_start >= 0) {
                 /* flush to display */
@@ -1521,18 +1514,12 @@
             }
         }
     }
+    g_free(snap);
 
     /* complete flush to display */
     if (y_start >= 0) {
         dpy_gfx_update(s->con, 0, y_start, width, y - y_start);
     }
-
-    /* clear dirty flags */
-    if (page_min != ~0l) {
-        memory_region_reset_dirty(&s->local_mem_region,
-                                  page_min, page_max + TARGET_PAGE_SIZE,
-                                  DIRTY_MEMORY_VGA);
-    }
 }
 
 static const GraphicHwOps sm501_ops = {

diff --git a/hw/display/tcx.c b/hw/display/tcx.c
index 0e66dcd..6593c1d 100644
--- a/hw/display/tcx.c
+++ b/hw/display/tcx.c

@@ -104,36 +104,23 @@
     }
 }
 
-static int tcx_check_dirty(TCXState *s, ram_addr_t addr, int len)
+static int tcx_check_dirty(TCXState *s, DirtyBitmapSnapshot *snap,
+                           ram_addr_t addr, int len)
 {
     int ret;
 
-    ret = memory_region_get_dirty(&s->vram_mem, addr, len, DIRTY_MEMORY_VGA);
+    ret = memory_region_snapshot_get_dirty(&s->vram_mem, snap, addr, len);
 
     if (s->depth == 24) {
-        ret |= memory_region_get_dirty(&s->vram_mem,
-                                       s->vram24_offset + addr * 4, len * 4,
-                                       DIRTY_MEMORY_VGA);
-        ret |= memory_region_get_dirty(&s->vram_mem,
-                                       s->cplane_offset + addr * 4, len * 4,
-                                       DIRTY_MEMORY_VGA);
+        ret |= memory_region_snapshot_get_dirty(&s->vram_mem, snap,
+                                       s->vram24_offset + addr * 4, len * 4);
+        ret |= memory_region_snapshot_get_dirty(&s->vram_mem, snap,
+                                       s->cplane_offset + addr * 4, len * 4);
     }
 
     return ret;
 }
 
-static void tcx_reset_dirty(TCXState *s, ram_addr_t addr, int len)
-{
-    memory_region_reset_dirty(&s->vram_mem, addr, len, DIRTY_MEMORY_VGA);
-
-    if (s->depth == 24) {
-        memory_region_reset_dirty(&s->vram_mem, s->vram24_offset + addr * 4,
-                                  len * 4, DIRTY_MEMORY_VGA);
-        memory_region_reset_dirty(&s->vram_mem, s->cplane_offset + addr * 4,
-                                  len * 4, DIRTY_MEMORY_VGA);
-    }
-}
-
 static void update_palette_entries(TCXState *s, int start, int end)
 {
     DisplaySurface *surface = qemu_console_surface(s->con);
@@ -233,7 +220,8 @@
 {
     TCXState *ts = opaque;
     DisplaySurface *surface = qemu_console_surface(ts->con);
-    ram_addr_t page, page_min, page_max;
+    ram_addr_t page;
+    DirtyBitmapSnapshot *snap = NULL;
     int y, y_start, dd, ds;
     uint8_t *d, *s;
 
@@ -243,22 +231,20 @@
 
     page = 0;
     y_start = -1;
-    page_min = -1;
-    page_max = 0;
     d = surface_data(surface);
     s = ts->vram;
     dd = surface_stride(surface);
     ds = 1024;
 
     memory_region_sync_dirty_bitmap(&ts->vram_mem);
+    snap = memory_region_snapshot_and_clear_dirty(&ts->vram_mem, 0x0,
+                                             memory_region_size(&ts->vram_mem),
+                                             DIRTY_MEMORY_VGA);
+
     for (y = 0; y < ts->height; y++, page += ds) {
-        if (tcx_check_dirty(ts, page, ds)) {
+        if (tcx_check_dirty(ts, snap, page, ds)) {
             if (y_start < 0)
                 y_start = y;
-            if (page < page_min)
-                page_min = page;
-            if (page > page_max)
-                page_max = page;
 
             tcx_draw_line32(ts, d, s, ts->width);
             if (y >= ts->cursy && y < ts->cursy + 32 && ts->cursx < ts->width) {
@@ -280,17 +266,15 @@
         dpy_gfx_update(ts->con, 0, y_start,
                        ts->width, y - y_start);
     }
-    /* reset modified pages */
-    if (page_max >= page_min) {
-        tcx_reset_dirty(ts, page_min, page_max - page_min);
-    }
+    g_free(snap);
 }
 
 static void tcx24_update_display(void *opaque)
 {
     TCXState *ts = opaque;
     DisplaySurface *surface = qemu_console_surface(ts->con);
-    ram_addr_t page, page_min, page_max;
+    ram_addr_t page;
+    DirtyBitmapSnapshot *snap = NULL;
     int y, y_start, dd, ds;
     uint8_t *d, *s;
     uint32_t *cptr, *s24;
@@ -301,8 +285,6 @@
 
     page = 0;
     y_start = -1;
-    page_min = -1;
-    page_max = 0;
     d = surface_data(surface);
     s = ts->vram;
     s24 = ts->vram24;
@@ -311,14 +293,15 @@
     ds = 1024;
 
     memory_region_sync_dirty_bitmap(&ts->vram_mem);
+    snap = memory_region_snapshot_and_clear_dirty(&ts->vram_mem, 0x0,
+                                             memory_region_size(&ts->vram_mem),
+                                             DIRTY_MEMORY_VGA);
+
     for (y = 0; y < ts->height; y++, page += ds) {
-        if (tcx_check_dirty(ts, page, ds)) {
+        if (tcx_check_dirty(ts, snap, page, ds)) {
             if (y_start < 0)
                 y_start = y;
-            if (page < page_min)
-                page_min = page;
-            if (page > page_max)
-                page_max = page;
+
             tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
             if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
                 tcx_draw_cursor32(ts, d, y, ts->width);
@@ -341,10 +324,7 @@
         dpy_gfx_update(ts->con, 0, y_start,
                        ts->width, y - y_start);
     }
-    /* reset modified pages */
-    if (page_max >= page_min) {
-        tcx_reset_dirty(ts, page_min, page_max - page_min);
-    }
+    g_free(snap);
 }
 
 static void tcx_invalidate_display(void *opaque)

diff --git a/hw/display/vga.c b/hw/display/vga.c
index b2516c8..dcc95f8 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c

@@ -1630,7 +1630,7 @@
     if (!full_update) {
         vga_sync_dirty_bitmap(s);
         snap = memory_region_snapshot_and_clear_dirty(&s->vram, addr1,
-                                                      bwidth * height,
+                                                      line_offset * height,
                                                       DIRTY_MEMORY_VGA);
     }
 

diff --git a/hw/display/virtio-gpu-3d.c b/hw/display/virtio-gpu-3d.c
index f49b7fe..8c106a6 100644
--- a/hw/display/virtio-gpu-3d.c
+++ b/hw/display/virtio-gpu-3d.c

@@ -600,6 +600,22 @@
     }
 }
 
+void virtio_gpu_gl_block(void *opaque, bool block)
+{
+    VirtIOGPU *g = opaque;
+
+    if (block) {
+        g->renderer_blocked++;
+    } else {
+        g->renderer_blocked--;
+    }
+    assert(g->renderer_blocked >= 0);
+
+    if (g->renderer_blocked == 0) {
+        virtio_gpu_process_cmdq(g);
+    }
+}
+
 int virtio_gpu_virgl_init(VirtIOGPU *g)
 {
     int ret;

diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index e1056f3..cfb5dfa 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c

@@ -929,28 +929,14 @@
     return 0;
 }
 
-static void virtio_gpu_gl_block(void *opaque, bool block)
-{
-    VirtIOGPU *g = opaque;
-
-    if (block) {
-        g->renderer_blocked++;
-    } else {
-        g->renderer_blocked--;
-    }
-    assert(g->renderer_blocked >= 0);
-
-    if (g->renderer_blocked == 0) {
-        virtio_gpu_process_cmdq(g);
-    }
-}
-
 const GraphicHwOps virtio_gpu_ops = {
     .invalidate = virtio_gpu_invalidate_display,
     .gfx_update = virtio_gpu_update_display,
     .text_update = virtio_gpu_text_update,
     .ui_info = virtio_gpu_ui_info,
+#ifdef CONFIG_VIRGL
     .gl_block = virtio_gpu_gl_block,
+#endif
 };
 
 static const VMStateDescription vmstate_virtio_gpu_scanout = {

diff --git a/hw/dma/i8257.c b/hw/dma/i8257.c
index 8bd82e8..bd23e89 100644
--- a/hw/dma/i8257.c
+++ b/hw/dma/i8257.c

@@ -601,7 +601,7 @@
     idc->schedule = i8257_dma_schedule;
     idc->register_channel = i8257_dma_register_channel;
     /* Reason: needs to be wired up by isa_bus_dma() to work */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo i8257_info = {

diff --git a/hw/dma/sparc32_dma.c b/hw/dma/sparc32_dma.c
index 9d545e4..9c6bdc6 100644
--- a/hw/dma/sparc32_dma.c
+++ b/hw/dma/sparc32_dma.c

@@ -305,7 +305,7 @@
     dc->vmsd = &vmstate_dma;
     dc->props = sparc32_dma_properties;
     /* Reason: pointer property "iommu_opaque" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo sparc32_dma_info = {

diff --git a/hw/gpio/omap_gpio.c b/hw/gpio/omap_gpio.c
index dabef4a..1df394e 100644
--- a/hw/gpio/omap_gpio.c
+++ b/hw/gpio/omap_gpio.c

@@ -773,7 +773,7 @@
     dc->reset = omap_gpif_reset;
     dc->props = omap_gpio_properties;
     /* Reason: pointer property "clk" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo omap_gpio_info = {
@@ -804,7 +804,7 @@
     dc->reset = omap2_gpif_reset;
     dc->props = omap2_gpio_properties;
     /* Reason: pointer properties "iclk", "fclk0", ..., "fclk5" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo omap2_gpio_info = {

diff --git a/hw/i2c/omap_i2c.c b/hw/i2c/omap_i2c.c
index f7c92ea..f6e80be 100644
--- a/hw/i2c/omap_i2c.c
+++ b/hw/i2c/omap_i2c.c

@@ -491,7 +491,7 @@
     dc->props = omap_i2c_properties;
     dc->reset = omap_i2c_reset;
     /* Reason: pointer properties "iclk", "fclk" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
     dc->realize = omap_i2c_realize;
 }
 

diff --git a/hw/i2c/smbus_eeprom.c b/hw/i2c/smbus_eeprom.c
index 5b7bd89..b13ec0f 100644
--- a/hw/i2c/smbus_eeprom.c
+++ b/hw/i2c/smbus_eeprom.c

@@ -123,7 +123,7 @@
     sc->read_data = eeprom_read_data;
     dc->props = smbus_eeprom_properties;
     /* Reason: pointer property "data" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo smbus_eeprom_info = {

diff --git a/hw/i2c/smbus_ich9.c b/hw/i2c/smbus_ich9.c
index 48fab22..ea51e09 100644
--- a/hw/i2c/smbus_ich9.c
+++ b/hw/i2c/smbus_ich9.c

@@ -103,7 +103,7 @@
      * Reason: part of ICH9 southbridge, needs to be wired up by
      * pc_q35_init()
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 I2CBus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index c75f73e..afcadac 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c

@@ -2335,7 +2335,8 @@
     srat->reserved1 = cpu_to_le32(1);
 
     for (i = 0; i < apic_ids->len; i++) {
-        int j = numa_get_node_for_cpu(i);
+        int node_id = apic_ids->cpus[i].props.has_node_id ?
+            apic_ids->cpus[i].props.node_id : 0;
         uint32_t apic_id = apic_ids->cpus[i].arch_id;
 
         if (apic_id < 255) {
@@ -2345,9 +2346,7 @@
             core->type = ACPI_SRAT_PROCESSOR_APIC;
             core->length = sizeof(*core);
             core->local_apic_id = apic_id;
-            if (j < nb_numa_nodes) {
-                core->proximity_lo = j;
-            }
+            core->proximity_lo = node_id;
             memset(core->proximity_hi, 0, 3);
             core->local_sapic_eid = 0;
             core->flags = cpu_to_le32(1);
@@ -2358,9 +2357,7 @@
             core->type = ACPI_SRAT_PROCESSOR_x2APIC;
             core->length = sizeof(*core);
             core->x2apic_id = cpu_to_le32(apic_id);
-            if (j < nb_numa_nodes) {
-                core->proximity_domain = cpu_to_le32(j);
-            }
+            core->proximity_domain = cpu_to_le32(node_id);
             core->flags = cpu_to_le32(1);
         }
     }
@@ -2707,6 +2704,10 @@
     if (pcms->numa_nodes) {
         acpi_add_table(table_offsets, tables_blob);
         build_srat(tables_blob, tables->linker, machine);
+        if (have_numa_distance) {
+            acpi_add_table(table_offsets, tables_blob);
+            build_slit(tables_blob, tables->linker);
+        }
     }
     if (acpi_get_mcfg(&mcfg)) {
         acpi_add_table(table_offsets, tables_blob);

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 516ebae..329058d 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c

@@ -1199,6 +1199,8 @@
     dc->vmsd = &vmstate_amdvi;
     dc->hotpluggable = false;
     dc_class->realize = amdvi_realize;
+    /* Supported by the pc-q35-* machine types */
+    dc->user_creatable = true;
 }
 
 static const TypeInfo amdvi = {

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index a12b176..9ba2162 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c

@@ -3019,6 +3019,8 @@
     dc->hotpluggable = false;
     x86_class->realize = vtd_realize;
     x86_class->int_remap = vtd_int_remap;
+    /* Supported by the pc-q35-* machine types */
+    dc->user_creatable = true;
 }
 
 static const TypeInfo vtd_info = {

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 8063241..816bfa8 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c

@@ -597,7 +597,7 @@
      * wiring: its A20 output line needs to be wired up by
      * port92_init().
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo port92_info = {
@@ -747,7 +747,9 @@
 {
     FWCfgState *fw_cfg;
     uint64_t *numa_fw_cfg;
-    int i, j;
+    int i;
+    const CPUArchIdList *cpus;
+    MachineClass *mc = MACHINE_GET_CLASS(pcms);
 
     fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as);
     fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus);
@@ -782,12 +784,12 @@
      */
     numa_fw_cfg = g_new0(uint64_t, 1 + pcms->apic_id_limit + nb_numa_nodes);
     numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
-    for (i = 0; i < max_cpus; i++) {
-        unsigned int apic_id = x86_cpu_apic_id_from_index(i);
+    cpus = mc->possible_cpu_arch_ids(MACHINE(pcms));
+    for (i = 0; i < cpus->len; i++) {
+        unsigned int apic_id = cpus->cpus[i].arch_id;
         assert(apic_id < pcms->apic_id_limit);
-        j = numa_get_node_for_cpu(i);
-        if (j < nb_numa_nodes) {
-            numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
+        if (cpus->cpus[i].props.has_node_id) {
+            numa_fw_cfg[apic_id + 1] = cpu_to_le64(cpus->cpus[i].props.node_id);
         }
     }
     for (i = 0; i < nb_numa_nodes; i++) {
@@ -1891,6 +1893,7 @@
                             DeviceState *dev, Error **errp)
 {
     int idx;
+    int node_id;
     CPUState *cs;
     CPUArchId *cpu_slot;
     X86CPUTopoInfo topo;
@@ -1980,6 +1983,22 @@
 
     cs = CPU(cpu);
     cs->cpu_index = idx;
+
+    node_id = cpu_slot->props.node_id;
+    if (!cpu_slot->props.has_node_id) {
+        /* by default CPUState::numa_node was 0 if it's not set via CLI
+         * keep it this way for now but in future we probably should
+         * refuse to start up with incomplete numa mapping */
+        node_id = 0;
+    }
+    if (cs->numa_node == CPU_UNSET_NUMA_NODE_ID) {
+        cs->numa_node = node_id;
+    } else if (cs->numa_node != node_id) {
+            error_setg(errp, "node-id %d must match numa node specified"
+                "with -numa option for cpu-index %d",
+                cs->numa_node, cs->cpu_index);
+            return;
+    }
 }
 
 static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
@@ -2241,12 +2260,14 @@
     }
 }
 
-static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index)
+static CpuInstanceProperties
+pc_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
 {
-    X86CPUTopoInfo topo;
-    x86_topo_ids_from_idx(smp_cores, smp_threads, cpu_index,
-                          &topo);
-    return topo.pkg_id;
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
+
+    assert(cpu_index < possible_cpus->len);
+    return possible_cpus->cpus[cpu_index].props;
 }
 
 static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
@@ -2278,6 +2299,15 @@
         ms->possible_cpus->cpus[i].props.core_id = topo.core_id;
         ms->possible_cpus->cpus[i].props.has_thread_id = true;
         ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id;
+
+        /* default distribution of CPUs over NUMA nodes */
+        if (nb_numa_nodes) {
+            /* preset values but do not enable them i.e. 'has_node_id = false',
+             * numa init code will enable them later if manual mapping wasn't
+             * present on CLI */
+            ms->possible_cpus->cpus[i].props.node_id =
+                topo.pkg_id % nb_numa_nodes;
+        }
     }
     return ms->possible_cpus;
 }
@@ -2321,7 +2351,7 @@
     pcmc->save_tsc_khz = true;
     pcmc->linuxboot_dma_enabled = true;
     mc->get_hotplug_handler = pc_get_hotpug_handler;
-    mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id;
+    mc->cpu_index_to_instance_props = pc_cpu_index_to_props;
     mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids;
     mc->has_hotpluggable_cpus = true;
     mc->default_boot_order = "cad";

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 8f3d85c..2234bd0 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c

@@ -54,6 +54,7 @@
 #endif
 #include "migration/migration.h"
 #include "kvm_i386.h"
+#include "sysemu/numa.h"
 
 #define MAX_IDE_BUS 2
 
@@ -442,6 +443,7 @@
     pc_i440fx_machine_options(m);
     m->alias = "pc";
     m->is_default = 1;
+    m->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
 }
 
 DEFINE_I440FX_MACHINE(v2_10, "pc-i440fx-2.10", NULL,

diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index cf9a788..f243203 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c

@@ -47,6 +47,7 @@
 #include "hw/usb.h"
 #include "qemu/error-report.h"
 #include "migration/migration.h"
+#include "sysemu/numa.h"
 
 /* ICH9 AHCI has 6 ports */
 #define MAX_SATA_PORTS     6
@@ -305,6 +306,7 @@
 {
     pc_q35_machine_options(m);
     m->alias = "q35";
+    m->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
 }
 
 DEFINE_Q35_MACHINE(v2_10, "pc-q35-2.10", NULL,

diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c
index 31debdf..e60156c 100644
--- a/hw/i386/xen/xen-mapcache.c
+++ b/hw/i386/xen/xen-mapcache.c

@@ -62,6 +62,7 @@
     hwaddr paddr_index;
     hwaddr size;
     QTAILQ_ENTRY(MapCacheRev) next;
+    bool dma;
 } MapCacheRev;
 
 typedef struct MapCache {
@@ -202,7 +203,7 @@
 }
 
 static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, hwaddr size,
-                                       uint8_t lock)
+                                       uint8_t lock, bool dma)
 {
     MapCacheEntry *entry, *pentry = NULL;
     hwaddr address_index;
@@ -289,6 +290,7 @@
     if (lock) {
         MapCacheRev *reventry = g_malloc0(sizeof(MapCacheRev));
         entry->lock++;
+        reventry->dma = dma;
         reventry->vaddr_req = mapcache->last_entry->vaddr_base + address_offset;
         reventry->paddr_index = mapcache->last_entry->paddr_index;
         reventry->size = entry->size;
@@ -300,12 +302,12 @@
 }
 
 uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
-                       uint8_t lock)
+                       uint8_t lock, bool dma)
 {
     uint8_t *p;
 
     mapcache_lock();
-    p = xen_map_cache_unlocked(phys_addr, size, lock);
+    p = xen_map_cache_unlocked(phys_addr, size, lock, dma);
     mapcache_unlock();
     return p;
 }
@@ -426,8 +428,11 @@
     mapcache_lock();
 
     QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
-        DPRINTF("There should be no locked mappings at this time, "
-                "but "TARGET_FMT_plx" -> %p is present\n",
+        if (!reventry->dma) {
+            continue;
+        }
+        fprintf(stderr, "Locked DMA mapping while invalidating mapcache!"
+                " "TARGET_FMT_plx" -> %p is present\n",
                 reventry->paddr_index, reventry->vaddr_req);
     }
 

diff --git a/hw/input/virtio-input-hid.c b/hw/input/virtio-input-hid.c
index 3ee0c18..46c0381 100644
--- a/hw/input/virtio-input-hid.c
+++ b/hw/input/virtio-input-hid.c

@@ -484,12 +484,14 @@
         .select    = VIRTIO_INPUT_CFG_ABS_INFO,
         .subsel    = ABS_X,
         .size      = sizeof(virtio_input_absinfo),
-        .u.abs.max = const_le32(INPUT_EVENT_ABS_SIZE - 1),
+        .u.abs.min = const_le32(INPUT_EVENT_ABS_MIN),
+        .u.abs.max = const_le32(INPUT_EVENT_ABS_MAX),
     },{
         .select    = VIRTIO_INPUT_CFG_ABS_INFO,
         .subsel    = ABS_Y,
         .size      = sizeof(virtio_input_absinfo),
-        .u.abs.max = const_le32(INPUT_EVENT_ABS_SIZE - 1),
+        .u.abs.min = const_le32(INPUT_EVENT_ABS_MIN),
+        .u.abs.max = const_le32(INPUT_EVENT_ABS_MAX),
     },
     { /* end of list */ },
 };

diff --git a/hw/input/vmmouse.c b/hw/input/vmmouse.c
index 6d15a88..4747da9 100644
--- a/hw/input/vmmouse.c
+++ b/hw/input/vmmouse.c

@@ -286,7 +286,7 @@
     dc->vmsd = &vmstate_vmmouse;
     dc->props = vmmouse_properties;
     /* Reason: pointer property "ps2_mouse" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo vmmouse_info = {

diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index c3829e3..1ef56f8 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c

@@ -501,7 +501,7 @@
      * Reason: APIC and CPU need to be wired up by
      * x86_cpu_apic_create()
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo apic_common_type = {

diff --git a/hw/intc/etraxfs_pic.c b/hw/intc/etraxfs_pic.c
index 64a6f4b..1bfde2f 100644
--- a/hw/intc/etraxfs_pic.c
+++ b/hw/intc/etraxfs_pic.c

@@ -173,7 +173,7 @@
     dc->props = etraxfs_pic_properties;
     /*
      * Note: pointer property "interrupt_vector" may remain null, thus
-     * no need for dc->cannot_instantiate_with_device_add_yet = true;
+     * no need for dc->user_creatable = false;
      */
 }
 

diff --git a/hw/intc/grlib_irqmp.c b/hw/intc/grlib_irqmp.c
index ac7e63f..94659ee 100644
--- a/hw/intc/grlib_irqmp.c
+++ b/hw/intc/grlib_irqmp.c

@@ -360,7 +360,7 @@
     dc->reset = grlib_irqmp_reset;
     dc->props = grlib_irqmp_properties;
     /* Reason: pointer properties "set_pil_in", "set_pil_in_opaque" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
     dc->realize = grlib_irqmp_realize;
 }
 

diff --git a/hw/intc/i8259_common.c b/hw/intc/i8259_common.c
index d9a5e8b..c2fd563 100644
--- a/hw/intc/i8259_common.c
+++ b/hw/intc/i8259_common.c

@@ -144,7 +144,7 @@
      * wiring of the slave to the master is hard-coded in device model
      * code.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo pic_common_type = {

diff --git a/hw/intc/nios2_iic.c b/hw/intc/nios2_iic.c
index 190b6fd..016426f 100644
--- a/hw/intc/nios2_iic.c
+++ b/hw/intc/nios2_iic.c

@@ -80,7 +80,7 @@
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     /* Reason: needs to be wired up, e.g. by nios2_10m50_ghrd_init() */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
     dc->realize = altera_iic_realize;
 }
 

diff --git a/hw/intc/omap_intc.c b/hw/intc/omap_intc.c
index 877be67..ccdda89 100644
--- a/hw/intc/omap_intc.c
+++ b/hw/intc/omap_intc.c

@@ -401,7 +401,7 @@
     dc->reset = omap_inth_reset;
     dc->props = omap_intc_properties;
     /* Reason: pointer property "clk" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
     dc->realize = omap_intc_realize;
 }
 
@@ -656,7 +656,7 @@
     dc->reset = omap_inth_reset;
     dc->props = omap2_intc_properties;
     /* Reason: pointer property "iclk", "fclk" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
     dc->realize = omap2_intc_realize;
 }
 

diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 42e0e0e..dd93531 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c

@@ -213,6 +213,7 @@
             irq->priority = irq->saved_priority;
         }
 
+        irq->status = 0;
         if (state & KVM_XICS_PENDING) {
             if (state & KVM_XICS_LEVEL_SENSITIVE) {
                 irq->status |= XICS_STATUS_ASSERTED;
@@ -228,6 +229,12 @@
                     | XICS_STATUS_REJECTED;
             }
         }
+        if (state & KVM_XICS_PRESENTED) {
+                irq->status |= XICS_STATUS_PRESENTED;
+        }
+        if (state & KVM_XICS_QUEUED) {
+                irq->status |= XICS_STATUS_QUEUED;
+        }
     }
 }
 
@@ -265,6 +272,12 @@
                 state |= KVM_XICS_PENDING;
             }
         }
+        if (irq->status & XICS_STATUS_PRESENTED) {
+                state |= KVM_XICS_PRESENTED;
+        }
+        if (irq->status & XICS_STATUS_QUEUED) {
+                state |= KVM_XICS_QUEUED;
+        }
 
         ret = ioctl(kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr);
         if (ret != 0) {

diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index a0866c3..e2215dc 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c

@@ -805,7 +805,7 @@
      * Reason: part of ICH9 southbridge, needs to be wired up by
      * pc_q35_init()
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
     hc->plug = ich9_pm_device_plug_cb;
     hc->unplug_request = ich9_pm_device_unplug_request_cb;
     hc->unplug = ich9_pm_device_unplug_cb;

diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c
index 5500fcc..f811eba 100644
--- a/hw/isa/piix4.c
+++ b/hw/isa/piix4.c

@@ -123,7 +123,7 @@
      * Reason: part of PIIX4 southbridge, needs to be wired up,
      * e.g. by mips_malta_init()
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
     dc->hotpluggable = false;
 }
 

diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c
index 41d5254..50dc83d 100644
--- a/hw/isa/vt82c686.c
+++ b/hw/isa/vt82c686.c

@@ -494,7 +494,7 @@
      * Reason: part of VIA VT82C686 southbridge, needs to be wired up,
      * e.g. by mips_fulong2e_init()
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo via_info = {

diff --git a/hw/microblaze/boot.c b/hw/microblaze/boot.c
index 1834d22..457a08a 100644
--- a/hw/microblaze/boot.c
+++ b/hw/microblaze/boot.c

@@ -189,7 +189,7 @@
                                                   ram_size - initrd_offset);
             }
             if (initrd_size < 0) {
-                error_report("qemu: could not load initrd '%s'",
+                error_report("could not load initrd '%s'",
                              initrd_filename);
                 exit(EXIT_FAILURE);
             }

diff --git a/hw/mips/gt64xxx_pci.c b/hw/mips/gt64xxx_pci.c
index 4811843..e8b2eef 100644
--- a/hw/mips/gt64xxx_pci.c
+++ b/hw/mips/gt64xxx_pci.c

@@ -1224,7 +1224,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo gt64120_pci_info = {

diff --git a/hw/misc/vmport.c b/hw/misc/vmport.c
index be40930..1655002 100644
--- a/hw/misc/vmport.c
+++ b/hw/misc/vmport.c

@@ -163,7 +163,7 @@
 
     dc->realize = vmport_realizefn;
     /* Reason: realize sets global port_state */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo vmport_info = {

diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
index efa33ad..b53fcaa 100644
--- a/hw/net/dp8393x.c
+++ b/hw/net/dp8393x.c

@@ -934,7 +934,7 @@
     dc->vmsd = &vmstate_dp8393x;
     dc->props = dp8393x_properties;
     /* Reason: dma_mr property can't be set */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo dp8393x_info = {

diff --git a/hw/net/etraxfs_eth.c b/hw/net/etraxfs_eth.c
index efaa49f..013c8d0 100644
--- a/hw/net/etraxfs_eth.c
+++ b/hw/net/etraxfs_eth.c

@@ -630,7 +630,7 @@
     k->init = fs_eth_init;
     dc->props = etraxfs_eth_properties;
     /* Reason: pointer properties "dma_out", "dma_in" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo etraxfs_eth_info = {

diff --git a/hw/net/fsl_etsec/etsec.c b/hw/net/fsl_etsec/etsec.c
index aa2b0d5..9da1932 100644
--- a/hw/net/fsl_etsec/etsec.c
+++ b/hw/net/fsl_etsec/etsec.c

@@ -416,6 +416,8 @@
     dc->realize = etsec_realize;
     dc->reset = etsec_reset;
     dc->props = etsec_properties;
+    /* Supported by ppce500 machine */
+    dc->user_creatable = true;
 }
 
 static TypeInfo etsec_info = {

diff --git a/hw/net/lance.c b/hw/net/lance.c
index 573d724..92b0c68 100644
--- a/hw/net/lance.c
+++ b/hw/net/lance.c

@@ -165,7 +165,7 @@
     dc->vmsd = &vmstate_lance;
     dc->props = lance_properties;
     /* Reason: pointer property "dma" */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo lance_info = {

diff --git a/hw/nios2/boot.c b/hw/nios2/boot.c
index e0a9aff..2b31f5b 100644
--- a/hw/nios2/boot.c
+++ b/hw/nios2/boot.c

@@ -197,7 +197,7 @@
                                                   ram_size - initrd_offset);
             }
             if (initrd_size < 0) {
-                error_report("qemu: could not load initrd '%s'",
+                error_report("could not load initrd '%s'",
                              initrd_filename);
                 exit(EXIT_FAILURE);
             }

diff --git a/hw/pci-bridge/dec.c b/hw/pci-bridge/dec.c
index 840c961..cca9362 100644
--- a/hw/pci-bridge/dec.c
+++ b/hw/pci-bridge/dec.c

@@ -128,7 +128,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo dec_21154_pci_host_info = {

diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
index 6ac187f..ff59abf 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c

@@ -150,7 +150,7 @@
 
     dc->fw_name = "pci";
     /* Reason: Internal part of the pxb/pxb-pcie device, not usable by itself */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
     sbc->explicit_ofw_unit_address = pxb_host_ofw_unit_address;
     hc->root_bus_path = pxb_host_root_bus_path;
 }

diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c
index 653e711..edc88f4 100644
--- a/hw/pci-host/apb.c
+++ b/hw/pci-host/apb.c

@@ -810,7 +810,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo pbm_pci_host_info = {

diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c
index 1999ece..85a3bb0 100644
--- a/hw/pci-host/bonito.c
+++ b/hw/pci-host/bonito.c

@@ -825,7 +825,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo bonito_info = {

diff --git a/hw/pci-host/gpex.c b/hw/pci-host/gpex.c
index 66055ee..e2629ce 100644
--- a/hw/pci-host/gpex.c
+++ b/hw/pci-host/gpex.c

@@ -136,7 +136,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo gpex_root_info = {

diff --git a/hw/pci-host/grackle.c b/hw/pci-host/grackle.c
index 2c8acda..2e281f6 100644
--- a/hw/pci-host/grackle.c
+++ b/hw/pci-host/grackle.c

@@ -134,7 +134,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo grackle_pci_info = {

diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index bf4221d..2d02de1 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c

@@ -685,7 +685,7 @@
      * Reason: part of PIIX3 southbridge, needs to be wired up by
      * pc_piix.c's pc_init1()
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo piix3_pci_type_info = {
@@ -739,7 +739,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
     dc->hotpluggable   = false;
 }
 
@@ -868,7 +868,7 @@
     dc->fw_name = "pci";
     dc->props = i440fx_props;
     /* Reason: needs to be wired up by pc_init1 */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo i440fx_pcihost_info = {

diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c
index e502bc0..becc0ee 100644
--- a/hw/pci-host/ppce500.c
+++ b/hw/pci-host/ppce500.c

@@ -508,7 +508,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo e500_host_bridge_info = {

diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c
index 260a119..900a6ed 100644
--- a/hw/pci-host/prep.c
+++ b/hw/pci-host/prep.c

@@ -364,7 +364,7 @@
      * Reason: PCI-facing part of the host bridge, not usable without
      * the host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo raven_info = {

diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 344f77b..cd5c496 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c

@@ -156,7 +156,7 @@
     dc->realize = q35_host_realize;
     dc->props = mch_props;
     /* Reason: needs to be wired up by pc_q35_init */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
     dc->fw_name = "pci";
 }
@@ -549,7 +549,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo mch_info = {

diff --git a/hw/pci-host/uninorth.c b/hw/pci-host/uninorth.c
index df342ac..6cf5e59 100644
--- a/hw/pci-host/uninorth.c
+++ b/hw/pci-host/uninorth.c

@@ -366,7 +366,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo unin_main_pci_host_info = {
@@ -390,7 +390,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo u3_agp_pci_host_info = {
@@ -414,7 +414,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo unin_agp_pci_host_info = {
@@ -438,7 +438,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo unin_internal_pci_host_info = {

diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c
index 27fde46..aa1fdf7 100644
--- a/hw/pci-host/versatile.c
+++ b/hw/pci-host/versatile.c

@@ -479,7 +479,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo versatile_pci_host_info = {

diff --git a/hw/pci-host/xilinx-pcie.c b/hw/pci-host/xilinx-pcie.c
index 8b71e2d..a968cea 100644
--- a/hw/pci-host/xilinx-pcie.c
+++ b/hw/pci-host/xilinx-pcie.c

@@ -309,7 +309,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo xilinx_pcie_root_info = {

diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 68aaedc..bae1c0a 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c

@@ -80,6 +80,8 @@
 #define CLOCKFREQ (266UL * 1000UL * 1000UL)
 #define BUSFREQ (100UL * 1000UL * 1000UL)
 
+#define NDRV_VGA_FILENAME "qemu_vga.ndrv"
+
 /* UniN device */
 static void unin_write(void *opaque, hwaddr addr, uint64_t value,
                        unsigned size)
@@ -160,7 +162,8 @@
     MACIOIDEState *macio_ide;
     BusState *adb_bus;
     MacIONVRAMState *nvr;
-    int bios_size;
+    int bios_size, ndrv_size;
+    uint8_t *ndrv_file;
     MemoryRegion *pic_mem, *escc_mem;
     MemoryRegion *escc_bar = g_new(MemoryRegion, 1);
     int ppc_boot_device;
@@ -494,6 +497,19 @@
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_NVRAM_ADDR, nvram_addr);
 
+    /* MacOS NDRV VGA driver */
+    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, NDRV_VGA_FILENAME);
+    if (filename) {
+        ndrv_size = get_image_size(filename);
+        if (ndrv_size != -1) {
+            ndrv_file = g_malloc(ndrv_size);
+            ndrv_size = load_image(filename, ndrv_file);
+
+            fw_cfg_add_file(fw_cfg, "ndrv/qemu_vga.ndrv", ndrv_file, ndrv_size);
+        }
+        g_free(filename);
+    }
+
     qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
 }
 

diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 5df94e2..97bb854 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c

@@ -53,6 +53,8 @@
 #define CLOCKFREQ 266000000UL
 #define BUSFREQ 66000000UL
 
+#define NDRV_VGA_FILENAME "qemu_vga.ndrv"
+
 static void fw_cfg_boot_set(void *opaque, const char *boot_device,
                             Error **errp)
 {
@@ -99,7 +101,8 @@
     MACIOIDEState *macio_ide;
     DeviceState *dev;
     BusState *adb_bus;
-    int bios_size;
+    int bios_size, ndrv_size;
+    uint8_t *ndrv_file;
     MemoryRegion *pic_mem;
     MemoryRegion *escc_mem, *escc_bar = g_new(MemoryRegion, 1);
     uint16_t ppc_boot_device;
@@ -355,6 +358,19 @@
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ);
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);
 
+    /* MacOS NDRV VGA driver */
+    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, NDRV_VGA_FILENAME);
+    if (filename) {
+        ndrv_size = get_image_size(filename);
+        if (ndrv_size != -1) {
+            ndrv_file = g_malloc(ndrv_size);
+            ndrv_size = load_image(filename, ndrv_file);
+
+            fw_cfg_add_file(fw_cfg, "ndrv/qemu_vga.ndrv", ndrv_file, ndrv_size);
+        }
+        g_free(filename);
+    }
+
     qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
 }
 

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index d4bcdb0..231ed97 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c

@@ -511,7 +511,7 @@
      * This is the internal simulator but it could also be an external
      * BMC.
      */
-    obj = object_resolve_path_type("", TYPE_IPMI_BMC, NULL);
+    obj = object_resolve_path_type("", "ipmi-bmc-sim", NULL);
     if (obj) {
         pnv->bmc = IPMI_BMC(obj);
     }
@@ -610,7 +610,7 @@
     /* Create the processor chips */
     chip_typename = g_strdup_printf(TYPE_PNV_CHIP "-%s", machine->cpu_model);
     if (!object_class_by_name(chip_typename)) {
-        error_report("qemu: invalid CPU model '%s' for %s machine",
+        error_report("invalid CPU model '%s' for %s machine",
                      machine->cpu_model, MACHINE_GET_CLASS(machine)->name);
         exit(1);
     }

diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c
index dc19682..6953f8b 100644
--- a/hw/ppc/ppc4xx_pci.c
+++ b/hw/ppc/ppc4xx_pci.c

@@ -351,7 +351,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo ppc4xx_host_bridge_info = {

diff --git a/hw/ppc/ppc_booke.c b/hw/ppc/ppc_booke.c
index 60baffa..23bcf1b 100644
--- a/hw/ppc/ppc_booke.c
+++ b/hw/ppc/ppc_booke.c

@@ -282,7 +282,6 @@
     ppc_tb_t *tb_env = env->tb_env;
     booke_timer_t *booke_timer = tb_env->opaque;
 
-    tb_env = env->tb_env;
     env->spr[SPR_BOOKE_TCR] = val;
     kvmppc_set_tcr(cpu);
 

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 80d12d0..0980d73 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c

@@ -219,7 +219,7 @@
         /* 16: Vector */
         0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 12 - 17 */
         /* 18: Vec. Scalar, 20: Vec. XOR, 22: HTM */
-        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 18 - 23 */
+        0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 18 - 23 */
         /* 24: Ext. Dec, 26: 64 bit ftrs, 28: PM ftrs */
         0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 24 - 29 */
         /* 30: MMR, 32: LE atomic, 34: EBB + ext EBB */
@@ -855,6 +855,8 @@
  * option vector 5: */
 static void spapr_dt_ov5_platform_support(void *fdt, int chosen)
 {
+    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
+
     char val[2 * 3] = {
         24, 0x00, /* Hash/Radix, filled in below. */
         25, 0x00, /* Hash options: Segment Tables == no, GTSE == no. */
@@ -870,8 +872,13 @@
             val[1] = 0x00; /* Hash */
         }
     } else {
-        /* TODO: TCG case, hash */
-        val[1] = 0x00;
+        if (first_ppc_cpu->env.mmu_model & POWERPC_MMU_V3) {
+            /* V3 MMU supports both hash and radix (with dynamic switching) */
+            val[1] = 0xC0;
+        } else {
+            /* Otherwise we can only do hash */
+            val[1] = 0x00;
+        }
     }
     _FDT(fdt_setprop(fdt, chosen, "ibm,arch-vec-5-platform-support",
                      val, sizeof(val)));
@@ -2101,8 +2108,8 @@
     }
 
     spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY);
-    if (kvmppc_has_cap_mmu_radix()) {
-        /* KVM always allows GTSE with radix... */
+    if (!kvm_enabled() || kvmppc_has_cap_mmu_radix()) {
+        /* KVM and TCG always allow GTSE with radix... */
         spapr_ovec_set(spapr->ov5, OV5_MMU_RADIX_GTSE);
     }
     /* ... but not with hash (currently). */
@@ -2824,9 +2831,11 @@
     MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
     Error *local_err = NULL;
     CPUCore *cc = CPU_CORE(dev);
+    sPAPRCPUCore *sc = SPAPR_CPU_CORE(dev);
     char *base_core_type = spapr_get_cpu_core_type(machine->cpu_model);
     const char *type = object_get_typename(OBJECT(dev));
     CPUArchId *core_slot;
+    int node_id;
     int index;
 
     if (dev->hotplugged && !mc->has_hotpluggable_cpus) {
@@ -2861,6 +2870,21 @@
         goto out;
     }
 
+    node_id = core_slot->props.node_id;
+    if (!core_slot->props.has_node_id) {
+        /* by default CPUState::numa_node was 0 if it's not set via CLI
+         * keep it this way for now but in future we probably should
+         * refuse to start up with incomplete numa mapping */
+        node_id = 0;
+    }
+    if (sc->node_id == CPU_UNSET_NUMA_NODE_ID) {
+        sc->node_id = node_id;
+    } else if (sc->node_id != node_id) {
+        error_setg(&local_err, "node-id %d must match numa node specified"
+            "with -numa option for cpu-index %d", sc->node_id, cc->core_id);
+        goto out;
+    }
+
 out:
     g_free(base_core_type);
     error_propagate(errp, local_err);
@@ -2981,11 +3005,18 @@
     return NULL;
 }
 
-static unsigned spapr_cpu_index_to_socket_id(unsigned cpu_index)
+static CpuInstanceProperties
+spapr_cpu_index_to_props(MachineState *machine, unsigned cpu_index)
 {
-    /* Allocate to NUMA nodes on a "socket" basis (not that concept of
-     * socket means much for the paravirtualized PAPR platform) */
-    return cpu_index / smp_threads / smp_cores;
+    CPUArchId *core_slot;
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
+
+    /* make sure possible_cpu are intialized */
+    mc->possible_cpu_arch_ids(machine);
+    /* get CPU core slot containing thread that matches cpu_index */
+    core_slot = spapr_find_cpu_slot(machine, cpu_index, NULL);
+    assert(core_slot);
+    return core_slot->props;
 }
 
 static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
@@ -3012,8 +3043,15 @@
         machine->possible_cpus->cpus[i].arch_id = core_id;
         machine->possible_cpus->cpus[i].props.has_core_id = true;
         machine->possible_cpus->cpus[i].props.core_id = core_id;
-        /* TODO: add 'has_node/node' here to describe
-           to which node core belongs */
+
+        /* default distribution of CPUs over NUMA nodes */
+        if (nb_numa_nodes) {
+            /* preset values but do not enable them i.e. 'has_node_id = false',
+             * numa init code will enable them later if manual mapping wasn't
+             * present on CLI */
+            machine->possible_cpus->cpus[i].props.node_id =
+                core_id / smp_threads / smp_cores % nb_numa_nodes;
+        }
     }
     return machine->possible_cpus;
 }
@@ -3138,7 +3176,7 @@
     hc->pre_plug = spapr_machine_device_pre_plug;
     hc->plug = spapr_machine_device_plug;
     hc->unplug = spapr_machine_device_unplug;
-    mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
+    mc->cpu_index_to_instance_props = spapr_cpu_index_to_props;
     mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids;
     hc->unplug_request = spapr_machine_device_unplug_request;
 
@@ -3242,6 +3280,7 @@
 {
     spapr_machine_2_10_class_options(mc);
     SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_9);
+    mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
 }
 
 DEFINE_SPAPR_MACHINE(2_9, "2.9", false);

diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 4389ef4..a17ea07 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c

@@ -176,13 +176,11 @@
     const char *typename = object_class_get_name(scc->cpu_class);
     size_t size = object_type_get_instance_size(typename);
     Error *local_err = NULL;
-    int core_node_id = numa_get_node_for_cpu(cc->core_id);;
     void *obj;
     int i, j;
 
     sc->threads = g_malloc0(size * cc->nr_threads);
     for (i = 0; i < cc->nr_threads; i++) {
-        int node_id;
         char id[32];
         CPUState *cs;
 
@@ -192,17 +190,8 @@
         cs = CPU(obj);
         cs->cpu_index = cc->core_id + i;
 
-        /* Set NUMA node for the added CPUs  */
-        node_id = numa_get_node_for_cpu(cs->cpu_index);
-        if (node_id != core_node_id) {
-            error_setg(&local_err, "Invalid node-id=%d of thread[cpu-index: %d]"
-                " on CPU[core-id: %d, node-id: %d], node-id must be the same",
-                 node_id, cs->cpu_index, cc->core_id, core_node_id);
-            goto err;
-        }
-        if (node_id < nb_numa_nodes) {
-            cs->numa_node = node_id;
-        }
+        /* Set NUMA node for the threads belonged to core  */
+        cs->numa_node = sc->node_id;
 
         snprintf(id, sizeof(id), "thread[%d]", i);
         object_property_add_child(OBJECT(sc), id, obj, &local_err);
@@ -263,6 +252,11 @@
     "POWER9_v1.0",
 };
 
+static Property spapr_cpu_core_properties[] = {
+    DEFINE_PROP_INT32("node-id", sPAPRCPUCore, node_id, CPU_UNSET_NUMA_NODE_ID),
+    DEFINE_PROP_END_OF_LIST()
+};
+
 void spapr_cpu_core_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
@@ -270,6 +264,7 @@
 
     dc->realize = spapr_cpu_core_realize;
     dc->unrealize = spapr_cpu_core_unrealizefn;
+    dc->props = spapr_cpu_core_properties;
     scc->cpu_class = cpu_class_by_name(TYPE_POWERPC_CPU, data);
     g_assert(scc->cpu_class);
 }

diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index a1cdc87..9fa5545 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c

@@ -675,7 +675,7 @@
     /*
      * Reason: it crashes FIXME find and document the real reason
      */
-    dk->cannot_instantiate_with_device_add_yet = true;
+    dk->user_creatable = false;
 }
 
 static const TypeInfo spapr_dr_connector_info = {

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 9f18f75..0d608d6 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c

@@ -936,7 +936,7 @@
                                              target_ulong opcode,
                                              target_ulong *args)
 {
-    CPUPPCState *env = &cpu->env;
+    CPUState *cs;
     target_ulong flags = args[0];
     target_ulong proc_tbl = args[1];
     target_ulong page_size = args[2];
@@ -992,16 +992,12 @@
     spapr_check_setup_free_hpt(spapr, spapr->patb_entry, cproc);
 
     spapr->patb_entry = cproc; /* Save new process table */
-    if ((flags & FLAG_RADIX) || (flags & FLAG_HASH_PROC_TBL)) {
-        /* Use Process TBL */
-        env->spr[SPR_LPCR] |= LPCR_UPRT;
-    } else {
-        env->spr[SPR_LPCR] &= ~LPCR_UPRT;
-    }
-    if (flags & FLAG_GTSE) { /* Partition Uses Guest Translation Shootdwn */
-        env->spr[SPR_LPCR] |= LPCR_GTSE;
-    } else {
-        env->spr[SPR_LPCR] &= ~LPCR_GTSE;
+
+    /* Update the UPRT and GTSE bits in the LPCR for all cpus */
+    CPU_FOREACH(cs) {
+        set_spr(cs, SPR_LPCR, LPCR_UPRT | LPCR_GTSE,
+                ((flags & (FLAG_RADIX | FLAG_HASH_PROC_TBL)) ? LPCR_UPRT : 0) |
+                ((flags & FLAG_GTSE) ? LPCR_GTSE : 0));
     }
 
     if (kvm_enabled()) {

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index e7567e2..a7cff32 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c

@@ -1994,6 +1994,8 @@
     dc->props = spapr_phb_properties;
     dc->reset = spapr_phb_reset;
     dc->vmsd = &vmstate_spapr_pci;
+    /* Supported by TYPE_SPAPR_MACHINE */
+    dc->user_creatable = true;
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
     hp->plug = spapr_phb_hot_plug_child;
     hp->unplug = spapr_phb_hot_unplug_child;

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index a8a1bab..66a6fbeb 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c

@@ -872,7 +872,6 @@
     DeviceClass *dc = DEVICE_CLASS(klass);
     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
 
-    dc->cannot_instantiate_with_device_add_yet = true;
     dc->reset = s390_pcihost_reset;
     k->init = s390_pcihost_init;
     hc->plug = s390_pcihost_hot_plug;

diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
index b4f6dd5..83d6023 100644
--- a/hw/s390x/sclp.c
+++ b/hw/s390x/sclp.c

@@ -505,10 +505,10 @@
 
     ret = s390_set_memory_limit(machine->maxram_size, &hw_limit);
     if (ret == -E2BIG) {
-        error_setg(&err, "qemu: host supports a maximum of %" PRIu64 " GB",
+        error_setg(&err, "host supports a maximum of %" PRIu64 " GB",
                    hw_limit >> 30);
     } else if (ret) {
-        error_setg(&err, "qemu: setting the guest size failed");
+        error_setg(&err, "setting the guest size failed");
     }
 
 out:

diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c
index 1f2f0ed..4008c81 100644
--- a/hw/sd/milkymist-memcard.c
+++ b/hw/sd/milkymist-memcard.c

@@ -299,7 +299,7 @@
     dc->reset = milkymist_memcard_reset;
     dc->vmsd = &vmstate_milkymist_memcard;
     /* Reason: init() method uses drive_get_next() */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo milkymist_memcard_info = {

diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
index 82c63a4..55c8098 100644
--- a/hw/sd/pl181.c
+++ b/hw/sd/pl181.c

@@ -515,7 +515,7 @@
     k->vmsd = &vmstate_pl181;
     k->reset = pl181_reset;
     /* Reason: init() method uses drive_get_next() */
-    k->cannot_instantiate_with_device_add_yet = true;
+    k->user_creatable = false;
     k->realize = pl181_realize;
 }
 

diff --git a/hw/sh4/sh_pci.c b/hw/sh4/sh_pci.c
index 1747628..38395c0 100644
--- a/hw/sh4/sh_pci.c
+++ b/hw/sh4/sh_pci.c

@@ -171,7 +171,7 @@
      * PCI-facing part of the host bridge, not usable without the
      * host-facing part, which can't be device_add'ed, yet.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo sh_pci_host_info = {

diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c
index e18299a..976d520 100644
--- a/hw/timer/i8254_common.c
+++ b/hw/timer/i8254_common.c

@@ -287,7 +287,7 @@
      * wired to the HPET, and because of that, some wiring is always
      * done by board code.
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo pit_common_type = {

diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
index 4165450..93de3e1 100644
--- a/hw/timer/mc146818rtc.c
+++ b/hw/timer/mc146818rtc.c

@@ -973,7 +973,7 @@
     dc->vmsd = &vmstate_rtc;
     dc->props = mc146818rtc_properties;
     /* Reason: needs to be wired up by rtc_init() */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static void rtc_finalize(Object *obj)

diff --git a/hw/tricore/tricore_testboard.c b/hw/tricore/tricore_testboard.c
index 19dd587..8910bf0 100644
--- a/hw/tricore/tricore_testboard.c
+++ b/hw/tricore/tricore_testboard.c

@@ -50,7 +50,7 @@
                            NULL, 0,
                            EM_TRICORE, 1, 0);
     if (kernel_size <= 0) {
-        error_report("qemu: no kernel file '%s'",
+        error_report("no kernel file '%s'",
                 tricoretb_binfo.kernel_filename);
         exit(1);
     }

diff --git a/hw/usb/dev-hub.c b/hw/usb/dev-hub.c
index 9fe73339..47b7519 100644
--- a/hw/usb/dev-hub.c
+++ b/hw/usb/dev-hub.c

@@ -208,6 +208,7 @@
     USBHubPort *port = &s->ports[port1->index];
 
     if (port->wPortStatus & PORT_STAT_SUSPEND) {
+        port->wPortStatus &= ~PORT_STAT_SUSPEND;
         port->wPortChange |= PORT_STAT_C_SUSPEND;
         usb_wakeup(s->intr, 0);
     }

diff --git a/hw/usb/dev-serial.c b/hw/usb/dev-serial.c
index 6d51373..83a4f0e 100644
--- a/hw/usb/dev-serial.c
+++ b/hw/usb/dev-serial.c

@@ -513,27 +513,18 @@
 {
     USBDevice *dev;
     Chardev *cdrv;
-    uint32_t vendorid = 0, productid = 0;
     char label[32];
     static int index;
 
     while (*filename && *filename != ':') {
         const char *p;
-        char *e;
+
         if (strstart(filename, "vendorid=", &p)) {
-            vendorid = strtol(p, &e, 16);
-            if (e == p || (*e && *e != ',' && *e != ':')) {
-                error_report("bogus vendor ID %s", p);
-                return NULL;
-            }
-            filename = e;
+            error_report("vendorid is not supported anymore");
+            return NULL;
         } else if (strstart(filename, "productid=", &p)) {
-            productid = strtol(p, &e, 16);
-            if (e == p || (*e && *e != ',' && *e != ':')) {
-                error_report("bogus product ID %s", p);
-                return NULL;
-            }
-            filename = e;
+            error_report("productid is not supported anymore");
+            return NULL;
         } else {
             error_report("unrecognized serial USB option %s", filename);
             return NULL;
@@ -554,10 +545,7 @@
 
     dev = usb_create(bus, "usb-serial");
     qdev_prop_set_chr(&dev->qdev, "chardev", cdrv);
-    if (vendorid)
-        qdev_prop_set_uint16(&dev->qdev, "vendorid", vendorid);
-    if (productid)
-        qdev_prop_set_uint16(&dev->qdev, "productid", productid);
+
     return dev;
 }
 

diff --git a/hw/usb/dev-smartcard-reader.c b/hw/usb/dev-smartcard-reader.c
index 757b8b3..49cb182 100644
--- a/hw/usb/dev-smartcard-reader.c
+++ b/hw/usb/dev-smartcard-reader.c

@@ -813,7 +813,10 @@
     if (p->b.bError) {
         DPRINTF(s, D_VERBOSE, "error %d\n", p->b.bError);
     }
-    memcpy(p->abData, data, len);
+    if (len) {
+        g_assert_nonnull(data);
+        memcpy(p->abData, data, len);
+    }
     ccid_reset_error_status(s);
     usb_wakeup(s->bulk, 0);
 }

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index a2d3143..77d8e11 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c

@@ -50,7 +50,7 @@
 /* Very pessimistic, let's hope it's enough for all cases */
 #define EV_QUEUE (((3 * 24) + 16) * MAXSLOTS)
 
-#define TRB_LINK_LIMIT  4
+#define TRB_LINK_LIMIT  32
 #define COMMAND_LIMIT   256
 #define TRANSFER_LIMIT  256
 
@@ -1790,9 +1790,6 @@
     }
 }
 
-static int xhci_submit(XHCIState *xhci, XHCITransfer *xfer,
-                       XHCIEPContext *epctx);
-
 static int xhci_setup_packet(XHCITransfer *xfer)
 {
     USBEndpoint *ep;
@@ -1806,7 +1803,7 @@
         ep = xhci_epid_to_usbep(xfer->epctx);
         if (!ep) {
             DPRINTF("xhci: slot %d has no device\n",
-                    xfer->slotid);
+                    xfer->epctx->slotid);
             return -1;
         }
     }
@@ -1980,7 +1977,7 @@
 {
     uint64_t mfindex;
 
-    DPRINTF("xhci_submit(slotid=%d,epid=%d)\n", xfer->slotid, xfer->epid);
+    DPRINTF("xhci_submit(slotid=%d,epid=%d)\n", epctx->slotid, epctx->epid);
 
     xfer->in_xfer = epctx->type>>2;
 

diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index b001a27..ad5ef78 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c

@@ -229,21 +229,10 @@
 static void usbredir_log_data(USBRedirDevice *dev, const char *desc,
     const uint8_t *data, int len)
 {
-    int i, j, n;
-
     if (dev->debug < usbredirparser_debug_data) {
         return;
     }
-
-    for (i = 0; i < len; i += j) {
-        char buf[128];
-
-        n = sprintf(buf, "%s", desc);
-        for (j = 0; j < 8 && i + j < len; j++) {
-            n += sprintf(buf + n, " %02X", data[i + j]);
-        }
-        error_report("%s", buf);
-    }
+    qemu_hexdump((char *)data, stderr, desc, len);
 }
 
 /*

diff --git a/hw/vfio/amd-xgbe.c b/hw/vfio/amd-xgbe.c
index 2c60310..fab196c 100644
--- a/hw/vfio/amd-xgbe.c
+++ b/hw/vfio/amd-xgbe.c

@@ -38,6 +38,8 @@
     dc->realize = amd_xgbe_realize;
     dc->desc = "VFIO AMD XGBE";
     dc->vmsd = &vfio_platform_amd_xgbe_vmstate;
+    /* Supported by TYPE_VIRT_MACHINE */
+    dc->user_creatable = true;
 }
 
 static const TypeInfo vfio_amd_xgbe_dev_info = {

diff --git a/hw/vfio/calxeda-xgmac.c b/hw/vfio/calxeda-xgmac.c
index bb15d58..7bb17af 100644
--- a/hw/vfio/calxeda-xgmac.c
+++ b/hw/vfio/calxeda-xgmac.c

@@ -38,6 +38,8 @@
     dc->realize = calxeda_xgmac_realize;
     dc->desc = "VFIO Calxeda XGMAC";
     dc->vmsd = &vfio_platform_calxeda_xgmac_vmstate;
+    /* Supported by TYPE_VIRT_MACHINE */
+    dc->user_creatable = true;
 }
 
 static const TypeInfo vfio_calxeda_xgmac_dev_info = {

diff --git a/hw/xen/xen_backend.c b/hw/xen/xen_backend.c
index c85f163..3570f37 100644
--- a/hw/xen/xen_backend.c
+++ b/hw/xen/xen_backend.c

@@ -147,7 +147,7 @@
         qdev_unplug(DEVICE(xendev), NULL);
         return NULL;
     }
-    fcntl(xenevtchn_fd(xendev->evtchndev), F_SETFD, FD_CLOEXEC);
+    qemu_set_cloexec(xenevtchn_fd(xendev->evtchndev));
 
     if (ops->flags & DEVOPS_FLAG_NEED_GNTDEV) {
         xendev->gnttabdev = xengnttab_open(NULL, 0);
@@ -619,6 +619,8 @@
 
     dc->props = xendev_properties;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    /* xen-backend devices can be plugged/unplugged dynamically */
+    dc->user_creatable = true;
 }
 
 static const TypeInfo xendev_type_info = {

diff --git a/include/block/aio.h b/include/block/aio.h
index 406e323..e9aeeae 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h

@@ -454,8 +454,14 @@
  */
 static inline void aio_enable_external(AioContext *ctx)
 {
-    assert(ctx->external_disable_cnt > 0);
-    atomic_dec(&ctx->external_disable_cnt);
+    int old;
+
+    old = atomic_fetch_dec(&ctx->external_disable_cnt);
+    assert(old > 0);
+    if (old == 1) {
+        /* Kick event loop so it re-arms file descriptors */
+        aio_notify(ctx);
+    }
 }
 
 /**

diff --git a/include/block/block.h b/include/block/block.h
index 862eb56..9b355e9 100644
--- a/include/block/block.h
+++ b/include/block/block.h

@@ -109,6 +109,7 @@
 #define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
 #define BDRV_OPT_READ_ONLY      "read-only"
 #define BDRV_OPT_DISCARD        "discard"
+#define BDRV_OPT_FORCE_SHARE    "force-share"
 
 
 #define BDRV_SECTOR_BITS   9
@@ -120,29 +121,32 @@
 #define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
 
 /*
- * Allocation status flags
- * BDRV_BLOCK_DATA: data is read from a file returned by bdrv_get_block_status.
- * BDRV_BLOCK_ZERO: sectors read as zero
- * BDRV_BLOCK_OFFSET_VALID: sector stored as raw data in a file returned by
- *                          bdrv_get_block_status.
+ * Allocation status flags for bdrv_get_block_status() and friends.
+ *
+ * Public flags:
+ * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
+ * BDRV_BLOCK_ZERO: offset reads as zero
+ * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
  * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
- *                       layer (as opposed to the backing file)
- * BDRV_BLOCK_RAW: used internally to indicate that the request
- *                 was answered by the raw driver and that one
- *                 should look in bs->file directly.
+ *                       layer (short for DATA || ZERO), set by block layer
  *
- * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 represent the offset in
- * bs->file where sector data can be read from as raw data.
+ * Internal flag:
+ * BDRV_BLOCK_RAW: used internally to indicate that the request was
+ *                 answered by a passthrough driver such as raw and that the
+ *                 block layer should recompute the answer from bs->file.
  *
- * DATA == 0 && ZERO == 0 means that data is read from backing_hd if present.
+ * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK)
+ * represent the offset in the returned BDS that is allocated for the
+ * corresponding raw data; however, whether that offset actually contains
+ * data also depends on BDRV_BLOCK_DATA and BDRV_BLOCK_ZERO, as follows:
  *
  * DATA ZERO OFFSET_VALID
- *  t    t        t       sectors read as zero, bs->file is zero at offset
- *  t    f        t       sectors read as valid from bs->file at offset
- *  f    t        t       sectors preallocated, read as zero, bs->file not
+ *  t    t        t       sectors read as zero, returned file is zero at offset
+ *  t    f        t       sectors read as valid from file at offset
+ *  f    t        t       sectors preallocated, read as zero, returned file not
  *                        necessarily zero at offset
  *  f    f        t       sectors preallocated but read from backing_hd,
- *                        bs->file contains garbage at offset
+ *                        returned file contains garbage at offset
  *  t    t        f       sectors preallocated, read as zero, unknown offset
  *  t    f        f       sectors read from unknown file or offset
  *  f    t        f       not allocated or unknown offset, read as zero
@@ -224,6 +228,8 @@
     BLK_PERM_ALL                = 0x1f,
 };
 
+char *bdrv_perm_names(uint64_t perm);
+
 /* disk I/O throttling */
 void bdrv_init(void);
 void bdrv_init_with_whitelist(void);
@@ -366,8 +372,6 @@
 void bdrv_invalidate_cache_all(Error **errp);
 int bdrv_inactivate_all(void);
 
-void blk_resume_after_migration(Error **errp);
-
 /* Ensure contents are flushed to disk.  */
 int bdrv_flush(BlockDriverState *bs);
 int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
@@ -434,6 +438,7 @@
                             int64_t sector_num, int nb_sectors, int *pnum);
 
 bool bdrv_is_read_only(BlockDriverState *bs);
+bool bdrv_is_writable(BlockDriverState *bs);
 int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp);
 int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp);
 bool bdrv_is_sg(BlockDriverState *bs);

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8773940..8d3724c 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h

@@ -165,6 +165,13 @@
         int64_t offset, int count, BdrvRequestFlags flags);
     int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
         int64_t offset, int count);
+
+    /*
+     * Building block for bdrv_block_status[_above]. The driver should
+     * answer only according to the current layer, and should not
+     * set BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW.  See block.h
+     * for the meaning of _DATA, _ZERO, and _OFFSET_VALID.
+     */
     int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, int *pnum,
         BlockDriverState **file);
@@ -473,6 +480,12 @@
     void (*drained_begin)(BdrvChild *child);
     void (*drained_end)(BdrvChild *child);
 
+    /* Notifies the parent that the child has been activated/inactivated (e.g.
+     * when migration is completing) and it can start/stop requesting
+     * permissions and doing I/O on it. */
+    void (*activate)(BdrvChild *child, Error **errp);
+    int (*inactivate)(BdrvChild *child);
+
     void (*attach)(BdrvChild *child);
     void (*detach)(BdrvChild *child);
 };
@@ -518,6 +531,7 @@
     bool valid_key; /* if true, a valid encryption key has been set */
     bool sg;        /* if true, the device is a /dev/sg* */
     bool probed;    /* if true, format was probed rather than specified */
+    bool force_share; /* if true, always allow all shared permissions */
 
     BlockDriver *drv; /* NULL means no media */
     void *opaque;

diff --git a/include/crypto/block.h b/include/crypto/block.h
index 4a053a3..013a435 100644
--- a/include/crypto/block.h
+++ b/include/crypto/block.h

@@ -30,22 +30,22 @@
  * and QCryptoBlockOpenOptions in qapi/crypto.json */
 
 typedef ssize_t (*QCryptoBlockReadFunc)(QCryptoBlock *block,
-                                        void *opaque,
                                         size_t offset,
                                         uint8_t *buf,
                                         size_t buflen,
+                                        void *opaque,
                                         Error **errp);
 
 typedef ssize_t (*QCryptoBlockInitFunc)(QCryptoBlock *block,
-                                        void *opaque,
                                         size_t headerlen,
+                                        void *opaque,
                                         Error **errp);
 
 typedef ssize_t (*QCryptoBlockWriteFunc)(QCryptoBlock *block,
-                                         void *opaque,
                                          size_t offset,
                                          const uint8_t *buf,
                                          size_t buflen,
+                                         void *opaque,
                                          Error **errp);
 
 /**

diff --git a/include/crypto/random.h b/include/crypto/random.h
index a101353..a07229c 100644
--- a/include/crypto/random.h
+++ b/include/crypto/random.h

@@ -40,5 +40,14 @@
                          size_t buflen,
                          Error **errp);
 
+/**
+ * qcrypto_random_init:
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Initializes the handles used by qcrypto_random_bytes
+ *
+ * Returns 0 on success, -1 on error
+ */
+int qcrypto_random_init(Error **errp);
 
 #endif /* QCRYPTO_RANDOM_H */

diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index eb07c2d..88d0738 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h

@@ -392,4 +392,5 @@
 void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base,
                        uint64_t len, int node, MemoryAffinityFlags flags);
 
+void build_slit(GArray *table_data, BIOSLinker *linker);
 #endif

diff --git a/include/hw/boards.h b/include/hw/boards.h
index 31d9c72..76ce021 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h

@@ -32,6 +32,7 @@
 MachineClass *find_default_machine(void);
 extern MachineState *current_machine;
 
+void machine_run_board_init(MachineState *machine);
 bool machine_usb(MachineState *machine);
 bool machine_kernel_irqchip_allowed(MachineState *machine);
 bool machine_kernel_irqchip_required(MachineState *machine);
@@ -42,6 +43,9 @@
 bool machine_mem_merge(MachineState *machine);
 void machine_register_compat_props(MachineState *machine);
 HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine);
+void machine_set_cpu_numa_node(MachineState *machine,
+                               const CpuInstanceProperties *props,
+                               Error **errp);
 
 /**
  * CPUArchId:
@@ -74,7 +78,10 @@
  *    of HotplugHandler object, which handles hotplug operation
  *    for a given @dev. It may return NULL if @dev doesn't require
  *    any actions to be performed by hotplug handler.
- * @cpu_index_to_socket_id:
+ * @cpu_index_to_instance_props:
+ *    used to provide @cpu_index to socket/core/thread number mapping, allowing
+ *    legacy code to perform maping from cpu_index to topology properties
+ *    Returns: tuple of socket/core/thread ids given cpu_index belongs to.
  *    used to provide @cpu_index to socket number mapping, allowing
  *    a machine to group CPU threads belonging to the same socket/package
  *    Returns: socket number given cpu_index belongs to.
@@ -136,10 +143,13 @@
     int minimum_page_bits;
     bool has_hotpluggable_cpus;
     int numa_mem_align_shift;
+    void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes,
+                                 int nb_nodes, ram_addr_t size);
 
     HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
                                            DeviceState *dev);
-    unsigned (*cpu_index_to_socket_id)(unsigned cpu_index);
+    CpuInstanceProperties (*cpu_index_to_instance_props)(MachineState *machine,
+                                                         unsigned cpu_index);
     const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
 };
 

diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index c1288f9..9c5437d 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h

@@ -21,6 +21,7 @@
 
 #include "hw/boards.h"
 #include "hw/sysbus.h"
+#include "hw/ipmi/ipmi.h"
 #include "hw/ppc/pnv_lpc.h"
 #include "hw/ppc/pnv_psi.h"
 #include "hw/ppc/pnv_occ.h"
@@ -118,8 +119,6 @@
 #define POWERNV_MACHINE(obj) \
     OBJECT_CHECK(PnvMachineState, (obj), TYPE_POWERNV_MACHINE)
 
-typedef struct IPMIBmc IPMIBmc;
-
 typedef struct PnvMachineState {
     /*< private >*/
     MachineState parent_obj;

diff --git a/include/hw/ppc/pnv_lpc.h b/include/hw/ppc/pnv_lpc.h
index ccf969af..023b4f0 100644
--- a/include/hw/ppc/pnv_lpc.h
+++ b/include/hw/ppc/pnv_lpc.h

@@ -19,12 +19,12 @@
 #ifndef _PPC_PNV_LPC_H
 #define _PPC_PNV_LPC_H
 
+#include "hw/ppc/pnv_psi.h"
+
 #define TYPE_PNV_LPC "pnv-lpc"
 #define PNV_LPC(obj) \
      OBJECT_CHECK(PnvLpcController, (obj), TYPE_PNV_LPC)
 
-typedef struct PnvPsi PnvPsi;
-
 typedef struct PnvLpcController {
     DeviceState parent;
 

diff --git a/include/hw/ppc/pnv_occ.h b/include/hw/ppc/pnv_occ.h
index f8ec330..82f299d 100644
--- a/include/hw/ppc/pnv_occ.h
+++ b/include/hw/ppc/pnv_occ.h

@@ -19,11 +19,11 @@
 #ifndef _PPC_PNV_OCC_H
 #define _PPC_PNV_OCC_H
 
+#include "hw/ppc/pnv_psi.h"
+
 #define TYPE_PNV_OCC "pnv-occ"
 #define PNV_OCC(obj) OBJECT_CHECK(PnvOCC, (obj), TYPE_PNV_OCC)
 
-typedef struct PnvPsi PnvPsi;
-
 typedef struct PnvOCC {
     DeviceState xd;
 

diff --git a/include/hw/ppc/spapr_cpu_core.h b/include/hw/ppc/spapr_cpu_core.h
index 3c35665..93051e9 100644
--- a/include/hw/ppc/spapr_cpu_core.h
+++ b/include/hw/ppc/spapr_cpu_core.h

@@ -27,6 +27,7 @@
 
     /*< public >*/
     void *threads;
+    int node_id;
 } sPAPRCPUCore;
 
 typedef struct sPAPRCPUCoreClass {

diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index c215dc7..05e6acb 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h

@@ -29,6 +29,7 @@
 #define XICS_H
 
 #include "hw/qdev.h"
+#include "target/ppc/cpu-qom.h"
 
 #define XICS_IPI        0x2
 #define XICS_BUID       0x1
@@ -46,7 +47,6 @@
 typedef struct ICSState ICSState;
 typedef struct ICSIRQState ICSIRQState;
 typedef struct XICSFabric XICSFabric;
-typedef struct PowerPCCPU PowerPCCPU;
 
 #define TYPE_ICP "icp"
 #define ICP(obj) OBJECT_CHECK(ICPState, (obj), TYPE_ICP)
@@ -144,6 +144,8 @@
 #define XICS_STATUS_SENT               0x2
 #define XICS_STATUS_REJECTED           0x4
 #define XICS_STATUS_MASKED_PENDING     0x8
+#define XICS_STATUS_PRESENTED          0x10
+#define XICS_STATUS_QUEUED             0x20
     uint8_t status;
 /* (flags & XICS_FLAGS_IRQ_MASK) == 0 means the interrupt is not allocated */
 #define XICS_FLAGS_IRQ_LSI             0x1

diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 4bf86b0..e69489e 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h

@@ -103,16 +103,17 @@
     Property *props;
 
     /*
-     * Shall we hide this device model from -device / device_add?
+     * Can this device be instantiated with -device / device_add?
      * All devices should support instantiation with device_add, and
      * this flag should not exist.  But we're not there, yet.  Some
      * devices fail to instantiate with cryptic error messages.
      * Others instantiate, but don't work.  Exposing users to such
-     * behavior would be cruel; this flag serves to protect them.  It
-     * should never be set without a comment explaining why it is set.
+     * behavior would be cruel; clearing this flag will protect them.
+     * It should never be cleared without a comment explaining why it
+     * is cleared.
      * TODO remove once we're there
      */
-    bool cannot_instantiate_with_device_add_yet;
+    bool user_creatable;
     bool hotpluggable;
 
     /* callbacks */

diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index 1d69fa7..d206fc9 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h

@@ -134,12 +134,12 @@
  *   device_add, so add code like this:
  *   |* Reason: pointer property "NAME-OF-YOUR-PROP" *|
  *   DeviceClass *dc = DEVICE_CLASS(class);
- *   dc->cannot_instantiate_with_device_add_yet = true;
+ *   dc->user_creatable = false;
  *
  * - If the property may safely remain null, document it like this:
  *   |*
  *    * Note: pointer property "interrupt_vector" may remain null, thus
- *    * no need for dc->cannot_instantiate_with_device_add_yet = true;
+ *    * no need for dc->user_creatable = false;
  *    *|
  */
 #define DEFINE_PROP_PTR(_n, _s, _f)             \

diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index f3ffdce..83f474f 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h

@@ -169,6 +169,7 @@
                                   struct virtio_gpu_ctrl_command *cmd);
 void virtio_gpu_virgl_fence_poll(VirtIOGPU *g);
 void virtio_gpu_virgl_reset(VirtIOGPU *g);
+void virtio_gpu_gl_block(void *opaque, bool block);
 int virtio_gpu_virgl_init(VirtIOGPU *g);
 
 #endif

diff --git a/include/io/channel-file.h b/include/io/channel-file.h
index d2462c2..79245f1 100644
--- a/include/io/channel-file.h
+++ b/include/io/channel-file.h

@@ -71,7 +71,7 @@
 
 /**
  * qio_channel_file_new_path:
- * @fd: the file descriptor
+ * @path: the file path
  * @flags: the open flags (O_RDONLY|O_WRONLY|O_RDWR, etc)
  * @mode: the file creation mode if O_WRONLY is set in @flags
  * @errp: pointer to initialized error object

diff --git a/include/io/channel.h b/include/io/channel.h
index 5d48906..db9bb02 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h

@@ -315,7 +315,7 @@
                          Error **errp);
 
 /**
- * qio_channel_writev:
+ * qio_channel_write:
  * @ioc: the channel object
  * @buf: the memory regions to send data from
  * @buflen: the length of @buf

diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
index 1881284..3f0926c 100644
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h

@@ -201,16 +201,6 @@
     return find_next_zero_bit(addr, size, 0);
 }
 
-static inline unsigned long hweight_long(unsigned long w)
-{
-    unsigned long count;
-
-    for (count = 0; w; w >>= 1) {
-        count += w & 1;
-    }
-    return count;
-}
-
 /**
  * rol8 - rotate an 8-bit value left
  * @word: value to rotate

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 122ff06..1c9f5e2 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h

@@ -341,6 +341,9 @@
 #ifndef _WIN32
 int qemu_dup(int fd);
 #endif
+int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive);
+int qemu_unlock_fd(int fd, int64_t start, int64_t len);
+int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive);
 
 #if defined(__HAIKU__) && defined(__i386__)
 #define FMT_pid "%ld"

diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index f08d327..7d85057 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h

@@ -97,5 +97,6 @@
 typedef struct uWireSlave uWireSlave;
 typedef struct VirtIODevice VirtIODevice;
 typedef struct Visitor Visitor;
+typedef struct node_info NodeInfo;
 
 #endif /* QEMU_TYPEDEFS_H */

diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 5d10359..55214ce 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h

@@ -258,6 +258,8 @@
 
 struct qemu_work_item;
 
+#define CPU_UNSET_NUMA_NODE_ID -1
+
 /**
  * CPUState:
  * @cpu_index: CPU index (informative).

diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index 8f09dcf..7ffde5b 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h

@@ -8,6 +8,7 @@
 #include "hw/boards.h"
 
 extern int nb_numa_nodes;   /* Number of NUMA nodes */
+extern bool have_numa_distance;
 
 struct numa_addr_range {
     ram_addr_t mem_start;
@@ -15,24 +16,23 @@
     QLIST_ENTRY(numa_addr_range) entry;
 };
 
-typedef struct node_info {
+struct node_info {
     uint64_t node_mem;
-    unsigned long *node_cpu;
     struct HostMemoryBackend *node_memdev;
     bool present;
     QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */
-} NodeInfo;
+    uint8_t distance[MAX_NODES];
+};
 
 extern NodeInfo numa_info[MAX_NODES];
-void parse_numa_opts(MachineClass *mc);
-void numa_post_machine_init(void);
+void parse_numa_opts(MachineState *ms);
 void query_numa_node_mem(uint64_t node_mem[]);
 extern QemuOptsList qemu_numa_opts;
 void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
 void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
 uint32_t numa_get_node(ram_addr_t addr, Error **errp);
-
-/* on success returns node index in numa_info,
- * on failure returns nb_numa_nodes */
-int numa_get_node_for_cpu(int idx);
+void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
+                                 int nb_nodes, ram_addr_t size);
+void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
+                                  int nb_nodes, ram_addr_t size);
 #endif

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 15656b7..be9e22c 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h

@@ -166,6 +166,10 @@
 
 #define MAX_NODES 128
 #define NUMA_NODE_UNASSIGNED MAX_NODES
+#define NUMA_DISTANCE_MIN         10
+#define NUMA_DISTANCE_DEFAULT     20
+#define NUMA_DISTANCE_MAX         254
+#define NUMA_DISTANCE_UNREACHABLE 255
 
 #define MAX_OPTION_ROMS 16
 typedef struct QEMUOptionRom {

diff --git a/include/sysemu/xen-mapcache.h b/include/sysemu/xen-mapcache.h
index b8c93b9..01daaad 100644
--- a/include/sysemu/xen-mapcache.h
+++ b/include/sysemu/xen-mapcache.h

@@ -17,7 +17,7 @@
 void xen_map_cache_init(phys_offset_to_gaddr_t f,
                         void *opaque);
 uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
-                       uint8_t lock);
+                       uint8_t lock, bool dma);
 ram_addr_t xen_ram_addr_from_mapcache(void *ptr);
 void xen_invalidate_map_cache_entry(uint8_t *buffer);
 void xen_invalidate_map_cache(void);
@@ -31,7 +31,8 @@
 
 static inline uint8_t *xen_map_cache(hwaddr phys_addr,
                                      hwaddr size,
-                                     uint8_t lock)
+                                     uint8_t lock,
+                                     bool dma)
 {
     abort();
 }

diff --git a/include/ui/console.h b/include/ui/console.h
index d759338..7262bef 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h

@@ -527,4 +527,7 @@
 }
 #endif
 
+/* egl-headless.c */
+void egl_headless_init(void);
+
 #endif

diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h
index 88a13e8..c785d60 100644
--- a/include/ui/egl-helpers.h
+++ b/include/ui/egl-helpers.h

@@ -21,7 +21,8 @@
 
 EGLSurface qemu_egl_init_surface_x11(EGLContext ectx, Window win);
 
-int qemu_egl_init_dpy(EGLNativeDisplayType dpy, bool gles, bool debug);
+int qemu_egl_init_dpy_x11(EGLNativeDisplayType dpy);
+int qemu_egl_init_dpy_mesa(EGLNativeDisplayType dpy);
 EGLContext qemu_egl_init_ctx(void);
 
 #endif /* EGL_HELPERS_H */

diff --git a/include/ui/input.h b/include/ui/input.h
index d06a12d..3cfd0f3 100644
--- a/include/ui/input.h
+++ b/include/ui/input.h

@@ -8,7 +8,8 @@
 #define INPUT_EVENT_MASK_REL   (1<<INPUT_EVENT_KIND_REL)
 #define INPUT_EVENT_MASK_ABS   (1<<INPUT_EVENT_KIND_ABS)
 
-#define INPUT_EVENT_ABS_SIZE   0x8000
+#define INPUT_EVENT_ABS_MIN    0x0000
+#define INPUT_EVENT_ABS_MAX    0x7FFF
 
 typedef struct QemuInputHandler QemuInputHandler;
 typedef struct QemuInputHandlerState QemuInputHandlerState;
@@ -54,12 +55,14 @@
                                uint32_t button_old, uint32_t button_new);
 
 bool qemu_input_is_absolute(void);
-int qemu_input_scale_axis(int value, int size_in, int size_out);
+int qemu_input_scale_axis(int value,
+                          int min_in, int max_in,
+                          int min_out, int max_out);
 InputEvent *qemu_input_event_new_move(InputEventKind kind,
                                       InputAxis axis, int value);
 void qemu_input_queue_rel(QemuConsole *src, InputAxis axis, int value);
-void qemu_input_queue_abs(QemuConsole *src, InputAxis axis,
-                          int value, int size);
+void qemu_input_queue_abs(QemuConsole *src, InputAxis axis, int value,
+                          int min_in, int max_in);
 
 void qemu_input_check_mode_change(void);
 void qemu_add_mouse_mode_change_notifier(Notifier *notify);

diff --git a/linux-user/main.c b/linux-user/main.c
index 79d621b..ad03c9e 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c

@@ -4229,10 +4229,7 @@
     qemu_init_cpu_list();
     module_call_init(MODULE_INIT_QOM);
 
-    if ((envlist = envlist_create()) == NULL) {
-        (void) fprintf(stderr, "Unable to allocate envlist\n");
-        exit(EXIT_FAILURE);
-    }
+    envlist = envlist_create();
 
     /* add current environment into the list */
     for (wrk = environ; *wrk != NULL; wrk++) {
@@ -4429,10 +4426,10 @@
     }
 
     for (wrk = target_environ; *wrk; wrk++) {
-        free(*wrk);
+        g_free(*wrk);
     }
 
-    free(target_environ);
+    g_free(target_environ);
 
     if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
         qemu_log("guest_base  0x%lx\n", guest_base);

diff --git a/migration/migration.c b/migration/migration.c
index 799952c..a5ade23 100644
--- a/migration/migration.c
+++ b/migration/migration.c

@@ -338,20 +338,11 @@
     Error *local_err = NULL;
     MigrationIncomingState *mis = opaque;
 
-    /* Make sure all file formats flush their mutable metadata */
+    /* Make sure all file formats flush their mutable metadata.
+     * If we get an error here, just don't restart the VM yet. */
     bdrv_invalidate_cache_all(&local_err);
     if (local_err) {
-        migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
-                          MIGRATION_STATUS_FAILED);
         error_report_err(local_err);
-        migrate_decompress_threads_join();
-        exit(EXIT_FAILURE);
-    }
-
-    /* If we get an error here, just don't restart the VM yet. */
-    blk_resume_after_migration(&local_err);
-    if (local_err) {
-        error_free(local_err);
         local_err = NULL;
         autostart = false;
     }

diff --git a/migration/savevm.c b/migration/savevm.c
index 352a8f2..7f66d58 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c

@@ -1612,16 +1612,11 @@
 
     qemu_announce_self();
 
-    /* Make sure all file formats flush their mutable metadata */
+    /* Make sure all file formats flush their mutable metadata.
+     * If we get an error here, just don't restart the VM yet. */
     bdrv_invalidate_cache_all(&local_err);
     if (local_err) {
         error_report_err(local_err);
-    }
-
-    /* If we get an error here, just don't restart the VM yet. */
-    blk_resume_after_migration(&local_err);
-    if (local_err) {
-        error_free(local_err);
         local_err = NULL;
         autostart = false;
     }

diff --git a/monitor.c b/monitor.c
index 078cba5..afbacfe 100644
--- a/monitor.c
+++ b/monitor.c

@@ -3254,7 +3254,7 @@
                                              TYPE_DEVICE);
         name = object_class_get_name(OBJECT_CLASS(dc));
 
-        if (!dc->cannot_instantiate_with_device_add_yet
+        if (dc->user_creatable
             && !strncmp(name, str, len)) {
             readline_add_completion(rs, name);
         }

diff --git a/numa.c b/numa.c
index 6fc2393..ca73145 100644
--- a/numa.c
+++ b/numa.c

@@ -51,6 +51,7 @@
                              * For all nodes, nodeid < max_numa_nodeid
                              */
 int nb_numa_nodes;
+bool have_numa_distance;
 NodeInfo numa_info[MAX_NODES];
 
 void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
@@ -140,10 +141,12 @@
     return -1;
 }
 
-static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
+static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
+                            QemuOpts *opts, Error **errp)
 {
     uint16_t nodenr;
     uint16List *cpus = NULL;
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
 
     if (node->has_nodeid) {
         nodenr = node->nodeid;
@@ -162,7 +165,12 @@
         return;
     }
 
+    if (!mc->cpu_index_to_instance_props) {
+        error_report("NUMA is not supported by this machine-type");
+        exit(1);
+    }
     for (cpus = node->cpus; cpus; cpus = cpus->next) {
+        CpuInstanceProperties props;
         if (cpus->value >= max_cpus) {
             error_setg(errp,
                        "CPU index (%" PRIu16 ")"
@@ -170,11 +178,14 @@
                        cpus->value, max_cpus);
             return;
         }
-        bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1);
+        props = mc->cpu_index_to_instance_props(ms, cpus->value);
+        props.node_id = nodenr;
+        props.has_node_id = true;
+        machine_set_cpu_numa_node(ms, &props, &error_fatal);
     }
 
     if (node->has_mem && node->has_memdev) {
-        error_setg(errp, "qemu: cannot specify both mem= and memdev=");
+        error_setg(errp, "cannot specify both mem= and memdev=");
         return;
     }
 
@@ -182,7 +193,7 @@
         have_memdevs = node->has_memdev;
     }
     if (node->has_memdev != have_memdevs) {
-        error_setg(errp, "qemu: memdev option must be specified for either "
+        error_setg(errp, "memdev option must be specified for either "
                    "all or no nodes");
         return;
     }
@@ -212,9 +223,47 @@
     max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1);
 }
 
+static void parse_numa_distance(NumaDistOptions *dist, Error **errp)
+{
+    uint16_t src = dist->src;
+    uint16_t dst = dist->dst;
+    uint8_t val = dist->val;
+
+    if (src >= MAX_NODES || dst >= MAX_NODES) {
+        error_setg(errp,
+                   "Invalid node %" PRIu16
+                   ", max possible could be %" PRIu16,
+                   MAX(src, dst), MAX_NODES);
+        return;
+    }
+
+    if (!numa_info[src].present || !numa_info[dst].present) {
+        error_setg(errp, "Source/Destination NUMA node is missing. "
+                   "Please use '-numa node' option to declare it first.");
+        return;
+    }
+
+    if (val < NUMA_DISTANCE_MIN) {
+        error_setg(errp, "NUMA distance (%" PRIu8 ") is invalid, "
+                   "it shouldn't be less than %d.",
+                   val, NUMA_DISTANCE_MIN);
+        return;
+    }
+
+    if (src == dst && val != NUMA_DISTANCE_MIN) {
+        error_setg(errp, "Local distance of node %d should be %d.",
+                   src, NUMA_DISTANCE_MIN);
+        return;
+    }
+
+    numa_info[src].distance[dst] = val;
+    have_numa_distance = true;
+}
+
 static int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
 {
     NumaOptions *object = NULL;
+    MachineState *ms = opaque;
     Error *err = NULL;
 
     {
@@ -229,12 +278,33 @@
 
     switch (object->type) {
     case NUMA_OPTIONS_TYPE_NODE:
-        numa_node_parse(&object->u.node, opts, &err);
+        parse_numa_node(ms, &object->u.node, opts, &err);
         if (err) {
             goto end;
         }
         nb_numa_nodes++;
         break;
+    case NUMA_OPTIONS_TYPE_DIST:
+        parse_numa_distance(&object->u.dist, &err);
+        if (err) {
+            goto end;
+        }
+        break;
+    case NUMA_OPTIONS_TYPE_CPU:
+        if (!object->u.cpu.has_node_id) {
+            error_setg(&err, "Missing mandatory node-id property");
+            goto end;
+        }
+        if (!numa_info[object->u.cpu.node_id].present) {
+            error_setg(&err, "Invalid node-id=%" PRId64 ", NUMA node must be "
+                "defined with -numa node,nodeid=ID before it's used with "
+                "-numa cpu,node-id=ID", object->u.cpu.node_id);
+            goto end;
+        }
+
+        machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu),
+                                  &err);
+        break;
     default:
         abort();
     }
@@ -249,60 +319,118 @@
     return 0;
 }
 
-static char *enumerate_cpus(unsigned long *cpus, int max_cpus)
+/* If all node pair distances are symmetric, then only distances
+ * in one direction are enough. If there is even one asymmetric
+ * pair, though, then all distances must be provided. The
+ * distance from a node to itself is always NUMA_DISTANCE_MIN,
+ * so providing it is never necessary.
+ */
+static void validate_numa_distance(void)
 {
-    int cpu;
-    bool first = true;
-    GString *s = g_string_new(NULL);
+    int src, dst;
+    bool is_asymmetrical = false;
 
-    for (cpu = find_first_bit(cpus, max_cpus);
-        cpu < max_cpus;
-        cpu = find_next_bit(cpus, max_cpus, cpu + 1)) {
-        g_string_append_printf(s, "%s%d", first ? "" : " ", cpu);
-        first = false;
-    }
-    return g_string_free(s, FALSE);
-}
+    for (src = 0; src < nb_numa_nodes; src++) {
+        for (dst = src; dst < nb_numa_nodes; dst++) {
+            if (numa_info[src].distance[dst] == 0 &&
+                numa_info[dst].distance[src] == 0) {
+                if (src != dst) {
+                    error_report("The distance between node %d and %d is "
+                                 "missing, at least one distance value "
+                                 "between each nodes should be provided.",
+                                 src, dst);
+                    exit(EXIT_FAILURE);
+                }
+            }
 
-static void validate_numa_cpus(void)
-{
-    int i;
-    unsigned long *seen_cpus = bitmap_new(max_cpus);
-
-    for (i = 0; i < nb_numa_nodes; i++) {
-        if (bitmap_intersects(seen_cpus, numa_info[i].node_cpu, max_cpus)) {
-            bitmap_and(seen_cpus, seen_cpus,
-                       numa_info[i].node_cpu, max_cpus);
-            error_report("CPU(s) present in multiple NUMA nodes: %s",
-                         enumerate_cpus(seen_cpus, max_cpus));
-            g_free(seen_cpus);
-            exit(EXIT_FAILURE);
+            if (numa_info[src].distance[dst] != 0 &&
+                numa_info[dst].distance[src] != 0 &&
+                numa_info[src].distance[dst] !=
+                numa_info[dst].distance[src]) {
+                is_asymmetrical = true;
+            }
         }
-        bitmap_or(seen_cpus, seen_cpus,
-                  numa_info[i].node_cpu, max_cpus);
     }
 
-    if (!bitmap_full(seen_cpus, max_cpus)) {
-        char *msg;
-        bitmap_complement(seen_cpus, seen_cpus, max_cpus);
-        msg = enumerate_cpus(seen_cpus, max_cpus);
-        error_report("warning: CPU(s) not present in any NUMA nodes: %s", msg);
-        error_report("warning: All CPU(s) up to maxcpus should be described "
-                     "in NUMA config");
-        g_free(msg);
+    if (is_asymmetrical) {
+        for (src = 0; src < nb_numa_nodes; src++) {
+            for (dst = 0; dst < nb_numa_nodes; dst++) {
+                if (src != dst && numa_info[src].distance[dst] == 0) {
+                    error_report("At least one asymmetrical pair of "
+                            "distances is given, please provide distances "
+                            "for both directions of all node pairs.");
+                    exit(EXIT_FAILURE);
+                }
+            }
+        }
     }
-    g_free(seen_cpus);
 }
 
-void parse_numa_opts(MachineClass *mc)
+static void complete_init_numa_distance(void)
+{
+    int src, dst;
+
+    /* Fixup NUMA distance by symmetric policy because if it is an
+     * asymmetric distance table, it should be a complete table and
+     * there would not be any missing distance except local node, which
+     * is verified by validate_numa_distance above.
+     */
+    for (src = 0; src < nb_numa_nodes; src++) {
+        for (dst = 0; dst < nb_numa_nodes; dst++) {
+            if (numa_info[src].distance[dst] == 0) {
+                if (src == dst) {
+                    numa_info[src].distance[dst] = NUMA_DISTANCE_MIN;
+                } else {
+                    numa_info[src].distance[dst] = numa_info[dst].distance[src];
+                }
+            }
+        }
+    }
+}
+
+void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
+                                 int nb_nodes, ram_addr_t size)
 {
     int i;
+    uint64_t usedmem = 0;
 
-    for (i = 0; i < MAX_NODES; i++) {
-        numa_info[i].node_cpu = bitmap_new(max_cpus);
+    /* Align each node according to the alignment
+     * requirements of the machine class
+     */
+
+    for (i = 0; i < nb_nodes - 1; i++) {
+        nodes[i].node_mem = (size / nb_nodes) &
+                            ~((1 << mc->numa_mem_align_shift) - 1);
+        usedmem += nodes[i].node_mem;
     }
+    nodes[i].node_mem = size - usedmem;
+}
 
-    if (qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, NULL, NULL)) {
+void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
+                                  int nb_nodes, ram_addr_t size)
+{
+    int i;
+    uint64_t usedmem = 0, node_mem;
+    uint64_t granularity = size / nb_nodes;
+    uint64_t propagate = 0;
+
+    for (i = 0; i < nb_nodes - 1; i++) {
+        node_mem = (granularity + propagate) &
+                   ~((1 << mc->numa_mem_align_shift) - 1);
+        propagate = granularity + propagate - node_mem;
+        nodes[i].node_mem = node_mem;
+        usedmem += node_mem;
+    }
+    nodes[i].node_mem = size - usedmem;
+}
+
+void parse_numa_opts(MachineState *ms)
+{
+    int i;
+    const CPUArchIdList *possible_cpus;
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+
+    if (qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, ms, NULL)) {
         exit(1);
     }
 
@@ -336,17 +464,8 @@
             }
         }
         if (i == nb_numa_nodes) {
-            uint64_t usedmem = 0;
-
-            /* Align each node according to the alignment
-             * requirements of the machine class
-             */
-            for (i = 0; i < nb_numa_nodes - 1; i++) {
-                numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
-                                        ~((1 << mc->numa_mem_align_shift) - 1);
-                usedmem += numa_info[i].node_mem;
-            }
-            numa_info[i].node_mem = ram_size - usedmem;
+            assert(mc->numa_auto_assign_ram);
+            mc->numa_auto_assign_ram(mc, numa_info, nb_numa_nodes, ram_size);
         }
 
         numa_total = 0;
@@ -366,50 +485,55 @@
 
         numa_set_mem_ranges();
 
-        for (i = 0; i < nb_numa_nodes; i++) {
-            if (!bitmap_empty(numa_info[i].node_cpu, max_cpus)) {
+        /* assign CPUs to nodes using board provided default mapping */
+        if (!mc->cpu_index_to_instance_props || !mc->possible_cpu_arch_ids) {
+            error_report("default CPUs to NUMA node mapping isn't supported");
+            exit(1);
+        }
+
+        possible_cpus = mc->possible_cpu_arch_ids(ms);
+        for (i = 0; i < possible_cpus->len; i++) {
+            if (possible_cpus->cpus[i].props.has_node_id) {
                 break;
             }
         }
-        /* Historically VCPUs were assigned in round-robin order to NUMA
-         * nodes. However it causes issues with guest not handling it nice
-         * in case where cores/threads from a multicore CPU appear on
-         * different nodes. So allow boards to override default distribution
-         * rule grouping VCPUs by socket so that VCPUs from the same socket
-         * would be on the same node.
-         */
-        if (i == nb_numa_nodes) {
-            for (i = 0; i < max_cpus; i++) {
-                unsigned node_id = i % nb_numa_nodes;
-                if (mc->cpu_index_to_socket_id) {
-                    node_id = mc->cpu_index_to_socket_id(i) % nb_numa_nodes;
-                }
 
-                set_bit(i, numa_info[node_id].node_cpu);
+        /* no CPUs are assigned to NUMA nodes */
+        if (i == possible_cpus->len) {
+            for (i = 0; i < max_cpus; i++) {
+                CpuInstanceProperties props;
+                /* fetch default mapping from board and enable it */
+                props = mc->cpu_index_to_instance_props(ms, i);
+                props.has_node_id = true;
+
+                machine_set_cpu_numa_node(ms, &props, &error_fatal);
             }
         }
 
-        validate_numa_cpus();
+        /* QEMU needs at least all unique node pair distances to build
+         * the whole NUMA distance table. QEMU treats the distance table
+         * as symmetric by default, i.e. distance A->B == distance B->A.
+         * Thus, QEMU is able to complete the distance table
+         * initialization even though only distance A->B is provided and
+         * distance B->A is not. QEMU knows the distance of a node to
+         * itself is always 10, so A->A distances may be omitted. When
+         * the distances of two nodes of a pair differ, i.e. distance
+         * A->B != distance B->A, then that means the distance table is
+         * asymmetric. In this case, the distances for both directions
+         * of all node pairs are required.
+         */
+        if (have_numa_distance) {
+            /* Validate enough NUMA distance information was provided. */
+            validate_numa_distance();
+
+            /* Validation succeeded, now fill in any missing distances. */
+            complete_init_numa_distance();
+        }
     } else {
         numa_set_mem_node_id(0, ram_size, 0);
     }
 }
 
-void numa_post_machine_init(void)
-{
-    CPUState *cpu;
-    int i;
-
-    CPU_FOREACH(cpu) {
-        for (i = 0; i < nb_numa_nodes; i++) {
-            assert(cpu->cpu_index < max_cpus);
-            if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
-                cpu->numa_node = i;
-            }
-        }
-    }
-}
-
 static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
                                            const char *name,
                                            uint64_t ram_size)
@@ -560,20 +684,6 @@
     return list;
 }
 
-int numa_get_node_for_cpu(int idx)
-{
-    int i;
-
-    assert(idx < max_cpus);
-
-    for (i = 0; i < nb_numa_nodes; i++) {
-        if (test_bit(idx, numa_info[i].node_cpu)) {
-            break;
-        }
-    }
-    return i;
-}
-
 void ram_block_notifier_add(RAMBlockNotifier *n)
 {
     QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next);

diff --git a/pc-bios/README b/pc-bios/README
index dcead36..ebc699d 100644
--- a/pc-bios/README
+++ b/pc-bios/README

@@ -47,3 +47,6 @@
   (OpenPower Abstraction Layer) firmware for OpenPOWER systems. It can
   run an hypervisor OS or simply a host OS on the "baremetal"
   platform, also known as the PowerNV (Non-Virtualized) platform.
+
+- QemuMacDrivers (https://github.com/ozbenh/QemuMacDrivers) is a project to
+  provide virtualised drivers for PPC MacOS guests.

diff --git a/pc-bios/qemu_vga.ndrv b/pc-bios/qemu_vga.ndrv
new file mode 100644
index 0000000..6e02f74
--- /dev/null
+++ b/pc-bios/qemu_vga.ndrv
Binary files differ

diff --git a/qapi-schema.json b/qapi-schema.json
index 5728b7f..80603cf 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json

@@ -1325,6 +1325,9 @@
 #
 # @thread_id: ID of the underlying host thread
 #
+# @props: properties describing to which node/socket/core/thread
+#         virtual CPU belongs to, provided if supported by board (since 2.10)
+#
 # @arch: architecture of the cpu, which determines which additional fields
 #        will be listed (since 2.6)
 #
@@ -1335,7 +1338,8 @@
 ##
 { 'union': 'CpuInfo',
   'base': {'CPU': 'int', 'current': 'bool', 'halted': 'bool',
-           'qom_path': 'str', 'thread_id': 'int', 'arch': 'CpuInfoArch' },
+           'qom_path': 'str', 'thread_id': 'int',
+           '*props': 'CpuInstanceProperties', 'arch': 'CpuInfoArch' },
   'discriminator': 'arch',
   'data': { 'x86': 'CpuInfoX86',
             'sparc': 'CpuInfoSPARC',
@@ -5682,10 +5686,16 @@
 ##
 # @NumaOptionsType:
 #
+# @node: NUMA nodes configuration
+#
+# @dist: NUMA distance configuration (since 2.10)
+#
+# @cpu: property based CPU(s) to node mapping (Since: 2.10)
+#
 # Since: 2.1
 ##
 { 'enum': 'NumaOptionsType',
-  'data': [ 'node' ] }
+  'data': [ 'node', 'dist', 'cpu' ] }
 
 ##
 # @NumaOptions:
@@ -5698,7 +5708,9 @@
   'base': { 'type': 'NumaOptionsType' },
   'discriminator': 'type',
   'data': {
-    'node': 'NumaNodeOptions' }}
+    'node': 'NumaNodeOptions',
+    'dist': 'NumaDistOptions',
+    'cpu': 'NumaCpuOptions' }}
 
 ##
 # @NumaNodeOptions:
@@ -5727,6 +5739,41 @@
    '*memdev': 'str' }}
 
 ##
+# @NumaDistOptions:
+#
+# Set the distance between 2 NUMA nodes.
+#
+# @src: source NUMA node.
+#
+# @dst: destination NUMA node.
+#
+# @val: NUMA distance from source node to destination node.
+#       When a node is unreachable from another node, set the distance
+#       between them to 255.
+#
+# Since: 2.10
+##
+{ 'struct': 'NumaDistOptions',
+  'data': {
+   'src': 'uint16',
+   'dst': 'uint16',
+   'val': 'uint8' }}
+
+##
+# @NumaCpuOptions:
+#
+# Option "-numa cpu" overrides default cpu to node mapping.
+# It accepts the same set of cpu properties as returned by
+# query-hotpluggable-cpus[].props, where node-id could be used to
+# override default node mapping.
+#
+# Since: 2.10
+##
+{ 'struct': 'NumaCpuOptions',
+   'base': 'CpuInstanceProperties',
+   'data' : {} }
+
+##
 # @HostMemPolicy:
 #
 # Host memory policy types

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 614181b..ea0b3e8 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json

@@ -2127,11 +2127,15 @@
 #
 # @filename:    path to the image file
 # @aio:         AIO backend (default: threads) (since: 2.8)
+# @locking:     whether to enable file locking. If set to 'auto', only enable
+#               when Open File Descriptor (OFD) locking API is available
+#               (default: auto, since 2.10)
 #
 # Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsFile',
   'data': { 'filename': 'str',
+            '*locking': 'OnOffAuto',
             '*aio': 'BlockdevAioOptions' } }
 
 ##
@@ -2430,8 +2434,33 @@
 #
 # @config:          filename of the configuration file
 #
-# @align:           required alignment for requests in bytes,
-#                   must be power of 2, or 0 for default
+# @align:           required alignment for requests in bytes, must be
+#                   positive power of 2, or 0 for default
+#
+# @max-transfer:    maximum size for I/O transfers in bytes, must be
+#                   positive multiple of @align and of the underlying
+#                   file's request alignment (but need not be a power of
+#                   2), or 0 for default (since 2.10)
+#
+# @opt-write-zero:  preferred alignment for write zero requests in bytes,
+#                   must be positive multiple of @align and of the
+#                   underlying file's request alignment (but need not be a
+#                   power of 2), or 0 for default (since 2.10)
+#
+# @max-write-zero:  maximum size for write zero requests in bytes, must be
+#                   positive multiple of @align, of @opt-write-zero, and of
+#                   the underlying file's request alignment (but need not
+#                   be a power of 2), or 0 for default (since 2.10)
+#
+# @opt-discard:     preferred alignment for discard requests in bytes, must
+#                   be positive multiple of @align and of the underlying
+#                   file's request alignment (but need not be a power of
+#                   2), or 0 for default (since 2.10)
+#
+# @max-discard:     maximum size for discard requests in bytes, must be
+#                   positive multiple of @align, of @opt-discard, and of
+#                   the underlying file's request alignment (but need not
+#                   be a power of 2), or 0 for default (since 2.10)
 #
 # @inject-error:    array of error injection descriptions
 #
@@ -2442,7 +2471,9 @@
 { 'struct': 'BlockdevOptionsBlkdebug',
   'data': { 'image': 'BlockdevRef',
             '*config': 'str',
-            '*align': 'int',
+            '*align': 'int', '*max-transfer': 'int32',
+            '*opt-write-zero': 'int32', '*max-write-zero': 'int32',
+            '*opt-discard': 'int32', '*max-discard': 'int32',
             '*inject-error': ['BlkdebugInjectErrorOptions'],
             '*set-state': ['BlkdebugSetStateOptions'] } }
 
@@ -2782,11 +2813,15 @@
 #               "name1=content1; name2=content2;" as explained by
 #               CURLOPT_COOKIE(3). Defaults to no cookies.
 #
+# @cookie-secret: ID of a QCryptoSecret object providing the cookie data in a
+#                 secure way. See @cookie for the format. (since 2.10)
+#
 # Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsCurlHttp',
   'base': 'BlockdevOptionsCurlBase',
-  'data': { '*cookie': 'str' } }
+  'data': { '*cookie': 'str',
+            '*cookie-secret': 'str'} }
 
 ##
 # @BlockdevOptionsCurlHttps:
@@ -2801,12 +2836,16 @@
 # @sslverify:   Whether to verify the SSL certificate's validity (defaults to
 #               true)
 #
+# @cookie-secret: ID of a QCryptoSecret object providing the cookie data in a
+#                 secure way. See @cookie for the format. (since 2.10)
+#
 # Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsCurlHttps',
   'base': 'BlockdevOptionsCurlBase',
   'data': { '*cookie': 'str',
-            '*sslverify': 'bool' } }
+            '*sslverify': 'bool',
+            '*cookie-secret': 'str'} }
 
 ##
 # @BlockdevOptionsCurlFtp:
@@ -2898,6 +2937,8 @@
 #                 (default: false)
 # @detect-zeroes: detect and optimize zero writes (Since 2.1)
 #                 (default: off)
+# @force-share:   force share all permission on added nodes.
+#                 Requires read-only=true. (Since 2.10)
 #
 # Remaining options are determined by the block driver.
 #
@@ -2909,6 +2950,7 @@
             '*discard': 'BlockdevDiscardOptions',
             '*cache': 'BlockdevCacheOptions',
             '*read-only': 'bool',
+            '*force-share': 'bool',
             '*detect-zeroes': 'BlockdevDetectZeroesOptions' },
   'discriminator': 'driver',
   'data': {

diff --git a/qdev-monitor.c b/qdev-monitor.c
index e61d596..3ecbf0b 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c

@@ -114,7 +114,7 @@
     if (dc->desc) {
         error_printf(", desc \"%s\"", dc->desc);
     }
-    if (dc->cannot_instantiate_with_device_add_yet) {
+    if (!dc->user_creatable) {
         error_printf(", no-user");
     }
     error_printf("\n");
@@ -156,7 +156,7 @@
                  ? !test_bit(i, dc->categories)
                  : !bitmap_empty(dc->categories, DEVICE_CATEGORY_MAX))
                 || (!show_no_user
-                    && dc->cannot_instantiate_with_device_add_yet)) {
+                    && !dc->user_creatable)) {
                 continue;
             }
             if (!cat_printed) {
@@ -241,7 +241,7 @@
     }
 
     dc = DEVICE_CLASS(oc);
-    if (dc->cannot_instantiate_with_device_add_yet ||
+    if (!dc->user_creatable ||
         (qdev_hotplug && !dc->hotpluggable)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "driver",
                    "pluggable device type");

diff --git a/qemu-doc.texi b/qemu-doc.texi
index 794ab4a..de0cc30 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi

@@ -182,7 +182,7 @@
 @item
 CS4231A compatible sound card
 @item
-PCI UHCI USB controller and a virtual USB hub.
+PCI UHCI, OHCI, EHCI or XHCI USB controller and a virtual USB-1.1 hub.
 @end itemize
 
 SMP is supported with up to 255 CPUs.
@@ -1357,10 +1357,10 @@
 @node pcsys_usb
 @section USB emulation
 
-QEMU emulates a PCI UHCI USB controller. You can virtually plug
-virtual USB devices or real host USB devices (experimental, works only
-on Linux hosts).  QEMU will automatically create and connect virtual USB hubs
-as necessary to connect multiple USB devices.
+QEMU can emulate a PCI UHCI, OHCI, EHCI or XHCI USB controller. You can
+plug virtual USB devices or real host USB devices (only works with certain
+host operating systems). QEMU will automatically create and connect virtual
+USB hubs as necessary to connect multiple USB devices.
 
 @menu
 * usb_devices::
@@ -1369,53 +1369,64 @@
 @node usb_devices
 @subsection Connecting USB devices
 
-USB devices can be connected with the @option{-usbdevice} commandline option
-or the @code{usb_add} monitor command.  Available devices are:
+USB devices can be connected with the @option{-device usb-...} command line
+option or the @code{device_add} monitor command. Available devices are:
 
 @table @code
-@item mouse
+@item usb-mouse
 Virtual Mouse.  This will override the PS/2 mouse emulation when activated.
-@item tablet
+@item usb-tablet
 Pointer device that uses absolute coordinates (like a touchscreen).
 This means QEMU is able to report the mouse position without having
 to grab the mouse.  Also overrides the PS/2 mouse emulation when activated.
-@item disk:@var{file}
-Mass storage device based on @var{file} (@pxref{disk_images})
-@item host:@var{bus.addr}
-Pass through the host device identified by @var{bus.addr}
-(Linux only)
-@item host:@var{vendor_id:product_id}
-Pass through the host device identified by @var{vendor_id:product_id}
-(Linux only)
-@item wacom-tablet
+@item usb-storage,drive=@var{drive_id}
+Mass storage device backed by @var{drive_id} (@pxref{disk_images})
+@item usb-uas
+USB attached SCSI device, see
+@url{http://git.qemu.org/?p=qemu.git;a=blob_plain;f=docs/usb-storage.txt,usb-storage.txt}
+for details
+@item usb-bot
+Bulk-only transport storage device, see
+@url{http://git.qemu.org/?p=qemu.git;a=blob_plain;f=docs/usb-storage.txt,usb-storage.txt}
+for details here, too
+@item usb-mtp,x-root=@var{dir}
+Media transfer protocol device, using @var{dir} as root of the file tree
+that is presented to the guest.
+@item usb-host,hostbus=@var{bus},hostaddr=@var{addr}
+Pass through the host device identified by @var{bus} and @var{addr}
+@item usb-host,vendorid=@var{vendor},productid=@var{product}
+Pass through the host device identified by @var{vendor} and @var{product} ID
+@item usb-wacom-tablet
 Virtual Wacom PenPartner tablet.  This device is similar to the @code{tablet}
 above but it can be used with the tslib library because in addition to touch
 coordinates it reports touch pressure.
-@item keyboard
+@item usb-kbd
 Standard USB keyboard.  Will override the PS/2 keyboard (if present).
-@item serial:[vendorid=@var{vendor_id}][,product_id=@var{product_id}]:@var{dev}
+@item usb-serial,chardev=@var{id}
 Serial converter. This emulates an FTDI FT232BM chip connected to host character
-device @var{dev}. The available character devices are the same as for the
-@code{-serial} option. The @code{vendorid} and @code{productid} options can be
-used to override the default 0403:6001. For instance,
-@example
-usb_add serial:productid=FA00:tcp:192.168.0.2:4444
-@end example
-will connect to tcp port 4444 of ip 192.168.0.2, and plug that to the virtual
-serial converter, faking a Matrix Orbital LCD Display (USB ID 0403:FA00).
-@item braille
+device @var{id}.
+@item usb-braille,chardev=@var{id}
 Braille device.  This will use BrlAPI to display the braille output on a real
-or fake device.
-@item net:@var{options}
-Network adapter that supports CDC ethernet and RNDIS protocols.  @var{options}
-specifies NIC options as with @code{-net nic,}@var{options} (see description).
+or fake device referenced by @var{id}.
+@item usb-net[,netdev=@var{id}]
+Network adapter that supports CDC ethernet and RNDIS protocols.  @var{id}
+specifies a netdev defined with @code{-netdev @dots{},id=@var{id}}.
 For instance, user-mode networking can be used with
 @example
-qemu-system-i386 [...OPTIONS...] -net user,vlan=0 -usbdevice net:vlan=0
+qemu-system-i386 [...] -netdev user,id=net0 -device usb-net,netdev=net0
 @end example
-Currently this cannot be used in machines that support PCI NICs.
-@item bt[:@var{hci-type}]
-Bluetooth dongle whose type is specified in the same format as with
+@item usb-ccid
+Smartcard reader device
+@item usb-audio
+USB audio device
+@item usb-bt-dongle
+Bluetooth dongle for the transport layer of HCI. It is connected to HCI
+scatternet 0 by default (corresponds to @code{-bt hci,vlan=0}).
+Note that the syntax for the @code{-device usb-bt-dongle} option is not as
+useful yet as it was with the legacy @code{-usbdevice} option. So to
+configure an USB bluetooth device, you might need to use
+"@code{-usbdevice bt}[:@var{hci-type}]" instead. This configures a
+bluetooth dongle whose type is specified in the same format as with
 the @option{-bt hci} option, @pxref{bt-hcis,,allowed HCI types}.  If
 no type is given, the HCI logic corresponds to @code{-bt hci,vlan=0}.
 This USB device implements the USB Transport Layer of HCI.  Example
@@ -1460,11 +1471,11 @@
 
 @item Add the device in QEMU by using:
 @example
-usb_add host:1234:5678
+device_add usb-host,vendorid=0x1234,productid=0x5678
 @end example
 
-Normally the guest OS should report that a new USB device is
-plugged. You can use the option @option{-usbdevice} to do the same.
+Normally the guest OS should report that a new USB device is plugged.
+You can use the option @option{-device usb-host,...} to do the same.
 
 @item Now you can try to use the host USB device in QEMU.
 
@@ -1732,37 +1743,45 @@
 unprivileged user, an environment variable SASL_CONF_PATH can be used
 to make it search alternate locations for the service config.
 
-The default configuration might contain
+If the TLS option is enabled for VNC, then it will provide session encryption,
+otherwise the SASL mechanism will have to provide encryption. In the latter
+case the list of possible plugins that can be used is drastically reduced. In
+fact only the GSSAPI SASL mechanism provides an acceptable level of security
+by modern standards. Previous versions of QEMU referred to the DIGEST-MD5
+mechanism, however, it has multiple serious flaws described in detail in
+RFC 6331 and thus should never be used any more. The SCRAM-SHA-1 mechanism
+provides a simple username/password auth facility similar to DIGEST-MD5, but
+does not support session encryption, so can only be used in combination with
+TLS.
 
-@example
-mech_list: digest-md5
-sasldb_path: /etc/qemu/passwd.db
-@end example
-
-This says to use the 'Digest MD5' mechanism, which is similar to the HTTP
-Digest-MD5 mechanism. The list of valid usernames & passwords is maintained
-in the /etc/qemu/passwd.db file, and can be updated using the saslpasswd2
-command. While this mechanism is easy to configure and use, it is not
-considered secure by modern standards, so only suitable for developers /
-ad-hoc testing.
-
-A more serious deployment might use Kerberos, which is done with the 'gssapi'
-mechanism
+When not using TLS the recommended configuration is
 
 @example
 mech_list: gssapi
 keytab: /etc/qemu/krb5.tab
 @end example
 
-For this to work the administrator of your KDC must generate a Kerberos
-principal for the server, with a name of  'qemu/somehost.example.com@@EXAMPLE.COM'
-replacing 'somehost.example.com' with the fully qualified host name of the
-machine running QEMU, and 'EXAMPLE.COM' with the Kerberos Realm.
+This says to use the 'GSSAPI' mechanism with the Kerberos v5 protocol, with
+the server principal stored in /etc/qemu/krb5.tab. For this to work the
+administrator of your KDC must generate a Kerberos principal for the server,
+with a name of 'qemu/somehost.example.com@@EXAMPLE.COM' replacing
+'somehost.example.com' with the fully qualified host name of the machine
+running QEMU, and 'EXAMPLE.COM' with the Kerberos Realm.
 
-Other configurations will be left as an exercise for the reader. It should
-be noted that only Digest-MD5 and GSSAPI provides a SSF layer for data
-encryption. For all other mechanisms, VNC should always be configured to
-use TLS and x509 certificates to protect security credentials from snooping.
+When using TLS, if username+password authentication is desired, then a
+reasonable configuration is
+
+@example
+mech_list: scram-sha-1
+sasldb_path: /etc/qemu/passwd.db
+@end example
+
+The saslpasswd2 program can be used to populate the passwd.db file with
+accounts.
+
+Other SASL configurations will be left as an exercise for the reader. Note that
+all mechanisms except GSSAPI, should be combined with use of TLS to ensure a
+secure data channel.
 
 @node gdb_usage
 @section GDB usage
@@ -1878,8 +1897,8 @@
 Windows 9x does not correctly use the CPU HLT
 instruction. The result is that it takes host CPU cycles even when
 idle. You can install the utility from
-@url{http://www.user.cityline.ru/~maxamn/amnhltm.zip} to solve this
-problem. Note that no such tool is needed for NT, 2000 or XP.
+@url{http://web.archive.org/web/20060212132151/http://www.user.cityline.ru/~maxamn/amnhltm.zip}
+to solve this problem. Note that no such tool is needed for NT, 2000 or XP.
 
 @subsubsection Windows 2000 disk full problem
 
@@ -1927,9 +1946,9 @@
 @subsubsection CPU usage reduction
 
 DOS does not correctly use the CPU HLT instruction. The result is that
-it takes host CPU cycles even when idle. You can install the utility
-from @url{http://www.vmware.com/software/dosidle210.zip} to solve this
-problem.
+it takes host CPU cycles even when idle. You can install the utility from
+@url{http://web.archive.org/web/20051222085335/http://www.vmware.com/software/dosidle210.zip}
+to solve this problem.
 
 @node QEMU System emulator for non PC targets
 @chapter QEMU System emulator for non PC targets

diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index bf4ce59..e5bc28f 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx

@@ -10,15 +10,15 @@
 ETEXI
 
 DEF("bench", img_bench,
-    "bench [-c count] [-d depth] [-f fmt] [--flush-interval=flush_interval] [-n] [--no-drain] [-o offset] [--pattern=pattern] [-q] [-s buffer_size] [-S step_size] [-t cache] [-w] filename")
+    "bench [-c count] [-d depth] [-f fmt] [--flush-interval=flush_interval] [-n] [--no-drain] [-o offset] [--pattern=pattern] [-q] [-s buffer_size] [-S step_size] [-t cache] [-w] [-U] filename")
 STEXI
-@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] @var{filename}
+@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] [-U] @var{filename}
 ETEXI
 
 DEF("check", img_check,
-    "check [-q] [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [-r [leaks | all]] [-T src_cache] filename")
+    "check [-q] [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [-r [leaks | all]] [-T src_cache] [-U] filename")
 STEXI
-@item check [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] [-T @var{src_cache}] @var{filename}
+@item check [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] [-T @var{src_cache}] [-U] @var{filename}
 ETEXI
 
 DEF("create", img_create,
@@ -34,45 +34,45 @@
 ETEXI
 
 DEF("compare", img_compare,
-    "compare [--object objectdef] [--image-opts] [-f fmt] [-F fmt] [-T src_cache] [-p] [-q] [-s] filename1 filename2")
+    "compare [--object objectdef] [--image-opts] [-f fmt] [-F fmt] [-T src_cache] [-p] [-q] [-s] [-U] filename1 filename2")
 STEXI
-@item compare [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [-F @var{fmt}] [-T @var{src_cache}] [-p] [-q] [-s] @var{filename1} @var{filename2}
+@item compare [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [-F @var{fmt}] [-T @var{src_cache}] [-p] [-q] [-s] [-U] @var{filename1} @var{filename2}
 ETEXI
 
 DEF("convert", img_convert,
-    "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename")
+    "convert [--object objectdef] [--image-opts] [-U] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename")
 STEXI
-@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-B @var{backing_file}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename}
+@item convert [--object @var{objectdef}] [--image-opts] [-U] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename}
 ETEXI
 
 DEF("dd", img_dd,
-    "dd [--image-opts] [-f fmt] [-O output_fmt] [bs=block_size] [count=blocks] [skip=blocks] if=input of=output")
+    "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [bs=block_size] [count=blocks] [skip=blocks] if=input of=output")
 STEXI
-@item dd [--image-opts] [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output}
+@item dd [--image-opts] [-U] [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output}
 ETEXI
 
 DEF("info", img_info,
-    "info [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [--backing-chain] filename")
+    "info [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [--backing-chain] [-U] filename")
 STEXI
-@item info [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename}
+@item info [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] [-U] @var{filename}
 ETEXI
 
 DEF("map", img_map,
-    "map [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] filename")
+    "map [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [-U] filename")
 STEXI
-@item map [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] @var{filename}
+@item map [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] [-U] @var{filename}
 ETEXI
 
 DEF("snapshot", img_snapshot,
-    "snapshot [--object objectdef] [--image-opts] [-q] [-l | -a snapshot | -c snapshot | -d snapshot] filename")
+    "snapshot [--object objectdef] [--image-opts] [-U] [-q] [-l | -a snapshot | -c snapshot | -d snapshot] filename")
 STEXI
-@item snapshot [--object @var{objectdef}] [--image-opts] [-q] [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot}] @var{filename}
+@item snapshot [--object @var{objectdef}] [--image-opts] [-U] [-q] [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot}] @var{filename}
 ETEXI
 
 DEF("rebase", img_rebase,
-    "rebase [--object objectdef] [--image-opts] [-q] [-f fmt] [-t cache] [-T src_cache] [-p] [-u] -b backing_file [-F backing_fmt] filename")
+    "rebase [--object objectdef] [--image-opts] [-U] [-q] [-f fmt] [-t cache] [-T src_cache] [-p] [-u] -b backing_file [-F backing_fmt] filename")
 STEXI
-@item rebase [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-p] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename}
+@item rebase [--object @var{objectdef}] [--image-opts] [-U] [-q] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-p] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename}
 ETEXI
 
 DEF("resize", img_resize,

diff --git a/qemu-img.c b/qemu-img.c
index f3b0ab4..b506839 100644
--- a/qemu-img.c
+++ b/qemu-img.c

@@ -28,6 +28,7 @@
 #include "qapi/qobject-output-visitor.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qbool.h"
 #include "qemu/cutils.h"
 #include "qemu/config-file.h"
 #include "qemu/option.h"
@@ -283,12 +284,20 @@
 
 static BlockBackend *img_open_opts(const char *optstr,
                                    QemuOpts *opts, int flags, bool writethrough,
-                                   bool quiet)
+                                   bool quiet, bool force_share)
 {
     QDict *options;
     Error *local_err = NULL;
     BlockBackend *blk;
     options = qemu_opts_to_qdict(opts, NULL);
+    if (force_share) {
+        if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
+            && !qdict_get_bool(options, BDRV_OPT_FORCE_SHARE)) {
+            error_report("--force-share/-U conflicts with image options");
+            return NULL;
+        }
+        qdict_put(options, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true));
+    }
     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
     if (!blk) {
         error_reportf_err(local_err, "Could not open '%s': ", optstr);
@@ -305,17 +314,20 @@
 
 static BlockBackend *img_open_file(const char *filename,
                                    const char *fmt, int flags,
-                                   bool writethrough, bool quiet)
+                                   bool writethrough, bool quiet,
+                                   bool force_share)
 {
     BlockBackend *blk;
     Error *local_err = NULL;
-    QDict *options = NULL;
+    QDict *options = qdict_new();
 
     if (fmt) {
-        options = qdict_new();
         qdict_put_str(options, "driver", fmt);
     }
 
+    if (force_share) {
+        qdict_put(options, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true));
+    }
     blk = blk_new_open(filename, NULL, options, flags, &local_err);
     if (!blk) {
         error_reportf_err(local_err, "Could not open '%s': ", filename);
@@ -334,7 +346,7 @@
 static BlockBackend *img_open(bool image_opts,
                               const char *filename,
                               const char *fmt, int flags, bool writethrough,
-                              bool quiet)
+                              bool quiet, bool force_share)
 {
     BlockBackend *blk;
     if (image_opts) {
@@ -348,9 +360,11 @@
         if (!opts) {
             return NULL;
         }
-        blk = img_open_opts(filename, opts, flags, writethrough, quiet);
+        blk = img_open_opts(filename, opts, flags, writethrough, quiet,
+                            force_share);
     } else {
-        blk = img_open_file(filename, fmt, flags, writethrough, quiet);
+        blk = img_open_file(filename, fmt, flags, writethrough, quiet,
+                            force_share);
     }
     return blk;
 }
@@ -650,6 +664,7 @@
     ImageCheck *check;
     bool quiet = false;
     bool image_opts = false;
+    bool force_share = false;
 
     fmt = NULL;
     output = NULL;
@@ -664,9 +679,10 @@
             {"output", required_argument, 0, OPTION_OUTPUT},
             {"object", required_argument, 0, OPTION_OBJECT},
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"force-share", no_argument, 0, 'U'},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, ":hf:r:T:q",
+        c = getopt_long(argc, argv, ":hf:r:T:qU",
                         long_options, &option_index);
         if (c == -1) {
             break;
@@ -705,6 +721,9 @@
         case 'q':
             quiet = true;
             break;
+        case 'U':
+            force_share = true;
+            break;
         case OPTION_OBJECT: {
             QemuOpts *opts;
             opts = qemu_opts_parse_noisily(&qemu_object_opts,
@@ -744,7 +763,8 @@
         return 1;
     }
 
-    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
+    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
+                   force_share);
     if (!blk) {
         return 1;
     }
@@ -947,7 +967,8 @@
         return 1;
     }
 
-    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
+    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
+                   false);
     if (!blk) {
         return 1;
     }
@@ -1206,6 +1227,7 @@
     int c, pnum;
     uint64_t progress_base;
     bool image_opts = false;
+    bool force_share = false;
 
     cache = BDRV_DEFAULT_CACHE;
     for (;;) {
@@ -1213,9 +1235,10 @@
             {"help", no_argument, 0, 'h'},
             {"object", required_argument, 0, OPTION_OBJECT},
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"force-share", no_argument, 0, 'U'},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, ":hf:F:T:pqs",
+        c = getopt_long(argc, argv, ":hf:F:T:pqsU",
                         long_options, NULL);
         if (c == -1) {
             break;
@@ -1248,6 +1271,9 @@
         case 's':
             strict = true;
             break;
+        case 'U':
+            force_share = true;
+            break;
         case OPTION_OBJECT: {
             QemuOpts *opts;
             opts = qemu_opts_parse_noisily(&qemu_object_opts,
@@ -1293,13 +1319,15 @@
         goto out3;
     }
 
-    blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet);
+    blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
+                    force_share);
     if (!blk1) {
         ret = 2;
         goto out3;
     }
 
-    blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet);
+    blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
+                    force_share);
     if (!blk2) {
         ret = 2;
         goto out2;
@@ -1733,13 +1761,13 @@
         qemu_co_mutex_lock(&s->lock);
         if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
             qemu_co_mutex_unlock(&s->lock);
-            goto out;
+            break;
         }
         n = convert_iteration_sectors(s, s->sector_num);
         if (n < 0) {
             qemu_co_mutex_unlock(&s->lock);
             s->ret = n;
-            goto out;
+            break;
         }
         /* save current sector and allocation status to local variables */
         sector_num = s->sector_num;
@@ -1764,7 +1792,6 @@
                 error_report("error while reading sector %" PRId64
                              ": %s", sector_num, strerror(-ret));
                 s->ret = ret;
-                goto out;
             }
         } else if (!s->min_sparse && status == BLK_ZERO) {
             status = BLK_DATA;
@@ -1773,22 +1800,20 @@
 
         if (s->wr_in_order) {
             /* keep writes in order */
-            while (s->wr_offs != sector_num) {
-                if (s->ret != -EINPROGRESS) {
-                    goto out;
-                }
+            while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
                 s->wait_sector_num[index] = sector_num;
                 qemu_coroutine_yield();
             }
             s->wait_sector_num[index] = -1;
         }
 
-        ret = convert_co_write(s, sector_num, n, buf, status);
-        if (ret < 0) {
-            error_report("error while writing sector %" PRId64
-                         ": %s", sector_num, strerror(-ret));
-            s->ret = ret;
-            goto out;
+        if (s->ret == -EINPROGRESS) {
+            ret = convert_co_write(s, sector_num, n, buf, status);
+            if (ret < 0) {
+                error_report("error while writing sector %" PRId64
+                             ": %s", sector_num, strerror(-ret));
+                s->ret = ret;
+            }
         }
 
         if (s->wr_in_order) {
@@ -1809,7 +1834,6 @@
         }
     }
 
-out:
     qemu_vfree(buf);
     s->co[index] = NULL;
     s->running_coroutines--;
@@ -1871,7 +1895,7 @@
         qemu_coroutine_enter(s->co[i]);
     }
 
-    while (s->ret == -EINPROGRESS) {
+    while (s->running_coroutines) {
         main_loop_wait(false);
     }
 
@@ -1902,6 +1926,7 @@
     bool writethrough, src_writethrough, quiet = false, image_opts = false,
          skip_create = false, progress = false;
     int64_t ret = -EINVAL;
+    bool force_share = false;
 
     ImgConvertState s = (ImgConvertState) {
         /* Need at least 4k of zeros for sparse detection */
@@ -1916,9 +1941,10 @@
             {"help", no_argument, 0, 'h'},
             {"object", required_argument, 0, OPTION_OBJECT},
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"force-share", no_argument, 0, 'U'},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, ":hf:O:B:ce6o:s:l:S:pt:T:qnm:W",
+        c = getopt_long(argc, argv, ":hf:O:B:ce6o:s:l:S:pt:T:qnm:WU",
                         long_options, NULL);
         if (c == -1) {
             break;
@@ -2021,6 +2047,9 @@
         case 'W':
             s.wr_in_order = false;
             break;
+        case 'U':
+            force_share = true;
+            break;
         case OPTION_OBJECT: {
             QemuOpts *object_opts;
             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
@@ -2080,7 +2109,8 @@
 
     for (bs_i = 0; bs_i < s.src_num; bs_i++) {
         s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
-                               fmt, src_flags, src_writethrough, quiet);
+                               fmt, src_flags, src_writethrough, quiet,
+                               force_share);
         if (!s.src[bs_i]) {
             ret = -1;
             goto out;
@@ -2233,7 +2263,8 @@
      * the bdrv_create() call which takes different params.
      * Not critical right now, so fix can wait...
      */
-    s.target = img_open_file(out_filename, out_fmt, flags, writethrough, quiet);
+    s.target = img_open_file(out_filename, out_fmt, flags, writethrough, quiet,
+                             false);
     if (!s.target) {
         ret = -1;
         goto out;
@@ -2384,7 +2415,7 @@
 static ImageInfoList *collect_image_info_list(bool image_opts,
                                               const char *filename,
                                               const char *fmt,
-                                              bool chain)
+                                              bool chain, bool force_share)
 {
     ImageInfoList *head = NULL;
     ImageInfoList **last = &head;
@@ -2407,7 +2438,8 @@
         g_hash_table_insert(filenames, (gpointer)filename, NULL);
 
         blk = img_open(image_opts, filename, fmt,
-                       BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false);
+                       BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
+                       force_share);
         if (!blk) {
             goto err;
         }
@@ -2459,6 +2491,7 @@
     const char *filename, *fmt, *output;
     ImageInfoList *list;
     bool image_opts = false;
+    bool force_share = false;
 
     fmt = NULL;
     output = NULL;
@@ -2471,9 +2504,10 @@
             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
             {"object", required_argument, 0, OPTION_OBJECT},
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"force-share", no_argument, 0, 'U'},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, ":f:h",
+        c = getopt_long(argc, argv, ":f:hU",
                         long_options, &option_index);
         if (c == -1) {
             break;
@@ -2491,6 +2525,9 @@
         case 'f':
             fmt = optarg;
             break;
+        case 'U':
+            force_share = true;
+            break;
         case OPTION_OUTPUT:
             output = optarg;
             break;
@@ -2530,7 +2567,8 @@
         return 1;
     }
 
-    list = collect_image_info_list(image_opts, filename, fmt, chain);
+    list = collect_image_info_list(image_opts, filename, fmt, chain,
+                                   force_share);
     if (!list) {
         return 1;
     }
@@ -2676,6 +2714,7 @@
     MapEntry curr = { .length = 0 }, next;
     int ret = 0;
     bool image_opts = false;
+    bool force_share = false;
 
     fmt = NULL;
     output = NULL;
@@ -2687,9 +2726,10 @@
             {"output", required_argument, 0, OPTION_OUTPUT},
             {"object", required_argument, 0, OPTION_OBJECT},
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"force-share", no_argument, 0, 'U'},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, ":f:h",
+        c = getopt_long(argc, argv, ":f:hU",
                         long_options, &option_index);
         if (c == -1) {
             break;
@@ -2707,6 +2747,9 @@
         case 'f':
             fmt = optarg;
             break;
+        case 'U':
+            force_share = true;
+            break;
         case OPTION_OUTPUT:
             output = optarg;
             break;
@@ -2743,7 +2786,7 @@
         return 1;
     }
 
-    blk = img_open(image_opts, filename, fmt, 0, false, false);
+    blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
     if (!blk) {
         return 1;
     }
@@ -2806,6 +2849,7 @@
     bool quiet = false;
     Error *err = NULL;
     bool image_opts = false;
+    bool force_share = false;
 
     bdrv_oflags = BDRV_O_RDWR;
     /* Parse commandline parameters */
@@ -2814,9 +2858,10 @@
             {"help", no_argument, 0, 'h'},
             {"object", required_argument, 0, OPTION_OBJECT},
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"force-share", no_argument, 0, 'U'},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, ":la:c:d:hq",
+        c = getopt_long(argc, argv, ":la:c:d:hqU",
                         long_options, NULL);
         if (c == -1) {
             break;
@@ -2866,6 +2911,9 @@
         case 'q':
             quiet = true;
             break;
+        case 'U':
+            force_share = true;
+            break;
         case OPTION_OBJECT: {
             QemuOpts *opts;
             opts = qemu_opts_parse_noisily(&qemu_object_opts,
@@ -2892,7 +2940,8 @@
     }
 
     /* Open the image */
-    blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet);
+    blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
+                   force_share);
     if (!blk) {
         return 1;
     }
@@ -2956,6 +3005,7 @@
     int c, flags, src_flags, ret;
     bool writethrough, src_writethrough;
     int unsafe = 0;
+    bool force_share = false;
     int progress = 0;
     bool quiet = false;
     Error *local_err = NULL;
@@ -2972,9 +3022,10 @@
             {"help", no_argument, 0, 'h'},
             {"object", required_argument, 0, OPTION_OBJECT},
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+            {"force-share", no_argument, 0, 'U'},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, ":hf:F:b:upt:T:q",
+        c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
                         long_options, NULL);
         if (c == -1) {
             break;
@@ -3024,6 +3075,9 @@
         case OPTION_IMAGE_OPTS:
             image_opts = true;
             break;
+        case 'U':
+            force_share = true;
+            break;
         }
     }
 
@@ -3072,7 +3126,8 @@
      * Ignore the old backing file for unsafe rebase in case we want to correct
      * the reference to a renamed or moved backing file.
      */
-    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
+    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
+                   false);
     if (!blk) {
         ret = -1;
         goto out;
@@ -3097,6 +3152,13 @@
             qdict_put_str(options, "driver", bs->backing_format);
         }
 
+        if (force_share) {
+            if (!options) {
+                options = qdict_new();
+            }
+            qdict_put(options, BDRV_OPT_FORCE_SHARE,
+                      qbool_from_bool(true));
+        }
         bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
         blk_old_backing = blk_new_open(backing_name, NULL,
                                        options, src_flags, &local_err);
@@ -3109,11 +3171,12 @@
         }
 
         if (out_baseimg[0]) {
+            options = qdict_new();
             if (out_basefmt) {
-                options = qdict_new();
                 qdict_put_str(options, "driver", out_basefmt);
-            } else {
-                options = NULL;
+            }
+            if (force_share) {
+                qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
             }
 
             blk_new_backing = blk_new_open(out_baseimg, NULL,
@@ -3419,7 +3482,8 @@
     qemu_opts_del(param);
 
     blk = img_open(image_opts, filename, fmt,
-                   BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet);
+                   BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
+                   false);
     if (!blk) {
         ret = -1;
         goto out;
@@ -3573,7 +3637,8 @@
         goto out;
     }
 
-    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
+    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
+                   false);
     if (!blk) {
         ret = -1;
         goto out;
@@ -3741,6 +3806,7 @@
     bool writethrough = false;
     struct timeval t1, t2;
     int i;
+    bool force_share = false;
 
     for (;;) {
         static const struct option long_options[] = {
@@ -3749,9 +3815,10 @@
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {"pattern", required_argument, 0, OPTION_PATTERN},
             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
+            {"force-share", no_argument, 0, 'U'},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:w", long_options, NULL);
+        c = getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:wU", long_options, NULL);
         if (c == -1) {
             break;
         }
@@ -3845,6 +3912,9 @@
             flags |= BDRV_O_RDWR;
             is_write = true;
             break;
+        case 'U':
+            force_share = true;
+            break;
         case OPTION_PATTERN:
         {
             unsigned long res;
@@ -3892,7 +3962,8 @@
         goto out;
     }
 
-    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
+    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
+                   force_share);
     if (!blk) {
         ret = -1;
         goto out;
@@ -4059,6 +4130,7 @@
     const char *fmt = NULL;
     int64_t size = 0;
     int64_t block_count = 0, out_pos, in_pos;
+    bool force_share = false;
     struct DdInfo dd = {
         .flags = 0,
         .count = 0,
@@ -4087,10 +4159,11 @@
     const struct option long_options[] = {
         { "help", no_argument, 0, 'h'},
         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
+        { "force-share", no_argument, 0, 'U'},
         { 0, 0, 0, 0 }
     };
 
-    while ((c = getopt_long(argc, argv, ":hf:O:", long_options, NULL))) {
+    while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
         if (c == EOF) {
             break;
         }
@@ -4110,6 +4183,9 @@
         case 'h':
             help();
             break;
+        case 'U':
+            force_share = true;
+            break;
         case OPTION_IMAGE_OPTS:
             image_opts = true;
             break;
@@ -4154,7 +4230,8 @@
         ret = -1;
         goto out;
     }
-    blk1 = img_open(image_opts, in.filename, fmt, 0, false, false);
+    blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
+                    force_share);
 
     if (!blk1) {
         ret = -1;
@@ -4222,7 +4299,7 @@
     }
 
     blk2 = img_open(image_opts, out.filename, out_fmt, BDRV_O_RDWR,
-                    false, false);
+                    false, false, false);
 
     if (!blk2) {
         ret = -1;

diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 21af9e6..4b2278f 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c

@@ -740,13 +740,13 @@
     }
 
     if (bflag) {
-        if (offset & 0x1ff) {
-            printf("offset %" PRId64 " is not sector aligned\n",
+        if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) {
+            printf("%" PRId64 " is not a sector-aligned value for 'offset'\n",
                    offset);
             return 0;
         }
-        if (count & 0x1ff) {
-            printf("count %"PRId64" is not sector aligned\n",
+        if (!QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)) {
+            printf("%"PRId64" is not a sector-aligned value for 'count'\n",
                    count);
             return 0;
         }
@@ -1050,14 +1050,14 @@
     }
 
     if (bflag || cflag) {
-        if (offset & 0x1ff) {
-            printf("offset %" PRId64 " is not sector aligned\n",
+        if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) {
+            printf("%" PRId64 " is not a sector-aligned value for 'offset'\n",
                    offset);
             return 0;
         }
 
-        if (count & 0x1ff) {
-            printf("count %"PRId64" is not sector aligned\n",
+        if (!QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)) {
+            printf("%"PRId64" is not a sector-aligned value for 'count'\n",
                    count);
             return 0;
         }
@@ -1760,7 +1760,7 @@
 static int alloc_f(BlockBackend *blk, int argc, char **argv)
 {
     BlockDriverState *bs = blk_bs(blk);
-    int64_t offset, sector_num, nb_sectors, remaining;
+    int64_t offset, sector_num, nb_sectors, remaining, count;
     char s1[64];
     int num, ret;
     int64_t sum_alloc;
@@ -1769,25 +1769,31 @@
     if (offset < 0) {
         print_cvtnum_err(offset, argv[1]);
         return 0;
-    } else if (offset & 0x1ff) {
-        printf("offset %" PRId64 " is not sector aligned\n",
+    } else if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) {
+        printf("%" PRId64 " is not a sector-aligned value for 'offset'\n",
                offset);
         return 0;
     }
 
     if (argc == 3) {
-        nb_sectors = cvtnum(argv[2]);
-        if (nb_sectors < 0) {
-            print_cvtnum_err(nb_sectors, argv[2]);
+        count = cvtnum(argv[2]);
+        if (count < 0) {
+            print_cvtnum_err(count, argv[2]);
             return 0;
-        } else if (nb_sectors > INT_MAX) {
-            printf("length argument cannot exceed %d, given %s\n",
-                   INT_MAX, argv[2]);
+        } else if (count > INT_MAX * BDRV_SECTOR_SIZE) {
+            printf("length argument cannot exceed %llu, given %s\n",
+                   INT_MAX * BDRV_SECTOR_SIZE, argv[2]);
             return 0;
         }
     } else {
-        nb_sectors = 1;
+        count = BDRV_SECTOR_SIZE;
     }
+    if (!QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)) {
+        printf("%" PRId64 " is not a sector-aligned value for 'count'\n",
+               count);
+        return 0;
+    }
+    nb_sectors = count >> BDRV_SECTOR_BITS;
 
     remaining = nb_sectors;
     sum_alloc = 0;
@@ -1811,8 +1817,8 @@
 
     cvtstr(offset, s1, sizeof(s1));
 
-    printf("%"PRId64"/%"PRId64" sectors allocated at offset %s\n",
-           sum_alloc, nb_sectors, s1);
+    printf("%"PRId64"/%"PRId64" bytes allocated at offset %s\n",
+           sum_alloc << BDRV_SECTOR_BITS, nb_sectors << BDRV_SECTOR_BITS, s1);
     return 0;
 }
 
@@ -1822,8 +1828,8 @@
     .argmin     = 1,
     .argmax     = 2,
     .cfunc      = alloc_f,
-    .args       = "off [sectors]",
-    .oneline    = "checks if a sector is present in the file",
+    .args       = "offset [count]",
+    .oneline    = "checks if offset is allocated in the file",
 };
 
 
@@ -1862,7 +1868,7 @@
 {
     int64_t offset;
     int64_t nb_sectors, total_sectors;
-    char s1[64];
+    char s1[64], s2[64];
     int64_t num;
     int ret;
     const char *retstr;
@@ -1888,10 +1894,11 @@
         }
 
         retstr = ret ? "    allocated" : "not allocated";
-        cvtstr(offset << 9ULL, s1, sizeof(s1));
-        printf("[% 24" PRId64 "] % 8" PRId64 "/% 8" PRId64 " sectors %s "
-               "at offset %s (%d)\n",
-               offset << 9ULL, num, nb_sectors, retstr, s1, ret);
+        cvtstr(num << BDRV_SECTOR_BITS, s1, sizeof(s1));
+        cvtstr(offset << BDRV_SECTOR_BITS, s2, sizeof(s2));
+        printf("%s (0x%" PRIx64 ") bytes %s at offset %s (0x%" PRIx64 ")\n",
+               s1, num << BDRV_SECTOR_BITS, retstr,
+               s2, offset << BDRV_SECTOR_BITS);
 
         offset += num;
         nb_sectors -= num;

diff --git a/qemu-io.c b/qemu-io.c
index ed0e2dc..34fa8a1 100644
--- a/qemu-io.c
+++ b/qemu-io.c

@@ -20,6 +20,7 @@
 #include "qemu/readline.h"
 #include "qemu/log.h"
 #include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qbool.h"
 #include "qom/object_interfaces.h"
 #include "sysemu/block-backend.h"
 #include "block/block_int.h"
@@ -53,7 +54,8 @@
     .oneline    = "close the current open file",
 };
 
-static int openfile(char *name, int flags, bool writethrough, QDict *opts)
+static int openfile(char *name, int flags, bool writethrough, bool force_share,
+                    QDict *opts)
 {
     Error *local_err = NULL;
     BlockDriverState *bs;
@@ -64,6 +66,18 @@
         return 1;
     }
 
+    if (force_share) {
+        if (!opts) {
+            opts = qdict_new();
+        }
+        if (qdict_haskey(opts, BDRV_OPT_FORCE_SHARE)
+            && !qdict_get_bool(opts, BDRV_OPT_FORCE_SHARE)) {
+            error_report("-U conflicts with image options");
+            QDECREF(opts);
+            return 1;
+        }
+        qdict_put(opts, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true));
+    }
     qemuio_blk = blk_new_open(name, NULL, opts, flags, &local_err);
     if (!qemuio_blk) {
         error_reportf_err(local_err, "can't open%s%s: ",
@@ -108,6 +122,7 @@
 " -r, -- open file read-only\n"
 " -s, -- use snapshot file\n"
 " -n, -- disable host cache, short for -t none\n"
+" -U, -- force shared permissions\n"
 " -k, -- use kernel AIO implementation (on Linux only)\n"
 " -t, -- use the given cache mode for the image\n"
 " -d, -- use the given discard mode for the image\n"
@@ -124,7 +139,7 @@
     .argmin     = 1,
     .argmax     = -1,
     .flags      = CMD_NOFILE_OK,
-    .args       = "[-rsnk] [-t cache] [-d discard] [-o options] [path]",
+    .args       = "[-rsnkU] [-t cache] [-d discard] [-o options] [path]",
     .oneline    = "open the file specified by path",
     .help       = open_help,
 };
@@ -147,8 +162,9 @@
     int c;
     QemuOpts *qopts;
     QDict *opts;
+    bool force_share = false;
 
-    while ((c = getopt(argc, argv, "snro:kt:d:")) != -1) {
+    while ((c = getopt(argc, argv, "snro:kt:d:U")) != -1) {
         switch (c) {
         case 's':
             flags |= BDRV_O_SNAPSHOT;
@@ -188,6 +204,9 @@
                 return 0;
             }
             break;
+        case 'U':
+            force_share = true;
+            break;
         default:
             qemu_opts_reset(&empty_opts);
             return qemuio_command_usage(&open_cmd);
@@ -211,9 +230,9 @@
     qemu_opts_reset(&empty_opts);
 
     if (optind == argc - 1) {
-        return openfile(argv[optind], flags, writethrough, opts);
+        return openfile(argv[optind], flags, writethrough, force_share, opts);
     } else if (optind == argc) {
-        return openfile(NULL, flags, writethrough, opts);
+        return openfile(NULL, flags, writethrough, force_share, opts);
     } else {
         QDECREF(opts);
         return qemuio_command_usage(&open_cmd);
@@ -257,6 +276,7 @@
 "  -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
 "                       specify tracing options\n"
 "                       see qemu-img(1) man page for full description\n"
+"  -U, --force-share    force shared permissions\n"
 "  -h, --help           display this help and exit\n"
 "  -V, --version        output version information and exit\n"
 "\n"
@@ -436,7 +456,7 @@
 int main(int argc, char **argv)
 {
     int readonly = 0;
-    const char *sopt = "hVc:d:f:rsnmkt:T:";
+    const char *sopt = "hVc:d:f:rsnmkt:T:U";
     const struct option lopt[] = {
         { "help", no_argument, NULL, 'h' },
         { "version", no_argument, NULL, 'V' },
@@ -452,6 +472,7 @@
         { "trace", required_argument, NULL, 'T' },
         { "object", required_argument, NULL, OPTION_OBJECT },
         { "image-opts", no_argument, NULL, OPTION_IMAGE_OPTS },
+        { "force-share", no_argument, 0, 'U'},
         { NULL, 0, NULL, 0 }
     };
     int c;
@@ -462,6 +483,7 @@
     QDict *opts = NULL;
     const char *format = NULL;
     char *trace_file = NULL;
+    bool force_share = false;
 
 #ifdef CONFIG_POSIX
     signal(SIGPIPE, SIG_IGN);
@@ -524,6 +546,9 @@
         case 'h':
             usage(progname);
             exit(0);
+        case 'U':
+            force_share = true;
+            break;
         case OPTION_OBJECT: {
             QemuOpts *qopts;
             qopts = qemu_opts_parse_noisily(&qemu_object_opts,
@@ -595,7 +620,7 @@
                 exit(1);
             }
             opts = qemu_opts_to_qdict(qopts, NULL);
-            if (openfile(NULL, flags, writethrough, opts)) {
+            if (openfile(NULL, flags, writethrough, force_share, opts)) {
                 exit(1);
             }
         } else {
@@ -603,7 +628,8 @@
                 opts = qdict_new();
                 qdict_put_str(opts, "driver", format);
             }
-            if (openfile(argv[optind], flags, writethrough, opts)) {
+            if (openfile(argv[optind], flags, writethrough,
+                         force_share, opts)) {
                 exit(1);
             }
         }

diff --git a/qemu-options.hx b/qemu-options.hx
index 70c0ded..f07a310 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx

@@ -139,13 +139,18 @@
 
 DEF("numa", HAS_ARG, QEMU_OPTION_numa,
     "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
-    "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
+    "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
+    "-numa dist,src=source,dst=destination,val=distance\n", QEMU_ARCH_ALL)
 STEXI
 @item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
 @itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
+@itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance}
+@itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}]
 @findex -numa
 Define a NUMA node and assign RAM and VCPUs to it.
+Set the NUMA distance from a source node to a destination node.
 
+Legacy VCPU assignment uses @samp{cpus} option where
 @var{firstcpu} and @var{lastcpu} are CPU indexes. Each
 @samp{cpus} option represent a contiguous range of CPU indexes
 (or a single VCPU if @var{lastcpu} is omitted). A non-contiguous
@@ -159,6 +164,24 @@
 -numa node,cpus=0-2,cpus=5
 @end example
 
+@samp{cpu} option is a new alternative to @samp{cpus} option
+which uses @samp{socket-id|core-id|thread-id} properties to assign
+CPU objects to a @var{node} using topology layout properties of CPU.
+The set of properties is machine specific, and depends on used
+machine type/@samp{smp} options. It could be queried with
+@samp{hotpluggable-cpus} monitor command.
+@samp{node-id} property specifies @var{node} to which CPU object
+will be assigned, it's required for @var{node} to be declared
+with @samp{node} option before it's used with @samp{cpu} option.
+
+For example:
+@example
+-M pc \
+-smp 1,sockets=2,maxcpus=2 \
+-numa node,nodeid=0 -numa node,nodeid=1 \
+-numa cpu,node-id=0,socket-id=0 -numa cpu,node-id=1,socket-id=1
+@end example
+
 @samp{mem} assigns a given RAM amount to a node. @samp{memdev}
 assigns RAM from a given memory backend device to a node. If
 @samp{mem} and @samp{memdev} are omitted in all nodes, RAM is
@@ -167,6 +190,17 @@
 @samp{mem} and @samp{memdev} are mutually exclusive. Furthermore,
 if one node uses @samp{memdev}, all of them have to use it.
 
+@var{source} and @var{destination} are NUMA node IDs.
+@var{distance} is the NUMA distance from @var{source} to @var{destination}.
+The distance from a node to itself is always 10. If any pair of nodes is
+given a distance, then all pairs must be given distances. Although, when
+distances are only given in one direction for each pair of nodes, then
+the distances in the opposite directions are assumed to be the same. If,
+however, an asymmetrical pair of distances is given for even one node
+pair, then all node pairs must be provided distance values for both
+directions, even when they are symmetrical. When a node is unreachable
+from another node, set the pair's distance to 255.
+
 Note that the -@option{numa} option doesn't allocate any of the
 specified resources, it just assigns existing resources to NUMA
 nodes. This means that one still has to use the @option{-m},
@@ -604,7 +638,7 @@
 specific URLs. See the section for "Device URL Syntax" for more information.
 @item if=@var{interface}
 This option defines on which type on interface the drive is connected.
-Available types are: ide, scsi, sd, mtd, floppy, pflash, virtio.
+Available types are: ide, scsi, sd, mtd, floppy, pflash, virtio, none.
 @item bus=@var{bus},unit=@var{unit}
 These options define where is connected the drive by defining the bus number and
 the unit id.
@@ -876,7 +910,7 @@
 
 DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs,
     "-virtfs local,path=path,mount_tag=tag,security_model=[mapped-xattr|mapped-file|passthrough|none]\n"
-    "        [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd]\n",
+    "        [,id=id][,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd]\n",
     QEMU_ARCH_ALL)
 
 STEXI

diff --git a/qemu.sasl b/qemu.sasl
index 64fdef3..fb8a92b 100644
--- a/qemu.sasl
+++ b/qemu.sasl

@@ -1,36 +1,44 @@
-# If you want to use the non-TLS socket, then you *must* include
-# the GSSAPI or DIGEST-MD5 mechanisms, because they are the only
-# ones that can offer session encryption as well as authentication.
+# If you want to use VNC remotely without TLS, then you *must*
+# pick a mechanism which provides session encryption as well
+# as authentication.
 #
-# If you're only using TLS, then you can turn on any mechanisms
+# If you are only using TLS, then you can turn on any mechanisms
 # you like for authentication, because TLS provides the encryption
 #
-# Default to a simple username+password mechanism
-# NB digest-md5 is no longer considered secure by current standards
-mech_list: digest-md5
-
-# Before you can use GSSAPI, you need a service principle on the
-# KDC server for libvirt, and that to be exported to the keytab
-# file listed below
-#mech_list: gssapi
+# If you are only using UNIX sockets then encryption is not
+# required at all.
 #
-# You can also list many mechanisms at once, then the user can choose
-# by adding  '?auth=sasl.gssapi' to their libvirt URI, eg
-#   qemu+tcp://hostname/system?auth=sasl.gssapi
-#mech_list: digest-md5 gssapi
+# NB, previously DIGEST-MD5 was set as the default mechanism for
+# QEMU VNC. Per RFC 6331 this is vulnerable to many serious security
+# flaws as should no longer be used. Thus GSSAPI is now the default.
+#
+# To use GSSAPI requires that a QEMU service principal is
+# added to the Kerberos server for each host running QEMU.
+# This principal needs to be exported to the keytab file listed below
+mech_list: gssapi
+
+# If using TLS with VNC, or a UNIX socket only, it is possible to
+# enable plugins which don't provide session encryption. The
+# 'scram-sha-1' plugin allows plain username/password authentication
+# to be performed
+#
+#mech_list: scram-sha-1
+
+# You can also list many mechanisms at once, and the VNC server will
+# negotiate which to use by considering the list enabled on the VNC
+# client.
+#mech_list: scram-sha-1 gssapi
 
 # Some older builds of MIT kerberos on Linux ignore this option &
 # instead need KRB5_KTNAME env var.
 # For modern Linux, and other OS, this should be sufficient
 #
-# There is no default value here, uncomment if you need this
-#keytab: /etc/qemu/krb5.tab
+# This file needs to be populated with the service principal that
+# was created on the Kerberos v5 server. If switching to a non-gssapi
+# mechanism this can be commented out.
+keytab: /etc/qemu/krb5.tab
 
-# If using digest-md5 for username/passwds, then this is the file
+# If using scram-sha-1 for username/passwds, then this is the file
 # containing the passwds. Use 'saslpasswd2 -a qemu [username]'
 # to add entries, and 'sasldblistusers2 -f [sasldb_path]' to browse it
-sasldb_path: /etc/qemu/passwd.db
-
-
-auxprop_plugin: sasldb
-
+#sasldb_path: /etc/qemu/passwd.db

diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index ba06be4..284ecc6 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c

@@ -2125,9 +2125,11 @@
          * we think this VM does not support online/offline memory block,
          * any other solution?
          */
-        if (!dp && errno == ENOENT) {
-            result->response =
-                GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
+        if (!dp) {
+            if (errno == ENOENT) {
+                result->response =
+                    GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
+            }
             goto out1;
         }
         closedir(dp);

diff --git a/qmp.c b/qmp.c
index ab74cd7..f656940 100644
--- a/qmp.c
+++ b/qmp.c

@@ -196,18 +196,12 @@
     }
 
     /* Continuing after completed migration. Images have been inactivated to
-     * allow the destination to take control. Need to get control back now. */
-    if (runstate_check(RUN_STATE_FINISH_MIGRATE) ||
-        runstate_check(RUN_STATE_POSTMIGRATE))
-    {
-        bdrv_invalidate_cache_all(&local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
-        }
-    }
-
-    blk_resume_after_migration(&local_err);
+     * allow the destination to take control. Need to get control back now.
+     *
+     * If there are no inactive block nodes (e.g. because the VM was just
+     * paused rather than completing a migration), bdrv_inactivate_all() simply
+     * doesn't do anything. */
+    bdrv_invalidate_cache_all(&local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;

diff --git a/qom/cpu.c b/qom/cpu.c
index f02e9c0..5069876 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c

@@ -382,6 +382,7 @@
 
 static void cpu_common_initfn(Object *obj)
 {
+    uint32_t count;
     CPUState *cpu = CPU(obj);
     CPUClass *cc = CPU_GET_CLASS(obj);
 
@@ -396,7 +397,10 @@
     QTAILQ_INIT(&cpu->breakpoints);
     QTAILQ_INIT(&cpu->watchpoints);
 
-    cpu->trace_dstate = bitmap_new(trace_get_vcpu_event_count());
+    count = trace_get_vcpu_event_count();
+    if (count) {
+        cpu->trace_dstate = bitmap_new(count);
+    }
 
     cpu_exec_initfn(cpu);
 }
@@ -449,7 +453,7 @@
      * Reason: CPUs still need special care by board code: wiring up
      * IRQs, adding reset handlers, halting non-first CPUs, ...
      */
-    dc->cannot_instantiate_with_device_add_yet = true;
+    dc->user_creatable = false;
 }
 
 static const TypeInfo cpu_type_info = {

diff --git a/roms/QemuMacDrivers b/roms/QemuMacDrivers
new file mode 160000
index 0000000..d4e7d7a
--- /dev/null
+++ b/roms/QemuMacDrivers

@@ -0,0 +1 @@
+Subproject commit d4e7d7ac663fcb55f1b93575445fcbca372f17a7

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 73cee81..45027b9 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl

@@ -1,4 +1,4 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
 # (c) 2001, Dave Jones. (the file handling bit)
 # (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit)
 # (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite)
@@ -6,6 +6,7 @@
 # Licensed under the terms of the GNU GPL License version 2
 
 use strict;
+use warnings;
 
 my $P = $0;
 $P =~ s@.*/@@g;

diff --git a/scripts/clean-header-guards.pl b/scripts/clean-header-guards.pl
index 54ab99a..5e67f19 100755
--- a/scripts/clean-header-guards.pl
+++ b/scripts/clean-header-guards.pl

@@ -1,4 +1,4 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
 #
 # Clean up include guards in headers
 #
@@ -28,6 +28,7 @@
 #   "cc -E -DGUARD_H -c -P -", and fed the test program on stdin.
 
 use strict;
+use warnings;
 use Getopt::Std;
 
 # Stuff we don't want to clean because we import it into our tree:

diff --git a/scripts/cleanup-trace-events.pl b/scripts/cleanup-trace-events.pl
index 7e808ef..e93abc0 100755
--- a/scripts/cleanup-trace-events.pl
+++ b/scripts/cleanup-trace-events.pl

@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright (C) 2013 Red Hat, Inc.
 #
 # Authors:

diff --git a/scripts/disas-objdump.pl b/scripts/disas-objdump.pl
index 8f7e818..bec905f 100755
--- a/scripts/disas-objdump.pl
+++ b/scripts/disas-objdump.pl

@@ -1,4 +1,6 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+
+use warnings;
 
 use File::Temp qw/ tempfile /;
 use Getopt::Long;

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 96e66a8..711a9a6 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl

@@ -1,4 +1,4 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
 # (c) 2007, Joe Perches <joe@perches.com>
 #           created from checkpatch.pl
 #
@@ -11,6 +11,7 @@
 # Licensed under the terms of the GNU GPL License version 2
 
 use strict;
+use warnings;
 
 my $P = $0;
 my $V = '0.26';

diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh
index 0f1aa63..8afc3eb 100755
--- a/scripts/qemu-binfmt-conf.sh
+++ b/scripts/qemu-binfmt-conf.sh

@@ -284,12 +284,12 @@
         shift
         # check given cpu is in the supported CPU list
         for cpu in ${qemu_target_list} ; do
-            if [ "$cpu" == "$1" ] ; then
+            if [ "$cpu" = "$1" ] ; then
                 break
             fi
         done
 
-        if [ "$cpu" == "$1" ] ; then
+        if [ "$cpu" = "$1" ] ; then
             qemu_target_list="$1"
         else
             echo "ERROR: unknown CPU \"$1\"" 1>&2

diff --git a/scripts/shaderinclude.pl b/scripts/shaderinclude.pl
index 81b5146..cd3bb40 100644
--- a/scripts/shaderinclude.pl
+++ b/scripts/shaderinclude.pl

@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use strict;
 use warnings;
 

diff --git a/scripts/switch-timer-api b/scripts/switch-timer-api
index b0e230b..41736d1 100755
--- a/scripts/switch-timer-api
+++ b/scripts/switch-timer-api

@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 use strict;
 use warnings;

diff --git a/scripts/texi2pod.pl b/scripts/texi2pod.pl
index 6e8fec4..39ce584 100755
--- a/scripts/texi2pod.pl
+++ b/scripts/texi2pod.pl

@@ -1,4 +1,4 @@
-#! /usr/bin/perl -w
+#! /usr/bin/env perl
 
 #   Copyright (C) 1999, 2000, 2001, 2003 Free Software Foundation, Inc.
 
@@ -22,6 +22,8 @@
 # markup to Perl POD format.  It's intended to be used to extract
 # something suitable for a manpage from a Texinfo document.
 
+use warnings;
+
 $output = 0;
 $skipping = 0;
 %sects = ();

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index b357aee..c185eb1 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c

@@ -458,6 +458,13 @@
     }
 }
 
+uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz)
+{
+    uint32_t Aff1 = idx / clustersz;
+    uint32_t Aff0 = idx % clustersz;
+    return (Aff1 << ARM_AFF1_SHIFT) | Aff0;
+}
+
 static void arm_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -709,9 +716,8 @@
      * so these bits always RAZ.
      */
     if (cpu->mp_affinity == ARM64_AFFINITY_INVALID) {
-        uint32_t Aff1 = cs->cpu_index / ARM_DEFAULT_CPUS_PER_CLUSTER;
-        uint32_t Aff0 = cs->cpu_index % ARM_DEFAULT_CPUS_PER_CLUSTER;
-        cpu->mp_affinity = (Aff1 << ARM_AFF1_SHIFT) | Aff0;
+        cpu->mp_affinity = arm_cpu_mp_affinity(cs->cpu_index,
+                                               ARM_DEFAULT_CPUS_PER_CLUSTER);
     }
 
     if (cpu->reset_hivecs) {
@@ -1567,6 +1573,7 @@
     DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0),
     DEFINE_PROP_UINT64("mp-affinity", ARMCPU,
                         mp_affinity, ARM64_AFFINITY_INVALID),
+    DEFINE_PROP_INT32("node-id", CPUState, numa_node, CPU_UNSET_NUMA_NODE_ID),
     DEFINE_PROP_END_OF_LIST()
 };
 

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 1055bfe..048faed 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h

@@ -710,6 +710,8 @@
     return container_of(env, ARMCPU, env);
 }
 
+uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz);
+
 #define ENV_GET_CPU(e) CPU(arm_env_get_cpu(e))
 
 #define ENV_OFFSET offsetof(ARMCPU, env)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 7e87031..a41d595 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c

@@ -2635,28 +2635,23 @@
     X86CPU *cpu = x86_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
     uint32_t pkg_offset;
+    uint32_t limit;
 
-    /* test if maximum index reached */
-    if (index & 0x80000000) {
-        if (index > env->cpuid_xlevel) {
-            if (env->cpuid_xlevel2 > 0) {
-                /* Handle the Centaur's CPUID instruction. */
-                if (index > env->cpuid_xlevel2) {
-                    index = env->cpuid_xlevel2;
-                } else if (index < 0xC0000000) {
-                    index = env->cpuid_xlevel;
-                }
-            } else {
-                /* Intel documentation states that invalid EAX input will
-                 * return the same information as EAX=cpuid_level
-                 * (Intel SDM Vol. 2A - Instruction Set Reference - CPUID)
-                 */
-                index =  env->cpuid_level;
-            }
-        }
+    /* Calculate & apply limits for different index ranges */
+    if (index >= 0xC0000000) {
+        limit = env->cpuid_xlevel2;
+    } else if (index >= 0x80000000) {
+        limit = env->cpuid_xlevel;
     } else {
-        if (index > env->cpuid_level)
-            index = env->cpuid_level;
+        limit = env->cpuid_level;
+    }
+
+    if (index > limit) {
+        /* Intel documentation states that invalid EAX input will
+         * return the same information as EAX=cpuid_level
+         * (Intel SDM Vol. 2A - Instruction Set Reference - CPUID)
+         */
+        index = env->cpuid_level;
     }
 
     switch(index) {
@@ -3991,6 +3986,7 @@
     DEFINE_PROP_INT32("core-id", X86CPU, core_id, -1),
     DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, -1),
 #endif
+    DEFINE_PROP_INT32("node-id", CPUState, numa_node, CPU_UNSET_NUMA_NODE_ID),
     DEFINE_PROP_BOOL("pmu", X86CPU, enable_pmu, false),
     { .name  = "hv-spinlocks", .info  = &qdev_prop_spinlocks },
     DEFINE_PROP_BOOL("hv-relaxed", X86CPU, hyperv_relaxed_timing, false),
@@ -4079,7 +4075,7 @@
     cc->cpu_exec_enter = x86_cpu_exec_enter;
     cc->cpu_exec_exit = x86_cpu_exec_exit;
 
-    dc->cannot_instantiate_with_device_add_yet = false;
+    dc->user_creatable = true;
 }
 
 static const TypeInfo x86_cpu_type_info = {

diff --git a/target/i386/machine.c b/target/i386/machine.c
index 78ae2f9..3cb2729 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c

@@ -136,178 +136,48 @@
 #define VMSTATE_MTRR_VARS(_field, _state, _n, _v)                    \
     VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_mtrr_var, MTRRVar)
 
-static int put_fpreg_error(QEMUFile *f, void *opaque, size_t size,
-                           VMStateField *field, QJSON *vmdesc)
+typedef struct x86_FPReg_tmp {
+    FPReg *parent;
+    uint64_t tmp_mant;
+    uint16_t tmp_exp;
+} x86_FPReg_tmp;
+
+static void fpreg_pre_save(void *opaque)
 {
-    fprintf(stderr, "call put_fpreg() with invalid arguments\n");
-    exit(0);
-    return 0;
-}
+    x86_FPReg_tmp *tmp = opaque;
 
-/* XXX: add that in a FPU generic layer */
-union x86_longdouble {
-    uint64_t mant;
-    uint16_t exp;
-};
-
-#define MANTD1(fp)	(fp & ((1LL << 52) - 1))
-#define EXPBIAS1 1023
-#define EXPD1(fp)	((fp >> 52) & 0x7FF)
-#define SIGND1(fp)	((fp >> 32) & 0x80000000)
-
-static void fp64_to_fp80(union x86_longdouble *p, uint64_t temp)
-{
-    int e;
-    /* mantissa */
-    p->mant = (MANTD1(temp) << 11) | (1LL << 63);
-    /* exponent + sign */
-    e = EXPD1(temp) - EXPBIAS1 + 16383;
-    e |= SIGND1(temp) >> 16;
-    p->exp = e;
-}
-
-static int get_fpreg(QEMUFile *f, void *opaque, size_t size,
-                     VMStateField *field)
-{
-    FPReg *fp_reg = opaque;
-    uint64_t mant;
-    uint16_t exp;
-
-    qemu_get_be64s(f, &mant);
-    qemu_get_be16s(f, &exp);
-    fp_reg->d = cpu_set_fp80(mant, exp);
-    return 0;
-}
-
-static int put_fpreg(QEMUFile *f, void *opaque, size_t size,
-                     VMStateField *field, QJSON *vmdesc)
-{
-    FPReg *fp_reg = opaque;
-    uint64_t mant;
-    uint16_t exp;
     /* we save the real CPU data (in case of MMX usage only 'mant'
        contains the MMX register */
-    cpu_get_fp80(&mant, &exp, fp_reg->d);
-    qemu_put_be64s(f, &mant);
-    qemu_put_be16s(f, &exp);
+    cpu_get_fp80(&tmp->tmp_mant, &tmp->tmp_exp, tmp->parent->d);
+}
 
+static int fpreg_post_load(void *opaque, int version)
+{
+    x86_FPReg_tmp *tmp = opaque;
+
+    tmp->parent->d = cpu_set_fp80(tmp->tmp_mant, tmp->tmp_exp);
     return 0;
 }
 
-static const VMStateInfo vmstate_fpreg = {
+static const VMStateDescription vmstate_fpreg_tmp = {
+    .name = "fpreg_tmp",
+    .post_load = fpreg_post_load,
+    .pre_save  = fpreg_pre_save,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(tmp_mant, x86_FPReg_tmp),
+        VMSTATE_UINT16(tmp_exp, x86_FPReg_tmp),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_fpreg = {
     .name = "fpreg",
-    .get  = get_fpreg,
-    .put  = put_fpreg,
+    .fields = (VMStateField[]) {
+        VMSTATE_WITH_TMP(FPReg, x86_FPReg_tmp, vmstate_fpreg_tmp),
+        VMSTATE_END_OF_LIST()
+    }
 };
 
-static int get_fpreg_1_mmx(QEMUFile *f, void *opaque, size_t size,
-                           VMStateField *field)
-{
-    union x86_longdouble *p = opaque;
-    uint64_t mant;
-
-    qemu_get_be64s(f, &mant);
-    p->mant = mant;
-    p->exp = 0xffff;
-    return 0;
-}
-
-static const VMStateInfo vmstate_fpreg_1_mmx = {
-    .name = "fpreg_1_mmx",
-    .get  = get_fpreg_1_mmx,
-    .put  = put_fpreg_error,
-};
-
-static int get_fpreg_1_no_mmx(QEMUFile *f, void *opaque, size_t size,
-                              VMStateField *field)
-{
-    union x86_longdouble *p = opaque;
-    uint64_t mant;
-
-    qemu_get_be64s(f, &mant);
-    fp64_to_fp80(p, mant);
-    return 0;
-}
-
-static const VMStateInfo vmstate_fpreg_1_no_mmx = {
-    .name = "fpreg_1_no_mmx",
-    .get  = get_fpreg_1_no_mmx,
-    .put  = put_fpreg_error,
-};
-
-static bool fpregs_is_0(void *opaque, int version_id)
-{
-    X86CPU *cpu = opaque;
-    CPUX86State *env = &cpu->env;
-
-    return (env->fpregs_format_vmstate == 0);
-}
-
-static bool fpregs_is_1_mmx(void *opaque, int version_id)
-{
-    X86CPU *cpu = opaque;
-    CPUX86State *env = &cpu->env;
-    int guess_mmx;
-
-    guess_mmx = ((env->fptag_vmstate == 0xff) &&
-                 (env->fpus_vmstate & 0x3800) == 0);
-    return (guess_mmx && (env->fpregs_format_vmstate == 1));
-}
-
-static bool fpregs_is_1_no_mmx(void *opaque, int version_id)
-{
-    X86CPU *cpu = opaque;
-    CPUX86State *env = &cpu->env;
-    int guess_mmx;
-
-    guess_mmx = ((env->fptag_vmstate == 0xff) &&
-                 (env->fpus_vmstate & 0x3800) == 0);
-    return (!guess_mmx && (env->fpregs_format_vmstate == 1));
-}
-
-#define VMSTATE_FP_REGS(_field, _state, _n)                               \
-    VMSTATE_ARRAY_TEST(_field, _state, _n, fpregs_is_0, vmstate_fpreg, FPReg), \
-    VMSTATE_ARRAY_TEST(_field, _state, _n, fpregs_is_1_mmx, vmstate_fpreg_1_mmx, FPReg), \
-    VMSTATE_ARRAY_TEST(_field, _state, _n, fpregs_is_1_no_mmx, vmstate_fpreg_1_no_mmx, FPReg)
-
-static bool version_is_5(void *opaque, int version_id)
-{
-    return version_id == 5;
-}
-
-#ifdef TARGET_X86_64
-static bool less_than_7(void *opaque, int version_id)
-{
-    return version_id < 7;
-}
-
-static int get_uint64_as_uint32(QEMUFile *f, void *pv, size_t size,
-                                VMStateField *field)
-{
-    uint64_t *v = pv;
-    *v = qemu_get_be32(f);
-    return 0;
-}
-
-static int put_uint64_as_uint32(QEMUFile *f, void *pv, size_t size,
-                                VMStateField *field, QJSON *vmdesc)
-{
-    uint64_t *v = pv;
-    qemu_put_be32(f, *v);
-
-    return 0;
-}
-
-static const VMStateInfo vmstate_hack_uint64_as_uint32 = {
-    .name = "uint64_as_uint32",
-    .get  = get_uint64_as_uint32,
-    .put  = put_uint64_as_uint32,
-};
-
-#define VMSTATE_HACK_UINT32(_f, _s, _t)                                  \
-    VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_hack_uint64_as_uint32, uint64_t)
-#endif
-
 static void cpu_pre_save(void *opaque)
 {
     X86CPU *cpu = opaque;
@@ -356,6 +226,10 @@
         return -EINVAL;
     }
 
+    if (env->fpregs_format_vmstate) {
+        error_report("Unsupported old non-softfloat CPU state");
+        return -EINVAL;
+    }
     /*
      * Real mode guest segments register DPL should be zero.
      * Older KVM version were setting it wrongly.
@@ -930,7 +804,7 @@
 VMStateDescription vmstate_x86_cpu = {
     .name = "cpu",
     .version_id = 12,
-    .minimum_version_id = 3,
+    .minimum_version_id = 11,
     .pre_save = cpu_pre_save,
     .post_load = cpu_post_load,
     .fields = (VMStateField[]) {
@@ -943,7 +817,8 @@
         VMSTATE_UINT16(env.fpus_vmstate, X86CPU),
         VMSTATE_UINT16(env.fptag_vmstate, X86CPU),
         VMSTATE_UINT16(env.fpregs_format_vmstate, X86CPU),
-        VMSTATE_FP_REGS(env.fpregs, X86CPU, 8),
+
+        VMSTATE_STRUCT_ARRAY(env.fpregs, X86CPU, 8, 0, vmstate_fpreg, FPReg),
 
         VMSTATE_SEGMENT_ARRAY(env.segs, X86CPU, 6),
         VMSTATE_SEGMENT(env.ldt, X86CPU),
@@ -952,16 +827,8 @@
         VMSTATE_SEGMENT(env.idt, X86CPU),
 
         VMSTATE_UINT32(env.sysenter_cs, X86CPU),
-#ifdef TARGET_X86_64
-        /* Hack: In v7 size changed from 32 to 64 bits on x86_64 */
-        VMSTATE_HACK_UINT32(env.sysenter_esp, X86CPU, less_than_7),
-        VMSTATE_HACK_UINT32(env.sysenter_eip, X86CPU, less_than_7),
-        VMSTATE_UINTTL_V(env.sysenter_esp, X86CPU, 7),
-        VMSTATE_UINTTL_V(env.sysenter_eip, X86CPU, 7),
-#else
         VMSTATE_UINTTL(env.sysenter_esp, X86CPU),
         VMSTATE_UINTTL(env.sysenter_eip, X86CPU),
-#endif
 
         VMSTATE_UINTTL(env.cr[0], X86CPU),
         VMSTATE_UINTTL(env.cr[2], X86CPU),
@@ -982,46 +849,45 @@
         VMSTATE_UINT64(env.fmask, X86CPU),
         VMSTATE_UINT64(env.kernelgsbase, X86CPU),
 #endif
-        VMSTATE_UINT32_V(env.smbase, X86CPU, 4),
+        VMSTATE_UINT32(env.smbase, X86CPU),
 
-        VMSTATE_UINT64_V(env.pat, X86CPU, 5),
-        VMSTATE_UINT32_V(env.hflags2, X86CPU, 5),
+        VMSTATE_UINT64(env.pat, X86CPU),
+        VMSTATE_UINT32(env.hflags2, X86CPU),
 
-        VMSTATE_UINT32_TEST(parent_obj.halted, X86CPU, version_is_5),
-        VMSTATE_UINT64_V(env.vm_hsave, X86CPU, 5),
-        VMSTATE_UINT64_V(env.vm_vmcb, X86CPU, 5),
-        VMSTATE_UINT64_V(env.tsc_offset, X86CPU, 5),
-        VMSTATE_UINT64_V(env.intercept, X86CPU, 5),
-        VMSTATE_UINT16_V(env.intercept_cr_read, X86CPU, 5),
-        VMSTATE_UINT16_V(env.intercept_cr_write, X86CPU, 5),
-        VMSTATE_UINT16_V(env.intercept_dr_read, X86CPU, 5),
-        VMSTATE_UINT16_V(env.intercept_dr_write, X86CPU, 5),
-        VMSTATE_UINT32_V(env.intercept_exceptions, X86CPU, 5),
-        VMSTATE_UINT8_V(env.v_tpr, X86CPU, 5),
+        VMSTATE_UINT64(env.vm_hsave, X86CPU),
+        VMSTATE_UINT64(env.vm_vmcb, X86CPU),
+        VMSTATE_UINT64(env.tsc_offset, X86CPU),
+        VMSTATE_UINT64(env.intercept, X86CPU),
+        VMSTATE_UINT16(env.intercept_cr_read, X86CPU),
+        VMSTATE_UINT16(env.intercept_cr_write, X86CPU),
+        VMSTATE_UINT16(env.intercept_dr_read, X86CPU),
+        VMSTATE_UINT16(env.intercept_dr_write, X86CPU),
+        VMSTATE_UINT32(env.intercept_exceptions, X86CPU),
+        VMSTATE_UINT8(env.v_tpr, X86CPU),
         /* MTRRs */
-        VMSTATE_UINT64_ARRAY_V(env.mtrr_fixed, X86CPU, 11, 8),
-        VMSTATE_UINT64_V(env.mtrr_deftype, X86CPU, 8),
+        VMSTATE_UINT64_ARRAY(env.mtrr_fixed, X86CPU, 11),
+        VMSTATE_UINT64(env.mtrr_deftype, X86CPU),
         VMSTATE_MTRR_VARS(env.mtrr_var, X86CPU, MSR_MTRRcap_VCNT, 8),
         /* KVM-related states */
-        VMSTATE_INT32_V(env.interrupt_injected, X86CPU, 9),
-        VMSTATE_UINT32_V(env.mp_state, X86CPU, 9),
-        VMSTATE_UINT64_V(env.tsc, X86CPU, 9),
-        VMSTATE_INT32_V(env.exception_injected, X86CPU, 11),
-        VMSTATE_UINT8_V(env.soft_interrupt, X86CPU, 11),
-        VMSTATE_UINT8_V(env.nmi_injected, X86CPU, 11),
-        VMSTATE_UINT8_V(env.nmi_pending, X86CPU, 11),
-        VMSTATE_UINT8_V(env.has_error_code, X86CPU, 11),
-        VMSTATE_UINT32_V(env.sipi_vector, X86CPU, 11),
+        VMSTATE_INT32(env.interrupt_injected, X86CPU),
+        VMSTATE_UINT32(env.mp_state, X86CPU),
+        VMSTATE_UINT64(env.tsc, X86CPU),
+        VMSTATE_INT32(env.exception_injected, X86CPU),
+        VMSTATE_UINT8(env.soft_interrupt, X86CPU),
+        VMSTATE_UINT8(env.nmi_injected, X86CPU),
+        VMSTATE_UINT8(env.nmi_pending, X86CPU),
+        VMSTATE_UINT8(env.has_error_code, X86CPU),
+        VMSTATE_UINT32(env.sipi_vector, X86CPU),
         /* MCE */
-        VMSTATE_UINT64_V(env.mcg_cap, X86CPU, 10),
-        VMSTATE_UINT64_V(env.mcg_status, X86CPU, 10),
-        VMSTATE_UINT64_V(env.mcg_ctl, X86CPU, 10),
-        VMSTATE_UINT64_ARRAY_V(env.mce_banks, X86CPU, MCE_BANKS_DEF * 4, 10),
+        VMSTATE_UINT64(env.mcg_cap, X86CPU),
+        VMSTATE_UINT64(env.mcg_status, X86CPU),
+        VMSTATE_UINT64(env.mcg_ctl, X86CPU),
+        VMSTATE_UINT64_ARRAY(env.mce_banks, X86CPU, MCE_BANKS_DEF * 4),
         /* rdtscp */
-        VMSTATE_UINT64_V(env.tsc_aux, X86CPU, 11),
+        VMSTATE_UINT64(env.tsc_aux, X86CPU),
         /* KVM pvclock msr */
-        VMSTATE_UINT64_V(env.system_time_msr, X86CPU, 11),
-        VMSTATE_UINT64_V(env.wall_clock_msr, X86CPU, 11),
+        VMSTATE_UINT64(env.system_time_msr, X86CPU),
+        VMSTATE_UINT64(env.wall_clock_msr, X86CPU),
         /* XSAVE related fields */
         VMSTATE_UINT64_V(env.xcr0, X86CPU, 12),
         VMSTATE_UINT64_V(env.xstate_bv, X86CPU, 12),

diff --git a/target/ppc/Makefile.objs b/target/ppc/Makefile.objs
index f963777..f92ba67 100644
--- a/target/ppc/Makefile.objs
+++ b/target/ppc/Makefile.objs

@@ -4,6 +4,7 @@
 ifeq ($(CONFIG_SOFTMMU),y)
 obj-y += machine.o mmu_helper.o mmu-hash32.o monitor.o arch_dump.o
 obj-$(TARGET_PPC64) += mmu-hash64.o mmu-book3s-v3.o compat.o
+obj-$(TARGET_PPC64) += mmu-radix64.o
 endif
 obj-$(CONFIG_KVM) += kvm.o
 obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o

diff --git a/target/ppc/cpu-models.h b/target/ppc/cpu-models.h
index d587e69..b563c45 100644
--- a/target/ppc/cpu-models.h
+++ b/target/ppc/cpu-models.h

@@ -561,6 +561,7 @@
     CPU_POWERPC_POWER8NVL_BASE     = 0x004C0000,
     CPU_POWERPC_POWER8NVL_v10      = 0x004C0100,
     CPU_POWERPC_POWER9_BASE        = 0x004E0000,
+    CPU_POWERPC_POWER9_DD1         = 0x004E0100,
     CPU_POWERPC_970_v22            = 0x00390202,
     CPU_POWERPC_970FX_v10          = 0x00391100,
     CPU_POWERPC_970FX_v20          = 0x003C0200,

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index e0ff041..401e10e 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h

@@ -30,6 +30,8 @@
 #define TARGET_LONG_BITS 64
 #define TARGET_PAGE_BITS 12
 
+#define TCG_GUEST_DEFAULT_MO 0
+
 /* Note that the official physical address space bits is 62-M where M
    is implementation dependent.  I've not looked up M for the set of
    cpus we emulate at the system level.  */
@@ -480,6 +482,8 @@
 #define DSISR_ISSTORE            0x02000000
 /* Not permitted by virtual page class key protection */
 #define DSISR_AMR                0x00200000
+/* Unsupported Radix Tree Configuration */
+#define DSISR_R_BADCONFIG        0x00080000
 
 /* SRR1 error code fields */
 
@@ -1221,6 +1225,7 @@
 
 PowerPCCPUClass *ppc_cpu_class_by_pvr(uint32_t pvr);
 PowerPCCPUClass *ppc_cpu_class_by_pvr_mask(uint32_t pvr);
+PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc);
 
 struct PPCVirtualHypervisor {
     Object parent;

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index f4ee7aa..a6bcb47 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c

@@ -728,6 +728,9 @@
     cs->exception_index = POWERPC_EXCP_NONE;
     env->error_code = 0;
 
+    /* Reset the reservation */
+    env->reserve_addr = -1;
+
     /* Any interrupt is context synchronizing, check if TCG TLB
      * needs a delayed flush on ppc64
      */

diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 8574c36..51249ce 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c

@@ -2380,6 +2380,17 @@
 
 #if defined(TARGET_PPC64)
     pcc->radix_page_info = kvm_get_radix_page_info();
+
+    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
+        /*
+         * POWER9 DD1 has some bugs which make it not really ISA 3.00
+         * compliant.  More importantly, advertising ISA 3.00
+         * architected mode may prevent guests from activating
+         * necessary DD1 workarounds.
+         */
+        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
+                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
+    }
 #endif /* defined(TARGET_PPC64) */
 }
 
@@ -2413,18 +2424,6 @@
     return cap_mmu_hash_v3;
 }
 
-static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
-{
-    ObjectClass *oc = OBJECT_CLASS(pcc);
-
-    while (oc && !object_class_is_abstract(oc)) {
-        oc = object_class_get_parent(oc);
-    }
-    assert(oc);
-
-    return POWERPC_CPU_CLASS(oc);
-}
-
 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
 {
     uint32_t host_pvr = mfpvr();

diff --git a/target/ppc/mmu-book3s-v3.c b/target/ppc/mmu-book3s-v3.c
index 005c963..e7798b3 100644
--- a/target/ppc/mmu-book3s-v3.c
+++ b/target/ppc/mmu-book3s-v3.c

@@ -22,15 +22,13 @@
 #include "cpu.h"
 #include "mmu-hash64.h"
 #include "mmu-book3s-v3.h"
-#include "qemu/error-report.h"
+#include "mmu-radix64.h"
 
 int ppc64_v3_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx,
                               int mmu_idx)
 {
     if (ppc64_radix_guest(cpu)) { /* Guest uses radix */
-        /* TODO - Unsupported */
-        error_report("Guest Radix Support Unimplemented");
-        exit(1);
+        return ppc_radix64_handle_mmu_fault(cpu, eaddr, rwx, mmu_idx);
     } else { /* Guest uses hash */
         return ppc_hash64_handle_mmu_fault(cpu, eaddr, rwx, mmu_idx);
     }

diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
index 636f6ab..56095da 100644
--- a/target/ppc/mmu-book3s-v3.h
+++ b/target/ppc/mmu-book3s-v3.h

@@ -25,6 +25,11 @@
 /* Partition Table Entry Fields */
 #define PATBE1_GR 0x8000000000000000
 
+/* Process Table Entry */
+struct prtb_entry {
+    uint64_t prtbe0, prtbe1;
+};
+
 #ifdef TARGET_PPC64
 
 static inline bool ppc64_use_proc_tbl(PowerPCCPU *cpu)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
new file mode 100644
index 0000000..de18c0b
--- /dev/null
+++ b/target/ppc/mmu-radix64.c

@@ -0,0 +1,259 @@
+/*
+ *  PowerPC Radix MMU mulation helpers for QEMU.
+ *
+ *  Copyright (c) 2016 Suraj Jitindar Singh, IBM Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "qemu/error-report.h"
+#include "sysemu/kvm.h"
+#include "kvm_ppc.h"
+#include "exec/log.h"
+#include "mmu-radix64.h"
+#include "mmu-book3s-v3.h"
+
+static bool ppc_radix64_get_fully_qualified_addr(CPUPPCState *env, vaddr eaddr,
+                                                 uint64_t *lpid, uint64_t *pid)
+{
+    /* We don't have HV support yet and shouldn't get here with it set anyway */
+    assert(!msr_hv);
+
+    if (!msr_hv) { /* !MSR[HV] -> Guest */
+        switch (eaddr & R_EADDR_QUADRANT) {
+        case R_EADDR_QUADRANT0: /* Guest application */
+            *lpid = env->spr[SPR_LPIDR];
+            *pid = env->spr[SPR_BOOKS_PID];
+            break;
+        case R_EADDR_QUADRANT1: /* Illegal */
+        case R_EADDR_QUADRANT2:
+            return false;
+        case R_EADDR_QUADRANT3: /* Guest OS */
+            *lpid = env->spr[SPR_LPIDR];
+            *pid = 0; /* pid set to 0 -> addresses guest operating system */
+            break;
+        }
+    }
+
+    return true;
+}
+
+static void ppc_radix64_raise_segi(PowerPCCPU *cpu, int rwx, vaddr eaddr)
+{
+    CPUState *cs = CPU(cpu);
+    CPUPPCState *env = &cpu->env;
+
+    if (rwx == 2) { /* Instruction Segment Interrupt */
+        cs->exception_index = POWERPC_EXCP_ISEG;
+    } else { /* Data Segment Interrupt */
+        cs->exception_index = POWERPC_EXCP_DSEG;
+        env->spr[SPR_DAR] = eaddr;
+    }
+    env->error_code = 0;
+}
+
+static void ppc_radix64_raise_si(PowerPCCPU *cpu, int rwx, vaddr eaddr,
+                                uint32_t cause)
+{
+    CPUState *cs = CPU(cpu);
+    CPUPPCState *env = &cpu->env;
+
+    if (rwx == 2) { /* Instruction Storage Interrupt */
+        cs->exception_index = POWERPC_EXCP_ISI;
+        env->error_code = cause;
+    } else { /* Data Storage Interrupt */
+        cs->exception_index = POWERPC_EXCP_DSI;
+        if (rwx == 1) { /* Write -> Store */
+            cause |= DSISR_ISSTORE;
+        }
+        env->spr[SPR_DSISR] = cause;
+        env->spr[SPR_DAR] = eaddr;
+        env->error_code = 0;
+    }
+}
+
+
+static bool ppc_radix64_check_prot(PowerPCCPU *cpu, int rwx, uint64_t pte,
+                                   int *fault_cause, int *prot)
+{
+    CPUPPCState *env = &cpu->env;
+    const int need_prot[] = { PAGE_READ, PAGE_WRITE, PAGE_EXEC };
+
+    /* Check Page Attributes (pte58:59) */
+    if (((pte & R_PTE_ATT) == R_PTE_ATT_NI_IO) && (rwx == 2)) {
+        /*
+         * Radix PTE entries with the non-idempotent I/O attribute are treated
+         * as guarded storage
+         */
+        *fault_cause |= SRR1_NOEXEC_GUARD;
+        return true;
+    }
+
+    /* Determine permissions allowed by Encoded Access Authority */
+    if ((pte & R_PTE_EAA_PRIV) && msr_pr) { /* Insufficient Privilege */
+        *prot = 0;
+    } else if (msr_pr || (pte & R_PTE_EAA_PRIV)) {
+        *prot = ppc_radix64_get_prot_eaa(pte);
+    } else { /* !msr_pr && !(pte & R_PTE_EAA_PRIV) */
+        *prot = ppc_radix64_get_prot_eaa(pte);
+        *prot &= ppc_radix64_get_prot_amr(cpu); /* Least combined permissions */
+    }
+
+    /* Check if requested access type is allowed */
+    if (need_prot[rwx] & ~(*prot)) { /* Page Protected for that Access */
+        *fault_cause |= DSISR_PROTFAULT;
+        return true;
+    }
+
+    return false;
+}
+
+static void ppc_radix64_set_rc(PowerPCCPU *cpu, int rwx, uint64_t pte,
+                               hwaddr pte_addr, int *prot)
+{
+    CPUState *cs = CPU(cpu);
+    uint64_t npte;
+
+    npte = pte | R_PTE_R; /* Always set reference bit */
+
+    if (rwx == 1) { /* Store/Write */
+        npte |= R_PTE_C; /* Set change bit */
+    } else {
+        /*
+         * Treat the page as read-only for now, so that a later write
+         * will pass through this function again to set the C bit.
+         */
+        *prot &= ~PAGE_WRITE;
+    }
+
+    if (pte ^ npte) { /* If pte has changed then write it back */
+        stq_phys(cs->as, pte_addr, npte);
+    }
+}
+
+static uint64_t ppc_radix64_walk_tree(PowerPCCPU *cpu, int rwx, vaddr eaddr,
+                                      uint64_t base_addr, uint64_t nls,
+                                      hwaddr *raddr, int *psize,
+                                      int *fault_cause, int *prot,
+                                      hwaddr *pte_addr)
+{
+    CPUState *cs = CPU(cpu);
+    uint64_t index, pde;
+
+    if (nls < 5) { /* Directory maps less than 2**5 entries */
+        *fault_cause |= DSISR_R_BADCONFIG;
+        return 0;
+    }
+
+    /* Read page <directory/table> entry from guest address space */
+    index = eaddr >> (*psize - nls); /* Shift */
+    index &= ((1UL << nls) - 1); /* Mask */
+    pde = ldq_phys(cs->as, base_addr + (index * sizeof(pde)));
+    if (!(pde & R_PTE_VALID)) { /* Invalid Entry */
+        *fault_cause |= DSISR_NOPTE;
+        return 0;
+    }
+
+    *psize -= nls;
+
+    /* Check if Leaf Entry -> Page Table Entry -> Stop the Search */
+    if (pde & R_PTE_LEAF) {
+        uint64_t rpn = pde & R_PTE_RPN;
+        uint64_t mask = (1UL << *psize) - 1;
+
+        if (ppc_radix64_check_prot(cpu, rwx, pde, fault_cause, prot)) {
+            return 0; /* Protection Denied Access */
+        }
+
+        /* Or high bits of rpn and low bits to ea to form whole real addr */
+        *raddr = (rpn & ~mask) | (eaddr & mask);
+        *pte_addr = base_addr + (index * sizeof(pde));
+        return pde;
+    }
+
+    /* Next Level of Radix Tree */
+    return ppc_radix64_walk_tree(cpu, rwx, eaddr, pde & R_PDE_NLB,
+                                 pde & R_PDE_NLS, raddr, psize,
+                                 fault_cause, prot, pte_addr);
+}
+
+int ppc_radix64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx,
+                                 int mmu_idx)
+{
+    CPUState *cs = CPU(cpu);
+    CPUPPCState *env = &cpu->env;
+    PPCVirtualHypervisorClass *vhc =
+        PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
+    hwaddr raddr, pte_addr;
+    uint64_t lpid = 0, pid = 0, offset, size, patbe, prtbe0, pte;
+    int page_size, prot, fault_cause = 0;
+
+    assert((rwx == 0) || (rwx == 1) || (rwx == 2));
+    assert(!msr_hv); /* For now there is no Radix PowerNV Support */
+    assert(cpu->vhyp);
+    assert(ppc64_use_proc_tbl(cpu));
+
+    /* Real Mode Access */
+    if (((rwx == 2) && (msr_ir == 0)) || ((rwx != 2) && (msr_dr == 0))) {
+        /* In real mode top 4 effective addr bits (mostly) ignored */
+        raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL;
+
+        tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
+                     PAGE_READ | PAGE_WRITE | PAGE_EXEC, mmu_idx,
+                     TARGET_PAGE_SIZE);
+        return 0;
+    }
+
+    /* Virtual Mode Access - get the fully qualified address */
+    if (!ppc_radix64_get_fully_qualified_addr(env, eaddr, &lpid, &pid)) {
+        ppc_radix64_raise_segi(cpu, rwx, eaddr);
+        return 1;
+    }
+
+    /* Get Process Table */
+    patbe = vhc->get_patbe(cpu->vhyp);
+
+    /* Index Process Table by PID to Find Corresponding Process Table Entry */
+    offset = pid * sizeof(struct prtb_entry);
+    size = 1ULL << ((patbe & PATBE1_R_PRTS) + 12);
+    if (offset >= size) {
+        /* offset exceeds size of the process table */
+        ppc_radix64_raise_si(cpu, rwx, eaddr, DSISR_NOPTE);
+        return 1;
+    }
+    prtbe0 = ldq_phys(cs->as, (patbe & PATBE1_R_PRTB) + offset);
+
+    /* Walk Radix Tree from Process Table Entry to Convert EA to RA */
+    page_size = PRTBE_R_GET_RTS(prtbe0);
+    pte = ppc_radix64_walk_tree(cpu, rwx, eaddr & R_EADDR_MASK,
+                                prtbe0 & PRTBE_R_RPDB, prtbe0 & PRTBE_R_RPDS,
+                                &raddr, &page_size, &fault_cause, &prot,
+                                &pte_addr);
+    if (!pte) {
+        ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause);
+        return 1;
+    }
+
+    /* Update Reference and Change Bits */
+    ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, &prot);
+
+    tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
+                 prot, mmu_idx, 1UL << page_size);
+    return 1;
+}

diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h
new file mode 100644
index 0000000..1d5c7cf
--- /dev/null
+++ b/target/ppc/mmu-radix64.h

@@ -0,0 +1,72 @@
+#ifndef MMU_RADIX64_H
+#define MMU_RADIX64_H
+
+#ifndef CONFIG_USER_ONLY
+
+/* Radix Quadrants */
+#define R_EADDR_MASK            0x3FFFFFFFFFFFFFFF
+#define R_EADDR_QUADRANT        0xC000000000000000
+#define R_EADDR_QUADRANT0       0x0000000000000000
+#define R_EADDR_QUADRANT1       0x4000000000000000
+#define R_EADDR_QUADRANT2       0x8000000000000000
+#define R_EADDR_QUADRANT3       0xC000000000000000
+
+/* Radix Partition Table Entry Fields */
+#define PATBE1_R_PRTB           0x0FFFFFFFFFFFF000
+#define PATBE1_R_PRTS           0x000000000000001F
+
+/* Radix Process Table Entry Fields */
+#define PRTBE_R_GET_RTS(rts) \
+    ((((rts >> 58) & 0x18) | ((rts >> 5) & 0x7)) + 31)
+#define PRTBE_R_RPDB            0x0FFFFFFFFFFFFF00
+#define PRTBE_R_RPDS            0x000000000000001F
+
+/* Radix Page Directory/Table Entry Fields */
+#define R_PTE_VALID             0x8000000000000000
+#define R_PTE_LEAF              0x4000000000000000
+#define R_PTE_SW0               0x2000000000000000
+#define R_PTE_RPN               0x01FFFFFFFFFFF000
+#define R_PTE_SW1               0x0000000000000E00
+#define R_GET_SW(sw)            (((sw >> 58) & 0x8) | ((sw >> 9) & 0x7))
+#define R_PTE_R                 0x0000000000000100
+#define R_PTE_C                 0x0000000000000080
+#define R_PTE_ATT               0x0000000000000030
+#define R_PTE_ATT_NORMAL        0x0000000000000000
+#define R_PTE_ATT_SAO           0x0000000000000010
+#define R_PTE_ATT_NI_IO         0x0000000000000020
+#define R_PTE_ATT_TOLERANT_IO   0x0000000000000030
+#define R_PTE_EAA_PRIV          0x0000000000000008
+#define R_PTE_EAA_R             0x0000000000000004
+#define R_PTE_EAA_RW            0x0000000000000002
+#define R_PTE_EAA_X             0x0000000000000001
+#define R_PDE_NLB               PRTBE_R_RPDB
+#define R_PDE_NLS               PRTBE_R_RPDS
+
+#ifdef TARGET_PPC64
+
+int ppc_radix64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx,
+                                 int mmu_idx);
+
+static inline int ppc_radix64_get_prot_eaa(uint64_t pte)
+{
+    return (pte & R_PTE_EAA_R ? PAGE_READ : 0) |
+           (pte & R_PTE_EAA_RW ? PAGE_READ | PAGE_WRITE : 0) |
+           (pte & R_PTE_EAA_X ? PAGE_EXEC : 0);
+}
+
+static inline int ppc_radix64_get_prot_amr(PowerPCCPU *cpu)
+{
+    CPUPPCState *env = &cpu->env;
+    int amr = env->spr[SPR_AMR] >> 62; /* We only care about key0 AMR63:62 */
+    int iamr = env->spr[SPR_IAMR] >> 62; /* We only care about key0 IAMR63:62 */
+
+    return (amr & 0x2 ? 0 : PAGE_WRITE) | /* Access denied if bit is set */
+           (amr & 0x1 ? 0 : PAGE_READ) |
+           (iamr & 0x1 ? 0 : PAGE_EXEC);
+}
+
+#endif /* TARGET_PPC64 */
+
+#endif /* CONFIG_USER_ONLY */
+
+#endif /* MMU_RADIX64_H */

diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index f40b5a1..c0cd64d 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c

@@ -73,6 +73,7 @@
 #endif
 static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32;
 static TCGv cpu_reserve;
+static TCGv cpu_reserve_val;
 static TCGv cpu_fpscr;
 static TCGv_i32 cpu_access_type;
 
@@ -181,6 +182,9 @@
     cpu_reserve = tcg_global_mem_new(cpu_env,
                                      offsetof(CPUPPCState, reserve_addr),
                                      "reserve_addr");
+    cpu_reserve_val = tcg_global_mem_new(cpu_env,
+                                     offsetof(CPUPPCState, reserve_val),
+                                     "reserve_val");
 
     cpu_fpscr = tcg_global_mem_new(cpu_env,
                                    offsetof(CPUPPCState, fpscr), "fpscr");
@@ -214,6 +218,7 @@
     bool vsx_enabled;
     bool spe_enabled;
     bool tm_enabled;
+    bool gtse;
     ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
     int singlestep_enabled;
     uint64_t insns_flags;
@@ -2967,6 +2972,7 @@
 /* eieio */
 static void gen_eieio(DisasContext *ctx)
 {
+    tcg_gen_mb(TCG_MO_LD_ST | TCG_BAR_SC);
 }
 
 #if !defined(CONFIG_USER_ONLY)
@@ -3004,6 +3010,7 @@
     if (!ctx->pr) {
         gen_check_tlb_flush(ctx, false);
     }
+    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
     gen_stop_exception(ctx);
 }
 
@@ -3023,7 +3030,8 @@
     }                                                                \
     tcg_gen_qemu_ld_tl(gpr, t0, ctx->mem_idx, memop);                \
     tcg_gen_mov_tl(cpu_reserve, t0);                                 \
-    tcg_gen_st_tl(gpr, cpu_env, offsetof(CPUPPCState, reserve_val)); \
+    tcg_gen_mov_tl(cpu_reserve_val, gpr);                            \
+    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);                           \
     tcg_temp_free(t0);                                               \
 }
 
@@ -3155,14 +3163,31 @@
 static void gen_conditional_store(DisasContext *ctx, TCGv EA,
                                   int reg, int memop)
 {
-    TCGLabel *l1;
+    TCGLabel *l1 = gen_new_label();
+    TCGLabel *l2 = gen_new_label();
+    TCGv t0;
 
-    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
-    l1 = gen_new_label();
     tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1);
-    tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ);
-    tcg_gen_qemu_st_tl(cpu_gpr[reg], EA, ctx->mem_idx, memop);
+
+    t0 = tcg_temp_new();
+    tcg_gen_atomic_cmpxchg_tl(t0, cpu_reserve, cpu_reserve_val,
+                              cpu_gpr[reg], ctx->mem_idx,
+                              DEF_MEMOP(memop) | MO_ALIGN);
+    tcg_gen_setcond_tl(TCG_COND_EQ, t0, t0, cpu_reserve_val);
+    tcg_gen_shli_tl(t0, t0, CRF_EQ_BIT);
+    tcg_gen_or_tl(t0, t0, cpu_so);
+    tcg_gen_trunc_tl_i32(cpu_crf[0], t0);
+    tcg_temp_free(t0);
+    tcg_gen_br(l2);
+
     gen_set_label(l1);
+
+    /* Address mismatch implies failure.  But we still need to provide the
+       memory barrier semantics of the instruction.  */
+    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
+
+    gen_set_label(l2);
     tcg_gen_movi_tl(cpu_reserve, -1);
 }
 #endif
@@ -3291,6 +3316,7 @@
     if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) {
         gen_check_tlb_flush(ctx, true);
     }
+    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
 }
 
 /* wait */
@@ -4513,7 +4539,12 @@
     GEN_PRIV;
 #else
     TCGv_i32 t1;
-    CHK_HV;
+
+    if (ctx->gtse) {
+        CHK_SV; /* If gtse is set then tblie is supervisor privileged */
+    } else {
+        CHK_HV; /* Else hypervisor privileged */
+    }
 
     if (NARROW_MODE(ctx)) {
         TCGv t0 = tcg_temp_new();
@@ -6547,6 +6578,8 @@
  * different ISA versions */
 GEN_HANDLER(tlbiel, 0x1F, 0x12, 0x08, 0x001F0001, PPC_MEM_TLBIE),
 GEN_HANDLER(tlbie, 0x1F, 0x12, 0x09, 0x001F0001, PPC_MEM_TLBIE),
+GEN_HANDLER_E(tlbiel, 0x1F, 0x12, 0x08, 0x00100001, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(tlbie, 0x1F, 0x12, 0x09, 0x00100001, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER(tlbsync, 0x1F, 0x16, 0x11, 0x03FFF801, PPC_MEM_TLBSYNC),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(slbia, 0x1F, 0x12, 0x0F, 0x031FFC01, PPC_SLBI),
@@ -7227,6 +7260,7 @@
         ctx.tm_enabled = false;
     }
 #endif
+    ctx.gtse = !!(env->spr[SPR_LPCR] & LPCR_GTSE);
     if ((env->flags & POWERPC_FLAG_SE) && msr_se)
         ctx.singlestep_enabled = CPU_SINGLE_STEP;
     else

diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c
index e82e3e6..56a0ab2 100644
--- a/target/ppc/translate_init.c
+++ b/target/ppc/translate_init.c

@@ -8960,7 +8960,7 @@
                        PPC_FLOAT_EXT |
                        PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
                        PPC_MEM_SYNC | PPC_MEM_EIEIO |
-                       PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
+                       PPC_MEM_TLBSYNC |
                        PPC_64B | PPC_64BX | PPC_ALTIVEC |
                        PPC_SEGMENT_64B | PPC_SLBI |
                        PPC_POPCNTB | PPC_POPCNTWD |
@@ -10285,6 +10285,18 @@
     return POWERPC_CPU(cpu_generic_init(TYPE_POWERPC_CPU, cpu_model));
 }
 
+PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
+{
+    ObjectClass *oc = OBJECT_CLASS(pcc);
+
+    while (oc && !object_class_is_abstract(oc)) {
+        oc = object_class_get_parent(oc);
+    }
+    assert(oc);
+
+    return POWERPC_CPU_CLASS(oc);
+}
+
 /* Sort by PVR, ordering special case "host" last. */
 static gint ppc_cpu_list_compare(gconstpointer a, gconstpointer b)
 {
@@ -10316,6 +10328,7 @@
     ObjectClass *oc = data;
     CPUListState *s = user_data;
     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
+    DeviceClass *family = DEVICE_CLASS(ppc_cpu_get_family_class(pcc));
     const char *typename = object_class_get_name(oc);
     char *name;
     int i;
@@ -10338,8 +10351,18 @@
         if (alias_oc != oc) {
             continue;
         }
-        (*s->cpu_fprintf)(s->file, "PowerPC %-16s (alias for %s)\n",
-                          alias->alias, name);
+        /*
+         * If running with KVM, we might update the family alias later, so
+         * avoid printing the wrong alias here and use "preferred" instead
+         */
+        if (strcmp(alias->alias, family->desc) == 0) {
+            (*s->cpu_fprintf)(s->file,
+                              "PowerPC %-16s (alias for preferred %s CPU)\n",
+                              alias->alias, family->desc);
+        } else {
+            (*s->cpu_fprintf)(s->file, "PowerPC %-16s (alias for %s)\n",
+                              alias->alias, name);
+        }
     }
     g_free(name);
 }
@@ -10436,14 +10459,6 @@
     return msr_ee && (cs->interrupt_request & CPU_INTERRUPT_HARD);
 }
 
-static void ppc_cpu_exec_enter(CPUState *cs)
-{
-    PowerPCCPU *cpu = POWERPC_CPU(cs);
-    CPUPPCState *env = &cpu->env;
-
-    env->reserve_addr = -1;
-}
-
 /* CPUClass::reset() */
 static void ppc_cpu_reset(CPUState *s)
 {
@@ -10660,7 +10675,6 @@
     cc->get_phys_page_debug = ppc_cpu_get_phys_page_debug;
     cc->vmsd = &vmstate_ppc_cpu;
 #endif
-    cc->cpu_exec_enter = ppc_cpu_exec_enter;
 #if defined(CONFIG_SOFTMMU)
     cc->write_elf64_note = ppc64_cpu_write_elf64_note;
     cc->write_elf32_note = ppc32_cpu_write_elf32_note;

diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index 066dcd1..a1bf2ba 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c

@@ -430,6 +430,7 @@
     cc->write_elf64_note = s390_cpu_write_elf64_note;
     cc->cpu_exec_interrupt = s390_cpu_exec_interrupt;
     cc->debug_excp_handler = s390x_cpu_debug_excp_handler;
+    cc->do_unaligned_access = s390x_cpu_do_unaligned_access;
 #endif
     cc->disas_set_info = s390_cpu_disas_set_info;
 

diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 058ddad..240b8a5 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h

@@ -480,6 +480,9 @@
 
 #ifndef CONFIG_USER_ONLY
 void do_restart_interrupt(CPUS390XState *env);
+void s390x_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
+                                   MMUAccessType access_type,
+                                   int mmu_idx, uintptr_t retaddr);
 
 static inline hwaddr decode_basedisp_s(CPUS390XState *env, uint32_t ipb,
                                        uint8_t *ar)
@@ -1075,6 +1078,9 @@
 #define SIGP_MODE_Z_ARCH_TRANS_ALL_PSW 1
 #define SIGP_MODE_Z_ARCH_TRANS_CUR_PSW 2
 
+/* SIGP order code mask corresponding to bit positions 56-63 */
+#define SIGP_ORDER_MASK 0x000000ff
+
 void load_psw(CPUS390XState *env, uint64_t mask, uint64_t addr);
 int mmu_translate(CPUS390XState *env, target_ulong vaddr, int rw, uint64_t asc,
                   target_ulong *raddr, int *flags, bool exc);

diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index 68bd2f9..9978490 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c

@@ -718,4 +718,20 @@
         cpu_loop_exit_noexc(cs);
     }
 }
+
+/* Unaligned accesses are only diagnosed with MO_ALIGN.  At the moment,
+   this is only for the atomic operations, for which we want to raise a
+   specification exception.  */
+void s390x_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
+                                   MMUAccessType access_type,
+                                   int mmu_idx, uintptr_t retaddr)
+{
+    S390CPU *cpu = S390_CPU(cs);
+    CPUS390XState *env = &cpu->env;
+
+    if (retaddr) {
+        cpu_restore_state(cs, retaddr);
+    }
+    program_interrupt(env, PGM_SPECIFICATION, ILEN_LATER);
+}
 #endif /* CONFIG_USER_ONLY */

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 9102071..0b70770 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h

@@ -25,6 +25,7 @@
 DEF_HELPER_3(celgb, i64, env, i64, i32)
 DEF_HELPER_3(cdlgb, i64, env, i64, i32)
 DEF_HELPER_3(cxlgb, i64, env, i64, i32)
+DEF_HELPER_4(cdsg, void, env, i64, i32, i32)
 DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(adb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_5(axb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64)
@@ -83,6 +84,8 @@
 DEF_HELPER_FLAGS_2(sfpc, TCG_CALL_NO_RWG, void, env, i64)
 DEF_HELPER_FLAGS_2(sfas, TCG_CALL_NO_WG, void, env, i64)
 DEF_HELPER_FLAGS_1(popcnt, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(stfl, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_2(stfle, i32, env, i64)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)

diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 075ff59..55a7c52 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def

@@ -239,12 +239,12 @@
     D(0xec7d, CLGIJ,   RIE_c, GIE, r1_o, i2_8u, 0, 0, cj, 0, 1)
 
 /* COMPARE AND SWAP */
-    D(0xba00, CS,      RS_a,  Z,   r3_32u, r1_32u, new, r1_32, cs, 0, 0)
-    D(0xeb14, CSY,     RSY_a, LD,  r3_32u, r1_32u, new, r1_32, cs, 0, 0)
-    D(0xeb30, CSG,     RSY_a, Z,   r3_o, r1_o, new, r1, cs, 0, 1)
+    D(0xba00, CS,      RS_a,  Z,   r3_32u, r1_32u, new, r1_32, cs, 0, MO_TEUL)
+    D(0xeb14, CSY,     RSY_a, LD,  r3_32u, r1_32u, new, r1_32, cs, 0, MO_TEUL)
+    D(0xeb30, CSG,     RSY_a, Z,   r3_o, r1_o, new, r1, cs, 0, MO_TEQ)
 /* COMPARE DOUBLE AND SWAP */
-    D(0xbb00, CDS,     RS_a,  Z,   r3_D32, r1_D32, new, r1_D32, cs, 0, 1)
-    D(0xeb31, CDSY,    RSY_a, LD,  r3_D32, r1_D32, new, r1_D32, cs, 0, 1)
+    D(0xbb00, CDS,     RS_a,  Z,   r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEQ)
+    D(0xeb31, CDSY,    RSY_a, LD,  r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEQ)
     C(0xeb3e, CDSG,    RSY_a, Z,   0, 0, 0, 0, cdsg, 0)
 
 /* COMPARE AND TRAP */
@@ -390,20 +390,20 @@
 /* LOAD ADDRESS RELATIVE LONG */
     C(0xc000, LARL,    RIL_b, Z,   0, ri2, 0, r1, mov2, 0)
 /* LOAD AND ADD */
-    C(0xebf8, LAA,     RSY_a, ILA, r3_32s, m2_32s_atomic, new, m2_32_r1_atomic, add, adds32)
-    C(0xebe8, LAAG,    RSY_a, ILA, r3, m2_64_atomic, new, m2_64_r1_atomic, add, adds64)
+    D(0xebf8, LAA,     RSY_a, ILA, r3_32s, a2, new, in2_r1_32, laa, adds32, MO_TESL)
+    D(0xebe8, LAAG,    RSY_a, ILA, r3, a2, new, in2_r1, laa, adds64, MO_TEQ)
 /* LOAD AND ADD LOGICAL */
-    C(0xebfa, LAAL,    RSY_a, ILA, r3_32s, m2_32s_atomic, new, m2_32_r1_atomic, add, addu32)
-    C(0xebea, LAALG,   RSY_a, ILA, r3, m2_64_atomic, new, m2_64_r1_atomic, add, addu64)
+    D(0xebfa, LAAL,    RSY_a, ILA, r3_32u, a2, new, in2_r1_32, laa, addu32, MO_TEUL)
+    D(0xebea, LAALG,   RSY_a, ILA, r3, a2, new, in2_r1, laa, addu64, MO_TEQ)
 /* LOAD AND AND */
-    C(0xebf4, LAN,     RSY_a, ILA, r3_32s, m2_32s_atomic, new, m2_32_r1_atomic, and, nz32)
-    C(0xebe4, LANG,    RSY_a, ILA, r3, m2_64_atomic, new, m2_64_r1_atomic, and, nz64)
+    D(0xebf4, LAN,     RSY_a, ILA, r3_32s, a2, new, in2_r1_32, lan, nz32, MO_TESL)
+    D(0xebe4, LANG,    RSY_a, ILA, r3, a2, new, in2_r1, lan, nz64, MO_TEQ)
 /* LOAD AND EXCLUSIVE OR */
-    C(0xebf7, LAX,     RSY_a, ILA, r3_32s, m2_32s_atomic, new, m2_32_r1_atomic, xor, nz32)
-    C(0xebe7, LAXG,    RSY_a, ILA, r3, m2_64_atomic, new, m2_64_r1_atomic, xor, nz64)
+    D(0xebf7, LAX,     RSY_a, ILA, r3_32s, a2, new, in2_r1_32, lax, nz32, MO_TESL)
+    D(0xebe7, LAXG,    RSY_a, ILA, r3, a2, new, in2_r1, lax, nz64, MO_TEQ)
 /* LOAD AND OR */
-    C(0xebf6, LAO,     RSY_a, ILA, r3_32s, m2_32s_atomic, new, m2_32_r1_atomic, or, nz32)
-    C(0xebe6, LAOG,    RSY_a, ILA, r3, m2_64_atomic, new, m2_64_r1_atomic, or, nz64)
+    D(0xebf6, LAO,     RSY_a, ILA, r3_32s, a2, new, in2_r1_32, lao, nz32, MO_TESL)
+    D(0xebe6, LAOG,    RSY_a, ILA, r3, a2, new, in2_r1, lao, nz64, MO_TEQ)
 /* LOAD AND TEST */
     C(0x1200, LTR,     RR_a,  Z,   0, r2_o, 0, cond_r1r2_32, mov2, s32)
     C(0xb902, LTGR,    RRE,   Z,   0, r2_o, 0, r1, mov2, s64)
@@ -504,7 +504,9 @@
     C(0xb9e2, LOCGR,   RRF_c, LOC, r1, r2, r1, 0, loc, 0)
     C(0xebf2, LOC,     RSY_b, LOC, r1, m2_32u, new, r1_32, loc, 0)
     C(0xebe2, LOCG,    RSY_b, LOC, r1, m2_64, r1, 0, loc, 0)
-/* LOAD PAIR DISJOINT TODO */
+/* LOAD PAIR DISJOINT */
+    D(0xc804, LPD,     SSF,   ILA, 0, 0, new_P, r3_P32, lpd, 0, MO_TEUL)
+    D(0xc805, LPDG,    SSF,   ILA, 0, 0, new_P, r3_P64, lpd, 0, MO_TEQ)
 /* LOAD POSITIVE */
     C(0x1000, LPR,     RR_a,  Z,   0, r2_32s, new, r1_32, abs, abs32)
     C(0xb900, LPGR,    RRE,   Z,   0, r2, r1, 0, abs, abs64)
@@ -747,6 +749,8 @@
     C(0xe33e, STRV,    RXY_a, Z,   la2, r1_32u, new, m1_32, rev32, 0)
     C(0xe32f, STRVG,   RXY_a, Z,   la2, r1_o, new, m1_64, rev64, 0)
 
+/* STORE FACILITY LIST EXTENDED */
+    C(0xb2b0, STFLE,   S,  SFLE,   0, a2, 0, 0, stfle, 0)
 /* STORE FPC */
     C(0xb29c, STFPC,   S,     Z,   0, a2, new, m2_32, efpc, 0)
 
@@ -843,6 +847,8 @@
 /* LOAD CONTROL */
     C(0xb700, LCTL,    RS_a,  Z,   0, a2, 0, 0, lctl, 0)
     C(0xeb2f, LCTLG,   RSY_a, Z,   0, a2, 0, 0, lctlg, 0)
+/* LOAD PROGRAM PARAMETER */
+    C(0xb280, LPP,     S,   LPP,   0, m2_64, 0, 0, lpp, 0)
 /* LOAD PSW */
     C(0x8200, LPSW,    S,     Z,   0, a2, 0, 0, lpsw, 0)
 /* LOAD PSW EXTENDED */

diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 1a249d8..fb10542 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c

@@ -1764,8 +1764,6 @@
     return SIGP_CC_ORDER_CODE_ACCEPTED;
 }
 
-#define SIGP_ORDER_MASK 0x000000ff
-
 static int handle_sigp(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1)
 {
     CPUS390XState *env = &cpu->env;

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index 675aba2..f6e5bce 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c

@@ -23,6 +23,7 @@
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
+#include "qemu/int128.h"
 
 #if !defined(CONFIG_USER_ONLY)
 #include "hw/s390x/storage-keys.h"
@@ -844,6 +845,45 @@
     return cc;
 }
 
+void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
+                  uint32_t r1, uint32_t r3)
+{
+    uintptr_t ra = GETPC();
+    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
+    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
+    Int128 oldv;
+    bool fail;
+
+    if (parallel_cpus) {
+#ifndef CONFIG_ATOMIC128
+        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+#else
+        int mem_idx = cpu_mmu_index(env, false);
+        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+        oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
+        fail = !int128_eq(oldv, cmpv);
+#endif
+    } else {
+        uint64_t oldh, oldl;
+
+        oldh = cpu_ldq_data_ra(env, addr + 0, ra);
+        oldl = cpu_ldq_data_ra(env, addr + 8, ra);
+
+        oldv = int128_make128(oldl, oldh);
+        fail = !int128_eq(oldv, cmpv);
+        if (fail) {
+            newv = oldv;
+        }
+
+        cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
+        cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
+    }
+
+    env->cc_op = fail;
+    env->regs[r1] = int128_gethi(oldv);
+    env->regs[r1 + 1] = int128_getlo(oldv);
+}
+
 #if !defined(CONFIG_USER_ONLY)
 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
 {

diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index eca8244..23ec52c 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c

@@ -517,8 +517,7 @@
     /* Remember: Use "R1 or R1 + 1, whichever is the odd-numbered register"
        as parameter (input). Status (output) is always R1. */
 
-    /* sigp contains the order code in bit positions 56-63, mask it here. */
-    switch (order_code & 0xff) {
+    switch (order_code & SIGP_ORDER_MASK) {
     case SIGP_SET_ARCH:
         /* switch arch */
         break;
@@ -678,3 +677,62 @@
     }
 }
 #endif
+
+/* The maximum bit defined at the moment is 129.  */
+#define MAX_STFL_WORDS  3
+
+/* Canonicalize the current cpu's features into the 64-bit words required
+   by STFLE.  Return the index-1 of the max word that is non-zero.  */
+static unsigned do_stfle(CPUS390XState *env, uint64_t words[MAX_STFL_WORDS])
+{
+    S390CPU *cpu = s390_env_get_cpu(env);
+    const unsigned long *features = cpu->model->features;
+    unsigned max_bit = 0;
+    S390Feat feat;
+
+    memset(words, 0, sizeof(uint64_t) * MAX_STFL_WORDS);
+
+    if (test_bit(S390_FEAT_ZARCH, features)) {
+        /* z/Architecture is always active if around */
+        words[0] = 1ull << (63 - 2);
+    }
+
+    for (feat = find_first_bit(features, S390_FEAT_MAX);
+         feat < S390_FEAT_MAX;
+         feat = find_next_bit(features, S390_FEAT_MAX, feat + 1)) {
+        const S390FeatDef *def = s390_feat_def(feat);
+        if (def->type == S390_FEAT_TYPE_STFL) {
+            unsigned bit = def->bit;
+            if (bit > max_bit) {
+                max_bit = bit;
+            }
+            assert(bit / 64 < MAX_STFL_WORDS);
+            words[bit / 64] |= 1ULL << (63 - bit % 64);
+        }
+    }
+
+    return max_bit / 64;
+}
+
+void HELPER(stfl)(CPUS390XState *env)
+{
+    uint64_t words[MAX_STFL_WORDS];
+
+    do_stfle(env, words);
+    cpu_stl_data(env, 200, words[0] >> 32);
+}
+
+uint32_t HELPER(stfle)(CPUS390XState *env, uint64_t addr)
+{
+    uint64_t words[MAX_STFL_WORDS];
+    unsigned count_m1 = env->regs[0] & 0xff;
+    unsigned max_m1 = do_stfle(env, words);
+    unsigned i;
+
+    for (i = 0; i <= count_m1; ++i) {
+        cpu_stq_data(env, addr + 8 * i, words[i]);
+    }
+
+    env->regs[0] = deposit64(env->regs[0], 0, 8, max_m1);
+    return (count_m1 >= max_m1 ? 0 : 3);
+}

diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 01c6217..4c48c59 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c

@@ -1194,6 +1194,7 @@
     FAC_SCF,                /* store clock fast */
     FAC_SFLE,               /* store facility list extended */
     FAC_ILA,                /* interlocked access facility 1 */
+    FAC_LPP,                /* load-program-parameter */
 } DisasFacility;
 
 struct DisasInsn {
@@ -1517,6 +1518,21 @@
     int imm = is_imm ? get_field(s->fields, i2) : 0;
     DisasCompare c;
 
+    /* BCR with R2 = 0 causes no branching */
+    if (have_field(s->fields, r2) && get_field(s->fields, r2) == 0) {
+        if (m1 == 14) {
+            /* Perform serialization */
+            /* FIXME: check for fast-BCR-serialization facility */
+            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+        }
+        if (m1 == 15) {
+            /* Perform serialization */
+            /* FIXME: perform checkpoint-synchronisation */
+            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+        }
+        return NO_EXIT;
+    }
+
     disas_jcc(s, &c, m1);
     return help_branch(s, &c, is_imm, imm, o->in2);
 }
@@ -1942,102 +1958,47 @@
 
 static ExitStatus op_cs(DisasContext *s, DisasOps *o)
 {
-    /* FIXME: needs an atomic solution for CONFIG_USER_ONLY.  */
     int d2 = get_field(s->fields, d2);
     int b2 = get_field(s->fields, b2);
-    int is_64 = s->insn->data;
-    TCGv_i64 addr, mem, cc, z;
+    TCGv_i64 addr, cc;
 
     /* Note that in1 = R3 (new value) and
        in2 = (zero-extended) R1 (expected value).  */
 
-    /* Load the memory into the (temporary) output.  While the PoO only talks
-       about moving the memory to R1 on inequality, if we include equality it
-       means that R1 is equal to the memory in all conditions.  */
     addr = get_address(s, 0, b2, d2);
-    if (is_64) {
-        tcg_gen_qemu_ld64(o->out, addr, get_mem_index(s));
-    } else {
-        tcg_gen_qemu_ld32u(o->out, addr, get_mem_index(s));
-    }
+    tcg_gen_atomic_cmpxchg_i64(o->out, addr, o->in2, o->in1,
+                               get_mem_index(s), s->insn->data | MO_ALIGN);
+    tcg_temp_free_i64(addr);
 
     /* Are the memory and expected values (un)equal?  Note that this setcond
        produces the output CC value, thus the NE sense of the test.  */
     cc = tcg_temp_new_i64();
     tcg_gen_setcond_i64(TCG_COND_NE, cc, o->in2, o->out);
-
-    /* If the memory and expected values are equal (CC==0), copy R3 to MEM.
-       Recall that we are allowed to unconditionally issue the store (and
-       thus any possible write trap), so (re-)store the original contents
-       of MEM in case of inequality.  */
-    z = tcg_const_i64(0);
-    mem = tcg_temp_new_i64();
-    tcg_gen_movcond_i64(TCG_COND_EQ, mem, cc, z, o->in1, o->out);
-    if (is_64) {
-        tcg_gen_qemu_st64(mem, addr, get_mem_index(s));
-    } else {
-        tcg_gen_qemu_st32(mem, addr, get_mem_index(s));
-    }
-    tcg_temp_free_i64(z);
-    tcg_temp_free_i64(mem);
-    tcg_temp_free_i64(addr);
-
-    /* Store CC back to cc_op.  Wait until after the store so that any
-       exception gets the old cc_op value.  */
     tcg_gen_extrl_i64_i32(cc_op, cc);
     tcg_temp_free_i64(cc);
     set_cc_static(s);
+
     return NO_EXIT;
 }
 
 static ExitStatus op_cdsg(DisasContext *s, DisasOps *o)
 {
-    /* FIXME: needs an atomic solution for CONFIG_USER_ONLY.  */
     int r1 = get_field(s->fields, r1);
     int r3 = get_field(s->fields, r3);
     int d2 = get_field(s->fields, d2);
     int b2 = get_field(s->fields, b2);
-    TCGv_i64 addrh, addrl, memh, meml, outh, outl, cc, z;
+    TCGv_i64 addr;
+    TCGv_i32 t_r1, t_r3;
 
     /* Note that R1:R1+1 = expected value and R3:R3+1 = new value.  */
+    addr = get_address(s, 0, b2, d2);
+    t_r1 = tcg_const_i32(r1);
+    t_r3 = tcg_const_i32(r3);
+    gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
+    tcg_temp_free_i64(addr);
+    tcg_temp_free_i32(t_r1);
+    tcg_temp_free_i32(t_r3);
 
-    addrh = get_address(s, 0, b2, d2);
-    addrl = get_address(s, 0, b2, d2 + 8);
-    outh = tcg_temp_new_i64();
-    outl = tcg_temp_new_i64();
-
-    tcg_gen_qemu_ld64(outh, addrh, get_mem_index(s));
-    tcg_gen_qemu_ld64(outl, addrl, get_mem_index(s));
-
-    /* Fold the double-word compare with arithmetic.  */
-    cc = tcg_temp_new_i64();
-    z = tcg_temp_new_i64();
-    tcg_gen_xor_i64(cc, outh, regs[r1]);
-    tcg_gen_xor_i64(z, outl, regs[r1 + 1]);
-    tcg_gen_or_i64(cc, cc, z);
-    tcg_gen_movi_i64(z, 0);
-    tcg_gen_setcond_i64(TCG_COND_NE, cc, cc, z);
-
-    memh = tcg_temp_new_i64();
-    meml = tcg_temp_new_i64();
-    tcg_gen_movcond_i64(TCG_COND_EQ, memh, cc, z, regs[r3], outh);
-    tcg_gen_movcond_i64(TCG_COND_EQ, meml, cc, z, regs[r3 + 1], outl);
-    tcg_temp_free_i64(z);
-
-    tcg_gen_qemu_st64(memh, addrh, get_mem_index(s));
-    tcg_gen_qemu_st64(meml, addrl, get_mem_index(s));
-    tcg_temp_free_i64(memh);
-    tcg_temp_free_i64(meml);
-    tcg_temp_free_i64(addrh);
-    tcg_temp_free_i64(addrl);
-
-    /* Save back state now that we've passed all exceptions.  */
-    tcg_gen_mov_i64(regs[r1], outh);
-    tcg_gen_mov_i64(regs[r1 + 1], outl);
-    tcg_gen_extrl_i64_i32(cc_op, cc);
-    tcg_temp_free_i64(outh);
-    tcg_temp_free_i64(outl);
-    tcg_temp_free_i64(cc);
     set_cc_static(s);
     return NO_EXIT;
 }
@@ -2363,6 +2324,50 @@
 }
 #endif
 
+static ExitStatus op_laa(DisasContext *s, DisasOps *o)
+{
+    /* The real output is indeed the original value in memory;
+       recompute the addition for the computation of CC.  */
+    tcg_gen_atomic_fetch_add_i64(o->in2, o->in2, o->in1, get_mem_index(s),
+                                 s->insn->data | MO_ALIGN);
+    /* However, we need to recompute the addition for setting CC.  */
+    tcg_gen_add_i64(o->out, o->in1, o->in2);
+    return NO_EXIT;
+}
+
+static ExitStatus op_lan(DisasContext *s, DisasOps *o)
+{
+    /* The real output is indeed the original value in memory;
+       recompute the addition for the computation of CC.  */
+    tcg_gen_atomic_fetch_and_i64(o->in2, o->in2, o->in1, get_mem_index(s),
+                                 s->insn->data | MO_ALIGN);
+    /* However, we need to recompute the operation for setting CC.  */
+    tcg_gen_and_i64(o->out, o->in1, o->in2);
+    return NO_EXIT;
+}
+
+static ExitStatus op_lao(DisasContext *s, DisasOps *o)
+{
+    /* The real output is indeed the original value in memory;
+       recompute the addition for the computation of CC.  */
+    tcg_gen_atomic_fetch_or_i64(o->in2, o->in2, o->in1, get_mem_index(s),
+                                s->insn->data | MO_ALIGN);
+    /* However, we need to recompute the operation for setting CC.  */
+    tcg_gen_or_i64(o->out, o->in1, o->in2);
+    return NO_EXIT;
+}
+
+static ExitStatus op_lax(DisasContext *s, DisasOps *o)
+{
+    /* The real output is indeed the original value in memory;
+       recompute the addition for the computation of CC.  */
+    tcg_gen_atomic_fetch_xor_i64(o->in2, o->in2, o->in1, get_mem_index(s),
+                                 s->insn->data | MO_ALIGN);
+    /* However, we need to recompute the operation for setting CC.  */
+    tcg_gen_xor_i64(o->out, o->in1, o->in2);
+    return NO_EXIT;
+}
+
 static ExitStatus op_ldeb(DisasContext *s, DisasOps *o)
 {
     gen_helper_ldeb(o->out, cpu_env, o->in2);
@@ -2558,6 +2563,7 @@
     tcg_temp_free_i32(r3);
     return NO_EXIT;
 }
+
 static ExitStatus op_lra(DisasContext *s, DisasOps *o)
 {
     check_privileged(s);
@@ -2567,6 +2573,14 @@
     return NO_EXIT;
 }
 
+static ExitStatus op_lpp(DisasContext *s, DisasOps *o)
+{
+    check_privileged(s);
+
+    tcg_gen_st_i64(o->in2, cpu_env, offsetof(CPUS390XState, pp));
+    return NO_EXIT;
+}
+
 static ExitStatus op_lpsw(DisasContext *s, DisasOps *o)
 {
     TCGv_i64 t1, t2;
@@ -2750,6 +2764,31 @@
     return NO_EXIT;
 }
 
+static ExitStatus op_lpd(DisasContext *s, DisasOps *o)
+{
+    TCGv_i64 a1, a2;
+    TCGMemOp mop = s->insn->data;
+
+    /* In a parallel context, stop the world and single step.  */
+    if (parallel_cpus) {
+        potential_page_fault(s);
+        gen_exception(EXCP_ATOMIC);
+        return EXIT_NORETURN;
+    }
+
+    /* In a serial context, perform the two loads ... */
+    a1 = get_address(s, 0, get_field(s->fields, b1), get_field(s->fields, d1));
+    a2 = get_address(s, 0, get_field(s->fields, b2), get_field(s->fields, d2));
+    tcg_gen_qemu_ld_i64(o->out, a1, get_mem_index(s), mop | MO_ALIGN);
+    tcg_gen_qemu_ld_i64(o->out2, a2, get_mem_index(s), mop | MO_ALIGN);
+    tcg_temp_free_i64(a1);
+    tcg_temp_free_i64(a2);
+
+    /* ... and indicate that we performed them while interlocked.  */
+    gen_op_movi_cc(s, 0);
+    return NO_EXIT;
+}
+
 #ifndef CONFIG_USER_ONLY
 static ExitStatus op_lura(DisasContext *s, DisasOps *o)
 {
@@ -3382,6 +3421,7 @@
     check_privileged(s);
     potential_page_fault(s);
     gen_helper_sigp(cc_op, cpu_env, o->in2, r1, o->in1);
+    set_cc_static(s);
     tcg_temp_free_i32(r1);
     return NO_EXIT;
 }
@@ -3628,15 +3668,8 @@
 
 static ExitStatus op_stfl(DisasContext *s, DisasOps *o)
 {
-    TCGv_i64 f, a;
-    /* We really ought to have more complete indication of facilities
-       that we implement.  Address this when STFLE is implemented.  */
     check_privileged(s);
-    f = tcg_const_i64(0xc0000000);
-    a = tcg_const_i64(200);
-    tcg_gen_qemu_st32(f, a, get_mem_index(s));
-    tcg_temp_free_i64(f);
-    tcg_temp_free_i64(a);
+    gen_helper_stfl(cpu_env);
     return NO_EXIT;
 }
 
@@ -3802,6 +3835,14 @@
 }
 #endif
 
+static ExitStatus op_stfle(DisasContext *s, DisasOps *o)
+{
+    potential_page_fault(s);
+    gen_helper_stfle(cc_op, cpu_env, o->in2);
+    set_cc_static(s);
+    return NO_EXIT;
+}
+
 static ExitStatus op_st8(DisasContext *s, DisasOps *o)
 {
     tcg_gen_qemu_st8(o->in1, o->in2, get_mem_index(s));
@@ -4420,6 +4461,22 @@
 }
 #define SPEC_wout_r1_D32 SPEC_r1_even
 
+static void wout_r3_P32(DisasContext *s, DisasFields *f, DisasOps *o)
+{
+    int r3 = get_field(f, r3);
+    store_reg32_i64(r3, o->out);
+    store_reg32_i64(r3 + 1, o->out2);
+}
+#define SPEC_wout_r3_P32 SPEC_r3_even
+
+static void wout_r3_P64(DisasContext *s, DisasFields *f, DisasOps *o)
+{
+    int r3 = get_field(f, r3);
+    store_reg(r3, o->out);
+    store_reg(r3 + 1, o->out2);
+}
+#define SPEC_wout_r3_P64 SPEC_r3_even
+
 static void wout_e1(DisasContext *s, DisasFields *f, DisasOps *o)
 {
     store_freg32_i64(get_field(f, r1), o->out);
@@ -4486,21 +4543,17 @@
 }
 #define SPEC_wout_m2_32 0
 
-static void wout_m2_32_r1_atomic(DisasContext *s, DisasFields *f, DisasOps *o)
+static void wout_in2_r1(DisasContext *s, DisasFields *f, DisasOps *o)
 {
-    /* XXX release reservation */
-    tcg_gen_qemu_st32(o->out, o->addr1, get_mem_index(s));
-    store_reg32_i64(get_field(f, r1), o->in2);
-}
-#define SPEC_wout_m2_32_r1_atomic 0
-
-static void wout_m2_64_r1_atomic(DisasContext *s, DisasFields *f, DisasOps *o)
-{
-    /* XXX release reservation */
-    tcg_gen_qemu_st64(o->out, o->addr1, get_mem_index(s));
     store_reg(get_field(f, r1), o->in2);
 }
-#define SPEC_wout_m2_64_r1_atomic 0
+#define SPEC_wout_in2_r1 0
+
+static void wout_in2_r1_32(DisasContext *s, DisasFields *f, DisasOps *o)
+{
+    store_reg32_i64(get_field(f, r1), o->in2);
+}
+#define SPEC_wout_in2_r1_32 0
 
 /* ====================================================================== */
 /* The "INput 1" generators.  These load the first operand to an insn.  */
@@ -4944,24 +4997,6 @@
 }
 #define SPEC_in2_mri2_64 0
 
-static void in2_m2_32s_atomic(DisasContext *s, DisasFields *f, DisasOps *o)
-{
-    /* XXX should reserve the address */
-    in1_la2(s, f, o);
-    o->in2 = tcg_temp_new_i64();
-    tcg_gen_qemu_ld32s(o->in2, o->addr1, get_mem_index(s));
-}
-#define SPEC_in2_m2_32s_atomic 0
-
-static void in2_m2_64_atomic(DisasContext *s, DisasFields *f, DisasOps *o)
-{
-    /* XXX should reserve the address */
-    in1_la2(s, f, o);
-    o->in2 = tcg_temp_new_i64();
-    tcg_gen_qemu_ld64(o->in2, o->addr1, get_mem_index(s));
-}
-#define SPEC_in2_m2_64_atomic 0
-
 static void in2_i2(DisasContext *s, DisasFields *f, DisasOps *o)
 {
     o->in2 = tcg_const_i64(get_field(f, i2));

diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index 9a481c3..9da7e1e 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c

@@ -301,6 +301,7 @@
 #ifdef CONFIG_USER_ONLY
     cc->handle_mmu_fault = superh_cpu_handle_mmu_fault;
 #else
+    cc->do_unaligned_access = superh_cpu_do_unaligned_access;
     cc->get_phys_page_debug = superh_cpu_get_phys_page_debug;
 #endif
     cc->disas_set_info = superh_cpu_disas_set_info;

diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index cad8989..6c07c6b 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h

@@ -24,6 +24,7 @@
 #include "cpu-qom.h"
 
 #define TARGET_LONG_BITS 32
+#define ALIGNED_ONLY
 
 /* CPU Subtypes */
 #define SH_CPU_SH7750  (1 << 0)
@@ -92,14 +93,6 @@
 
 #define DELAY_SLOT             (1 << 0)
 #define DELAY_SLOT_CONDITIONAL (1 << 1)
-#define DELAY_SLOT_TRUE        (1 << 2)
-#define DELAY_SLOT_CLEARME     (1 << 3)
-/* The dynamic value of the DELAY_SLOT_TRUE flag determines whether the jump
- * after the delay slot should be taken or not. It is calculated from SR_T.
- *
- * It is unclear if it is permitted to modify the SR_T flag in a delay slot.
- * The use of DELAY_SLOT_TRUE flag makes us accept such SR_T modification.
- */
 
 typedef struct tlb_t {
     uint32_t vpn;		/* virtual page number */
@@ -149,7 +142,8 @@
     uint32_t sgr;		/* saved global register 15 */
     uint32_t dbr;		/* debug base register */
     uint32_t pc;		/* program counter */
-    uint32_t delayed_pc;	/* target of delayed jump */
+    uint32_t delayed_pc;        /* target of delayed branch */
+    uint32_t delayed_cond;      /* condition of delayed branch */
     uint32_t mach;		/* multiply and accumulate high */
     uint32_t macl;		/* multiply and accumulate low */
     uint32_t pr;		/* procedure register */
@@ -222,6 +216,9 @@
 hwaddr superh_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
 int superh_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
 int superh_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+void superh_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
+                                    MMUAccessType access_type,
+                                    int mmu_idx, uintptr_t retaddr);
 
 void sh4_translate_init(void);
 SuperHCPU *cpu_sh4_init(const char *cpu_model);
@@ -383,8 +380,7 @@
 {
     *pc = env->pc;
     *cs_base = 0;
-    *flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL
-                    | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME))   /* Bits  0- 3 */
+    *flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) /* Bits 0-1 */
             | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
             | (env->sr & ((1u << SR_MD) | (1u << SR_RB)))      /* Bits 29-30 */
             | (env->sr & (1u << SR_FD))                        /* Bit 15 */

diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index 036c5ca..8f8ce81 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c

@@ -168,10 +168,8 @@
         /* Branch instruction should be executed again before delay slot. */
 	env->spc -= 2;
 	/* Clear flags for exception/interrupt routine. */
-	env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL | DELAY_SLOT_TRUE);
+        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
     }
-    if (env->flags & DELAY_SLOT_CLEARME)
-        env->flags = 0;
 
     if (do_exp) {
         env->expevt = cs->exception_index;

diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index 684d3f3..528a40a 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c

@@ -24,6 +24,22 @@
 
 #ifndef CONFIG_USER_ONLY
 
+void superh_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
+                                    MMUAccessType access_type,
+                                    int mmu_idx, uintptr_t retaddr)
+{
+    switch (access_type) {
+    case MMU_INST_FETCH:
+    case MMU_DATA_LOAD:
+        cs->exception_index = 0x0e0;
+        break;
+    case MMU_DATA_STORE:
+        cs->exception_index = 0x100;
+        break;
+    }
+    cpu_loop_exit_restore(cs, retaddr);
+}
+
 void tlb_fill(CPUState *cs, target_ulong addr, MMUAccessType access_type,
               int mmu_idx, uintptr_t retaddr)
 {
@@ -32,10 +48,7 @@
     ret = superh_cpu_handle_mmu_fault(cs, addr, access_type, mmu_idx);
     if (ret) {
         /* now we have a real cpu fault */
-        if (retaddr) {
-            cpu_restore_state(cs, retaddr);
-        }
-        cpu_loop_exit(cs);
+        cpu_loop_exit_restore(cs, retaddr);
     }
 }
 
@@ -59,10 +72,7 @@
     CPUState *cs = CPU(sh_env_get_cpu(env));
 
     cs->exception_index = index;
-    if (retaddr) {
-        cpu_restore_state(cs, retaddr);
-    }
-    cpu_loop_exit(cs);
+    cpu_loop_exit_restore(cs, retaddr);
 }
 
 void helper_raise_illegal_instruction(CPUSH4State *env)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index c89a147..0bc2f9f 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c

@@ -37,7 +37,8 @@
     struct TranslationBlock *tb;
     target_ulong pc;
     uint16_t opcode;
-    uint32_t flags;
+    uint32_t tbflags;    /* should stay unmodified during the TB translation */
+    uint32_t envflags;   /* should stay in sync with env->flags using TCG ops */
     int bstate;
     int memidx;
     uint32_t delayed_pc;
@@ -49,7 +50,7 @@
 #if defined(CONFIG_USER_ONLY)
 #define IS_USER(ctx) 1
 #else
-#define IS_USER(ctx) (!(ctx->flags & (1u << SR_MD)))
+#define IS_USER(ctx) (!(ctx->tbflags & (1u << SR_MD)))
 #endif
 
 enum {
@@ -71,7 +72,7 @@
 static TCGv cpu_fregs[32];
 
 /* internal register indexes */
-static TCGv cpu_flags, cpu_delayed_pc;
+static TCGv cpu_flags, cpu_delayed_pc, cpu_delayed_cond;
 
 #include "exec/gen-icount.h"
 
@@ -146,6 +147,10 @@
     cpu_delayed_pc = tcg_global_mem_new_i32(cpu_env,
 					    offsetof(CPUSH4State, delayed_pc),
 					    "_delayed_pc_");
+    cpu_delayed_cond = tcg_global_mem_new_i32(cpu_env,
+                                              offsetof(CPUSH4State,
+                                                       delayed_cond),
+                                              "_delayed_cond_");
     cpu_ldst = tcg_global_mem_new_i32(cpu_env,
 				      offsetof(CPUSH4State, ldst), "_ldst_");
 
@@ -199,12 +204,23 @@
 {
     tcg_gen_andi_i32(cpu_sr, src,
                      ~((1u << SR_Q) | (1u << SR_M) | (1u << SR_T)));
-    tcg_gen_shri_i32(cpu_sr_q, src, SR_Q);
-    tcg_gen_andi_i32(cpu_sr_q, cpu_sr_q, 1);
-    tcg_gen_shri_i32(cpu_sr_m, src, SR_M);
-    tcg_gen_andi_i32(cpu_sr_m, cpu_sr_m, 1);
-    tcg_gen_shri_i32(cpu_sr_t, src, SR_T);
-    tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1);
+    tcg_gen_extract_i32(cpu_sr_q, src, SR_Q, 1);
+    tcg_gen_extract_i32(cpu_sr_m, src, SR_M, 1);
+    tcg_gen_extract_i32(cpu_sr_t, src, SR_T, 1);
+}
+
+static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
+{
+    if (save_pc) {
+        tcg_gen_movi_i32(cpu_pc, ctx->pc);
+    }
+    if (ctx->delayed_pc != (uint32_t) -1) {
+        tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
+    }
+    if ((ctx->tbflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL))
+        != ctx->envflags) {
+        tcg_gen_movi_i32(cpu_flags, ctx->envflags);
+    }
 }
 
 static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
@@ -241,6 +257,7 @@
 	/* Target is not statically known, it comes necessarily from a
 	   delayed jump as immediate jump are conditinal jumps */
 	tcg_gen_mov_i32(cpu_pc, cpu_delayed_pc);
+        tcg_gen_discard_i32(cpu_delayed_pc);
 	if (ctx->singlestep_enabled)
             gen_helper_debug(cpu_env);
 	tcg_gen_exit_tb(0);
@@ -249,24 +266,17 @@
     }
 }
 
-static inline void gen_branch_slot(uint32_t delayed_pc, int t)
-{
-    TCGLabel *label = gen_new_label();
-    tcg_gen_movi_i32(cpu_delayed_pc, delayed_pc);
-    tcg_gen_brcondi_i32(t ? TCG_COND_EQ : TCG_COND_NE, cpu_sr_t, 0, label);
-    tcg_gen_ori_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE);
-    gen_set_label(label);
-}
-
 /* Immediate conditional jump (bt or bf) */
 static void gen_conditional_jump(DisasContext * ctx,
 				 target_ulong ift, target_ulong ifnott)
 {
     TCGLabel *l1 = gen_new_label();
+    gen_save_cpu_state(ctx, false);
     tcg_gen_brcondi_i32(TCG_COND_NE, cpu_sr_t, 0, l1);
     gen_goto_tb(ctx, 0, ifnott);
     gen_set_label(l1);
     gen_goto_tb(ctx, 1, ift);
+    ctx->bstate = BS_BRANCH;
 }
 
 /* Delayed conditional jump (bt or bf) */
@@ -277,20 +287,14 @@
 
     l1 = gen_new_label();
     ds = tcg_temp_new();
-    tcg_gen_andi_i32(ds, cpu_flags, DELAY_SLOT_TRUE);
+    tcg_gen_mov_i32(ds, cpu_delayed_cond);
+    tcg_gen_discard_i32(cpu_delayed_cond);
     tcg_gen_brcondi_i32(TCG_COND_NE, ds, 0, l1);
     gen_goto_tb(ctx, 1, ctx->pc + 2);
     gen_set_label(l1);
-    tcg_gen_andi_i32(cpu_flags, cpu_flags, ~DELAY_SLOT_TRUE);
     gen_jump(ctx);
 }
 
-static inline void gen_store_flags(uint32_t flags)
-{
-    tcg_gen_andi_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE);
-    tcg_gen_ori_i32(cpu_flags, cpu_flags, flags);
-}
-
 static inline void gen_load_fpr64(TCGv_i64 t, int reg)
 {
     tcg_gen_concat_i32_i64(t, cpu_fregs[reg + 1], cpu_fregs[reg]);
@@ -298,13 +302,7 @@
 
 static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
 {
-    TCGv_i32 tmp = tcg_temp_new_i32();
-    tcg_gen_extrl_i64_i32(tmp, t);
-    tcg_gen_mov_i32(cpu_fregs[reg + 1], tmp);
-    tcg_gen_shri_i64(t, t, 32);
-    tcg_gen_extrl_i64_i32(tmp, t);
-    tcg_gen_mov_i32(cpu_fregs[reg], tmp);
-    tcg_temp_free_i32(tmp);
+    tcg_gen_extr_i64_i32(cpu_fregs[reg + 1], cpu_fregs[reg], t);
 }
 
 #define B3_0 (ctx->opcode & 0xf)
@@ -317,51 +315,50 @@
 #define B11_8 ((ctx->opcode >> 8) & 0xf)
 #define B15_12 ((ctx->opcode >> 12) & 0xf)
 
-#define REG(x) ((x) < 8 && (ctx->flags & (1u << SR_MD))\
-                        && (ctx->flags & (1u << SR_RB))\
+#define REG(x) ((x) < 8 && (ctx->tbflags & (1u << SR_MD))\
+                        && (ctx->tbflags & (1u << SR_RB))\
                 ? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
 
-#define ALTREG(x) ((x) < 8 && (!(ctx->flags & (1u << SR_MD))\
-                               || !(ctx->flags & (1u << SR_RB)))\
+#define ALTREG(x) ((x) < 8 && (!(ctx->tbflags & (1u << SR_MD))\
+                               || !(ctx->tbflags & (1u << SR_RB)))\
 		? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
 
-#define FREG(x) (ctx->flags & FPSCR_FR ? (x) ^ 0x10 : (x))
+#define FREG(x) (ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x))
 #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
-#define XREG(x) (ctx->flags & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x))
+#define XREG(x) (ctx->tbflags & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x))
 #define DREG(x) FREG(x) /* Assumes lsb of (x) is always 0 */
 
 #define CHECK_NOT_DELAY_SLOT \
-  if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL))     \
-  {                                                           \
-      tcg_gen_movi_i32(cpu_pc, ctx->pc);                      \
-      gen_helper_raise_slot_illegal_instruction(cpu_env);     \
-      ctx->bstate = BS_BRANCH;                                \
-      return;                                                 \
-  }
+    if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {     \
+        gen_save_cpu_state(ctx, true);                               \
+        gen_helper_raise_slot_illegal_instruction(cpu_env);          \
+        ctx->bstate = BS_EXCP;                                       \
+        return;                                                      \
+    }
 
-#define CHECK_PRIVILEGED                                        \
-  if (IS_USER(ctx)) {                                           \
-      tcg_gen_movi_i32(cpu_pc, ctx->pc);                        \
-      if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
-          gen_helper_raise_slot_illegal_instruction(cpu_env);   \
-      } else {                                                  \
-          gen_helper_raise_illegal_instruction(cpu_env);        \
-      }                                                         \
-      ctx->bstate = BS_BRANCH;                                  \
-      return;                                                   \
-  }
+#define CHECK_PRIVILEGED                                             \
+    if (IS_USER(ctx)) {                                              \
+        gen_save_cpu_state(ctx, true);                               \
+        if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
+            gen_helper_raise_slot_illegal_instruction(cpu_env);      \
+        } else {                                                     \
+            gen_helper_raise_illegal_instruction(cpu_env);           \
+        }                                                            \
+        ctx->bstate = BS_EXCP;                                       \
+        return;                                                      \
+    }
 
-#define CHECK_FPU_ENABLED                                       \
-  if (ctx->flags & (1u << SR_FD)) {                             \
-      tcg_gen_movi_i32(cpu_pc, ctx->pc);                        \
-      if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
-          gen_helper_raise_slot_fpu_disable(cpu_env);           \
-      } else {                                                  \
-          gen_helper_raise_fpu_disable(cpu_env);                \
-      }                                                         \
-      ctx->bstate = BS_BRANCH;                                  \
-      return;                                                   \
-  }
+#define CHECK_FPU_ENABLED                                            \
+    if (ctx->tbflags & (1u << SR_FD)) {                              \
+        gen_save_cpu_state(ctx, true);                               \
+        if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
+            gen_helper_raise_slot_fpu_disable(cpu_env);              \
+        } else {                                                     \
+            gen_helper_raise_fpu_disable(cpu_env);                   \
+        }                                                            \
+        ctx->bstate = BS_EXCP;                                       \
+        return;                                                      \
+    }
 
 static void _decode_opc(DisasContext * ctx)
 {
@@ -409,7 +406,7 @@
     case 0x000b:		/* rts */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x0028:		/* clrmac */
@@ -431,7 +428,7 @@
 	CHECK_NOT_DELAY_SLOT
         gen_write_sr(cpu_ssr);
 	tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x0058:		/* sets */
@@ -497,15 +494,13 @@
     case 0xa000:		/* bra disp */
 	CHECK_NOT_DELAY_SLOT
 	ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2;
-	tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	return;
     case 0xb000:		/* bsr disp */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
 	ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2;
-	tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	return;
     }
 
@@ -939,7 +934,7 @@
 	return;
     case 0xf00c: /* fmov {F,D,X}Rm,{F,D,X}Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_SZ) {
+        if (ctx->tbflags & FPSCR_SZ) {
 	    TCGv_i64 fp = tcg_temp_new_i64();
 	    gen_load_fpr64(fp, XREG(B7_4));
 	    gen_store_fpr64(fp, XREG(B11_8));
@@ -950,7 +945,7 @@
 	return;
     case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_SZ) {
+        if (ctx->tbflags & FPSCR_SZ) {
 	    TCGv addr_hi = tcg_temp_new();
 	    int fr = XREG(B7_4);
 	    tcg_gen_addi_i32(addr_hi, REG(B11_8), 4);
@@ -966,7 +961,7 @@
 	return;
     case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_SZ) {
+        if (ctx->tbflags & FPSCR_SZ) {
 	    TCGv addr_hi = tcg_temp_new();
 	    int fr = XREG(B11_8);
 	    tcg_gen_addi_i32(addr_hi, REG(B7_4), 4);
@@ -980,7 +975,7 @@
 	return;
     case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_SZ) {
+        if (ctx->tbflags & FPSCR_SZ) {
 	    TCGv addr_hi = tcg_temp_new();
 	    int fr = XREG(B11_8);
 	    tcg_gen_addi_i32(addr_hi, REG(B7_4), 4);
@@ -998,7 +993,7 @@
 	CHECK_FPU_ENABLED
         TCGv addr = tcg_temp_new_i32();
         tcg_gen_subi_i32(addr, REG(B11_8), 4);
-        if (ctx->flags & FPSCR_SZ) {
+        if (ctx->tbflags & FPSCR_SZ) {
 	    int fr = XREG(B7_4);
             tcg_gen_qemu_st_i32(cpu_fregs[fr+1], addr, ctx->memidx, MO_TEUL);
 	    tcg_gen_subi_i32(addr, addr, 4);
@@ -1015,7 +1010,7 @@
 	{
 	    TCGv addr = tcg_temp_new_i32();
 	    tcg_gen_add_i32(addr, REG(B7_4), REG(0));
-            if (ctx->flags & FPSCR_SZ) {
+            if (ctx->tbflags & FPSCR_SZ) {
 		int fr = XREG(B11_8);
                 tcg_gen_qemu_ld_i32(cpu_fregs[fr], addr,
                                     ctx->memidx, MO_TEUL);
@@ -1034,7 +1029,7 @@
 	{
 	    TCGv addr = tcg_temp_new();
 	    tcg_gen_add_i32(addr, REG(B11_8), REG(0));
-            if (ctx->flags & FPSCR_SZ) {
+            if (ctx->tbflags & FPSCR_SZ) {
 		int fr = XREG(B7_4);
                 tcg_gen_qemu_ld_i32(cpu_fregs[fr], addr,
                                     ctx->memidx, MO_TEUL);
@@ -1056,7 +1051,7 @@
     case 0xf005: /* fcmp/gt Rm,Rn - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
 	{
 	    CHECK_FPU_ENABLED
-            if (ctx->flags & FPSCR_PR) {
+            if (ctx->tbflags & FPSCR_PR) {
                 TCGv_i64 fp0, fp1;
 
 		if (ctx->opcode & 0x0110)
@@ -1125,7 +1120,7 @@
     case 0xf00e: /* fmac FR0,RM,Rn */
         {
             CHECK_FPU_ENABLED
-            if (ctx->flags & FPSCR_PR) {
+            if (ctx->tbflags & FPSCR_PR) {
                 break; /* illegal instruction */
             } else {
                 gen_helper_fmac_FT(cpu_fregs[FREG(B11_8)], cpu_env,
@@ -1155,25 +1150,23 @@
 	return;
     case 0x8b00:		/* bf label */
 	CHECK_NOT_DELAY_SLOT
-	    gen_conditional_jump(ctx, ctx->pc + 2,
-				 ctx->pc + 4 + B7_0s * 2);
-	ctx->bstate = BS_BRANCH;
+        gen_conditional_jump(ctx, ctx->pc + 2, ctx->pc + 4 + B7_0s * 2);
 	return;
     case 0x8f00:		/* bf/s label */
 	CHECK_NOT_DELAY_SLOT
-	gen_branch_slot(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2, 0);
-	ctx->flags |= DELAY_SLOT_CONDITIONAL;
+        tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
+        ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2;
+        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
 	return;
     case 0x8900:		/* bt label */
 	CHECK_NOT_DELAY_SLOT
-	    gen_conditional_jump(ctx, ctx->pc + 4 + B7_0s * 2,
-				 ctx->pc + 2);
-	ctx->bstate = BS_BRANCH;
+        gen_conditional_jump(ctx, ctx->pc + 4 + B7_0s * 2, ctx->pc + 2);
 	return;
     case 0x8d00:		/* bt/s label */
 	CHECK_NOT_DELAY_SLOT
-	gen_branch_slot(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2, 1);
-	ctx->flags |= DELAY_SLOT_CONDITIONAL;
+        tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
+        ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2;
+        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
 	return;
     case 0x8800:		/* cmp/eq #imm,R0 */
         tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
@@ -1281,11 +1274,11 @@
 	{
 	    TCGv imm;
 	    CHECK_NOT_DELAY_SLOT
-            tcg_gen_movi_i32(cpu_pc, ctx->pc);
+            gen_save_cpu_state(ctx, true);
 	    imm = tcg_const_i32(B7_0);
             gen_helper_trapa(cpu_env, imm);
 	    tcg_temp_free(imm);
-	    ctx->bstate = BS_BRANCH;
+            ctx->bstate = BS_EXCP;
 	}
 	return;
     case 0xc800:		/* tst #imm,R0 */
@@ -1354,14 +1347,14 @@
     case 0x0023:		/* braf Rn */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->pc + 4);
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x0003:		/* bsrf Rn */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
 	tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x4015:		/* cmp/pl Rn */
@@ -1377,14 +1370,14 @@
     case 0x402b:		/* jmp @Rn */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x400b:		/* jsr @Rn */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
 	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
-	ctx->flags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x400e:		/* ldc Rm,SR */
@@ -1508,17 +1501,23 @@
         }
         ctx->has_movcal = 1;
 	return;
-    case 0x40a9:
-	/* MOVUA.L @Rm,R0 (Rm) -> R0
-	   Load non-boundary-aligned data */
-        tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
-	return;
-    case 0x40e9:
-	/* MOVUA.L @Rm+,R0   (Rm) -> R0, Rm + 4 -> Rm
-	   Load non-boundary-aligned data */
-        tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
-	tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
-	return;
+    case 0x40a9:                /* movua.l @Rm,R0 */
+        /* Load non-boundary-aligned data */
+        if (ctx->features & SH_FEATURE_SH4A) {
+            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
+                                MO_TEUL | MO_UNALN);
+            return;
+        }
+        break;
+    case 0x40e9:                /* movua.l @Rm+,R0 */
+        /* Load non-boundary-aligned data */
+        if (ctx->features & SH_FEATURE_SH4A) {
+            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
+                                MO_TEUL | MO_UNALN);
+            tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
+            return;
+        }
+        break;
     case 0x0029:		/* movt Rn */
         tcg_gen_mov_i32(REG(B11_8), cpu_sr_t);
 	return;
@@ -1576,10 +1575,11 @@
 	else
 	    break;
     case 0x00ab:		/* synco */
-	if (ctx->features & SH_FEATURE_SH4A)
-	    return;
-	else
-	    break;
+        if (ctx->features & SH_FEATURE_SH4A) {
+            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+            return;
+        }
+        break;
     case 0x4024:		/* rotcl Rn */
 	{
 	    TCGv tmp = tcg_temp_new();
@@ -1640,19 +1640,14 @@
 	tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 16);
 	return;
     case 0x401b:		/* tas.b @Rn */
-	{
-	    TCGv addr, val;
-	    addr = tcg_temp_local_new();
-	    tcg_gen_mov_i32(addr, REG(B11_8));
-	    val = tcg_temp_local_new();
-            tcg_gen_qemu_ld_i32(val, addr, ctx->memidx, MO_UB);
+        {
+            TCGv val = tcg_const_i32(0x80);
+            tcg_gen_atomic_fetch_or_i32(val, REG(B11_8), val,
+                                        ctx->memidx, MO_UB);
             tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
-	    tcg_gen_ori_i32(val, val, 0x80);
-            tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_UB);
-	    tcg_temp_free(val);
-	    tcg_temp_free(addr);
-	}
-	return;
+            tcg_temp_free(val);
+        }
+        return;
     case 0xf00d: /* fsts FPUL,FRn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
 	tcg_gen_mov_i32(cpu_fregs[FREG(B11_8)], cpu_fpul);
@@ -1663,7 +1658,7 @@
 	return;
     case 0xf02d: /* float FPUL,FRn/DRn - FPSCR: R[PR,Enable.I]/W[Cause,Flag] */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_PR) {
+        if (ctx->tbflags & FPSCR_PR) {
 	    TCGv_i64 fp;
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
@@ -1678,7 +1673,7 @@
 	return;
     case 0xf03d: /* ftrc FRm/DRm,FPUL - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_PR) {
+        if (ctx->tbflags & FPSCR_PR) {
 	    TCGv_i64 fp;
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
@@ -1699,7 +1694,7 @@
 	return;
     case 0xf05d: /* fabs FRn/DRn */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_PR) {
+        if (ctx->tbflags & FPSCR_PR) {
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    TCGv_i64 fp = tcg_temp_new_i64();
@@ -1713,7 +1708,7 @@
 	return;
     case 0xf06d: /* fsqrt FRn */
 	CHECK_FPU_ENABLED
-        if (ctx->flags & FPSCR_PR) {
+        if (ctx->tbflags & FPSCR_PR) {
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    TCGv_i64 fp = tcg_temp_new_i64();
@@ -1731,13 +1726,13 @@
 	break;
     case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */
 	CHECK_FPU_ENABLED
-        if (!(ctx->flags & FPSCR_PR)) {
+        if (!(ctx->tbflags & FPSCR_PR)) {
 	    tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0);
 	}
 	return;
     case 0xf09d: /* fldi1 FRn - FPSCR: R[PR] */
 	CHECK_FPU_ENABLED
-        if (!(ctx->flags & FPSCR_PR)) {
+        if (!(ctx->tbflags & FPSCR_PR)) {
 	    tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0x3f800000);
 	}
 	return;
@@ -1761,7 +1756,7 @@
 	return;
     case 0xf0ed: /* fipr FVm,FVn */
         CHECK_FPU_ENABLED
-        if ((ctx->flags & FPSCR_PR) == 0) {
+        if ((ctx->tbflags & FPSCR_PR) == 0) {
             TCGv m, n;
             m = tcg_const_i32((ctx->opcode >> 8) & 3);
             n = tcg_const_i32((ctx->opcode >> 10) & 3);
@@ -1774,7 +1769,7 @@
     case 0xf0fd: /* ftrv XMTRX,FVn */
         CHECK_FPU_ENABLED
         if ((ctx->opcode & 0x0300) == 0x0100 &&
-            (ctx->flags & FPSCR_PR) == 0) {
+            (ctx->tbflags & FPSCR_PR) == 0) {
             TCGv n;
             n = tcg_const_i32((ctx->opcode >> 10) & 3);
             gen_helper_ftrv(cpu_env, n);
@@ -1788,31 +1783,25 @@
 	    ctx->opcode, ctx->pc);
     fflush(stderr);
 #endif
-    tcg_gen_movi_i32(cpu_pc, ctx->pc);
-    if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
+    gen_save_cpu_state(ctx, true);
+    if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
         gen_helper_raise_slot_illegal_instruction(cpu_env);
     } else {
         gen_helper_raise_illegal_instruction(cpu_env);
     }
-    ctx->bstate = BS_BRANCH;
+    ctx->bstate = BS_EXCP;
 }
 
 static void decode_opc(DisasContext * ctx)
 {
-    uint32_t old_flags = ctx->flags;
+    uint32_t old_flags = ctx->envflags;
 
     _decode_opc(ctx);
 
     if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
-        if (ctx->flags & DELAY_SLOT_CLEARME) {
-            gen_store_flags(0);
-        } else {
-	    /* go out of the delay slot */
-	    uint32_t new_flags = ctx->flags;
-	    new_flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
-	    gen_store_flags(new_flags);
-        }
-        ctx->flags = 0;
+        /* go out of the delay slot */
+        ctx->envflags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
+        tcg_gen_movi_i32(cpu_flags, ctx->envflags);
         ctx->bstate = BS_BRANCH;
         if (old_flags & DELAY_SLOT_CONDITIONAL) {
 	    gen_delayed_conditional_jump(ctx);
@@ -1821,10 +1810,6 @@
 	}
 
     }
-
-    /* go into a delay slot */
-    if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL))
-        gen_store_flags(ctx->flags);
 }
 
 void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
@@ -1838,16 +1823,17 @@
 
     pc_start = tb->pc;
     ctx.pc = pc_start;
-    ctx.flags = (uint32_t)tb->flags;
+    ctx.tbflags = (uint32_t)tb->flags;
+    ctx.envflags = tb->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
     ctx.bstate = BS_NONE;
-    ctx.memidx = (ctx.flags & (1u << SR_MD)) == 0 ? 1 : 0;
+    ctx.memidx = (ctx.tbflags & (1u << SR_MD)) == 0 ? 1 : 0;
     /* We don't know if the delayed pc came from a dynamic or static branch,
        so assume it is a dynamic branch.  */
     ctx.delayed_pc = -1; /* use delayed pc from env pointer */
     ctx.tb = tb;
     ctx.singlestep_enabled = cs->singlestep_enabled;
     ctx.features = env->features;
-    ctx.has_movcal = (ctx.flags & TB_FLAG_PENDING_MOVCA);
+    ctx.has_movcal = (ctx.tbflags & TB_FLAG_PENDING_MOVCA);
 
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
@@ -1860,14 +1846,14 @@
 
     gen_tb_start(tb);
     while (ctx.bstate == BS_NONE && !tcg_op_buf_full()) {
-        tcg_gen_insn_start(ctx.pc, ctx.flags);
+        tcg_gen_insn_start(ctx.pc, ctx.envflags);
         num_insns++;
 
         if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) {
             /* We have hit a breakpoint - make sure PC is up-to-date */
-            tcg_gen_movi_i32(cpu_pc, ctx.pc);
+            gen_save_cpu_state(&ctx, true);
             gen_helper_debug(cpu_env);
-            ctx.bstate = BS_BRANCH;
+            ctx.bstate = BS_EXCP;
             /* The address covered by the breakpoint must be included in
                [tb->pc, tb->pc + tb->size) in order to for it to be
                properly cleared -- thus we increment the PC here so that
@@ -1896,23 +1882,20 @@
     if (tb->cflags & CF_LAST_IO)
         gen_io_end();
     if (cs->singlestep_enabled) {
-        tcg_gen_movi_i32(cpu_pc, ctx.pc);
+        gen_save_cpu_state(&ctx, true);
         gen_helper_debug(cpu_env);
     } else {
 	switch (ctx.bstate) {
         case BS_STOP:
-            /* gen_op_interrupt_restart(); */
-            /* fall through */
+            gen_save_cpu_state(&ctx, true);
+            tcg_gen_exit_tb(0);
+            break;
         case BS_NONE:
-            if (ctx.flags) {
-                gen_store_flags(ctx.flags | DELAY_SLOT_CLEARME);
-	    }
+            gen_save_cpu_state(&ctx, false);
             gen_goto_tb(&ctx, 0, ctx.pc);
             break;
         case BS_EXCP:
-            /* gen_op_interrupt_restart(); */
-            tcg_gen_exit_tb(0);
-            break;
+            /* fall through */
         case BS_BRANCH:
         default:
             break;
@@ -1941,4 +1924,7 @@
 {
     env->pc = data[0];
     env->flags = data[1];
+    /* Theoretically delayed_pc should also be restored. In practice the
+       branch instruction is re-executed after exception, so the delayed
+       branch target will be recomputed. */
 }

diff --git a/tests/.gitignore b/tests/.gitignore
index a966740..40c2e3e 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore

@@ -11,6 +11,8 @@
 qht-bench
 rcutorture
 test-aio
+test-aio-multithread
+test-arm-mptimer
 test-base64
 test-bitops
 test-bitcnt
@@ -24,6 +26,7 @@
 test-crypto-block
 test-crypto-cipher
 test-crypto-hash
+test-crypto-hmac
 test-crypto-ivgen
 test-crypto-pbkdf
 test-crypto-secret
@@ -37,6 +40,7 @@
 test-crypto-xts
 test-cutils
 test-hbitmap
+test-hmp
 test-int128
 test-iov
 test-io-channel-buffer

diff --git a/tests/Makefile.include b/tests/Makefile.include
index 31931c0..16ff8f3 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include

@@ -260,6 +260,7 @@
 check-qtest-i386-y += tests/test-filter-redirector$(EXESUF)
 check-qtest-i386-y += tests/postcopy-test$(EXESUF)
 check-qtest-i386-y += tests/test-x86-cpuid-compat$(EXESUF)
+check-qtest-i386-y += tests/numa-test$(EXESUF)
 check-qtest-x86_64-y += $(check-qtest-i386-y)
 gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c
 gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
@@ -300,6 +301,7 @@
 check-qtest-ppc64-y += tests/test-filter-mirror$(EXESUF)
 check-qtest-ppc64-y += tests/test-filter-redirector$(EXESUF)
 check-qtest-ppc64-y += tests/display-vga-test$(EXESUF)
+check-qtest-ppc64-y += tests/numa-test$(EXESUF)
 check-qtest-ppc64-$(CONFIG_EVENTFD) += tests/ivshmem-test$(EXESUF)
 
 check-qtest-sh4-y = tests/endianness-test$(EXESUF)
@@ -324,6 +326,8 @@
 check-qtest-arm-y += tests/test-arm-mptimer$(EXESUF)
 gcov-files-arm-y += hw/timer/arm_mptimer.c
 
+check-qtest-aarch64-y = tests/numa-test$(EXESUF)
+
 check-qtest-microblazeel-y = $(check-qtest-microblaze-y)
 
 check-qtest-xtensaeb-y = $(check-qtest-xtensa-y)
@@ -753,6 +757,7 @@
 tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y)
 tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o
 tests/test-qapi-util$(EXESUF): tests/test-qapi-util.o $(test-util-obj-y)
+tests/numa-test$(EXESUF): tests/numa-test.o
 
 tests/migration/stress$(EXESUF): tests/migration/stress.o
 	$(call quiet-command, $(LINKPROG) -static -O3 $(PTHREAD_LIB) -o $@ $< ,"LINK","$(TARGET_DIR)$@")

diff --git a/tests/acpi-test-data/pc/SLIT.cphp b/tests/acpi-test-data/pc/SLIT.cphp
new file mode 100644
index 0000000..74ec3b4
--- /dev/null
+++ b/tests/acpi-test-data/pc/SLIT.cphp
Binary files differ

diff --git a/tests/acpi-test-data/pc/SLIT.memhp b/tests/acpi-test-data/pc/SLIT.memhp
new file mode 100644
index 0000000..74ec3b4
--- /dev/null
+++ b/tests/acpi-test-data/pc/SLIT.memhp
Binary files differ

diff --git a/tests/acpi-test-data/pc/SRAT.memhp b/tests/acpi-test-data/pc/SRAT.memhp
index 66ce9a8..a7dddf7 100644
--- a/tests/acpi-test-data/pc/SRAT.memhp
+++ b/tests/acpi-test-data/pc/SRAT.memhp
Binary files differ

diff --git a/tests/acpi-test-data/q35/SLIT.cphp b/tests/acpi-test-data/q35/SLIT.cphp
new file mode 100644
index 0000000..74ec3b4
--- /dev/null
+++ b/tests/acpi-test-data/q35/SLIT.cphp
Binary files differ

diff --git a/tests/acpi-test-data/q35/SLIT.memhp b/tests/acpi-test-data/q35/SLIT.memhp
new file mode 100644
index 0000000..74ec3b4
--- /dev/null
+++ b/tests/acpi-test-data/q35/SLIT.memhp
Binary files differ

diff --git a/tests/acpi-test-data/q35/SRAT.memhp b/tests/acpi-test-data/q35/SRAT.memhp
index 66ce9a8..a7dddf7 100644
--- a/tests/acpi-test-data/q35/SRAT.memhp
+++ b/tests/acpi-test-data/q35/SRAT.memhp
Binary files differ

diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index bdef3b9..63da978 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c

@@ -723,7 +723,8 @@
     data.machine = MACHINE_PC;
     data.variant = ".cphp";
     test_acpi_one("-smp 2,cores=3,sockets=2,maxcpus=6"
-                  " -numa node -numa node",
+                  " -numa node -numa node"
+                  " -numa dist,src=0,dst=1,val=21",
                   &data);
     free_test_data(&data);
 }
@@ -736,7 +737,8 @@
     data.machine = MACHINE_Q35;
     data.variant = ".cphp";
     test_acpi_one(" -smp 2,cores=3,sockets=2,maxcpus=6"
-                  " -numa node -numa node",
+                  " -numa node -numa node"
+                  " -numa dist,src=0,dst=1,val=21",
                   &data);
     free_test_data(&data);
 }
@@ -785,7 +787,10 @@
     memset(&data, 0, sizeof(data));
     data.machine = MACHINE_Q35;
     data.variant = ".memhp";
-    test_acpi_one(" -m 128,slots=3,maxmem=1G -numa node", &data);
+    test_acpi_one(" -m 128,slots=3,maxmem=1G"
+                  " -numa node -numa node"
+                  " -numa dist,src=0,dst=1,val=21",
+                  &data);
     free_test_data(&data);
 }
 
@@ -796,7 +801,10 @@
     memset(&data, 0, sizeof(data));
     data.machine = MACHINE_PC;
     data.variant = ".memhp";
-    test_acpi_one(" -m 128,slots=3,maxmem=1G -numa node", &data);
+    test_acpi_one(" -m 128,slots=3,maxmem=1G"
+                  " -numa node -numa node"
+                  " -numa dist,src=0,dst=1,val=21",
+                  &data);
     free_test_data(&data);
 }
 

diff --git a/tests/drive_del-test.c b/tests/drive_del-test.c
index 121b9c9..2175139 100644
--- a/tests/drive_del-test.c
+++ b/tests/drive_del-test.c

@@ -92,7 +92,7 @@
 static void test_drive_del_device_del(void)
 {
     /* Start with a drive used by a device that unplugs instantaneously */
-    qtest_start("-drive if=none,id=drive0,file=/dev/null,format=raw"
+    qtest_start("-drive if=none,id=drive0,file=null-co://,format=raw"
                 " -device virtio-scsi-pci"
                 " -device scsi-hd,drive=drive0,id=dev0");
 

diff --git a/tests/numa-test.c b/tests/numa-test.c
new file mode 100644
index 0000000..c3475d6
--- /dev/null
+++ b/tests/numa-test.c

@@ -0,0 +1,302 @@
+/*
+ * NUMA configuration test cases
+ *
+ * Copyright (c) 2017 Red Hat Inc.
+ * Authors:
+ *  Igor Mammedov <imammedo@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+
+static char *make_cli(const char *generic_cli, const char *test_cli)
+{
+    return g_strdup_printf("%s %s", generic_cli ? generic_cli : "", test_cli);
+}
+
+static char *hmp_info_numa(void)
+{
+    QDict *resp;
+    char *s;
+
+    resp = qmp("{ 'execute': 'human-monitor-command', 'arguments': "
+                      "{ 'command-line': 'info numa '} }");
+    g_assert(resp);
+    g_assert(qdict_haskey(resp, "return"));
+    s = g_strdup(qdict_get_str(resp, "return"));
+    g_assert(s);
+    QDECREF(resp);
+    return s;
+}
+
+static void test_mon_explicit(const void *data)
+{
+    char *s;
+    char *cli;
+
+    cli = make_cli(data, "-smp 8 "
+                   "-numa node,nodeid=0,cpus=0-3 "
+                   "-numa node,nodeid=1,cpus=4-7 ");
+    qtest_start(cli);
+
+    s = hmp_info_numa();
+    g_assert(strstr(s, "node 0 cpus: 0 1 2 3"));
+    g_assert(strstr(s, "node 1 cpus: 4 5 6 7"));
+    g_free(s);
+
+    qtest_end();
+    g_free(cli);
+}
+
+static void test_mon_default(const void *data)
+{
+    char *s;
+    char *cli;
+
+    cli = make_cli(data, "-smp 8 -numa node -numa node");
+    qtest_start(cli);
+
+    s = hmp_info_numa();
+    g_assert(strstr(s, "node 0 cpus: 0 2 4 6"));
+    g_assert(strstr(s, "node 1 cpus: 1 3 5 7"));
+    g_free(s);
+
+    qtest_end();
+    g_free(cli);
+}
+
+static void test_mon_partial(const void *data)
+{
+    char *s;
+    char *cli;
+
+    cli = make_cli(data, "-smp 8 "
+                   "-numa node,nodeid=0,cpus=0-1 "
+                   "-numa node,nodeid=1,cpus=4-5 ");
+    qtest_start(cli);
+
+    s = hmp_info_numa();
+    g_assert(strstr(s, "node 0 cpus: 0 1 2 3 6 7"));
+    g_assert(strstr(s, "node 1 cpus: 4 5"));
+    g_free(s);
+
+    qtest_end();
+    g_free(cli);
+}
+
+static QList *get_cpus(QDict **resp)
+{
+    *resp = qmp("{ 'execute': 'query-cpus' }");
+    g_assert(*resp);
+    g_assert(qdict_haskey(*resp, "return"));
+    return  qdict_get_qlist(*resp, "return");
+}
+
+static void test_query_cpus(const void *data)
+{
+    char *cli;
+    QDict *resp;
+    QList *cpus;
+    const QObject *e;
+
+    cli = make_cli(data, "-smp 8 -numa node,cpus=0-3 -numa node,cpus=4-7");
+    qtest_start(cli);
+    cpus = get_cpus(&resp);
+    g_assert(cpus);
+
+    while ((e = qlist_pop(cpus))) {
+        QDict *cpu, *props;
+        int64_t cpu_idx, node;
+
+        cpu = qobject_to_qdict(e);
+        g_assert(qdict_haskey(cpu, "CPU"));
+        g_assert(qdict_haskey(cpu, "props"));
+
+        cpu_idx = qdict_get_int(cpu, "CPU");
+        props = qdict_get_qdict(cpu, "props");
+        g_assert(qdict_haskey(props, "node-id"));
+        node = qdict_get_int(props, "node-id");
+        if (cpu_idx >= 0 && cpu_idx < 4) {
+            g_assert_cmpint(node, ==, 0);
+        } else {
+            g_assert_cmpint(node, ==, 1);
+        }
+    }
+
+    QDECREF(resp);
+    qtest_end();
+    g_free(cli);
+}
+
+static void pc_numa_cpu(const void *data)
+{
+    char *cli;
+    QDict *resp;
+    QList *cpus;
+    const QObject *e;
+
+    cli = make_cli(data, "-cpu pentium -smp 8,sockets=2,cores=2,threads=2 "
+        "-numa node,nodeid=0 -numa node,nodeid=1 "
+        "-numa cpu,node-id=1,socket-id=0 "
+        "-numa cpu,node-id=0,socket-id=1,core-id=0 "
+        "-numa cpu,node-id=0,socket-id=1,core-id=1,thread-id=0 "
+        "-numa cpu,node-id=1,socket-id=1,core-id=1,thread-id=1");
+    qtest_start(cli);
+    cpus = get_cpus(&resp);
+    g_assert(cpus);
+
+    while ((e = qlist_pop(cpus))) {
+        QDict *cpu, *props;
+        int64_t socket, core, thread, node;
+
+        cpu = qobject_to_qdict(e);
+        g_assert(qdict_haskey(cpu, "props"));
+        props = qdict_get_qdict(cpu, "props");
+
+        g_assert(qdict_haskey(props, "node-id"));
+        node = qdict_get_int(props, "node-id");
+        g_assert(qdict_haskey(props, "socket-id"));
+        socket = qdict_get_int(props, "socket-id");
+        g_assert(qdict_haskey(props, "core-id"));
+        core = qdict_get_int(props, "core-id");
+        g_assert(qdict_haskey(props, "thread-id"));
+        thread = qdict_get_int(props, "thread-id");
+
+        if (socket == 0) {
+            g_assert_cmpint(node, ==, 1);
+        } else if (socket == 1 && core == 0) {
+            g_assert_cmpint(node, ==, 0);
+        } else if (socket == 1 && core == 1 && thread == 0) {
+            g_assert_cmpint(node, ==, 0);
+        } else if (socket == 1 && core == 1 && thread == 1) {
+            g_assert_cmpint(node, ==, 1);
+        } else {
+            g_assert(false);
+        }
+    }
+
+    QDECREF(resp);
+    qtest_end();
+    g_free(cli);
+}
+
+static void spapr_numa_cpu(const void *data)
+{
+    char *cli;
+    QDict *resp;
+    QList *cpus;
+    const QObject *e;
+
+    cli = make_cli(data, "-smp 4,cores=4 "
+        "-numa node,nodeid=0 -numa node,nodeid=1 "
+        "-numa cpu,node-id=0,core-id=0 "
+        "-numa cpu,node-id=0,core-id=1 "
+        "-numa cpu,node-id=0,core-id=2 "
+        "-numa cpu,node-id=1,core-id=3");
+    qtest_start(cli);
+    cpus = get_cpus(&resp);
+    g_assert(cpus);
+
+    while ((e = qlist_pop(cpus))) {
+        QDict *cpu, *props;
+        int64_t core, node;
+
+        cpu = qobject_to_qdict(e);
+        g_assert(qdict_haskey(cpu, "props"));
+        props = qdict_get_qdict(cpu, "props");
+
+        g_assert(qdict_haskey(props, "node-id"));
+        node = qdict_get_int(props, "node-id");
+        g_assert(qdict_haskey(props, "core-id"));
+        core = qdict_get_int(props, "core-id");
+
+        if (core >= 0 && core < 3) {
+            g_assert_cmpint(node, ==, 0);
+        } else if (core == 3) {
+            g_assert_cmpint(node, ==, 1);
+        } else {
+            g_assert(false);
+        }
+    }
+
+    QDECREF(resp);
+    qtest_end();
+    g_free(cli);
+}
+
+static void aarch64_numa_cpu(const void *data)
+{
+    char *cli;
+    QDict *resp;
+    QList *cpus;
+    const QObject *e;
+
+    cli = make_cli(data, "-smp 2 "
+        "-numa node,nodeid=0 -numa node,nodeid=1 "
+        "-numa cpu,node-id=1,thread-id=0 "
+        "-numa cpu,node-id=0,thread-id=1");
+    qtest_start(cli);
+    cpus = get_cpus(&resp);
+    g_assert(cpus);
+
+    while ((e = qlist_pop(cpus))) {
+        QDict *cpu, *props;
+        int64_t thread, node;
+
+        cpu = qobject_to_qdict(e);
+        g_assert(qdict_haskey(cpu, "props"));
+        props = qdict_get_qdict(cpu, "props");
+
+        g_assert(qdict_haskey(props, "node-id"));
+        node = qdict_get_int(props, "node-id");
+        g_assert(qdict_haskey(props, "thread-id"));
+        thread = qdict_get_int(props, "thread-id");
+
+        if (thread == 0) {
+            g_assert_cmpint(node, ==, 1);
+        } else if (thread == 1) {
+            g_assert_cmpint(node, ==, 0);
+        } else {
+            g_assert(false);
+        }
+    }
+
+    QDECREF(resp);
+    qtest_end();
+    g_free(cli);
+}
+
+int main(int argc, char **argv)
+{
+    const char *args = NULL;
+    const char *arch = qtest_get_arch();
+
+    if (strcmp(arch, "aarch64") == 0) {
+        args = "-machine virt";
+    }
+
+    g_test_init(&argc, &argv, NULL);
+
+    qtest_add_data_func("/numa/mon/default", args, test_mon_default);
+    qtest_add_data_func("/numa/mon/cpus/explicit", args, test_mon_explicit);
+    qtest_add_data_func("/numa/mon/cpus/partial", args, test_mon_partial);
+    qtest_add_data_func("/numa/qmp/cpus/query-cpus", args, test_query_cpus);
+
+    if (!strcmp(arch, "i386") || !strcmp(arch, "x86_64")) {
+        qtest_add_data_func("/numa/pc/cpu/explicit", args, pc_numa_cpu);
+    }
+
+    if (!strcmp(arch, "ppc64")) {
+        qtest_add_data_func("/numa/spapr/cpu/explicit", args, spapr_numa_cpu);
+    }
+
+    if (!strcmp(arch, "aarch64")) {
+        qtest_add_data_func("/numa/aarch64/cpu/explicit", args,
+                            aarch64_numa_cpu);
+    }
+
+    return g_test_run();
+}

diff --git a/tests/nvme-test.c b/tests/nvme-test.c
index c8bece4..7674a44 100644
--- a/tests/nvme-test.c
+++ b/tests/nvme-test.c

@@ -22,7 +22,7 @@
     g_test_init(&argc, &argv, NULL);
     qtest_add_func("/nvme/nop", nop);
 
-    qtest_start("-drive id=drv0,if=none,file=/dev/null,format=raw "
+    qtest_start("-drive id=drv0,if=none,file=null-co://,format=raw "
                 "-device nvme,drive=drv0,serial=foo");
     ret = g_test_run();
 

diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c
index de35a18..e86f876 100644
--- a/tests/postcopy-test.c
+++ b/tests/postcopy-test.c

@@ -41,7 +41,7 @@
     struct uffdio_api api_struct;
     uint64_t ioctl_mask;
 
-    int ufd = ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
+    int ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
 
     if (ufd == -1) {
         g_test_message("Skipping test: userfaultfd not available");

diff --git a/tests/qemu-iotests/019.out b/tests/qemu-iotests/019.out
index 0124264..17a7c03 100644
--- a/tests/qemu-iotests/019.out
+++ b/tests/qemu-iotests/019.out

@@ -542,8 +542,8 @@
 
 Checking if backing clusters are allocated when they shouldn't
 
-0/128 sectors allocated at offset 1 MiB
-0/128 sectors allocated at offset 4.001 GiB
+0/65536 bytes allocated at offset 1 MiB
+0/65536 bytes allocated at offset 4.001 GiB
 Reading
 
 === IO: pattern 42
@@ -1086,8 +1086,8 @@
 
 Checking if backing clusters are allocated when they shouldn't
 
-0/128 sectors allocated at offset 1 MiB
-0/128 sectors allocated at offset 4.001 GiB
+0/65536 bytes allocated at offset 1 MiB
+0/65536 bytes allocated at offset 4.001 GiB
 Reading
 
 === IO: pattern 42

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 0d472d5..e00c11b 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030

@@ -63,8 +63,8 @@
     def test_stream_intermediate(self):
         self.assert_no_active_block_jobs()
 
-        self.assertNotEqual(qemu_io('-f', 'raw', '-c', 'map', backing_img),
-                            qemu_io('-f', iotests.imgfmt, '-c', 'map', mid_img),
+        self.assertNotEqual(qemu_io('-f', 'raw', '-rU', '-c', 'map', backing_img),
+                            qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', mid_img),
                             'image file map matches backing file before streaming')
 
         result = self.vm.qmp('block-stream', device='mid', job_id='stream-mid')
@@ -114,7 +114,7 @@
         self.assert_no_active_block_jobs()
 
         # The image map is empty before the operation
-        empty_map = qemu_io('-f', iotests.imgfmt, '-c', 'map', test_img)
+        empty_map = qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', test_img)
 
         # This is a no-op: no data should ever be copied from the base image
         result = self.vm.qmp('block-stream', device='drive0', base=mid_img)
@@ -197,8 +197,8 @@
 
         # Check that the maps don't match before the streaming operations
         for i in range(2, self.num_imgs, 2):
-            self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i]),
-                                qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i-1]),
+            self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.imgs[i]),
+                                qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.imgs[i-1]),
                                 'image file map matches backing file before streaming')
 
         # Create all streaming jobs
@@ -351,8 +351,8 @@
     def test_stream_base_node_name(self):
         self.assert_no_active_block_jobs()
 
-        self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[4]),
-                            qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[3]),
+        self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.imgs[4]),
+                            qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.imgs[3]),
                             'image file map matches backing file before streaming')
 
         # Error: the base node does not exist
@@ -422,8 +422,8 @@
         if not iotests.supports_quorum():
             return
 
-        self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.children[0]),
-                            qemu_io('-f', iotests.imgfmt, '-c', 'map', self.backing[0]),
+        self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.children[0]),
+                            qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.backing[0]),
                             'image file map matches backing file before streaming')
 
         self.assert_no_active_block_jobs()

diff --git a/tests/qemu-iotests/046 b/tests/qemu-iotests/046
index e528b67..f2ebecf 100755
--- a/tests/qemu-iotests/046
+++ b/tests/qemu-iotests/046

@@ -192,7 +192,7 @@
 
 function verify_io()
 {
-    if ($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep "compat: 0.10" > /dev/null); then
+    if ($QEMU_IMG info -U -f "$IMGFMT" "$TEST_IMG" | grep "compat: 0.10" > /dev/null); then
         # For v2 images, discarded clusters are read from the backing file
         # Keep the variable empty so that the backing file value can be used as
         # the default below

diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055
index aafcd24..ba4da65 100755
--- a/tests/qemu-iotests/055
+++ b/tests/qemu-iotests/055

@@ -458,17 +458,18 @@
         except OSError:
             pass
 
-    def do_prepare_drives(self, fmt, args):
+    def do_prepare_drives(self, fmt, args, attach_target):
         self.vm = iotests.VM().add_drive(test_img)
 
         qemu_img('create', '-f', fmt, blockdev_target_img,
                  str(TestDriveCompression.image_len), *args)
-        self.vm.add_drive(blockdev_target_img, format=fmt, interface="none")
+        if attach_target:
+            self.vm.add_drive(blockdev_target_img, format=fmt, interface="none")
 
         self.vm.launch()
 
-    def do_test_compress_complete(self, cmd, format, **args):
-        self.do_prepare_drives(format['type'], format['args'])
+    def do_test_compress_complete(self, cmd, format, attach_target, **args):
+        self.do_prepare_drives(format['type'], format['args'], attach_target)
 
         self.assert_no_active_block_jobs()
 
@@ -484,15 +485,16 @@
 
     def test_complete_compress_drive_backup(self):
         for format in TestDriveCompression.fmt_supports_compression:
-            self.do_test_compress_complete('drive-backup', format,
+            self.do_test_compress_complete('drive-backup', format, False,
                                            target=blockdev_target_img, mode='existing')
 
     def test_complete_compress_blockdev_backup(self):
         for format in TestDriveCompression.fmt_supports_compression:
-            self.do_test_compress_complete('blockdev-backup', format, target='drive1')
+            self.do_test_compress_complete('blockdev-backup', format, True,
+                                           target='drive1')
 
-    def do_test_compress_cancel(self, cmd, format, **args):
-        self.do_prepare_drives(format['type'], format['args'])
+    def do_test_compress_cancel(self, cmd, format, attach_target, **args):
+        self.do_prepare_drives(format['type'], format['args'], attach_target)
 
         self.assert_no_active_block_jobs()
 
@@ -506,15 +508,16 @@
 
     def test_compress_cancel_drive_backup(self):
         for format in TestDriveCompression.fmt_supports_compression:
-            self.do_test_compress_cancel('drive-backup', format,
+            self.do_test_compress_cancel('drive-backup', format, False,
                                          target=blockdev_target_img, mode='existing')
 
     def test_compress_cancel_blockdev_backup(self):
        for format in TestDriveCompression.fmt_supports_compression:
-            self.do_test_compress_cancel('blockdev-backup', format, target='drive1')
+            self.do_test_compress_cancel('blockdev-backup', format, True,
+                                         target='drive1')
 
-    def do_test_compress_pause(self, cmd, format, **args):
-        self.do_prepare_drives(format['type'], format['args'])
+    def do_test_compress_pause(self, cmd, format, attach_target, **args):
+        self.do_prepare_drives(format['type'], format['args'], attach_target)
 
         self.assert_no_active_block_jobs()
 
@@ -546,12 +549,13 @@
 
     def test_compress_pause_drive_backup(self):
         for format in TestDriveCompression.fmt_supports_compression:
-            self.do_test_compress_pause('drive-backup', format,
+            self.do_test_compress_pause('drive-backup', format, False,
                                         target=blockdev_target_img, mode='existing')
 
     def test_compress_pause_blockdev_backup(self):
         for format in TestDriveCompression.fmt_supports_compression:
-            self.do_test_compress_pause('blockdev-backup', format, target='drive1')
+            self.do_test_compress_pause('blockdev-backup', format, True,
+                                        target='drive1')
 
 if __name__ == '__main__':
     iotests.main(supported_fmts=['raw', 'qcow2'])

diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index 5d40206..9e8f5b9 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out

@@ -135,7 +135,7 @@
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 wrote 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-qcow2: Marking image as corrupt: Data cluster offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further corruption events will be suppressed
+qcow2: Marking image as corrupt: Cluster allocation offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further corruption events will be suppressed
 read failed: Input/output error
 
 === Testing unaligned pre-allocated zero cluster ===
@@ -166,7 +166,7 @@
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 wrote 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-qcow2: Image is corrupt: Data cluster offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further non-fatal corruption events will be suppressed
+qcow2: Image is corrupt: Cluster allocation offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further non-fatal corruption events will be suppressed
 read failed: Input/output error
 read failed: Input/output error
 
@@ -176,7 +176,7 @@
 wrote 131072/131072 bytes at offset 0
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 qcow2: Image is corrupt: Cannot free unaligned cluster 0x52a00; further non-fatal corruption events will be suppressed
-qcow2: Marking image as corrupt: Data cluster offset 0x62a00 unaligned (L2 offset: 0x40000, L2 index: 0x1); further corruption events will be suppressed
+qcow2: Marking image as corrupt: Cluster allocation offset 0x62a00 unaligned (L2 offset: 0x40000, L2 index: 0x1); further corruption events will be suppressed
 discard 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read failed: Input/output error

diff --git a/tests/qemu-iotests/066 b/tests/qemu-iotests/066
index c2116a3..8638217 100755
--- a/tests/qemu-iotests/066
+++ b/tests/qemu-iotests/066

@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Test case for discarding preallocated zero clusters in qcow2
+# Test case for preallocated zero clusters in qcow2
 #
 # Copyright (C) 2013 Red Hat, Inc.
 #
@@ -55,8 +55,134 @@
 $QEMU_IO -c "write 0 256k" -c "write -z 0 256k" -c "write 64M 512" \
 	 -c "discard 0 $IMG_SIZE" -c "read -P 0 0 $IMG_SIZE" "$TEST_IMG" \
          | _filter_qemu_io
+
 # Check the image (there shouldn't be any leaks)
 _check_test_img
+# Map the image (we want all clusters to be gone)
+$QEMU_IMG map "$TEST_IMG"
+
+_cleanup_test_img
+
+
+echo
+echo '=== Writing to preallocated zero clusters ==='
+echo
+
+_make_test_img $IMG_SIZE
+
+# Create data clusters (not aligned to an L2 table)
+$QEMU_IO -c 'write -P 42 1M 256k' "$TEST_IMG" | _filter_qemu_io
+orig_map=$($QEMU_IMG map --output=json "$TEST_IMG")
+
+# Convert the data clusters to preallocated zero clusters
+$QEMU_IO -c 'write -z 1M 256k' "$TEST_IMG" | _filter_qemu_io
+
+# Now write to them (with a COW needed for the head and tail)
+$QEMU_IO -c "write -P 23 $(((1024 + 32) * 1024)) 192k" "$TEST_IMG" \
+    | _filter_qemu_io
+
+# Check metadata correctness
+_check_test_img
+
+# Check data correctness
+$QEMU_IO -c "read -P  0 $(( 1024             * 1024)) 32k" \
+         -c "read -P 23 $(((1024 + 32)       * 1024)) 192k" \
+         -c "read -P  0 $(((1024 + 32 + 192) * 1024)) 32k" \
+         "$TEST_IMG" \
+         | _filter_qemu_io
+
+# Check that we have actually reused the original area
+new_map=$($QEMU_IMG map --output=json "$TEST_IMG")
+if [ "$new_map" = "$orig_map" ]; then
+    echo 'Successfully reused original clusters.'
+else
+    echo 'Failed to reuse original clusters.'
+    echo 'Original map:'
+    echo "$orig_map"
+    echo 'New map:'
+    echo "$new_map"
+fi
+
+_cleanup_test_img
+
+
+echo
+echo '=== Writing to a snapshotted preallocated zero cluster ==='
+echo
+
+_make_test_img 64k
+
+# Create a preallocated zero cluster
+$QEMU_IO -c 'write -P 42 0 64k' -c 'write -z 0 64k' "$TEST_IMG" \
+    | _filter_qemu_io
+
+# Snapshot it
+$QEMU_IMG snapshot -c foo "$TEST_IMG"
+
+# Write to the cluster
+$QEMU_IO -c 'write -P 23 0 64k' "$TEST_IMG" | _filter_qemu_io
+
+# Check metadata correctness
+_check_test_img
+
+# Check data correctness
+$QEMU_IO -c 'read -P 23 0 64k' "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG snapshot -a foo "$TEST_IMG"
+$QEMU_IO -c 'read -P 0 0 64k' "$TEST_IMG" | _filter_qemu_io
+
+_cleanup_test_img
+
+
+echo
+echo '=== Consecutive write to a preallocated zero cluster ==='
+echo
+
+_make_test_img 192k
+
+# Create three normal clusters
+$QEMU_IO -c 'write -P 42 0 192k' "$TEST_IMG" | _filter_qemu_io
+orig_map=$($QEMU_IMG map --output=json "$TEST_IMG")
+
+# Make the middle cluster a preallocated zero cluster
+$QEMU_IO -c 'write -z 64k 64k' "$TEST_IMG" | _filter_qemu_io
+
+# Try to overwrite everything: This should reuse the whole range. To test that
+# this only issues a single continuous write request, use blkdebug.
+$QEMU_IO -c 'write -P 42 0 192k' \
+    "json:{
+        'driver': '$IMGFMT',
+        'file': {
+            'driver': 'blkdebug',
+            'image.filename': '$TEST_IMG',
+            'set-state': [{
+                'event': 'write_aio',
+                'new_state': 2
+            }],
+            'inject-error': [{
+                'event': 'write_aio',
+                'state': 2
+            }]
+        }
+    }" \
+    | _filter_qemu_io
+
+# Check metadata correctness
+_check_test_img
+
+# Check that we have actually reused the original area
+new_map=$($QEMU_IMG map --output=json "$TEST_IMG")
+if [ "$new_map" = "$orig_map" ]; then
+    echo 'Successfully reused original clusters.'
+else
+    echo 'Failed to reuse original clusters.'
+    echo 'Original map:'
+    echo "$orig_map"
+    echo 'New map:'
+    echo "$new_map"
+fi
+
+_cleanup_test_img
+
 
 # success, all done
 echo "*** done"

diff --git a/tests/qemu-iotests/066.out b/tests/qemu-iotests/066.out
index 7c1f31a..3d9da9b 100644
--- a/tests/qemu-iotests/066.out
+++ b/tests/qemu-iotests/066.out

@@ -14,4 +14,50 @@
 read 67109376/67109376 bytes at offset 0
 64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 No errors were found on the image.
+Offset          Length          Mapped to       File
+
+=== Writing to preallocated zero clusters ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67109376
+wrote 262144/262144 bytes at offset 1048576
+256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 262144/262144 bytes at offset 1048576
+256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 196608/196608 bytes at offset 1081344
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+No errors were found on the image.
+read 32768/32768 bytes at offset 1048576
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 196608/196608 bytes at offset 1081344
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 32768/32768 bytes at offset 1277952
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Successfully reused original clusters.
+
+=== Writing to a snapshotted preallocated zero cluster ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65536
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+No errors were found on the image.
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Consecutive write to a preallocated zero cluster ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=196608
+wrote 196608/196608 bytes at offset 0
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 65536
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 196608/196608 bytes at offset 0
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+No errors were found on the image.
+Successfully reused original clusters.
 *** done

diff --git a/tests/qemu-iotests/085 b/tests/qemu-iotests/085
index c53e97f..b97adcd 100755
--- a/tests/qemu-iotests/085
+++ b/tests/qemu-iotests/085

@@ -45,7 +45,7 @@
         rm -f "${TEST_DIR}/${i}-${snapshot_virt0}"
         rm -f "${TEST_DIR}/${i}-${snapshot_virt1}"
     done
-    rm -f "${TEST_IMG}.1" "${TEST_IMG}.2"
+    rm -f "${TEST_IMG}" "${TEST_IMG}.1" "${TEST_IMG}.2" "${TEST_IMG}.base"
 
 }
 trap "_cleanup; exit \$status" 0 1 2 3 15
@@ -87,24 +87,26 @@
 }
 
 # ${1}: unique identifier for the snapshot filename
-# ${2}: true: open backing images; false: don't open them (default)
+# ${2}: extra_params to the blockdev-add command
+# ${3}: filename
+function do_blockdev_add()
+{
+    cmd="{ 'execute': 'blockdev-add', 'arguments':
+           { 'driver': 'qcow2', 'node-name': 'snap_${1}', ${2}
+             'file':
+             { 'driver': 'file', 'filename': '${3}',
+               'node-name': 'file_${1}' } } }"
+    _send_qemu_cmd $h "${cmd}" "return"
+}
+
+# ${1}: unique identifier for the snapshot filename
 function add_snapshot_image()
 {
-    if [ "${2}" = "true" ]; then
-        extra_params=""
-    else
-        extra_params="'backing': '', "
-    fi
     base_image="${TEST_DIR}/$((${1}-1))-${snapshot_virt0}"
     snapshot_file="${TEST_DIR}/${1}-${snapshot_virt0}"
     _make_test_img -b "${base_image}" "$size"
     mv "${TEST_IMG}" "${snapshot_file}"
-    cmd="{ 'execute': 'blockdev-add', 'arguments':
-           { 'driver': 'qcow2', 'node-name': 'snap_${1}', ${extra_params}
-             'file':
-             { 'driver': 'file', 'filename': '${snapshot_file}',
-               'node-name': 'file_${1}' } } }"
-    _send_qemu_cmd $h "${cmd}" "return"
+    do_blockdev_add "$1" "'backing': '', " "${snapshot_file}"
 }
 
 # ${1}: unique identifier for the snapshot filename
@@ -222,7 +224,10 @@
 echo
 
 SNAPSHOTS=$((${SNAPSHOTS}+1))
-add_snapshot_image ${SNAPSHOTS} true
+
+TEST_IMG="$TEST_IMG.base" _make_test_img "$size"
+_make_test_img -b "${TEST_IMG}.base" "$size"
+do_blockdev_add ${SNAPSHOTS} "" "${TEST_IMG}"
 blockdev_snapshot ${SNAPSHOTS} error
 
 echo

diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out
index 182acb4..a5d4cc3 100644
--- a/tests/qemu-iotests/085.out
+++ b/tests/qemu-iotests/085.out

@@ -78,7 +78,8 @@
 
 === Invalid command - snapshot node has a backing image ===
 
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/12-snapshot-v0.IMGFMT
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/t.IMGFMT.base
 {"return": {}}
 {"error": {"class": "GenericError", "desc": "The snapshot already has a backing image"}}
 

diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087
index 9de57dd..6d52f7d 100755
--- a/tests/qemu-iotests/087
+++ b/tests/qemu-iotests/087

@@ -82,8 +82,7 @@
       "driver": "$IMGFMT",
       "node-name": "disk",
       "file": {
-          "driver": "file",
-          "filename": "$TEST_IMG"
+          "driver": "null-co"
       }
     }
   }
@@ -92,8 +91,7 @@
       "driver": "$IMGFMT",
       "node-name": "test-node",
       "file": {
-          "driver": "file",
-          "filename": "$TEST_IMG"
+          "driver": "null-co"
       }
     }
   }

diff --git a/tests/qemu-iotests/091 b/tests/qemu-iotests/091
index 32bbd56..10ac4a8 100755
--- a/tests/qemu-iotests/091
+++ b/tests/qemu-iotests/091

@@ -95,7 +95,9 @@
 echo "vm2: flush io, and quit"
 _send_qemu_cmd $h2 'qemu-io disk flush' "(qemu)"
 _send_qemu_cmd $h2 'quit' ""
+_send_qemu_cmd $h1 'quit' ""
 
+wait
 echo "Check image pattern"
 ${QEMU_IO} -c "read -P 0x22 0 4M" "${TEST_IMG}" | _filter_testdir | _filter_qemu_io
 

diff --git a/tests/qemu-iotests/102.out b/tests/qemu-iotests/102.out
index eecde16..ccf172a 100644
--- a/tests/qemu-iotests/102.out
+++ b/tests/qemu-iotests/102.out

@@ -6,7 +6,7 @@
 wrote 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Image resized.
-[                       0]      128/     128 sectors     allocated at offset 0 bytes (1)
+64 KiB (0x10000) bytes     allocated at offset 0 bytes (0x0)
 Offset          Length          Mapped to       File
 
 === Testing map on an image file truncated outside of qemu ===
@@ -17,5 +17,5 @@
 Image resized.
 QEMU X.Y.Z monitor - type 'help' for more information
 (qemu) qemu-io drv0 map
-[                       0]      128/     128 sectors     allocated at offset 0 bytes (1)
+64 KiB (0x10000) bytes     allocated at offset 0 bytes (0x0)
 *** done

diff --git a/tests/qemu-iotests/122.out b/tests/qemu-iotests/122.out
index 9317d80..47d8656 100644
--- a/tests/qemu-iotests/122.out
+++ b/tests/qemu-iotests/122.out

@@ -112,7 +112,7 @@
 3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 63963136/63963136 bytes at offset 3145728
 61 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": 327680}]
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
 
 convert -c -S 0:
 read 3145728/3145728 bytes at offset 0
@@ -134,7 +134,7 @@
 29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 33554432/33554432 bytes at offset 33554432
 32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": 327680}]
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
 
 convert -c -S 0 with source backing file:
 read 3145728/3145728 bytes at offset 0
@@ -152,7 +152,7 @@
 29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 33554432/33554432 bytes at offset 33554432
 32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": 327680}]
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
 
 convert -c -S 0 -B ...
 read 3145728/3145728 bytes at offset 0
@@ -176,11 +176,11 @@
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
 convert -S 4k
-[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 8192},
+[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 1024, "length": 7168, "depth": 0, "zero": true, "data": false},
-{ "start": 8192, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 9216},
+{ "start": 8192, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false},
-{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 10240},
+{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}]
 
 convert -c -S 4k
@@ -192,9 +192,9 @@
 { "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}]
 
 convert -S 8k
-[{ "start": 0, "length": 9216, "depth": 0, "zero": false, "data": true, "offset": 8192},
+[{ "start": 0, "length": 9216, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false},
-{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 17408},
+{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}]
 
 convert -c -S 8k

diff --git a/tests/qemu-iotests/146.out b/tests/qemu-iotests/146.out
index 4f334d8..db6b296 100644
--- a/tests/qemu-iotests/146.out
+++ b/tests/qemu-iotests/146.out

@@ -2,39 +2,39 @@
 
 === Testing VPC Autodetect ===
 
-[                       0]  266334240/ 266334240 sectors not allocated at offset 0 bytes (0)
+126.998 GiB (0x1fbfe04000) bytes not allocated at offset 0 bytes (0x0)
 
 === Testing VPC with current_size force ===
 
-[                       0]  266338304/ 266338304 sectors not allocated at offset 0 bytes (0)
+127 GiB (0x1fc0000000) bytes not allocated at offset 0 bytes (0x0)
 
 === Testing VPC with chs force ===
 
-[                       0]  266334240/ 266334240 sectors not allocated at offset 0 bytes (0)
+126.998 GiB (0x1fbfe04000) bytes not allocated at offset 0 bytes (0x0)
 
 === Testing Hyper-V Autodetect ===
 
-[                       0]  266338304/ 266338304 sectors not allocated at offset 0 bytes (0)
+127 GiB (0x1fc0000000) bytes not allocated at offset 0 bytes (0x0)
 
 === Testing Hyper-V with current_size force ===
 
-[                       0]  266338304/ 266338304 sectors not allocated at offset 0 bytes (0)
+127 GiB (0x1fc0000000) bytes not allocated at offset 0 bytes (0x0)
 
 === Testing Hyper-V with chs force ===
 
-[                       0]  266334240/ 266334240 sectors not allocated at offset 0 bytes (0)
+126.998 GiB (0x1fbfe04000) bytes not allocated at offset 0 bytes (0x0)
 
 === Testing d2v Autodetect ===
 
-[                       0]   514560/  514560 sectors     allocated at offset 0 bytes (1)
+251.250 MiB (0xfb40000) bytes     allocated at offset 0 bytes (0x0)
 
 === Testing d2v with current_size force ===
 
-[                       0]   514560/  514560 sectors     allocated at offset 0 bytes (1)
+251.250 MiB (0xfb40000) bytes     allocated at offset 0 bytes (0x0)
 
 === Testing d2v with chs force ===
 
-[                       0]   514560/  514560 sectors     allocated at offset 0 bytes (1)
+251.250 MiB (0xfb40000) bytes     allocated at offset 0 bytes (0x0)
 
 === Testing Image create, default ===
 
@@ -42,15 +42,15 @@
 
 === Read created image, default opts ====
 
-[                       0]  8389584/ 8389584 sectors not allocated at offset 0 bytes (0)
+4 GiB (0x10007a000) bytes not allocated at offset 0 bytes (0x0)
 
 === Read created image, force_size_calc=chs ====
 
-[                       0]  8389584/ 8389584 sectors not allocated at offset 0 bytes (0)
+4 GiB (0x10007a000) bytes not allocated at offset 0 bytes (0x0)
 
 === Read created image, force_size_calc=current_size ====
 
-[                       0]  8389584/ 8389584 sectors not allocated at offset 0 bytes (0)
+4 GiB (0x10007a000) bytes not allocated at offset 0 bytes (0x0)
 
 === Testing Image create, force_size ===
 
@@ -58,13 +58,13 @@
 
 === Read created image, default opts ====
 
-[                       0]  8388608/ 8388608 sectors not allocated at offset 0 bytes (0)
+4 GiB (0x100000000) bytes not allocated at offset 0 bytes (0x0)
 
 === Read created image, force_size_calc=chs ====
 
-[                       0]  8388608/ 8388608 sectors not allocated at offset 0 bytes (0)
+4 GiB (0x100000000) bytes not allocated at offset 0 bytes (0x0)
 
 === Read created image, force_size_calc=current_size ====
 
-[                       0]  8388608/ 8388608 sectors not allocated at offset 0 bytes (0)
+4 GiB (0x100000000) bytes not allocated at offset 0 bytes (0x0)
 *** done

diff --git a/tests/qemu-iotests/153 b/tests/qemu-iotests/153
new file mode 100755
index 0000000..0b45d78
--- /dev/null
+++ b/tests/qemu-iotests/153

@@ -0,0 +1,233 @@
+#!/bin/bash
+#
+# Test image locking
+#
+# Copyright 2016, 2017 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=famz@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+    rm -f "${TEST_IMG}.base"
+    rm -f "${TEST_IMG}.convert"
+    rm -f "${TEST_IMG}.a"
+    rm -f "${TEST_IMG}.b"
+    rm -f "${TEST_IMG}.lnk"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+size=32M
+
+_check_ofd()
+{
+    _make_test_img $size >/dev/null
+    if $QEMU_IMG_PROG info --image-opts "driver=file,locking=on,filename=$TEST_IMG" 2>&1 |
+        grep -q 'falling back to POSIX file'; then
+        return 1
+    else
+        return 0
+    fi
+}
+
+_check_ofd || _notrun "OFD lock not available"
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+_run_cmd()
+{
+    echo
+    (echo "$@"; "$@" 2>&1 1>/dev/null) | _filter_testdir
+}
+
+function _do_run_qemu()
+{
+    (
+        if ! test -t 0; then
+            while read cmd; do
+                echo $cmd
+            done
+        fi
+        echo quit
+    ) | $QEMU -nographic -monitor stdio -serial none "$@" 1>/dev/null
+}
+
+function _run_qemu_with_images()
+{
+    _do_run_qemu \
+        $(for i in $@; do echo "-drive if=none,file=$i"; done) 2>&1 \
+        | _filter_testdir | _filter_qemu
+}
+
+echo "== readonly=off,force-share=on should be rejected =="
+_run_qemu_with_images null-co://,readonly=off,force-share=on
+
+for opts1 in "" "read-only=on" "read-only=on,force-share=on"; do
+    echo
+    echo "== Creating base image =="
+    TEST_IMG="${TEST_IMG}.base" _make_test_img $size
+
+    echo
+    echo "== Creating test image =="
+    $QEMU_IMG create -f $IMGFMT "${TEST_IMG}" -b ${TEST_IMG}.base | _filter_img_create
+
+    echo
+    echo "== Launching QEMU, opts: '$opts1' =="
+    _launch_qemu -drive file="${TEST_IMG}",if=none,$opts1
+    h=$QEMU_HANDLE
+
+    for opts2 in "" "read-only=on" "read-only=on,force-share=on"; do
+        echo
+        echo "== Launching another QEMU, opts: '$opts2' =="
+        echo "quit" | \
+            $QEMU -nographic -monitor stdio \
+            -drive file="${TEST_IMG}",if=none,$opts2 2>&1 1>/dev/null | \
+            _filter_testdir | _filter_qemu
+    done
+
+    for L in "" "-U"; do
+
+        echo
+        echo "== Running utility commands $L =="
+        _run_cmd $QEMU_IO $L -c "read 0 512" "${TEST_IMG}"
+        _run_cmd $QEMU_IO $L -r -c "read 0 512" "${TEST_IMG}"
+        _run_cmd $QEMU_IO -c "open $L ${TEST_IMG}" -c "read 0 512"
+        _run_cmd $QEMU_IO -c "open -r $L ${TEST_IMG}" -c "read 0 512"
+        _run_cmd $QEMU_IMG info        $L "${TEST_IMG}"
+        _run_cmd $QEMU_IMG check       $L "${TEST_IMG}"
+        _run_cmd $QEMU_IMG compare     $L "${TEST_IMG}" "${TEST_IMG}"
+        _run_cmd $QEMU_IMG map         $L "${TEST_IMG}"
+        _run_cmd $QEMU_IMG amend -o "" $L "${TEST_IMG}"
+        _run_cmd $QEMU_IMG commit      $L "${TEST_IMG}"
+        _run_cmd $QEMU_IMG resize      $L "${TEST_IMG}" $size
+        _run_cmd $QEMU_IMG rebase      $L "${TEST_IMG}" -b "${TEST_IMG}.base"
+        _run_cmd $QEMU_IMG snapshot -l $L "${TEST_IMG}"
+        _run_cmd $QEMU_IMG convert     $L "${TEST_IMG}" "${TEST_IMG}.convert"
+        _run_cmd $QEMU_IMG dd          $L if="${TEST_IMG}" of="${TEST_IMG}.convert" bs=512 count=1
+        _run_cmd $QEMU_IMG bench       $L -c 1 "${TEST_IMG}"
+        _run_cmd $QEMU_IMG bench       $L -w -c 1 "${TEST_IMG}"
+    done
+    _send_qemu_cmd $h "{ 'execute': 'quit', }" ""
+    echo
+    echo "Round done"
+    _cleanup_qemu
+done
+
+for opt1 in $test_opts; do
+    for opt2 in $test_opts; do
+        echo
+        echo "== Two devices with the same image ($opt1 - $opt2) =="
+        _run_qemu_with_images "${TEST_IMG},$opt1" "${TEST_IMG},$opt2"
+    done
+done
+
+echo "== Creating ${TEST_IMG}.[abc] ==" | _filter_testdir
+(
+    $QEMU_IMG create -f qcow2 "${TEST_IMG}.a" -b "${TEST_IMG}"
+    $QEMU_IMG create -f qcow2 "${TEST_IMG}.b" -b "${TEST_IMG}"
+    $QEMU_IMG create -f qcow2 "${TEST_IMG}.c" -b "${TEST_IMG}.b"
+) | _filter_img_create
+
+echo
+echo "== Two devices sharing the same file in backing chain =="
+_run_qemu_with_images "${TEST_IMG}.a" "${TEST_IMG}.b"
+_run_qemu_with_images "${TEST_IMG}.a" "${TEST_IMG}.c"
+
+echo
+echo "== Backing image also as an active device =="
+_run_qemu_with_images "${TEST_IMG}.a" "${TEST_IMG}"
+
+echo
+echo "== Backing image also as an active device (ro) =="
+_run_qemu_with_images "${TEST_IMG}.a" "${TEST_IMG},readonly=on"
+
+echo
+echo "== Symbolic link =="
+rm -f "${TEST_IMG}.lnk" &>/dev/null
+ln -s ${TEST_IMG} "${TEST_IMG}.lnk" || echo "Failed to create link"
+_run_qemu_with_images "${TEST_IMG}.lnk" "${TEST_IMG}"
+
+echo
+echo "== Closing an image should unlock it =="
+_launch_qemu
+
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'qmp_capabilities' }" \
+    'return'
+
+echo "Adding drive"
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'human-monitor-command',
+       'arguments': { 'command-line': 'drive_add 0 if=none,id=d0,file=${TEST_IMG}' } }" \
+    ""
+
+_run_cmd $QEMU_IO "${TEST_IMG}" -c 'write 0 512'
+
+echo "Closing drive"
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'human-monitor-command',
+       'arguments': { 'command-line': 'drive_del d0' } }" \
+    ""
+
+_run_cmd $QEMU_IO "${TEST_IMG}" -c 'write 0 512'
+
+echo "Adding two and closing one"
+for d in d0 d1; do
+    _send_qemu_cmd $QEMU_HANDLE \
+        "{ 'execute': 'human-monitor-command',
+           'arguments': { 'command-line': 'drive_add 0 if=none,id=$d,file=${TEST_IMG},readonly=on' } }" \
+        ""
+done
+
+_run_cmd $QEMU_IMG info "${TEST_IMG}"
+
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'human-monitor-command',
+       'arguments': { 'command-line': 'drive_del d0' } }" \
+    ""
+
+_run_cmd $QEMU_IO "${TEST_IMG}" -c 'write 0 512'
+
+echo "Closing the other"
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'human-monitor-command',
+       'arguments': { 'command-line': 'drive_del d1' } }" \
+    ""
+
+_run_cmd $QEMU_IO "${TEST_IMG}" -c 'write 0 512'
+
+_cleanup_qemu
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0

diff --git a/tests/qemu-iotests/153.out b/tests/qemu-iotests/153.out
new file mode 100644
index 0000000..5ba0b63
--- /dev/null
+++ b/tests/qemu-iotests/153.out

@@ -0,0 +1,390 @@
+QA output created by 153
+== readonly=off,force-share=on should be rejected ==
+QEMU_PROG: -drive if=none,file=null-co://,readonly=off,force-share=on: force-share=on can only be used with read-only images
+
+== Creating base image ==
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=33554432
+
+== Creating test image ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base
+
+== Launching QEMU, opts: '' ==
+
+== Launching another QEMU, opts: '' ==
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,: Failed to get "write" lock
+Is another process using the image?
+
+== Launching another QEMU, opts: 'read-only=on' ==
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,read-only=on: Failed to get shared "write" lock
+Is another process using the image?
+
+== Launching another QEMU, opts: 'read-only=on,force-share=on' ==
+
+== Running utility commands  ==
+
+_qemu_io_wrapper -c read 0 512 TEST_DIR/t.qcow2
+can't open device TEST_DIR/t.qcow2: Failed to get "write" lock
+Is another process using the image?
+
+_qemu_io_wrapper -r -c read 0 512 TEST_DIR/t.qcow2
+can't open device TEST_DIR/t.qcow2: Failed to get shared "write" lock
+Is another process using the image?
+
+_qemu_io_wrapper -c open  TEST_DIR/t.qcow2 -c read 0 512
+can't open device TEST_DIR/t.qcow2: Failed to get "write" lock
+Is another process using the image?
+
+_qemu_io_wrapper -c open -r  TEST_DIR/t.qcow2 -c read 0 512
+can't open device TEST_DIR/t.qcow2: Failed to get shared "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper info TEST_DIR/t.qcow2
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper check TEST_DIR/t.qcow2
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper compare TEST_DIR/t.qcow2 TEST_DIR/t.qcow2
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper map TEST_DIR/t.qcow2
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper amend -o  TEST_DIR/t.qcow2
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper commit TEST_DIR/t.qcow2
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper resize TEST_DIR/t.qcow2 32M
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper rebase TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper snapshot -l TEST_DIR/t.qcow2
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper convert TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper dd if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper bench -c 1 TEST_DIR/t.qcow2
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper bench -w -c 1 TEST_DIR/t.qcow2
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock
+Is another process using the image?
+
+== Running utility commands -U ==
+
+_qemu_io_wrapper -U -c read 0 512 TEST_DIR/t.qcow2
+can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images
+
+_qemu_io_wrapper -U -r -c read 0 512 TEST_DIR/t.qcow2
+
+_qemu_io_wrapper -c open -U TEST_DIR/t.qcow2 -c read 0 512
+can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images
+
+_qemu_io_wrapper -c open -r -U TEST_DIR/t.qcow2 -c read 0 512
+
+_qemu_img_wrapper info -U TEST_DIR/t.qcow2
+
+_qemu_img_wrapper check -U TEST_DIR/t.qcow2
+
+_qemu_img_wrapper compare -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2
+
+_qemu_img_wrapper map -U TEST_DIR/t.qcow2
+
+_qemu_img_wrapper amend -o  -U TEST_DIR/t.qcow2
+qemu-img: unrecognized option '-U'
+Try 'qemu-img --help' for more information
+
+_qemu_img_wrapper commit -U TEST_DIR/t.qcow2
+qemu-img: unrecognized option '-U'
+Try 'qemu-img --help' for more information
+
+_qemu_img_wrapper resize -U TEST_DIR/t.qcow2 32M
+qemu-img: unrecognized option '-U'
+Try 'qemu-img --help' for more information
+
+_qemu_img_wrapper rebase -U TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper snapshot -l -U TEST_DIR/t.qcow2
+
+_qemu_img_wrapper convert -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert
+
+_qemu_img_wrapper dd -U if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1
+
+_qemu_img_wrapper bench -U -c 1 TEST_DIR/t.qcow2
+
+_qemu_img_wrapper bench -U -w -c 1 TEST_DIR/t.qcow2
+qemu-img: Could not open 'TEST_DIR/t.qcow2': force-share=on can only be used with read-only images
+
+Round done
+
+== Creating base image ==
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=33554432
+
+== Creating test image ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base
+
+== Launching QEMU, opts: 'read-only=on' ==
+
+== Launching another QEMU, opts: '' ==
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,: Failed to get "write" lock
+Is another process using the image?
+
+== Launching another QEMU, opts: 'read-only=on' ==
+
+== Launching another QEMU, opts: 'read-only=on,force-share=on' ==
+
+== Running utility commands  ==
+
+_qemu_io_wrapper -c read 0 512 TEST_DIR/t.qcow2
+can't open device TEST_DIR/t.qcow2: Failed to get "write" lock
+Is another process using the image?
+
+_qemu_io_wrapper -r -c read 0 512 TEST_DIR/t.qcow2
+
+_qemu_io_wrapper -c open  TEST_DIR/t.qcow2 -c read 0 512
+can't open device TEST_DIR/t.qcow2: Failed to get "write" lock
+Is another process using the image?
+
+_qemu_io_wrapper -c open -r  TEST_DIR/t.qcow2 -c read 0 512
+
+_qemu_img_wrapper info TEST_DIR/t.qcow2
+
+_qemu_img_wrapper check TEST_DIR/t.qcow2
+
+_qemu_img_wrapper compare TEST_DIR/t.qcow2 TEST_DIR/t.qcow2
+
+_qemu_img_wrapper map TEST_DIR/t.qcow2
+
+_qemu_img_wrapper amend -o  TEST_DIR/t.qcow2
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper commit TEST_DIR/t.qcow2
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper resize TEST_DIR/t.qcow2 32M
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper rebase TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper snapshot -l TEST_DIR/t.qcow2
+
+_qemu_img_wrapper convert TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert
+
+_qemu_img_wrapper dd if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1
+
+_qemu_img_wrapper bench -c 1 TEST_DIR/t.qcow2
+
+_qemu_img_wrapper bench -w -c 1 TEST_DIR/t.qcow2
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock
+Is another process using the image?
+
+== Running utility commands -U ==
+
+_qemu_io_wrapper -U -c read 0 512 TEST_DIR/t.qcow2
+can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images
+
+_qemu_io_wrapper -U -r -c read 0 512 TEST_DIR/t.qcow2
+
+_qemu_io_wrapper -c open -U TEST_DIR/t.qcow2 -c read 0 512
+can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images
+
+_qemu_io_wrapper -c open -r -U TEST_DIR/t.qcow2 -c read 0 512
+
+_qemu_img_wrapper info -U TEST_DIR/t.qcow2
+
+_qemu_img_wrapper check -U TEST_DIR/t.qcow2
+
+_qemu_img_wrapper compare -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2
+
+_qemu_img_wrapper map -U TEST_DIR/t.qcow2
+
+_qemu_img_wrapper amend -o  -U TEST_DIR/t.qcow2
+qemu-img: unrecognized option '-U'
+Try 'qemu-img --help' for more information
+
+_qemu_img_wrapper commit -U TEST_DIR/t.qcow2
+qemu-img: unrecognized option '-U'
+Try 'qemu-img --help' for more information
+
+_qemu_img_wrapper resize -U TEST_DIR/t.qcow2 32M
+qemu-img: unrecognized option '-U'
+Try 'qemu-img --help' for more information
+
+_qemu_img_wrapper rebase -U TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base
+qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock
+Is another process using the image?
+
+_qemu_img_wrapper snapshot -l -U TEST_DIR/t.qcow2
+
+_qemu_img_wrapper convert -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert
+
+_qemu_img_wrapper dd -U if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1
+
+_qemu_img_wrapper bench -U -c 1 TEST_DIR/t.qcow2
+
+_qemu_img_wrapper bench -U -w -c 1 TEST_DIR/t.qcow2
+qemu-img: Could not open 'TEST_DIR/t.qcow2': force-share=on can only be used with read-only images
+
+Round done
+
+== Creating base image ==
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=33554432
+
+== Creating test image ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base
+
+== Launching QEMU, opts: 'read-only=on,force-share=on' ==
+
+== Launching another QEMU, opts: '' ==
+
+== Launching another QEMU, opts: 'read-only=on' ==
+
+== Launching another QEMU, opts: 'read-only=on,force-share=on' ==
+
+== Running utility commands  ==
+
+_qemu_io_wrapper -c read 0 512 TEST_DIR/t.qcow2
+
+_qemu_io_wrapper -r -c read 0 512 TEST_DIR/t.qcow2
+
+_qemu_io_wrapper -c open  TEST_DIR/t.qcow2 -c read 0 512
+
+_qemu_io_wrapper -c open -r  TEST_DIR/t.qcow2 -c read 0 512
+
+_qemu_img_wrapper info TEST_DIR/t.qcow2
+
+_qemu_img_wrapper check TEST_DIR/t.qcow2
+
+_qemu_img_wrapper compare TEST_DIR/t.qcow2 TEST_DIR/t.qcow2
+
+_qemu_img_wrapper map TEST_DIR/t.qcow2
+
+_qemu_img_wrapper amend -o  TEST_DIR/t.qcow2
+
+_qemu_img_wrapper commit TEST_DIR/t.qcow2
+
+_qemu_img_wrapper resize TEST_DIR/t.qcow2 32M
+
+_qemu_img_wrapper rebase TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base
+
+_qemu_img_wrapper snapshot -l TEST_DIR/t.qcow2
+
+_qemu_img_wrapper convert TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert
+
+_qemu_img_wrapper dd if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1
+
+_qemu_img_wrapper bench -c 1 TEST_DIR/t.qcow2
+
+_qemu_img_wrapper bench -w -c 1 TEST_DIR/t.qcow2
+
+== Running utility commands -U ==
+
+_qemu_io_wrapper -U -c read 0 512 TEST_DIR/t.qcow2
+can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images
+
+_qemu_io_wrapper -U -r -c read 0 512 TEST_DIR/t.qcow2
+
+_qemu_io_wrapper -c open -U TEST_DIR/t.qcow2 -c read 0 512
+can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images
+
+_qemu_io_wrapper -c open -r -U TEST_DIR/t.qcow2 -c read 0 512
+
+_qemu_img_wrapper info -U TEST_DIR/t.qcow2
+
+_qemu_img_wrapper check -U TEST_DIR/t.qcow2
+
+_qemu_img_wrapper compare -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2
+
+_qemu_img_wrapper map -U TEST_DIR/t.qcow2
+
+_qemu_img_wrapper amend -o  -U TEST_DIR/t.qcow2
+qemu-img: unrecognized option '-U'
+Try 'qemu-img --help' for more information
+
+_qemu_img_wrapper commit -U TEST_DIR/t.qcow2
+qemu-img: unrecognized option '-U'
+Try 'qemu-img --help' for more information
+
+_qemu_img_wrapper resize -U TEST_DIR/t.qcow2 32M
+qemu-img: unrecognized option '-U'
+Try 'qemu-img --help' for more information
+
+_qemu_img_wrapper rebase -U TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base
+
+_qemu_img_wrapper snapshot -l -U TEST_DIR/t.qcow2
+
+_qemu_img_wrapper convert -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert
+
+_qemu_img_wrapper dd -U if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1
+
+_qemu_img_wrapper bench -U -c 1 TEST_DIR/t.qcow2
+
+_qemu_img_wrapper bench -U -w -c 1 TEST_DIR/t.qcow2
+qemu-img: Could not open 'TEST_DIR/t.qcow2': force-share=on can only be used with read-only images
+
+Round done
+== Creating TEST_DIR/t.qcow2.[abc] ==
+Formatting 'TEST_DIR/t.IMGFMT.a', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT
+Formatting 'TEST_DIR/t.IMGFMT.b', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT
+Formatting 'TEST_DIR/t.IMGFMT.c', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.b
+
+== Two devices sharing the same file in backing chain ==
+
+== Backing image also as an active device ==
+QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2: Failed to get "write" lock
+Is another process using the image?
+
+== Backing image also as an active device (ro) ==
+
+== Symbolic link ==
+QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2: Failed to get "write" lock
+Is another process using the image?
+
+== Closing an image should unlock it ==
+{"return": {}}
+Adding drive
+
+_qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512
+can't open device TEST_DIR/t.qcow2: Failed to get "write" lock
+Is another process using the image?
+Closing drive
+
+_qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512
+Adding two and closing one
+
+_qemu_img_wrapper info TEST_DIR/t.qcow2
+
+_qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512
+can't open device TEST_DIR/t.qcow2: Failed to get "write" lock
+Is another process using the image?
+Closing the other
+
+_qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512
+*** done

diff --git a/tests/qemu-iotests/154 b/tests/qemu-iotests/154
index 7ca7219..dd8a426 100755
--- a/tests/qemu-iotests/154
+++ b/tests/qemu-iotests/154

@@ -2,7 +2,7 @@
 #
 # qcow2 specific bdrv_pwrite_zeroes tests with backing files (complements 034)
 #
-# Copyright (C) 2016 Red Hat, Inc.
+# Copyright (C) 2016-2017 Red Hat, Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -42,7 +42,10 @@
 _supported_os Linux
 
 CLUSTER_SIZE=4k
-size=128M
+size=$((128 * 1024 * 1024))
+
+# This test requires zero clusters, added in v3 images
+_unsupported_imgopts compat=0.10
 
 echo
 echo == backing file contains zeros ==
@@ -299,6 +302,159 @@
 
 $QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
 
+echo
+echo == unaligned image tail cluster, no allocation needed ==
+
+# With no backing file, write to all or part of unallocated partial cluster
+# will mark the cluster as zero, but does not allocate.
+# Re-create the image each time to get back to unallocated clusters.
+
+# Write at the front: sector-wise, the request is: 128m... | 00 -- -- --
+_make_test_img $((size + 2048))
+$QEMU_IO -c "write -z $size 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Write at the back: sector-wise, the request is: 128m... | -- -- -- 00
+_make_test_img $((size + 2048))
+$QEMU_IO -c "write -z $((size + 1536)) 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Write at middle: sector-wise, the request is: 128m... | -- 00 00 --
+_make_test_img $((size + 2048))
+$QEMU_IO -c "write -z $((size + 512)) 1024" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Write entire cluster: sector-wise, the request is: 128m... | 00 00 00 00
+_make_test_img $((size + 2048))
+$QEMU_IO -c "write -z $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Repeat with backing file holding unallocated cluster.
+# TODO: Note that this forces an allocation, because we aren't yet able to
+# quickly detect that reads beyond EOF of the backing file are always zero
+CLUSTER_SIZE=2048 TEST_IMG="$TEST_IMG.base" _make_test_img $((size + 1024))
+
+# Write at the front: sector-wise, the request is:
+# backing: 128m... | -- --
+# active:  128m... | 00 -- -- --
+_make_test_img -b "$TEST_IMG.base" $((size + 2048))
+$QEMU_IO -c "write -z $size 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Write at the back: sector-wise, the request is:
+# backing: 128m... | -- --
+# active:  128m... | -- -- -- 00
+_make_test_img -b "$TEST_IMG.base" $((size + 2048))
+$QEMU_IO -c "write -z $((size + 1536)) 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Write at middle: sector-wise, the request is:
+# backing: 128m... | -- --
+# active:  128m... | -- 00 00 --
+_make_test_img -b "$TEST_IMG.base" $((size + 2048))
+$QEMU_IO -c "write -z $((size + 512)) 1024" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Write entire cluster: sector-wise, the request is:
+# backing: 128m... | -- --
+# active:  128m... | 00 00 00 00
+_make_test_img -b "$TEST_IMG.base" $((size + 2048))
+$QEMU_IO -c "write -z $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Repeat with backing file holding zero'd cluster
+# TODO: Note that this forces an allocation, because we aren't yet able to
+# quickly detect that reads beyond EOF of the backing file are always zero
+$QEMU_IO -c "write -z $size 512" "$TEST_IMG.base" | _filter_qemu_io
+
+# Write at the front: sector-wise, the request is:
+# backing: 128m... | 00 00
+# active:  128m... | 00 -- -- --
+_make_test_img -b "$TEST_IMG.base" $((size + 2048))
+$QEMU_IO -c "write -z $size 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Write at the back: sector-wise, the request is:
+# backing: 128m... | 00 00
+# active:  128m... | -- -- -- 00
+_make_test_img -b "$TEST_IMG.base" $((size + 2048))
+$QEMU_IO -c "write -z $((size + 1536)) 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Write at middle: sector-wise, the request is:
+# backing: 128m... | 00 00
+# active:  128m... | -- 00 00 --
+_make_test_img -b "$TEST_IMG.base" $((size + 2048))
+$QEMU_IO -c "write -z $((size + 512)) 1024" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Write entire cluster: sector-wise, the request is:
+# backing: 128m... | 00 00
+# active:  128m... | 00 00 00 00
+_make_test_img -b "$TEST_IMG.base" $((size + 2048))
+$QEMU_IO -c "write -z $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# A preallocated cluster maintains its allocation, whether it stays as
+# data due to a partial write:
+# Convert 128m... | XX XX => ... | XX 00
+_make_test_img $((size + 1024))
+$QEMU_IO -c "write -P 1 $((size)) 1024" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "write -z $((size + 512)) 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 1 $((size)) 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 $((size + 512)) 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 1024" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# or because it is the entire cluster and can use the zero flag:
+# Convert 128m... | XX XX => ... | 00 00
+$QEMU_IO -c "write -z $((size)) 1024" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 1024" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 $size 1024" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+echo
+echo == unaligned image tail cluster, allocation required ==
+
+# Write beyond backing file must COW
+# Backing file: 128m... | XX --
+# Active layer: 128m... | -- -- 00 --
+CLUSTER_SIZE=512 TEST_IMG="$TEST_IMG.base" _make_test_img $((size + 1024))
+_make_test_img -b "$TEST_IMG.base" $((size + 2048))
+$QEMU_IO -c "write -P 1 $((size)) 512" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z $((size + 1024)) 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 1 $((size)) 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 $((size + 512)) 1536" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Writes at boundaries of (partial) cluster must not lose mid-cluster data
+# Backing file: 128m: ... | -- XX
+# Active layer: 128m: ... | 00 -- -- 00
+CLUSTER_SIZE=512 TEST_IMG="$TEST_IMG.base" _make_test_img $((size + 1024))
+_make_test_img -b "$TEST_IMG.base" $((size + 2048))
+$QEMU_IO -c "write -P 1 $((size + 512)) 512" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z $((size)) 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 $((size)) 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 1 $((size + 512)) 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 $((size + 1024)) 1024" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "write -z $((size + 1536)) 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 $((size)) 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 1 $((size + 512)) 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 $((size + 1024)) 1024" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
 # success, all done
 echo "*** done"
 rm -f $seq.full

diff --git a/tests/qemu-iotests/154.out b/tests/qemu-iotests/154.out
index da9eabd..d8485ee 100644
--- a/tests/qemu-iotests/154.out
+++ b/tests/qemu-iotests/154.out

@@ -42,9 +42,9 @@
 read 2048/2048 bytes at offset 67584
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 [{ "start": 0, "length": 32768, "depth": 1, "zero": true, "data": false},
-{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480},
+{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 36864, "length": 28672, "depth": 1, "zero": true, "data": false},
-{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576},
+{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 69632, "length": 134148096, "depth": 1, "zero": true, "data": false}]
 
 == backing file contains non-zero data after write_zeroes ==
@@ -69,9 +69,9 @@
 read 3072/3072 bytes at offset 40960
 3 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 [{ "start": 0, "length": 32768, "depth": 1, "zero": true, "data": false},
-{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480},
+{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 36864, "length": 4096, "depth": 1, "zero": true, "data": false},
-{ "start": 40960, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576},
+{ "start": 40960, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 45056, "length": 134172672, "depth": 1, "zero": true, "data": false}]
 
 == write_zeroes covers non-zero data ==
@@ -143,13 +143,13 @@
 read 5120/5120 bytes at offset 68608
 5 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 [{ "start": 0, "length": 32768, "depth": 1, "zero": true, "data": false},
-{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480},
+{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 36864, "length": 4096, "depth": 0, "zero": true, "data": false},
 { "start": 40960, "length": 8192, "depth": 1, "zero": true, "data": false},
-{ "start": 49152, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576},
+{ "start": 49152, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 53248, "length": 4096, "depth": 0, "zero": true, "data": false},
 { "start": 57344, "length": 8192, "depth": 1, "zero": true, "data": false},
-{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 28672},
+{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 69632, "length": 4096, "depth": 0, "zero": true, "data": false},
 { "start": 73728, "length": 134144000, "depth": 1, "zero": true, "data": false}]
 
@@ -186,13 +186,13 @@
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 [{ "start": 0, "length": 32768, "depth": 1, "zero": true, "data": false},
 { "start": 32768, "length": 4096, "depth": 0, "zero": true, "data": false},
-{ "start": 36864, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480},
+{ "start": 36864, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 40960, "length": 8192, "depth": 1, "zero": true, "data": false},
 { "start": 49152, "length": 4096, "depth": 0, "zero": true, "data": false},
-{ "start": 53248, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576},
+{ "start": 53248, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 57344, "length": 8192, "depth": 1, "zero": true, "data": false},
 { "start": 65536, "length": 4096, "depth": 0, "zero": true, "data": false},
-{ "start": 69632, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 28672},
+{ "start": 69632, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 73728, "length": 134144000, "depth": 1, "zero": true, "data": false}]
 
 == spanning two clusters, partially overwriting backing file ==
@@ -212,7 +212,7 @@
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 2048/2048 bytes at offset 6144
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 8192, "depth": 0, "zero": false, "data": true, "offset": 20480},
+[{ "start": 0, "length": 8192, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 8192, "length": 134209536, "depth": 1, "zero": true, "data": false}]
 
 == spanning multiple clusters, non-zero in first cluster ==
@@ -227,7 +227,7 @@
 read 10240/10240 bytes at offset 67584
 10 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 [{ "start": 0, "length": 65536, "depth": 1, "zero": true, "data": false},
-{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480},
+{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 69632, "length": 8192, "depth": 0, "zero": true, "data": false},
 { "start": 77824, "length": 134139904, "depth": 1, "zero": true, "data": false}]
 
@@ -257,7 +257,7 @@
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 [{ "start": 0, "length": 65536, "depth": 1, "zero": true, "data": false},
 { "start": 65536, "length": 8192, "depth": 0, "zero": true, "data": false},
-{ "start": 73728, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480},
+{ "start": 73728, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 77824, "length": 134139904, "depth": 1, "zero": true, "data": false}]
 
 == spanning multiple clusters, partially overwriting backing file ==
@@ -278,8 +278,136 @@
 read 1024/1024 bytes at offset 76800
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 [{ "start": 0, "length": 65536, "depth": 1, "zero": true, "data": false},
-{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480},
+{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 69632, "length": 4096, "depth": 0, "zero": true, "data": false},
-{ "start": 73728, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576},
+{ "start": 73728, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
 { "start": 77824, "length": 134139904, "depth": 1, "zero": true, "data": false}]
+
+== unaligned image tail cluster, no allocation needed ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776
+wrote 512/512 bytes at offset 134217728
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2048/2048 bytes allocated at offset 128 MiB
+[{ "start": 0, "length": 134219776, "depth": 0, "zero": true, "data": false}]
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776
+wrote 512/512 bytes at offset 134219264
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2048/2048 bytes allocated at offset 128 MiB
+[{ "start": 0, "length": 134219776, "depth": 0, "zero": true, "data": false}]
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776
+wrote 1024/1024 bytes at offset 134218240
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2048/2048 bytes allocated at offset 128 MiB
+[{ "start": 0, "length": 134219776, "depth": 0, "zero": true, "data": false}]
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776
+wrote 2048/2048 bytes at offset 134217728
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2048/2048 bytes allocated at offset 128 MiB
+[{ "start": 0, "length": 134219776, "depth": 0, "zero": true, "data": false}]
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134218752
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
+wrote 512/512 bytes at offset 134217728
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2048/2048 bytes allocated at offset 128 MiB
+[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
+wrote 512/512 bytes at offset 134219264
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2048/2048 bytes allocated at offset 128 MiB
+[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
+wrote 1024/1024 bytes at offset 134218240
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2048/2048 bytes allocated at offset 128 MiB
+[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
+wrote 2048/2048 bytes at offset 134217728
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2048/2048 bytes allocated at offset 128 MiB
+[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}]
+wrote 512/512 bytes at offset 134217728
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
+wrote 512/512 bytes at offset 134217728
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2048/2048 bytes allocated at offset 128 MiB
+[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
+wrote 512/512 bytes at offset 134219264
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2048/2048 bytes allocated at offset 128 MiB
+[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
+wrote 1024/1024 bytes at offset 134218240
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2048/2048 bytes allocated at offset 128 MiB
+[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
+wrote 2048/2048 bytes at offset 134217728
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2048/2048 bytes allocated at offset 128 MiB
+[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}]
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134218752
+wrote 1024/1024 bytes at offset 134217728
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 134218240
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 134217728
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 134218240
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+1024/1024 bytes allocated at offset 128 MiB
+[{ "start": 0, "length": 134217728, "depth": 0, "zero": true, "data": false},
+{ "start": 134217728, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
+wrote 1024/1024 bytes at offset 134217728
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+1024/1024 bytes allocated at offset 128 MiB
+read 1024/1024 bytes at offset 134217728
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 134217728, "depth": 0, "zero": true, "data": false},
+{ "start": 134217728, "length": 1024, "depth": 0, "zero": true, "data": false, "offset": OFFSET}]
+
+== unaligned image tail cluster, allocation required ==
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134218752
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
+wrote 512/512 bytes at offset 134217728
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 134218752
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 134217728
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1536/1536 bytes at offset 134218240
+1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134218752
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base
+wrote 512/512 bytes at offset 134218240
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 134217728
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 134217728
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 134218240
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 134218752
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 134219264
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 134217728
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 134218240
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 134218752
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false},
+{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}]
 *** done

diff --git a/tests/qemu-iotests/172 b/tests/qemu-iotests/172
index 1b7d3a1..826d6fe 100755
--- a/tests/qemu-iotests/172
+++ b/tests/qemu-iotests/172

@@ -30,6 +30,8 @@
 _cleanup()
 {
 	_cleanup_test_img
+    rm -f "$TEST_IMG.2"
+    rm -f "$TEST_IMG.3"
 }
 trap "_cleanup; exit \$status" 0 1 2 3 15
 
@@ -86,6 +88,9 @@
 
 _make_test_img $size
 
+TEST_IMG="$TEST_IMG.2" _make_test_img $size
+TEST_IMG="$TEST_IMG.3" _make_test_img $size
+
 # Default drive semantics:
 #
 # By default you get a single empty floppy drive. You can override it with
@@ -105,7 +110,7 @@
 
 check_floppy_qtree -fda "$TEST_IMG"
 check_floppy_qtree -fdb "$TEST_IMG"
-check_floppy_qtree -fda "$TEST_IMG" -fdb "$TEST_IMG"
+check_floppy_qtree -fda "$TEST_IMG" -fdb "$TEST_IMG.2"
 
 
 echo
@@ -114,7 +119,7 @@
 
 check_floppy_qtree -drive if=floppy,file="$TEST_IMG"
 check_floppy_qtree -drive if=floppy,file="$TEST_IMG",index=1
-check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=floppy,file="$TEST_IMG",index=1
+check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=floppy,file="$TEST_IMG.2",index=1
 
 echo
 echo
@@ -122,7 +127,7 @@
 
 check_floppy_qtree -drive if=none,file="$TEST_IMG" -global isa-fdc.driveA=none0
 check_floppy_qtree -drive if=none,file="$TEST_IMG" -global isa-fdc.driveB=none0
-check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \
+check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \
                    -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1
 
 echo
@@ -131,7 +136,7 @@
 
 check_floppy_qtree -drive if=none,file="$TEST_IMG" -device floppy,drive=none0
 check_floppy_qtree -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1
-check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \
+check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \
                    -device floppy,drive=none0 -device floppy,drive=none1,unit=1
 
 echo
@@ -139,58 +144,58 @@
 echo === Mixing -fdX and -global ===
 
 # Working
-check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveB=none0
-check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveA=none0
+check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -global isa-fdc.driveB=none0
+check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -global isa-fdc.driveA=none0
 
 # Conflicting (-fdX wins)
-check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveA=none0
-check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveB=none0
+check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -global isa-fdc.driveA=none0
+check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -global isa-fdc.driveB=none0
 
 echo
 echo
 echo === Mixing -fdX and -device ===
 
 # Working
-check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0
-check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1
+check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0
+check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=1
 
-check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0
-check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=0
+check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0
+check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=0
 
 # Conflicting
-check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=0
-check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1
+check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=0
+check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=1
 
 echo
 echo
 echo === Mixing -drive and -device ===
 
 # Working
-check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0
-check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1
+check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0
+check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=1
 
 # Conflicting
-check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=0
+check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=0
 
 echo
 echo
 echo === Mixing -global and -device ===
 
 # Working
-check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \
+check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \
                    -global isa-fdc.driveA=none0 -device floppy,drive=none1
-check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \
+check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \
                    -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1
 
-check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \
+check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \
                    -global isa-fdc.driveB=none0 -device floppy,drive=none1
-check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \
+check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \
                    -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0
 
 # Conflicting
-check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \
+check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \
                    -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0
-check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \
+check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \
                    -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=1
 
 echo
@@ -199,8 +204,8 @@
 
 # Working
 check_floppy_qtree -drive if=floppy,file="$TEST_IMG" \
-                   -drive if=none,file="$TEST_IMG" \
-                   -drive if=none,file="$TEST_IMG" \
+                   -drive if=none,file="$TEST_IMG.2" \
+                   -drive if=none,file="$TEST_IMG.3" \
                    -global isa-fdc.driveB=none0 \
                    -device floppy,drive=none1
 

diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out
index 54b5329..2732966 100644
--- a/tests/qemu-iotests/172.out
+++ b/tests/qemu-iotests/172.out

@@ -1,5 +1,7 @@
 QA output created by 172
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=737280
+Formatting 'TEST_DIR/t.IMGFMT.2', fmt=IMGFMT size=737280
+Formatting 'TEST_DIR/t.IMGFMT.3', fmt=IMGFMT size=737280
 
 
 === Default ===
@@ -99,7 +101,7 @@
                 share-rw = false
                 drive-type = "288"
 
-Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2
+Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -205,7 +207,7 @@
                 share-rw = false
                 drive-type = "288"
 
-Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
+Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t.qcow2.2,index=1
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -300,7 +302,7 @@
                 share-rw = false
                 drive-type = "144"
 
-Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1
+Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -395,7 +397,7 @@
                 share-rw = false
                 drive-type = "144"
 
-Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 -device floppy,drive=none1,unit=1
+Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0 -device floppy,drive=none1,unit=1
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -436,7 +438,7 @@
 
 === Mixing -fdX and -global ===
 
-Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0
+Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveB=none0
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -474,7 +476,7 @@
                 share-rw = false
                 drive-type = "144"
 
-Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0
+Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -512,7 +514,7 @@
                 share-rw = false
                 drive-type = "144"
 
-Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0
+Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -539,7 +541,7 @@
                 share-rw = false
                 drive-type = "144"
 
-Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0
+Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveB=none0
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -569,7 +571,7 @@
 
 === Mixing -fdX and -device ===
 
-Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0
+Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -607,7 +609,7 @@
                 share-rw = false
                 drive-type = "144"
 
-Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
+Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=1
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -645,7 +647,7 @@
                 share-rw = false
                 drive-type = "144"
 
-Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0
+Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -683,7 +685,7 @@
                 share-rw = false
                 drive-type = "144"
 
-Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0
+Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=0
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -721,18 +723,18 @@
                 share-rw = false
                 drive-type = "144"
 
-Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0
+Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=0
 QEMU_PROG: -device floppy,drive=none0,unit=0: Floppy unit 0 is in use
 QEMU_PROG: -device floppy,drive=none0,unit=0: Device initialization failed.
 
-Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
+Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=1
 QEMU_PROG: -device floppy,drive=none0,unit=1: Floppy unit 1 is in use
 QEMU_PROG: -device floppy,drive=none0,unit=1: Device initialization failed.
 
 
 === Mixing -drive and -device ===
 
-Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0
+Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -770,7 +772,7 @@
                 share-rw = false
                 drive-type = "144"
 
-Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
+Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=1
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -808,14 +810,14 @@
                 share-rw = false
                 drive-type = "144"
 
-Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0
+Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=0
 QEMU_PROG: -device floppy,drive=none0,unit=0: Floppy unit 0 is in use
 QEMU_PROG: -device floppy,drive=none0,unit=0: Device initialization failed.
 
 
 === Mixing -global and -device ===
 
-Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1
+Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 -device floppy,drive=none1
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -853,7 +855,7 @@
                 share-rw = false
                 drive-type = "144"
 
-Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1
+Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -891,7 +893,7 @@
                 share-rw = false
                 drive-type = "144"
 
-Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1
+Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveB=none0 -device floppy,drive=none1
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -929,7 +931,7 @@
                 share-rw = false
                 drive-type = "144"
 
-Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0
+Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0
 
           dev: isa-fdc, id ""
             iobase = 1008 (0x3f0)
@@ -967,18 +969,18 @@
                 share-rw = false
                 drive-type = "144"
 
-Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0
+Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0
 QEMU_PROG: -device floppy,drive=none1,unit=0: Floppy unit 0 is in use
 QEMU_PROG: -device floppy,drive=none1,unit=0: Device initialization failed.
 
-Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=1
+Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=1
 QEMU_PROG: -device floppy,drive=none1,unit=1: Floppy unit 1 is in use
 QEMU_PROG: -device floppy,drive=none1,unit=1: Device initialization failed.
 
 
 === Too many floppy drives ===
 
-Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1
+Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -drive if=none,file=TEST_DIR/t.qcow2.3 -global isa-fdc.driveB=none0 -device floppy,drive=none1
 QEMU_PROG: -device floppy,drive=none1: Can't create floppy unit 2, bus supports only 2 units
 QEMU_PROG: -device floppy,drive=none1: Device initialization failed.
 

diff --git a/tests/qemu-iotests/177 b/tests/qemu-iotests/177
new file mode 100755
index 0000000..2005c17
--- /dev/null
+++ b/tests/qemu-iotests/177

@@ -0,0 +1,114 @@
+#!/bin/bash
+#
+# Test corner cases with unusual block geometries
+#
+# Copyright (C) 2016-2017 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=eblake@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto file
+
+CLUSTER_SIZE=1M
+size=128M
+options=driver=blkdebug,image.driver=qcow2
+
+echo
+echo "== setting up files =="
+
+TEST_IMG="$TEST_IMG.base" _make_test_img $size
+$QEMU_IO -c "write -P 11 0 $size" "$TEST_IMG.base" | _filter_qemu_io
+_make_test_img -b "$TEST_IMG.base"
+$QEMU_IO -c "write -P 22 0 $size" "$TEST_IMG" | _filter_qemu_io
+
+# Limited to 64k max-transfer
+echo
+echo "== constrained alignment and max-transfer =="
+limits=align=4k,max-transfer=64k
+$QEMU_IO -c "open -o $options,$limits blkdebug::$TEST_IMG" \
+         -c "write -P 33 1000 128k" -c "read -P 33 1000 128k" | _filter_qemu_io
+
+echo
+echo "== write zero with constrained max-transfer =="
+limits=align=512,max-transfer=64k,opt-write-zero=$CLUSTER_SIZE
+$QEMU_IO -c "open -o $options,$limits blkdebug::$TEST_IMG" \
+         -c "write -z 8003584 2093056" | _filter_qemu_io
+
+# non-power-of-2 write-zero/discard alignments
+echo
+echo "== non-power-of-2 write zeroes limits =="
+
+limits=align=512,opt-write-zero=15M,max-write-zero=15M,opt-discard=15M,max-discard=15M
+$QEMU_IO -c "open -o $options,$limits blkdebug::$TEST_IMG" \
+         -c "write -z 32M 32M" | _filter_qemu_io
+
+echo
+echo "== non-power-of-2 discard limits =="
+
+limits=align=512,opt-write-zero=15M,max-write-zero=15M,opt-discard=15M,max-discard=15M
+$QEMU_IO -c "open -o $options,$limits blkdebug::$TEST_IMG" \
+         -c "discard 80000001 30M" | _filter_qemu_io
+
+echo
+echo "== verify image content =="
+
+function verify_io()
+{
+    if ($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" |
+	    grep "compat: 0.10" > /dev/null); then
+        # For v2 images, discarded clusters are read from the backing file
+        discarded=11
+    else
+        # Discarded clusters are zeroed for v3 or later
+        discarded=0
+    fi
+
+    echo read -P 22 0 1000
+    echo read -P 33 1000 128k
+    echo read -P 22 132072 7871512
+    echo read -P 0 8003584 2093056
+    echo read -P 22 10096640 23457792
+    echo read -P 0 32M 32M
+    echo read -P 22 64M 13M
+    echo read -P $discarded 77M 29M
+    echo read -P 22 106M 22M
+}
+
+verify_io | $QEMU_IO -r "$TEST_IMG" | _filter_qemu_io
+
+_check_test_img
+
+# success, all done
+echo "*** done"
+status=0

diff --git a/tests/qemu-iotests/177.out b/tests/qemu-iotests/177.out
new file mode 100644
index 0000000..e887542
--- /dev/null
+++ b/tests/qemu-iotests/177.out

@@ -0,0 +1,49 @@
+QA output created by 177
+
+== setting up files ==
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
+wrote 134217728/134217728 bytes at offset 0
+128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/t.IMGFMT.base
+wrote 134217728/134217728 bytes at offset 0
+128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== constrained alignment and max-transfer ==
+wrote 131072/131072 bytes at offset 1000
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 131072/131072 bytes at offset 1000
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== write zero with constrained max-transfer ==
+wrote 2093056/2093056 bytes at offset 8003584
+1.996 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== non-power-of-2 write zeroes limits ==
+wrote 33554432/33554432 bytes at offset 33554432
+32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== non-power-of-2 discard limits ==
+discard 31457280/31457280 bytes at offset 80000001
+30 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify image content ==
+read 1000/1000 bytes at offset 0
+1000 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 131072/131072 bytes at offset 1000
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 7871512/7871512 bytes at offset 132072
+7.507 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 2093056/2093056 bytes at offset 8003584
+1.996 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 23457792/23457792 bytes at offset 10096640
+22.371 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 33554432/33554432 bytes at offset 33554432
+32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 13631488/13631488 bytes at offset 67108864
+13 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 30408704/30408704 bytes at offset 80740352
+29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 23068672/23068672 bytes at offset 111149056
+22 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+No errors were found on the image.
+*** done

diff --git a/tests/qemu-iotests/179 b/tests/qemu-iotests/179
new file mode 100755
index 0000000..7bc8db8
--- /dev/null
+++ b/tests/qemu-iotests/179

@@ -0,0 +1,130 @@
+#!/bin/bash
+#
+# Test case for write zeroes with unmap
+#
+# Copyright (C) 2017 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=eblake@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+# v2 images can't mark clusters as zero
+_unsupported_imgopts compat=0.10
+
+echo
+echo '=== Testing write zeroes with unmap ==='
+echo
+
+TEST_IMG="$TEST_IMG.base" _make_test_img 64M
+_make_test_img -b "$TEST_IMG.base"
+
+# Offsets chosen at or near 2M boundaries so test works at all cluster sizes
+# 8k and larger (smaller clusters fail due to non-contiguous allocations)
+
+# Aligned writes to unallocated cluster should not allocate mapping, but must
+# mark cluster as zero, whether or not unmap was requested
+$QEMU_IO -c "write -z -u 2M 2M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z 6M 2M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map
+
+# Unaligned writes need not allocate mapping if the cluster already reads
+# as zero, but must mark cluster as zero, whether or not unmap was requested
+$QEMU_IO -c "write -z -u 10485761 2097150" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z 14680065 2097150" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map
+
+# Requesting unmap of normal data must deallocate; omitting unmap should
+# preserve the mapping
+$QEMU_IO -c "write 18M 14M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z -u 20M 2M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z 24M 6M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map
+
+# Likewise when writing on already-mapped zero data
+$QEMU_IO -c "write -z -u 26M 2M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z 28M 2M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map
+
+# Writing on unmapped zeroes does not allocate
+$QEMU_IO -c "write -z 32M 8M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z -u 34M 2M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z 36M 2M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map
+
+# Writing zero overrides a backing file, regardless of backing cluster type
+$QEMU_IO -c "write -z 40M 8M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write 48M 8M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z -u 42M 2M" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "write -z 44M 2M" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "write -z -u 50M 2M" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "write -z 52M 2M" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "write -z -u 58M 2M" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "write -z 60M 2M" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "map" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Final check that mappings are correct and images are still sane
+TEST_IMG="$TEST_IMG.base" _check_test_img
+_check_test_img
+
+echo
+echo '=== Testing cache optimization ==='
+echo
+
+BLKDBG_TEST_IMG="blkdebug:$TEST_DIR/blkdebug.conf:$TEST_IMG.base"
+
+cat > "$TEST_DIR/blkdebug.conf" <<EOF
+[inject-error]
+event = "l2_update"
+errno = "5"
+immediately = "on"
+once = "off"
+EOF
+
+# None of the following writes should trigger an L2 update, because the
+# cluster already reads as zero, and we don't have to change allocation
+$QEMU_IO -c "w -z -u 20M 2M" "$BLKDBG_TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "w -z 20M 2M" "$BLKDBG_TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "w -z 28M 2M" "$BLKDBG_TEST_IMG" | _filter_qemu_io
+
+# success, all done
+echo '*** done'
+status=0

diff --git a/tests/qemu-iotests/179.out b/tests/qemu-iotests/179.out
new file mode 100644
index 0000000..80722b2
--- /dev/null
+++ b/tests/qemu-iotests/179.out

@@ -0,0 +1,156 @@
+QA output created by 179
+
+=== Testing write zeroes with unmap ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.base
+wrote 2097152/2097152 bytes at offset 2097152
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 6291456
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2 MiB (0x200000) bytes not allocated at offset 0 bytes (0x0)
+2 MiB (0x200000) bytes     allocated at offset 2 MiB (0x200000)
+2 MiB (0x200000) bytes not allocated at offset 4 MiB (0x400000)
+2 MiB (0x200000) bytes     allocated at offset 6 MiB (0x600000)
+56 MiB (0x3800000) bytes not allocated at offset 8 MiB (0x800000)
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": true, "data": false}]
+wrote 2097150/2097150 bytes at offset 10485761
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097150/2097150 bytes at offset 14680065
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2 MiB (0x200000) bytes not allocated at offset 0 bytes (0x0)
+2 MiB (0x200000) bytes     allocated at offset 2 MiB (0x200000)
+2 MiB (0x200000) bytes not allocated at offset 4 MiB (0x400000)
+2 MiB (0x200000) bytes     allocated at offset 6 MiB (0x600000)
+2 MiB (0x200000) bytes not allocated at offset 8 MiB (0x800000)
+2 MiB (0x200000) bytes     allocated at offset 10 MiB (0xa00000)
+2 MiB (0x200000) bytes not allocated at offset 12 MiB (0xc00000)
+2 MiB (0x200000) bytes     allocated at offset 14 MiB (0xe00000)
+48 MiB (0x3000000) bytes not allocated at offset 16 MiB (0x1000000)
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": true, "data": false}]
+wrote 14680064/14680064 bytes at offset 18874368
+14 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 20971520
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 6291456/6291456 bytes at offset 25165824
+6 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2 MiB (0x200000) bytes not allocated at offset 0 bytes (0x0)
+2 MiB (0x200000) bytes     allocated at offset 2 MiB (0x200000)
+2 MiB (0x200000) bytes not allocated at offset 4 MiB (0x400000)
+2 MiB (0x200000) bytes     allocated at offset 6 MiB (0x600000)
+2 MiB (0x200000) bytes not allocated at offset 8 MiB (0x800000)
+2 MiB (0x200000) bytes     allocated at offset 10 MiB (0xa00000)
+2 MiB (0x200000) bytes not allocated at offset 12 MiB (0xc00000)
+2 MiB (0x200000) bytes     allocated at offset 14 MiB (0xe00000)
+2 MiB (0x200000) bytes not allocated at offset 16 MiB (0x1000000)
+14 MiB (0xe00000) bytes     allocated at offset 18 MiB (0x1200000)
+32 MiB (0x2000000) bytes not allocated at offset 32 MiB (0x2000000)
+[{ "start": 0, "length": 18874368, "depth": 0, "zero": true, "data": false},
+{ "start": 18874368, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 20971520, "length": 2097152, "depth": 0, "zero": true, "data": false},
+{ "start": 23068672, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 25165824, "length": 6291456, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
+{ "start": 31457280, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 33554432, "length": 33554432, "depth": 0, "zero": true, "data": false}]
+wrote 2097152/2097152 bytes at offset 27262976
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 29360128
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2 MiB (0x200000) bytes not allocated at offset 0 bytes (0x0)
+2 MiB (0x200000) bytes     allocated at offset 2 MiB (0x200000)
+2 MiB (0x200000) bytes not allocated at offset 4 MiB (0x400000)
+2 MiB (0x200000) bytes     allocated at offset 6 MiB (0x600000)
+2 MiB (0x200000) bytes not allocated at offset 8 MiB (0x800000)
+2 MiB (0x200000) bytes     allocated at offset 10 MiB (0xa00000)
+2 MiB (0x200000) bytes not allocated at offset 12 MiB (0xc00000)
+2 MiB (0x200000) bytes     allocated at offset 14 MiB (0xe00000)
+2 MiB (0x200000) bytes not allocated at offset 16 MiB (0x1000000)
+14 MiB (0xe00000) bytes     allocated at offset 18 MiB (0x1200000)
+32 MiB (0x2000000) bytes not allocated at offset 32 MiB (0x2000000)
+[{ "start": 0, "length": 18874368, "depth": 0, "zero": true, "data": false},
+{ "start": 18874368, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 20971520, "length": 2097152, "depth": 0, "zero": true, "data": false},
+{ "start": 23068672, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 25165824, "length": 2097152, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
+{ "start": 27262976, "length": 2097152, "depth": 0, "zero": true, "data": false},
+{ "start": 29360128, "length": 2097152, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
+{ "start": 31457280, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 33554432, "length": 33554432, "depth": 0, "zero": true, "data": false}]
+wrote 8388608/8388608 bytes at offset 33554432
+8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 35651584
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 37748736
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+2 MiB (0x200000) bytes not allocated at offset 0 bytes (0x0)
+2 MiB (0x200000) bytes     allocated at offset 2 MiB (0x200000)
+2 MiB (0x200000) bytes not allocated at offset 4 MiB (0x400000)
+2 MiB (0x200000) bytes     allocated at offset 6 MiB (0x600000)
+2 MiB (0x200000) bytes not allocated at offset 8 MiB (0x800000)
+2 MiB (0x200000) bytes     allocated at offset 10 MiB (0xa00000)
+2 MiB (0x200000) bytes not allocated at offset 12 MiB (0xc00000)
+2 MiB (0x200000) bytes     allocated at offset 14 MiB (0xe00000)
+2 MiB (0x200000) bytes not allocated at offset 16 MiB (0x1000000)
+22 MiB (0x1600000) bytes     allocated at offset 18 MiB (0x1200000)
+24 MiB (0x1800000) bytes not allocated at offset 40 MiB (0x2800000)
+[{ "start": 0, "length": 18874368, "depth": 0, "zero": true, "data": false},
+{ "start": 18874368, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 20971520, "length": 2097152, "depth": 0, "zero": true, "data": false},
+{ "start": 23068672, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 25165824, "length": 2097152, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
+{ "start": 27262976, "length": 2097152, "depth": 0, "zero": true, "data": false},
+{ "start": 29360128, "length": 2097152, "depth": 0, "zero": true, "data": false, "offset": OFFSET},
+{ "start": 31457280, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 33554432, "length": 33554432, "depth": 0, "zero": true, "data": false}]
+wrote 8388608/8388608 bytes at offset 41943040
+8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 8388608/8388608 bytes at offset 50331648
+8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 44040192
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 46137344
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 52428800
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 54525952
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 60817408
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 62914560
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+42 MiB (0x2a00000) bytes not allocated at offset 0 bytes (0x0)
+4 MiB (0x400000) bytes     allocated at offset 42 MiB (0x2a00000)
+4 MiB (0x400000) bytes not allocated at offset 46 MiB (0x2e00000)
+4 MiB (0x400000) bytes     allocated at offset 50 MiB (0x3200000)
+4 MiB (0x400000) bytes not allocated at offset 54 MiB (0x3600000)
+4 MiB (0x400000) bytes     allocated at offset 58 MiB (0x3a00000)
+2 MiB (0x200000) bytes not allocated at offset 62 MiB (0x3e00000)
+[{ "start": 0, "length": 18874368, "depth": 1, "zero": true, "data": false},
+{ "start": 18874368, "length": 2097152, "depth": 1, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 20971520, "length": 2097152, "depth": 1, "zero": true, "data": false},
+{ "start": 23068672, "length": 2097152, "depth": 1, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 25165824, "length": 2097152, "depth": 1, "zero": true, "data": false, "offset": OFFSET},
+{ "start": 27262976, "length": 2097152, "depth": 1, "zero": true, "data": false},
+{ "start": 29360128, "length": 2097152, "depth": 1, "zero": true, "data": false, "offset": OFFSET},
+{ "start": 31457280, "length": 2097152, "depth": 1, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 33554432, "length": 10485760, "depth": 1, "zero": true, "data": false},
+{ "start": 44040192, "length": 4194304, "depth": 0, "zero": true, "data": false},
+{ "start": 48234496, "length": 2097152, "depth": 1, "zero": true, "data": false},
+{ "start": 50331648, "length": 2097152, "depth": 1, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 52428800, "length": 4194304, "depth": 0, "zero": true, "data": false},
+{ "start": 56623104, "length": 2097152, "depth": 1, "zero": false, "data": true, "offset": OFFSET},
+{ "start": 58720256, "length": 2097152, "depth": 1, "zero": true, "data": false},
+{ "start": 60817408, "length": 4194304, "depth": 0, "zero": true, "data": false},
+{ "start": 65011712, "length": 2097152, "depth": 1, "zero": true, "data": false}]
+No errors were found on the image.
+No errors were found on the image.
+
+=== Testing cache optimization ===
+
+wrote 2097152/2097152 bytes at offset 20971520
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 20971520
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 29360128
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done

diff --git a/tests/qemu-iotests/182 b/tests/qemu-iotests/182
new file mode 100755
index 0000000..7ecbb22
--- /dev/null
+++ b/tests/qemu-iotests/182

@@ -0,0 +1,68 @@
+#!/bin/bash
+#
+# Test image locking for POSIX locks
+#
+# Copyright 2017 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=famz@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+size=32M
+
+_make_test_img $size
+
+echo "Starting QEMU"
+_launch_qemu -drive file=$TEST_IMG,if=none,id=drive0,file.locking=on \
+    -device virtio-blk-pci,drive=drive0
+
+echo
+echo "Starting a second QEMU using the same image should fail"
+echo 'quit' | $QEMU -monitor stdio \
+    -drive file=$TEST_IMG,if=none,id=drive0,file.locking=on \
+    -device virtio-blk-pci,drive=drive0 2>&1 | _filter_testdir 2>&1 |
+    _filter_qemu |
+    sed -e '/falling back to POSIX file/d' \
+        -e '/locks can be lost unexpectedly/d'
+
+_cleanup_qemu
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0

diff --git a/tests/qemu-iotests/182.out b/tests/qemu-iotests/182.out
new file mode 100644
index 0000000..23a4dbf
--- /dev/null
+++ b/tests/qemu-iotests/182.out

@@ -0,0 +1,8 @@
+QA output created by 182
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432
+Starting QEMU
+
+Starting a second QEMU using the same image should fail
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,id=drive0,file.locking=on: Failed to get "write" lock
+Is another process using the image?
+*** done

diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index f58548d..2f595b2 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter

@@ -152,10 +152,12 @@
         -e "/log_size: [0-9]\\+/d"
 }
 
-# filter out offsets and file names from qemu-img map
+# filter out offsets and file names from qemu-img map; good for both
+# human and json output
 _filter_qemu_img_map()
 {
     sed -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \
+        -e 's/"offset": [0-9]\+/"offset": OFFSET/g' \
         -e 's/Mapped to *//' | _filter_testdir | _filter_imgfmt
 }
 

diff --git a/tests/qemu-iotests/common.pattern b/tests/qemu-iotests/common.pattern
index ddfbca1..34f4a8d 100644
--- a/tests/qemu-iotests/common.pattern
+++ b/tests/qemu-iotests/common.pattern

@@ -18,7 +18,7 @@
 
 function do_is_allocated() {
     local start=$1
-    local size=$(( $2 / 512))
+    local size=$2
     local step=$3
     local count=$4
 

diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 893962d..5c8ea0f 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group

@@ -154,6 +154,7 @@
 149 rw auto sudo
 150 rw auto quick
 152 rw auto quick
+153 rw auto quick
 154 rw auto backing quick
 155 rw auto
 156 rw auto quick
@@ -169,4 +170,7 @@
 174 auto
 175 auto quick
 176 rw auto backing
+177 rw auto quick
+179 rw auto quick
 181 rw auto migration
+182 rw auto quick

diff --git a/tests/test-crypto-block.c b/tests/test-crypto-block.c
index 85e6603..95c4bd5 100644
--- a/tests/test-crypto-block.c
+++ b/tests/test-crypto-block.c

@@ -187,10 +187,10 @@
 
 
 static ssize_t test_block_read_func(QCryptoBlock *block,
-                                    void *opaque,
                                     size_t offset,
                                     uint8_t *buf,
                                     size_t buflen,
+                                    void *opaque,
                                     Error **errp)
 {
     Buffer *header = opaque;
@@ -204,8 +204,8 @@
 
 
 static ssize_t test_block_init_func(QCryptoBlock *block,
-                                    void *opaque,
                                     size_t headerlen,
+                                    void *opaque,
                                     Error **errp)
 {
     Buffer *header = opaque;
@@ -219,10 +219,10 @@
 
 
 static ssize_t test_block_write_func(QCryptoBlock *block,
-                                     void *opaque,
                                      size_t offset,
                                      const uint8_t *buf,
                                      size_t buflen,
+                                     void *opaque,
                                      Error **errp)
 {
     Buffer *header = opaque;

diff --git a/tests/test-replication.c b/tests/test-replication.c
index 3016c6f..cebeb79 100644
--- a/tests/test-replication.c
+++ b/tests/test-replication.c

@@ -179,7 +179,8 @@
     char *cmdline;
 
     cmdline = g_strdup_printf("driver=replication,mode=primary,node-name=xxx,"
-                              "file.driver=qcow2,file.file.filename=%s"
+                              "file.driver=qcow2,file.file.filename=%s,"
+                              "file.file.locking=off"
                               , p_local_disk);
     opts = qemu_opts_parse_noisily(&qemu_drive_opts, cmdline, false);
     g_free(cmdline);
@@ -310,7 +311,9 @@
     Error *local_err = NULL;
 
     /* add s_local_disk and forge S_LOCAL_DISK_ID */
-    cmdline = g_strdup_printf("file.filename=%s,driver=qcow2", s_local_disk);
+    cmdline = g_strdup_printf("file.filename=%s,driver=qcow2,"
+                              "file.locking=off",
+                              s_local_disk);
     opts = qemu_opts_parse_noisily(&qemu_drive_opts, cmdline, false);
     g_free(cmdline);
 
@@ -331,8 +334,10 @@
     /* add S_(ACTIVE/HIDDEN)_DISK and forge S_ID */
     cmdline = g_strdup_printf("driver=replication,mode=secondary,top-id=%s,"
                               "file.driver=qcow2,file.file.filename=%s,"
+                              "file.file.locking=off,"
                               "file.backing.driver=qcow2,"
                               "file.backing.file.filename=%s,"
+                              "file.backing.file.locking=off,"
                               "file.backing.backing=%s"
                               , S_ID, s_active_disk, s_hidden_disk
                               , S_LOCAL_DISK_ID);

diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c
index 79a2e69..6c71e46 100644
--- a/tests/test-x86-cpuid-compat.c
+++ b/tests/test-x86-cpuid-compat.c

@@ -1,6 +1,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qstring.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qint.h"
 #include "qapi/qmp/qbool.h"
@@ -78,6 +79,90 @@
     qtest_add_data_func(name, args, test_cpuid_prop);
 }
 
+
+/* Parameters to a add_feature_test() test case */
+typedef struct FeatureTestArgs {
+    /* cmdline to start QEMU */
+    const char *cmdline;
+    /*
+     * cpuid-input-eax and cpuid-input-ecx values to look for,
+     * in "feature-words" and "filtered-features" properties.
+     */
+    uint32_t in_eax, in_ecx;
+    /* The register name to look for, in the X86CPUFeatureWordInfo array */
+    const char *reg;
+    /* The bit to check in X86CPUFeatureWordInfo.features */
+    int bitnr;
+    /* The expected value for the bit in (X86CPUFeatureWordInfo.features) */
+    bool expected_value;
+} FeatureTestArgs;
+
+/* Get the value for a feature word in a X86CPUFeatureWordInfo list */
+static uint32_t get_feature_word(QList *features, uint32_t eax, uint32_t ecx,
+                                 const char *reg)
+{
+    const QListEntry *e;
+
+    for (e = qlist_first(features); e; e = qlist_next(e)) {
+        QDict *w = qobject_to_qdict(qlist_entry_obj(e));
+        const char *rreg = qdict_get_str(w, "cpuid-register");
+        uint32_t reax = qdict_get_int(w, "cpuid-input-eax");
+        bool has_ecx = qdict_haskey(w, "cpuid-input-ecx");
+        uint32_t recx = 0;
+
+        if (has_ecx) {
+            recx = qdict_get_int(w, "cpuid-input-ecx");
+        }
+        if (eax == reax && (!has_ecx || ecx == recx) && !strcmp(rreg, reg)) {
+            return qint_get_int(qobject_to_qint(qdict_get(w, "features")));
+        }
+    }
+    return 0;
+}
+
+static void test_feature_flag(const void *data)
+{
+    const FeatureTestArgs *args = data;
+    char *path;
+    QList *present, *filtered;
+    uint32_t value;
+
+    qtest_start(args->cmdline);
+    path = get_cpu0_qom_path();
+    present = qobject_to_qlist(qom_get(path, "feature-words"));
+    filtered = qobject_to_qlist(qom_get(path, "filtered-features"));
+    value = get_feature_word(present, args->in_eax, args->in_ecx, args->reg);
+    value |= get_feature_word(filtered, args->in_eax, args->in_ecx, args->reg);
+    qtest_end();
+
+    g_assert(!!(value & (1U << args->bitnr)) == args->expected_value);
+
+    QDECREF(present);
+    QDECREF(filtered);
+    g_free(path);
+}
+
+/*
+ * Add test case to ensure that a given feature flag is set in
+ * either "feature-words" or "filtered-features", when running QEMU
+ * using cmdline
+ */
+static FeatureTestArgs *add_feature_test(const char *name, const char *cmdline,
+                                         uint32_t eax, uint32_t ecx,
+                                         const char *reg, int bitnr,
+                                         bool expected_value)
+{
+    FeatureTestArgs *args = g_new0(FeatureTestArgs, 1);
+    args->cmdline = cmdline;
+    args->in_eax = eax;
+    args->in_ecx = ecx;
+    args->reg = reg;
+    args->bitnr = bitnr;
+    args->expected_value = expected_value;
+    qtest_add_data_func(name, args, test_feature_flag);
+    return args;
+}
+
 #ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
 static void test_plus_minus_subprocess(void)
 {
@@ -229,5 +314,31 @@
                    "-machine pc-i440fx-2.7 -cpu 486,+xstore",
                    "xlevel2", 0);
 
+    /* Test feature parsing */
+    add_feature_test("x86/cpuid/features/plus",
+                     "-cpu 486,+arat",
+                     6, 0, "EAX", 2, true);
+    add_feature_test("x86/cpuid/features/minus",
+                     "-cpu pentium,-mmx",
+                     1, 0, "EDX", 23, false);
+    add_feature_test("x86/cpuid/features/on",
+                     "-cpu 486,arat=on",
+                     6, 0, "EAX", 2, true);
+    add_feature_test("x86/cpuid/features/off",
+                     "-cpu pentium,mmx=off",
+                     1, 0, "EDX", 23, false);
+    add_feature_test("x86/cpuid/features/max-plus-invtsc",
+                     "-cpu max,+invtsc",
+                     0x80000007, 0, "EDX", 8, true);
+    add_feature_test("x86/cpuid/features/max-invtsc-on",
+                     "-cpu max,invtsc=on",
+                     0x80000007, 0, "EDX", 8, true);
+    add_feature_test("x86/cpuid/features/max-minus-mmx",
+                     "-cpu max,-mmx",
+                     1, 0, "EDX", 23, false);
+    add_feature_test("x86/cpuid/features/max-invtsc-on,mmx=off",
+                     "-cpu max,mmx=off",
+                     1, 0, "EDX", 23, false);
+
     return g_test_run();
 }

diff --git a/tests/usb-hcd-uhci-test.c b/tests/usb-hcd-uhci-test.c
index f25bae5..5b500fe 100644
--- a/tests/usb-hcd-uhci-test.c
+++ b/tests/usb-hcd-uhci-test.c

@@ -79,7 +79,7 @@
 {
     const char *arch = qtest_get_arch();
     const char *cmd = "-device piix3-usb-uhci,id=uhci,addr=1d.0"
-                      " -drive id=drive0,if=none,file=/dev/null,format=raw"
+                      " -drive id=drive0,if=none,file=null-co://,format=raw"
                       " -device usb-tablet,bus=uhci.0,port=1";
     int ret;
 

diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c
index 22513e9..031764d 100644
--- a/tests/usb-hcd-xhci-test.c
+++ b/tests/usb-hcd-xhci-test.c

@@ -89,7 +89,7 @@
     qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug);
 
     qtest_start("-device nec-usb-xhci,id=xhci"
-                " -drive id=drive0,if=none,file=/dev/null,format=raw");
+                " -drive id=drive0,if=none,file=null-co://,format=raw");
     ret = g_test_run();
     qtest_end();
 

diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index 1eee95d..fd2078c 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c

@@ -63,7 +63,7 @@
     const char *arch = qtest_get_arch();
     char *tmp_path;
     const char *cmd = "-drive if=none,id=drive0,file=%s,format=raw "
-                      "-drive if=none,id=drive1,file=/dev/null,format=raw "
+                      "-drive if=none,id=drive1,file=null-co://,format=raw "
                       "-device virtio-blk-pci,id=drv0,drive=drive0,"
                       "addr=%x.%x";
 

diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c
index 0eabd56..8b0f77a 100644
--- a/tests/virtio-scsi-test.c
+++ b/tests/virtio-scsi-test.c

@@ -35,7 +35,7 @@
 static QOSState *qvirtio_scsi_start(const char *extra_opts)
 {
     const char *arch = qtest_get_arch();
-    const char *cmd = "-drive id=drv0,if=none,file=/dev/null,format=raw "
+    const char *cmd = "-drive id=drv0,if=none,file=null-co://,format=raw "
                       "-device virtio-scsi-pci,id=vs0 "
                       "-device scsi-hd,bus=vs0.0,drive=drv0 %s";
 
@@ -195,7 +195,8 @@
     QDict *response;
     QOSState *qs;
 
-    qs = qvirtio_scsi_start("-drive id=drv1,if=none,file=/dev/null,format=raw");
+    qs = qvirtio_scsi_start(
+            "-drive id=drv1,if=none,file=null-co://,format=raw");
     response = qmp("{\"execute\": \"device_add\","
                    " \"arguments\": {"
                    "   \"driver\": \"scsi-hd\","

diff --git a/ui/Makefile.objs b/ui/Makefile.objs
index 27566b3..aac6ae8 100644
--- a/ui/Makefile.objs
+++ b/ui/Makefile.objs

@@ -33,6 +33,7 @@
 common-obj-y += console-gl.o
 common-obj-y += egl-helpers.o
 common-obj-y += egl-context.o
+common-obj-y += egl-headless.o
 ifeq ($(CONFIG_GTK_GL),y)
 common-obj-$(CONFIG_GTK) += gtk-gl-area.o
 else

diff --git a/ui/cocoa.m b/ui/cocoa.m
index 207555e..3a9bc4d 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m

@@ -749,8 +749,8 @@
                  * clicks in the titlebar.
                  */
                 if ([self screenContainsPoint:p]) {
-                    qemu_input_queue_abs(dcl->con, INPUT_AXIS_X, p.x, screen.width);
-                    qemu_input_queue_abs(dcl->con, INPUT_AXIS_Y, screen.height - p.y, screen.height);
+                    qemu_input_queue_abs(dcl->con, INPUT_AXIS_X, p.x, 0, screen.width);
+                    qemu_input_queue_abs(dcl->con, INPUT_AXIS_Y, screen.height - p.y, 0, screen.height);
                 }
             } else {
                 qemu_input_queue_rel(dcl->con, INPUT_AXIS_X, (int)[event deltaX]);

diff --git a/ui/egl-context.c b/ui/egl-context.c
index 3a02b68..2161969 100644
--- a/ui/egl-context.c
+++ b/ui/egl-context.c

@@ -7,9 +7,10 @@
 {
    EGLContext ctx;
    EGLint ctx_att[] = {
-      EGL_CONTEXT_CLIENT_VERSION, params->major_ver,
-      EGL_CONTEXT_MINOR_VERSION_KHR, params->minor_ver,
-      EGL_NONE
+       EGL_CONTEXT_OPENGL_PROFILE_MASK, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT,
+       EGL_CONTEXT_CLIENT_VERSION, params->major_ver,
+       EGL_CONTEXT_MINOR_VERSION_KHR, params->minor_ver,
+       EGL_NONE
    };
 
    ctx = eglCreateContext(qemu_egl_display, qemu_egl_config,

diff --git a/ui/egl-headless.c b/ui/egl-headless.c
new file mode 100644
index 0000000..d8d800f
--- /dev/null
+++ b/ui/egl-headless.c

@@ -0,0 +1,158 @@
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "sysemu/sysemu.h"
+#include "ui/console.h"
+#include "ui/egl-helpers.h"
+#include "ui/egl-context.h"
+
+typedef struct egl_dpy {
+    DisplayChangeListener dcl;
+    DisplaySurface *ds;
+    int width, height;
+    GLuint texture;
+    GLuint framebuffer;
+    GLuint blit_texture;
+    GLuint blit_framebuffer;
+    bool y_0_top;
+} egl_dpy;
+
+static void egl_refresh(DisplayChangeListener *dcl)
+{
+    graphic_hw_update(dcl->con);
+}
+
+static void egl_gfx_update(DisplayChangeListener *dcl,
+                           int x, int y, int w, int h)
+{
+}
+
+static void egl_gfx_switch(DisplayChangeListener *dcl,
+                           struct DisplaySurface *new_surface)
+{
+    egl_dpy *edpy = container_of(dcl, egl_dpy, dcl);
+
+    edpy->ds = new_surface;
+}
+
+static void egl_scanout_disable(DisplayChangeListener *dcl)
+{
+    egl_dpy *edpy = container_of(dcl, egl_dpy, dcl);
+
+    edpy->texture = 0;
+    /* XXX: delete framebuffers here ??? */
+}
+
+static void egl_scanout_texture(DisplayChangeListener *dcl,
+                                uint32_t backing_id,
+                                bool backing_y_0_top,
+                                uint32_t backing_width,
+                                uint32_t backing_height,
+                                uint32_t x, uint32_t y,
+                                uint32_t w, uint32_t h)
+{
+    egl_dpy *edpy = container_of(dcl, egl_dpy, dcl);
+
+    edpy->texture = backing_id;
+    edpy->y_0_top = backing_y_0_top;
+
+    /* source framebuffer */
+    if (!edpy->framebuffer) {
+        glGenFramebuffers(1, &edpy->framebuffer);
+    }
+    glBindFramebuffer(GL_FRAMEBUFFER_EXT, edpy->framebuffer);
+    glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
+                              GL_TEXTURE_2D, edpy->texture, 0);
+
+    /* dest framebuffer */
+    if (!edpy->blit_framebuffer) {
+        glGenFramebuffers(1, &edpy->blit_framebuffer);
+        glGenTextures(1, &edpy->blit_texture);
+        edpy->width = 0;
+        edpy->height = 0;
+    }
+    if (edpy->width != backing_width || edpy->height != backing_height) {
+        edpy->width   = backing_width;
+        edpy->height  = backing_height;
+        glBindTexture(GL_TEXTURE_2D, edpy->blit_texture);
+        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB,
+                     edpy->width, edpy->height,
+                     0, GL_BGRA, GL_UNSIGNED_BYTE, 0);
+        glBindFramebuffer(GL_FRAMEBUFFER_EXT, edpy->blit_framebuffer);
+        glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
+                                  GL_TEXTURE_2D, edpy->blit_texture, 0);
+    }
+}
+
+static void egl_scanout_flush(DisplayChangeListener *dcl,
+                              uint32_t x, uint32_t y,
+                              uint32_t w, uint32_t h)
+{
+    egl_dpy *edpy = container_of(dcl, egl_dpy, dcl);
+    GLuint y1, y2;
+
+    if (!edpy->texture || !edpy->ds) {
+        return;
+    }
+    assert(surface_width(edpy->ds)  == edpy->width);
+    assert(surface_height(edpy->ds) == edpy->height);
+    assert(surface_format(edpy->ds) == PIXMAN_x8r8g8b8);
+
+    /* blit framebuffer, flip if needed */
+    glBindFramebuffer(GL_READ_FRAMEBUFFER, edpy->framebuffer);
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, edpy->blit_framebuffer);
+    glViewport(0, 0, edpy->width, edpy->height);
+    y1 = edpy->y_0_top ? edpy->height : 0;
+    y2 = edpy->y_0_top ? 0 : edpy->height;
+    glBlitFramebuffer(0, y1, edpy->width, y2,
+                      0, 0, edpy->width, edpy->height,
+                      GL_COLOR_BUFFER_BIT, GL_NEAREST);
+
+    /* read pixels to surface */
+    glBindFramebuffer(GL_READ_FRAMEBUFFER, edpy->blit_framebuffer);
+    glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
+    glReadPixels(0, 0, edpy->width, edpy->height,
+                 GL_BGRA, GL_UNSIGNED_BYTE, surface_data(edpy->ds));
+
+    /* notify about updates */
+    dpy_gfx_update(edpy->dcl.con, x, y, w, h);
+}
+
+static const DisplayChangeListenerOps egl_ops = {
+    .dpy_name                = "egl-headless",
+    .dpy_refresh             = egl_refresh,
+    .dpy_gfx_update          = egl_gfx_update,
+    .dpy_gfx_switch          = egl_gfx_switch,
+
+    .dpy_gl_ctx_create       = qemu_egl_create_context,
+    .dpy_gl_ctx_destroy      = qemu_egl_destroy_context,
+    .dpy_gl_ctx_make_current = qemu_egl_make_context_current,
+    .dpy_gl_ctx_get_current  = qemu_egl_get_current_context,
+
+    .dpy_gl_scanout_disable  = egl_scanout_disable,
+    .dpy_gl_scanout_texture  = egl_scanout_texture,
+    .dpy_gl_update           = egl_scanout_flush,
+};
+
+void egl_headless_init(void)
+{
+    QemuConsole *con;
+    egl_dpy *edpy;
+    int idx;
+
+    if (egl_rendernode_init(NULL) < 0) {
+        error_report("egl: render node init failed");
+        exit(1);
+    }
+
+    for (idx = 0;; idx++) {
+        con = qemu_console_lookup_by_index(idx);
+        if (!con || !qemu_console_is_graphic(con)) {
+            break;
+        }
+
+        edpy = g_new0(egl_dpy, 1);
+        edpy->dcl.con = con;
+        edpy->dcl.ops = &egl_ops;
+        register_displaychangelistener(&edpy->dcl);
+    }
+}

diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c
index b7b6b2e..4a4d337 100644
--- a/ui/egl-helpers.c
+++ b/ui/egl-helpers.c

@@ -26,18 +26,6 @@
 
 /* ---------------------------------------------------------------------- */
 
-static bool egl_gles;
-static int egl_debug;
-
-#define egl_dbg(_x ...)                          \
-    do {                                         \
-        if (egl_debug) {                         \
-            fprintf(stderr, "egl: " _x);         \
-        }                                        \
-    } while (0);
-
-/* ---------------------------------------------------------------------- */
-
 #ifdef CONFIG_OPENGL_DMABUF
 
 int qemu_egl_rn_fd;
@@ -92,6 +80,7 @@
 int egl_rendernode_init(const char *rendernode)
 {
     qemu_egl_rn_fd = -1;
+    int rc;
 
     qemu_egl_rn_fd = qemu_egl_rendernode_open(rendernode);
     if (qemu_egl_rn_fd == -1) {
@@ -105,7 +94,11 @@
         goto err;
     }
 
-    qemu_egl_init_dpy((EGLNativeDisplayType)qemu_egl_rn_gbm_dev, false, false);
+    rc = qemu_egl_init_dpy_mesa((EGLNativeDisplayType)qemu_egl_rn_gbm_dev);
+    if (rc != 0) {
+        /* qemu_egl_init_dpy_mesa reports error */
+        goto err;
+    }
 
     if (!epoxy_has_egl_extension(qemu_egl_display,
                                  "EGL_KHR_surfaceless_context")) {
@@ -171,8 +164,6 @@
     EGLSurface esurface;
     EGLBoolean b;
 
-    egl_dbg("eglCreateWindowSurface (x11 win id 0x%lx) ...\n",
-            (unsigned long) win);
     esurface = eglCreateWindowSurface(qemu_egl_display,
                                       qemu_egl_config,
                                       (EGLNativeWindowType)win, NULL);
@@ -220,20 +211,19 @@
  * platform extensions (EGL_KHR_platform_gbm and friends) yet it doesn't seem
  * like mesa will be able to advertise these (even though it can do EGL 1.5).
  */
-static EGLDisplay qemu_egl_get_display(void *native)
+static EGLDisplay qemu_egl_get_display(EGLNativeDisplayType native,
+                                       EGLenum platform)
 {
     EGLDisplay dpy = EGL_NO_DISPLAY;
 
-#ifdef EGL_MESA_platform_gbm
     /* In practise any EGL 1.5 implementation would support the EXT extension */
     if (epoxy_has_egl_extension(NULL, "EGL_EXT_platform_base")) {
         PFNEGLGETPLATFORMDISPLAYEXTPROC getPlatformDisplayEXT =
             (void *) eglGetProcAddress("eglGetPlatformDisplayEXT");
-        if (getPlatformDisplayEXT) {
-            dpy = getPlatformDisplayEXT(EGL_PLATFORM_GBM_MESA, native, NULL);
+        if (getPlatformDisplayEXT && platform != 0) {
+            dpy = getPlatformDisplayEXT(platform, native, NULL);
         }
     }
-#endif
 
     if (dpy == EGL_NO_DISPLAY) {
         /* fallback */
@@ -242,7 +232,8 @@
     return dpy;
 }
 
-int qemu_egl_init_dpy(EGLNativeDisplayType dpy, bool gles, bool debug)
+static int qemu_egl_init_dpy(EGLNativeDisplayType dpy,
+                             EGLenum platform)
 {
     static const EGLint conf_att_gl[] = {
         EGL_SURFACE_TYPE, EGL_WINDOW_BIT,
@@ -253,75 +244,66 @@
         EGL_ALPHA_SIZE, 0,
         EGL_NONE,
     };
-    static const EGLint conf_att_gles[] = {
-        EGL_SURFACE_TYPE, EGL_WINDOW_BIT,
-        EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
-        EGL_RED_SIZE,   5,
-        EGL_GREEN_SIZE, 5,
-        EGL_BLUE_SIZE,  5,
-        EGL_ALPHA_SIZE, 0,
-        EGL_NONE,
-    };
     EGLint major, minor;
     EGLBoolean b;
     EGLint n;
 
-    if (debug) {
-        egl_debug = 1;
-        setenv("EGL_LOG_LEVEL", "debug", true);
-        setenv("LIBGL_DEBUG", "verbose", true);
-    }
-
-    egl_dbg("qemu_egl_get_display (dpy %p) ...\n", dpy);
-    qemu_egl_display = qemu_egl_get_display(dpy);
+    qemu_egl_display = qemu_egl_get_display(dpy, platform);
     if (qemu_egl_display == EGL_NO_DISPLAY) {
         error_report("egl: eglGetDisplay failed");
         return -1;
     }
 
-    egl_dbg("eglInitialize ...\n");
     b = eglInitialize(qemu_egl_display, &major, &minor);
     if (b == EGL_FALSE) {
         error_report("egl: eglInitialize failed");
         return -1;
     }
 
-    egl_dbg("eglBindAPI ...\n");
-    b = eglBindAPI(gles ? EGL_OPENGL_ES_API : EGL_OPENGL_API);
+    b = eglBindAPI(EGL_OPENGL_API);
     if (b == EGL_FALSE) {
         error_report("egl: eglBindAPI failed");
         return -1;
     }
 
-    egl_dbg("eglChooseConfig ...\n");
-    b = eglChooseConfig(qemu_egl_display,
-                        gles ? conf_att_gles : conf_att_gl,
+    b = eglChooseConfig(qemu_egl_display, conf_att_gl,
                         &qemu_egl_config, 1, &n);
     if (b == EGL_FALSE || n != 1) {
         error_report("egl: eglChooseConfig failed");
         return -1;
     }
-
-    egl_gles = gles;
     return 0;
 }
 
+int qemu_egl_init_dpy_x11(EGLNativeDisplayType dpy)
+{
+#ifdef EGL_KHR_platform_x11
+    return qemu_egl_init_dpy(dpy, EGL_PLATFORM_X11_KHR);
+#else
+    return qemu_egl_init_dpy(dpy, 0);
+#endif
+}
+
+int qemu_egl_init_dpy_mesa(EGLNativeDisplayType dpy)
+{
+#ifdef EGL_MESA_platform_gbm
+    return qemu_egl_init_dpy(dpy, EGL_PLATFORM_GBM_MESA);
+#else
+    return qemu_egl_init_dpy(dpy, 0);
+#endif
+}
+
 EGLContext qemu_egl_init_ctx(void)
 {
     static const EGLint ctx_att_gl[] = {
+        EGL_CONTEXT_OPENGL_PROFILE_MASK, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT,
         EGL_NONE
     };
-    static const EGLint ctx_att_gles[] = {
-        EGL_CONTEXT_CLIENT_VERSION, 2,
-        EGL_NONE
-    };
-
     EGLContext ectx;
     EGLBoolean b;
 
-    egl_dbg("eglCreateContext ...\n");
     ectx = eglCreateContext(qemu_egl_display, qemu_egl_config, EGL_NO_CONTEXT,
-                            egl_gles ? ctx_att_gles : ctx_att_gl);
+                            ctx_att_gl);
     if (ectx == EGL_NO_CONTEXT) {
         error_report("egl: eglCreateContext failed");
         return NULL;

diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
index d53288f..cf48cca 100644
--- a/ui/gtk-egl.c
+++ b/ui/gtk-egl.c

@@ -246,7 +246,7 @@
     GdkDisplay *gdk_display = gdk_display_get_default();
     Display *x11_display = gdk_x11_display_get_xdisplay(gdk_display);
 
-    if (qemu_egl_init_dpy(x11_display, false, false) < 0) {
+    if (qemu_egl_init_dpy_x11(x11_display) < 0) {
         return;
     }
 

diff --git a/ui/gtk.c b/ui/gtk.c
index 7479cee..0213ad0 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c

@@ -912,9 +912,9 @@
             return TRUE;
         }
         qemu_input_queue_abs(vc->gfx.dcl.con, INPUT_AXIS_X, x,
-                             surface_width(vc->gfx.ds));
+                             0, surface_width(vc->gfx.ds));
         qemu_input_queue_abs(vc->gfx.dcl.con, INPUT_AXIS_Y, y,
-                             surface_height(vc->gfx.ds));
+                             0, surface_height(vc->gfx.ds));
         qemu_input_event_sync();
     } else if (s->last_set && s->ptr_owner == vc) {
         qemu_input_queue_rel(vc->gfx.dcl.con, INPUT_AXIS_X, x - s->last_x);

diff --git a/ui/input-linux.c b/ui/input-linux.c
index dc0613c..49d52a6 100644
--- a/ui/input-linux.c
+++ b/ui/input-linux.c

@@ -169,6 +169,10 @@
     bool        has_abs_x;
     int         num_keys;
     int         num_btns;
+    int         abs_x_min;
+    int         abs_x_max;
+    int         abs_y_min;
+    int         abs_y_max;
     struct input_event event;
     int         read_offset;
 
@@ -314,6 +318,18 @@
             break;
         }
         break;
+    case EV_ABS:
+        switch (event->code) {
+        case ABS_X:
+            qemu_input_queue_abs(NULL, INPUT_AXIS_X, event->value,
+                                 il->abs_x_min, il->abs_x_max);
+            break;
+        case ABS_Y:
+            qemu_input_queue_abs(NULL, INPUT_AXIS_Y, event->value,
+                                 il->abs_y_min, il->abs_y_max);
+            break;
+        }
+        break;
     case EV_SYN:
         qemu_input_event_sync();
         if (il->wheel != 0) {
@@ -351,7 +367,7 @@
         if (il->num_keys) {
             input_linux_handle_keyboard(il, &il->event);
         }
-        if (il->has_rel_x && il->num_btns) {
+        if ((il->has_rel_x || il->has_abs_x) && il->num_btns) {
             input_linux_handle_mouse(il, &il->event);
         }
     }
@@ -364,6 +380,7 @@
     uint8_t keymap[KEY_CNT / 8], keystate[KEY_CNT / 8];
     unsigned int i;
     int rc, ver;
+    struct input_absinfo absinfo;
 
     if (!il->evdev) {
         error_setg(errp, "no input device specified");
@@ -402,6 +419,12 @@
         rc = ioctl(il->fd, EVIOCGBIT(EV_ABS, sizeof(absmap)), &absmap);
         if (absmap & (1 << ABS_X)) {
             il->has_abs_x = true;
+            rc = ioctl(il->fd, EVIOCGABS(ABS_X), &absinfo);
+            il->abs_x_min = absinfo.minimum;
+            il->abs_x_max = absinfo.maximum;
+            rc = ioctl(il->fd, EVIOCGABS(ABS_Y), &absinfo);
+            il->abs_y_min = absinfo.minimum;
+            il->abs_y_max = absinfo.maximum;
         }
     }
 

diff --git a/ui/input.c b/ui/input.c
index 830f912..290ca9f 100644
--- a/ui/input.c
+++ b/ui/input.c

@@ -166,6 +166,11 @@
     qemu_input_event_sync();
 }
 
+static int qemu_input_transform_invert_abs_value(int value)
+{
+  return (int64_t)INPUT_EVENT_ABS_MAX - value + INPUT_EVENT_ABS_MIN;
+}
+
 static void qemu_input_transform_abs_rotate(InputEvent *evt)
 {
     InputMoveEvent *move = evt->u.abs.data;
@@ -175,16 +180,16 @@
             move->axis = INPUT_AXIS_Y;
         } else if (move->axis == INPUT_AXIS_Y) {
             move->axis = INPUT_AXIS_X;
-            move->value = INPUT_EVENT_ABS_SIZE - 1 - move->value;
+            move->value = qemu_input_transform_invert_abs_value(move->value);
         }
         break;
     case 180:
-        move->value = INPUT_EVENT_ABS_SIZE - 1 - move->value;
+        move->value = qemu_input_transform_invert_abs_value(move->value);
         break;
     case 270:
         if (move->axis == INPUT_AXIS_X) {
             move->axis = INPUT_AXIS_Y;
-            move->value = INPUT_EVENT_ABS_SIZE - 1 - move->value;
+            move->value = qemu_input_transform_invert_abs_value(move->value);
         } else if (move->axis == INPUT_AXIS_Y) {
             move->axis = INPUT_AXIS_X;
         }
@@ -467,12 +472,17 @@
     return (s != NULL) && (s->handler->mask & INPUT_EVENT_MASK_ABS);
 }
 
-int qemu_input_scale_axis(int value, int size_in, int size_out)
+int qemu_input_scale_axis(int value,
+                          int min_in, int max_in,
+                          int min_out, int max_out)
 {
-    if (size_in < 2) {
-        return size_out / 2;
+    int64_t range_in = (int64_t)max_in - min_in;
+    int64_t range_out = (int64_t)max_out - min_out;
+
+    if (range_in < 1) {
+        return min_out + range_out / 2;
     }
-    return (int64_t)value * (size_out - 1) / (size_in - 1);
+    return ((int64_t)value - min_in) * range_out / range_in + min_out;
 }
 
 InputEvent *qemu_input_event_new_move(InputEventKind kind,
@@ -496,10 +506,13 @@
     qapi_free_InputEvent(evt);
 }
 
-void qemu_input_queue_abs(QemuConsole *src, InputAxis axis, int value, int size)
+void qemu_input_queue_abs(QemuConsole *src, InputAxis axis, int value,
+                          int min_in, int max_in)
 {
     InputEvent *evt;
-    int scaled = qemu_input_scale_axis(value, size, INPUT_EVENT_ABS_SIZE);
+    int scaled = qemu_input_scale_axis(value, min_in, max_in,
+                                       INPUT_EVENT_ABS_MIN,
+                                       INPUT_EVENT_ABS_MAX);
     evt = qemu_input_event_new_move(INPUT_EVENT_KIND_ABS, axis, scaled);
     qemu_input_event_send(src, evt);
     qapi_free_InputEvent(evt);

diff --git a/ui/sdl.c b/ui/sdl.c
index 37c21a00..b35a678 100644
--- a/ui/sdl.c
+++ b/ui/sdl.c

@@ -490,9 +490,9 @@
 
     if (qemu_input_is_absolute()) {
         qemu_input_queue_abs(dcl->con, INPUT_AXIS_X, x,
-                             real_screen->w);
+                             0, real_screen->w);
         qemu_input_queue_abs(dcl->con, INPUT_AXIS_Y, y,
-                             real_screen->h);
+                             0, real_screen->h);
     } else {
         if (guest_cursor) {
             x -= guest_x;

diff --git a/ui/sdl2.c b/ui/sdl2.c
index faf9bdf..21de052 100644
--- a/ui/sdl2.c
+++ b/ui/sdl2.c

@@ -298,8 +298,8 @@
                 }
             }
         }
-        qemu_input_queue_abs(scon->dcl.con, INPUT_AXIS_X, off_x + x, max_w);
-        qemu_input_queue_abs(scon->dcl.con, INPUT_AXIS_Y, off_y + y, max_h);
+        qemu_input_queue_abs(scon->dcl.con, INPUT_AXIS_X, off_x + x, 0, max_w);
+        qemu_input_queue_abs(scon->dcl.con, INPUT_AXIS_Y, off_y + y, 0, max_h);
     } else {
         if (guest_cursor) {
             x -= guest_x;

diff --git a/ui/spice-input.c b/ui/spice-input.c
index 8eeebdb..86293dd 100644
--- a/ui/spice-input.c
+++ b/ui/spice-input.c

@@ -172,8 +172,8 @@
     QemuSpicePointer *pointer = container_of(sin, QemuSpicePointer, tablet);
 
     spice_update_buttons(pointer, 0, buttons_state);
-    qemu_input_queue_abs(NULL, INPUT_AXIS_X, x, pointer->width);
-    qemu_input_queue_abs(NULL, INPUT_AXIS_Y, y, pointer->height);
+    qemu_input_queue_abs(NULL, INPUT_AXIS_X, x, 0, pointer->width);
+    qemu_input_queue_abs(NULL, INPUT_AXIS_Y, y, 0, pointer->height);
     qemu_input_event_sync();
 }
 

diff --git a/ui/vnc-enc-zrle.c b/ui/vnc-enc-zrle.c
index 5489870..fd63d4f 100644
--- a/ui/vnc-enc-zrle.c
+++ b/ui/vnc-enc-zrle.c

@@ -163,7 +163,6 @@
             if (packed_bytes < estimated_bytes) {
                 *use_rle = false;
                 *use_palette = true;
-                estimated_bytes = packed_bytes;
             }
         }
     }

diff --git a/ui/vnc.c b/ui/vnc.c
index 9c4edcd..47b49c7 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c

@@ -1556,8 +1556,8 @@
     }
 
     if (vs->absolute) {
-        qemu_input_queue_abs(con, INPUT_AXIS_X, x, width);
-        qemu_input_queue_abs(con, INPUT_AXIS_Y, y, height);
+        qemu_input_queue_abs(con, INPUT_AXIS_X, x, 0, width);
+        qemu_input_queue_abs(con, INPUT_AXIS_Y, y, 0, height);
     } else if (vnc_has_feature(vs, VNC_FEATURE_POINTER_TYPE_CHANGE)) {
         qemu_input_queue_rel(con, INPUT_AXIS_X, x - 0x7FFF);
         qemu_input_queue_rel(con, INPUT_AXIS_Y, y - 0x7FFF);
@@ -2061,15 +2061,15 @@
     }
 
     vs->client_pf.rmax = red_max ? red_max : 0xFF;
-    vs->client_pf.rbits = hweight_long(red_max);
+    vs->client_pf.rbits = ctpopl(red_max);
     vs->client_pf.rshift = red_shift;
     vs->client_pf.rmask = red_max << red_shift;
     vs->client_pf.gmax = green_max ? green_max : 0xFF;
-    vs->client_pf.gbits = hweight_long(green_max);
+    vs->client_pf.gbits = ctpopl(green_max);
     vs->client_pf.gshift = green_shift;
     vs->client_pf.gmask = green_max << green_shift;
     vs->client_pf.bmax = blue_max ? blue_max : 0xFF;
-    vs->client_pf.bbits = hweight_long(blue_max);
+    vs->client_pf.bbits = ctpopl(blue_max);
     vs->client_pf.bshift = blue_shift;
     vs->client_pf.bmask = blue_max << blue_shift;
     vs->client_pf.bits_per_pixel = bits_per_pixel;

diff --git a/util/coroutine-gthread.c b/util/coroutine-gthread.c
deleted file mode 100644
index 62bfb40..0000000
--- a/util/coroutine-gthread.c
+++ /dev/null

@@ -1,198 +0,0 @@
-/*
- * GThread coroutine initialization code
- *
- * Copyright (C) 2006  Anthony Liguori <anthony@codemonkey.ws>
- * Copyright (C) 2011  Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.0 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "qemu/coroutine_int.h"
-
-typedef struct {
-    Coroutine base;
-    GThread *thread;
-    bool runnable;
-    bool free_on_thread_exit;
-    CoroutineAction action;
-} CoroutineGThread;
-
-static CompatGMutex coroutine_lock;
-static CompatGCond coroutine_cond;
-
-/* GLib 2.31 and beyond deprecated various parts of the thread API,
- * but the new interfaces are not available in older GLib versions
- * so we have to cope with both.
- */
-#if GLIB_CHECK_VERSION(2, 31, 0)
-/* Awkwardly, the GPrivate API doesn't provide a way to update the
- * GDestroyNotify handler for the coroutine key dynamically. So instead
- * we track whether or not the CoroutineGThread should be freed on
- * thread exit / coroutine key update using the free_on_thread_exit
- * field.
- */
-static void coroutine_destroy_notify(gpointer data)
-{
-    CoroutineGThread *co = data;
-    if (co && co->free_on_thread_exit) {
-        g_free(co);
-    }
-}
-
-static GPrivate coroutine_key = G_PRIVATE_INIT(coroutine_destroy_notify);
-
-static inline CoroutineGThread *get_coroutine_key(void)
-{
-    return g_private_get(&coroutine_key);
-}
-
-static inline void set_coroutine_key(CoroutineGThread *co,
-                                     bool free_on_thread_exit)
-{
-    /* Unlike g_static_private_set() this does not call the GDestroyNotify
-     * if the previous value of the key was NULL. Fortunately we only need
-     * the GDestroyNotify in the non-NULL key case.
-     */
-    co->free_on_thread_exit = free_on_thread_exit;
-    g_private_replace(&coroutine_key, co);
-}
-
-static inline GThread *create_thread(GThreadFunc func, gpointer data)
-{
-    return g_thread_new("coroutine", func, data);
-}
-
-#else
-
-/* Handle older GLib versions */
-
-static GStaticPrivate coroutine_key = G_STATIC_PRIVATE_INIT;
-
-static inline CoroutineGThread *get_coroutine_key(void)
-{
-    return g_static_private_get(&coroutine_key);
-}
-
-static inline void set_coroutine_key(CoroutineGThread *co,
-                                     bool free_on_thread_exit)
-{
-    g_static_private_set(&coroutine_key, co,
-                         free_on_thread_exit ? (GDestroyNotify)g_free : NULL);
-}
-
-static inline GThread *create_thread(GThreadFunc func, gpointer data)
-{
-    return g_thread_create_full(func, data, 0, TRUE, TRUE,
-                                G_THREAD_PRIORITY_NORMAL, NULL);
-}
-
-#endif
-
-
-static void __attribute__((constructor)) coroutine_init(void)
-{
-#if !GLIB_CHECK_VERSION(2, 31, 0)
-    if (!g_thread_supported()) {
-        g_thread_init(NULL);
-    }
-#endif
-}
-
-static void coroutine_wait_runnable_locked(CoroutineGThread *co)
-{
-    while (!co->runnable) {
-        g_cond_wait(&coroutine_cond, &coroutine_lock);
-    }
-}
-
-static void coroutine_wait_runnable(CoroutineGThread *co)
-{
-    g_mutex_lock(&coroutine_lock);
-    coroutine_wait_runnable_locked(co);
-    g_mutex_unlock(&coroutine_lock);
-}
-
-static gpointer coroutine_thread(gpointer opaque)
-{
-    CoroutineGThread *co = opaque;
-
-    set_coroutine_key(co, false);
-    coroutine_wait_runnable(co);
-    co->base.entry(co->base.entry_arg);
-    qemu_coroutine_switch(&co->base, co->base.caller, COROUTINE_TERMINATE);
-    return NULL;
-}
-
-Coroutine *qemu_coroutine_new(void)
-{
-    CoroutineGThread *co;
-
-    co = g_malloc0(sizeof(*co));
-    co->thread = create_thread(coroutine_thread, co);
-    if (!co->thread) {
-        g_free(co);
-        return NULL;
-    }
-    return &co->base;
-}
-
-void qemu_coroutine_delete(Coroutine *co_)
-{
-    CoroutineGThread *co = DO_UPCAST(CoroutineGThread, base, co_);
-
-    g_thread_join(co->thread);
-    g_free(co);
-}
-
-CoroutineAction qemu_coroutine_switch(Coroutine *from_,
-                                      Coroutine *to_,
-                                      CoroutineAction action)
-{
-    CoroutineGThread *from = DO_UPCAST(CoroutineGThread, base, from_);
-    CoroutineGThread *to = DO_UPCAST(CoroutineGThread, base, to_);
-
-    g_mutex_lock(&coroutine_lock);
-    from->runnable = false;
-    from->action = action;
-    to->runnable = true;
-    to->action = action;
-    g_cond_broadcast(&coroutine_cond);
-
-    if (action != COROUTINE_TERMINATE) {
-        coroutine_wait_runnable_locked(from);
-    }
-    g_mutex_unlock(&coroutine_lock);
-    return from->action;
-}
-
-Coroutine *qemu_coroutine_self(void)
-{
-    CoroutineGThread *co = get_coroutine_key();
-    if (!co) {
-        co = g_malloc0(sizeof(*co));
-        co->runnable = true;
-        set_coroutine_key(co, true);
-    }
-
-    return &co->base;
-}
-
-bool qemu_in_coroutine(void)
-{
-    CoroutineGThread *co = get_coroutine_key();
-
-    return co && co->base.caller;
-}

diff --git a/util/envlist.c b/util/envlist.c
index e86857e..1eeb7fc 100644
--- a/util/envlist.c
+++ b/util/envlist.c

@@ -17,16 +17,14 @@
     const char *env, int (*)(envlist_t *, const char *));
 
 /*
- * Allocates new envlist and returns pointer to that or
- * NULL in case of error.
+ * Allocates new envlist and returns pointer to it.
  */
 envlist_t *
 envlist_create(void)
 {
 	envlist_t *envlist;
 
-	if ((envlist = malloc(sizeof (*envlist))) == NULL)
-		return (NULL);
+	envlist = g_malloc(sizeof(*envlist));
 
 	QLIST_INIT(&envlist->el_entries);
 	envlist->el_count = 0;
@@ -48,10 +46,10 @@
 		entry = envlist->el_entries.lh_first;
 		QLIST_REMOVE(entry, ev_link);
 
-		free((char *)entry->ev_var);
-		free(entry);
+		g_free((char *)entry->ev_var);
+		g_free(entry);
 	}
-	free(envlist);
+	g_free(envlist);
 }
 
 /*
@@ -101,8 +99,7 @@
 	if ((envlist == NULL) || (env == NULL))
 		return (EINVAL);
 
-	if ((tmpenv = strdup(env)) == NULL)
-		return (errno);
+	tmpenv = g_strdup(env);
     envsave = tmpenv;
 
     do {
@@ -117,7 +114,7 @@
         tmpenv = envvar + 1;
     } while (envvar != NULL);
 
-    free(envsave);
+    g_free(envsave);
     return ret;
 }
 
@@ -155,18 +152,14 @@
 
 	if (entry != NULL) {
 		QLIST_REMOVE(entry, ev_link);
-		free((char *)entry->ev_var);
-		free(entry);
+		g_free((char *)entry->ev_var);
+		g_free(entry);
 	} else {
 		envlist->el_count++;
 	}
 
-	if ((entry = malloc(sizeof (*entry))) == NULL)
-		return (errno);
-	if ((entry->ev_var = strdup(env)) == NULL) {
-		free(entry);
-		return (errno);
-	}
+	entry = g_malloc(sizeof(*entry));
+	entry->ev_var = g_strdup(env);
 	QLIST_INSERT_HEAD(&envlist->el_entries, entry, ev_link);
 
 	return (0);
@@ -201,8 +194,8 @@
 	}
 	if (entry != NULL) {
 		QLIST_REMOVE(entry, ev_link);
-		free((char *)entry->ev_var);
-		free(entry);
+		g_free((char *)entry->ev_var);
+		g_free(entry);
 
 		envlist->el_count--;
 	}
@@ -212,12 +205,12 @@
 /*
  * Returns given envlist as array of strings (in same form that
  * global variable environ is).  Caller must free returned memory
- * by calling free(3) for each element and for the array.  Returned
- * array and given envlist are not related (no common references).
+ * by calling g_free for each element and the array.
+ * Returned array and given envlist are not related (no common
+ * references).
  *
  * If caller provides count pointer, number of items in array is
- * stored there.  In case of error, NULL is returned and no memory
- * is allocated.
+ * stored there.
  */
 char **
 envlist_to_environ(const envlist_t *envlist, size_t *count)
@@ -225,13 +218,11 @@
 	struct envlist_entry *entry;
 	char **env, **penv;
 
-	penv = env = malloc((envlist->el_count + 1) * sizeof (char *));
-	if (env == NULL)
-		return (NULL);
+	penv = env = g_malloc((envlist->el_count + 1) * sizeof(char *));
 
 	for (entry = envlist->el_entries.lh_first; entry != NULL;
 	    entry = entry->ev_link.le_next) {
-		*(penv++) = strdup(entry->ev_var);
+		*(penv++) = g_strdup(entry->ev_var);
 	}
 	*penv = NULL; /* NULL terminate the list */
 

diff --git a/util/osdep.c b/util/osdep.c
index 06fb1cf..a2863c8 100644
--- a/util/osdep.c
+++ b/util/osdep.c

@@ -38,6 +38,14 @@
 #include "qemu/error-report.h"
 #include "monitor/monitor.h"
 
+#ifdef F_OFD_SETLK
+#define QEMU_SETLK F_OFD_SETLK
+#define QEMU_GETLK F_OFD_GETLK
+#else
+#define QEMU_SETLK F_SETLK
+#define QEMU_GETLK F_GETLK
+#endif
+
 static bool fips_enabled = false;
 
 static const char *hw_version = QEMU_HW_VERSION;
@@ -140,6 +148,46 @@
 {
     return qemu_parse_fd(param);
 }
+
+static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type)
+{
+    int ret;
+    struct flock fl = {
+        .l_whence = SEEK_SET,
+        .l_start  = start,
+        .l_len    = len,
+        .l_type   = fl_type,
+    };
+    ret = fcntl(fd, QEMU_SETLK, &fl);
+    return ret == -1 ? -errno : 0;
+}
+
+int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive)
+{
+    return qemu_lock_fcntl(fd, start, len, exclusive ? F_WRLCK : F_RDLCK);
+}
+
+int qemu_unlock_fd(int fd, int64_t start, int64_t len)
+{
+    return qemu_lock_fcntl(fd, start, len, F_UNLCK);
+}
+
+int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive)
+{
+    int ret;
+    struct flock fl = {
+        .l_whence = SEEK_SET,
+        .l_start  = start,
+        .l_len    = len,
+        .l_type   = exclusive ? F_WRLCK : F_RDLCK,
+    };
+    ret = fcntl(fd, QEMU_GETLK, &fl);
+    if (ret == -1) {
+        return -errno;
+    } else {
+        return fl.l_type == F_UNLCK ? 0 : -EAGAIN;
+    }
+}
 #endif
 
 /*

diff --git a/vl.c b/vl.c
index 58023fc..5c9b40e 100644
--- a/vl.c
+++ b/vl.c

@@ -2050,6 +2050,7 @@
     DT_SDL,
     DT_COCOA,
     DT_GTK,
+    DT_EGL,
     DT_NONE,
 } DisplayType;
 
@@ -2127,6 +2128,15 @@
             error_report("VNC requires a display argument vnc=<display>");
             exit(1);
         }
+    } else if (strstart(p, "egl-headless", &opts)) {
+#ifdef CONFIG_OPENGL
+        request_opengl = 1;
+        display_opengl = 1;
+        display = DT_EGL;
+#else
+        fprintf(stderr, "egl support is disabled\n");
+        exit(1);
+#endif
     } else if (strstart(p, "curses", &opts)) {
 #ifdef CONFIG_CURSES
         display = DT_CURSES;
@@ -3524,10 +3534,11 @@
                     exit(1);
                 }
                 fsdev = qemu_opts_create(qemu_find_opts("fsdev"),
+                                         qemu_opts_id(opts) ?:
                                          qemu_opt_get(opts, "mount_tag"),
                                          1, NULL);
                 if (!fsdev) {
-                    error_report("duplicate fsdev id: %s",
+                    error_report("duplicate or invalid fsdev id: %s",
                                  qemu_opt_get(opts, "mount_tag"));
                     exit(1);
                 }
@@ -3565,7 +3576,7 @@
                                           &error_abort);
                 qemu_opt_set(device, "driver", "virtio-9p-pci", &error_abort);
                 qemu_opt_set(device, "fsdev",
-                             qemu_opt_get(opts, "mount_tag"), &error_abort);
+                             qemu_opts_id(fsdev), &error_abort);
                 qemu_opt_set(device, "mount_tag",
                              qemu_opt_get(opts, "mount_tag"), &error_abort);
                 break;
@@ -4503,7 +4514,7 @@
     default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
     default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
 
-    parse_numa_opts(machine_class);
+    parse_numa_opts(current_machine);
 
     if (qemu_opts_foreach(qemu_find_opts("mon"),
                           mon_init_func, NULL, NULL)) {
@@ -4559,7 +4570,7 @@
     current_machine->boot_order = boot_order;
     current_machine->cpu_model = cpu_model;
 
-    machine_class->init(current_machine);
+    machine_run_board_init(current_machine);
 
     realtime_init();
 
@@ -4592,8 +4603,6 @@
 
     cpu_synchronize_all_post_init();
 
-    numa_post_machine_init();
-
     rom_reset_order_override();
 
     /*
@@ -4659,6 +4668,12 @@
         qemu_spice_display_init();
     }
 
+#ifdef CONFIG_OPENGL
+    if (display_type == DT_EGL) {
+        egl_headless_init();
+    }
+#endif
+
     if (foreach_device_config(DEV_GDB, gdbserver_start) < 0) {
         exit(1);
     }
commit	adb354dd1e00aa6b8bd674f0e1f70008badded0f	[log] [tgz]
author	Stefan Hajnoczi <stefanha@redhat.com>	Thu May 18 10:01:00 2017 +0100
committer	Stefan Hajnoczi <stefanha@redhat.com>	Thu May 18 10:01:08 2017 +0100
tree	51c2422717a6421bdc36a54c22bcb1b292aa44b0
parent	897eee242bc081d72ffbf84664c907dcba620ee3 [diff]
parent	a764040cc831cfe5b8bf1c80e8341b9bf2de3ce8 [diff]