Merge tag 'pull-riscv-to-apply-20230224' of github.com:palmer-dabbelt/qemu into staging
Fourth RISC-V PR for QEMU 8.0, Attempt 2
* A triplet of cleanups to the kernel/initrd loader that avoids
duplication between the various boards.
* Weiwei Li, Daniel Henrique Barboza, and Liu Zhiwei have been added as
reviewers. Thanks for the help!
* A fix for PMP matching to avoid incorrectly applying the default
permissions on PMP permission violations.
* A cleanup to avoid an unnecessary env_archcpu() call in
cpu_get_tb_cpu_state().
* Fixes for the vector slide instructions to avoid truncating 64-bit
values (such as doubles) on 32-bit targets.
# -----BEGIN PGP SIGNATURE-----
#
# iQJHBAABCAAxFiEEKzw3R0RoQ7JKlDp6LhMZ81+7GIkFAmP5Br8THHBhbG1lckBk
# YWJiZWx0LmNvbQAKCRAuExnzX7sYiT4RD/9hdSlQlR1g/2h4fbCJ3U0GvyNH0T7N
# mt3AX8hFvmfR1O63qqVVebJSHM1dTm6WsA19vKE5tdtbjV5V8UZuBTSqYeRBSrLd
# LK9IHhwv3k9OQ/EG8CgRo7HEMxAurpC26zTf3chnfwa1Wyl5XxCXNx5hPbhu18G9
# oxw0sBi51T0Tb+N6lOVVSfmiEZWLXRq+lDCZdV0j864brsSjo4x8VEGrLaFTOJLf
# X4MW6vBI4Pcb7EGnHjj5WvRKsf8gdahdx8bSTjORIm8oGri9Iyw6Vrg2khuhjnuH
# 99sD1O06cvrylp+sCOVei8H3S6/xCepQXUXnCBCd1/cetgV+olo+ZR78Z8ZjXPED
# jhZ23lsDcge+4W141lsCiwLgzI0YO3Ac+84zQLIvcx16c8zow3G9FO9sTlBSsgnW
# 0XJrsUF7AZB6quUSMytG7WK+OBizzCRwj7ItC+Mty68wLrei5lDVj8b0t8hAQEdr
# dOb7jku+Dz8OspGZx1aDKKifGDO+Ppv4PjAM2G44OmkM824SvvFg8+FEr9NgbKbp
# VgTZDCeVC6IEpzthKsK8WeompLo7Sc33KITqwMbGiyGs+gsnmgKP2bcTLF8YTlFk
# dqFBWjo3tjH5oukgTLCSYY4xPaHR9q418vGAfRox15GtUVliQ9iL5oH47PVXg4U7
# YsNZ74nD1pUueg==
# =Umli
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 24 Feb 2023 18:49:35 GMT
# gpg: using RSA key 2B3C3747446843B24A943A7A2E1319F35FBB1889
# gpg: issuer "palmer@dabbelt.com"
# gpg: Good signature from "Palmer Dabbelt <palmer@dabbelt.com>" [unknown]
# gpg: aka "Palmer Dabbelt <palmerdabbelt@google.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 00CE 76D1 8349 60DF CE88 6DF8 EF4C A150 2CCB AB41
# Subkey fingerprint: 2B3C 3747 4468 43B2 4A94 3A7A 2E13 19F3 5FBB 1889
* tag 'pull-riscv-to-apply-20230224' of github.com:palmer-dabbelt/qemu:
target/riscv: Fix vslide1up.vf and vslide1down.vf
target/riscv: avoid env_archcpu() in cpu_get_tb_cpu_state()
target/riscv: Smepmp: Skip applying default rules when address matches
MAINTAINERS: Add some RISC-V reviewers
target/riscv: Remove privileged spec version restriction for RVV
hw/riscv/boot.c: make riscv_load_initrd() static
hw/riscv/boot.c: consolidate all kernel init in riscv_load_kernel()
hw/riscv: handle 32 bit CPUs kernel_entry in riscv_load_kernel()
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
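
As background for the vslide1up.vf/vslide1down.vf entry above: on a 32-bit
target, target_ulong is only 32 bits wide, so routing the scalar operand (for
the .vf forms, the raw bits of a double held in an FPR) through a target_ulong
silently discards the upper half. The standalone sketch below (hypothetical
names, not the QEMU helper code itself) shows that failure mode and the obvious
remedy of keeping the value in uint64_t end to end; it is only meant to
illustrate the class of bug the series description refers to.

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

/* Illustration only: model an rv32 target, where target_ulong is 32 bits. */
typedef uint32_t target_ulong;

/* Broken: narrowing through target_ulong drops the high 32 bits. */
static uint64_t slide1_scalar_broken(uint64_t fpr_bits)
{
    target_ulong s1 = fpr_bits;   /* implicit truncation on rv32 */
    return s1;
}

/* Fixed: carry the scalar as a full 64-bit value. */
static uint64_t slide1_scalar_fixed(uint64_t fpr_bits)
{
    return fpr_bits;
}

int main(void)
{
    uint64_t one = 0x3ff0000000000000ULL;   /* IEEE-754 bits of 1.0 */
    printf("broken: %016" PRIx64 "\n", slide1_scalar_broken(one));
    printf("fixed:  %016" PRIx64 "\n", slide1_scalar_fixed(one));
    return 0;
}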
diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
index 0aa149a..8f332fc 100644
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@@ -467,27 +467,16 @@
TARGETS: x86_64-softmmu ppc64-softmmu riscv64-softmmu x86_64-linux-user
MAKE_CHECK_ARGS: bench V=1
-# gprof/gcov are GCC features
-build-gprof-gcov:
+# gcov is a GCC feature
+gcov:
extends: .native_build_job_template
needs:
job: amd64-ubuntu2004-container
+ timeout: 80m
variables:
IMAGE: ubuntu2004
- CONFIGURE_ARGS: --enable-gprof --enable-gcov
+ CONFIGURE_ARGS: --enable-gcov
TARGETS: aarch64-softmmu ppc64-softmmu s390x-softmmu x86_64-softmmu
- artifacts:
- expire_in: 1 days
- paths:
- - build
-
-check-gprof-gcov:
- extends: .native_test_job_template
- needs:
- - job: build-gprof-gcov
- artifacts: true
- variables:
- IMAGE: ubuntu2004
MAKE_CHECK_ARGS: check
after_script:
- cd build
diff --git a/.gitlab-ci.d/static_checks.yml b/.gitlab-ci.d/static_checks.yml
index 289ad13..b4cbdbc 100644
--- a/.gitlab-ci.d/static_checks.yml
+++ b/.gitlab-ci.d/static_checks.yml
@@ -23,12 +23,12 @@
before_script:
- apk -U add git
-check-python-pipenv:
+check-python-minreqs:
extends: .base_job_template
stage: test
image: $CI_REGISTRY_IMAGE/qemu/python:latest
script:
- - make -C python check-pipenv
+ - make -C python check-minreqs
variables:
GIT_DEPTH: 1
needs:
diff --git a/.gitmodules b/.gitmodules
index 24cffa8..6ce5bf4 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -13,9 +13,6 @@
[submodule "roms/qemu-palcode"]
path = roms/qemu-palcode
url = https://gitlab.com/qemu-project/qemu-palcode.git
-[submodule "roms/sgabios"]
- path = roms/sgabios
- url = https://gitlab.com/qemu-project/sgabios.git
[submodule "dtc"]
path = dtc
url = https://gitlab.com/qemu-project/dtc.git
diff --git a/MAINTAINERS b/MAINTAINERS
index 847bc7f..5c1ee41 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -810,13 +810,13 @@
F: docs/system/arm/musicpal.rst
Nuvoton NPCM7xx
-M: Havard Skinnemoen <hskinnemoen@google.com>
M: Tyrone Ting <kfting@nuvoton.com>
+M: Hao Wu <wuhaotsh@google.com>
L: qemu-arm@nongnu.org
S: Supported
-F: hw/*/npcm7xx*
-F: include/hw/*/npcm7xx*
-F: tests/qtest/npcm7xx*
+F: hw/*/npcm*
+F: include/hw/*/npcm*
+F: tests/qtest/npcm*
F: pc-bios/npcm7xx_bootrom.bin
F: roms/vbootrom
F: docs/system/arm/nuvoton.rst
@@ -1679,7 +1679,6 @@
F: hw/acpi/ich9*.c
F: include/hw/acpi/ich9*.h
F: include/hw/southbridge/piix.h
-F: hw/misc/sga.c
F: hw/isa/apm.c
F: include/hw/isa/apm.h
F: tests/unit/test-x86-cpuid.c
@@ -1999,6 +1998,7 @@
VFIO
M: Alex Williamson <alex.williamson@redhat.com>
+R: Cédric Le Goater <clg@redhat.com>
S: Supported
F: hw/vfio/*
F: include/hw/vfio/
@@ -2100,10 +2100,8 @@
M: Dr. David Alan Gilbert <dgilbert@redhat.com>
M: Stefan Hajnoczi <stefanha@redhat.com>
S: Supported
-F: tools/virtiofsd/*
F: hw/virtio/vhost-user-fs*
F: include/hw/virtio/vhost-user-fs.h
-F: docs/tools/virtiofsd.rst
L: virtio-fs@redhat.com
virtio-input
@@ -2278,7 +2276,6 @@
F: include/hw/acpi/vmgenid.h
F: docs/specs/vmgenid.txt
F: tests/qtest/vmgenid-test.c
-F: stubs/vmgenid.c
LED
M: Philippe Mathieu-Daudé <philmd@linaro.org>
@@ -3583,13 +3580,11 @@
parallels
M: Stefan Hajnoczi <stefanha@redhat.com>
M: Denis V. Lunev <den@openvz.org>
-M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
L: qemu-block@nongnu.org
S: Supported
F: block/parallels.c
F: block/parallels-ext.c
F: docs/interop/parallels.txt
-T: git https://gitlab.com/vsementsov/qemu.git block
qed
M: Stefan Hajnoczi <stefanha@redhat.com>
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index ef5193c..1cf404c 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -176,8 +176,16 @@
if (host == NULL) {
tb_page_addr_t phys_page =
get_page_addr_code_hostp(env, base, &db->host_addr[1]);
- /* We cannot handle MMIO as second page. */
- assert(phys_page != -1);
+
+ /*
+ * If the second page is MMIO, treat as if the first page
+ * was MMIO as well, so that we do not cache the TB.
+ */
+ if (unlikely(phys_page == -1)) {
+ tb_set_page_addr0(tb, -1);
+ return NULL;
+ }
+
tb_set_page_addr1(tb, phys_page);
#ifdef CONFIG_USER_ONLY
page_protect(end);
diff --git a/authz/listfile.c b/authz/listfile.c
index da3a0e6..45a60e9 100644
--- a/authz/listfile.c
+++ b/authz/listfile.c
@@ -30,7 +30,6 @@
#include "qapi/qapi-visit-authz.h"
#include "qapi/qmp/qjson.h"
#include "qapi/qmp/qobject.h"
-#include "qapi/qmp/qerror.h"
#include "qapi/qobject-input-visitor.h"
diff --git a/backends/cryptodev-vhost.c b/backends/cryptodev-vhost.c
index 572f87b..74ea0ad 100644
--- a/backends/cryptodev-vhost.c
+++ b/backends/cryptodev-vhost.c
@@ -28,7 +28,6 @@
#ifdef CONFIG_VHOST_CRYPTO
#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "hw/virtio/virtio-crypto.h"
#include "sysemu/cryptodev-vhost-user.h"
diff --git a/backends/rng.c b/backends/rng.c
index 6c7bf64..9bbd0c7 100644
--- a/backends/rng.c
+++ b/backends/rng.c
@@ -13,7 +13,6 @@
#include "qemu/osdep.h"
#include "sysemu/rng.h"
#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
#include "qemu/module.h"
#include "qom/object_interfaces.h"
diff --git a/backends/vhost-user.c b/backends/vhost-user.c
index 7bfcaef..0596223 100644
--- a/backends/vhost-user.c
+++ b/backends/vhost-user.c
@@ -13,7 +13,6 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qom/object_interfaces.h"
#include "sysemu/vhost-user-backend.h"
diff --git a/block.c b/block.c
index aa9062f..0dd604d 100644
--- a/block.c
+++ b/block.c
@@ -277,8 +277,8 @@
return !(bs->open_flags & BDRV_O_RDWR);
}
-int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
- bool ignore_allow_rdw, Error **errp)
+static int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
+ bool ignore_allow_rdw, Error **errp)
{
IO_CODE();
@@ -533,6 +533,7 @@
int ret;
GLOBAL_STATE_CODE();
ERRP_GUARD();
+ assert_bdrv_graph_readable();
if (!drv->bdrv_co_create_opts) {
error_setg(errp, "Driver '%s' does not support image creation",
@@ -657,8 +658,8 @@
options = qdict_new();
qdict_put_str(options, "driver", drv->format_name);
- blk = blk_new_open(filename, NULL, options,
- BDRV_O_RDWR | BDRV_O_RESIZE, errp);
+ blk = blk_co_new_open(filename, NULL, options,
+ BDRV_O_RDWR | BDRV_O_RESIZE, errp);
if (!blk) {
error_prepend(errp, "Protocol driver '%s' does not support image "
"creation, and opening the image failed: ",
@@ -739,6 +740,7 @@
IO_CODE();
assert(bs != NULL);
+ assert_bdrv_graph_readable();
if (!bs->drv) {
error_setg(errp, "Block node '%s' is not opened", bs->filename);
@@ -1040,6 +1042,7 @@
{
BlockDriver *drv = bs->drv;
IO_CODE();
+ assert_bdrv_graph_readable();
if (!drv) {
return -ENOMEDIUM;
@@ -3807,13 +3810,11 @@
* function eventually calls bdrv_refresh_total_sectors() which polls
* when called from non-coroutine context.
*/
-static BlockDriverState *bdrv_open_inherit(const char *filename,
- const char *reference,
- QDict *options, int flags,
- BlockDriverState *parent,
- const BdrvChildClass *child_class,
- BdrvChildRole child_role,
- Error **errp)
+static BlockDriverState * no_coroutine_fn
+bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
+ int flags, BlockDriverState *parent,
+ const BdrvChildClass *child_class, BdrvChildRole child_role,
+ Error **errp)
{
int ret;
BlockBackend *file = NULL;
@@ -3829,6 +3830,7 @@
assert(!child_class || !flags);
assert(!child_class == !parent);
GLOBAL_STATE_CODE();
+ assert(!qemu_in_coroutine());
if (reference) {
bool options_non_empty = options ? qdict_size(options) : false;
@@ -5266,6 +5268,8 @@
* child.
*
* This function does not create any image files.
+ *
+ * The caller must hold the AioContext lock for @bs_top.
*/
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
Error **errp)
@@ -5273,11 +5277,14 @@
int ret;
BdrvChild *child;
Transaction *tran = tran_new();
+ AioContext *old_context, *new_context = NULL;
GLOBAL_STATE_CODE();
assert(!bs_new->backing);
+ old_context = bdrv_get_aio_context(bs_top);
+
child = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
&child_of_bds, bdrv_backing_role(bs_new),
tran, errp);
@@ -5286,6 +5293,19 @@
goto out;
}
+ /*
+ * bdrv_attach_child_noperm could change the AioContext of bs_top.
+ * bdrv_replace_node_noperm calls bdrv_drained_begin, so let's temporarily
+ * hold the new AioContext, since bdrv_drained_begin calls BDRV_POLL_WHILE
+ * that assumes the new lock is taken.
+ */
+ new_context = bdrv_get_aio_context(bs_top);
+
+ if (old_context != new_context) {
+ aio_context_release(old_context);
+ aio_context_acquire(new_context);
+ }
+
ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
if (ret < 0) {
goto out;
@@ -5297,6 +5317,11 @@
bdrv_refresh_limits(bs_top, NULL, NULL);
+ if (new_context && old_context != new_context) {
+ aio_context_release(new_context);
+ aio_context_acquire(old_context);
+ }
+
return ret;
}
@@ -5819,6 +5844,7 @@
{
BlockDriver *drv = bs->drv;
IO_CODE();
+ assert_bdrv_graph_readable();
if (!drv)
return -ENOMEDIUM;
@@ -5840,6 +5866,7 @@
{
int64_t ret;
IO_CODE();
+ assert_bdrv_graph_readable();
ret = bdrv_co_nb_sectors(bs);
if (ret < 0) {
@@ -6803,6 +6830,7 @@
BlockDriver *drv = bs->drv;
BdrvChild *child;
IO_CODE();
+ assert_bdrv_graph_readable();
if (!drv) {
return false;
@@ -6825,6 +6853,7 @@
{
BlockDriver *drv = bs->drv;
IO_CODE();
+ assert_bdrv_graph_readable();
if (drv && drv->bdrv_co_eject) {
drv->bdrv_co_eject(bs, eject_flag);
@@ -6839,6 +6868,7 @@
{
BlockDriver *drv = bs->drv;
IO_CODE();
+ assert_bdrv_graph_readable();
trace_bdrv_lock_medium(bs, locked);
if (drv && drv->bdrv_co_lock_medium) {
diff --git a/block/backup.c b/block/backup.c
index 824d39a..db3791f 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -22,7 +22,6 @@
#include "block/block-copy.h"
#include "block/dirty-bitmap.h"
#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
#include "qemu/cutils.h"
#include "sysemu/block-backend.h"
#include "qemu/bitmap.h"
@@ -270,7 +269,10 @@
return -ECANCELED;
}
+ /* rdlock protects the subsequent call to bdrv_is_allocated() */
+ bdrv_graph_co_rdlock();
ret = block_copy_reset_unallocated(s->bcs, offset, &count);
+ bdrv_graph_co_rdunlock();
if (ret < 0) {
return ret;
}
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 28772be..978c8cf 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -626,7 +626,7 @@
return -error;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
blkdebug_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
@@ -647,7 +647,7 @@
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
@@ -668,7 +668,7 @@
return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
}
-static int coroutine_fn blkdebug_co_flush(BlockDriverState *bs)
+static int GRAPH_RDLOCK coroutine_fn blkdebug_co_flush(BlockDriverState *bs)
{
int err = rule_check(bs, 0, 0, BLKDEBUG_IO_TYPE_FLUSH);
@@ -679,9 +679,9 @@
return bdrv_co_flush(bs->file->bs);
}
-static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkdebug_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
{
uint32_t align = MAX(bs->bl.request_alignment,
bs->bl.pwrite_zeroes_alignment);
@@ -712,8 +712,8 @@
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
}
-static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
- int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+blkdebug_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
uint32_t align = bs->bl.pdiscard_alignment;
int err;
@@ -967,7 +967,8 @@
return false;
}
-static int64_t coroutine_fn blkdebug_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+blkdebug_co_getlength(BlockDriverState *bs)
{
return bdrv_co_getlength(bs->file->bs);
}
diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index b00b8a6..3ea7141 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -267,7 +267,8 @@
s->log_file = NULL;
}
-static int64_t coroutine_fn blk_log_writes_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_getlength(BlockDriverState *bs)
{
return bdrv_co_getlength(bs->file->bs);
}
@@ -294,7 +295,7 @@
bs->bl.request_alignment = s->sectorsize;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
blk_log_writes_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
@@ -307,7 +308,7 @@
uint64_t bytes;
int file_flags;
QEMUIOVector *qiov;
- int (*func)(struct BlkLogWritesFileReq *r);
+ int GRAPH_RDLOCK_PTR (*func)(struct BlkLogWritesFileReq *r);
int file_ret;
} BlkLogWritesFileReq;
@@ -319,7 +320,8 @@
int log_ret;
} BlkLogWritesLogReq;
-static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
+static void coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
{
BDRVBlkLogWritesState *s = lr->bs->opaque;
uint64_t cur_log_offset = s->cur_log_sector << s->sectorbits;
@@ -368,15 +370,16 @@
}
}
-static void coroutine_fn blk_log_writes_co_do_file(BlkLogWritesFileReq *fr)
+static void coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_do_file(BlkLogWritesFileReq *fr)
{
fr->file_ret = fr->func(fr);
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags,
- int (*file_func)(BlkLogWritesFileReq *r),
+ int /*GRAPH_RDLOCK*/ (*file_func)(BlkLogWritesFileReq *r),
uint64_t entry_flags, bool is_zero_write)
{
QEMUIOVector log_qiov;
@@ -428,32 +431,33 @@
return fr.file_ret;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
blk_log_writes_co_do_file_pwritev(BlkLogWritesFileReq *fr)
{
return bdrv_co_pwritev(fr->bs->file, fr->offset, fr->bytes,
fr->qiov, fr->file_flags);
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
blk_log_writes_co_do_file_pwrite_zeroes(BlkLogWritesFileReq *fr)
{
return bdrv_co_pwrite_zeroes(fr->bs->file, fr->offset, fr->bytes,
fr->file_flags);
}
-static int coroutine_fn blk_log_writes_co_do_file_flush(BlkLogWritesFileReq *fr)
+static int coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_do_file_flush(BlkLogWritesFileReq *fr)
{
return bdrv_co_flush(fr->bs->file->bs);
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
blk_log_writes_co_do_file_pdiscard(BlkLogWritesFileReq *fr)
{
return bdrv_co_pdiscard(fr->bs->file, fr->offset, fr->bytes);
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
blk_log_writes_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
@@ -461,7 +465,7 @@
blk_log_writes_co_do_file_pwritev, 0, false);
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
int64_t bytes, BdrvRequestFlags flags)
{
@@ -470,14 +474,15 @@
true);
}
-static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_flush_to_disk(BlockDriverState *bs)
{
return blk_log_writes_co_log(bs, 0, 0, NULL, 0,
blk_log_writes_co_do_file_flush,
LOG_FLUSH_FLAG, false);
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
return blk_log_writes_co_log(bs, offset, bytes, NULL, 0,
diff --git a/block/blkreplay.c b/block/blkreplay.c
index 16543f5..04f53ee 100644
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -40,7 +40,8 @@
return ret;
}
-static int64_t coroutine_fn blkreplay_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+blkreplay_co_getlength(BlockDriverState *bs)
{
return bdrv_co_getlength(bs->file->bs);
}
@@ -69,8 +70,9 @@
replay_block_event(req->bh, reqid);
}
-static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkreplay_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
uint64_t reqid = blkreplay_next_id();
int ret = bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
@@ -80,8 +82,9 @@
return ret;
}
-static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkreplay_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
uint64_t reqid = blkreplay_next_id();
int ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
@@ -91,8 +94,9 @@
return ret;
}
-static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkreplay_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
{
uint64_t reqid = blkreplay_next_id();
int ret = bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
@@ -102,8 +106,8 @@
return ret;
}
-static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
- int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+blkreplay_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
uint64_t reqid = blkreplay_next_id();
int ret = bdrv_co_pdiscard(bs->file, offset, bytes);
@@ -113,7 +117,7 @@
return ret;
}
-static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK blkreplay_co_flush(BlockDriverState *bs)
{
uint64_t reqid = blkreplay_next_id();
int ret = bdrv_co_flush(bs->file->bs);
diff --git a/block/blkverify.c b/block/blkverify.c
index edf1a55..1c16f86 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -155,7 +155,8 @@
s->test_file = NULL;
}
-static int64_t coroutine_fn blkverify_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+blkverify_co_getlength(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
@@ -256,7 +257,7 @@
return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true);
}
-static int coroutine_fn blkverify_co_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK blkverify_co_flush(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
diff --git a/block/block-backend.c b/block/block-backend.c
index ef512f7..278b04c 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1235,8 +1235,8 @@
blk->disable_request_queuing = disable;
}
-static coroutine_fn int blk_check_byte_request(BlockBackend *blk,
- int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+blk_check_byte_request(BlockBackend *blk, int64_t offset, int64_t bytes)
{
int64_t len;
@@ -1244,7 +1244,7 @@
return -EIO;
}
- if (!blk_is_available(blk)) {
+ if (!blk_co_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -1289,6 +1289,7 @@
IO_CODE();
blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
/* Call blk_bs() only after waiting, the graph may have changed */
bs = blk_bs(blk);
@@ -1363,6 +1364,7 @@
IO_CODE();
blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
/* Call blk_bs() only after waiting, the graph may have changed */
bs = blk_bs(blk);
@@ -1431,6 +1433,7 @@
BlockDriverState **file)
{
IO_CODE();
+ GRAPH_RDLOCK_GUARD();
return bdrv_co_block_status_above(blk_bs(blk), base, offset, bytes, pnum,
map, file);
}
@@ -1441,6 +1444,7 @@
int64_t bytes, int64_t *pnum)
{
IO_CODE();
+ GRAPH_RDLOCK_GUARD();
return bdrv_co_is_allocated_above(blk_bs(blk), base, include_base, offset,
bytes, pnum);
}
@@ -1602,8 +1606,9 @@
int64_t coroutine_fn blk_co_getlength(BlockBackend *blk)
{
IO_CODE();
+ GRAPH_RDLOCK_GUARD();
- if (!blk_is_available(blk)) {
+ if (!blk_co_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -1623,8 +1628,9 @@
int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk)
{
IO_CODE();
+ GRAPH_RDLOCK_GUARD();
- if (!blk_is_available(blk)) {
+ if (!blk_co_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -1670,8 +1676,9 @@
IO_CODE();
blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
- if (!blk_is_available(blk)) {
+ if (!blk_co_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -1716,6 +1723,7 @@
IO_CODE();
blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
@@ -1759,10 +1767,11 @@
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static int coroutine_fn blk_co_do_flush(BlockBackend *blk)
{
- blk_wait_while_drained(blk);
IO_CODE();
+ blk_wait_while_drained(blk);
+ GRAPH_RDLOCK_GUARD();
- if (!blk_is_available(blk)) {
+ if (!blk_co_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -1989,20 +1998,22 @@
{
BlockDriverState *bs = blk_bs(blk);
IO_CODE();
+ assert_bdrv_graph_readable();
return bs && bdrv_co_is_inserted(bs);
}
-bool blk_is_available(BlockBackend *blk)
+bool coroutine_fn blk_co_is_available(BlockBackend *blk)
{
IO_CODE();
- return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
+ return blk_co_is_inserted(blk) && !blk_dev_is_tray_open(blk);
}
void coroutine_fn blk_co_lock_medium(BlockBackend *blk, bool locked)
{
BlockDriverState *bs = blk_bs(blk);
IO_CODE();
+ GRAPH_RDLOCK_GUARD();
if (bs) {
bdrv_co_lock_medium(bs, locked);
@@ -2014,6 +2025,7 @@
BlockDriverState *bs = blk_bs(blk);
char *id;
IO_CODE();
+ GRAPH_RDLOCK_GUARD();
if (bs) {
bdrv_co_eject(bs, eject_flag);
@@ -2321,6 +2333,7 @@
{
BlockDriverState *bs = blk_bs(blk);
IO_CODE();
+ GRAPH_RDLOCK_GUARD();
if (bs) {
bdrv_co_io_plug(bs);
@@ -2331,6 +2344,7 @@
{
BlockDriverState *bs = blk_bs(blk);
IO_CODE();
+ GRAPH_RDLOCK_GUARD();
if (bs) {
bdrv_co_io_unplug(bs);
@@ -2372,7 +2386,8 @@
Error **errp)
{
IO_OR_GS_CODE();
- if (!blk_is_available(blk)) {
+ GRAPH_RDLOCK_GUARD();
+ if (!blk_co_is_available(blk)) {
error_setg(errp, "No medium inserted");
return -ENOMEDIUM;
}
@@ -2627,6 +2642,7 @@
{
int r;
IO_CODE();
+ GRAPH_RDLOCK_GUARD();
r = blk_check_byte_request(blk_in, off_in, bytes);
if (r) {
@@ -2636,6 +2652,7 @@
if (r) {
return r;
}
+
return bdrv_co_copy_range(blk_in->root, off_in,
blk_out->root, off_out,
bytes, read_flags, write_flags);
diff --git a/block/block-copy.c b/block/block-copy.c
index 30a4da0..e13d7bc 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -469,10 +469,9 @@
* value of @method should be used for subsequent tasks.
* Returns 0 on success.
*/
-static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
- int64_t offset, int64_t bytes,
- BlockCopyMethod *method,
- bool *error_is_read)
+static int coroutine_fn GRAPH_RDLOCK
+block_copy_do_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
+ BlockCopyMethod *method, bool *error_is_read)
{
int ret;
int64_t nbytes = MIN(offset + bytes, s->len) - offset;
@@ -558,8 +557,10 @@
BlockCopyMethod method = t->method;
int ret;
- ret = block_copy_do_copy(s, t->req.offset, t->req.bytes, &method,
- &error_is_read);
+ WITH_GRAPH_RDLOCK_GUARD() {
+ ret = block_copy_do_copy(s, t->req.offset, t->req.bytes, &method,
+ &error_is_read);
+ }
WITH_QEMU_LOCK_GUARD(&s->lock) {
if (s->method == t->method) {
@@ -581,9 +582,9 @@
return ret;
}
-static coroutine_fn int block_copy_block_status(BlockCopyState *s,
- int64_t offset,
- int64_t bytes, int64_t *pnum)
+static coroutine_fn GRAPH_RDLOCK
+int block_copy_block_status(BlockCopyState *s, int64_t offset, int64_t bytes,
+ int64_t *pnum)
{
int64_t num;
BlockDriverState *base;
@@ -618,9 +619,9 @@
* Check if the cluster starting at offset is allocated or not.
* return via pnum the number of contiguous clusters sharing this allocation.
*/
-static int coroutine_fn block_copy_is_cluster_allocated(BlockCopyState *s,
- int64_t offset,
- int64_t *pnum)
+static int coroutine_fn GRAPH_RDLOCK
+block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
+ int64_t *pnum)
{
BlockDriverState *bs = s->source->bs;
int64_t count, total_count = 0;
@@ -630,6 +631,7 @@
assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
while (true) {
+ /* protected in backup_run() */
ret = bdrv_co_is_allocated(bs, offset, bytes, &count);
if (ret < 0) {
return ret;
@@ -704,7 +706,7 @@
* Returns 1 if dirty clusters found and successfully copied, 0 if no dirty
* clusters found and -errno on failure.
*/
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
block_copy_dirty_clusters(BlockCopyCallState *call_state)
{
BlockCopyState *s = call_state->s;
@@ -827,7 +829,8 @@
* it means that some I/O operation failed in context of _this_ block_copy call,
* not some parallel operation.
*/
-static int coroutine_fn block_copy_common(BlockCopyCallState *call_state)
+static int coroutine_fn GRAPH_RDLOCK
+block_copy_common(BlockCopyCallState *call_state)
{
int ret;
BlockCopyState *s = call_state->s;
@@ -892,6 +895,7 @@
static void coroutine_fn block_copy_async_co_entry(void *opaque)
{
+ GRAPH_RDLOCK_GUARD();
block_copy_common(opaque);
}
diff --git a/block/bochs.c b/block/bochs.c
index 46e7958..2f5ae52 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -237,7 +237,7 @@
return bitmap_offset + (512 * (s->bitmap_blocks + extent_offset));
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
bochs_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
diff --git a/block/commit.c b/block/commit.c
index 41e3599..2b20fd0 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -18,7 +18,6 @@
#include "block/block_int.h"
#include "block/blockjob_int.h"
#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "qemu/memalign.h"
#include "sysemu/block-backend.h"
@@ -207,8 +206,9 @@
},
};
-static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_commit_top_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
}
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index c9fb809..646d822 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -78,9 +78,9 @@
int snapshot_error;
} BDRVCopyBeforeWriteState;
-static coroutine_fn int cbw_co_preadv(
- BlockDriverState *bs, int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+cbw_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
}
@@ -149,8 +149,8 @@
return 0;
}
-static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs,
- int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+cbw_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
int ret = cbw_do_copy_before_write(bs, offset, bytes, 0);
if (ret < 0) {
@@ -160,8 +160,9 @@
return bdrv_co_pdiscard(bs->file, offset, bytes);
}
-static int coroutine_fn cbw_co_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+cbw_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
{
int ret = cbw_do_copy_before_write(bs, offset, bytes, flags);
if (ret < 0) {
@@ -171,11 +172,9 @@
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
}
-static coroutine_fn int cbw_co_pwritev(BlockDriverState *bs,
- int64_t offset,
- int64_t bytes,
- QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+static coroutine_fn GRAPH_RDLOCK
+int cbw_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
int ret = cbw_do_copy_before_write(bs, offset, bytes, flags);
if (ret < 0) {
@@ -185,7 +184,7 @@
return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
}
-static int coroutine_fn cbw_co_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK cbw_co_flush(BlockDriverState *bs)
{
if (!bs->file) {
return 0;
@@ -257,7 +256,7 @@
g_free(req);
}
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset)
{
@@ -289,7 +288,7 @@
return 0;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
cbw_co_snapshot_block_status(BlockDriverState *bs,
bool want_zero, int64_t offset, int64_t bytes,
int64_t *pnum, int64_t *map,
@@ -322,8 +321,8 @@
return ret;
}
-static int coroutine_fn cbw_co_pdiscard_snapshot(BlockDriverState *bs,
- int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+cbw_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
BDRVCopyBeforeWriteState *s = bs->opaque;
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 3280eb2..cc0f848 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -121,17 +121,16 @@
}
-static int64_t coroutine_fn cor_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK cor_co_getlength(BlockDriverState *bs)
{
return bdrv_co_getlength(bs->file->bs);
}
-static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov,
- size_t qiov_offset,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+cor_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
int64_t n;
int local_flags;
@@ -180,50 +179,49 @@
}
-static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
- int64_t offset,
- int64_t bytes,
- QEMUIOVector *qiov,
- size_t qiov_offset,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+cor_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
flags);
}
-static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+cor_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
{
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
}
-static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs,
- int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+cor_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
return bdrv_co_pdiscard(bs->file, offset, bytes);
}
-static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs,
- int64_t offset,
- int64_t bytes,
- QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+cor_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov)
{
return bdrv_co_pwritev(bs->file, offset, bytes, qiov,
BDRV_REQ_WRITE_COMPRESSED);
}
-static void coroutine_fn cor_co_eject(BlockDriverState *bs, bool eject_flag)
+static void coroutine_fn GRAPH_RDLOCK
+cor_co_eject(BlockDriverState *bs, bool eject_flag)
{
bdrv_co_eject(bs->file->bs, eject_flag);
}
-static void coroutine_fn cor_co_lock_medium(BlockDriverState *bs, bool locked)
+static void coroutine_fn GRAPH_RDLOCK
+cor_co_lock_medium(BlockDriverState *bs, bool locked)
{
bdrv_co_lock_medium(bs->file->bs, locked);
}
diff --git a/block/coroutines.h b/block/coroutines.h
index 2a1e0b3..dd9f3d4 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -43,7 +43,7 @@
int coroutine_fn GRAPH_RDLOCK
bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
-int coroutine_fn
+int coroutine_fn GRAPH_RDLOCK
bdrv_co_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool include_base,
diff --git a/block/create.c b/block/create.c
index 4df43f1..bf67b99 100644
--- a/block/create.c
+++ b/block/create.c
@@ -43,6 +43,7 @@
int ret;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD();
job_progress_set_remaining(&s->common, 1);
ret = s->drv->bdrv_co_create(s->opts, errp);
@@ -59,6 +60,12 @@
.run = blockdev_create_run,
};
+/* Checking whether the function is present doesn't require the graph lock */
+static inline bool TSA_NO_TSA has_bdrv_co_create(BlockDriver *drv)
+{
+ return drv->bdrv_co_create;
+}
+
void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options,
Error **errp)
{
@@ -79,7 +86,7 @@
}
/* Error out if the driver doesn't support .bdrv_co_create */
- if (!drv->bdrv_co_create) {
+ if (!has_bdrv_co_create(drv)) {
error_setg(errp, "Driver does not support blockdev-create");
return;
}
diff --git a/block/crypto.c b/block/crypto.c
index b70cec9..ca67289 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -314,19 +314,18 @@
}
-static int block_crypto_co_create_generic(BlockDriverState *bs,
- int64_t size,
- QCryptoBlockCreateOptions *opts,
- PreallocMode prealloc,
- Error **errp)
+static int coroutine_fn
+block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
+ QCryptoBlockCreateOptions *opts,
+ PreallocMode prealloc, Error **errp)
{
int ret;
BlockBackend *blk;
QCryptoBlock *crypto = NULL;
struct BlockCryptoCreateData data;
- blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
- errp);
+ blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+ errp);
if (!blk) {
ret = -EPERM;
goto cleanup;
@@ -360,7 +359,7 @@
return ret;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags,
Error **errp)
@@ -398,7 +397,7 @@
*/
#define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024)
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
block_crypto_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
@@ -460,7 +459,7 @@
}
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
block_crypto_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
@@ -531,7 +530,8 @@
}
-static int64_t coroutine_fn block_crypto_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+block_crypto_co_getlength(BlockDriverState *bs)
{
BlockCrypto *crypto = bs->opaque;
int64_t len = bdrv_co_getlength(bs->file->bs);
@@ -639,7 +639,7 @@
assert(create_options->driver == BLOCKDEV_DRIVER_LUKS);
luks_opts = &create_options->u.luks;
- bs = bdrv_open_blockdev_ref(luks_opts->file, errp);
+ bs = bdrv_co_open_blockdev_ref(luks_opts->file, errp);
if (bs == NULL) {
return -EIO;
}
@@ -665,10 +665,9 @@
return ret;
}
-static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+block_crypto_co_create_opts_luks(BlockDriver *drv, const char *filename,
+ QemuOpts *opts, Error **errp)
{
QCryptoBlockCreateOptions *create_opts = NULL;
BlockDriverState *bs = NULL;
@@ -708,8 +707,8 @@
goto fail;
}
- bs = bdrv_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+ bs = bdrv_co_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
if (!bs) {
ret = -EINVAL;
goto fail;
diff --git a/block/curl.c b/block/curl.c
index cbada22..8bb39a1 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -38,8 +38,15 @@
// #define DEBUG_VERBOSE
+/* CURL 7.85.0 switches to a string based API for specifying
+ * the desired protocols.
+ */
+#if LIBCURL_VERSION_NUM >= 0x075500
+#define PROTOCOLS "HTTP,HTTPS,FTP,FTPS"
+#else
#define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \
CURLPROTO_FTP | CURLPROTO_FTPS)
+#endif
#define CURL_NUM_STATES 8
#define CURL_NUM_ACB 8
@@ -510,9 +517,18 @@
* obscure protocols. For example, do not allow POP3/SMTP/IMAP see
* CVE-2013-0249.
*
- * Restricting protocols is only supported from 7.19.4 upwards.
+ * Restricting protocols is only supported from 7.19.4 upwards. Note:
+ * version 7.85.0 deprecates CURLOPT_*PROTOCOLS in favour of a string
+ * based CURLOPT_*PROTOCOLS_STR API.
*/
-#if LIBCURL_VERSION_NUM >= 0x071304
+#if LIBCURL_VERSION_NUM >= 0x075500
+ if (curl_easy_setopt(state->curl,
+ CURLOPT_PROTOCOLS_STR, PROTOCOLS) ||
+ curl_easy_setopt(state->curl,
+ CURLOPT_REDIR_PROTOCOLS_STR, PROTOCOLS)) {
+ goto err;
+ }
+#elif LIBCURL_VERSION_NUM >= 0x071304
if (curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS) ||
curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS)) {
goto err;
@@ -670,7 +686,12 @@
const char *file;
const char *cookie;
const char *cookie_secret;
- double d;
+ /* CURL >= 7.55.0 uses curl_off_t for content length instead of a double */
+#if LIBCURL_VERSION_NUM >= 0x073700
+ curl_off_t cl;
+#else
+ double cl;
+#endif
const char *secretid;
const char *protocol_delimiter;
int ret;
@@ -797,27 +818,36 @@
}
if (curl_easy_perform(state->curl))
goto out;
- if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d)) {
+ /* CURL 7.55.0 deprecates CURLINFO_CONTENT_LENGTH_DOWNLOAD in favour of
+ * the *_T version which returns a more sensible type for content length.
+ */
+#if LIBCURL_VERSION_NUM >= 0x073700
+ if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &cl)) {
goto out;
}
+#else
+ if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl)) {
+ goto out;
+ }
+#endif
/* Prior CURL 7.19.4 return value of 0 could mean that the file size is not
* know or the size is zero. From 7.19.4 CURL returns -1 if size is not
* known and zero if it is really zero-length file. */
#if LIBCURL_VERSION_NUM >= 0x071304
- if (d < 0) {
+ if (cl < 0) {
pstrcpy(state->errmsg, CURL_ERROR_SIZE,
"Server didn't report file size.");
goto out;
}
#else
- if (d <= 0) {
+ if (cl <= 0) {
pstrcpy(state->errmsg, CURL_ERROR_SIZE,
"Unknown file size or zero-length file.");
goto out;
}
#endif
- s->len = d;
+ s->len = cl;
if ((!strncasecmp(s->url, "http://", strlen("http://"))
|| !strncasecmp(s->url, "https://", strlen("https://")))
@@ -850,8 +880,10 @@
g_free(s->username);
g_free(s->proxyusername);
g_free(s->proxypassword);
- curl_drop_all_sockets(s->sockets);
- g_hash_table_destroy(s->sockets);
+ if (s->sockets) {
+ curl_drop_all_sockets(s->sockets);
+ g_hash_table_destroy(s->sockets);
+ }
qemu_opts_del(opts);
return -EINVAL;
}
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 1e7aee4..13a1979 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -394,6 +394,7 @@
bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
Error **errp)
{
+ assert_bdrv_graph_readable();
if (bs->drv && bs->drv->bdrv_co_remove_persistent_dirty_bitmap) {
return bs->drv->bdrv_co_remove_persistent_dirty_bitmap(bs, name, errp);
}
@@ -415,6 +416,7 @@
uint32_t granularity, Error **errp)
{
BlockDriver *drv = bs->drv;
+ assert_bdrv_graph_readable();
if (!drv) {
error_setg_errno(errp, ENOMEDIUM,
diff --git a/block/file-posix.c b/block/file-posix.c
index d3073a7..5760cf2 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1738,7 +1738,7 @@
#ifdef CONFIG_FALLOCATE
/* Last resort: we are trying to extend the file with zeroed data. This
* can be done via fallocate(fd, 0) */
- len = bdrv_getlength(aiocb->bs);
+ len = raw_co_getlength(aiocb->bs);
if (s->has_fallocate && len >= 0 && aiocb->aio_offset >= len) {
int ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes);
if (ret == 0 || ret != -ENOTSUP) {
@@ -2607,10 +2607,9 @@
return result;
}
-static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_create_opts(BlockDriver *drv, const char *filename,
+ QemuOpts *opts, Error **errp)
{
BlockdevCreateOptions options;
int64_t total_size = 0;
@@ -2920,8 +2919,8 @@
}
#endif /* __linux__ */
-static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs,
- Error **errp)
+static void coroutine_fn GRAPH_RDLOCK
+raw_co_invalidate_cache(BlockDriverState *bs, Error **errp)
{
BDRVRawState *s = bs->opaque;
int ret;
@@ -3272,7 +3271,7 @@
raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
}
-static int coroutine_fn raw_co_copy_range_from(
+static int coroutine_fn GRAPH_RDLOCK raw_co_copy_range_from(
BlockDriverState *bs, BdrvChild *src, int64_t src_offset,
BdrvChild *dst, int64_t dst_offset, int64_t bytes,
BdrvRequestFlags read_flags, BdrvRequestFlags write_flags)
@@ -3281,14 +3280,12 @@
read_flags, write_flags);
}
-static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
- BdrvChild *src,
- int64_t src_offset,
- BdrvChild *dst,
- int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_copy_range_to(BlockDriverState *bs,
+ BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes, BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags)
{
RawPosixAIOData acb;
BDRVRawState *s = bs->opaque;
diff --git a/block/file-win32.c b/block/file-win32.c
index 200d244..c7d0b85 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -613,10 +613,9 @@
return 0;
}
-static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_create_opts(BlockDriver *drv, const char *filename,
+ QemuOpts *opts, Error **errp)
{
BlockdevCreateOptions options;
int64_t total_size = 0;
diff --git a/block/filter-compress.c b/block/filter-compress.c
index 2e2a659..ac285f4 100644
--- a/block/filter-compress.c
+++ b/block/filter-compress.c
@@ -55,45 +55,43 @@
}
-static int64_t coroutine_fn compress_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+compress_co_getlength(BlockDriverState *bs)
{
return bdrv_co_getlength(bs->file->bs);
}
-static int coroutine_fn compress_co_preadv_part(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov,
- size_t qiov_offset,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+compress_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
flags);
}
-static int coroutine_fn compress_co_pwritev_part(BlockDriverState *bs,
- int64_t offset,
- int64_t bytes,
- QEMUIOVector *qiov,
- size_t qiov_offset,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+compress_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
flags | BDRV_REQ_WRITE_COMPRESSED);
}
-static int coroutine_fn compress_co_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+compress_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
{
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
}
-static int coroutine_fn compress_co_pdiscard(BlockDriverState *bs,
- int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+compress_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
return bdrv_co_pdiscard(bs->file, offset, bytes);
}
@@ -117,14 +115,14 @@
}
-static void coroutine_fn
+static void coroutine_fn GRAPH_RDLOCK
compress_co_eject(BlockDriverState *bs, bool eject_flag)
{
bdrv_co_eject(bs->file->bs, eject_flag);
}
-static void coroutine_fn
+static void coroutine_fn GRAPH_RDLOCK
compress_co_lock_medium(BlockDriverState *bs, bool locked)
{
bdrv_co_lock_medium(bs->file->bs, locked);
diff --git a/block/io.c b/block/io.c
index 2dc0c13..8974d46 100644
--- a/block/io.c
+++ b/block/io.c
@@ -160,6 +160,7 @@
bool have_limits;
GLOBAL_STATE_CODE();
+ assume_graph_lock(); /* FIXME */
if (tran) {
BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
@@ -932,6 +933,7 @@
{
int ret;
IO_CODE();
+ assert_bdrv_graph_readable();
ret = bdrv_co_pwrite(child, offset, bytes, buf, flags);
if (ret < 0) {
@@ -959,16 +961,16 @@
aio_co_wake(co->coroutine);
}
-static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov,
- size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_driver_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
BlockDriver *drv = bs->drv;
int64_t sector_num;
unsigned int nb_sectors;
QEMUIOVector local_qiov;
int ret;
+ assert_bdrv_graph_readable();
bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
assert(!(flags & ~bs->supported_read_flags));
@@ -1028,11 +1030,10 @@
return ret;
}
-static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov,
- size_t qiov_offset,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_driver_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
bool emulate_fua = false;
@@ -1040,6 +1041,7 @@
unsigned int nb_sectors;
QEMUIOVector local_qiov;
int ret;
+ assert_bdrv_graph_readable();
bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
@@ -1110,7 +1112,7 @@
return ret;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
bdrv_driver_pwritev_compressed(BlockDriverState *bs, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
size_t qiov_offset)
@@ -1118,6 +1120,7 @@
BlockDriver *drv = bs->drv;
QEMUIOVector local_qiov;
int ret;
+ assert_bdrv_graph_readable();
bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
@@ -1145,9 +1148,9 @@
return ret;
}
-static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
BlockDriverState *bs = child->bs;
@@ -1309,9 +1312,10 @@
* handles copy on read, zeroing after EOF, and fragmentation of large
* reads; any other features must be implemented by the caller.
*/
-static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
- BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
- int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req,
+ int64_t offset, int64_t bytes, int64_t align,
+ QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
BlockDriverState *bs = child->bs;
int64_t total_bytes, max_bytes;
@@ -1478,10 +1482,9 @@
return true;
}
-static coroutine_fn int bdrv_padding_rmw_read(BdrvChild *child,
- BdrvTrackedRequest *req,
- BdrvRequestPadding *pad,
- bool zero_middle)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_padding_rmw_read(BdrvChild *child, BdrvTrackedRequest *req,
+ BdrvRequestPadding *pad, bool zero_middle)
{
QEMUIOVector local_qiov;
BlockDriverState *bs = child->bs;
@@ -1669,8 +1672,9 @@
return ret;
}
-static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
QEMUIOVector qiov;
@@ -1686,6 +1690,7 @@
bs->bl.request_alignment);
int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
+ assert_bdrv_graph_readable();
bdrv_check_request(offset, bytes, &error_abort);
if (!drv) {
@@ -1889,10 +1894,11 @@
* Forwards an already correctly aligned write request to the BlockDriver,
* after possibly fragmenting it.
*/
-static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
- BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
- int64_t align, QEMUIOVector *qiov, size_t qiov_offset,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_aligned_pwritev(BdrvChild *child, BdrvTrackedRequest *req,
+ int64_t offset, int64_t bytes, int64_t align,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
@@ -1926,6 +1932,9 @@
if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
flags |= BDRV_REQ_MAY_UNMAP;
}
+
+ /* Can't use optimization hint with bufferless zero write */
+ flags &= ~BDRV_REQ_REGISTERED_BUF;
}
if (ret < 0) {
@@ -1973,11 +1982,9 @@
return ret;
}
-static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
- int64_t offset,
- int64_t bytes,
- BdrvRequestFlags flags,
- BdrvTrackedRequest *req)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags, BdrvTrackedRequest *req)
{
BlockDriverState *bs = child->bs;
QEMUIOVector local_qiov;
@@ -2150,6 +2157,7 @@
{
IO_CODE();
trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
+ assert_bdrv_graph_readable();
if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
flags &= ~BDRV_REQ_MAY_UNMAP;
@@ -2221,11 +2229,10 @@
* BDRV_BLOCK_OFFSET_VALID bit is set, 'map' and 'file' (if non-NULL) are
* set to the host mapping and BDS corresponding to the guest offset.
*/
-static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
- bool want_zero,
- int64_t offset, int64_t bytes,
- int64_t *pnum, int64_t *map,
- BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_block_status(BlockDriverState *bs, bool want_zero,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map, BlockDriverState **file)
{
int64_t total_size;
int64_t n; /* bytes */
@@ -2237,6 +2244,7 @@
bool has_filtered_child;
assert(pnum);
+ assert_bdrv_graph_readable();
*pnum = 0;
total_size = bdrv_getlength(bs);
if (total_size < 0) {
@@ -2467,6 +2475,7 @@
IO_CODE();
assert(!include_base || base); /* Can't include NULL base */
+ assert_bdrv_graph_readable();
if (!depth) {
depth = &dummy;
@@ -2833,6 +2842,7 @@
int ret = 0;
IO_CODE();
+ assert_bdrv_graph_readable();
bdrv_inc_in_flight(bs);
if (!bdrv_co_is_inserted(bs) || bdrv_is_read_only(bs) ||
@@ -2958,6 +2968,7 @@
int head, tail, align;
BlockDriverState *bs = child->bs;
IO_CODE();
+ assert_bdrv_graph_readable();
if (!bs || !bs->drv || !bdrv_co_is_inserted(bs)) {
return -ENOMEDIUM;
@@ -3077,6 +3088,7 @@
};
BlockAIOCB *acb;
IO_CODE();
+ assert_bdrv_graph_readable();
bdrv_inc_in_flight(bs);
if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
@@ -3141,6 +3153,7 @@
{
BdrvChild *child;
IO_CODE();
+ assert_bdrv_graph_readable();
QLIST_FOREACH(child, &bs->children, next) {
bdrv_co_io_plug(child->bs);
@@ -3158,6 +3171,7 @@
{
BdrvChild *child;
IO_CODE();
+ assert_bdrv_graph_readable();
assert(bs->io_plugged);
if (qatomic_fetch_dec(&bs->io_plugged) == 1) {
@@ -3173,13 +3187,15 @@
}
/* Helper that undoes bdrv_register_buf() when it fails partway through */
-static void bdrv_register_buf_rollback(BlockDriverState *bs,
- void *host,
- size_t size,
- BdrvChild *final_child)
+static void GRAPH_RDLOCK
+bdrv_register_buf_rollback(BlockDriverState *bs, void *host, size_t size,
+ BdrvChild *final_child)
{
BdrvChild *child;
+ GLOBAL_STATE_CODE();
+ assert_bdrv_graph_readable();
+
QLIST_FOREACH(child, &bs->children, next) {
if (child == final_child) {
break;
@@ -3199,6 +3215,8 @@
BdrvChild *child;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
if (bs->drv && bs->drv->bdrv_register_buf) {
if (!bs->drv->bdrv_register_buf(bs, host, size, errp)) {
return false;
@@ -3218,6 +3236,8 @@
BdrvChild *child;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
if (bs->drv && bs->drv->bdrv_unregister_buf) {
bs->drv->bdrv_unregister_buf(bs, host, size);
}
@@ -3226,7 +3246,7 @@
}
}
-static int coroutine_fn bdrv_co_copy_range_internal(
+static int coroutine_fn GRAPH_RDLOCK bdrv_co_copy_range_internal(
BdrvChild *src, int64_t src_offset, BdrvChild *dst,
int64_t dst_offset, int64_t bytes,
BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
@@ -3234,6 +3254,7 @@
{
BdrvTrackedRequest req;
int ret;
+ assert_bdrv_graph_readable();
/* TODO We can support BDRV_REQ_NO_FALLBACK here */
assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
@@ -3315,6 +3336,7 @@
BdrvRequestFlags write_flags)
{
IO_CODE();
+ assert_bdrv_graph_readable();
trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
read_flags, write_flags);
return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
@@ -3332,6 +3354,7 @@
BdrvRequestFlags write_flags)
{
IO_CODE();
+ assert_bdrv_graph_readable();
trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
read_flags, write_flags);
return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
@@ -3344,6 +3367,8 @@
BdrvRequestFlags write_flags)
{
IO_CODE();
+ assert_bdrv_graph_readable();
+
return bdrv_co_copy_range_from(src, src_offset,
dst, dst_offset,
bytes, read_flags, write_flags);
@@ -3377,6 +3402,7 @@
int64_t old_size, new_bytes;
int ret;
IO_CODE();
+ assert_bdrv_graph_readable();
/* if bs->drv == NULL, bs is closed, so there's nothing to do here */
if (!drv) {
@@ -3514,6 +3540,7 @@
BlockDriver *drv = bs->drv;
int ret;
IO_CODE();
+ assert_bdrv_graph_readable();
if (!drv) {
return -ENOMEDIUM;
@@ -3539,6 +3566,7 @@
BlockDriver *drv = bs->drv;
int ret;
IO_CODE();
+ assert_bdrv_graph_readable();
if (!drv) {
return -ENOMEDIUM;
@@ -3562,6 +3590,7 @@
BlockDriver *drv = bs->drv;
int ret;
IO_CODE();
+ assert_bdrv_graph_readable();
if (!drv) {
return -ENOMEDIUM;
diff --git a/block/iscsi.c b/block/iscsi.c
index b3e10f4..9fc0bed 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -269,6 +269,7 @@
timer_mod(&iTask->retry_timer,
qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
iTask->do_retry = 1;
+ return;
} else if (status == SCSI_STATUS_CHECK_CONDITION) {
int error = iscsi_translate_sense(&task->sense);
if (error == EAGAIN) {
@@ -1353,6 +1354,9 @@
} else if (!password) {
error_setg(errp, "CHAP username specified but no password was given");
return;
+ } else {
+ warn_report("iSCSI block driver 'password' option is deprecated, "
+ "use 'password-secret' instead");
}
if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
@@ -2186,14 +2190,12 @@
iscsi_allocmap_invalidate(iscsilun);
}
-static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs,
- BdrvChild *src,
- int64_t src_offset,
- BdrvChild *dst,
- int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags)
+static int coroutine_fn GRAPH_RDLOCK
+iscsi_co_copy_range_from(BlockDriverState *bs,
+ BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes, BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags)
{
return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
read_flags, write_flags);
@@ -2327,14 +2329,12 @@
src_lba, dst_lba);
}
-static int coroutine_fn iscsi_co_copy_range_to(BlockDriverState *bs,
- BdrvChild *src,
- int64_t src_offset,
- BdrvChild *dst,
- int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags)
+static int coroutine_fn GRAPH_RDLOCK
+iscsi_co_copy_range_to(BlockDriverState *bs,
+ BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes, BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags)
{
IscsiLun *dst_lun = dst->bs->opaque;
IscsiLun *src_lun;
diff --git a/block/meson.build b/block/meson.build
index 3662852..382bec0 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -141,6 +141,7 @@
'../include/block/dirty-bitmap.h',
'../include/block/block_int-io.h',
'../include/block/block-global-state.h',
+ '../include/sysemu/block-backend-global-state.h',
'../include/sysemu/block-backend-io.h',
'coroutines.h'
),
diff --git a/block/mirror.c b/block/mirror.c
index ab326b6..663e2b7 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -21,7 +21,6 @@
#include "block/dirty-bitmap.h"
#include "sysemu/block-backend.h"
#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"
#include "qemu/memalign.h"
@@ -390,8 +389,10 @@
op->is_in_flight = true;
trace_mirror_one_iteration(s, op->offset, op->bytes);
- ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes,
- &op->qiov, 0);
+ WITH_GRAPH_RDLOCK_GUARD() {
+ ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes,
+ &op->qiov, 0);
+ }
mirror_read_complete(op, ret);
}
@@ -558,9 +559,11 @@
MirrorMethod mirror_method = MIRROR_METHOD_COPY;
assert(!(offset % s->granularity));
- ret = bdrv_block_status_above(source, NULL, offset,
- nb_chunks * s->granularity,
- &io_bytes, NULL, NULL);
+ WITH_GRAPH_RDLOCK_GUARD() {
+ ret = bdrv_block_status_above(source, NULL, offset,
+ nb_chunks * s->granularity,
+ &io_bytes, NULL, NULL);
+ }
if (ret < 0) {
io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes);
} else if (ret & BDRV_BLOCK_DATA) {
@@ -863,8 +866,10 @@
return 0;
}
- ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, bytes,
- &count);
+ WITH_GRAPH_RDLOCK_GUARD() {
+ ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset,
+ bytes, &count);
+ }
if (ret < 0) {
return ret;
}
@@ -896,6 +901,7 @@
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
BlockDriverState *bs = s->mirror_top_bs->backing->bs;
+ MirrorBDSOpaque *mirror_top_opaque = s->mirror_top_bs->opaque;
BlockDriverState *target_bs = blk_bs(s->target);
bool need_drain = true;
BlockDeviceIoStatus iostatus;
@@ -910,7 +916,10 @@
goto immediate_exit;
}
+ bdrv_graph_co_rdlock();
s->bdev_length = bdrv_co_getlength(bs);
+ bdrv_graph_co_rdunlock();
+
if (s->bdev_length < 0) {
ret = s->bdev_length;
goto immediate_exit;
@@ -985,6 +994,12 @@
}
}
+ /*
+ * Only now the job is fully initialised and mirror_top_bs should start
+ * accessing it.
+ */
+ mirror_top_opaque->job = s;
+
assert(!s->dbi);
s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap);
for (;;) {
@@ -1426,15 +1441,17 @@
g_free(op);
}
-static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_mirror_top_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
}
-static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs,
- MirrorMethod method, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
- int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_mirror_top_do_write(BlockDriverState *bs, MirrorMethod method,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
MirrorOp *op = NULL;
MirrorBDSOpaque *s = bs->opaque;
@@ -1483,8 +1500,9 @@
return ret;
}
-static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_mirror_top_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
MirrorBDSOpaque *s = bs->opaque;
QEMUIOVector bounce_qiov;
@@ -1524,7 +1542,7 @@
return ret;
}
-static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK bdrv_mirror_top_flush(BlockDriverState *bs)
{
if (bs->backing == NULL) {
/* we can be here after failed bdrv_append in mirror_start_job */
@@ -1533,15 +1551,16 @@
return bdrv_co_flush(bs->backing->bs);
}
-static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags)
{
return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, offset, bytes, NULL,
flags);
}
-static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
- int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_mirror_top_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, offset, bytes,
NULL, 0);
@@ -1704,7 +1723,6 @@
if (!s) {
goto fail;
}
- bs_opaque->job = s;
/* The block job now has a reference to this node */
bdrv_unref(mirror_top_bs);
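
The mirror.c changes above use the two caller-side idioms for taking the reader lock inside a coroutine: the scoped WITH_GRAPH_RDLOCK_GUARD() block around bdrv_co_preadv(), bdrv_block_status_above() and bdrv_is_allocated_above(), and the explicit bdrv_graph_co_rdlock()/bdrv_graph_co_rdunlock() pair around bdrv_co_getlength(). A sketch of both follows; the wrapper example_co_length() is hypothetical and not part of the patch.

/* Hypothetical wrapper showing both locking styles used in mirror.c. */
static int64_t coroutine_fn example_co_length(BlockDriverState *bs)
{
    int64_t len;

    /* Scoped form: the lock is dropped automatically when the block exits. */
    WITH_GRAPH_RDLOCK_GUARD() {
        len = bdrv_co_getlength(bs);
    }

    /* Explicit form, as used around s->bdev_length in the hunk above. */
    bdrv_graph_co_rdlock();
    len = bdrv_co_getlength(bs);
    bdrv_graph_co_rdunlock();

    return len;
}
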
diff --git a/block/parallels.c b/block/parallels.c
index bbea2f2..0136848 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -165,9 +165,9 @@
return start_off;
}
-static coroutine_fn int64_t allocate_clusters(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, int *pnum)
+static int64_t coroutine_fn GRAPH_RDLOCK
+allocate_clusters(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *pnum)
{
int ret = 0;
BDRVParallelsState *s = bs->opaque;
@@ -261,7 +261,8 @@
}
-static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_flush_to_os(BlockDriverState *bs)
{
BDRVParallelsState *s = bs->opaque;
unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block);
@@ -320,9 +321,9 @@
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
}
-static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov, int flags)
{
BDRVParallelsState *s = bs->opaque;
uint64_t bytes_done = 0;
@@ -363,8 +364,9 @@
return ret;
}
-static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov)
{
BDRVParallelsState *s = bs->opaque;
uint64_t bytes_done = 0;
@@ -414,9 +416,9 @@
}
-static int coroutine_fn parallels_co_check(BlockDriverState *bs,
- BdrvCheckResult *res,
- BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix)
{
BDRVParallelsState *s = bs->opaque;
int64_t size, prev_off, high_off;
@@ -565,13 +567,13 @@
}
/* Create BlockBackend to write to the image */
- bs = bdrv_open_blockdev_ref(parallels_opts->file, errp);
+ bs = bdrv_co_open_blockdev_ref(parallels_opts->file, errp);
if (bs == NULL) {
return -EIO;
}
- blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
- errp);
+ blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+ errp);
if (!blk) {
ret = -EPERM;
goto out;
@@ -620,10 +622,9 @@
goto out;
}
-static int coroutine_fn parallels_co_create_opts(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_create_opts(BlockDriver *drv, const char *filename,
+ QemuOpts *opts, Error **errp)
{
BlockdevCreateOptions *create_options = NULL;
BlockDriverState *bs = NULL;
@@ -651,8 +652,8 @@
goto done;
}
- bs = bdrv_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+ bs = bdrv_co_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
if (bs == NULL) {
ret = -EIO;
goto done;
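
The parallels create path above (and the qcow/qcow2/qed create paths later in this series) switches from bdrv_open()/bdrv_open_blockdev_ref()/blk_new_with_bs() to the coroutine-safe variants bdrv_co_open(), bdrv_co_open_blockdev_ref() and blk_co_new_with_bs(). Below is a condensed sketch of the resulting shape, not the patch itself; the function name is hypothetical and the format-specific work is elided.

#include "qemu/osdep.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"

/* Hypothetical, condensed create path using the coroutine variants. */
static int coroutine_fn
example_co_create_file(const char *filename, Error **errp)
{
    BlockDriverState *bs;
    BlockBackend *blk;
    int ret = 0;

    bs = bdrv_co_open(filename, NULL, NULL,
                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
    if (bs == NULL) {
        return -EIO;
    }

    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE,
                             BLK_PERM_ALL, errp);
    if (!blk) {
        ret = -EPERM;
        goto out;
    }

    /* ... format-specific initialisation would go here ... */

    blk_unref(blk);
out:
    bdrv_unref(bs);
    return ret;
}
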
diff --git a/block/preallocate.c b/block/preallocate.c
index c0dcf8c..71c3601 100644
--- a/block/preallocate.c
+++ b/block/preallocate.c
@@ -226,16 +226,17 @@
state->opaque = NULL;
}
-static coroutine_fn int preallocate_co_preadv_part(
- BlockDriverState *bs, int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
flags);
}
-static int coroutine_fn preallocate_co_pdiscard(BlockDriverState *bs,
- int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
return bdrv_co_pdiscard(bs->file, offset, bytes);
}
@@ -269,8 +270,9 @@
* want_merge_zero is used to merge write-zero request with preallocation in
* one bdrv_co_pwrite_zeroes() call.
*/
-static bool coroutine_fn handle_write(BlockDriverState *bs, int64_t offset,
- int64_t bytes, bool want_merge_zero)
+static bool coroutine_fn GRAPH_RDLOCK
+handle_write(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ bool want_merge_zero)
{
BDRVPreallocateState *s = bs->opaque;
int64_t end = offset + bytes;
@@ -345,8 +347,9 @@
return want_merge_zero;
}
-static int coroutine_fn preallocate_co_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags)
{
bool want_merge_zero =
!(flags & ~(BDRV_REQ_ZERO_WRITE | BDRV_REQ_NO_FALLBACK));
@@ -357,12 +360,10 @@
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
}
-static coroutine_fn int preallocate_co_pwritev_part(BlockDriverState *bs,
- int64_t offset,
- int64_t bytes,
- QEMUIOVector *qiov,
- size_t qiov_offset,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
handle_write(bs, offset, bytes, false);
@@ -370,7 +371,7 @@
flags);
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
preallocate_co_truncate(BlockDriverState *bs, int64_t offset,
bool exact, PreallocMode prealloc,
BdrvRequestFlags flags, Error **errp)
@@ -437,12 +438,13 @@
return 0;
}
-static int coroutine_fn preallocate_co_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK preallocate_co_flush(BlockDriverState *bs)
{
return bdrv_co_flush(bs->file->bs);
}
-static int64_t coroutine_fn preallocate_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+preallocate_co_getlength(BlockDriverState *bs)
{
int64_t ret;
BDRVPreallocateState *s = bs->opaque;
diff --git a/block/qcow.c b/block/qcow.c
index 5f0801f..490e4f8 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -92,8 +92,8 @@
static QemuOptsList qcow_create_opts;
-static int coroutine_fn decompress_cluster(BlockDriverState *bs,
- uint64_t cluster_offset);
+static int coroutine_fn GRAPH_RDLOCK
+decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
{
@@ -350,11 +350,10 @@
* return 0 if not allocated, 1 if *result is assigned, and negative
* errno on failure.
*/
-static int coroutine_fn get_cluster_offset(BlockDriverState *bs,
- uint64_t offset, int allocate,
- int compressed_size,
- int n_start, int n_end,
- uint64_t *result)
+static int coroutine_fn GRAPH_RDLOCK
+get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate,
+ int compressed_size, int n_start, int n_end,
+ uint64_t *result)
{
BDRVQcowState *s = bs->opaque;
int min_index, i, j, l1_index, l2_index, ret;
@@ -525,11 +524,10 @@
return 1;
}
-static int coroutine_fn qcow_co_block_status(BlockDriverState *bs,
- bool want_zero,
- int64_t offset, int64_t bytes,
- int64_t *pnum, int64_t *map,
- BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+qcow_co_block_status(BlockDriverState *bs, bool want_zero,
+ int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file)
{
BDRVQcowState *s = bs->opaque;
int index_in_cluster, ret;
@@ -586,8 +584,8 @@
return 0;
}
-static int coroutine_fn decompress_cluster(BlockDriverState *bs,
- uint64_t cluster_offset)
+static int coroutine_fn GRAPH_RDLOCK
+decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
{
BDRVQcowState *s = bs->opaque;
int ret, csize;
@@ -619,9 +617,9 @@
bs->bl.request_alignment = BDRV_SECTOR_SIZE;
}
-static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, int64_t offset,
- int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+qcow_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
BDRVQcowState *s = bs->opaque;
int offset_in_cluster;
@@ -715,9 +713,9 @@
return ret;
}
-static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, int64_t offset,
- int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+qcow_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
BDRVQcowState *s = bs->opaque;
int offset_in_cluster;
@@ -833,13 +831,13 @@
}
/* Create BlockBackend to write to the image */
- bs = bdrv_open_blockdev_ref(qcow_opts->file, errp);
+ bs = bdrv_co_open_blockdev_ref(qcow_opts->file, errp);
if (bs == NULL) {
return -EIO;
}
- qcow_blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE,
- BLK_PERM_ALL, errp);
+ qcow_blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE,
+ BLK_PERM_ALL, errp);
if (!qcow_blk) {
ret = -EPERM;
goto exit;
@@ -923,9 +921,9 @@
return ret;
}
-static int coroutine_fn qcow_co_create_opts(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow_co_create_opts(BlockDriver *drv, const char *filename,
+ QemuOpts *opts, Error **errp)
{
BlockdevCreateOptions *create_options = NULL;
BlockDriverState *bs = NULL;
@@ -978,8 +976,8 @@
goto fail;
}
- bs = bdrv_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+ bs = bdrv_co_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
if (bs == NULL) {
ret = -EIO;
goto fail;
@@ -1046,7 +1044,7 @@
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
qcow_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov)
{
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 870be10..a9e6622 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -491,10 +491,9 @@
return count;
}
-static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
- uint64_t src_cluster_offset,
- unsigned offset_in_cluster,
- QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+do_perform_cow_read(BlockDriverState *bs, uint64_t src_cluster_offset,
+ unsigned offset_in_cluster, QEMUIOVector *qiov)
{
int ret;
@@ -535,10 +534,9 @@
return 0;
}
-static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
- uint64_t cluster_offset,
- unsigned offset_in_cluster,
- QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+do_perform_cow_write(BlockDriverState *bs, uint64_t cluster_offset,
+ unsigned offset_in_cluster, QEMUIOVector *qiov)
{
BDRVQcow2State *s = bs->opaque;
int ret;
@@ -886,7 +884,8 @@
return 0;
}
-static int coroutine_fn perform_cow(BlockDriverState *bs, QCowL2Meta *m)
+static int coroutine_fn GRAPH_RDLOCK
+perform_cow(BlockDriverState *bs, QCowL2Meta *m)
{
BDRVQcow2State *s = bs->opaque;
Qcow2COWRegion *start = &m->cow_start;
diff --git a/block/qcow2.c b/block/qcow2.c
index 21aa4c6..30fd53f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -601,9 +601,9 @@
}
}
-static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
- BdrvCheckResult *result,
- BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_check_locked(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix)
{
BdrvCheckResult snapshot_res = {};
BdrvCheckResult refcount_res = {};
@@ -640,9 +640,9 @@
return ret;
}
-static int coroutine_fn qcow2_co_check(BlockDriverState *bs,
- BdrvCheckResult *result,
- BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_check(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix)
{
BDRVQcow2State *s = bs->opaque;
int ret;
@@ -1294,9 +1294,9 @@
}
/* Called with s->lock held. */
-static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
- int flags, bool open_data_file,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
+ bool open_data_file, Error **errp)
{
ERRP_GUARD();
BDRVQcow2State *s = bs->opaque;
@@ -1617,9 +1617,9 @@
if (open_data_file) {
/* Open external data file */
- s->data_file = bdrv_open_child(NULL, options, "data-file", bs,
- &child_of_bds, BDRV_CHILD_DATA,
- true, errp);
+ s->data_file = bdrv_co_open_child(NULL, options, "data-file", bs,
+ &child_of_bds, BDRV_CHILD_DATA,
+ true, errp);
if (*errp) {
ret = -EINVAL;
goto fail;
@@ -1627,9 +1627,10 @@
if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
if (!s->data_file && s->image_data_file) {
- s->data_file = bdrv_open_child(s->image_data_file, options,
- "data-file", bs, &child_of_bds,
- BDRV_CHILD_DATA, false, errp);
+ s->data_file = bdrv_co_open_child(s->image_data_file, options,
+ "data-file", bs,
+ &child_of_bds,
+ BDRV_CHILD_DATA, false, errp);
if (!s->data_file) {
ret = -EINVAL;
goto fail;
@@ -1889,6 +1890,8 @@
QCow2OpenCo *qoc = opaque;
BDRVQcow2State *s = qoc->bs->opaque;
+ assume_graph_lock(); /* FIXME */
+
qemu_co_mutex_lock(&s->lock);
qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, true,
qoc->errp);
@@ -2136,9 +2139,8 @@
return status;
}
-static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
- QCowL2Meta **pl2meta,
- bool link_l2)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_handle_l2meta(BlockDriverState *bs, QCowL2Meta **pl2meta, bool link_l2)
{
int ret = 0;
QCowL2Meta *l2meta = *pl2meta;
@@ -2169,7 +2171,7 @@
return ret;
}
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
qcow2_co_preadv_encrypted(BlockDriverState *bs,
uint64_t host_offset,
uint64_t offset,
@@ -2270,12 +2272,10 @@
return 0;
}
-static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
- QCow2SubclusterType subc_type,
- uint64_t host_offset,
- uint64_t offset, uint64_t bytes,
- QEMUIOVector *qiov,
- size_t qiov_offset)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_preadv_task(BlockDriverState *bs, QCow2SubclusterType subc_type,
+ uint64_t host_offset, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset)
{
BDRVQcow2State *s = bs->opaque;
@@ -2314,7 +2314,11 @@
g_assert_not_reached();
}
-static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task)
+/*
+ * This function can count as GRAPH_RDLOCK because qcow2_co_preadv_part() holds
+ * the graph lock and keeps it until this coroutine has terminated.
+ */
+static int coroutine_fn GRAPH_RDLOCK qcow2_co_preadv_task_entry(AioTask *task)
{
Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
@@ -2325,11 +2329,10 @@
t->qiov, t->qiov_offset);
}
-static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov,
- size_t qiov_offset,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
BDRVQcow2State *s = bs->opaque;
int ret = 0;
@@ -2449,7 +2452,8 @@
* Return 1 if the COW regions read as zeroes, 0 if not, < 0 on error.
* Note that returning 0 does not guarantee non-zero data.
*/
-static int coroutine_fn is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
+static int coroutine_fn GRAPH_RDLOCK
+is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
{
/*
* This check is designed for optimization shortcut so it must be
@@ -2467,8 +2471,8 @@
m->cow_end.nb_bytes);
}
-static int coroutine_fn handle_alloc_space(BlockDriverState *bs,
- QCowL2Meta *l2meta)
+static int coroutine_fn GRAPH_RDLOCK
+handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
{
BDRVQcow2State *s = bs->opaque;
QCowL2Meta *m;
@@ -2531,12 +2535,10 @@
* l2meta - if not NULL, qcow2_co_pwritev_task() will consume it. Caller must
* not use it somehow after qcow2_co_pwritev_task() call
*/
-static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs,
- uint64_t host_offset,
- uint64_t offset, uint64_t bytes,
- QEMUIOVector *qiov,
- uint64_t qiov_offset,
- QCowL2Meta *l2meta)
+static coroutine_fn GRAPH_RDLOCK
+int qcow2_co_pwritev_task(BlockDriverState *bs, uint64_t host_offset,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
+ uint64_t qiov_offset, QCowL2Meta *l2meta)
{
int ret;
BDRVQcow2State *s = bs->opaque;
@@ -2602,7 +2604,11 @@
return ret;
}
-static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task)
+/*
+ * This function can count as GRAPH_RDLOCK because qcow2_co_pwritev_part() holds
+ * the graph lock and keeps it until this coroutine has terminated.
+ */
+static coroutine_fn GRAPH_RDLOCK int qcow2_co_pwritev_task_entry(AioTask *task)
{
Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
@@ -2613,9 +2619,10 @@
t->l2meta);
}
-static coroutine_fn int qcow2_co_pwritev_part(
- BlockDriverState *bs, int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
BDRVQcow2State *s = bs->opaque;
int offset_in_cluster;
@@ -2770,8 +2777,8 @@
qcow2_do_close(bs, true);
}
-static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs,
- Error **errp)
+static void coroutine_fn GRAPH_RDLOCK
+qcow2_co_invalidate_cache(BlockDriverState *bs, Error **errp)
{
ERRP_GUARD();
BDRVQcow2State *s = bs->opaque;
@@ -3182,9 +3189,9 @@
*
* Returns: 0 on success, -errno on failure.
*/
-static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset,
- uint64_t new_length, PreallocMode mode,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co(BlockDriverState *bs, uint64_t offset, uint64_t new_length,
+ PreallocMode mode, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
uint64_t bytes;
@@ -3454,7 +3461,7 @@
assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2);
qcow2_opts = &create_options->u.qcow2;
- bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp);
+ bs = bdrv_co_open_blockdev_ref(qcow2_opts->file, errp);
if (bs == NULL) {
return -EIO;
}
@@ -3596,7 +3603,7 @@
ret = -EINVAL;
goto out;
}
- data_bs = bdrv_open_blockdev_ref(qcow2_opts->data_file, errp);
+ data_bs = bdrv_co_open_blockdev_ref(qcow2_opts->data_file, errp);
if (data_bs == NULL) {
ret = -EIO;
goto out;
@@ -3629,8 +3636,8 @@
}
/* Create BlockBackend to write to the image */
- blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
- errp);
+ blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+ errp);
if (!blk) {
ret = -EPERM;
goto out;
@@ -3712,9 +3719,9 @@
if (data_bs) {
qdict_put_str(options, "data-file", data_bs->node_name);
}
- blk = blk_new_open(NULL, NULL, options,
- BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
- errp);
+ blk = blk_co_new_open(NULL, NULL, options,
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
+ errp);
if (blk == NULL) {
ret = -EIO;
goto out;
@@ -3793,9 +3800,9 @@
if (data_bs) {
qdict_put_str(options, "data-file", data_bs->node_name);
}
- blk = blk_new_open(NULL, NULL, options,
- BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
- errp);
+ blk = blk_co_new_open(NULL, NULL, options,
+ BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
+ errp);
if (blk == NULL) {
ret = -EIO;
goto out;
@@ -3809,10 +3816,9 @@
return ret;
}
-static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
+ Error **errp)
{
BlockdevCreateOptions *create_options = NULL;
QDict *qdict;
@@ -3877,8 +3883,8 @@
goto finish;
}
- bs = bdrv_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+ bs = bdrv_co_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
if (bs == NULL) {
ret = -EIO;
goto finish;
@@ -3892,9 +3898,9 @@
goto finish;
}
- data_bs = bdrv_open(val, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
- errp);
+ data_bs = bdrv_co_open(val, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ errp);
if (data_bs == NULL) {
ret = -EIO;
goto finish;
@@ -3973,8 +3979,9 @@
return res >= 0 && (res & BDRV_BLOCK_ZERO) && bytes == 0;
}
-static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
{
int ret;
BDRVQcow2State *s = bs->opaque;
@@ -4057,7 +4064,7 @@
return ret;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
qcow2_co_copy_range_from(BlockDriverState *bs,
BdrvChild *src, int64_t src_offset,
BdrvChild *dst, int64_t dst_offset,
@@ -4140,7 +4147,7 @@
return ret;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
qcow2_co_copy_range_to(BlockDriverState *bs,
BdrvChild *src, int64_t src_offset,
BdrvChild *dst, int64_t dst_offset,
@@ -4208,9 +4215,9 @@
return ret;
}
-static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
- bool exact, PreallocMode prealloc,
- BdrvRequestFlags flags, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
uint64_t old_length;
@@ -4584,7 +4591,7 @@
return ret;
}
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
qcow2_co_pwritev_compressed_task(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset)
@@ -4648,7 +4655,13 @@
return ret;
}
-static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task)
+/*
+ * This function can count as GRAPH_RDLOCK because
+ * qcow2_co_pwritev_compressed_part() holds the graph lock and keeps it until
+ * this coroutine has terminated.
+ */
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_pwritev_compressed_task_entry(AioTask *task)
{
Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
@@ -4662,7 +4675,7 @@
* XXX: put compressed sectors first, then all the cluster aligned
* tables to avoid losing bytes in alignment
*/
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
int64_t offset, int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset)
@@ -4725,7 +4738,7 @@
return ret;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
qcow2_co_preadv_compressed(BlockDriverState *bs,
uint64_t l2_entry,
uint64_t offset,
@@ -5287,8 +5300,8 @@
return pos;
}
-static coroutine_fn int qcow2_co_save_vmstate(BlockDriverState *bs,
- QEMUIOVector *qiov, int64_t pos)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos);
if (offset < 0) {
@@ -5299,8 +5312,8 @@
return bs->drv->bdrv_co_pwritev_part(bs, offset, qiov->size, qiov, 0, 0);
}
-static coroutine_fn int qcow2_co_load_vmstate(BlockDriverState *bs,
- QEMUIOVector *qiov, int64_t pos)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos);
if (offset < 0) {
diff --git a/block/qcow2.h b/block/qcow2.h
index 2285f18..c59e33c 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -846,7 +846,7 @@
Error **errp);
/* qcow2-refcount.c functions */
-int coroutine_fn qcow2_refcount_init(BlockDriverState *bs);
+int coroutine_fn GRAPH_RDLOCK qcow2_refcount_init(BlockDriverState *bs);
void qcow2_refcount_close(BlockDriverState *bs);
int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
@@ -893,14 +893,17 @@
int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
BlockDriverAmendStatusCB *status_cb,
void *cb_opaque, Error **errp);
-int coroutine_fn qcow2_shrink_reftable(BlockDriverState *bs);
+int coroutine_fn GRAPH_RDLOCK qcow2_shrink_reftable(BlockDriverState *bs);
int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
int coroutine_fn qcow2_detect_metadata_preallocation(BlockDriverState *bs);
/* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size);
-int coroutine_fn qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size);
+
+int coroutine_fn GRAPH_RDLOCK
+qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size);
+
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
uint8_t *buf, int nb_sectors, bool enc, Error **errp);
@@ -918,14 +921,17 @@
void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry,
uint64_t *coffset, int *csize);
-int coroutine_fn qcow2_alloc_cluster_link_l2(BlockDriverState *bs,
- QCowL2Meta *m);
+int coroutine_fn GRAPH_RDLOCK
+qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
+
void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m);
int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, enum qcow2_discard_type type,
bool full_discard);
-int coroutine_fn qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset,
- uint64_t bytes, int flags);
+
+int coroutine_fn GRAPH_RDLOCK
+qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ int flags);
int qcow2_expand_zero_clusters(BlockDriverState *bs,
BlockDriverAmendStatusCB *status_cb,
@@ -948,9 +954,10 @@
int qcow2_read_snapshots(BlockDriverState *bs, Error **errp);
int qcow2_write_snapshots(BlockDriverState *bs);
-int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs,
- BdrvCheckResult *result,
- BdrvCheckMode fix);
+int coroutine_fn GRAPH_RDLOCK
+qcow2_check_read_snapshot_table(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix);
+
int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs,
BdrvCheckResult *result,
BdrvCheckMode fix);
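
The qcow2.h changes show how the lock requirement is pushed into the headers: GRAPH_RDLOCK becomes part of the prototype, so callers can be checked for holding the reader lock, and multi-line prototypes are reflowed to keep the annotation readable. A tiny sketch of a declaration/caller pair follows; both names are hypothetical.

/* In a header: the annotation travels with the prototype. */
int coroutine_fn GRAPH_RDLOCK
example_co_refresh_metadata(BlockDriverState *bs);

/* At a call site: an unannotated coroutine takes the reader lock itself
 * before calling in. */
static int coroutine_fn example_co_caller(BlockDriverState *bs)
{
    GRAPH_RDLOCK_GUARD();
    return example_co_refresh_metadata(bs);
}
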
diff --git a/block/qed-check.c b/block/qed-check.c
index a6612be..8fd94f4 100644
--- a/block/qed-check.c
+++ b/block/qed-check.c
@@ -107,7 +107,8 @@
/**
* Descend tables and check each cluster is referenced once only
*/
-static int coroutine_fn qed_check_l1_table(QEDCheck *check, QEDTable *table)
+static int coroutine_fn GRAPH_RDLOCK
+qed_check_l1_table(QEDCheck *check, QEDTable *table)
{
BDRVQEDState *s = check->s;
unsigned int i, num_invalid_l1 = 0;
diff --git a/block/qed-table.c b/block/qed-table.c
index e41c87a..3b331ce 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -21,8 +21,8 @@
#include "qemu/memalign.h"
/* Called with table_lock held. */
-static int coroutine_fn qed_read_table(BDRVQEDState *s, uint64_t offset,
- QEDTable *table)
+static int coroutine_fn GRAPH_RDLOCK
+qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)
{
unsigned int bytes = s->header.cluster_size * s->header.table_size;
@@ -63,9 +63,9 @@
*
* Called with table_lock held.
*/
-static int coroutine_fn qed_write_table(BDRVQEDState *s, uint64_t offset,
- QEDTable *table, unsigned int index,
- unsigned int n, bool flush)
+static int coroutine_fn GRAPH_RDLOCK
+qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
+ unsigned int index, unsigned int n, bool flush)
{
unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
unsigned int start, end, i;
diff --git a/block/qed.c b/block/qed.c
index 4473465..ed94bb6 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -100,7 +100,7 @@
*
* No new allocating reqs can start while this function runs.
*/
-static int coroutine_fn qed_write_header(BDRVQEDState *s)
+static int coroutine_fn GRAPH_RDLOCK qed_write_header(BDRVQEDState *s)
{
/* We must write full sectors for O_DIRECT but cannot necessarily generate
* the data following the header if an unrecognized compat feature is
@@ -282,11 +282,12 @@
qemu_co_mutex_unlock(&s->table_lock);
}
-static void coroutine_fn qed_need_check_timer(BDRVQEDState *s)
+static void coroutine_fn GRAPH_RDLOCK qed_need_check_timer(BDRVQEDState *s)
{
int ret;
trace_qed_need_check_timer_cb(s);
+ assert_bdrv_graph_readable();
if (!qed_plug_allocating_write_reqs(s)) {
return;
@@ -312,6 +313,7 @@
static void coroutine_fn qed_need_check_timer_entry(void *opaque)
{
BDRVQEDState *s = opaque;
+ GRAPH_RDLOCK_GUARD();
qed_need_check_timer(opaque);
bdrv_dec_in_flight(s->bs);
@@ -393,8 +395,8 @@
}
/* Called with table_lock held. */
-static int coroutine_fn bdrv_qed_do_open(BlockDriverState *bs, QDict *options,
- int flags, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
{
BDRVQEDState *s = bs->opaque;
QEDHeader le_header;
@@ -555,7 +557,7 @@
int ret;
} QEDOpenCo;
-static void coroutine_fn bdrv_qed_open_entry(void *opaque)
+static void coroutine_fn GRAPH_RDLOCK bdrv_qed_open_entry(void *opaque)
{
QEDOpenCo *qoc = opaque;
BDRVQEDState *s = qoc->bs->opaque;
@@ -577,6 +579,8 @@
};
int ret;
+ assume_graph_lock(); /* FIXME */
+
ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
if (ret < 0) {
return ret;
@@ -676,13 +680,13 @@
}
/* Create BlockBackend to write to the image */
- bs = bdrv_open_blockdev_ref(qed_opts->file, errp);
+ bs = bdrv_co_open_blockdev_ref(qed_opts->file, errp);
if (bs == NULL) {
return -EIO;
}
- blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
- errp);
+ blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+ errp);
if (!blk) {
ret = -EPERM;
goto out;
@@ -750,10 +754,9 @@
return ret;
}
-static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename,
+ QemuOpts *opts, Error **errp)
{
BlockdevCreateOptions *create_options = NULL;
QDict *qdict;
@@ -783,8 +786,8 @@
goto fail;
}
- bs = bdrv_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+ bs = bdrv_co_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
if (bs == NULL) {
ret = -EIO;
goto fail;
@@ -822,11 +825,10 @@
return ret;
}
-static int coroutine_fn bdrv_qed_co_block_status(BlockDriverState *bs,
- bool want_zero,
- int64_t pos, int64_t bytes,
- int64_t *pnum, int64_t *map,
- BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_block_status(BlockDriverState *bs, bool want_zero, int64_t pos,
+ int64_t bytes, int64_t *pnum, int64_t *map,
+ BlockDriverState **file)
{
BDRVQEDState *s = bs->opaque;
size_t len = MIN(bytes, SIZE_MAX);
@@ -879,8 +881,8 @@
* This function reads qiov->size bytes starting at pos from the backing file.
* If there is no backing file then zeroes are read.
*/
-static int coroutine_fn qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
- QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+qed_read_backing_file(BDRVQEDState *s, uint64_t pos, QEMUIOVector *qiov)
{
if (s->bs->backing) {
BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
@@ -898,9 +900,9 @@
* @len: Number of bytes
* @offset: Byte offset in image file
*/
-static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s,
- uint64_t pos, uint64_t len,
- uint64_t offset)
+static int coroutine_fn GRAPH_RDLOCK
+qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, uint64_t len,
+ uint64_t offset)
{
QEMUIOVector qiov;
int ret;
@@ -993,7 +995,7 @@
*
* Called with table_lock held.
*/
-static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK qed_aio_write_l1_update(QEDAIOCB *acb)
{
BDRVQEDState *s = acb_to_s(acb);
CachedL2Table *l2_table = acb->request.l2_table;
@@ -1023,7 +1025,8 @@
*
* Called with table_lock held.
*/
-static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
+static int coroutine_fn GRAPH_RDLOCK
+qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
{
BDRVQEDState *s = acb_to_s(acb);
bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
@@ -1061,7 +1064,7 @@
*
* Called with table_lock *not* held.
*/
-static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK qed_aio_write_main(QEDAIOCB *acb)
{
BDRVQEDState *s = acb_to_s(acb);
uint64_t offset = acb->cur_cluster +
@@ -1079,7 +1082,7 @@
*
* Called with table_lock held.
*/
-static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK qed_aio_write_cow(QEDAIOCB *acb)
{
BDRVQEDState *s = acb_to_s(acb);
uint64_t start, len, offset;
@@ -1157,7 +1160,8 @@
*
* Called with table_lock held.
*/
-static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
+static int coroutine_fn GRAPH_RDLOCK
+qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
{
BDRVQEDState *s = acb_to_s(acb);
int ret;
@@ -1220,8 +1224,8 @@
*
* Called with table_lock held.
*/
-static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
- size_t len)
+static int coroutine_fn GRAPH_RDLOCK
+qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
{
BDRVQEDState *s = acb_to_s(acb);
int r;
@@ -1263,8 +1267,8 @@
*
* Called with table_lock held.
*/
-static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
- uint64_t offset, size_t len)
+static int coroutine_fn GRAPH_RDLOCK
+qed_aio_write_data(void *opaque, int ret, uint64_t offset, size_t len)
{
QEDAIOCB *acb = opaque;
@@ -1296,8 +1300,8 @@
*
* Called with table_lock held.
*/
-static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
- uint64_t offset, size_t len)
+static int coroutine_fn GRAPH_RDLOCK
+qed_aio_read_data(void *opaque, int ret, uint64_t offset, size_t len)
{
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
@@ -1334,7 +1338,7 @@
/**
* Begin next I/O or complete the request
*/
-static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK qed_aio_next_io(QEDAIOCB *acb)
{
BDRVQEDState *s = acb_to_s(acb);
uint64_t offset;
@@ -1379,9 +1383,9 @@
return ret;
}
-static int coroutine_fn qed_co_request(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- int flags)
+static int coroutine_fn GRAPH_RDLOCK
+qed_co_request(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov,
+ int nb_sectors, int flags)
{
QEDAIOCB acb = {
.bs = bs,
@@ -1398,24 +1402,23 @@
return qed_aio_next_io(&acb);
}
-static int coroutine_fn bdrv_qed_co_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov)
{
return qed_co_request(bs, sector_num, qiov, nb_sectors, 0);
}
-static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov, int flags)
{
return qed_co_request(bs, sector_num, qiov, nb_sectors, QED_AIOCB_WRITE);
}
-static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset,
- int64_t bytes,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
{
BDRVQEDState *s = bs->opaque;
@@ -1569,8 +1572,8 @@
return ret;
}
-static void coroutine_fn bdrv_qed_co_invalidate_cache(BlockDriverState *bs,
- Error **errp)
+static void coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_invalidate_cache(BlockDriverState *bs, Error **errp)
{
BDRVQEDState *s = bs->opaque;
int ret;
@@ -1586,9 +1589,9 @@
}
}
-static int coroutine_fn bdrv_qed_co_check(BlockDriverState *bs,
- BdrvCheckResult *result,
- BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_check(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix)
{
BDRVQEDState *s = bs->opaque;
int ret;
diff --git a/block/qed.h b/block/qed.h
index 3d12bf7..988654c 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -200,33 +200,40 @@
/**
* Table I/O functions
*/
-int coroutine_fn qed_read_l1_table_sync(BDRVQEDState *s);
-int coroutine_fn qed_write_l1_table(BDRVQEDState *s, unsigned int index,
- unsigned int n);
-int coroutine_fn qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
- unsigned int n);
-int coroutine_fn qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
- uint64_t offset);
-int coroutine_fn qed_read_l2_table(BDRVQEDState *s, QEDRequest *request,
- uint64_t offset);
-int coroutine_fn qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
- unsigned int index, unsigned int n,
- bool flush);
-int coroutine_fn qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
- unsigned int index, unsigned int n,
- bool flush);
+int coroutine_fn GRAPH_RDLOCK qed_read_l1_table_sync(BDRVQEDState *s);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, unsigned int n);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, unsigned int index,
+ unsigned int n, bool flush);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
+ unsigned int index, unsigned int n, bool flush);
/**
* Cluster functions
*/
-int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
- uint64_t pos, size_t *len,
- uint64_t *img_offset);
+int coroutine_fn GRAPH_RDLOCK
+qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
+ size_t *len, uint64_t *img_offset);
/**
* Consistency check
*/
-int coroutine_fn qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);
+int coroutine_fn GRAPH_RDLOCK
+qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);
QEDTable *qed_alloc_table(BDRVQEDState *s);
diff --git a/block/quorum.c b/block/quorum.c
index d1dcf2e..ff5a0a2 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -270,7 +270,11 @@
}
}
-static void coroutine_fn quorum_rewrite_entry(void *opaque)
+/*
+ * This function can count as GRAPH_RDLOCK because read_quorum_children() holds
+ * the graph lock and keeps it until this coroutine has terminated.
+ */
+static void coroutine_fn GRAPH_RDLOCK quorum_rewrite_entry(void *opaque)
{
QuorumCo *co = opaque;
QuorumAIOCB *acb = co->acb;
@@ -290,8 +294,8 @@
}
}
-static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb,
- QuorumVoteValue *value)
+static bool coroutine_fn GRAPH_RDLOCK
+quorum_rewrite_bad_versions(QuorumAIOCB *acb, QuorumVoteValue *value)
{
QuorumVoteVersion *version;
QuorumVoteItem *item;
@@ -491,7 +495,7 @@
return ret;
}
-static void quorum_vote(QuorumAIOCB *acb)
+static void coroutine_fn GRAPH_RDLOCK quorum_vote(QuorumAIOCB *acb)
{
bool quorum = true;
int i, j, ret;
@@ -571,7 +575,11 @@
quorum_free_vote_list(&acb->votes);
}
-static void coroutine_fn read_quorum_children_entry(void *opaque)
+/*
+ * This function can count as GRAPH_RDLOCK because read_quorum_children() holds
+ * the graph lock and keeps it until this coroutine has terminated.
+ */
+static void coroutine_fn GRAPH_RDLOCK read_quorum_children_entry(void *opaque)
{
QuorumCo *co = opaque;
QuorumAIOCB *acb = co->acb;
@@ -599,7 +607,7 @@
}
}
-static int coroutine_fn read_quorum_children(QuorumAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK read_quorum_children(QuorumAIOCB *acb)
{
BDRVQuorumState *s = acb->bs->opaque;
int i;
@@ -640,7 +648,7 @@
return acb->vote_ret;
}
-static int coroutine_fn read_fifo_child(QuorumAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK read_fifo_child(QuorumAIOCB *acb)
{
BDRVQuorumState *s = acb->bs->opaque;
int n, ret;
@@ -661,10 +669,9 @@
return ret;
}
-static int coroutine_fn quorum_co_preadv(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+quorum_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
BDRVQuorumState *s = bs->opaque;
QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
@@ -683,7 +690,11 @@
return ret;
}
-static void coroutine_fn write_quorum_entry(void *opaque)
+/*
+ * This function can count as GRAPH_RDLOCK because quorum_co_pwritev() holds the
+ * graph lock and keeps it until this coroutine has terminated.
+ */
+static void coroutine_fn GRAPH_RDLOCK write_quorum_entry(void *opaque)
{
QuorumCo *co = opaque;
QuorumAIOCB *acb = co->acb;
@@ -714,9 +725,9 @@
}
}
-static int coroutine_fn quorum_co_pwritev(BlockDriverState *bs, int64_t offset,
- int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+quorum_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
BDRVQuorumState *s = bs->opaque;
QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
@@ -745,16 +756,16 @@
return ret;
}
-static int coroutine_fn quorum_co_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- BdrvRequestFlags flags)
-
+static int coroutine_fn GRAPH_RDLOCK
+quorum_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
{
return quorum_co_pwritev(bs, offset, bytes, NULL,
flags | BDRV_REQ_ZERO_WRITE);
}
-static int64_t coroutine_fn quorum_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+quorum_co_getlength(BlockDriverState *bs)
{
BDRVQuorumState *s = bs->opaque;
int64_t result;
@@ -778,7 +789,7 @@
return result;
}
-static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
+static coroutine_fn GRAPH_RDLOCK int quorum_co_flush(BlockDriverState *bs)
{
BDRVQuorumState *s = bs->opaque;
QuorumVoteVersion *winner = NULL;
@@ -1217,11 +1228,10 @@
* return BDRV_BLOCK_ZERO if *all* children agree that a certain
* region contains zeroes, and BDRV_BLOCK_DATA otherwise.
*/
-static int coroutine_fn quorum_co_block_status(BlockDriverState *bs,
- bool want_zero,
- int64_t offset, int64_t count,
- int64_t *pnum, int64_t *map,
- BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+quorum_co_block_status(BlockDriverState *bs, bool want_zero,
+ int64_t offset, int64_t count,
+ int64_t *pnum, int64_t *map, BlockDriverState **file)
{
BDRVQuorumState *s = bs->opaque;
int i, ret;
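
The quorum.c comments above spell out why the *_entry coroutines may carry GRAPH_RDLOCK without taking the lock themselves: the coroutine that spawns them already holds it and keeps it until they have terminated. A sketch of that relationship is below; both functions are hypothetical, while qemu_coroutine_create() and qemu_coroutine_enter() are QEMU's standard coroutine primitives.

/* Hypothetical child coroutine: never takes the lock itself. */
static void coroutine_fn GRAPH_RDLOCK example_child_entry(void *opaque)
{
    BlockDriverState *bs = opaque;

    /* Holds because the spawner below keeps the graph read-locked for the
     * child's entire lifetime. */
    assert_bdrv_graph_readable();
    (void)bs;
}

/* Hypothetical spawner: annotated GRAPH_RDLOCK, so the lock is held here. */
static void coroutine_fn GRAPH_RDLOCK example_spawn(BlockDriverState *bs)
{
    Coroutine *co = qemu_coroutine_create(example_child_entry, bs);

    /* This child never yields, so it has terminated by the time this call
     * returns; the real quorum code instead yields until every child has
     * signalled completion before the lock is released. */
    qemu_coroutine_enter(co);
}
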
diff --git a/block/raw-format.c b/block/raw-format.c
index 0dc469b..66783ed 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -203,9 +203,9 @@
return 0;
}
-static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
- int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
int ret;
@@ -218,9 +218,9 @@
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
}
-static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
- int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
void *buf = NULL;
BlockDriver *drv;
@@ -292,9 +292,9 @@
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
}
-static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
{
int ret;
@@ -305,8 +305,8 @@
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
}
-static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
- int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
int ret;
@@ -317,7 +317,8 @@
return bdrv_co_pdiscard(bs->file, offset, bytes);
}
-static int64_t coroutine_fn raw_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+raw_co_getlength(BlockDriverState *bs)
{
int64_t len;
BDRVRawState *s = bs->opaque;
@@ -384,9 +385,9 @@
}
}
-static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
- bool exact, PreallocMode prealloc,
- BdrvRequestFlags flags, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
{
BDRVRawState *s = bs->opaque;
@@ -405,18 +406,20 @@
return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
}
-static void coroutine_fn raw_co_eject(BlockDriverState *bs, bool eject_flag)
+static void coroutine_fn GRAPH_RDLOCK
+raw_co_eject(BlockDriverState *bs, bool eject_flag)
{
bdrv_co_eject(bs->file->bs, eject_flag);
}
-static void coroutine_fn raw_co_lock_medium(BlockDriverState *bs, bool locked)
+static void coroutine_fn GRAPH_RDLOCK
+raw_co_lock_medium(BlockDriverState *bs, bool locked)
{
bdrv_co_lock_medium(bs->file->bs, locked);
}
-static int coroutine_fn raw_co_ioctl(BlockDriverState *bs,
- unsigned long int req, void *buf)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
BDRVRawState *s = bs->opaque;
if (s->offset || s->has_size) {
@@ -430,10 +433,9 @@
return bdrv_has_zero_init(bs->file->bs);
}
-static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_create_opts(BlockDriver *drv, const char *filename,
+ QemuOpts *opts, Error **errp)
{
return bdrv_co_create_file(filename, opts, errp);
}
@@ -536,14 +538,12 @@
return bdrv_probe_geometry(bs->file->bs, geo);
}
-static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
- BdrvChild *src,
- int64_t src_offset,
- BdrvChild *dst,
- int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_copy_range_from(BlockDriverState *bs,
+ BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes, BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags)
{
int ret;
@@ -555,14 +555,12 @@
bytes, read_flags, write_flags);
}
-static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
- BdrvChild *src,
- int64_t src_offset,
- BdrvChild *dst,
- int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_copy_range_to(BlockDriverState *bs,
+ BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes, BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags)
{
int ret;
diff --git a/block/rbd.c b/block/rbd.c
index 5e102fe..9786714 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -72,6 +72,16 @@
'L', 'U', 'K', 'S', 0xBA, 0xBE, 0, 2
};
+static const char rbd_layered_luks_header_verification[
+ RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = {
+ 'R', 'B', 'D', 'L', 0xBA, 0xBE, 0, 1
+};
+
+static const char rbd_layered_luks2_header_verification[
+ RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = {
+ 'R', 'B', 'D', 'L', 0xBA, 0xBE, 0, 2
+};
+
typedef enum {
RBD_AIO_READ,
RBD_AIO_WRITE,
@@ -386,7 +396,6 @@
{
int r = 0;
g_autofree char *passphrase = NULL;
- size_t passphrase_len;
rbd_encryption_format_t format;
rbd_encryption_options_t opts;
rbd_encryption_luks1_format_options_t luks_opts;
@@ -408,12 +417,12 @@
opts_size = sizeof(luks_opts);
r = qemu_rbd_convert_luks_create_options(
qapi_RbdEncryptionCreateOptionsLUKS_base(&encrypt->u.luks),
- &luks_opts.alg, &passphrase, &passphrase_len, errp);
+ &luks_opts.alg, &passphrase, &luks_opts.passphrase_size,
+ errp);
if (r < 0) {
return r;
}
luks_opts.passphrase = passphrase;
- luks_opts.passphrase_size = passphrase_len;
break;
}
case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2: {
@@ -424,12 +433,12 @@
r = qemu_rbd_convert_luks_create_options(
qapi_RbdEncryptionCreateOptionsLUKS2_base(
&encrypt->u.luks2),
- &luks2_opts.alg, &passphrase, &passphrase_len, errp);
+ &luks2_opts.alg, &passphrase, &luks2_opts.passphrase_size,
+ errp);
if (r < 0) {
return r;
}
luks2_opts.passphrase = passphrase;
- luks2_opts.passphrase_size = passphrase_len;
break;
}
default: {
@@ -468,9 +477,11 @@
{
int r = 0;
g_autofree char *passphrase = NULL;
- size_t passphrase_len;
rbd_encryption_luks1_format_options_t luks_opts;
rbd_encryption_luks2_format_options_t luks2_opts;
+#ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2
+ rbd_encryption_luks_format_options_t luks_any_opts;
+#endif
rbd_encryption_format_t format;
rbd_encryption_options_t opts;
size_t opts_size;
@@ -483,12 +494,11 @@
opts_size = sizeof(luks_opts);
r = qemu_rbd_convert_luks_options(
qapi_RbdEncryptionOptionsLUKS_base(&encrypt->u.luks),
- &passphrase, &passphrase_len, errp);
+ &passphrase, &luks_opts.passphrase_size, errp);
if (r < 0) {
return r;
}
luks_opts.passphrase = passphrase;
- luks_opts.passphrase_size = passphrase_len;
break;
}
case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2: {
@@ -498,14 +508,29 @@
opts_size = sizeof(luks2_opts);
r = qemu_rbd_convert_luks_options(
qapi_RbdEncryptionOptionsLUKS2_base(&encrypt->u.luks2),
- &passphrase, &passphrase_len, errp);
+ &passphrase, &luks2_opts.passphrase_size, errp);
if (r < 0) {
return r;
}
luks2_opts.passphrase = passphrase;
- luks2_opts.passphrase_size = passphrase_len;
break;
}
+#ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2
+ case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS_ANY: {
+ memset(&luks_any_opts, 0, sizeof(luks_any_opts));
+ format = RBD_ENCRYPTION_FORMAT_LUKS;
+ opts = &luks_any_opts;
+ opts_size = sizeof(luks_any_opts);
+ r = qemu_rbd_convert_luks_options(
+ qapi_RbdEncryptionOptionsLUKSAny_base(&encrypt->u.luks_any),
+ &passphrase, &luks_any_opts.passphrase_size, errp);
+ if (r < 0) {
+ return r;
+ }
+ luks_any_opts.passphrase = passphrase;
+ break;
+ }
+#endif
default: {
r = -ENOTSUP;
error_setg_errno(
@@ -523,6 +548,128 @@
return 0;
}
+
+#ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2
+static int qemu_rbd_encryption_load2(rbd_image_t image,
+ RbdEncryptionOptions *encrypt,
+ Error **errp)
+{
+ int r = 0;
+ int encrypt_count = 1;
+ int i;
+ RbdEncryptionOptions *curr_encrypt;
+ rbd_encryption_spec_t *specs;
+ rbd_encryption_luks1_format_options_t *luks_opts;
+ rbd_encryption_luks2_format_options_t *luks2_opts;
+ rbd_encryption_luks_format_options_t *luks_any_opts;
+
+ /* count encryption options */
+ for (curr_encrypt = encrypt->parent; curr_encrypt;
+ curr_encrypt = curr_encrypt->parent) {
+ ++encrypt_count;
+ }
+
+ specs = g_new0(rbd_encryption_spec_t, encrypt_count);
+
+ curr_encrypt = encrypt;
+ for (i = 0; i < encrypt_count; ++i) {
+ switch (curr_encrypt->format) {
+ case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS: {
+ specs[i].format = RBD_ENCRYPTION_FORMAT_LUKS1;
+
+ luks_opts = g_new0(rbd_encryption_luks1_format_options_t, 1);
+ specs[i].opts = luks_opts;
+ specs[i].opts_size = sizeof(*luks_opts);
+
+ r = qemu_rbd_convert_luks_options(
+ qapi_RbdEncryptionOptionsLUKS_base(
+ &curr_encrypt->u.luks),
+ (char **)&luks_opts->passphrase,
+ &luks_opts->passphrase_size,
+ errp);
+ break;
+ }
+ case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2: {
+ specs[i].format = RBD_ENCRYPTION_FORMAT_LUKS2;
+
+ luks2_opts = g_new0(rbd_encryption_luks2_format_options_t, 1);
+ specs[i].opts = luks2_opts;
+ specs[i].opts_size = sizeof(*luks2_opts);
+
+ r = qemu_rbd_convert_luks_options(
+ qapi_RbdEncryptionOptionsLUKS2_base(
+ &curr_encrypt->u.luks2),
+ (char **)&luks2_opts->passphrase,
+ &luks2_opts->passphrase_size,
+ errp);
+ break;
+ }
+ case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS_ANY: {
+ specs[i].format = RBD_ENCRYPTION_FORMAT_LUKS;
+
+ luks_any_opts = g_new0(rbd_encryption_luks_format_options_t, 1);
+ specs[i].opts = luks_any_opts;
+ specs[i].opts_size = sizeof(*luks_any_opts);
+
+ r = qemu_rbd_convert_luks_options(
+ qapi_RbdEncryptionOptionsLUKSAny_base(
+ &curr_encrypt->u.luks_any),
+ (char **)&luks_any_opts->passphrase,
+ &luks_any_opts->passphrase_size,
+ errp);
+ break;
+ }
+ default: {
+ r = -ENOTSUP;
+ error_setg_errno(
+ errp, -r, "unknown image encryption format: %u",
+ curr_encrypt->format);
+ }
+ }
+
+ if (r < 0) {
+ goto exit;
+ }
+
+ curr_encrypt = curr_encrypt->parent;
+ }
+
+ r = rbd_encryption_load2(image, specs, encrypt_count);
+ if (r < 0) {
+ error_setg_errno(errp, -r, "layered encryption load fail");
+ goto exit;
+ }
+
+exit:
+ for (i = 0; i < encrypt_count; ++i) {
+ if (!specs[i].opts) {
+ break;
+ }
+
+ switch (specs[i].format) {
+ case RBD_ENCRYPTION_FORMAT_LUKS1: {
+ luks_opts = specs[i].opts;
+ g_free((void *)luks_opts->passphrase);
+ break;
+ }
+ case RBD_ENCRYPTION_FORMAT_LUKS2: {
+ luks2_opts = specs[i].opts;
+ g_free((void *)luks2_opts->passphrase);
+ break;
+ }
+ case RBD_ENCRYPTION_FORMAT_LUKS: {
+ luks_any_opts = specs[i].opts;
+ g_free((void *)luks_any_opts->passphrase);
+ break;
+ }
+ }
+
+ g_free(specs[i].opts);
+ }
+ g_free(specs);
+ return r;
+}
+#endif
#endif
/* FIXME Deprecate and remove keypairs or make it available in QMP. */
@@ -989,7 +1136,16 @@
if (opts->encrypt) {
#ifdef LIBRBD_SUPPORTS_ENCRYPTION
- r = qemu_rbd_encryption_load(s->image, opts->encrypt, errp);
+ if (opts->encrypt->parent) {
+#ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2
+ r = qemu_rbd_encryption_load2(s->image, opts->encrypt, errp);
+#else
+ r = -ENOTSUP;
+ error_setg(errp, "RBD library does not support layered encryption");
+#endif
+ } else {
+ r = qemu_rbd_encryption_load(s->image, opts->encrypt, errp);
+ }
if (r < 0) {
goto failed_post_open;
}
@@ -1281,6 +1437,16 @@
spec_info->u.rbd.data->encryption_format =
RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2;
spec_info->u.rbd.data->has_encryption_format = true;
+ } else if (memcmp(buf, rbd_layered_luks_header_verification,
+ RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
+ spec_info->u.rbd.data->encryption_format =
+ RBD_IMAGE_ENCRYPTION_FORMAT_LUKS;
+ spec_info->u.rbd.data->has_encryption_format = true;
+ } else if (memcmp(buf, rbd_layered_luks2_header_verification,
+ RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
+ spec_info->u.rbd.data->encryption_format =
+ RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2;
+ spec_info->u.rbd.data->has_encryption_format = true;
} else {
spec_info->u.rbd.data->has_encryption_format = false;
}
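
The qemu_rbd_encryption_load2() addition above walks the chain of encryption
options through the parent pointer, builds one spec entry per layer, and
releases every per-layer passphrase on both the success and the error path.
A minimal standalone sketch of that build-and-cleanup pattern, using
simplified hypothetical types in place of the librbd and QAPI structures,
might look like this:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Simplified stand-ins for the librbd/QAPI types used in the hunk above. */
    typedef struct EncryptOpts {
        const char *passphrase;          /* per-layer secret */
        struct EncryptOpts *parent;      /* next (older) layer, or NULL */
    } EncryptOpts;

    typedef struct {
        char *passphrase;                /* owned copy, freed on cleanup */
        size_t passphrase_size;
    } LayerSpec;

    /* Build one spec per layer, starting with the newest layer. */
    static int build_layer_specs(EncryptOpts *encrypt, LayerSpec **out, int *count)
    {
        int n = 0, i;
        EncryptOpts *cur;
        LayerSpec *specs;

        for (cur = encrypt; cur; cur = cur->parent) {
            n++;
        }

        specs = calloc(n, sizeof(*specs));
        if (!specs) {
            return -1;
        }

        cur = encrypt;
        for (i = 0; i < n; i++) {
            specs[i].passphrase = strdup(cur->passphrase);
            if (!specs[i].passphrase) {
                goto fail;
            }
            specs[i].passphrase_size = strlen(cur->passphrase);
            cur = cur->parent;
        }

        *out = specs;
        *count = n;
        return 0;

    fail:
        /* Free whatever was allocated so far, mirroring the exit path above. */
        for (i = 0; i < n; i++) {
            free(specs[i].passphrase);
        }
        free(specs);
        return -1;
    }

    int main(void)
    {
        EncryptOpts base = { "old-secret", NULL };
        EncryptOpts top  = { "new-secret", &base };
        LayerSpec *specs;
        int count, i;

        if (build_layer_specs(&top, &specs, &count) == 0) {
            printf("built %d layer spec(s)\n", count);
            for (i = 0; i < count; i++) {
                free(specs[i].passphrase);
            }
            free(specs);
        }
        return 0;
    }
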
diff --git a/block/replication.c b/block/replication.c
index a27417d..de01f96 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -179,7 +179,8 @@
return;
}
-static int64_t coroutine_fn replication_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+replication_co_getlength(BlockDriverState *bs)
{
return bdrv_co_getlength(bs->file->bs);
}
@@ -220,10 +221,9 @@
return ret;
}
-static coroutine_fn int replication_co_readv(BlockDriverState *bs,
- int64_t sector_num,
- int remaining_sectors,
- QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+replication_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int remaining_sectors, QEMUIOVector *qiov)
{
BDRVReplicationState *s = bs->opaque;
int ret;
@@ -244,11 +244,9 @@
return replication_return_value(s, ret);
}
-static coroutine_fn int replication_co_writev(BlockDriverState *bs,
- int64_t sector_num,
- int remaining_sectors,
- QEMUIOVector *qiov,
- int flags)
+static int coroutine_fn GRAPH_RDLOCK
+replication_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int remaining_sectors, QEMUIOVector *qiov, int flags)
{
BDRVReplicationState *s = bs->opaque;
QEMUIOVector hd_qiov;
diff --git a/block/snapshot-access.c b/block/snapshot-access.c
index 0a30ec6..67ea339 100644
--- a/block/snapshot-access.c
+++ b/block/snapshot-access.c
@@ -26,7 +26,7 @@
#include "qemu/cutils.h"
#include "block/block_int.h"
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
snapshot_access_co_preadv_part(BlockDriverState *bs,
int64_t offset, int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset,
@@ -39,7 +39,7 @@
return bdrv_co_preadv_snapshot(bs->file, offset, bytes, qiov, qiov_offset);
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
snapshot_access_co_block_status(BlockDriverState *bs,
bool want_zero, int64_t offset,
int64_t bytes, int64_t *pnum,
@@ -49,8 +49,8 @@
bytes, pnum, map, file);
}
-static int coroutine_fn snapshot_access_co_pdiscard(BlockDriverState *bs,
- int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+snapshot_access_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
return bdrv_co_pdiscard_snapshot(bs->file->bs, offset, bytes);
}
diff --git a/block/stream.c b/block/stream.c
index 8744ad1..7f9e1ec 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -16,7 +16,6 @@
#include "block/block_int.h"
#include "block/blockjob_int.h"
#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qdict.h"
#include "qemu/ratelimit.h"
#include "sysemu/block-backend.h"
@@ -141,9 +140,11 @@
return 0;
}
- len = bdrv_getlength(s->target_bs);
- if (len < 0) {
- return len;
+ WITH_GRAPH_RDLOCK_GUARD() {
+ len = bdrv_co_getlength(s->target_bs);
+ if (len < 0) {
+ return len;
+ }
}
job_progress_set_remaining(&s->common.job, len);
@@ -161,21 +162,25 @@
copy = false;
- ret = bdrv_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, &n);
- if (ret == 1) {
- /* Allocated in the top, no need to copy. */
- } else if (ret >= 0) {
- /* Copy if allocated in the intermediate images. Limit to the
- * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE). */
- ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
- s->base_overlay, true,
- offset, n, &n);
- /* Finish early if end of backing file has been reached */
- if (ret == 0 && n == 0) {
- n = len - offset;
- }
+ WITH_GRAPH_RDLOCK_GUARD() {
+ ret = bdrv_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, &n);
+ if (ret == 1) {
+ /* Allocated in the top, no need to copy. */
+ } else if (ret >= 0) {
+ /*
+ * Copy if allocated in the intermediate images. Limit to the
+ * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE).
+ */
+ ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
+ s->base_overlay, true,
+ offset, n, &n);
+ /* Finish early if end of backing file has been reached */
+ if (ret == 0 && n == 0) {
+ n = len - offset;
+ }
- copy = (ret > 0);
+ copy = (ret > 0);
+ }
}
trace_stream_one_iteration(s, offset, n, ret);
if (copy) {
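
In the stream.c hunk above, the graph-touching calls (bdrv_co_getlength(),
bdrv_is_allocated(), bdrv_is_allocated_above()) are now wrapped in
WITH_GRAPH_RDLOCK_GUARD(), so the reader lock is held exactly for the
duration of those calls. A rough standalone sketch of such a scoped guard,
built here on a plain pthread rwlock and a local macro rather than QEMU's
real primitives, could look like this:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical reader/writer lock standing in for the block-graph lock. */
    static pthread_rwlock_t graph_lock = PTHREAD_RWLOCK_INITIALIZER;

    typedef struct {
        bool active;
    } ScopedRdLock;

    static ScopedRdLock scoped_rdlock_acquire(void)
    {
        pthread_rwlock_rdlock(&graph_lock);
        return (ScopedRdLock){ .active = true };
    }

    /* Runs automatically when the guard variable leaves scope (GCC/Clang). */
    static void scoped_rdlock_release(ScopedRdLock *g)
    {
        (void)g;
        pthread_rwlock_unlock(&graph_lock);
    }

    /* Run the following block exactly once with the read lock held. */
    #define WITH_RDLOCK_GUARD() \
        for (__attribute__((cleanup(scoped_rdlock_release))) ScopedRdLock _g = \
                 scoped_rdlock_acquire(); \
             _g.active; _g.active = false)

    static long query_length(void)
    {
        /* Pretend this walks the graph, like bdrv_co_getlength() does. */
        return 4096;
    }

    int main(void)
    {
        long len = -1;

        WITH_RDLOCK_GUARD() {
            len = query_length();
        }

        printf("len = %ld\n", len);
        return len < 0;
    }
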
diff --git a/block/throttle.c b/block/throttle.c
index 64fa0f5..3aaef18 100644
--- a/block/throttle.c
+++ b/block/throttle.c
@@ -106,15 +106,15 @@
}
-static int64_t coroutine_fn throttle_co_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+throttle_co_getlength(BlockDriverState *bs)
{
return bdrv_co_getlength(bs->file->bs);
}
-static int coroutine_fn throttle_co_preadv(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+throttle_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
ThrottleGroupMember *tgm = bs->opaque;
@@ -123,10 +123,9 @@
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
}
-static int coroutine_fn throttle_co_pwritev(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+throttle_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
ThrottleGroupMember *tgm = bs->opaque;
throttle_group_co_io_limits_intercept(tgm, bytes, true);
@@ -134,9 +133,9 @@
return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
}
-static int coroutine_fn throttle_co_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+throttle_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
{
ThrottleGroupMember *tgm = bs->opaque;
throttle_group_co_io_limits_intercept(tgm, bytes, true);
@@ -144,8 +143,8 @@
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
}
-static int coroutine_fn throttle_co_pdiscard(BlockDriverState *bs,
- int64_t offset, int64_t bytes)
+static int coroutine_fn GRAPH_RDLOCK
+throttle_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
ThrottleGroupMember *tgm = bs->opaque;
throttle_group_co_io_limits_intercept(tgm, bytes, true);
@@ -153,16 +152,15 @@
return bdrv_co_pdiscard(bs->file, offset, bytes);
}
-static int coroutine_fn throttle_co_pwritev_compressed(BlockDriverState *bs,
- int64_t offset,
- int64_t bytes,
- QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+throttle_co_pwritev_compressed(BlockDriverState *bs, int64_t offset,
+ int64_t bytes, QEMUIOVector *qiov)
{
return throttle_co_pwritev(bs, offset, bytes, qiov,
BDRV_REQ_WRITE_COMPRESSED);
}
-static int coroutine_fn throttle_co_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK throttle_co_flush(BlockDriverState *bs)
{
return bdrv_co_flush(bs->file->bs);
}
diff --git a/block/vdi.c b/block/vdi.c
index 9c8736b..f2434d6 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -544,7 +544,7 @@
(s->header.image_type == VDI_TYPE_STATIC ? BDRV_BLOCK_RECURSE : 0);
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
vdi_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
@@ -600,7 +600,7 @@
return ret;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
vdi_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
@@ -800,14 +800,14 @@
}
/* Create BlockBackend to write to the image */
- bs_file = bdrv_open_blockdev_ref(vdi_opts->file, errp);
+ bs_file = bdrv_co_open_blockdev_ref(vdi_opts->file, errp);
if (!bs_file) {
ret = -EIO;
goto exit;
}
- blk = blk_new_with_bs(bs_file, BLK_PERM_WRITE | BLK_PERM_RESIZE,
- BLK_PERM_ALL, errp);
+ blk = blk_co_new_with_bs(bs_file, BLK_PERM_WRITE | BLK_PERM_RESIZE,
+ BLK_PERM_ALL, errp);
if (!blk) {
ret = -EPERM;
goto exit;
@@ -898,10 +898,9 @@
return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp);
}
-static int coroutine_fn vdi_co_create_opts(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+vdi_co_create_opts(BlockDriver *drv, const char *filename,
+ QemuOpts *opts, Error **errp)
{
QDict *qdict = NULL;
BlockdevCreateOptions *create_options = NULL;
@@ -940,8 +939,8 @@
goto done;
}
- bs_file = bdrv_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+ bs_file = bdrv_co_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
if (!bs_file) {
ret = -EIO;
goto done;
diff --git a/block/vhdx.c b/block/vhdx.c
index ef1f65d..8142072 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1172,8 +1172,9 @@
}
-static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+vhdx_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov)
{
BDRVVHDXState *s = bs->opaque;
int ret = 0;
@@ -1324,9 +1325,9 @@
return ret;
}
-static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov,
- int flags)
+static int coroutine_fn GRAPH_RDLOCK
+vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov, int flags)
{
int ret = -ENOTSUP;
BDRVVHDXState *s = bs->opaque;
@@ -1991,13 +1992,13 @@
}
/* Create BlockBackend to write to the image */
- bs = bdrv_open_blockdev_ref(vhdx_opts->file, errp);
+ bs = bdrv_co_open_blockdev_ref(vhdx_opts->file, errp);
if (bs == NULL) {
return -EIO;
}
- blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
- errp);
+ blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+ errp);
if (!blk) {
ret = -EPERM;
goto delete_and_exit;
@@ -2058,10 +2059,9 @@
return ret;
}
-static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+vhdx_co_create_opts(BlockDriver *drv, const char *filename,
+ QemuOpts *opts, Error **errp)
{
BlockdevCreateOptions *create_options = NULL;
QDict *qdict;
@@ -2090,8 +2090,8 @@
goto fail;
}
- bs = bdrv_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+ bs = bdrv_co_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
if (bs == NULL) {
ret = -EIO;
goto fail;
diff --git a/block/vmdk.c b/block/vmdk.c
index 5b0eae8..f5f4901 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1403,13 +1403,11 @@
* [@skip_start_sector, @skip_end_sector) is not copied or written, and leave
* it for call to write user data in the request.
*/
-static int coroutine_fn get_whole_cluster(BlockDriverState *bs,
- VmdkExtent *extent,
- uint64_t cluster_offset,
- uint64_t offset,
- uint64_t skip_start_bytes,
- uint64_t skip_end_bytes,
- bool zeroed)
+static int coroutine_fn GRAPH_RDLOCK
+get_whole_cluster(BlockDriverState *bs, VmdkExtent *extent,
+ uint64_t cluster_offset, uint64_t offset,
+ uint64_t skip_start_bytes, uint64_t skip_end_bytes,
+ bool zeroed)
{
int ret = VMDK_OK;
int64_t cluster_bytes;
@@ -1484,8 +1482,8 @@
return ret;
}
-static int coroutine_fn vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
- uint32_t offset)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, uint32_t offset)
{
offset = cpu_to_le32(offset);
/* update L2 table */
@@ -1536,14 +1534,11 @@
* VMDK_UNALLOC if cluster is not mapped and @allocate is false.
* VMDK_ERROR if failed.
*/
-static int coroutine_fn get_cluster_offset(BlockDriverState *bs,
- VmdkExtent *extent,
- VmdkMetaData *m_data,
- uint64_t offset,
- bool allocate,
- uint64_t *cluster_offset,
- uint64_t skip_start_bytes,
- uint64_t skip_end_bytes)
+static int coroutine_fn GRAPH_RDLOCK
+get_cluster_offset(BlockDriverState *bs, VmdkExtent *extent,
+ VmdkMetaData *m_data, uint64_t offset, bool allocate,
+ uint64_t *cluster_offset, uint64_t skip_start_bytes,
+ uint64_t skip_end_bytes)
{
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
@@ -1736,11 +1731,10 @@
return extent_relative_offset % cluster_size;
}
-static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs,
- bool want_zero,
- int64_t offset, int64_t bytes,
- int64_t *pnum, int64_t *map,
- BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_block_status(BlockDriverState *bs, bool want_zero,
+ int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file)
{
BDRVVmdkState *s = bs->opaque;
int64_t index_in_cluster, n, ret;
@@ -1785,7 +1779,7 @@
return ret;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
int64_t offset_in_cluster, QEMUIOVector *qiov,
uint64_t qiov_offset, uint64_t n_bytes,
@@ -1867,10 +1861,9 @@
return ret;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
- int64_t offset_in_cluster, QEMUIOVector *qiov,
- int bytes)
+ int64_t offset_in_cluster, QEMUIOVector *qiov, int bytes)
{
int ret;
int cluster_bytes, buf_bytes;
@@ -1934,7 +1927,7 @@
return ret;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
vmdk_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
@@ -2016,9 +2009,9 @@
*
* Returns: error code with 0 for success.
*/
-static int coroutine_fn vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
- uint64_t bytes, QEMUIOVector *qiov,
- bool zeroed, bool zero_dry_run)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, bool zeroed, bool zero_dry_run)
{
BDRVVmdkState *s = bs->opaque;
VmdkExtent *extent = NULL;
@@ -2114,7 +2107,7 @@
return 0;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
vmdk_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
@@ -2126,7 +2119,7 @@
return ret;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
vmdk_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov)
{
@@ -2154,10 +2147,9 @@
return vmdk_co_pwritev(bs, offset, bytes, qiov, 0);
}
-static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset,
- int64_t bytes,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
{
int ret;
BDRVVmdkState *s = bs->opaque;
@@ -2285,11 +2277,10 @@
return ret;
}
-static int coroutine_fn vmdk_create_extent(const char *filename,
- int64_t filesize, bool flat,
- bool compress, bool zeroed_grain,
- BlockBackend **pbb,
- QemuOpts *opts, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_create_extent(const char *filename, int64_t filesize, bool flat,
+ bool compress, bool zeroed_grain, BlockBackend **pbb,
+ QemuOpts *opts, Error **errp)
{
int ret;
BlockBackend *blk = NULL;
@@ -2299,9 +2290,9 @@
goto exit;
}
- blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
- errp);
+ blk = blk_co_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+ errp);
if (blk == NULL) {
ret = -EIO;
goto exit;
@@ -2367,14 +2358,10 @@
* non-split format.
* idx >= 1: get the n-th extent if in a split subformat
*/
-typedef BlockBackend * coroutine_fn (*vmdk_create_extent_fn)(int64_t size,
- int idx,
- bool flat,
- bool split,
- bool compress,
- bool zeroed_grain,
- void *opaque,
- Error **errp);
+typedef BlockBackend * coroutine_fn /* GRAPH_RDLOCK */
+ (*vmdk_create_extent_fn)(int64_t size, int idx, bool flat, bool split,
+ bool compress, bool zeroed_grain, void *opaque,
+ Error **errp);
static void vmdk_desc_add_extent(GString *desc,
const char *extent_line_fmt,
@@ -2387,17 +2374,18 @@
g_free(basename);
}
-static int coroutine_fn vmdk_co_do_create(int64_t size,
- BlockdevVmdkSubformat subformat,
- BlockdevVmdkAdapterType adapter_type,
- const char *backing_file,
- const char *hw_version,
- const char *toolsversion,
- bool compat6,
- bool zeroed_grain,
- vmdk_create_extent_fn extent_fn,
- void *opaque,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_do_create(int64_t size,
+ BlockdevVmdkSubformat subformat,
+ BlockdevVmdkAdapterType adapter_type,
+ const char *backing_file,
+ const char *hw_version,
+ const char *toolsversion,
+ bool compat6,
+ bool zeroed_grain,
+ vmdk_create_extent_fn extent_fn,
+ void *opaque,
+ Error **errp)
{
int extent_idx;
BlockBackend *blk = NULL;
@@ -2518,8 +2506,8 @@
}
assert(full_backing);
- backing = blk_new_open(full_backing, NULL, NULL,
- BDRV_O_NO_BACKING, errp);
+ backing = blk_co_new_open(full_backing, NULL, NULL,
+ BDRV_O_NO_BACKING, errp);
g_free(full_backing);
if (backing == NULL) {
ret = -EIO;
@@ -2617,10 +2605,10 @@
QemuOpts *opts;
} VMDKCreateOptsData;
-static BlockBackend * coroutine_fn vmdk_co_create_opts_cb(int64_t size, int idx,
- bool flat, bool split, bool compress,
- bool zeroed_grain, void *opaque,
- Error **errp)
+static BlockBackend * coroutine_fn GRAPH_RDLOCK
+vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split,
+ bool compress, bool zeroed_grain, void *opaque,
+ Error **errp)
{
BlockBackend *blk = NULL;
BlockDriverState *bs = NULL;
@@ -2659,10 +2647,9 @@
return blk;
}
-static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_create_opts(BlockDriver *drv, const char *filename,
+ QemuOpts *opts, Error **errp)
{
Error *local_err = NULL;
char *desc = NULL;
@@ -2781,7 +2768,7 @@
BlockdevCreateOptionsVmdk *opts = opaque;
if (idx == 0) {
- bs = bdrv_open_blockdev_ref(opts->file, errp);
+ bs = bdrv_co_open_blockdev_ref(opts->file, errp);
} else {
int i;
BlockdevRefList *list = opts->extents;
@@ -2796,14 +2783,16 @@
error_setg(errp, "Extent [%d] not specified", idx - 1);
return NULL;
}
- bs = bdrv_open_blockdev_ref(list->value, errp);
+ bs = bdrv_co_open_blockdev_ref(list->value, errp);
}
if (!bs) {
return NULL;
}
- blk = blk_new_with_bs(bs,
- BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
- BLK_PERM_ALL, errp);
+ blk = blk_co_new_with_bs(bs,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
+ BLK_PERM_RESIZE,
+ BLK_PERM_ALL,
+ errp);
if (!blk) {
return NULL;
}
@@ -2820,8 +2809,8 @@
return blk;
}
-static int coroutine_fn vmdk_co_create(BlockdevCreateOptions *create_options,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_create(BlockdevCreateOptions *create_options, Error **errp)
{
BlockdevCreateOptionsVmdk *opts;
@@ -2916,9 +2905,8 @@
return info;
}
-static int coroutine_fn vmdk_co_check(BlockDriverState *bs,
- BdrvCheckResult *result,
- BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_check(BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix)
{
BDRVVmdkState *s = bs->opaque;
VmdkExtent *extent = NULL;
diff --git a/block/vpc.c b/block/vpc.c
index cfdea7d..b89b0ff 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -610,7 +610,7 @@
return 0;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
vpc_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
@@ -660,7 +660,7 @@
return ret;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
vpc_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
@@ -1005,13 +1005,13 @@
}
/* Create BlockBackend to write to the image */
- bs = bdrv_open_blockdev_ref(vpc_opts->file, errp);
+ bs = bdrv_co_open_blockdev_ref(vpc_opts->file, errp);
if (bs == NULL) {
return -EIO;
}
- blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
- errp);
+ blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+ errp);
if (!blk) {
ret = -EPERM;
goto out;
@@ -1087,10 +1087,9 @@
return ret;
}
-static int coroutine_fn vpc_co_create_opts(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+vpc_co_create_opts(BlockDriver *drv, const char *filename,
+ QemuOpts *opts, Error **errp)
{
BlockdevCreateOptions *create_options = NULL;
QDict *qdict;
@@ -1117,8 +1116,8 @@
goto fail;
}
- bs = bdrv_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+ bs = bdrv_co_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
if (bs == NULL) {
ret = -EIO;
goto fail;
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 0ceecfb..4e7b8b1 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -36,6 +36,7 @@
#include "target_os_signal.h"
#include "target.h"
#include "exec/gdbstub.h"
+#include "qemu/clang-tsa.h"
/*
* This struct is used to hold certain information about the image. Basically,
@@ -234,8 +235,8 @@
extern unsigned long last_brk;
extern abi_ulong mmap_next_start;
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size);
-void mmap_fork_start(void);
-void mmap_fork_end(int child);
+void TSA_NO_TSA mmap_fork_start(void);
+void TSA_NO_TSA mmap_fork_end(int child);
/* main.c */
extern char qemu_proc_pathname[];
diff --git a/configs/devices/x86_64-softmmu/x86_64-quintela-devices.mak b/configs/devices/x86_64-softmmu/x86_64-quintela-devices.mak
deleted file mode 100644
index ee2bb8c..0000000
--- a/configs/devices/x86_64-softmmu/x86_64-quintela-devices.mak
+++ /dev/null
@@ -1,7 +0,0 @@
-# Boards:
-#
-CONFIG_ISAPC=n
-CONFIG_I440FX=n
-CONFIG_Q35=n
-CONFIG_MICROVM=y
-
diff --git a/configs/devices/x86_64-softmmu/x86_64-quintela2-devices.mak b/configs/devices/x86_64-softmmu/x86_64-quintela2-devices.mak
deleted file mode 100644
index f7e4dae..0000000
--- a/configs/devices/x86_64-softmmu/x86_64-quintela2-devices.mak
+++ /dev/null
@@ -1,6 +0,0 @@
-# Boards:
-#
-CONFIG_ISAPC=y
-CONFIG_I440FX=y
-CONFIG_Q35=y
-CONFIG_MICROVM=y
diff --git a/configs/targets/microblaze-linux-user.mak b/configs/targets/microblaze-linux-user.mak
index 4249a37..0a2322c 100644
--- a/configs/targets/microblaze-linux-user.mak
+++ b/configs/targets/microblaze-linux-user.mak
@@ -3,3 +3,4 @@
TARGET_SYSTBL=syscall.tbl
TARGET_BIG_ENDIAN=y
TARGET_HAS_BFLT=y
+TARGET_XML_FILES=gdb-xml/microblaze-core.xml gdb-xml/microblaze-stack-protect.xml
diff --git a/configs/targets/microblaze-softmmu.mak b/configs/targets/microblaze-softmmu.mak
index 8385e2d..e84c0cc 100644
--- a/configs/targets/microblaze-softmmu.mak
+++ b/configs/targets/microblaze-softmmu.mak
@@ -2,3 +2,4 @@
TARGET_BIG_ENDIAN=y
TARGET_SUPPORTS_MTTCG=y
TARGET_NEED_FDT=y
+TARGET_XML_FILES=gdb-xml/microblaze-core.xml gdb-xml/microblaze-stack-protect.xml
diff --git a/configs/targets/microblazeel-linux-user.mak b/configs/targets/microblazeel-linux-user.mak
index d0e775d..2707431 100644
--- a/configs/targets/microblazeel-linux-user.mak
+++ b/configs/targets/microblazeel-linux-user.mak
@@ -2,3 +2,4 @@
TARGET_SYSTBL_ABI=common
TARGET_SYSTBL=syscall.tbl
TARGET_HAS_BFLT=y
+TARGET_XML_FILES=gdb-xml/microblaze-core.xml gdb-xml/microblaze-stack-protect.xml
diff --git a/configs/targets/microblazeel-softmmu.mak b/configs/targets/microblazeel-softmmu.mak
index af40391..9b68803 100644
--- a/configs/targets/microblazeel-softmmu.mak
+++ b/configs/targets/microblazeel-softmmu.mak
@@ -1,3 +1,4 @@
TARGET_ARCH=microblaze
TARGET_SUPPORTS_MTTCG=y
TARGET_NEED_FDT=y
+TARGET_XML_FILES=gdb-xml/microblaze-core.xml gdb-xml/microblaze-stack-protect.xml
diff --git a/configure b/configure
index 64960c6..cf6db3d 100755
--- a/configure
+++ b/configure
@@ -1018,7 +1018,7 @@
debug-tcg TCG debugging (default is disabled)
debug-info debugging information
safe-stack SafeStack Stack Smash Protection. Depends on
- clang/llvm >= 3.7 and requires coroutine backend ucontext.
+ clang/llvm and requires coroutine backend ucontext.
NOTE: The object files are built at the place where configure is launched
EOF
@@ -1138,12 +1138,12 @@
cat > $TMPC << EOF
#if defined(__clang_major__) && defined(__clang_minor__)
# ifdef __apple_build_version__
-# if __clang_major__ < 10 || (__clang_major__ == 10 && __clang_minor__ < 0)
-# error You need at least XCode Clang v10.0 to compile QEMU
+# if __clang_major__ < 12 || (__clang_major__ == 12 && __clang_minor__ < 0)
+# error You need at least XCode Clang v12.0 to compile QEMU
# endif
# else
-# if __clang_major__ < 6 || (__clang_major__ == 6 && __clang_minor__ < 0)
-# error You need at least Clang v6.0 to compile QEMU
+# if __clang_major__ < 10 || (__clang_major__ == 10 && __clang_minor__ < 0)
+# error You need at least Clang v10.0 to compile QEMU
# endif
# endif
#elif defined(__GNUC__) && defined(__GNUC_MINOR__)
@@ -1156,7 +1156,7 @@
int main (void) { return 0; }
EOF
if ! compile_prog "" "" ; then
- error_exit "You need at least GCC v7.4 or Clang v6.0 (or XCode Clang v10.0)"
+ error_exit "You need at least GCC v7.4 or Clang v10.0 (or XCode Clang v12.0)"
fi
# Accumulate -Wfoo and -Wno-bar separately.
@@ -1184,6 +1184,7 @@
add_to warn_flags -Wexpansion-to-defined
add_to warn_flags -Wimplicit-fallthrough=2
add_to warn_flags -Wmissing-format-attribute
+add_to warn_flags -Wthread-safety
nowarn_flags=
add_to nowarn_flags -Wno-initializer-overrides
@@ -1261,19 +1262,6 @@
fi
fi
-# Disable -Wmissing-braces on older compilers that warn even for
-# the "universal" C zero initializer {0}.
-cat > $TMPC << EOF
-struct {
- int a[2];
-} x = {0};
-EOF
-if compile_object "-Werror" "" ; then
- :
-else
- QEMU_CFLAGS="$QEMU_CFLAGS -Wno-missing-braces"
-fi
-
# Our module code doesn't support Windows
if test "$modules" = "yes" && test "$mingw32" = "yes" ; then
error_exit "Modules are not available for Windows"
diff --git a/cpus-common.c b/cpus-common.c
index 793364d..39f355d 100644
--- a/cpus-common.c
+++ b/cpus-common.c
@@ -192,6 +192,11 @@
CPUState *other_cpu;
int running_cpus;
+ if (current_cpu->exclusive_context_count) {
+ current_cpu->exclusive_context_count++;
+ return;
+ }
+
qemu_mutex_lock(&qemu_cpu_list_lock);
exclusive_idle();
@@ -219,13 +224,16 @@
*/
qemu_mutex_unlock(&qemu_cpu_list_lock);
- current_cpu->in_exclusive_context = true;
+ current_cpu->exclusive_context_count = 1;
}
/* Finish an exclusive operation. */
void end_exclusive(void)
{
- current_cpu->in_exclusive_context = false;
+ current_cpu->exclusive_context_count--;
+ if (current_cpu->exclusive_context_count) {
+ return;
+ }
qemu_mutex_lock(&qemu_cpu_list_lock);
qatomic_set(&pending_cpus, 0);
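
The cpus-common.c change replaces the in_exclusive_context flag with a
nesting counter, so start_exclusive()/end_exclusive() pairs can now be
nested. Leaving out the cross-CPU synchronisation, the counting pattern
reduces to something like the following standalone sketch (not the real
implementation):

    #include <assert.h>
    #include <stdio.h>

    static int exclusive_context_count;

    static void start_exclusive(void)
    {
        if (exclusive_context_count) {
            exclusive_context_count++;     /* already exclusive: just nest */
            return;
        }
        /* ... wait for the other CPUs to pause (elided) ... */
        exclusive_context_count = 1;
    }

    static void end_exclusive(void)
    {
        assert(exclusive_context_count > 0);
        if (--exclusive_context_count) {
            return;                        /* still inside an outer section */
        }
        /* ... let the other CPUs resume (elided) ... */
    }

    int main(void)
    {
        start_exclusive();
        start_exclusive();                 /* nested call only bumps the count */
        end_exclusive();
        end_exclusive();
        printf("count back to %d\n", exclusive_context_count);
        return 0;
    }
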
diff --git a/crypto/tlssession.c b/crypto/tlssession.c
index b302d83..1e98f44 100644
--- a/crypto/tlssession.c
+++ b/crypto/tlssession.c
@@ -493,6 +493,13 @@
}
+size_t
+qcrypto_tls_session_check_pending(QCryptoTLSSession *session)
+{
+ return gnutls_record_check_pending(session->handle);
+}
+
+
int
qcrypto_tls_session_handshake(QCryptoTLSSession *session,
Error **errp)
@@ -615,6 +622,13 @@
}
+size_t
+qcrypto_tls_session_check_pending(QCryptoTLSSession *session)
+{
+ return 0;
+}
+
+
int
qcrypto_tls_session_handshake(QCryptoTLSSession *sess,
Error **errp)
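
The new qcrypto_tls_session_check_pending() exposes how many decrypted bytes
are already buffered inside the TLS layer; such bytes will not make the
underlying socket poll as readable, so callers need to drain them before
waiting for new I/O. A toy standalone model of that situation, using
hypothetical stand-in functions rather than the QCryptoTLSSession API,
might be:

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    typedef struct {
        char buf[64];
        size_t pending;                 /* decrypted bytes not yet consumed */
    } ToyTlsSession;

    static size_t toy_tls_check_pending(ToyTlsSession *s)
    {
        return s->pending;
    }

    static size_t toy_tls_read(ToyTlsSession *s, char *out, size_t len)
    {
        size_t n = len < s->pending ? len : s->pending;
        memcpy(out, s->buf, n);
        memmove(s->buf, s->buf + n, s->pending - n);
        s->pending -= n;
        return n;
    }

    int main(void)
    {
        ToyTlsSession s = { .pending = 11 };
        char out[8];

        memcpy(s.buf, "hello world", 11);

        /* Drain buffered plaintext before waiting on the socket again. */
        while (toy_tls_check_pending(&s) > 0) {
            size_t n = toy_tls_read(&s, out, sizeof(out));
            printf("read %zu byte(s)\n", n);
        }
        return 0;
    }
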
diff --git a/docs/about/build-platforms.rst b/docs/about/build-platforms.rst
index 1c1e7b9..20b97c3 100644
--- a/docs/about/build-platforms.rst
+++ b/docs/about/build-platforms.rst
@@ -86,6 +86,38 @@
For macOS, `Homebrew`_ will be used, although `MacPorts`_ is expected to carry
similar versions.
+Some build dependencies may follow less conservative rules:
+
+Python runtime
+ Distributions with long-term support often provide multiple versions
+ of the Python runtime. While QEMU will initially aim to support the
+ distribution's default runtime, it may later increase its minimum version
+ to any newer python that is available as an option from the vendor.
+ In this case, it will be necessary to use the ``--python`` command line
+ option of the ``configure`` script to point QEMU to a supported
+ version of the Python runtime.
+
+ As of QEMU |version|, the minimum supported version of Python is 3.6.
+
+Python build dependencies
+ Some of QEMU's build dependencies are written in Python. Usually these
+ are only packaged by distributions for the default Python runtime.
+ If QEMU bumps its minimum Python version and a non-default runtime is
+ required, it may be necessary to fetch python modules from the Python
+ Package Index (PyPI) via ``pip``, in order to build QEMU.
+
+Optional build dependencies
+ Build components whose absence does not affect the ability to build
+ QEMU may not be available in distros, or may be too old for QEMU's
+ requirements. Many of these, such as the Avocado testing framework
+ or various linters, are written in Python and therefore can also
+ be installed using ``pip``. Cross compilers are another example
+ of an optional build-time dependency; in this case it is possible to
+ download them from repositories such as EPEL, to use container-based
+ cross compilation using ``docker`` or ``podman``, or to use pre-built
+ binaries distributed with QEMU.
+
+
Windows
-------
diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index da2e6fe..ee95bcb 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -20,6 +20,20 @@
What follows is a list of all features currently marked as
deprecated.
+Build options
+-------------
+
+``gprof`` builds (since 8.0)
+''''''''''''''''''''''''''''
+
+The ``--enable-gprof`` configure setting relies on compiler
+instrumentation to gather its data, which can distort the generated
+profile. As other, non-instrumenting tools are available that give a
+more holistic view of the system with non-instrumented binaries, we are
+deprecating the build option and no longer defend it in CI. The
+``--enable-gcov`` build option remains for analysing test case
+coverage.
+
System emulator command line arguments
--------------------------------------
@@ -52,14 +66,6 @@
The replacement for the ``nodelay`` short-form boolean option is ``nodelay=on``
rather than ``delay=off``.
-``-spice password=string`` (since 6.0)
-''''''''''''''''''''''''''''''''''''''
-
-This option is insecure because the SPICE password remains visible in
-the process listing. This is replaced by the new ``password-secret``
-option which lets the password be securely provided on the command
-line using a ``secret`` object instance.
-
``-smp`` ("parameter=0" SMP configurations) (since 6.2)
'''''''''''''''''''''''''''''''''''''''''''''''''''''''
@@ -246,15 +252,6 @@
Note this also applies to ``-device virtio-blk-pci,scsi=on|off``, which is an
alias.
-``-device sga`` (since 6.2)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The ``sga`` device loads an option ROM for x86 targets which enables
-SeaBIOS to send messages to the serial console. SeaBIOS 1.11.0 onwards
-contains native support for this feature and thus use of the option
-ROM approach is obsolete. The native SeaBIOS support can be activated
-by using ``-machine graphics=off``.
-
``-device nvme-ns,eui64-default=on|off`` (since 7.1)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -296,6 +293,14 @@
json:{"file.driver":"rbd", "file.pool":"rbd", "file.image":"name"}
+``iscsi,password=xxx`` (since 8.0)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Specifying the iSCSI password in plain text on the command line using the
+``password`` option is insecure. The ``password-secret`` option should be
+used instead, to refer to a ``--object secret...`` instance that provides
+a password via a file, or encrypted.
+
Backwards compatibility
-----------------------
@@ -325,24 +330,6 @@
depending on the machine type, so management software must
resolve CPU model aliases before starting a virtual machine.
-Tools
------
-
-virtiofsd
-'''''''''
-
-There is a new Rust implementation of ``virtiofsd`` at
-``https://gitlab.com/virtio-fs/virtiofsd``;
-since this is now marked stable, new development should be done on that
-rather than the existing C version in the QEMU tree.
-The C version will still accept fixes and patches that
-are already in development for the moment, but will eventually
-be deleted from this tree.
-New deployments should use the Rust version, and existing systems
-should consider moving to it. The command line and feature set
-is very close and moving should be simple.
-
-
QEMU guest agent
----------------
diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst
index a17d055..5b258b4 100644
--- a/docs/about/removed-features.rst
+++ b/docs/about/removed-features.rst
@@ -428,6 +428,13 @@
Use ``-drive if=pflash`` to configure the OTP device of the sifive_u
RISC-V machine instead.
+``-spice password=string`` (removed in 8.0)
+'''''''''''''''''''''''''''''''''''''''''''
+
+This option was insecure because the SPICE password remained visible in
+the process listing. This was replaced by the new ``password-secret``
+option which lets the password be securely provided on the command
+line using a ``secret`` object instance.
QEMU Machine Protocol (QMP) commands
------------------------------------
@@ -789,6 +796,16 @@
The 'scsi-disk' device has been removed. Users should use 'scsi-hd' or
'scsi-cd' as appropriate to get a SCSI hard disk or CD-ROM as needed.
+``sga`` (removed in 8.0)
+''''''''''''''''''''''''
+
+The ``sga`` device loaded an option ROM for x86 targets which enabled
+SeaBIOS to send messages to the serial console. SeaBIOS 1.11.0 onwards
+contains native support for this feature and thus use of the option
+ROM approach was obsolete. The native SeaBIOS support can be activated
+by using ``-machine graphics=off``.
+
+
Related binaries
----------------
@@ -872,3 +889,16 @@
The corresponding upstream server project is no longer maintained.
Users are recommended to switch to an alternative distributed block
device driver such as RBD.
+
+Tools
+-----
+
+virtiofsd (removed in 8.0)
+''''''''''''''''''''''''''
+
+There is a newer Rust implementation of ``virtiofsd`` at
+``https://gitlab.com/virtio-fs/virtiofsd``; this has been
+stable for some time and is now widely used.
+The command line and feature set are very close to those of the
+removed C implementation.
+
diff --git a/docs/conf.py b/docs/conf.py
index 73a287a..00767b0 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -290,10 +290,6 @@
('tools/virtfs-proxy-helper', 'virtfs-proxy-helper',
'QEMU 9p virtfs proxy filesystem helper',
['M. Mohan Kumar'], 1),
- ('tools/virtiofsd', 'virtiofsd',
- 'QEMU virtio-fs shared file system daemon',
- ['Stefan Hajnoczi <stefanha@redhat.com>',
- 'Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>'], 1),
]
man_make_section_directory = False
diff --git a/docs/devel/fuzzing.rst b/docs/devel/fuzzing.rst
index 715330c..3bfcb33 100644
--- a/docs/devel/fuzzing.rst
+++ b/docs/devel/fuzzing.rst
@@ -19,11 +19,6 @@
Building the fuzzers
--------------------
-*NOTE*: If possible, build a 32-bit binary. When forking, the 32-bit fuzzer is
-much faster, since the page-map has a smaller size. This is due to the fact that
-AddressSanitizer maps ~20TB of memory, as part of its detection. This results
-in a large page-map, and a much slower ``fork()``.
-
To build the fuzzers, install a recent version of clang:
Configure with (substitute the clang binaries with the version you installed).
Here, enable-sanitizers, is optional but it allows us to reliably detect bugs
@@ -296,10 +291,9 @@
that bottom halves are executed and any cleanup required before the next input.
Since the same process is reused for many fuzzing runs, QEMU state needs to
-be reset at the end of each run. There are currently two implemented
-options for resetting state:
+be reset at the end of each run. For example, this can be done by rebooting the
+VM after each run.
-- Reboot the guest between runs.
- *Pros*: Straightforward and fast for simple fuzz targets.
- *Cons*: Depending on the device, does not reset all device state. If the
@@ -308,15 +302,3 @@
reboot.
- *Example target*: ``i440fx-qtest-reboot-fuzz``
-
-- Run each test case in a separate forked process and copy the coverage
- information back to the parent. This is fairly similar to AFL's "deferred"
- fork-server mode [3]
-
- - *Pros*: Relatively fast. Devices only need to be initialized once. No need to
- do slow reboots or vmloads.
-
- - *Cons*: Not officially supported by libfuzzer. Does not work well for
- devices that rely on dedicated threads.
-
- - *Example target*: ``virtio-net-fork-fuzz``
diff --git a/docs/devel/qapi-code-gen.rst b/docs/devel/qapi-code-gen.rst
index 5edc49a..23e7f2f 100644
--- a/docs/devel/qapi-code-gen.rst
+++ b/docs/devel/qapi-code-gen.rst
@@ -685,9 +685,10 @@
that previously resulted in an error). QMP clients may still need to
know whether the extension is available.
-For this purpose, a list of features can be specified for a command or
-struct type. Each list member can either be ``{ 'name': STRING, '*if':
-COND }``, or STRING, which is shorthand for ``{ 'name': STRING }``.
+For this purpose, a list of features can be specified for definitions,
+enumeration values, and struct members. Each feature list member can
+either be ``{ 'name': STRING, '*if': COND }``, or STRING, which is
+shorthand for ``{ 'name': STRING }``.
The optional 'if' member specifies a conditional. See `Configuring
the schema`_ below for more on this.
@@ -817,8 +818,8 @@
A union's discriminator may not be conditional.
-Likewise, individual enumeration values be conditional. This requires
-the longhand form of ENUM-VALUE_.
+Likewise, individual enumeration values may be conditional. This
+requires the longhand form of ENUM-VALUE_.
Example: an enum type with unconditional value 'foo' and conditional
value 'bar' ::
@@ -1157,9 +1158,8 @@
Type "q_obj-EVENT_C-arg" is an implicitly defined object type with
the two members from the event's definition.
-The SchemaInfo for struct and union types has meta-type "object".
-
-The SchemaInfo for a struct type has variant member "members".
+The SchemaInfo for struct and union types has meta-type "object" and
+variant member "members".
The SchemaInfo for a union type additionally has variant members "tag"
and "variants".
diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst
index 673057c..c214c73 100644
--- a/docs/devel/vfio-migration.rst
+++ b/docs/devel/vfio-migration.rst
@@ -7,46 +7,43 @@
destination host. This document details how saving and restoring of VFIO
devices is done in QEMU.
-Migration of VFIO devices consists of two phases: the optional pre-copy phase,
-and the stop-and-copy phase. The pre-copy phase is iterative and allows to
-accommodate VFIO devices that have a large amount of data that needs to be
-transferred. The iterative pre-copy phase of migration allows for the guest to
-continue whilst the VFIO device state is transferred to the destination, this
-helps to reduce the total downtime of the VM. VFIO devices can choose to skip
-the pre-copy phase of migration by returning pending_bytes as zero during the
-pre-copy phase.
+Migration of VFIO devices currently consists of a single stop-and-copy phase.
+During the stop-and-copy phase the guest is stopped and the entire VFIO device
+data is transferred to the destination.
+
+The pre-copy phase of migration is currently not supported for VFIO devices.
+Support for VFIO pre-copy will be added later on.
+
+Note that currently VFIO migration is supported only for a single device. This
+is due to VFIO migration's lack of P2P support. However, P2P support is planned
+to be added later on.
A detailed description of the UAPI for VFIO device migration can be found in
-the comment for the ``vfio_device_migration_info`` structure in the header
-file linux-headers/linux/vfio.h.
+the comment for the ``vfio_device_mig_state`` structure in the header file
+linux-headers/linux/vfio.h.
VFIO implements the device hooks for the iterative approach as follows:
-* A ``save_setup`` function that sets up the migration region and sets _SAVING
- flag in the VFIO device state.
+* A ``save_setup`` function that sets up migration on the source.
-* A ``load_setup`` function that sets up the migration region on the
- destination and sets _RESUMING flag in the VFIO device state.
+* A ``load_setup`` function that sets the VFIO device on the destination in
+ _RESUMING state.
* A ``state_pending_exact`` function that reads pending_bytes from the vendor
driver, which indicates the amount of data that the vendor driver has yet to
save for the VFIO device.
-* A ``save_live_iterate`` function that reads the VFIO device's data from the
- vendor driver through the migration region during iterative phase.
-
* A ``save_state`` function to save the device config space if it is present.
-* A ``save_live_complete_precopy`` function that resets _RUNNING flag from the
- VFIO device state and iteratively copies the remaining data for the VFIO
- device until the vendor driver indicates that no data remains (pending bytes
- is zero).
+* A ``save_live_complete_precopy`` function that sets the VFIO device in
+ _STOP_COPY state and iteratively copies the data for the VFIO device until
+ the vendor driver indicates that no data remains.
* A ``load_state`` function that loads the config section and the data
- sections that are generated by the save functions above
+ sections that are generated by the save functions above.
* ``cleanup`` functions for both save and load that perform any migration
- related cleanup, including unmapping the migration region
+ related cleanup.
The VFIO migration code uses a VM state change handler to change the VFIO
@@ -71,13 +68,13 @@
can also be written by the device. There is currently no device or IOMMU
support for dirty page tracking in hardware.
-By default, dirty pages are tracked when the device is in pre-copy as well as
-stop-and-copy phase. So, a page pinned by the vendor driver will be copied to
-the destination in both phases. Copying dirty pages in pre-copy phase helps
-QEMU to predict if it can achieve its downtime tolerances. If QEMU during
-pre-copy phase keeps finding dirty pages continuously, then it understands
-that even in stop-and-copy phase, it is likely to find dirty pages and can
-predict the downtime accordingly.
+By default, dirty pages are tracked during pre-copy as well as stop-and-copy
+phase. So, a page pinned by the vendor driver will be copied to the destination
+in both phases. Copying dirty pages in pre-copy phase helps QEMU to predict if
+it can achieve its downtime tolerances. If QEMU during pre-copy phase keeps
+finding dirty pages continuously, then it understands that even in stop-and-copy
+phase, it is likely to find dirty pages and can predict the downtime
+accordingly.
QEMU also provides a per device opt-out option ``pre-copy-dirty-page-tracking``
which disables querying the dirty bitmap during pre-copy phase. If it is set to
@@ -111,23 +108,22 @@
|
migrate_init spawns migration_thread
Migration thread then calls each device's .save_setup()
- (RUNNING, _SETUP, _RUNNING|_SAVING)
+ (RUNNING, _SETUP, _RUNNING)
|
- (RUNNING, _ACTIVE, _RUNNING|_SAVING)
+ (RUNNING, _ACTIVE, _RUNNING)
If device is active, get pending_bytes by .state_pending_exact()
If total pending_bytes >= threshold_size, call .save_live_iterate()
- Data of VFIO device for pre-copy phase is copied
Iterate till total pending bytes converge and are less than threshold
|
On migration completion, vCPU stops and calls .save_live_complete_precopy for
- each active device. The VFIO device is then transitioned into _SAVING state
- (FINISH_MIGRATE, _DEVICE, _SAVING)
+ each active device. The VFIO device is then transitioned into _STOP_COPY state
+ (FINISH_MIGRATE, _DEVICE, _STOP_COPY)
|
For the VFIO device, iterate in .save_live_complete_precopy until
pending data is 0
- (FINISH_MIGRATE, _DEVICE, _STOPPED)
+ (FINISH_MIGRATE, _DEVICE, _STOP)
|
- (FINISH_MIGRATE, _COMPLETED, _STOPPED)
+ (FINISH_MIGRATE, _COMPLETED, _STOP)
Migration thread schedules cleanup bottom half and exits
Live migration resume path
@@ -136,7 +132,7 @@
::
Incoming migration calls .load_setup for each device
- (RESTORE_VM, _ACTIVE, _STOPPED)
+ (RESTORE_VM, _ACTIVE, _STOP)
|
For each device, .load_state is called for that device section data
(RESTORE_VM, _ACTIVE, _RESUMING)
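
For a single VFIO device, the save side described above reduces to: set up
the source, query the exact amount of pending data, move the device to
STOP_COPY and drain it, save the config space, and clean up. A schematic
standalone sketch of that ordering, with hypothetical types and names rather
than QEMU's actual migration handlers, could be:

    #include <stdio.h>

    /* Hook names mirror the documentation; the types are illustrative only. */
    typedef struct {
        int (*save_setup)(void);
        void (*state_pending_exact)(long *pending);
        int (*save_live_complete_precopy)(void);   /* device in STOP_COPY here */
        int (*save_state)(void);                   /* config space, if present */
        void (*save_cleanup)(void);
    } ToySaveHooks;

    static long fake_pending = 3;   /* pretend the device has 3 chunks to send */

    static int toy_setup(void) { puts("setup: source ready"); return 0; }
    static void toy_pending(long *p) { *p = fake_pending; }
    static int toy_complete(void)
    {
        while (fake_pending > 0) {
            printf("copy chunk, %ld left\n", --fake_pending);
        }
        return 0;
    }
    static int toy_save_state(void) { puts("save config space"); return 0; }
    static void toy_cleanup(void) { puts("cleanup"); }

    int main(void)
    {
        ToySaveHooks h = {
            toy_setup, toy_pending, toy_complete, toy_save_state, toy_cleanup,
        };
        long pending;

        h.save_setup();
        h.state_pending_exact(&pending);    /* how much is left to send? */
        printf("pending = %ld\n", pending);
        /* vCPUs stop; the device moves to STOP_COPY and is drained. */
        h.save_live_complete_precopy();
        h.save_state();
        h.save_cleanup();
        return 0;
    }
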
diff --git a/docs/meson.build b/docs/meson.build
index 9136fed..bbcdccc 100644
--- a/docs/meson.build
+++ b/docs/meson.build
@@ -48,7 +48,6 @@
'qemu-storage-daemon.1': (have_tools ? 'man1' : ''),
'qemu-trace-stap.1': (stap.found() ? 'man1' : ''),
'virtfs-proxy-helper.1': (have_virtfs_proxy_helper ? 'man1' : ''),
- 'virtiofsd.1': (have_virtiofsd ? 'man1' : ''),
'qemu.1': 'man1',
'qemu-block-drivers.7': 'man7',
'qemu-cpu-models.7': 'man7'
diff --git a/docs/system/arm/nuvoton.rst b/docs/system/arm/nuvoton.rst
index c38df32..0424cae 100644
--- a/docs/system/arm/nuvoton.rst
+++ b/docs/system/arm/nuvoton.rst
@@ -49,6 +49,7 @@
* SMBus controller (SMBF)
* Ethernet controller (EMC)
* Tachometer
+ * Peripheral SPI controller (PSPI)
Missing devices
---------------
@@ -64,7 +65,6 @@
* Ethernet controller (GMAC)
* USB device (USBD)
- * Peripheral SPI controller (PSPI)
* SD/MMC host
* PECI interface
* PCI and PCIe root complex and bridges
diff --git a/docs/tools/index.rst b/docs/tools/index.rst
index 2151adc..8e65ce0 100644
--- a/docs/tools/index.rst
+++ b/docs/tools/index.rst
@@ -16,4 +16,3 @@
qemu-pr-helper
qemu-trace-stap
virtfs-proxy-helper
- virtiofsd
diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst
deleted file mode 100644
index 995a754..0000000
--- a/docs/tools/virtiofsd.rst
+++ /dev/null
@@ -1,403 +0,0 @@
-QEMU virtio-fs shared file system daemon
-========================================
-
-Synopsis
---------
-
-**virtiofsd** [*OPTIONS*]
-
-Description
------------
-
-Share a host directory tree with a guest through a virtio-fs device. This
-program is a vhost-user backend that implements the virtio-fs device. Each
-virtio-fs device instance requires its own virtiofsd process.
-
-This program is designed to work with QEMU's ``--device vhost-user-fs-pci``
-but should work with any virtual machine monitor (VMM) that supports
-vhost-user. See the Examples section below.
-
-This program must be run as the root user. The program drops privileges where
-possible during startup although it must be able to create and access files
-with any uid/gid:
-
-* The ability to invoke syscalls is limited using seccomp(2).
-* Linux capabilities(7) are dropped.
-
-In "namespace" sandbox mode the program switches into a new file system
-namespace and invokes pivot_root(2) to make the shared directory tree its root.
-A new pid and net namespace is also created to isolate the process.
-
-In "chroot" sandbox mode the program invokes chroot(2) to make the shared
-directory tree its root. This mode is intended for container environments where
-the container runtime has already set up the namespaces and the program does
-not have permission to create namespaces itself.
-
-Both sandbox modes prevent "file system escapes" due to symlinks and other file
-system objects that might lead to files outside the shared directory.
-
-Options
--------
-
-.. program:: virtiofsd
-
-.. option:: -h, --help
-
- Print help.
-
-.. option:: -V, --version
-
- Print version.
-
-.. option:: -d
-
- Enable debug output.
-
-.. option:: --syslog
-
- Print log messages to syslog instead of stderr.
-
-.. option:: -o OPTION
-
- * debug -
- Enable debug output.
-
- * flock|no_flock -
- Enable/disable flock. The default is ``no_flock``.
-
- * modcaps=CAPLIST
- Modify the list of capabilities allowed; CAPLIST is a colon separated
- list of capabilities, each preceded by either + or -, e.g.
- ''+sys_admin:-chown''.
-
- * log_level=LEVEL -
- Print only log messages matching LEVEL or more severe. LEVEL is one of
- ``err``, ``warn``, ``info``, or ``debug``. The default is ``info``.
-
- * posix_lock|no_posix_lock -
- Enable/disable remote POSIX locks. The default is ``no_posix_lock``.
-
- * readdirplus|no_readdirplus -
- Enable/disable readdirplus. The default is ``readdirplus``.
-
- * sandbox=namespace|chroot -
- Sandbox mode:
- - namespace: Create mount, pid, and net namespaces and pivot_root(2) into
- the shared directory.
- - chroot: chroot(2) into shared directory (use in containers).
- The default is "namespace".
-
- * source=PATH -
- Share host directory tree located at PATH. This option is required.
-
- * timeout=TIMEOUT -
- I/O timeout in seconds. The default depends on cache= option.
-
- * writeback|no_writeback -
- Enable/disable writeback cache. The cache allows the FUSE client to buffer
- and merge write requests. The default is ``no_writeback``.
-
- * xattr|no_xattr -
- Enable/disable extended attributes (xattr) on files and directories. The
- default is ``no_xattr``.
-
- * posix_acl|no_posix_acl -
- Enable/disable posix acl support. Posix ACLs are disabled by default.
-
- * security_label|no_security_label -
- Enable/disable security label support. Security labels are disabled by
- default. This will allow client to send a MAC label of file during
- file creation. Typically this is expected to be SELinux security
- label. Server will try to set that label on newly created file
- atomically wherever possible.
-
- * killpriv_v2|no_killpriv_v2 -
- Enable/disable ``FUSE_HANDLE_KILLPRIV_V2`` support. KILLPRIV_V2 is enabled
- by default as long as the client supports it. Enabling this option helps
- with performance in write path.
-
-.. option:: --socket-path=PATH
-
- Listen on vhost-user UNIX domain socket at PATH.
-
-.. option:: --socket-group=GROUP
-
- Set the vhost-user UNIX domain socket gid to GROUP.
-
-.. option:: --fd=FDNUM
-
- Accept connections from vhost-user UNIX domain socket file descriptor FDNUM.
- The file descriptor must already be listening for connections.
-
-.. option:: --thread-pool-size=NUM
-
- Restrict the number of worker threads per request queue to NUM. The default
- is 0.
-
-.. option:: --cache=none|auto|always
-
- Select the desired trade-off between coherency and performance. ``none``
- forbids the FUSE client from caching to achieve best coherency at the cost of
- performance. ``auto`` acts similar to NFS with a 1 second metadata cache
- timeout. ``always`` sets a long cache lifetime at the expense of coherency.
- The default is ``auto``.
-
-Extended attribute (xattr) mapping
-----------------------------------
-
-By default the name of xattr's used by the client are passed through to the server
-file system. This can be a problem where either those xattr names are used
-by something on the server (e.g. selinux client/server confusion) or if the
-``virtiofsd`` is running in a container with restricted privileges where it
-cannot access some attributes.
-
-Mapping syntax
-~~~~~~~~~~~~~~
-
-A mapping of xattr names can be made using -o xattrmap=mapping where the ``mapping``
-string consists of a series of rules.
-
-The first matching rule terminates the mapping.
-The set of rules must include a terminating rule to match any remaining attributes
-at the end.
-
-Each rule consists of a number of fields separated with a separator that is the
-first non-white space character in the rule. This separator must then be used
-for the whole rule.
-White space may be added before and after each rule.
-
-Using ':' as the separator a rule is of the form:
-
-``:type:scope:key:prepend:``
-
-**scope** is:
-
-- 'client' - match 'key' against a xattr name from the client for
- setxattr/getxattr/removexattr
-- 'server' - match 'prepend' against a xattr name from the server
- for listxattr
-- 'all' - can be used to make a single rule where both the server
- and client matches are triggered.
-
-**type** is one of:
-
-- 'prefix' - is designed to prepend and strip a prefix; the modified
- attributes then being passed on to the client/server.
-
-- 'ok' - Causes the rule set to be terminated when a match is found
- while allowing matching xattr's through unchanged.
- It is intended both as a way of explicitly terminating
- the list of rules, and to allow some xattr's to skip following rules.
-
-- 'bad' - If a client tries to use a name matching 'key' it's
- denied using EPERM; when the server passes an attribute
- name matching 'prepend' it's hidden. In many ways it's use is very like
- 'ok' as either an explicit terminator or for special handling of certain
- patterns.
-
-- 'unsupported' - If a client tries to use a name matching 'key' it's
- denied using ENOTSUP; when the server passes an attribute
- name matching 'prepend' it's hidden. In many ways it's use is very like
- 'ok' as either an explicit terminator or for special handling of certain
- patterns.
-
-**key** is a string tested as a prefix on an attribute name originating
-on the client. It maybe empty in which case a 'client' rule
-will always match on client names.
-
-**prepend** is a string tested as a prefix on an attribute name originating
-on the server, and used as a new prefix. It may be empty
-in which case a 'server' rule will always match on all names from
-the server.
-
-e.g.:
-
- ``:prefix:client:trusted.:user.virtiofs.:``
-
- will match 'trusted.' attributes in client calls and prefix them before
- passing them to the server.
-
- ``:prefix:server::user.virtiofs.:``
-
- will strip 'user.virtiofs.' from all server replies.
-
- ``:prefix:all:trusted.:user.virtiofs.:``
-
- combines the previous two cases into a single rule.
-
- ``:ok:client:user.::``
-
- will allow get/set xattr for 'user.' xattr's and ignore
- following rules.
-
- ``:ok:server::security.:``
-
- will pass 'security.' xattr's in listxattr from the server
- and ignore following rules.
-
- ``:ok:all:::``
-
- will terminate the rule search passing any remaining attributes
- in both directions.
-
- ``:bad:server::security.:``
-
- would hide 'security.' xattr's in listxattr from the server.
-
-A simpler 'map' type provides a shorter syntax for the common case:
-
-``:map:key:prepend:``
-
-The 'map' type adds a number of separate rules to add **prepend** as a prefix
-to the matched **key** (or all attributes if **key** is empty).
-There may be at most one 'map' rule and it must be the last rule in the set.
-
-Note: When the 'security.capability' xattr is remapped, the daemon has to do
-extra work to remove it during many operations, which the host kernel normally
-does itself.
-
-Security considerations
-~~~~~~~~~~~~~~~~~~~~~~~
-
-Operating systems typically partition the xattr namespace using
-well defined name prefixes. Each partition may have different
-access controls applied. For example, on Linux there are multiple
-partitions
-
- * ``system.*`` - access varies depending on attribute & filesystem
- * ``security.*`` - only processes with CAP_SYS_ADMIN
- * ``trusted.*`` - only processes with CAP_SYS_ADMIN
- * ``user.*`` - any process granted by file permissions / ownership
-
-Other OSes, such as FreeBSD, have different name prefixes
-and access control rules.
-
-When remapping attributes on the host, it is important to
-ensure that the remapping does not allow a guest user to
-evade the guest access control rules.
-
-Consider if ``trusted.*`` from the guest was remapped to
-``user.virtiofs.trusted*`` in the host. An unprivileged
-user in a Linux guest has the ability to write to xattrs
-under ``user.*``. Thus the user can evade the access
-control restriction on ``trusted.*`` by instead writing
-to ``user.virtiofs.trusted.*``.
-
-As noted above, the partitions used and the access controls
-applied vary across guest OSes, so it is not wise to
-try to predict what the guest OS will use.
-
-The simplest way to avoid an insecure configuration is
-to remap all xattrs at once, to a given fixed prefix.
-This is shown in example (1) below.
-
-If selectively mapping only a subset of xattr prefixes,
-then rules must be added to explicitly block direct
-access to the target of the remapping. This is shown
-in example (2) below.
-
-Mapping examples
-~~~~~~~~~~~~~~~~
-
-1) Prefix all attributes with 'user.virtiofs.'
-
-::
-
- -o xattrmap=":prefix:all::user.virtiofs.::bad:all:::"
-
-
-This uses two rules, using : as the field separator;
-the first rule prefixes and strips 'user.virtiofs.',
-the second rule hides any non-prefixed attributes that
-the host set.
-
-This is equivalent to the 'map' rule:
-
-::
-
- -o xattrmap=":map::user.virtiofs.:"
-
-2) Prefix 'trusted.' attributes, allow others through
-
-::
-
- "/prefix/all/trusted./user.virtiofs./
- /bad/server//trusted./
- /bad/client/user.virtiofs.//
- /ok/all///"
-
-
-Here there are four rules, using / as the field
-separator, and also demonstrating that new lines can
-be included between rules.
-The first rule is the prefixing of 'trusted.' and
-stripping of 'user.virtiofs.'.
-The second rule hides unprefixed 'trusted.' attributes
-on the host.
-The third rule stops a guest from explicitly setting
-the 'user.virtiofs.' path directly to prevent access
-control bypass on the target of the earlier prefix
-remapping.
-Finally, the fourth rule lets all remaining attributes
-through.
-
-This is equivalent to the 'map' rule:
-
-::
-
- -o xattrmap="/map/trusted./user.virtiofs./"
-
-3) Hide 'security.' attributes, and allow everything else
-
-::
-
- "/bad/all/security./security./
- /ok/all///"
-
-The first rule combines what could be separate client and server
-rules into a single 'all' rule, matching 'security.' in either
-client arguments or lists returned from the host. This stops
-the client seeing any 'security.' attributes on the server and
-stops it setting any.
-
-SELinux support
----------------
-Support for SELinux can be enabled by running virtiofsd with the option
-"-o security_label". This makes virtiofsd save the guest's security context
-in the xattr security.selinux on the host, which may fail if the host's
-SELinux policy does not permit virtiofsd to perform that operation.
-
-Hence, it is preferable to remap the guest's "security.selinux" xattr to,
-say, "trusted.virtiofs.security.selinux" on the host.
-
-"-o xattrmap=:map:security.selinux:trusted.virtiofs.:"
-
-This keeps the guest's and the host's SELinux xattrs on the same file
-separate so they do not interfere with each other, and allows host and
-guest to implement their own independent SELinux policies.
-
-Setting a trusted xattr on the host requires CAP_SYS_ADMIN, so this
-capability has to be added to the daemon.
-
-"-o modcaps=+sys_admin"
-
-Granting CAP_SYS_ADMIN increases the risk to the system: virtiofsd becomes
-more powerful and, if compromised, can do a lot of damage to the host.
-Keep this trade-off in mind when making a decision.
-
-Examples
---------
-
-Export ``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket
-``/var/run/vm001-vhost-fs.sock``:
-
-.. parsed-literal::
-
- host# virtiofsd --socket-path=/var/run/vm001-vhost-fs.sock -o source=/var/lib/fs/vm001
- host# |qemu_system| \\
- -chardev socket,id=char0,path=/var/run/vm001-vhost-fs.sock \\
- -device vhost-user-fs-pci,chardev=char0,tag=myfs \\
- -object memory-backend-memfd,id=mem,size=4G,share=on \\
- -numa node,memdev=mem \\
- ...
- guest# mount -t virtiofs myfs /mnt
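The removed documentation above describes "-o security_label", "-o xattrmap" and
"-o modcaps" separately. For reference, a combined invocation along the lines the
text suggests might look as follows; the socket path and source directory are
reused from the example above, and the exact spelling is illustrative only, not
taken from the patch:

    host# virtiofsd --socket-path=/var/run/vm001-vhost-fs.sock \
              -o source=/var/lib/fs/vm001 \
              -o security_label \
              -o xattrmap=":map:security.selinux:trusted.virtiofs.:" \
              -o modcaps=+sys_admin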
diff --git a/dump/dump.c b/dump/dump.c
index 279b07f..1362810 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -1854,7 +1854,8 @@
*/
ret = cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks);
if (ret < 0) {
- error_setg(errp, QERR_UNSUPPORTED);
+ error_setg(errp,
+ "dumping guest memory is not supported on this target");
goto cleanup;
}
@@ -1864,10 +1865,7 @@
s->note_size = cpu_get_note_size(s->dump_info.d_class,
s->dump_info.d_machine, nr_cpus);
- if (s->note_size < 0) {
- error_setg(errp, QERR_UNSUPPORTED);
- goto cleanup;
- }
+ assert(s->note_size >= 0);
/*
* The goal of this block is to (a) update the previously guessed
diff --git a/gdb-xml/microblaze-core.xml b/gdb-xml/microblaze-core.xml
new file mode 100644
index 0000000..becf77c
--- /dev/null
+++ b/gdb-xml/microblaze-core.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2008 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.microblaze.core">
+ <reg name="r0" bitsize="32" regnum="0"/>
+ <reg name="r1" bitsize="32" type="data_ptr"/>
+ <reg name="r2" bitsize="32"/>
+ <reg name="r3" bitsize="32"/>
+ <reg name="r4" bitsize="32"/>
+ <reg name="r5" bitsize="32"/>
+ <reg name="r6" bitsize="32"/>
+ <reg name="r7" bitsize="32"/>
+ <reg name="r8" bitsize="32"/>
+ <reg name="r9" bitsize="32"/>
+ <reg name="r10" bitsize="32"/>
+ <reg name="r11" bitsize="32"/>
+ <reg name="r12" bitsize="32"/>
+ <reg name="r13" bitsize="32"/>
+ <reg name="r14" bitsize="32"/>
+ <reg name="r15" bitsize="32"/>
+ <reg name="r16" bitsize="32"/>
+ <reg name="r17" bitsize="32"/>
+ <reg name="r18" bitsize="32"/>
+ <reg name="r19" bitsize="32"/>
+ <reg name="r20" bitsize="32"/>
+ <reg name="r21" bitsize="32"/>
+ <reg name="r22" bitsize="32"/>
+ <reg name="r23" bitsize="32"/>
+ <reg name="r24" bitsize="32"/>
+ <reg name="r25" bitsize="32"/>
+ <reg name="r26" bitsize="32"/>
+ <reg name="r27" bitsize="32"/>
+ <reg name="r28" bitsize="32"/>
+ <reg name="r29" bitsize="32"/>
+ <reg name="r30" bitsize="32"/>
+ <reg name="r31" bitsize="32"/>
+ <reg name="rpc" bitsize="32" type="code_ptr"/>
+ <reg name="rmsr" bitsize="32"/>
+ <reg name="rear" bitsize="32"/>
+ <reg name="resr" bitsize="32"/>
+ <reg name="rfsr" bitsize="32"/>
+ <reg name="rbtr" bitsize="32"/>
+ <reg name="rpvr0" bitsize="32"/>
+ <reg name="rpvr1" bitsize="32"/>
+ <reg name="rpvr2" bitsize="32"/>
+ <reg name="rpvr3" bitsize="32"/>
+ <reg name="rpvr4" bitsize="32"/>
+ <reg name="rpvr5" bitsize="32"/>
+ <reg name="rpvr6" bitsize="32"/>
+ <reg name="rpvr7" bitsize="32"/>
+ <reg name="rpvr8" bitsize="32"/>
+ <reg name="rpvr9" bitsize="32"/>
+ <reg name="rpvr10" bitsize="32"/>
+ <reg name="rpvr11" bitsize="32"/>
+ <reg name="redr" bitsize="32"/>
+ <reg name="rpid" bitsize="32"/>
+ <reg name="rzpr" bitsize="32"/>
+ <reg name="rtlbx" bitsize="32"/>
+ <reg name="rtlbsx" bitsize="32"/>
+ <reg name="rtlblo" bitsize="32"/>
+ <reg name="rtlbhi" bitsize="32"/>
+</feature>
diff --git a/gdb-xml/microblaze-stack-protect.xml b/gdb-xml/microblaze-stack-protect.xml
new file mode 100644
index 0000000..997301e
--- /dev/null
+++ b/gdb-xml/microblaze-stack-protect.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2008 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.microblaze.stack-protect">
+ <reg name="rslr" bitsize="32"/>
+ <reg name="rshr" bitsize="32"/>
+</feature>
diff --git a/hw/acpi/acpi-stub.c b/hw/acpi/acpi-stub.c
index 4c9d081..e268ce9 100644
--- a/hw/acpi/acpi-stub.c
+++ b/hw/acpi/acpi-stub.c
@@ -19,11 +19,9 @@
*/
#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
#include "hw/acpi/acpi.h"
void acpi_table_add(const QemuOpts *opts, Error **errp)
{
- error_setg(errp, QERR_UNSUPPORTED);
+ g_assert_not_reached();
}
diff --git a/hw/acpi/vmgenid.c b/hw/acpi/vmgenid.c
index 0c9f158..a39315c 100644
--- a/hw/acpi/vmgenid.c
+++ b/hw/acpi/vmgenid.c
@@ -12,7 +12,6 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "qapi/qapi-commands-machine.h"
#include "qemu/module.h"
#include "hw/acpi/acpi.h"
#include "hw/acpi/aml-build.h"
@@ -244,20 +243,3 @@
}
type_init(vmgenid_register_types)
-
-GuidInfo *qmp_query_vm_generation_id(Error **errp)
-{
- GuidInfo *info;
- VmGenIdState *vms;
- Object *obj = find_vmgenid_dev();
-
- if (!obj) {
- error_setg(errp, "VM Generation ID device not found");
- return NULL;
- }
- vms = VMGENID(obj);
-
- info = g_malloc0(sizeof(*info));
- info->guid = qemu_uuid_unparse_strdup(&vms->guid);
- return info;
-}
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 2d157de..b5aed4a 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -389,6 +389,7 @@
select XLNX_CSU_DMA
select XLNX_ZYNQMP
select XLNX_ZDMA
+ select USB_DWC3
config XLNX_VERSAL
bool
diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c
index d85cc02..15ff21d 100644
--- a/hw/arm/npcm7xx.c
+++ b/hw/arm/npcm7xx.c
@@ -86,6 +86,8 @@
NPCM7XX_EMC1RX_IRQ = 15,
NPCM7XX_EMC1TX_IRQ,
NPCM7XX_MMC_IRQ = 26,
+ NPCM7XX_PSPI2_IRQ = 28,
+ NPCM7XX_PSPI1_IRQ = 31,
NPCM7XX_TIMER0_IRQ = 32, /* Timer Module 0 */
NPCM7XX_TIMER1_IRQ,
NPCM7XX_TIMER2_IRQ,
@@ -220,6 +222,12 @@
0xf0826000,
};
+/* Register base address for each PSPI Module */
+static const hwaddr npcm7xx_pspi_addr[] = {
+ 0xf0200000,
+ 0xf0201000,
+};
+
static const struct {
hwaddr regs_addr;
uint32_t unconnected_pins;
@@ -444,6 +452,10 @@
object_initialize_child(obj, "emc[*]", &s->emc[i], TYPE_NPCM7XX_EMC);
}
+ for (i = 0; i < ARRAY_SIZE(s->pspi); i++) {
+ object_initialize_child(obj, "pspi[*]", &s->pspi[i], TYPE_NPCM_PSPI);
+ }
+
object_initialize_child(obj, "mmc", &s->mmc, TYPE_NPCM7XX_SDHCI);
}
@@ -715,6 +727,17 @@
sysbus_connect_irq(SYS_BUS_DEVICE(&s->mmc), 0,
npcm7xx_irq(s, NPCM7XX_MMC_IRQ));
+ /* PSPI */
+ QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm7xx_pspi_addr) != ARRAY_SIZE(s->pspi));
+ for (i = 0; i < ARRAY_SIZE(s->pspi); i++) {
+ SysBusDevice *sbd = SYS_BUS_DEVICE(&s->pspi[i]);
+ int irq = (i == 0) ? NPCM7XX_PSPI1_IRQ : NPCM7XX_PSPI2_IRQ;
+
+ sysbus_realize(sbd, &error_abort);
+ sysbus_mmio_map(sbd, 0, npcm7xx_pspi_addr[i]);
+ sysbus_connect_irq(sbd, 0, npcm7xx_irq(s, irq));
+ }
+
create_unimplemented_device("npcm7xx.shm", 0xc0001000, 4 * KiB);
create_unimplemented_device("npcm7xx.vdmx", 0xe0800000, 4 * KiB);
create_unimplemented_device("npcm7xx.pcierc", 0xe1000000, 64 * KiB);
@@ -724,8 +747,6 @@
create_unimplemented_device("npcm7xx.peci", 0xf0100000, 4 * KiB);
create_unimplemented_device("npcm7xx.siox[1]", 0xf0101000, 4 * KiB);
create_unimplemented_device("npcm7xx.siox[2]", 0xf0102000, 4 * KiB);
- create_unimplemented_device("npcm7xx.pspi1", 0xf0200000, 4 * KiB);
- create_unimplemented_device("npcm7xx.pspi2", 0xf0201000, 4 * KiB);
create_unimplemented_device("npcm7xx.ahbpci", 0xf0400000, 1 * MiB);
create_unimplemented_device("npcm7xx.mcphy", 0xf05f0000, 64 * KiB);
create_unimplemented_device("npcm7xx.gmac1", 0xf0802000, 8 * KiB);
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 733c964..0a5a60c 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -249,7 +249,7 @@
/* there is a ttbr0 region and we are in it (high bits all zero) */
return &cfg->tt[0];
} else if (cfg->tt[1].tsz &&
- !extract64(iova, 64 - cfg->tt[1].tsz, cfg->tt[1].tsz - tbi_byte)) {
+ sextract64(iova, 64 - cfg->tt[1].tsz, cfg->tt[1].tsz - tbi_byte) == -1) {
/* there is a ttbr1 region and we are in it (high bits all one) */
return &cfg->tt[1];
} else if (!cfg->tt[0].tsz) {
@@ -439,7 +439,7 @@
memory_region_init_iommu(&sdev->iommu, sizeof(sdev->iommu),
s->mrtypename,
- OBJECT(s), name, 1ULL << SMMU_MAX_VA_BITS);
+ OBJECT(s), name, UINT64_MAX);
address_space_init(&sdev->as,
MEMORY_REGION(&sdev->iommu), name);
trace_smmu_add_mr(name);
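The smmu-common.c hunk above replaces the TTBR1 check "!extract64(...)" (top bits
all zero) with "sextract64(...) == -1" (top bits all one). A minimal standalone
sketch of why sign-extraction expresses that test, assuming a 16-bit top region
and ignoring TBI; the helper and the addresses are made up for illustration and
this is not QEMU code:

    #include <stdint.h>
    #include <stdio.h>

    /* Sign-extend the 'length'-bit field starting at bit 'start' of 'value'.
     * Assumes arithmetic right shift of negative values (GCC/Clang behaviour). */
    static int64_t sext_field(uint64_t value, int start, int length)
    {
        return (int64_t)(value << (64 - start - length)) >> (64 - length);
    }

    int main(void)
    {
        int tsz = 16;                               /* size of the top region */
        uint64_t in_ttbr1 = 0xffffbeefcafe0000ULL;  /* top 16 bits all ones  */
        uint64_t in_ttbr0 = 0x0000beefcafe0000ULL;  /* top 16 bits all zeros */

        /* -1 means every bit of the top region is set, i.e. a TTBR1 address. */
        printf("%d\n", sext_field(in_ttbr1, 64 - tsz, tsz) == -1);  /* 1 */
        printf("%d\n", sext_field(in_ttbr0, 64 - tsz, tsz) == -1);  /* 0 */
        return 0;
    }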
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index bce1618..e8f0ebf 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -79,6 +79,13 @@
REG32(CR1, 0x28)
REG32(CR2, 0x2c)
REG32(STATUSR, 0x40)
+REG32(GBPA, 0x44)
+ FIELD(GBPA, ABORT, 20, 1)
+ FIELD(GBPA, UPDATE, 31, 1)
+
+/* Use incoming. */
+#define SMMU_GBPA_RESET_VAL 0x1000
+
REG32(IRQ_CTRL, 0x50)
FIELD(IRQ_CTRL, GERROR_IRQEN, 0, 1)
FIELD(IRQ_CTRL, PRI_IRQEN, 1, 1)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 955b89c..270c80b 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -285,6 +285,7 @@
s->gerror = 0;
s->gerrorn = 0;
s->statusr = 0;
+ s->gbpa = SMMU_GBPA_RESET_VAL;
}
static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
@@ -659,7 +660,11 @@
qemu_mutex_lock(&s->mutex);
if (!smmu_enabled(s)) {
- status = SMMU_TRANS_DISABLE;
+ if (FIELD_EX32(s->gbpa, GBPA, ABORT)) {
+ status = SMMU_TRANS_ABORT;
+ } else {
+ status = SMMU_TRANS_DISABLE;
+ }
goto epilogue;
}
@@ -1170,6 +1175,16 @@
case A_GERROR_IRQ_CFG2:
s->gerror_irq_cfg2 = data;
return MEMTX_OK;
+ case A_GBPA:
+ /*
+ * If UPDATE is not set, the write is ignored. This is the only
+ * permitted behavior in SMMUv3.2 and later.
+ */
+ if (data & R_GBPA_UPDATE_MASK) {
+ /* Ignore update bit as write is synchronous. */
+ s->gbpa = data & ~R_GBPA_UPDATE_MASK;
+ }
+ return MEMTX_OK;
case A_STRTAB_BASE: /* 64b */
s->strtab_base = deposit64(s->strtab_base, 0, 32, data);
return MEMTX_OK;
@@ -1318,6 +1333,9 @@
case A_STATUSR:
*data = s->statusr;
return MEMTX_OK;
+ case A_GBPA:
+ *data = s->gbpa;
+ return MEMTX_OK;
case A_IRQ_CTRL:
case A_IRQ_CTRL_ACK:
*data = s->irq_ctrl;
@@ -1482,6 +1500,25 @@
},
};
+static bool smmuv3_gbpa_needed(void *opaque)
+{
+ SMMUv3State *s = opaque;
+
+ /* Only migrate GBPA if it differs from the reset value. */
+ return s->gbpa != SMMU_GBPA_RESET_VAL;
+}
+
+static const VMStateDescription vmstate_gbpa = {
+ .name = "smmuv3/gbpa",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = smmuv3_gbpa_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(gbpa, SMMUv3State),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_smmuv3 = {
.name = "smmuv3",
.version_id = 1,
@@ -1512,6 +1549,10 @@
VMSTATE_END_OF_LIST(),
},
+ .subsections = (const VMStateDescription * []) {
+ &vmstate_gbpa,
+ NULL
+ }
};
static void smmuv3_instance_init(Object *obj)
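A standalone sketch of the UPDATE-gated GBPA write behaviour the hunks above
introduce; the bit positions and the 0x1000 reset value come from the patch,
everything else is illustrative and not QEMU code:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define GBPA_UPDATE (UINT32_C(1) << 31)
    #define GBPA_ABORT  (UINT32_C(1) << 20)

    static uint32_t gbpa = 0x1000;   /* reset value: "use incoming" */

    static void gbpa_write(uint32_t data)
    {
        /* Writes without UPDATE are ignored; UPDATE itself is not stored
         * because the emulated write completes synchronously. */
        if (data & GBPA_UPDATE) {
            gbpa = data & ~GBPA_UPDATE;
        }
    }

    int main(void)
    {
        gbpa_write(GBPA_ABORT);                        /* ignored: UPDATE not set */
        printf("0x%" PRIx32 "\n", gbpa);               /* 0x1000 */
        gbpa_write(GBPA_UPDATE | GBPA_ABORT);          /* accepted */
        printf("abort=%d\n", !!(gbpa & GBPA_ABORT));   /* abort=1 */
        return 0;
    }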
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 75f2894..ac626b3 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2133,21 +2133,21 @@
if (vms->secure && (kvm_enabled() || hvf_enabled())) {
error_report("mach-virt: %s does not support providing "
"Security extensions (TrustZone) to the guest CPU",
- kvm_enabled() ? "KVM" : "HVF");
+ current_accel_name());
exit(1);
}
if (vms->virt && (kvm_enabled() || hvf_enabled())) {
error_report("mach-virt: %s does not support providing "
"Virtualization extensions to the guest CPU",
- kvm_enabled() ? "KVM" : "HVF");
+ current_accel_name());
exit(1);
}
if (vms->mte && (kvm_enabled() || hvf_enabled())) {
error_report("mach-virt: %s does not support providing "
"MTE to the guest CPU",
- kvm_enabled() ? "KVM" : "HVF");
+ current_accel_name());
exit(1);
}
@@ -3013,7 +3013,11 @@
mc->minimum_page_bits = 12;
mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
+#ifdef CONFIG_TCG
mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
+#else
+ mc->default_cpu_type = ARM_CPU_TYPE_NAME("max");
+#endif
mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
mc->kvm_type = virt_kvm_type;
assert(!mc->get_hotplug_handler);
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 1762517..cefca93 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -894,6 +894,10 @@
uint64_t capacity;
int64_t length;
int blk_size = conf->logical_block_size;
+ AioContext *ctx;
+
+ ctx = blk_get_aio_context(s->blk);
+ aio_context_acquire(ctx);
blk_get_geometry(s->blk, &capacity);
memset(&blkcfg, 0, sizeof(blkcfg));
@@ -917,6 +921,7 @@
* per track (cylinder).
*/
length = blk_getlength(s->blk);
+ aio_context_release(ctx);
if (length > 0 && length / conf->heads / conf->secs % blk_size) {
blkcfg.geometry.sectors = conf->secs & ~s->sector_mask;
} else {
diff --git a/hw/char/ibex_uart.c b/hw/char/ibex_uart.c
index e58181f..f70adb5 100644
--- a/hw/char/ibex_uart.c
+++ b/hw/char/ibex_uart.c
@@ -31,6 +31,7 @@
#include "hw/qdev-clock.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
+#include "hw/registerfields.h"
#include "migration/vmstate.h"
#include "qemu/log.h"
#include "qemu/module.h"
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index 44b5da8..2d90474 100644
--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
@@ -8,6 +8,7 @@
*/
#include "qemu/osdep.h"
+#include "hw/acpi/vmgenid.h"
#include "hw/boards.h"
#include "hw/intc/intc.h"
#include "hw/mem/memory-device.h"
@@ -15,7 +16,6 @@
#include "qapi/error.h"
#include "qapi/qapi-builtin-visit.h"
#include "qapi/qapi-commands-machine.h"
-#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qobject.h"
#include "qapi/qobject-input-visitor.h"
#include "qapi/type-helpers.h"
@@ -140,7 +140,7 @@
MachineClass *mc = MACHINE_GET_CLASS(ms);
if (!mc->has_hotpluggable_cpus) {
- error_setg(errp, QERR_FEATURE_DISABLED, "query-hotpluggable-cpus");
+ error_setg(errp, "machine does not support hot-plugging CPUs");
return NULL;
}
@@ -383,3 +383,20 @@
return human_readable_text_from_str(buf);
}
+
+GuidInfo *qmp_query_vm_generation_id(Error **errp)
+{
+ GuidInfo *info;
+ VmGenIdState *vms;
+ Object *obj = find_vmgenid_dev();
+
+ if (!obj) {
+ error_setg(errp, "VM Generation ID device not found");
+ return NULL;
+ }
+ vms = VMGENID(obj);
+
+ info = g_malloc0(sizeof(*info));
+ info->guid = qemu_uuid_unparse_strdup(&vms->guid);
+ return info;
+}
diff --git a/hw/core/machine.c b/hw/core/machine.c
index f73fc4c..f29e700 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -13,7 +13,6 @@
#include "qemu/osdep.h"
#include "qemu/option.h"
#include "qemu/accel.h"
-#include "qapi/qmp/qerror.h"
#include "sysemu/replay.h"
#include "qemu/units.h"
#include "hw/boards.h"
diff --git a/hw/core/nmi.c b/hw/core/nmi.c
index 481c4b3..a7bce8a 100644
--- a/hw/core/nmi.c
+++ b/hw/core/nmi.c
@@ -22,7 +22,6 @@
#include "qemu/osdep.h"
#include "hw/nmi.h"
#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
#include "qemu/module.h"
#include "monitor/monitor.h"
@@ -70,7 +69,7 @@
if (ns.handled) {
error_propagate(errp, ns.err);
} else {
- error_setg(errp, QERR_UNSUPPORTED);
+ error_setg(errp, "machine does not provide NMIs");
}
}
diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c
index 4380a5e..71dfd95 100644
--- a/hw/display/vhost-user-gpu.c
+++ b/hw/display/vhost-user-gpu.c
@@ -11,6 +11,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-gpu.h"
diff --git a/hw/display/virtio-gpu-udmabuf.c b/hw/display/virtio-gpu-udmabuf.c
index 847fa4c..69e2cf0 100644
--- a/hw/display/virtio-gpu-udmabuf.c
+++ b/hw/display/virtio-gpu-udmabuf.c
@@ -12,6 +12,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/error-report.h"
#include "qemu/units.h"
#include "qemu/iov.h"
#include "ui/console.h"
diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
index 73cb92c..1c47603 100644
--- a/hw/display/virtio-gpu-virgl.c
+++ b/hw/display/virtio-gpu-virgl.c
@@ -12,6 +12,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/error-report.h"
#include "qemu/iov.h"
#include "trace.h"
#include "hw/virtio/virtio.h"
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index 1bf47b0..9fbfe74 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -26,7 +26,6 @@
imply QXL
imply SEV
imply SGX
- imply SGA
imply TEST_DEVICES
imply TPM_CRB
imply TPM_TIS_ISA
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 6e592bd..a7a2ede 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -92,7 +92,6 @@
#include "hw/mem/memory-device.h"
#include "sysemu/replay.h"
#include "target/i386/cpu.h"
-#include "qapi/qmp/qerror.h"
#include "e820_memory_layout.h"
#include "fw_cfg.h"
#include "trace.h"
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index eaff422..48be7a1 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -28,7 +28,6 @@
#include "qemu/datadir.h"
#include "qemu/guest-random.h"
#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
#include "qapi/qapi-visit-common.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-visit-machine.h"
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 1f77639..e545532 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -389,7 +389,7 @@
return MIN(running, s->exception_prio);
}
-bool armv7m_nvic_neg_prio_requested(void *opaque, bool secure)
+bool armv7m_nvic_neg_prio_requested(NVICState *s, bool secure)
{
/* Return true if the requested execution priority is negative
* for the specified security state, ie that security state
@@ -399,8 +399,6 @@
* mean we don't allow FAULTMASK_NS to actually make the execution
* priority negative). Compare pseudocode IsReqExcPriNeg().
*/
- NVICState *s = opaque;
-
if (s->cpu->env.v7m.faultmask[secure]) {
return true;
}
@@ -418,17 +416,13 @@
return false;
}
-bool armv7m_nvic_can_take_pending_exception(void *opaque)
+bool armv7m_nvic_can_take_pending_exception(NVICState *s)
{
- NVICState *s = opaque;
-
return nvic_exec_prio(s) > nvic_pending_prio(s);
}
-int armv7m_nvic_raw_execution_priority(void *opaque)
+int armv7m_nvic_raw_execution_priority(NVICState *s)
{
- NVICState *s = opaque;
-
return s->exception_prio;
}
@@ -506,9 +500,8 @@
* if @secure is true and @irq does not specify one of the fixed set
* of architecturally banked exceptions.
*/
-static void armv7m_nvic_clear_pending(void *opaque, int irq, bool secure)
+static void armv7m_nvic_clear_pending(NVICState *s, int irq, bool secure)
{
- NVICState *s = (NVICState *)opaque;
VecInfo *vec;
assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq);
@@ -666,17 +659,17 @@
}
}
-void armv7m_nvic_set_pending(void *opaque, int irq, bool secure)
+void armv7m_nvic_set_pending(NVICState *s, int irq, bool secure)
{
- do_armv7m_nvic_set_pending(opaque, irq, secure, false);
+ do_armv7m_nvic_set_pending(s, irq, secure, false);
}
-void armv7m_nvic_set_pending_derived(void *opaque, int irq, bool secure)
+void armv7m_nvic_set_pending_derived(NVICState *s, int irq, bool secure)
{
- do_armv7m_nvic_set_pending(opaque, irq, secure, true);
+ do_armv7m_nvic_set_pending(s, irq, secure, true);
}
-void armv7m_nvic_set_pending_lazyfp(void *opaque, int irq, bool secure)
+void armv7m_nvic_set_pending_lazyfp(NVICState *s, int irq, bool secure)
{
/*
* Pend an exception during lazy FP stacking. This differs
@@ -684,7 +677,6 @@
* whether we should escalate depends on the saved context
* in the FPCCR register, not on the current state of the CPU/NVIC.
*/
- NVICState *s = (NVICState *)opaque;
bool banked = exc_is_banked(irq);
VecInfo *vec;
bool targets_secure;
@@ -773,9 +765,8 @@
}
/* Make pending IRQ active. */
-void armv7m_nvic_acknowledge_irq(void *opaque)
+void armv7m_nvic_acknowledge_irq(NVICState *s)
{
- NVICState *s = (NVICState *)opaque;
CPUARMState *env = &s->cpu->env;
const int pending = s->vectpending;
const int running = nvic_exec_prio(s);
@@ -814,10 +805,9 @@
exc_targets_secure(s, s->vectpending);
}
-void armv7m_nvic_get_pending_irq_info(void *opaque,
+void armv7m_nvic_get_pending_irq_info(NVICState *s,
int *pirq, bool *ptargets_secure)
{
- NVICState *s = (NVICState *)opaque;
const int pending = s->vectpending;
bool targets_secure;
@@ -831,9 +821,8 @@
*pirq = pending;
}
-int armv7m_nvic_complete_irq(void *opaque, int irq, bool secure)
+int armv7m_nvic_complete_irq(NVICState *s, int irq, bool secure)
{
- NVICState *s = (NVICState *)opaque;
VecInfo *vec = NULL;
int ret = 0;
@@ -915,7 +904,7 @@
return ret;
}
-bool armv7m_nvic_get_ready_status(void *opaque, int irq, bool secure)
+bool armv7m_nvic_get_ready_status(NVICState *s, int irq, bool secure)
{
/*
* Return whether an exception is "ready", i.e. it is enabled and is
@@ -926,7 +915,6 @@
* for non-banked exceptions secure is always false; for banked exceptions
* it indicates which of the exceptions is required.
*/
- NVICState *s = (NVICState *)opaque;
bool banked = exc_is_banked(irq);
VecInfo *vec;
int running = nvic_exec_prio(s);
diff --git a/hw/mem/sparse-mem.c b/hw/mem/sparse-mem.c
index e6640eb..72f038d 100644
--- a/hw/mem/sparse-mem.c
+++ b/hw/mem/sparse-mem.c
@@ -77,6 +77,13 @@
}
+static void sparse_mem_enter_reset(Object *obj, ResetType type)
+{
+ SparseMemState *s = SPARSE_MEM(obj);
+ g_hash_table_remove_all(s->mapped);
+ return;
+}
+
static const MemoryRegionOps sparse_mem_ops = {
.read = sparse_mem_read,
.write = sparse_mem_write,
@@ -123,7 +130,8 @@
assert(s->baseaddr + s->length > s->baseaddr);
- s->mapped = g_hash_table_new(NULL, NULL);
+ s->mapped = g_hash_table_new_full(NULL, NULL, NULL,
+ (GDestroyNotify)g_free);
memory_region_init_io(&s->mmio, OBJECT(s), &sparse_mem_ops, s,
"sparse-mem", s->length);
sysbus_init_mmio(sbd, &s->mmio);
@@ -131,12 +139,15 @@
static void sparse_mem_class_init(ObjectClass *klass, void *data)
{
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
DeviceClass *dc = DEVICE_CLASS(klass);
device_class_set_props(dc, sparse_mem_properties);
dc->desc = "Sparse Memory Device";
dc->realize = sparse_mem_realize;
+
+ rc->phases.enter = sparse_mem_enter_reset;
}
static const TypeInfo sparse_mem_types[] = {
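The sparse-mem hunks above switch to g_hash_table_new_full() so that the new
reset handler's g_hash_table_remove_all() also frees the stored buffers instead
of leaking them. A minimal plain-GLib illustration of that behaviour (not QEMU
code; keys and sizes are made up, build against glib-2.0):

    #include <glib.h>
    #include <stdio.h>

    int main(void)
    {
        GHashTable *mapped = g_hash_table_new_full(NULL, NULL, NULL,
                                                   (GDestroyNotify)g_free);

        g_hash_table_insert(mapped, GUINT_TO_POINTER(0x1000), g_malloc0(64));
        g_hash_table_insert(mapped, GUINT_TO_POINTER(0x2000), g_malloc0(64));

        /* On reset: drop every mapping; the destroy notify frees each value. */
        g_hash_table_remove_all(mapped);
        printf("entries after reset: %u\n", g_hash_table_size(mapped)); /* 0 */

        g_hash_table_unref(mapped);
        return 0;
    }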
diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig
index eaeddca..2ef5781 100644
--- a/hw/misc/Kconfig
+++ b/hw/misc/Kconfig
@@ -15,10 +15,6 @@
bool
depends on ISA_BUS
-config SGA
- bool
- depends on ISA_BUS
-
config ISA_TESTDEV
bool
default y if TEST_DEVICES
diff --git a/hw/misc/applesmc.c b/hw/misc/applesmc.c
index 5f9c742..72300d0 100644
--- a/hw/misc/applesmc.c
+++ b/hw/misc/applesmc.c
@@ -34,6 +34,7 @@
#include "hw/isa/isa.h"
#include "hw/qdev-properties.h"
#include "ui/console.h"
+#include "qemu/error-report.h"
#include "qemu/module.h"
#include "qemu/timer.h"
#include "qom/object.h"
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index 448e14b..fe869b9 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -5,7 +5,6 @@
softmmu_ss.add(when: 'CONFIG_ISA_TESTDEV', if_true: files('pc-testdev.c'))
softmmu_ss.add(when: 'CONFIG_PCA9552', if_true: files('pca9552.c'))
softmmu_ss.add(when: 'CONFIG_PCI_TESTDEV', if_true: files('pci-testdev.c'))
-softmmu_ss.add(when: 'CONFIG_SGA', if_true: files('sga.c'))
softmmu_ss.add(when: 'CONFIG_UNIMP', if_true: files('unimp.c'))
softmmu_ss.add(when: 'CONFIG_EMPTY_SLOT', if_true: files('empty_slot.c'))
softmmu_ss.add(when: 'CONFIG_LED', if_true: files('led.c'))
diff --git a/hw/misc/sga.c b/hw/misc/sga.c
deleted file mode 100644
index 1d04672..0000000
--- a/hw/misc/sga.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * QEMU dummy ISA device for loading sgabios option rom.
- *
- * Copyright (c) 2011 Glauber Costa, Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- *
- * sgabios code originally available at code.google.com/p/sgabios
- *
- */
-
-#include "qemu/osdep.h"
-#include "hw/isa/isa.h"
-#include "hw/loader.h"
-#include "qemu/module.h"
-#include "qom/object.h"
-#include "qemu/error-report.h"
-
-#define SGABIOS_FILENAME "sgabios.bin"
-
-#define TYPE_SGA "sga"
-OBJECT_DECLARE_SIMPLE_TYPE(ISASGAState, SGA)
-
-struct ISASGAState {
- ISADevice parent_obj;
-};
-
-static void sga_realizefn(DeviceState *dev, Error **errp)
-{
- warn_report("-device sga is deprecated, use -machine graphics=off");
- rom_add_vga(SGABIOS_FILENAME);
-}
-
-static void sga_class_initfn(ObjectClass *klass, void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(klass);
-
- set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
- dc->realize = sga_realizefn;
- dc->desc = "Serial Graphics Adapter";
-}
-
-static const TypeInfo sga_info = {
- .name = TYPE_SGA,
- .parent = TYPE_ISA_DEVICE,
- .instance_size = sizeof(ISASGAState),
- .class_init = sga_class_initfn,
-};
-
-static void sga_register_types(void)
-{
- type_register_static(&sga_info);
-}
-
-type_init(sga_register_types)
diff --git a/hw/misc/xlnx-zynqmp-apu-ctrl.c b/hw/misc/xlnx-zynqmp-apu-ctrl.c
index 20de23c..3d2be95 100644
--- a/hw/misc/xlnx-zynqmp-apu-ctrl.c
+++ b/hw/misc/xlnx-zynqmp-apu-ctrl.c
@@ -18,7 +18,6 @@
#include "hw/register.h"
#include "qemu/bitops.h"
-#include "qapi/qmp/qerror.h"
#include "hw/misc/xlnx-zynqmp-apu-ctrl.h"
diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c
index f1cba55..e5c4af1 100644
--- a/hw/net/lan9118.c
+++ b/hw/net/lan9118.c
@@ -15,7 +15,6 @@
#include "migration/vmstate.h"
#include "net/net.h"
#include "net/eth.h"
-#include "hw/hw.h"
#include "hw/irq.h"
#include "hw/net/lan9118.h"
#include "hw/ptimer.h"
@@ -32,12 +31,8 @@
#ifdef DEBUG_LAN9118
#define DPRINTF(fmt, ...) \
do { printf("lan9118: " fmt , ## __VA_ARGS__); } while (0)
-#define BADF(fmt, ...) \
-do { hw_error("lan9118: error: " fmt , ## __VA_ARGS__);} while (0)
#else
#define DPRINTF(fmt, ...) do {} while(0)
-#define BADF(fmt, ...) \
-do { fprintf(stderr, "lan9118: error: " fmt , ## __VA_ARGS__);} while (0)
#endif
/* The tx and rx fifo ports are a range of aliased 32-bit registers */
@@ -848,7 +843,8 @@
case 30: /* Interrupt mask */
return s->phy_int_mask;
default:
- BADF("PHY read reg %d\n", reg);
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "do_phy_read: PHY read reg %d\n", reg);
return 0;
}
}
@@ -876,7 +872,8 @@
phy_update_irq(s);
break;
default:
- BADF("PHY write reg %d = 0x%04x\n", reg, val);
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "do_phy_write: PHY write reg %d = 0x%04x\n", reg, val);
}
}
@@ -1209,7 +1206,8 @@
return;
}
- hw_error("lan9118_write: Bad size 0x%x\n", size);
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "lan9118_16bit_mode_write: Bad size 0x%x\n", size);
}
static uint64_t lan9118_readl(void *opaque, hwaddr offset,
@@ -1324,7 +1322,8 @@
return lan9118_readl(opaque, offset, size);
}
- hw_error("lan9118_read: Bad size 0x%x\n", size);
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "lan9118_16bit_mode_read: Bad size 0x%x\n", size);
return 0;
}
diff --git a/hw/net/rocker/qmp-norocker.c b/hw/net/rocker/qmp-norocker.c
index 5ef4f93..f6c1196 100644
--- a/hw/net/rocker/qmp-norocker.c
+++ b/hw/net/rocker/qmp-norocker.c
@@ -1,6 +1,5 @@
/*
- * QMP Target options - Commands handled based on a target config
- * versus a host config
+ * QMP command stubs
*
* Copyright (c) 2015 David Ahern <dsahern@gmail.com>
*
@@ -18,17 +17,16 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-rocker.h"
-#include "qapi/qmp/qerror.h"
RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
{
- error_setg(errp, QERR_FEATURE_DISABLED, "rocker");
+ error_setg(errp, "rocker %s not found", name);
return NULL;
};
RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
{
- error_setg(errp, QERR_FEATURE_DISABLED, "rocker");
+ error_setg(errp, "rocker %s not found", name);
return NULL;
};
@@ -37,7 +35,7 @@
uint32_t tbl_id,
Error **errp)
{
- error_setg(errp, QERR_FEATURE_DISABLED, "rocker");
+ error_setg(errp, "rocker %s not found", name);
return NULL;
};
@@ -46,6 +44,6 @@
uint8_t type,
Error **errp)
{
- error_setg(errp, QERR_FEATURE_DISABLED, "rocker");
+ error_setg(errp, "rocker %s not found", name);
return NULL;
};
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index d2ab527..56559cd 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1441,7 +1441,7 @@
vmxnet3_setup_rx_filtering(s);
/* Cache fields from shared memory */
s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu);
- assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu < VMXNET3_MAX_MTU);
+ assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu <= VMXNET3_MAX_MTU);
VMW_CFPRN("MTU is %u", s->mtu);
s->max_rx_frags =
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 208c16f..cc51f98 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1789,7 +1789,6 @@
const char *default_devaddr)
{
const char *devaddr = nd->devaddr ? nd->devaddr : default_devaddr;
- GSList *list;
GPtrArray *pci_nic_models;
PCIBus *bus;
PCIDevice *pci_dev;
@@ -1804,33 +1803,7 @@
nd->model = g_strdup("virtio-net-pci");
}
- list = object_class_get_list_sorted(TYPE_PCI_DEVICE, false);
- pci_nic_models = g_ptr_array_new();
- while (list) {
- DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, list->data,
- TYPE_DEVICE);
- GSList *next;
- if (test_bit(DEVICE_CATEGORY_NETWORK, dc->categories) &&
- dc->user_creatable) {
- const char *name = object_class_get_name(list->data);
- /*
- * A network device might also be something else than a NIC, see
- * e.g. the "rocker" device. Thus we have to look for the "netdev"
- * property, too. Unfortunately, some devices like virtio-net only
- * create this property during instance_init, so we have to create
- * a temporary instance here to be able to check it.
- */
- Object *obj = object_new_with_class(OBJECT_CLASS(dc));
- if (object_property_find(obj, "netdev")) {
- g_ptr_array_add(pci_nic_models, (gpointer)name);
- }
- object_unref(obj);
- }
- next = list->next;
- g_slist_free_1(list);
- list = next;
- }
- g_ptr_array_add(pci_nic_models, NULL);
+ pci_nic_models = qemu_get_nic_models(TYPE_PCI_DEVICE);
if (qemu_show_nic_models(nd->model, (const char **)pci_nic_models->pdata)) {
exit(0);
diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c
index faa51aa..6891e3c 100644
--- a/hw/s390x/event-facility.c
+++ b/hw/s390x/event-facility.c
@@ -64,8 +64,7 @@
SCLPEventClass *event_class;
QTAILQ_FOREACH(kid, &ef->sbus.qbus.children, sibling) {
- DeviceState *qdev = kid->child;
- event = DO_UPCAST(SCLPEvent, qdev, qdev);
+ event = SCLP_EVENT(kid->child);
event_class = SCLP_EVENT_GET_CLASS(event);
if (event->event_pending &&
event_class->get_send_mask() & ef->receive_mask) {
diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c
index 3e32002..aed919a 100644
--- a/hw/s390x/s390-stattrib.c
+++ b/hw/s390x/s390-stattrib.c
@@ -182,17 +182,15 @@
return 0;
}
-static void cmma_state_pending(void *opaque,
- uint64_t *res_precopy_only,
- uint64_t *res_compatible,
- uint64_t *res_postcopy_only)
+static void cmma_state_pending(void *opaque, uint64_t *must_precopy,
+ uint64_t *can_postcopy)
{
S390StAttribState *sas = S390_STATTRIB(opaque);
S390StAttribClass *sac = S390_STATTRIB_GET_CLASS(sas);
long long res = sac->get_dirtycount(sas);
if (res >= 0) {
- *res_precopy_only += res;
+ *must_precopy += res;
}
}
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index d4e3608..97c9b1c 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -273,9 +273,11 @@
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
if (scsi_disk_req_check_error(r, ret, true)) {
goto done;
}
@@ -352,6 +354,7 @@
scsi_req_unref(&r->req);
}
+/* Called with AioContext lock held */
static void scsi_dma_complete(void *opaque, int ret)
{
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
@@ -360,14 +363,12 @@
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (ret < 0) {
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
} else {
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
}
scsi_dma_complete_noio(r, ret);
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
}
static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
@@ -393,10 +394,11 @@
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (ret < 0) {
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
} else {
@@ -446,10 +448,11 @@
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
assert (r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (ret < 0) {
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
} else {
@@ -530,10 +533,11 @@
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
assert (r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (ret < 0) {
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
} else {
@@ -1737,10 +1741,11 @@
SCSIDiskReq *r = data->r;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (scsi_disk_req_check_error(r, ret, true)) {
scsi_req_unref(&r->req);
g_free(data);
@@ -1816,9 +1821,11 @@
SCSIDiskReq *r = data->r;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
if (scsi_disk_req_check_error(r, ret, true)) {
goto done;
}
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 92cce20..ac9fa66 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -111,10 +111,11 @@
SCSIGenericReq *r = (SCSIGenericReq *)opaque;
SCSIDevice *s = r->req.dev;
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
+
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
scsi_command_complete_noio(r, ret);
aio_context_release(blk_get_aio_context(s->conf.blk));
}
@@ -269,11 +270,11 @@
SCSIDevice *s = r->req.dev;
int len;
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
+
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
-
if (ret || r->req.io_canceled) {
scsi_command_complete_noio(r, ret);
goto done;
@@ -386,11 +387,11 @@
trace_scsi_generic_write_complete(ret);
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
+
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
-
if (ret || r->req.io_canceled) {
scsi_command_complete_noio(r, ret);
goto done;
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 2b649ca..612c525 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -43,13 +43,11 @@
QEMUSGList qsgl;
QEMUIOVector resp_iov;
- union {
- /* Used for two-stage request submission */
- QTAILQ_ENTRY(VirtIOSCSIReq) next;
+ /* Used for two-stage request submission and TMFs deferred to BH */
+ QTAILQ_ENTRY(VirtIOSCSIReq) next;
- /* Used for cancellation of request during TMFs */
- int remaining;
- };
+ /* Used for cancellation of request during TMFs */
+ int remaining;
SCSIRequest *sreq;
size_t resp_size;
@@ -294,6 +292,122 @@
}
}
+static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)
+{
+ VirtIOSCSI *s = req->dev;
+ SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
+ BusChild *kid;
+ int target;
+
+ switch (req->req.tmf.subtype) {
+ case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
+ if (!d) {
+ req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
+ goto out;
+ }
+ if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
+ req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN;
+ goto out;
+ }
+ qatomic_inc(&s->resetting);
+ device_cold_reset(&d->qdev);
+ qatomic_dec(&s->resetting);
+ break;
+
+ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
+ target = req->req.tmf.lun[1];
+ qatomic_inc(&s->resetting);
+
+ rcu_read_lock();
+ QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
+ SCSIDevice *d1 = SCSI_DEVICE(kid->child);
+ if (d1->channel == 0 && d1->id == target) {
+ device_cold_reset(&d1->qdev);
+ }
+ }
+ rcu_read_unlock();
+
+ qatomic_dec(&s->resetting);
+ break;
+
+ default:
+ g_assert_not_reached();
+ break;
+ }
+
+out:
+ object_unref(OBJECT(d));
+
+ virtio_scsi_acquire(s);
+ virtio_scsi_complete_req(req);
+ virtio_scsi_release(s);
+}
+
+/* Some TMFs must be processed from the main loop thread */
+static void virtio_scsi_do_tmf_bh(void *opaque)
+{
+ VirtIOSCSI *s = opaque;
+ QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
+ VirtIOSCSIReq *req;
+ VirtIOSCSIReq *tmp;
+
+ GLOBAL_STATE_CODE();
+
+ virtio_scsi_acquire(s);
+
+ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
+ QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
+ QTAILQ_INSERT_TAIL(&reqs, req, next);
+ }
+
+ qemu_bh_delete(s->tmf_bh);
+ s->tmf_bh = NULL;
+
+ virtio_scsi_release(s);
+
+ QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) {
+ QTAILQ_REMOVE(&reqs, req, next);
+ virtio_scsi_do_one_tmf_bh(req);
+ }
+}
+
+static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
+{
+ VirtIOSCSIReq *req;
+ VirtIOSCSIReq *tmp;
+
+ GLOBAL_STATE_CODE();
+
+ virtio_scsi_acquire(s);
+
+ if (s->tmf_bh) {
+ qemu_bh_delete(s->tmf_bh);
+ s->tmf_bh = NULL;
+ }
+
+ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
+ QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
+
+ /* SAM-6 6.3.2 Hard reset */
+ req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE;
+ virtio_scsi_complete_req(req);
+ }
+
+ virtio_scsi_release(s);
+}
+
+static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req)
+{
+ VirtIOSCSI *s = req->dev;
+
+ QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next);
+
+ if (!s->tmf_bh) {
+ s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s);
+ qemu_bh_schedule(s->tmf_bh);
+ }
+}
+
/* Return 0 if the request is ready to be completed and return to guest;
* -EINPROGRESS if the request is submitted and will be completed later, in the
* case of async cancellation. */
@@ -301,8 +415,6 @@
{
SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
SCSIRequest *r, *next;
- BusChild *kid;
- int target;
int ret = 0;
virtio_scsi_ctx_check(s, d);
@@ -359,15 +471,9 @@
break;
case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
- if (!d) {
- goto fail;
- }
- if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
- goto incorrect_lun;
- }
- s->resetting++;
- device_cold_reset(&d->qdev);
- s->resetting--;
+ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
+ virtio_scsi_defer_tmf_to_bh(req);
+ ret = -EINPROGRESS;
break;
case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
@@ -410,22 +516,6 @@
}
break;
- case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
- target = req->req.tmf.lun[1];
- s->resetting++;
-
- rcu_read_lock();
- QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
- SCSIDevice *d1 = SCSI_DEVICE(kid->child);
- if (d1->channel == 0 && d1->id == target) {
- device_cold_reset(&d1->qdev);
- }
- }
- rcu_read_unlock();
-
- s->resetting--;
- break;
-
case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
default:
req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_REJECTED;
@@ -655,7 +745,7 @@
if (!req) {
return;
}
- if (req->dev->resetting) {
+ if (qatomic_read(&req->dev->resetting)) {
req->resp.cmd.response = VIRTIO_SCSI_S_RESET;
} else {
req->resp.cmd.response = VIRTIO_SCSI_S_ABORTED;
@@ -831,9 +921,12 @@
VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev);
assert(!s->dataplane_started);
- s->resetting++;
+
+ virtio_scsi_reset_tmf_bh(s);
+
+ qatomic_inc(&s->resetting);
bus_cold_reset(BUS(&s->bus));
- s->resetting--;
+ qatomic_dec(&s->resetting);
vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
@@ -1053,6 +1146,8 @@
VirtIOSCSI *s = VIRTIO_SCSI(dev);
Error *err = NULL;
+ QTAILQ_INIT(&s->tmf_bh_list);
+
virtio_scsi_common_realize(dev,
virtio_scsi_handle_ctrl,
virtio_scsi_handle_event,
@@ -1090,6 +1185,8 @@
{
VirtIOSCSI *s = VIRTIO_SCSI(dev);
+ virtio_scsi_reset_tmf_bh(s);
+
qbus_set_hotplug_handler(BUS(&s->bus), NULL);
virtio_scsi_common_unrealize(dev);
}
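The virtio-scsi hunks above defer LUN and I_T nexus resets to a bottom half in
the main loop: requests are queued, at most one BH is scheduled, and the BH
drains the whole queue. A stripped-down, single-threaded sketch of that shape
(plain C, not QEMU code; the request numbers are arbitrary):

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_REQS 8

    static int tmf_queue[MAX_REQS];
    static int tmf_queue_len;
    static bool tmf_bh_scheduled;

    static void tmf_bh(void)
    {
        /* Drain everything queued so far, then allow rescheduling. */
        for (int i = 0; i < tmf_queue_len; i++) {
            printf("processing TMF request %d\n", tmf_queue[i]);
        }
        tmf_queue_len = 0;
        tmf_bh_scheduled = false;
    }

    static void defer_tmf(int req)
    {
        tmf_queue[tmf_queue_len++] = req;
        if (!tmf_bh_scheduled) {      /* schedule the callback at most once */
            tmf_bh_scheduled = true;
        }
    }

    int main(void)
    {
        defer_tmf(1);
        defer_tmf(2);
        if (tmf_bh_scheduled) {
            tmf_bh();                 /* the event loop would run this later */
        }
        return 0;
    }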
diff --git a/hw/smbios/smbios-stub.c b/hw/smbios/smbios-stub.c
index 64e5ba9..e8808ad 100644
--- a/hw/smbios/smbios-stub.c
+++ b/hw/smbios/smbios-stub.c
@@ -21,11 +21,9 @@
*/
#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
#include "hw/firmware/smbios.h"
void smbios_entry_add(QemuOpts *opts, Error **errp)
{
- error_setg(errp, QERR_UNSUPPORTED);
+ g_assert_not_reached();
}
diff --git a/hw/ssi/ibex_spi_host.c b/hw/ssi/ibex_spi_host.c
index 57df462..1ee7d88 100644
--- a/hw/ssi/ibex_spi_host.c
+++ b/hw/ssi/ibex_spi_host.c
@@ -26,6 +26,7 @@
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/module.h"
+#include "hw/registerfields.h"
#include "hw/ssi/ibex_spi_host.h"
#include "hw/irq.h"
#include "hw/qdev-properties.h"
diff --git a/hw/ssi/meson.build b/hw/ssi/meson.build
index 702aa5e..904a471 100644
--- a/hw/ssi/meson.build
+++ b/hw/ssi/meson.build
@@ -1,6 +1,6 @@
softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('aspeed_smc.c'))
softmmu_ss.add(when: 'CONFIG_MSF2', if_true: files('mss-spi.c'))
-softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_fiu.c'))
+softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_fiu.c', 'npcm_pspi.c'))
softmmu_ss.add(when: 'CONFIG_PL022', if_true: files('pl022.c'))
softmmu_ss.add(when: 'CONFIG_SIFIVE_SPI', if_true: files('sifive_spi.c'))
softmmu_ss.add(when: 'CONFIG_SSI', if_true: files('ssi.c'))
diff --git a/hw/ssi/npcm_pspi.c b/hw/ssi/npcm_pspi.c
new file mode 100644
index 0000000..3fb9350
--- /dev/null
+++ b/hw/ssi/npcm_pspi.c
@@ -0,0 +1,221 @@
+/*
+ * Nuvoton NPCM Peripheral SPI Module (PSPI)
+ *
+ * Copyright 2023 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/irq.h"
+#include "hw/registerfields.h"
+#include "hw/ssi/npcm_pspi.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/units.h"
+
+#include "trace.h"
+
+REG16(PSPI_DATA, 0x0)
+REG16(PSPI_CTL1, 0x2)
+ FIELD(PSPI_CTL1, SPIEN, 0, 1)
+ FIELD(PSPI_CTL1, MOD, 2, 1)
+ FIELD(PSPI_CTL1, EIR, 5, 1)
+ FIELD(PSPI_CTL1, EIW, 6, 1)
+ FIELD(PSPI_CTL1, SCM, 7, 1)
+ FIELD(PSPI_CTL1, SCIDL, 8, 1)
+ FIELD(PSPI_CTL1, SCDV, 9, 7)
+REG16(PSPI_STAT, 0x4)
+ FIELD(PSPI_STAT, BSY, 0, 1)
+ FIELD(PSPI_STAT, RBF, 1, 1)
+
+static void npcm_pspi_update_irq(NPCMPSPIState *s)
+{
+ int level = 0;
+
+ /* Only fire IRQ when the module is enabled. */
+ if (FIELD_EX16(s->regs[R_PSPI_CTL1], PSPI_CTL1, SPIEN)) {
+ /* Update interrupt as BSY is cleared. */
+ if ((!FIELD_EX16(s->regs[R_PSPI_STAT], PSPI_STAT, BSY)) &&
+ FIELD_EX16(s->regs[R_PSPI_CTL1], PSPI_CTL1, EIW)) {
+ level = 1;
+ }
+
+ /* Update interrupt as RBF is set. */
+ if (FIELD_EX16(s->regs[R_PSPI_STAT], PSPI_STAT, RBF) &&
+ FIELD_EX16(s->regs[R_PSPI_CTL1], PSPI_CTL1, EIR)) {
+ level = 1;
+ }
+ }
+ qemu_set_irq(s->irq, level);
+}
+
+static uint16_t npcm_pspi_read_data(NPCMPSPIState *s)
+{
+ uint16_t value = s->regs[R_PSPI_DATA];
+
+ /* Clear the status bits as the value is read out. */
+ s->regs[R_PSPI_STAT] = 0;
+
+ return value;
+}
+
+static void npcm_pspi_write_data(NPCMPSPIState *s, uint16_t data)
+{
+ uint16_t value = 0;
+
+ if (FIELD_EX16(s->regs[R_PSPI_CTL1], PSPI_CTL1, MOD)) {
+ value = ssi_transfer(s->spi, extract16(data, 8, 8)) << 8;
+ }
+ value |= ssi_transfer(s->spi, extract16(data, 0, 8));
+ s->regs[R_PSPI_DATA] = value;
+
+ /* Mark data as available */
+ s->regs[R_PSPI_STAT] = R_PSPI_STAT_BSY_MASK | R_PSPI_STAT_RBF_MASK;
+}
+
+/* Control register read handler. */
+static uint64_t npcm_pspi_ctrl_read(void *opaque, hwaddr addr,
+ unsigned int size)
+{
+ NPCMPSPIState *s = opaque;
+ uint16_t value;
+
+ switch (addr) {
+ case A_PSPI_DATA:
+ value = npcm_pspi_read_data(s);
+ break;
+
+ case A_PSPI_CTL1:
+ value = s->regs[R_PSPI_CTL1];
+ break;
+
+ case A_PSPI_STAT:
+ value = s->regs[R_PSPI_STAT];
+ break;
+
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: write to invalid offset 0x%" PRIx64 "\n",
+ DEVICE(s)->canonical_path, addr);
+ return 0;
+ }
+ trace_npcm_pspi_ctrl_read(DEVICE(s)->canonical_path, addr, value);
+ npcm_pspi_update_irq(s);
+
+ return value;
+}
+
+/* Control register write handler. */
+static void npcm_pspi_ctrl_write(void *opaque, hwaddr addr, uint64_t v,
+ unsigned int size)
+{
+ NPCMPSPIState *s = opaque;
+ uint16_t value = v;
+
+ trace_npcm_pspi_ctrl_write(DEVICE(s)->canonical_path, addr, value);
+
+ switch (addr) {
+ case A_PSPI_DATA:
+ npcm_pspi_write_data(s, value);
+ break;
+
+ case A_PSPI_CTL1:
+ s->regs[R_PSPI_CTL1] = value;
+ break;
+
+ case A_PSPI_STAT:
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: write to read-only register PSPI_STAT: 0x%08"
+ PRIx64 "\n", DEVICE(s)->canonical_path, v);
+ break;
+
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: write to invalid offset 0x%" PRIx64 "\n",
+ DEVICE(s)->canonical_path, addr);
+ return;
+ }
+ npcm_pspi_update_irq(s);
+}
+
+static const MemoryRegionOps npcm_pspi_ctrl_ops = {
+ .read = npcm_pspi_ctrl_read,
+ .write = npcm_pspi_ctrl_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 2,
+ .unaligned = false,
+ },
+ .impl = {
+ .min_access_size = 2,
+ .max_access_size = 2,
+ .unaligned = false,
+ },
+};
+
+static void npcm_pspi_enter_reset(Object *obj, ResetType type)
+{
+ NPCMPSPIState *s = NPCM_PSPI(obj);
+
+ trace_npcm_pspi_enter_reset(DEVICE(obj)->canonical_path, type);
+ memset(s->regs, 0, sizeof(s->regs));
+}
+
+static void npcm_pspi_realize(DeviceState *dev, Error **errp)
+{
+ NPCMPSPIState *s = NPCM_PSPI(dev);
+ SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+ Object *obj = OBJECT(dev);
+
+ s->spi = ssi_create_bus(dev, "pspi");
+ memory_region_init_io(&s->mmio, obj, &npcm_pspi_ctrl_ops, s,
+ "mmio", 4 * KiB);
+ sysbus_init_mmio(sbd, &s->mmio);
+ sysbus_init_irq(sbd, &s->irq);
+}
+
+static const VMStateDescription vmstate_npcm_pspi = {
+ .name = "npcm-pspi",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT16_ARRAY(regs, NPCMPSPIState, NPCM_PSPI_NR_REGS),
+ VMSTATE_END_OF_LIST(),
+ },
+};
+
+
+static void npcm_pspi_class_init(ObjectClass *klass, void *data)
+{
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->desc = "NPCM Peripheral SPI Module";
+ dc->realize = npcm_pspi_realize;
+ dc->vmsd = &vmstate_npcm_pspi;
+ rc->phases.enter = npcm_pspi_enter_reset;
+}
+
+static const TypeInfo npcm_pspi_types[] = {
+ {
+ .name = TYPE_NPCM_PSPI,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(NPCMPSPIState),
+ .class_init = npcm_pspi_class_init,
+ },
+};
+DEFINE_TYPES(npcm_pspi_types);
diff --git a/hw/ssi/trace-events b/hw/ssi/trace-events
index c707d4a..2d5bd2b 100644
--- a/hw/ssi/trace-events
+++ b/hw/ssi/trace-events
@@ -21,6 +21,11 @@
npcm7xx_fiu_flash_read(const char *id, int cs, uint64_t addr, unsigned int size, uint64_t value) "%s[%d] offset: 0x%08" PRIx64 " size: %u value: 0x%" PRIx64
npcm7xx_fiu_flash_write(const char *id, unsigned cs, uint64_t addr, unsigned int size, uint64_t value) "%s[%d] offset: 0x%08" PRIx64 " size: %u value: 0x%" PRIx64
+# npcm_pspi.c
+npcm_pspi_enter_reset(const char *id, int reset_type) "%s reset type: %d"
+npcm_pspi_ctrl_read(const char *id, uint64_t addr, uint16_t data) "%s offset: 0x%03" PRIx64 " value: 0x%04" PRIx16
+npcm_pspi_ctrl_write(const char *id, uint64_t addr, uint16_t data) "%s offset: 0x%03" PRIx64 " value: 0x%04" PRIx16
+
# ibex_spi_host.c
ibex_spi_host_reset(const char *msg) "%s"
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 130e5d1..bab83c0 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -40,6 +40,8 @@
#include "trace.h"
#include "qapi/error.h"
#include "migration/migration.h"
+#include "migration/misc.h"
+#include "migration/blocker.h"
#include "sysemu/tpm.h"
VFIOGroupList vfio_group_list =
@@ -336,6 +338,58 @@
return true;
}
+static Error *multiple_devices_migration_blocker;
+
+static unsigned int vfio_migratable_device_num(void)
+{
+ VFIOGroup *group;
+ VFIODevice *vbasedev;
+ unsigned int device_num = 0;
+
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ QLIST_FOREACH(vbasedev, &group->device_list, next) {
+ if (vbasedev->migration) {
+ device_num++;
+ }
+ }
+ }
+
+ return device_num;
+}
+
+int vfio_block_multiple_devices_migration(Error **errp)
+{
+ int ret;
+
+ if (multiple_devices_migration_blocker ||
+ vfio_migratable_device_num() <= 1) {
+ return 0;
+ }
+
+ error_setg(&multiple_devices_migration_blocker,
+ "Migration is currently not supported with multiple "
+ "VFIO devices");
+ ret = migrate_add_blocker(multiple_devices_migration_blocker, errp);
+ if (ret < 0) {
+ error_free(multiple_devices_migration_blocker);
+ multiple_devices_migration_blocker = NULL;
+ }
+
+ return ret;
+}
+
+void vfio_unblock_multiple_devices_migration(void)
+{
+ if (!multiple_devices_migration_blocker ||
+ vfio_migratable_device_num() > 1) {
+ return;
+ }
+
+ migrate_del_blocker(multiple_devices_migration_blocker);
+ error_free(multiple_devices_migration_blocker);
+ multiple_devices_migration_blocker = NULL;
+}
+
static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
{
VFIOGroup *group;
@@ -354,8 +408,8 @@
return false;
}
- if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF)
- && (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING)) {
+ if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF &&
+ migration->device_state == VFIO_DEVICE_STATE_RUNNING) {
return false;
}
}
@@ -363,13 +417,16 @@
return true;
}
-static bool vfio_devices_all_running_and_saving(VFIOContainer *container)
+/*
+ * Check if all VFIO devices are running and migration is active, which is
+ * essentially equivalent to the migration being in pre-copy phase.
+ */
+static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
{
VFIOGroup *group;
VFIODevice *vbasedev;
- MigrationState *ms = migrate_get_current();
- if (!migration_is_setup_or_active(ms->state)) {
+ if (!migration_is_active(migrate_get_current())) {
return false;
}
@@ -381,8 +438,7 @@
return false;
}
- if ((migration->device_state & VFIO_DEVICE_STATE_V1_SAVING) &&
- (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING)) {
+ if (migration->device_state == VFIO_DEVICE_STATE_RUNNING) {
continue;
} else {
return false;
@@ -461,7 +517,7 @@
};
if (iotlb && container->dirty_pages_supported &&
- vfio_devices_all_running_and_saving(container)) {
+ vfio_devices_all_running_and_mig_active(container)) {
return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
}
@@ -488,6 +544,12 @@
return -errno;
}
+ if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
+ cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size,
+ tcg_enabled() ? DIRTY_CLIENTS_ALL :
+ DIRTY_CLIENTS_NOCODE);
+ }
+
return 0;
}
@@ -1201,6 +1263,10 @@
.argsz = sizeof(dirty),
};
+ if (!container->dirty_pages_supported) {
+ return;
+ }
+
if (start) {
dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
} else {
@@ -1236,6 +1302,13 @@
uint64_t pages;
int ret;
+ if (!container->dirty_pages_supported) {
+ cpu_physical_memory_set_dirty_range(ram_addr, size,
+ tcg_enabled() ? DIRTY_CLIENTS_ALL :
+ DIRTY_CLIENTS_NOCODE);
+ return 0;
+ }
+
dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
@@ -1409,8 +1482,7 @@
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
- if (vfio_listener_skipped_section(section) ||
- !container->dirty_pages_supported) {
+ if (vfio_listener_skipped_section(section)) {
return;
}
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index b3318f0..a2c3d9b 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -10,6 +10,7 @@
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qemu/cutils.h"
+#include "qemu/units.h"
#include <linux/vfio.h>
#include <sys/ioctl.h>
@@ -44,310 +45,124 @@
#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
+/*
+ * This is an arbitrary size based on migration of mlx5 devices, where typically
+ * total device migration size is on the order of 100s of MB. Testing with
+ * larger values, e.g. 128MB and 1GB, did not show a performance improvement.
+ */
+#define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB)
+
static int64_t bytes_transferred;
-static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
- off_t off, bool iswrite)
+static const char *mig_state_to_str(enum vfio_device_mig_state state)
{
- int ret;
-
- ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
- pread(vbasedev->fd, val, count, off);
- if (ret < count) {
- error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
- HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
- vbasedev->name, off, strerror(errno));
- return (ret < 0) ? ret : -EINVAL;
+ switch (state) {
+ case VFIO_DEVICE_STATE_ERROR:
+ return "ERROR";
+ case VFIO_DEVICE_STATE_STOP:
+ return "STOP";
+ case VFIO_DEVICE_STATE_RUNNING:
+ return "RUNNING";
+ case VFIO_DEVICE_STATE_STOP_COPY:
+ return "STOP_COPY";
+ case VFIO_DEVICE_STATE_RESUMING:
+ return "RESUMING";
+ default:
+ return "UNKNOWN STATE";
}
- return 0;
}
-static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count,
- off_t off, bool iswrite)
-{
- int ret, done = 0;
- __u8 *tbuf = buf;
-
- while (count) {
- int bytes = 0;
-
- if (count >= 8 && !(off % 8)) {
- bytes = 8;
- } else if (count >= 4 && !(off % 4)) {
- bytes = 4;
- } else if (count >= 2 && !(off % 2)) {
- bytes = 2;
- } else {
- bytes = 1;
- }
-
- ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite);
- if (ret) {
- return ret;
- }
-
- count -= bytes;
- done += bytes;
- off += bytes;
- tbuf += bytes;
- }
- return done;
-}
-
-#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false)
-#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true)
-
-#define VFIO_MIG_STRUCT_OFFSET(f) \
- offsetof(struct vfio_device_migration_info, f)
-/*
- * Change the device_state register for device @vbasedev. Bits set in @mask
- * are preserved, bits set in @value are set, and bits not set in either @mask
- * or @value are cleared in device_state. If the register cannot be accessed,
- * the resulting state would be invalid, or the device enters an error state,
- * an error is returned.
- */
-
-static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
- uint32_t value)
+static int vfio_migration_set_state(VFIODevice *vbasedev,
+ enum vfio_device_mig_state new_state,
+ enum vfio_device_mig_state recover_state)
{
VFIOMigration *migration = vbasedev->migration;
- VFIORegion *region = &migration->region;
- off_t dev_state_off = region->fd_offset +
- VFIO_MIG_STRUCT_OFFSET(device_state);
- uint32_t device_state;
+ uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
+ sizeof(struct vfio_device_feature_mig_state),
+ sizeof(uint64_t))] = {};
+ struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
+ struct vfio_device_feature_mig_state *mig_state =
+ (struct vfio_device_feature_mig_state *)feature->data;
int ret;
- ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
- dev_state_off);
- if (ret < 0) {
+ feature->argsz = sizeof(buf);
+ feature->flags =
+ VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
+ mig_state->device_state = new_state;
+ if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
+ /* Try to set the device in some good state */
+ ret = -errno;
+
+ if (recover_state == VFIO_DEVICE_STATE_ERROR) {
+ error_report("%s: Failed setting device state to %s, err: %s. "
+ "Recover state is ERROR. Resetting device",
+ vbasedev->name, mig_state_to_str(new_state),
+ strerror(errno));
+
+ goto reset_device;
+ }
+
+ error_report(
+ "%s: Failed setting device state to %s, err: %s. Setting device in recover state %s",
+ vbasedev->name, mig_state_to_str(new_state),
+ strerror(errno), mig_state_to_str(recover_state));
+
+ mig_state->device_state = recover_state;
+ if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
+ ret = -errno;
+ error_report(
+ "%s: Failed setting device in recover state, err: %s. Resetting device",
+ vbasedev->name, strerror(errno));
+
+ goto reset_device;
+ }
+
+ migration->device_state = recover_state;
+
return ret;
}
- device_state = (device_state & mask) | value;
-
- if (!VFIO_DEVICE_STATE_VALID(device_state)) {
- return -EINVAL;
- }
-
- ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state),
- dev_state_off);
- if (ret < 0) {
- int rret;
-
- rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
- dev_state_off);
-
- if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) {
- hw_error("%s: Device in error state 0x%x", vbasedev->name,
- device_state);
- return rret ? rret : -EIO;
- }
- return ret;
- }
-
- migration->device_state = device_state;
- trace_vfio_migration_set_state(vbasedev->name, device_state);
- return 0;
-}
-
-static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,
- uint64_t data_size, uint64_t *size)
-{
- void *ptr = NULL;
- uint64_t limit = 0;
- int i;
-
- if (!region->mmaps) {
- if (size) {
- *size = MIN(data_size, region->size - data_offset);
- }
- return ptr;
- }
-
- for (i = 0; i < region->nr_mmaps; i++) {
- VFIOMmap *map = region->mmaps + i;
-
- if ((data_offset >= map->offset) &&
- (data_offset < map->offset + map->size)) {
-
- /* check if data_offset is within sparse mmap areas */
- ptr = map->mmap + data_offset - map->offset;
- if (size) {
- *size = MIN(data_size, map->offset + map->size - data_offset);
- }
- break;
- } else if ((data_offset < map->offset) &&
- (!limit || limit > map->offset)) {
+ migration->device_state = new_state;
+ if (mig_state->data_fd != -1) {
+ if (migration->data_fd != -1) {
/*
- * data_offset is not within sparse mmap areas, find size of
- * non-mapped area. Check through all list since region->mmaps list
- * is not sorted.
+ * This can happen if the device is asynchronously reset and
+ * terminates a data transfer.
*/
- limit = map->offset;
- }
- }
+ error_report("%s: data_fd out of sync", vbasedev->name);
+ close(mig_state->data_fd);
- if (!ptr && size) {
- *size = limit ? MIN(data_size, limit - data_offset) : data_size;
- }
- return ptr;
-}
-
-static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
-{
- VFIOMigration *migration = vbasedev->migration;
- VFIORegion *region = &migration->region;
- uint64_t data_offset = 0, data_size = 0, sz;
- int ret;
-
- ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
- region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
- if (ret < 0) {
- return ret;
- }
-
- ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
- region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
- if (ret < 0) {
- return ret;
- }
-
- trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
- migration->pending_bytes);
-
- qemu_put_be64(f, data_size);
- sz = data_size;
-
- while (sz) {
- void *buf;
- uint64_t sec_size;
- bool buf_allocated = false;
-
- buf = get_data_section_size(region, data_offset, sz, &sec_size);
-
- if (!buf) {
- buf = g_try_malloc(sec_size);
- if (!buf) {
- error_report("%s: Error allocating buffer ", __func__);
- return -ENOMEM;
- }
- buf_allocated = true;
-
- ret = vfio_mig_read(vbasedev, buf, sec_size,
- region->fd_offset + data_offset);
- if (ret < 0) {
- g_free(buf);
- return ret;
- }
+ return -EBADF;
}
- qemu_put_buffer(f, buf, sec_size);
-
- if (buf_allocated) {
- g_free(buf);
- }
- sz -= sec_size;
- data_offset += sec_size;
+ migration->data_fd = mig_state->data_fd;
}
- ret = qemu_file_get_error(f);
+ trace_vfio_migration_set_state(vbasedev->name, mig_state_to_str(new_state));
- if (!ret && size) {
- *size = data_size;
+ return 0;
+
+reset_device:
+ if (ioctl(vbasedev->fd, VFIO_DEVICE_RESET)) {
+ hw_error("%s: Failed resetting device, err: %s", vbasedev->name,
+ strerror(errno));
}
- bytes_transferred += data_size;
+ migration->device_state = VFIO_DEVICE_STATE_RUNNING;
+
return ret;
}
static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
uint64_t data_size)
{
- VFIORegion *region = &vbasedev->migration->region;
- uint64_t data_offset = 0, size, report_size;
- int ret;
-
- do {
- ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
- region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
- if (ret < 0) {
- return ret;
- }
-
- if (data_offset + data_size > region->size) {
- /*
- * If data_size is greater than the data section of migration region
- * then iterate the write buffer operation. This case can occur if
- * size of migration region at destination is smaller than size of
- * migration region at source.
- */
- report_size = size = region->size - data_offset;
- data_size -= size;
- } else {
- report_size = size = data_size;
- data_size = 0;
- }
-
- trace_vfio_load_state_device_data(vbasedev->name, data_offset, size);
-
- while (size) {
- void *buf;
- uint64_t sec_size;
- bool buf_alloc = false;
-
- buf = get_data_section_size(region, data_offset, size, &sec_size);
-
- if (!buf) {
- buf = g_try_malloc(sec_size);
- if (!buf) {
- error_report("%s: Error allocating buffer ", __func__);
- return -ENOMEM;
- }
- buf_alloc = true;
- }
-
- qemu_get_buffer(f, buf, sec_size);
-
- if (buf_alloc) {
- ret = vfio_mig_write(vbasedev, buf, sec_size,
- region->fd_offset + data_offset);
- g_free(buf);
-
- if (ret < 0) {
- return ret;
- }
- }
- size -= sec_size;
- data_offset += sec_size;
- }
-
- ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
- region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
- if (ret < 0) {
- return ret;
- }
- } while (data_size);
-
- return 0;
-}
-
-static int vfio_update_pending(VFIODevice *vbasedev)
-{
VFIOMigration *migration = vbasedev->migration;
- VFIORegion *region = &migration->region;
- uint64_t pending_bytes = 0;
int ret;
- ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
- region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes));
- if (ret < 0) {
- migration->pending_bytes = 0;
- return ret;
- }
+ ret = qemu_file_get_to_fd(f, migration->data_fd, data_size);
+ trace_vfio_load_state_device_data(vbasedev->name, data_size, ret);
- migration->pending_bytes = pending_bytes;
- trace_vfio_update_pending(vbasedev->name, pending_bytes);
- return 0;
+ return ret;
}
static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
@@ -398,9 +213,55 @@
{
VFIOMigration *migration = vbasedev->migration;
- if (migration->region.mmaps) {
- vfio_region_unmap(&migration->region);
+ close(migration->data_fd);
+ migration->data_fd = -1;
+}
+
+static int vfio_query_stop_copy_size(VFIODevice *vbasedev,
+ uint64_t *stop_copy_size)
+{
+ uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
+ sizeof(struct vfio_device_feature_mig_data_size),
+ sizeof(uint64_t))] = {};
+ struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
+ struct vfio_device_feature_mig_data_size *mig_data_size =
+ (struct vfio_device_feature_mig_data_size *)feature->data;
+
+ feature->argsz = sizeof(buf);
+ feature->flags =
+ VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIG_DATA_SIZE;
+
+ if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
+ return -errno;
}
+
+ *stop_copy_size = mig_data_size->stop_copy_length;
+
+ return 0;
+}
+
+/* Returns 1 if end-of-stream is reached, 0 if more data and -errno if error */
+static int vfio_save_block(QEMUFile *f, VFIOMigration *migration)
+{
+ ssize_t data_size;
+
+ data_size = read(migration->data_fd, migration->data_buffer,
+ migration->data_buffer_size);
+ if (data_size < 0) {
+ return -errno;
+ }
+ if (data_size == 0) {
+ return 1;
+ }
+
+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
+ qemu_put_be64(f, data_size);
+ qemu_put_buffer(f, migration->data_buffer, data_size);
+ bytes_transferred += data_size;
+
+ trace_vfio_save_block(migration->vbasedev->name, data_size);
+
+ return qemu_file_get_error(f);
}
/* ---------------------------------------------------------------------- */
@@ -409,172 +270,100 @@
{
VFIODevice *vbasedev = opaque;
VFIOMigration *migration = vbasedev->migration;
- int ret;
-
- trace_vfio_save_setup(vbasedev->name);
+ uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
- if (migration->region.mmaps) {
- /*
- * Calling vfio_region_mmap() from migration thread. Memory API called
- * from this function require locking the iothread when called from
- * outside the main loop thread.
- */
- qemu_mutex_lock_iothread();
- ret = vfio_region_mmap(&migration->region);
- qemu_mutex_unlock_iothread();
- if (ret) {
- error_report("%s: Failed to mmap VFIO migration region: %s",
- vbasedev->name, strerror(-ret));
- error_report("%s: Falling back to slow path", vbasedev->name);
- }
+ vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
+ migration->data_buffer_size = MIN(VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE,
+ stop_copy_size);
+ migration->data_buffer = g_try_malloc0(migration->data_buffer_size);
+ if (!migration->data_buffer) {
+ error_report("%s: Failed to allocate migration data buffer",
+ vbasedev->name);
+ return -ENOMEM;
}
- ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
- VFIO_DEVICE_STATE_V1_SAVING);
- if (ret) {
- error_report("%s: Failed to set state SAVING", vbasedev->name);
- return ret;
- }
+ trace_vfio_save_setup(vbasedev->name, migration->data_buffer_size);
qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
- ret = qemu_file_get_error(f);
- if (ret) {
- return ret;
- }
-
- return 0;
+ return qemu_file_get_error(f);
}
static void vfio_save_cleanup(void *opaque)
{
VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ g_free(migration->data_buffer);
+ migration->data_buffer = NULL;
vfio_migration_cleanup(vbasedev);
trace_vfio_save_cleanup(vbasedev->name);
}
-static void vfio_state_pending(void *opaque,
- uint64_t *res_precopy_only,
- uint64_t *res_compatible,
- uint64_t *res_postcopy_only)
+/*
+ * Migration size of VFIO devices can be as little as a few KBs or as big as
+ * many GBs. This value should be big enough to cover the worst case.
+ */
+#define VFIO_MIG_STOP_COPY_SIZE (100 * GiB)
+
+/*
+ * Only the exact function is implemented, not the estimate one: during the
+ * pre-copy phase of migration the estimate function is called repeatedly while
+ * the pending RAM size is over the threshold, so migration can't converge and
+ * querying the VFIO device pending data size is useless.
+ */
+static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy,
+ uint64_t *can_postcopy)
{
VFIODevice *vbasedev = opaque;
- VFIOMigration *migration = vbasedev->migration;
- int ret;
+ uint64_t stop_copy_size = VFIO_MIG_STOP_COPY_SIZE;
- ret = vfio_update_pending(vbasedev);
- if (ret) {
- return;
- }
+ /*
+ * If getting pending migration size fails, VFIO_MIG_STOP_COPY_SIZE is
+     * reported so the downtime limit won't be violated.
+ */
+ vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
+ *must_precopy += stop_copy_size;
- *res_precopy_only += migration->pending_bytes;
-
- trace_vfio_state_pending(vbasedev->name, *res_precopy_only,
- *res_postcopy_only, *res_compatible);
+ trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy,
+ stop_copy_size);
}
-static int vfio_save_iterate(QEMUFile *f, void *opaque)
+static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
{
VFIODevice *vbasedev = opaque;
- VFIOMigration *migration = vbasedev->migration;
- uint64_t data_size;
int ret;
- qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
-
- if (migration->pending_bytes == 0) {
- ret = vfio_update_pending(vbasedev);
- if (ret) {
- return ret;
- }
-
- if (migration->pending_bytes == 0) {
- qemu_put_be64(f, 0);
- qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
- /* indicates data finished, goto complete phase */
- return 1;
- }
- }
-
- ret = vfio_save_buffer(f, vbasedev, &data_size);
+ /* We reach here with device state STOP only */
+ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
+ VFIO_DEVICE_STATE_STOP);
if (ret) {
- error_report("%s: vfio_save_buffer failed %s", vbasedev->name,
- strerror(errno));
return ret;
}
- qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+ do {
+ ret = vfio_save_block(f, vbasedev->migration);
+ if (ret < 0) {
+ return ret;
+ }
+ } while (!ret);
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
ret = qemu_file_get_error(f);
if (ret) {
return ret;
}
/*
- * Reset pending_bytes as state_pending* are not called during
- * savevm or snapshot case, in such case vfio_update_pending() at
- * the start of this function updates pending_bytes.
+ * If setting the device in STOP state fails, the device should be reset.
+ * To do so, use ERROR state as a recover state.
*/
- migration->pending_bytes = 0;
- trace_vfio_save_iterate(vbasedev->name, data_size);
- return 0;
-}
+ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP,
+ VFIO_DEVICE_STATE_ERROR);
+ trace_vfio_save_complete_precopy(vbasedev->name, ret);
-static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
-{
- VFIODevice *vbasedev = opaque;
- VFIOMigration *migration = vbasedev->migration;
- uint64_t data_size;
- int ret;
-
- ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_RUNNING,
- VFIO_DEVICE_STATE_V1_SAVING);
- if (ret) {
- error_report("%s: Failed to set state STOP and SAVING",
- vbasedev->name);
- return ret;
- }
-
- ret = vfio_update_pending(vbasedev);
- if (ret) {
- return ret;
- }
-
- while (migration->pending_bytes > 0) {
- qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
- ret = vfio_save_buffer(f, vbasedev, &data_size);
- if (ret < 0) {
- error_report("%s: Failed to save buffer", vbasedev->name);
- return ret;
- }
-
- if (data_size == 0) {
- break;
- }
-
- ret = vfio_update_pending(vbasedev);
- if (ret) {
- return ret;
- }
- }
-
- qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
-
- ret = qemu_file_get_error(f);
- if (ret) {
- return ret;
- }
-
- ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_SAVING, 0);
- if (ret) {
- error_report("%s: Failed to set state STOPPED", vbasedev->name);
- return ret;
- }
-
- trace_vfio_save_complete_precopy(vbasedev->name);
return ret;
}
@@ -594,28 +383,9 @@
static int vfio_load_setup(QEMUFile *f, void *opaque)
{
VFIODevice *vbasedev = opaque;
- VFIOMigration *migration = vbasedev->migration;
- int ret = 0;
- if (migration->region.mmaps) {
- ret = vfio_region_mmap(&migration->region);
- if (ret) {
- error_report("%s: Failed to mmap VFIO migration region %d: %s",
- vbasedev->name, migration->region.nr,
- strerror(-ret));
- error_report("%s: Falling back to slow path", vbasedev->name);
- }
- }
-
- ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
- VFIO_DEVICE_STATE_V1_RESUMING);
- if (ret) {
- error_report("%s: Failed to set state RESUMING", vbasedev->name);
- if (migration->region.mmaps) {
- vfio_region_unmap(&migration->region);
- }
- }
- return ret;
+ return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
+ vbasedev->migration->device_state);
}
static int vfio_load_cleanup(void *opaque)
@@ -624,6 +394,7 @@
vfio_migration_cleanup(vbasedev);
trace_vfio_load_cleanup(vbasedev->name);
+
return 0;
}
@@ -681,12 +452,10 @@
return ret;
}
-static SaveVMHandlers savevm_vfio_handlers = {
+static const SaveVMHandlers savevm_vfio_handlers = {
.save_setup = vfio_save_setup,
.save_cleanup = vfio_save_cleanup,
- .state_pending_exact = vfio_state_pending,
- .state_pending_estimate = vfio_state_pending,
- .save_live_iterate = vfio_save_iterate,
+ .state_pending_exact = vfio_state_pending_exact,
.save_live_complete_precopy = vfio_save_complete_precopy,
.save_state = vfio_save_state,
.load_setup = vfio_load_setup,
@@ -699,56 +468,33 @@
static void vfio_vmstate_change(void *opaque, bool running, RunState state)
{
VFIODevice *vbasedev = opaque;
- VFIOMigration *migration = vbasedev->migration;
- uint32_t value, mask;
+ enum vfio_device_mig_state new_state;
int ret;
- if (vbasedev->migration->vm_running == running) {
- return;
- }
-
if (running) {
- /*
- * Here device state can have one of _SAVING, _RESUMING or _STOP bit.
- * Transition from _SAVING to _RUNNING can happen if there is migration
- * failure, in that case clear _SAVING bit.
- * Transition from _RESUMING to _RUNNING occurs during resuming
- * phase, in that case clear _RESUMING bit.
- * In both the above cases, set _RUNNING bit.
- */
- mask = ~VFIO_DEVICE_STATE_MASK;
- value = VFIO_DEVICE_STATE_V1_RUNNING;
+ new_state = VFIO_DEVICE_STATE_RUNNING;
} else {
- /*
- * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset
- * _RUNNING bit
- */
- mask = ~VFIO_DEVICE_STATE_V1_RUNNING;
-
- /*
- * When VM state transition to stop for savevm command, device should
- * start saving data.
- */
- if (state == RUN_STATE_SAVE_VM) {
- value = VFIO_DEVICE_STATE_V1_SAVING;
- } else {
- value = 0;
- }
+ new_state = VFIO_DEVICE_STATE_STOP;
}
- ret = vfio_migration_set_state(vbasedev, mask, value);
+ /*
+ * If setting the device in new_state fails, the device should be reset.
+ * To do so, use ERROR state as a recover state.
+ */
+ ret = vfio_migration_set_state(vbasedev, new_state,
+ VFIO_DEVICE_STATE_ERROR);
if (ret) {
/*
* Migration should be aborted in this case, but vm_state_notify()
* currently does not support reporting failures.
*/
- error_report("%s: Failed to set device state 0x%x", vbasedev->name,
- (migration->device_state & mask) | value);
- qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
+ if (migrate_get_current()->to_dst_file) {
+ qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
+ }
}
- vbasedev->migration->vm_running = running;
+
trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
- (migration->device_state & mask) | value);
+ mig_state_to_str(new_state));
}
static void vfio_migration_state_notifier(Notifier *notifier, void *data)
@@ -757,7 +503,6 @@
VFIOMigration *migration = container_of(notifier, VFIOMigration,
migration_state);
VFIODevice *vbasedev = migration->vbasedev;
- int ret;
trace_vfio_migration_state_notifier(vbasedev->name,
MigrationStatus_str(s->state));
@@ -767,34 +512,57 @@
case MIGRATION_STATUS_CANCELLED:
case MIGRATION_STATUS_FAILED:
bytes_transferred = 0;
- ret = vfio_migration_set_state(vbasedev,
- ~(VFIO_DEVICE_STATE_V1_SAVING |
- VFIO_DEVICE_STATE_V1_RESUMING),
- VFIO_DEVICE_STATE_V1_RUNNING);
- if (ret) {
- error_report("%s: Failed to set state RUNNING", vbasedev->name);
- }
+ /*
+ * If setting the device in RUNNING state fails, the device should
+ * be reset. To do so, use ERROR state as a recover state.
+ */
+ vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RUNNING,
+ VFIO_DEVICE_STATE_ERROR);
}
}
static void vfio_migration_exit(VFIODevice *vbasedev)
{
- VFIOMigration *migration = vbasedev->migration;
-
- vfio_region_exit(&migration->region);
- vfio_region_finalize(&migration->region);
g_free(vbasedev->migration);
vbasedev->migration = NULL;
}
-static int vfio_migration_init(VFIODevice *vbasedev,
- struct vfio_region_info *info)
+static int vfio_migration_query_flags(VFIODevice *vbasedev, uint64_t *mig_flags)
+{
+ uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
+ sizeof(struct vfio_device_feature_migration),
+ sizeof(uint64_t))] = {};
+ struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
+ struct vfio_device_feature_migration *mig =
+ (struct vfio_device_feature_migration *)feature->data;
+
+ feature->argsz = sizeof(buf);
+ feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION;
+ if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
+ if (errno == ENOTTY) {
+ error_report("%s: VFIO migration is not supported in kernel",
+ vbasedev->name);
+ } else {
+ error_report("%s: Failed to query VFIO migration support, err: %s",
+ vbasedev->name, strerror(errno));
+ }
+
+ return -errno;
+ }
+
+ *mig_flags = mig->flags;
+
+ return 0;
+}
+
+static int vfio_migration_init(VFIODevice *vbasedev)
{
int ret;
Object *obj;
VFIOMigration *migration;
char id[256] = "";
g_autofree char *path = NULL, *oid = NULL;
+ uint64_t mig_flags = 0;
if (!vbasedev->ops->vfio_get_object) {
return -EINVAL;
@@ -805,27 +573,21 @@
return -EINVAL;
}
- vbasedev->migration = g_new0(VFIOMigration, 1);
- vbasedev->migration->device_state = VFIO_DEVICE_STATE_V1_RUNNING;
- vbasedev->migration->vm_running = runstate_is_running();
-
- ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region,
- info->index, "migration");
+ ret = vfio_migration_query_flags(vbasedev, &mig_flags);
if (ret) {
- error_report("%s: Failed to setup VFIO migration region %d: %s",
- vbasedev->name, info->index, strerror(-ret));
- goto err;
+ return ret;
}
- if (!vbasedev->migration->region.size) {
- error_report("%s: Invalid zero-sized VFIO migration region %d",
- vbasedev->name, info->index);
- ret = -EINVAL;
- goto err;
+ /* Basic migration functionality must be supported */
+ if (!(mig_flags & VFIO_MIGRATION_STOP_COPY)) {
+ return -EOPNOTSUPP;
}
+ vbasedev->migration = g_new0(VFIOMigration, 1);
migration = vbasedev->migration;
migration->vbasedev = vbasedev;
+ migration->device_state = VFIO_DEVICE_STATE_RUNNING;
+ migration->data_fd = -1;
oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
if (oid) {
@@ -843,11 +605,8 @@
vbasedev);
migration->migration_state.notify = vfio_migration_state_notifier;
add_migration_state_change_notifier(&migration->migration_state);
- return 0;
-err:
- vfio_migration_exit(vbasedev);
- return ret;
+ return 0;
}
/* ---------------------------------------------------------------------- */
@@ -859,35 +618,28 @@
int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
{
- VFIOContainer *container = vbasedev->group->container;
- struct vfio_region_info *info = NULL;
int ret = -ENOTSUP;
- if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
+ if (!vbasedev->enable_migration) {
goto add_blocker;
}
- ret = vfio_get_dev_region_info(vbasedev,
- VFIO_REGION_TYPE_MIGRATION_DEPRECATED,
- VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED,
- &info);
+ ret = vfio_migration_init(vbasedev);
if (ret) {
goto add_blocker;
}
- ret = vfio_migration_init(vbasedev, info);
+ ret = vfio_block_multiple_devices_migration(errp);
if (ret) {
- goto add_blocker;
+ return ret;
}
- trace_vfio_migration_probe(vbasedev->name, info->index);
- g_free(info);
+ trace_vfio_migration_probe(vbasedev->name);
return 0;
add_blocker:
error_setg(&vbasedev->migration_blocker,
"VFIO device doesn't support migration");
- g_free(info);
ret = migrate_add_blocker(vbasedev->migration_blocker, errp);
if (ret < 0) {
@@ -906,6 +658,7 @@
qemu_del_vm_change_state_handler(migration->vm_state);
unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev);
vfio_migration_exit(vbasedev);
+ vfio_unblock_multiple_devices_migration();
}
if (vbasedev->migration_blocker) {
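The three new feature helpers above (vfio_migration_set_state(),
vfio_query_stop_copy_size() and vfio_migration_query_flags()) share one
packing pattern: the fixed struct vfio_device_feature header and the
feature-specific payload sit in a single uint64_t-aligned buffer that is
handed to one VFIO_DEVICE_FEATURE ioctl. A stripped-down sketch of that
pattern, assuming the v2 migration uapi from <linux/vfio.h> and omitting the
recover-state and data_fd handling done by vfio_migration_set_state():

#include <linux/vfio.h>
#include <sys/ioctl.h>
#include <errno.h>
#include <stdint.h>
#include <string.h>

static int set_mig_state(int device_fd, enum vfio_device_mig_state state)
{
    /* Header plus payload in one uint64_t-aligned buffer, as above. */
    uint64_t buf[(sizeof(struct vfio_device_feature) +
                  sizeof(struct vfio_device_feature_mig_state) +
                  sizeof(uint64_t) - 1) / sizeof(uint64_t)];
    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
    struct vfio_device_feature_mig_state *mig =
        (struct vfio_device_feature_mig_state *)feature->data;

    memset(buf, 0, sizeof(buf));
    feature->argsz = sizeof(buf);
    feature->flags = VFIO_DEVICE_FEATURE_SET |
                     VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
    mig->device_state = state;

    /* One ioctl selects the feature and carries its payload. */
    return ioctl(device_fd, VFIO_DEVICE_FEATURE, feature) ? -errno : 0;
}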
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 52de1c8..669d9fe 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -119,6 +119,8 @@
vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8"
vfio_dma_unmap_overflow_workaround(void) ""
+vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64
+vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
# platform.c
vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d"
@@ -148,21 +150,17 @@
vfio_display_edid_write_error(void) ""
# migration.c
-vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
-vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
-vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
-vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
-vfio_save_setup(const char *name) " (%s)"
-vfio_save_cleanup(const char *name) " (%s)"
-vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64
-vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64
-vfio_save_device_config_state(const char *name) " (%s)"
-vfio_state_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64
-vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d"
-vfio_save_complete_precopy(const char *name) " (%s)"
+vfio_load_cleanup(const char *name) " (%s)"
vfio_load_device_config_state(const char *name) " (%s)"
vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
-vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64
-vfio_load_cleanup(const char *name) " (%s)"
-vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64
-vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
+vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 0x%"PRIx64" ret %d"
+vfio_migration_probe(const char *name) " (%s)"
+vfio_migration_set_state(const char *name, const char *state) " (%s) state %s"
+vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
+vfio_save_block(const char *name, int data_size) " (%s) data_size %d"
+vfio_save_cleanup(const char *name) " (%s)"
+vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
+vfio_save_device_config_state(const char *name) " (%s)"
+vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64
+vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64
+vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s"
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index f504973..83fc20e 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -273,6 +273,7 @@
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VHostUserFS *fs = VHOST_USER_FS(dev);
+ struct vhost_virtqueue *vhost_vqs = fs->vhost_dev.vqs;
int i;
/* This will stop vhost backend if appropriate. */
@@ -288,8 +289,7 @@
}
g_free(fs->req_vqs);
virtio_cleanup(vdev);
- g_free(fs->vhost_dev.vqs);
- fs->vhost_dev.vqs = NULL;
+ g_free(vhost_vqs);
}
static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev)
diff --git a/include/block/block-common.h b/include/block/block-common.h
index 469300f..b5122ef 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -54,6 +54,20 @@
#define co_wrapper_bdrv_rdlock no_coroutine_fn
#define co_wrapper_mixed_bdrv_rdlock no_coroutine_fn coroutine_mixed_fn
+/*
+ * no_co_wrapper: Function specifier used by block-coroutine-wrapper.py
+ *
+ * Function specifier which does nothing but mark functions to be generated by
+ * scripts/block-coroutine-wrapper.py.
+ *
+ * A no_co_wrapper function declaration creates a coroutine_fn wrapper around
+ * functions that must not be called in coroutine context. It achieves this by
+ * scheduling a BH (bottom half) that runs the respective non-coroutine
+ * function. The coroutine yields after scheduling the BH and is reentered when
+ * the wrapped function returns.
+ */
+#define no_co_wrapper
+
#include "block/blockjob.h"
/* block.c */
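A no_co_wrapper declaration is paired with the non-coroutine function it
wraps; the bdrv_open()/bdrv_co_open() additions to block-global-state.h below
follow exactly this shape. As a sketch of the pairing (prototypes only; the
wrapper body itself is generated by scripts/block-coroutine-wrapper.py):

/* Must not be called from coroutine context. */
BlockDriverState * no_coroutine_fn
bdrv_open(const char *filename, const char *reference, QDict *options,
          int flags, Error **errp);

/*
 * Generated wrapper: callable from a coroutine; it schedules a BH that runs
 * bdrv_open() outside coroutine context, yields, and is re-entered once the
 * wrapped call returns.
 */
BlockDriverState * coroutine_fn no_co_wrapper
bdrv_co_open(const char *filename, const char *reference, QDict *options,
             int flags, Error **errp);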
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index d0f8386..0700953 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -36,9 +36,9 @@
void block_copy_state_free(BlockCopyState *s);
void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes);
-int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s,
- int64_t offset,
- int64_t *count);
+
+int64_t coroutine_fn GRAPH_RDLOCK
+block_copy_reset_unallocated(BlockCopyState *s, int64_t offset, int64_t *count);
int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
bool ignore_ratelimit, uint64_t timeout_ns,
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
index a38f86d..399200a 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -58,13 +58,15 @@
Error **errp);
BlockDriver *bdrv_find_format(const char *format_name);
-int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
- QemuOpts *opts, Error **errp);
-int co_wrapper bdrv_create(BlockDriver *drv, const char *filename,
- QemuOpts *opts, Error **errp);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_create(BlockDriver *drv, const char *filename, QemuOpts *opts,
+ Error **errp);
-int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts,
- Error **errp);
+int co_wrapper_bdrv_rdlock bdrv_create(BlockDriver *drv, const char *filename,
+ QemuOpts *opts, Error **errp);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_create_file(const char *filename, QemuOpts *opts, Error **errp);
BlockDriverState *bdrv_new(void);
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
@@ -77,16 +79,26 @@
int flags, Error **errp);
int bdrv_drop_filter(BlockDriverState *bs, Error **errp);
-BdrvChild *bdrv_open_child(const char *filename,
- QDict *options, const char *bdref_key,
- BlockDriverState *parent,
- const BdrvChildClass *child_class,
- BdrvChildRole child_role,
- bool allow_none, Error **errp);
+BdrvChild * no_coroutine_fn
+bdrv_open_child(const char *filename, QDict *options, const char *bdref_key,
+ BlockDriverState *parent, const BdrvChildClass *child_class,
+ BdrvChildRole child_role, bool allow_none, Error **errp);
+
+BdrvChild * coroutine_fn no_co_wrapper
+bdrv_co_open_child(const char *filename, QDict *options, const char *bdref_key,
+ BlockDriverState *parent, const BdrvChildClass *child_class,
+ BdrvChildRole child_role, bool allow_none, Error **errp);
+
int bdrv_open_file_child(const char *filename,
QDict *options, const char *bdref_key,
BlockDriverState *parent, Error **errp);
-BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
+
+BlockDriverState * no_coroutine_fn
+bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
+
+BlockDriverState * coroutine_fn no_co_wrapper
+bdrv_co_open_blockdev_ref(BlockdevRef *ref, Error **errp);
+
int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
Error **errp);
int bdrv_set_backing_hd_drained(BlockDriverState *bs,
@@ -94,8 +106,15 @@
Error **errp);
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
const char *bdref_key, Error **errp);
-BlockDriverState *bdrv_open(const char *filename, const char *reference,
- QDict *options, int flags, Error **errp);
+
+BlockDriverState * no_coroutine_fn
+bdrv_open(const char *filename, const char *reference, QDict *options,
+ int flags, Error **errp);
+
+BlockDriverState * coroutine_fn no_co_wrapper
+bdrv_co_open(const char *filename, const char *reference,
+ QDict *options, int flags, Error **errp);
+
BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
const char *node_name,
QDict *options, int flags,
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 614cbd7..5da99d4 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -60,27 +60,29 @@
bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
const void *buf, BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
- int64_t bytes, const void *buf,
- BdrvRequestFlags flags);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
+ const void *buf, BdrvRequestFlags flags);
+
/*
* Efficiently zero a region of the disk image. Note that this is a regular
* I/O request like read or write and should have a reasonable size. This
* function is not suitable for zeroing the entire image in a single request
* because it may allocate memory for the entire region.
*/
-int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
- PreallocMode prealloc, BdrvRequestFlags flags,
- Error **errp);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
-int64_t coroutine_fn bdrv_co_nb_sectors(BlockDriverState *bs);
-int64_t co_wrapper_mixed bdrv_nb_sectors(BlockDriverState *bs);
+int64_t coroutine_fn GRAPH_RDLOCK bdrv_co_nb_sectors(BlockDriverState *bs);
+int64_t co_wrapper_mixed_bdrv_rdlock bdrv_nb_sectors(BlockDriverState *bs);
-int64_t coroutine_fn bdrv_co_getlength(BlockDriverState *bs);
-int64_t co_wrapper_mixed bdrv_getlength(BlockDriverState *bs);
+int64_t coroutine_fn GRAPH_RDLOCK bdrv_co_getlength(BlockDriverState *bs);
+int64_t co_wrapper_mixed_bdrv_rdlock bdrv_getlength(BlockDriverState *bs);
int64_t coroutine_fn bdrv_co_get_allocated_file_size(BlockDriverState *bs);
int64_t co_wrapper bdrv_get_allocated_file_size(BlockDriverState *bs);
@@ -88,8 +90,12 @@
BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
BlockDriverState *in_bs, Error **errp);
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
-int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
-void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
+
+void coroutine_fn GRAPH_RDLOCK
+bdrv_co_delete_file_noerr(BlockDriverState *bs);
/* async block I/O */
@@ -97,45 +103,45 @@
void bdrv_aio_cancel_async(BlockAIOCB *acb);
/* sg packet commands */
-int coroutine_fn bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
/* Ensure contents are flushed to disk. */
-int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
+int coroutine_fn GRAPH_RDLOCK bdrv_co_flush(BlockDriverState *bs);
-int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
- int64_t bytes);
+int coroutine_fn GRAPH_RDLOCK bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
+ int64_t bytes);
+
bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
int bdrv_block_status(BlockDriverState *bs, int64_t offset,
int64_t bytes, int64_t *pnum, int64_t *map,
BlockDriverState **file);
-int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
- BlockDriverState *base,
- int64_t offset, int64_t bytes,
- int64_t *pnum, int64_t *map,
- BlockDriverState **file);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_block_status_above(BlockDriverState *bs, BlockDriverState *base,
+ int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file);
int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file);
-int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset,
- int64_t bytes, int64_t *pnum);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ int64_t *pnum);
int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *pnum);
-int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
- BlockDriverState *base,
- bool include_base, int64_t offset,
- int64_t bytes, int64_t *pnum);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
+ bool include_base, int64_t offset, int64_t bytes,
+ int64_t *pnum);
int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
bool include_base, int64_t offset, int64_t bytes,
int64_t *pnum);
-int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
- int64_t bytes);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes);
-int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
- bool ignore_allow_rdw, Error **errp);
int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
Error **errp);
bool bdrv_is_read_only(BlockDriverState *bs);
@@ -143,11 +149,14 @@
bool bdrv_is_sg(BlockDriverState *bs);
int bdrv_get_flags(BlockDriverState *bs);
-bool coroutine_fn bdrv_co_is_inserted(BlockDriverState *bs);
-bool co_wrapper bdrv_is_inserted(BlockDriverState *bs);
+bool coroutine_fn GRAPH_RDLOCK bdrv_co_is_inserted(BlockDriverState *bs);
+bool co_wrapper_bdrv_rdlock bdrv_is_inserted(BlockDriverState *bs);
-void coroutine_fn bdrv_co_lock_medium(BlockDriverState *bs, bool locked);
-void coroutine_fn bdrv_co_eject(BlockDriverState *bs, bool eject_flag);
+void coroutine_fn GRAPH_RDLOCK
+bdrv_co_lock_medium(BlockDriverState *bs, bool locked);
+
+void coroutine_fn GRAPH_RDLOCK
+bdrv_co_eject(BlockDriverState *bs, bool eject_flag);
const char *bdrv_get_format_name(BlockDriverState *bs);
@@ -231,17 +240,15 @@
AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c);
-void coroutine_fn bdrv_co_io_plug(BlockDriverState *bs);
-void coroutine_fn bdrv_co_io_unplug(BlockDriverState *bs);
+void coroutine_fn GRAPH_RDLOCK bdrv_co_io_plug(BlockDriverState *bs);
+void coroutine_fn GRAPH_RDLOCK bdrv_co_io_unplug(BlockDriverState *bs);
-bool coroutine_fn bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs,
- const char *name,
- uint32_t granularity,
- Error **errp);
-bool co_wrapper bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs,
- const char *name,
- uint32_t granularity,
- Error **errp);
+bool coroutine_fn GRAPH_RDLOCK
+bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+ uint32_t granularity, Error **errp);
+bool co_wrapper_bdrv_rdlock
+bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+ uint32_t granularity, Error **errp);
/**
*
@@ -272,10 +279,11 @@
*
* Returns: 0 if succeeded; negative error code if failed.
**/
-int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
- BdrvChild *dst, int64_t dst_offset,
- int64_t bytes, BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes, BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
/*
* "I/O or GS" API functions. These functions can run without
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index ba2e0fc..d419017 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -246,12 +246,11 @@
Error **errp);
void (*bdrv_close)(BlockDriverState *bs);
- int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts,
- Error **errp);
- int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp);
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_create)(
+ BlockdevCreateOptions *opts, Error **errp);
+
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_create_opts)(
+ BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp);
int (*bdrv_amend_options)(BlockDriverState *bs,
QemuOpts *opts,
@@ -446,9 +445,10 @@
*
* Returns: true on success, false on failure
*/
- bool (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size,
- Error **errp);
- void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host, size_t size);
+ bool GRAPH_RDLOCK_PTR (*bdrv_register_buf)(
+ BlockDriverState *bs, void *host, size_t size, Error **errp);
+ void GRAPH_RDLOCK_PTR (*bdrv_unregister_buf)(
+ BlockDriverState *bs, void *host, size_t size);
/*
* This field is modified only under the BQL, and is part of
@@ -471,19 +471,22 @@
Error **errp);
/* aio */
- BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs,
+ BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_preadv)(BlockDriverState *bs,
int64_t offset, int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
- BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs,
+
+ BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_pwritev)(BlockDriverState *bs,
int64_t offset, int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
- BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
- BlockCompletionFunc *cb, void *opaque);
- BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
- int64_t offset, int bytes,
+
+ BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_flush)(
+ BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque);
+
+ BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_pdiscard)(
+ BlockDriverState *bs, int64_t offset, int bytes,
BlockCompletionFunc *cb, void *opaque);
- int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_readv)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
/**
@@ -501,16 +504,16 @@
*
* The buffer in @qiov may point directly to guest memory.
*/
- int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_preadv)(BlockDriverState *bs,
int64_t offset, int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_preadv_part)(
+ BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_writev)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
int flags);
/**
@@ -528,12 +531,12 @@
*
* The buffer in @qiov may point directly to guest memory.
*/
- int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev)(
+ BlockDriverState *bs, int64_t offset, int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
- BdrvRequestFlags flags);
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev_part)(
+ BlockDriverState *bs, int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ size_t qiov_offset, BdrvRequestFlags flags);
/*
* Efficiently zero a region of the disk image. Typically an image format
@@ -541,10 +544,12 @@
* function pointer may be NULL or return -ENOSUP and .bdrv_co_writev()
* will be called instead.
*/
- int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
- int64_t offset, int64_t bytes);
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwrite_zeroes)(
+ BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags);
+
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pdiscard)(
+ BlockDriverState *bs, int64_t offset, int64_t bytes);
/*
* Map [offset, offset + nbytes) range onto a child of @bs to copy from,
@@ -554,14 +559,10 @@
* See the comment of bdrv_co_copy_range for the parameter and return value
* semantics.
*/
- int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
- BdrvChild *src,
- int64_t offset,
- BdrvChild *dst,
- int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_copy_range_from)(
+ BlockDriverState *bs, BdrvChild *src, int64_t offset,
+ BdrvChild *dst, int64_t dst_offset, int64_t bytes,
+ BdrvRequestFlags read_flags, BdrvRequestFlags write_flags);
/*
* Map [offset, offset + nbytes) range onto a child of bs to copy data to,
@@ -572,14 +573,10 @@
* See the comment of bdrv_co_copy_range for the parameter and return value
* semantics.
*/
- int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
- BdrvChild *src,
- int64_t src_offset,
- BdrvChild *dst,
- int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_copy_range_to)(
+ BlockDriverState *bs, BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset, int64_t bytes,
+ BdrvRequestFlags read_flags, BdrvRequestFlags write_flags);
/*
* Building block for bdrv_block_status[_above] and
@@ -606,7 +603,8 @@
* *pnum value for the block-status cache on protocol nodes, prior
* to clamping *pnum for return to its caller.
*/
- int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_block_status)(
+ BlockDriverState *bs,
bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file);
@@ -626,13 +624,16 @@
* - receive the snapshot's actual length (which may differ from bs's
* length)
*/
- int coroutine_fn (*bdrv_co_preadv_snapshot)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset);
- int coroutine_fn (*bdrv_co_snapshot_block_status)(BlockDriverState *bs,
- bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
- int64_t *map, BlockDriverState **file);
- int coroutine_fn (*bdrv_co_pdiscard_snapshot)(BlockDriverState *bs,
- int64_t offset, int64_t bytes);
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_preadv_snapshot)(
+ BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset);
+
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_snapshot_block_status)(
+ BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map, BlockDriverState **file);
+
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pdiscard_snapshot)(
+ BlockDriverState *bs, int64_t offset, int64_t bytes);
/*
* Invalidate any cached meta-data.
@@ -645,24 +646,26 @@
* layers, if needed. This function is needed for deterministic
* synchronization of the flush finishing callback.
*/
- int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_flush)(BlockDriverState *bs);
/* Delete a created file. */
- int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs,
- Error **errp);
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_delete_file)(
+ BlockDriverState *bs, Error **errp);
/*
* Flushes all data that was already written to the OS all the way down to
* the disk (for example file-posix.c calls fsync()).
*/
- int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_flush_to_disk)(
+ BlockDriverState *bs);
/*
* Flushes all internal caches to the OS. The data may still sit in a
* writeback cache of the host OS, but it will survive a crash of the qemu
* process.
*/
- int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_flush_to_os)(
+ BlockDriverState *bs);
/*
* Truncate @bs to @offset bytes using the given @prealloc mode
@@ -677,21 +680,26 @@
* If @exact is true and this function fails but would succeed
* with @exact = false, it should return -ENOTSUP.
*/
- int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
- bool exact, PreallocMode prealloc,
- BdrvRequestFlags flags, Error **errp);
- int64_t coroutine_fn (*bdrv_co_getlength)(BlockDriverState *bs);
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_truncate)(
+ BlockDriverState *bs, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
+
+ int64_t coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_getlength)(
+ BlockDriverState *bs);
+
int64_t coroutine_fn (*bdrv_co_get_allocated_file_size)(
BlockDriverState *bs);
BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs,
Error **errp);
- int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov);
- int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- size_t qiov_offset);
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev_compressed)(
+ BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov);
+
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev_compressed_part)(
+ BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset);
int coroutine_fn (*bdrv_co_get_info)(BlockDriverState *bs,
BlockDriverInfo *bdi);
@@ -707,16 +715,20 @@
BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
/* removable device specific */
- bool coroutine_fn (*bdrv_co_is_inserted)(BlockDriverState *bs);
- void coroutine_fn (*bdrv_co_eject)(BlockDriverState *bs, bool eject_flag);
- void coroutine_fn (*bdrv_co_lock_medium)(BlockDriverState *bs, bool locked);
+ bool coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_is_inserted)(
+ BlockDriverState *bs);
+ void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_eject)(
+ BlockDriverState *bs, bool eject_flag);
+ void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_lock_medium)(
+ BlockDriverState *bs, bool locked);
/* to control generic scsi devices */
- BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
- unsigned long int req, void *buf,
+ BlockAIOCB *coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_aio_ioctl)(
+ BlockDriverState *bs, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque);
- int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs,
- unsigned long int req, void *buf);
+
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_ioctl)(
+ BlockDriverState *bs, unsigned long int req, void *buf);
/*
* Returns 0 for completed check, -errno for internal errors.
@@ -729,8 +741,9 @@
BlkdebugEvent event);
/* io queue for linux-aio */
- void coroutine_fn (*bdrv_co_io_plug)(BlockDriverState *bs);
- void coroutine_fn (*bdrv_co_io_unplug)(BlockDriverState *bs);
+ void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_io_plug)(BlockDriverState *bs);
+ void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_io_unplug)(
+ BlockDriverState *bs);
/**
* bdrv_drain_begin is called if implemented in the beginning of a
@@ -748,14 +761,16 @@
void (*bdrv_drain_end)(BlockDriverState *bs);
bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
- bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)(
+
+ bool coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_can_store_new_dirty_bitmap)(
BlockDriverState *bs, const char *name, uint32_t granularity,
Error **errp);
- int coroutine_fn (*bdrv_co_remove_persistent_dirty_bitmap)(
+
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_remove_persistent_dirty_bitmap)(
BlockDriverState *bs, const char *name, Error **errp);
};
-static inline bool block_driver_can_compress(BlockDriver *drv)
+static inline bool TSA_NO_TSA block_driver_can_compress(BlockDriver *drv)
{
return drv->bdrv_co_pwritev_compressed ||
drv->bdrv_co_pwritev_compressed_part;
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
index 4430bf4..eb0da72 100644
--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
@@ -35,42 +35,44 @@
* the I/O API.
*/
-int coroutine_fn bdrv_co_preadv_snapshot(BdrvChild *child,
+int coroutine_fn GRAPH_RDLOCK bdrv_co_preadv_snapshot(BdrvChild *child,
int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset);
-int coroutine_fn bdrv_co_snapshot_block_status(BlockDriverState *bs,
- bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
- int64_t *map, BlockDriverState **file);
-int coroutine_fn bdrv_co_pdiscard_snapshot(BlockDriverState *bs,
+int coroutine_fn GRAPH_RDLOCK bdrv_co_snapshot_block_status(
+ BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map, BlockDriverState **file);
+int coroutine_fn GRAPH_RDLOCK bdrv_co_pdiscard_snapshot(BlockDriverState *bs,
int64_t offset, int64_t bytes);
-int coroutine_fn bdrv_co_preadv(BdrvChild *child,
+int coroutine_fn GRAPH_RDLOCK bdrv_co_preadv(BdrvChild *child,
int64_t offset, int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
+int coroutine_fn GRAPH_RDLOCK bdrv_co_preadv_part(BdrvChild *child,
int64_t offset, int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
+int coroutine_fn GRAPH_RDLOCK bdrv_co_pwritev(BdrvChild *child,
int64_t offset, int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+int coroutine_fn GRAPH_RDLOCK bdrv_co_pwritev_part(BdrvChild *child,
int64_t offset, int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
-static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
+static inline int coroutine_fn GRAPH_RDLOCK bdrv_co_pread(BdrvChild *child,
int64_t offset, int64_t bytes, void *buf, BdrvRequestFlags flags)
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
IO_CODE();
+ assert_bdrv_graph_readable();
return bdrv_co_preadv(child, offset, bytes, &qiov, flags);
}
-static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
+static inline int coroutine_fn GRAPH_RDLOCK bdrv_co_pwrite(BdrvChild *child,
int64_t offset, int64_t bytes, const void *buf, BdrvRequestFlags flags)
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
IO_CODE();
+ assert_bdrv_graph_readable();
return bdrv_co_pwritev(child, offset, bytes, &qiov, flags);
}
@@ -111,20 +113,21 @@
void bdrv_inc_in_flight(BlockDriverState *bs);
void bdrv_dec_in_flight(BlockDriverState *bs);
-int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
- BdrvChild *dst, int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
- BdrvChild *dst, int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes, BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes, BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
-int coroutine_fn bdrv_co_refresh_total_sectors(BlockDriverState *bs,
- int64_t hint);
-int co_wrapper_mixed
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_refresh_total_sectors(BlockDriverState *bs, int64_t hint);
+
+int co_wrapper_mixed_bdrv_rdlock
bdrv_refresh_total_sectors(BlockDriverState *bs, int64_t hint);
BdrvChild *bdrv_cow_child(BlockDriverState *bs);
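
The GRAPH_RDLOCK and GRAPH_RDLOCK_PTR annotations above only declare a locking
contract for clang's thread safety analysis; callers still have to hold the
block-graph read lock themselves. A minimal sketch of a conforming caller,
assuming the graph-lock helpers declared in include/block/graph-lock.h
(GRAPH_RDLOCK_GUARD() and bdrv_graph_co_rdlock()/bdrv_graph_co_rdunlock()) and
block-layer code that may include block_int-io.h:

    /* Sketch only: take the graph read lock for the scope of this coroutine. */
    static int coroutine_fn example_read_first_sector(BdrvChild *child, void *buf)
    {
        GRAPH_RDLOCK_GUARD();   /* released automatically when the scope ends */
        return bdrv_co_pread(child, 0, BDRV_SECTOR_SIZE, buf, 0);
    }
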
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 233535e..fa956de 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -36,12 +36,12 @@
void bdrv_release_dirty_bitmap(BdrvDirtyBitmap *bitmap);
void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
-int coroutine_fn bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs,
- const char *name,
- Error **errp);
-int co_wrapper bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
- const char *name,
- Error **errp);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
+ Error **errp);
+int co_wrapper_bdrv_rdlock
+bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
+ Error **errp);
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
diff --git a/include/crypto/tlssession.h b/include/crypto/tlssession.h
index 15b9cef..571049b 100644
--- a/include/crypto/tlssession.h
+++ b/include/crypto/tlssession.h
@@ -249,6 +249,17 @@
size_t len);
/**
+ * qcrypto_tls_session_check_pending:
+ * @sess: the TLS session object
+ *
+ * Check if there is any data in the TLS buffers that has already
+ * been read from the underlying data source but not yet consumed.
+ *
+ * Returns: the number of bytes available or zero
+ */
+size_t qcrypto_tls_session_check_pending(QCryptoTLSSession *sess);
+
+/**
* qcrypto_tls_session_handshake:
* @sess: the TLS session object
* @errp: pointer to a NULL-initialized error object
diff --git a/include/disas/dis-asm.h b/include/disas/dis-asm.h
index 64247ec..32cda9e 100644
--- a/include/disas/dis-asm.h
+++ b/include/disas/dis-asm.h
@@ -11,10 +11,6 @@
#include "qemu/bswap.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
typedef void *PTR;
typedef uint64_t bfd_vma;
typedef int64_t bfd_signed_vma;
@@ -506,8 +502,4 @@
typedef bool bfd_boolean;
-#ifdef __cplusplus
-}
-#endif
-
#endif /* DISAS_DIS_ASM_H */
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 54585a9..0e36f4d 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -25,6 +25,7 @@
#include "exec/cpu_ldst.h"
#endif
#include "qemu/interval-tree.h"
+#include "qemu/clang-tsa.h"
/* allow to see translation results - the slowdown should be negligible, so we leave it */
#define DEBUG_DISAS
@@ -759,8 +760,8 @@
}
#if defined(CONFIG_USER_ONLY)
-void mmap_lock(void);
-void mmap_unlock(void);
+void TSA_NO_TSA mmap_lock(void);
+void TSA_NO_TSA mmap_unlock(void);
bool have_mmap_lock(void);
/**
diff --git a/include/hw/arm/allwinner-a10.h b/include/hw/arm/allwinner-a10.h
index e0f2f7a..79e0c80 100644
--- a/include/hw/arm/allwinner-a10.h
+++ b/include/hw/arm/allwinner-a10.h
@@ -1,7 +1,6 @@
#ifndef HW_ARM_ALLWINNER_A10_H
#define HW_ARM_ALLWINNER_A10_H
-#include "qemu/error-report.h"
#include "hw/char/serial.h"
#include "hw/arm/boot.h"
#include "hw/pci/pci_device.h"
diff --git a/include/hw/arm/npcm7xx.h b/include/hw/arm/npcm7xx.h
index f1b7e4a..72c7722 100644
--- a/include/hw/arm/npcm7xx.h
+++ b/include/hw/arm/npcm7xx.h
@@ -32,6 +32,7 @@
#include "hw/nvram/npcm7xx_otp.h"
#include "hw/timer/npcm7xx_timer.h"
#include "hw/ssi/npcm7xx_fiu.h"
+#include "hw/ssi/npcm_pspi.h"
#include "hw/usb/hcd-ehci.h"
#include "hw/usb/hcd-ohci.h"
#include "target/arm/cpu.h"
@@ -104,6 +105,7 @@
NPCM7xxFIUState fiu[2];
NPCM7xxEMCState emc[2];
NPCM7xxSDHCIState mmc;
+ NPCMPSPIState pspi[2];
};
#define TYPE_NPCM7XX "npcm7xx"
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
index c5683af..9fcff26 100644
--- a/include/hw/arm/smmu-common.h
+++ b/include/hw/arm/smmu-common.h
@@ -27,8 +27,6 @@
#define SMMU_PCI_DEVFN_MAX 256
#define SMMU_PCI_DEVFN(sid) (sid & 0xFF)
-#define SMMU_MAX_VA_BITS 48
-
/*
* Page table walk error types
*/
diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
index f1921fd..a0c0264 100644
--- a/include/hw/arm/smmuv3.h
+++ b/include/hw/arm/smmuv3.h
@@ -20,7 +20,6 @@
#define HW_ARM_SMMUV3_H
#include "hw/arm/smmu-common.h"
-#include "hw/registerfields.h"
#include "qom/object.h"
#define TYPE_SMMUV3_IOMMU_MEMORY_REGION "smmuv3-iommu-memory-region"
@@ -46,6 +45,7 @@
uint32_t cr[3];
uint32_t cr0ack;
uint32_t statusr;
+ uint32_t gbpa;
uint32_t irq_ctrl;
uint32_t gerror;
uint32_t gerrorn;
diff --git a/include/hw/char/ibex_uart.h b/include/hw/char/ibex_uart.h
index a399855..9deadf2 100644
--- a/include/hw/char/ibex_uart.h
+++ b/include/hw/char/ibex_uart.h
@@ -26,7 +26,6 @@
#define HW_IBEX_UART_H
#include "hw/sysbus.h"
-#include "hw/registerfields.h"
#include "chardev/char-fe.h"
#include "qemu/timer.h"
#include "qom/object.h"
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 2417597..671f041 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -349,7 +349,7 @@
bool unplug;
bool crash_occurred;
bool exit_request;
- bool in_exclusive_context;
+ int exclusive_context_count;
uint32_t cflags_next_tb;
/* updates protected by BQL */
uint32_t interrupt_request;
@@ -758,7 +758,7 @@
*/
static inline bool cpu_in_exclusive_context(const CPUState *cpu)
{
- return cpu->in_exclusive_context;
+ return cpu->exclusive_context_count;
}
/**
diff --git a/include/hw/intc/armv7m_nvic.h b/include/hw/intc/armv7m_nvic.h
index 0180c7b..1ca262f 100644
--- a/include/hw/intc/armv7m_nvic.h
+++ b/include/hw/intc/armv7m_nvic.h
@@ -16,10 +16,7 @@
#include "qom/object.h"
#define TYPE_NVIC "armv7m_nvic"
-
-typedef struct NVICState NVICState;
-DECLARE_INSTANCE_CHECKER(NVICState, NVIC,
- TYPE_NVIC)
+OBJECT_DECLARE_SIMPLE_TYPE(NVICState, NVIC)
/* Highest permitted number of exceptions (architectural limit) */
#define NVIC_MAX_VECTORS 512
@@ -86,4 +83,127 @@
qemu_irq sysresetreq;
};
+/* Interface between CPU and Interrupt controller. */
+/**
+ * armv7m_nvic_set_pending: mark the specified exception as pending
+ * @s: the NVIC
+ * @irq: the exception number to mark pending
+ * @secure: false for non-banked exceptions or for the nonsecure
+ * version of a banked exception, true for the secure version of a banked
+ * exception.
+ *
+ * Marks the specified exception as pending. Note that we will assert()
+ * if @secure is true and @irq does not specify one of the fixed set
+ * of architecturally banked exceptions.
+ */
+void armv7m_nvic_set_pending(NVICState *s, int irq, bool secure);
+/**
+ * armv7m_nvic_set_pending_derived: mark this derived exception as pending
+ * @s: the NVIC
+ * @irq: the exception number to mark pending
+ * @secure: false for non-banked exceptions or for the nonsecure
+ * version of a banked exception, true for the secure version of a banked
+ * exception.
+ *
+ * Similar to armv7m_nvic_set_pending(), but specifically for derived
+ * exceptions (exceptions generated in the course of trying to take
+ * a different exception).
+ */
+void armv7m_nvic_set_pending_derived(NVICState *s, int irq, bool secure);
+/**
+ * armv7m_nvic_set_pending_lazyfp: mark this lazy FP exception as pending
+ * @s: the NVIC
+ * @irq: the exception number to mark pending
+ * @secure: false for non-banked exceptions or for the nonsecure
+ * version of a banked exception, true for the secure version of a banked
+ * exception.
+ *
+ * Similar to armv7m_nvic_set_pending(), but specifically for exceptions
+ * generated in the course of lazy stacking of FP registers.
+ */
+void armv7m_nvic_set_pending_lazyfp(NVICState *s, int irq, bool secure);
+/**
+ * armv7m_nvic_get_pending_irq_info: return highest priority pending
+ * exception, and whether it targets Secure state
+ * @s: the NVIC
+ * @pirq: set to pending exception number
+ * @ptargets_secure: set to whether pending exception targets Secure
+ *
+ * This function writes the number of the highest priority pending
+ * exception (the one which would be made active by
+ * armv7m_nvic_acknowledge_irq()) to @pirq, and sets @ptargets_secure
+ * to true if the current highest priority pending exception should
+ * be taken to Secure state, false for NS.
+ */
+void armv7m_nvic_get_pending_irq_info(NVICState *s, int *pirq,
+ bool *ptargets_secure);
+/**
+ * armv7m_nvic_acknowledge_irq: make highest priority pending exception active
+ * @s: the NVIC
+ *
+ * Move the current highest priority pending exception from the pending
+ * state to the active state, and update v7m.exception to indicate that
+ * it is the exception currently being handled.
+ */
+void armv7m_nvic_acknowledge_irq(NVICState *s);
+/**
+ * armv7m_nvic_complete_irq: complete specified interrupt or exception
+ * @s: the NVIC
+ * @irq: the exception number to complete
+ * @secure: true if this exception was secure
+ *
+ * Returns: -1 if the irq was not active
+ * 1 if completing this irq brought us back to base (no active irqs)
+ * 0 if there is still an irq active after this one was completed
+ * (Ignoring -1, this is the same as the RETTOBASE value before completion.)
+ */
+int armv7m_nvic_complete_irq(NVICState *s, int irq, bool secure);
+/**
+ * armv7m_nvic_get_ready_status(NVICState *s, int irq, bool secure)
+ * @s: the NVIC
+ * @irq: the exception number to mark pending
+ * @secure: false for non-banked exceptions or for the nonsecure
+ * version of a banked exception, true for the secure version of a banked
+ * exception.
+ *
+ * Return whether an exception is "ready", i.e. whether the exception is
+ * enabled and is configured at a priority which would allow it to
+ * interrupt the current execution priority. This controls whether the
+ * RDY bit for it in the FPCCR is set.
+ */
+bool armv7m_nvic_get_ready_status(NVICState *s, int irq, bool secure);
+/**
+ * armv7m_nvic_raw_execution_priority: return the raw execution priority
+ * @s: the NVIC
+ *
+ * Returns: the raw execution priority as defined by the v8M architecture.
+ * This is the execution priority minus the effects of AIRCR.PRIS,
+ * and minus any PRIMASK/FAULTMASK/BASEPRI priority boosting.
+ * (v8M ARM ARM I_PKLD.)
+ */
+int armv7m_nvic_raw_execution_priority(NVICState *s);
+/**
+ * armv7m_nvic_neg_prio_requested: return true if the requested execution
+ * priority is negative for the specified security state.
+ * @s: the NVIC
+ * @secure: the security state to test
+ * This corresponds to the pseudocode IsReqExecPriNeg().
+ */
+#ifndef CONFIG_USER_ONLY
+bool armv7m_nvic_neg_prio_requested(NVICState *s, bool secure);
+#else
+static inline bool armv7m_nvic_neg_prio_requested(NVICState *s, bool secure)
+{
+ return false;
+}
+#endif
+#ifndef CONFIG_USER_ONLY
+bool armv7m_nvic_can_take_pending_exception(NVICState *s);
+#else
+static inline bool armv7m_nvic_can_take_pending_exception(NVICState *s)
+{
+ return true;
+}
+#endif
+
#endif
diff --git a/include/hw/ssi/ibex_spi_host.h b/include/hw/ssi/ibex_spi_host.h
index 1f6d077..8089cc1 100644
--- a/include/hw/ssi/ibex_spi_host.h
+++ b/include/hw/ssi/ibex_spi_host.h
@@ -32,7 +32,6 @@
#include "hw/ssi/ssi.h"
#include "qemu/fifo8.h"
#include "qom/object.h"
-#include "hw/registerfields.h"
#include "qemu/timer.h"
#define TYPE_IBEX_SPI_HOST "ibex-spi"
diff --git a/include/hw/ssi/npcm_pspi.h b/include/hw/ssi/npcm_pspi.h
new file mode 100644
index 0000000..37cc784
--- /dev/null
+++ b/include/hw/ssi/npcm_pspi.h
@@ -0,0 +1,53 @@
+/*
+ * Nuvoton Peripheral SPI Module
+ *
+ * Copyright 2023 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef NPCM_PSPI_H
+#define NPCM_PSPI_H
+
+#include "hw/ssi/ssi.h"
+#include "hw/sysbus.h"
+
+/*
+ * Number of registers in our device state structure. Don't change this without
+ * incrementing the version_id in the vmstate.
+ */
+#define NPCM_PSPI_NR_REGS 3
+
+/**
+ * NPCMPSPIState - Device state for one Peripheral SPI Module.
+ * @parent: System bus device.
+ * @mmio: Memory region for register access.
+ * @spi: The SPI bus mastered by this controller.
+ * @regs: Register contents.
+ * @irq: The interrupt request queue for this module.
+ *
+ * Each PSPI has a shared bank of registers, and controls up to four chip
+ * selects. Each chip select has a dedicated memory region which may be used to
+ * read and write the flash connected to that chip select as if it were memory.
+ */
+typedef struct NPCMPSPIState {
+ SysBusDevice parent;
+
+ MemoryRegion mmio;
+
+ SSIBus *spi;
+ uint16_t regs[NPCM_PSPI_NR_REGS];
+ qemu_irq irq;
+} NPCMPSPIState;
+
+#define TYPE_NPCM_PSPI "npcm-pspi"
+OBJECT_DECLARE_SIMPLE_TYPE(NPCMPSPIState, NPCM_PSPI)
+
+#endif /* NPCM_PSPI_H */
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index e573f5a..87524c6 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -61,11 +61,11 @@
typedef struct VFIOMigration {
struct VFIODevice *vbasedev;
VMChangeStateEntry *vm_state;
- VFIORegion region;
- uint32_t device_state;
- int vm_running;
Notifier migration_state;
- uint64_t pending_bytes;
+ uint32_t device_state;
+ int data_fd;
+ void *data_buffer;
+ size_t data_buffer_size;
} VFIOMigration;
typedef struct VFIOAddressSpace {
@@ -218,6 +218,8 @@
extern VFIOGroupList vfio_group_list;
bool vfio_mig_active(void);
+int vfio_block_multiple_devices_migration(Error **errp);
+void vfio_unblock_multiple_devices_migration(void);
int64_t vfio_mig_bytes_transferred(void);
#ifdef CONFIG_LINUX
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index 37b75e1..779568a 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -74,13 +74,22 @@
VirtQueue **cmd_vqs;
};
+struct VirtIOSCSIReq;
+
struct VirtIOSCSI {
VirtIOSCSICommon parent_obj;
SCSIBus bus;
- int resetting;
+ int resetting; /* written from main loop thread, read from any thread */
bool events_dropped;
+ /*
+ * TMFs deferred to main loop BH. These fields are protected by
+ * virtio_scsi_acquire().
+ */
+ QEMUBH *tmf_bh;
+ QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list;
+
/* Fields for dataplane below */
AioContext *ctx; /* one iothread per virtio-scsi-pci for now */
diff --git a/include/migration/register.h b/include/migration/register.h
index b91a0cd..a8dfd8f 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -47,25 +47,25 @@
/* This runs outside the iothread lock! */
int (*save_setup)(QEMUFile *f, void *opaque);
/* Note for save_live_pending:
- * - res_precopy_only is for data which must be migrated in precopy phase
- * or in stopped state, in other words - before target vm start
- * - res_compatible is for data which may be migrated in any phase
- * - res_postcopy_only is for data which must be migrated in postcopy phase
- * or in stopped state, in other words - after source vm stop
+ * must_precopy:
+ * - must be migrated in precopy or in stopped state
+ * - i.e. must be migrated before target start
*
- * Sum of res_postcopy_only, res_compatible and res_postcopy_only is the
- * whole amount of pending data.
+ * can_postcopy:
+ * - can migrate in postcopy or in stopped state
+ * - i.e. can migrate after target start
+ * - some can also be migrated during precopy (RAM)
+ * - some must be migrated after source stops (block-dirty-bitmap)
+ *
+ * Sum of can_postcopy and must_precopy is the whole amount of
+ * pending data.
*/
/* This estimates the remaining data to transfer */
- void (*state_pending_estimate)(void *opaque,
- uint64_t *res_precopy_only,
- uint64_t *res_compatible,
- uint64_t *res_postcopy_only);
+ void (*state_pending_estimate)(void *opaque, uint64_t *must_precopy,
+ uint64_t *can_postcopy);
/* This calculates the exact remaining data to transfer */
- void (*state_pending_exact)(void *opaque,
- uint64_t *res_precopy_only,
- uint64_t *res_compatible,
- uint64_t *res_postcopy_only);
+ void (*state_pending_exact)(void *opaque, uint64_t *must_precopy,
+ uint64_t *can_postcopy);
LoadStateHandler *load_state;
int (*load_setup)(QEMUFile *f, void *opaque);
int (*load_cleanup)(void *opaque);
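
A hedged sketch of a handler filling the new two-pointer signature; the
ExampleDevState type and its dirty_bytes field are invented for illustration
and are not part of this patch:

    static void example_state_pending_estimate(void *opaque,
                                               uint64_t *must_precopy,
                                               uint64_t *can_postcopy)
    {
        ExampleDevState *s = opaque;       /* hypothetical device state */

        /* Everything this device has must be sent before the target starts. */
        *must_precopy += s->dirty_bytes;
        /* Nothing is deferred to postcopy, so *can_postcopy is left alone. */
    }
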
diff --git a/include/net/net.h b/include/net/net.h
index fad589c..1d88621 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -203,6 +203,20 @@
bool vnet_hdr);
NetClientState *qemu_get_peer(NetClientState *nc, int queue_index);
+/**
+ * qemu_get_nic_models:
+ * @device_type: Defines which devices should be taken into consideration
+ * (e.g. TYPE_DEVICE for all devices, or TYPE_PCI_DEVICE for PCI)
+ *
+ * Get an array of pointers to names of NIC devices that are available in
+ * the QEMU binary. The array is terminated with a NULL pointer entry.
+ * The caller is responsible for freeing the memory when it is not required
+ * anymore, e.g. with g_ptr_array_free(..., true).
+ *
+ * Returns: Pointer to the array that contains the pointers to the names.
+ */
+GPtrArray *qemu_get_nic_models(const char *device_type);
+
/* NIC info */
#define MAX_NICS 8
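
A hedged usage sketch for the new helper, following the doc comment above;
TYPE_DEVICE comes from hw/qdev-core.h and the printing loop is illustrative
only, not part of this patch:

    static void example_list_nic_models(void)
    {
        GPtrArray *models = qemu_get_nic_models(TYPE_DEVICE);

        for (guint i = 0; i < models->len; i++) {
            const char *name = g_ptr_array_index(models, i);
            if (name) {                    /* the array is NULL-terminated */
                printf("%s\n", name);
            }
        }
        g_ptr_array_free(models, true);
    }
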
diff --git a/include/qapi/qmp/qerror.h b/include/qapi/qmp/qerror.h
index 87ca83b..8dd9fcb 100644
--- a/include/qapi/qmp/qerror.h
+++ b/include/qapi/qmp/qerror.h
@@ -29,9 +29,6 @@
#define QERR_DEVICE_NO_HOTPLUG \
"Device '%s' does not support hotplugging"
-#define QERR_FEATURE_DISABLED \
- "The feature '%s' is not enabled"
-
#define QERR_INVALID_PARAMETER \
"Invalid parameter '%s'"
@@ -59,9 +56,6 @@
#define QERR_QGA_COMMAND_FAILED \
"Guest agent command failed, error was '%s'"
-#define QERR_REPLAY_NOT_SUPPORTED \
- "Record/replay feature is not supported for '%s'"
-
#define QERR_UNSUPPORTED \
"this feature or command is not currently supported"
diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h
index 3cbe522..b1650da 100644
--- a/include/qemu/bswap.h
+++ b/include/qemu/bswap.h
@@ -1,10 +1,6 @@
#ifndef BSWAP_H
#define BSWAP_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
#undef bswap16
#define bswap16(_x) __builtin_bswap16(_x)
#undef bswap32
@@ -395,8 +391,4 @@
#undef le_bswaps
#undef be_bswaps
-#ifdef __cplusplus
-}
-#endif
-
#endif /* BSWAP_H */
diff --git a/include/qemu/envlist.h b/include/qemu/envlist.h
index b9addcc..6006dfa 100644
--- a/include/qemu/envlist.h
+++ b/include/qemu/envlist.h
@@ -1,10 +1,6 @@
#ifndef ENVLIST_H
#define ENVLIST_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
typedef struct envlist envlist_t;
envlist_t *envlist_create(void);
@@ -15,8 +11,4 @@
int envlist_parse_unset(envlist_t *, const char *);
char **envlist_to_environ(const envlist_t *, size_t *);
-#ifdef __cplusplus
-}
-#endif
-
#endif /* ENVLIST_H */
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index af4e4ab..8136e33 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -330,7 +330,7 @@
int64_t *dirty_start, int64_t *dirty_count);
/*
- * bdrv_dirty_bitmap_status:
+ * hbitmap_status:
* @hb: The HBitmap to operate on
* @start: The bit to start from
* @count: Number of bits to proceed
diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index b063c6f..313fc41 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -31,10 +31,6 @@
#include "qemu/sys_membarrier.h"
#include "qemu/coroutine-tls.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
/*
* Important !
*
@@ -196,8 +192,4 @@
void rcu_add_force_rcu_notifier(Notifier *n);
void rcu_remove_force_rcu_notifier(Notifier *n);
-#ifdef __cplusplus
-}
-#endif
-
#endif /* QEMU_RCU_H */
diff --git a/include/qemu/rcu_queue.h b/include/qemu/rcu_queue.h
index 0e53ddd..4e6298d 100644
--- a/include/qemu/rcu_queue.h
+++ b/include/qemu/rcu_queue.h
@@ -28,11 +28,6 @@
#include "qemu/queue.h"
#include "qemu/atomic.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
/*
* List access methods.
*/
@@ -311,7 +306,4 @@
(var) && ((next) = qatomic_rcu_read(&(var)->field.sle_next), 1); \
(var) = (next))
-#ifdef __cplusplus
-}
-#endif
#endif /* QEMU_RCU_QUEUE_H */
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index 7841084..dd3822d 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -3,6 +3,7 @@
#include "qemu/processor.h"
#include "qemu/atomic.h"
+#include "qemu/clang-tsa.h"
typedef struct QemuCond QemuCond;
typedef struct QemuSemaphore QemuSemaphore;
@@ -24,9 +25,12 @@
void qemu_mutex_init(QemuMutex *mutex);
void qemu_mutex_destroy(QemuMutex *mutex);
-int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line);
-void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line);
-void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line);
+int TSA_NO_TSA qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file,
+ const int line);
+void TSA_NO_TSA qemu_mutex_lock_impl(QemuMutex *mutex, const char *file,
+ const int line);
+void TSA_NO_TSA qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file,
+ const int line);
void qemu_rec_mutex_init(QemuRecMutex *mutex);
void qemu_rec_mutex_destroy(QemuRecMutex *mutex);
@@ -153,8 +157,8 @@
*/
void qemu_cond_signal(QemuCond *cond);
void qemu_cond_broadcast(QemuCond *cond);
-void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex,
- const char *file, const int line);
+void TSA_NO_TSA qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex,
+ const char *file, const int line);
bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms,
const char *file, const int line);
diff --git a/include/qemu/uri.h b/include/qemu/uri.h
index d201c61..db5218c 100644
--- a/include/qemu/uri.h
+++ b/include/qemu/uri.h
@@ -53,10 +53,6 @@
#ifndef QEMU_URI_H
#define QEMU_URI_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
/**
* URI:
*
@@ -105,7 +101,4 @@
extern QueryParams *query_params_parse (const char *query);
extern void query_params_free (QueryParams *ps);
-#ifdef __cplusplus
-}
-#endif
#endif /* QEMU_URI_H */
diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h
index cd43193..25c7243 100644
--- a/include/qemu/vhost-user-server.h
+++ b/include/qemu/vhost-user-server.h
@@ -15,7 +15,6 @@
#include "io/channel-socket.h"
#include "io/channel-file.h"
#include "io/net-listener.h"
-#include "qemu/error-report.h"
#include "qapi/error.h"
#include "standard-headers/linux/virtio_blk.h"
diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h
index 48b620c..69cab17 100644
--- a/include/standard-headers/drm/drm_fourcc.h
+++ b/include/standard-headers/drm/drm_fourcc.h
@@ -98,18 +98,42 @@
#define DRM_FORMAT_INVALID 0
/* color index */
+#define DRM_FORMAT_C1 fourcc_code('C', '1', ' ', ' ') /* [7:0] C0:C1:C2:C3:C4:C5:C6:C7 1:1:1:1:1:1:1:1 eight pixels/byte */
+#define DRM_FORMAT_C2 fourcc_code('C', '2', ' ', ' ') /* [7:0] C0:C1:C2:C3 2:2:2:2 four pixels/byte */
+#define DRM_FORMAT_C4 fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 4:4 two pixels/byte */
#define DRM_FORMAT_C8 fourcc_code('C', '8', ' ', ' ') /* [7:0] C */
-/* 8 bpp Red */
+/* 1 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D1 fourcc_code('D', '1', ' ', ' ') /* [7:0] D0:D1:D2:D3:D4:D5:D6:D7 1:1:1:1:1:1:1:1 eight pixels/byte */
+
+/* 2 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D2 fourcc_code('D', '2', ' ', ' ') /* [7:0] D0:D1:D2:D3 2:2:2:2 four pixels/byte */
+
+/* 4 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D4 fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 4:4 two pixels/byte */
+
+/* 8 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D8 fourcc_code('D', '8', ' ', ' ') /* [7:0] D */
+
+/* 1 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R1 fourcc_code('R', '1', ' ', ' ') /* [7:0] R0:R1:R2:R3:R4:R5:R6:R7 1:1:1:1:1:1:1:1 eight pixels/byte */
+
+/* 2 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R2 fourcc_code('R', '2', ' ', ' ') /* [7:0] R0:R1:R2:R3 2:2:2:2 four pixels/byte */
+
+/* 4 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R4 fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 4:4 two pixels/byte */
+
+/* 8 bpp Red (direct relationship between channel value and brightness) */
#define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */
-/* 10 bpp Red */
+/* 10 bpp Red (direct relationship between channel value and brightness) */
#define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */
-/* 12 bpp Red */
+/* 12 bpp Red (direct relationship between channel value and brightness) */
#define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */
-/* 16 bpp Red */
+/* 16 bpp Red (direct relationship between channel value and brightness) */
#define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */
/* 16 bpp RG */
@@ -204,7 +228,9 @@
#define DRM_FORMAT_VYUY fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */
#define DRM_FORMAT_AYUV fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_AVUY8888 fourcc_code('A', 'V', 'U', 'Y') /* [31:0] A:Cr:Cb:Y 8:8:8:8 little endian */
#define DRM_FORMAT_XYUV8888 fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_XVUY8888 fourcc_code('X', 'V', 'U', 'Y') /* [31:0] X:Cr:Cb:Y 8:8:8:8 little endian */
#define DRM_FORMAT_VUY888 fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian */
#define DRM_FORMAT_VUY101010 fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. Non-linear modifier only */
@@ -717,6 +743,35 @@
*/
#define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4)
+/*
+ * Vivante TS (tile-status) buffer modifiers. They can be combined with all of
+ * the color buffer tiling modifiers defined above. When TS is present it's a
+ * separate buffer containing the clear/compression status of each tile. The
+ * modifiers are defined as VIVANTE_MOD_TS_c_s, where c is the color buffer
+ * tile size in bytes covered by one entry in the status buffer and s is the
+ * number of status bits per entry.
+ * We reserve the top 8 bits of the Vivante modifier space for tile status
+ * clear/compression modifiers, as future cores might add some more TS layout
+ * variations.
+ */
+#define VIVANTE_MOD_TS_64_4 (1ULL << 48)
+#define VIVANTE_MOD_TS_64_2 (2ULL << 48)
+#define VIVANTE_MOD_TS_128_4 (3ULL << 48)
+#define VIVANTE_MOD_TS_256_4 (4ULL << 48)
+#define VIVANTE_MOD_TS_MASK (0xfULL << 48)
+
+/*
+ * Vivante compression modifiers. Those depend on a TS modifier being present
+ * as the TS bits get reinterpreted as compression tags instead of simple
+ * clear markers when compression is enabled.
+ */
+#define VIVANTE_MOD_COMP_DEC400 (1ULL << 52)
+#define VIVANTE_MOD_COMP_MASK (0xfULL << 52)
+
+/* Masking out the extension bits will yield the base modifier. */
+#define VIVANTE_MOD_EXT_MASK (VIVANTE_MOD_TS_MASK | \
+ VIVANTE_MOD_COMP_MASK)
+
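
For illustration only (not part of the header): a base Vivante tiling modifier
defined earlier in this file can be ORed with the TS and compression extension
bits, and the base recovered with the mask:

    uint64_t mod  = DRM_FORMAT_MOD_VIVANTE_SUPER_TILED |
                    VIVANTE_MOD_TS_64_4 | VIVANTE_MOD_COMP_DEC400;
    uint64_t base = mod & ~VIVANTE_MOD_EXT_MASK;   /* plain super-tiled again */
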
/* NVIDIA frame buffer modifiers */
/*
diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h
index 4537da2..87176ab 100644
--- a/include/standard-headers/linux/ethtool.h
+++ b/include/standard-headers/linux/ethtool.h
@@ -159,8 +159,10 @@
* in its bus driver structure (e.g. pci_driver::name). Must
* not be an empty string.
* @version: Driver version string; may be an empty string
- * @fw_version: Firmware version string; may be an empty string
- * @erom_version: Expansion ROM version string; may be an empty string
+ * @fw_version: Firmware version string; driver defined; may be an
+ * empty string
+ * @erom_version: Expansion ROM version string; driver defined; may be
+ * an empty string
* @bus_info: Device bus address. This should match the dev_name()
* string for the underlying bus device, if there is one. May be
* an empty string.
@@ -179,10 +181,6 @@
*
* Users can use the %ETHTOOL_GSSET_INFO command to get the number of
* strings in any string set (from Linux 2.6.34).
- *
- * Drivers should set at most @driver, @version, @fw_version and
- * @bus_info in their get_drvinfo() implementation. The ethtool
- * core fills in the other fields using other driver operations.
*/
struct ethtool_drvinfo {
uint32_t cmd;
@@ -737,6 +735,51 @@
};
/**
+ * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE
+ * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are
+ * unknown
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled
+ */
+enum ethtool_podl_pse_admin_state {
+ ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1,
+ ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
+ ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED,
+};
+
+/**
+ * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE.
+ * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus:
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is
+ * asserted true when the PoDL PSE state diagram variable mr_pse_enable is
+ * false"
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is
+ * asserted true when either of the PSE state diagram variables
+ * pi_detecting or pi_classifying is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower”
+ * is asserted true when the PoDL PSE state diagram variable pi_powered is
+ * true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted
+ * true when the PoDL PSE state diagram variable pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true
+ * when the logical combination of the PoDL PSE state diagram variables
+ * pi_prebiased*!pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted
+ * true when the PoDL PSE state diagram variable overload_held is true."
+ */
+enum ethtool_podl_pse_pw_d_status {
+ ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR,
+};
+
+/**
* struct ethtool_gstrings - string set for data tagging
* @cmd: Command number = %ETHTOOL_GSTRINGS
* @string_set: String set ID; one of &enum ethtool_stringset
@@ -1692,6 +1735,13 @@
ETHTOOL_LINK_MODE_100baseFX_Half_BIT = 90,
ETHTOOL_LINK_MODE_100baseFX_Full_BIT = 91,
ETHTOOL_LINK_MODE_10baseT1L_Full_BIT = 92,
+ ETHTOOL_LINK_MODE_800000baseCR8_Full_BIT = 93,
+ ETHTOOL_LINK_MODE_800000baseKR8_Full_BIT = 94,
+ ETHTOOL_LINK_MODE_800000baseDR8_Full_BIT = 95,
+ ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT = 96,
+ ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT = 97,
+ ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT = 98,
+
/* must be last entry */
__ETHTOOL_LINK_MODE_MASK_NBITS
};
@@ -1803,6 +1853,7 @@
#define SPEED_100000 100000
#define SPEED_200000 200000
#define SPEED_400000 400000
+#define SPEED_800000 800000
#define SPEED_UNKNOWN -1
@@ -1840,6 +1891,20 @@
#define MASTER_SLAVE_STATE_SLAVE 3
#define MASTER_SLAVE_STATE_ERR 4
+/* These are used to throttle the rate of data on the phy interface when the
+ * native speed of the interface is higher than the link speed. These should
+ * not be used for phy interfaces which natively support multiple speeds (e.g.
+ * MII or SGMII).
+ */
+/* No rate matching performed. */
+#define RATE_MATCH_NONE 0
+/* The phy sends pause frames to throttle the MAC. */
+#define RATE_MATCH_PAUSE 1
+/* The phy asserts CRS to prevent the MAC from transmitting. */
+#define RATE_MATCH_CRS 2
+/* The MAC is programmed with a sufficiently-large IPG. */
+#define RATE_MATCH_OPEN_LOOP 3
+
/* Which connector port. */
#define PORT_TP 0x00
#define PORT_AUI 0x01
@@ -2033,8 +2098,8 @@
* reported consistently by PHYLIB. Read-only.
* @master_slave_cfg: Master/slave port mode.
* @master_slave_state: Master/slave port state.
+ * @rate_matching: Rate adaptation performed by the PHY
* @reserved: Reserved for future use; see the note on reserved space.
- * @reserved1: Reserved for future use; see the note on reserved space.
* @link_mode_masks: Variable length bitmaps.
*
* If autonegotiation is disabled, the speed and @duplex represent the
@@ -2085,7 +2150,7 @@
uint8_t transceiver;
uint8_t master_slave_cfg;
uint8_t master_slave_state;
- uint8_t reserved1[1];
+ uint8_t rate_matching;
uint32_t reserved[7];
uint32_t link_mode_masks[];
/* layout of link_mode_masks fields:
diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h
index bda0625..a1af78d 100644
--- a/include/standard-headers/linux/fuse.h
+++ b/include/standard-headers/linux/fuse.h
@@ -194,6 +194,13 @@
* - add FUSE_SECURITY_CTX init flag
* - add security context to create, mkdir, symlink, and mknod requests
* - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX
+ *
+ * 7.37
+ * - add FUSE_TMPFILE
+ *
+ * 7.38
+ * - add FUSE_EXPIRE_ONLY flag to fuse_notify_inval_entry
+ * - add FOPEN_PARALLEL_DIRECT_WRITES
*/
#ifndef _LINUX_FUSE_H
@@ -225,7 +232,7 @@
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 36
+#define FUSE_KERNEL_MINOR_VERSION 38
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -297,6 +304,7 @@
* FOPEN_CACHE_DIR: allow caching this directory
* FOPEN_STREAM: the file is stream-like (no file position at all)
* FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE)
+ * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode
*/
#define FOPEN_DIRECT_IO (1 << 0)
#define FOPEN_KEEP_CACHE (1 << 1)
@@ -304,6 +312,7 @@
#define FOPEN_CACHE_DIR (1 << 3)
#define FOPEN_STREAM (1 << 4)
#define FOPEN_NOFLUSH (1 << 5)
+#define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6)
/**
* INIT request/reply flags
@@ -484,6 +493,12 @@
*/
#define FUSE_SETXATTR_ACL_KILL_SGID (1 << 0)
+/**
+ * notify_inval_entry flags
+ * FUSE_EXPIRE_ONLY
+ */
+#define FUSE_EXPIRE_ONLY (1 << 0)
+
enum fuse_opcode {
FUSE_LOOKUP = 1,
FUSE_FORGET = 2, /* no reply */
@@ -533,6 +548,7 @@
FUSE_SETUPMAPPING = 48,
FUSE_REMOVEMAPPING = 49,
FUSE_SYNCFS = 50,
+ FUSE_TMPFILE = 51,
/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -911,7 +927,7 @@
struct fuse_notify_inval_entry_out {
uint64_t parent;
uint32_t namelen;
- uint32_t padding;
+ uint32_t flags;
};
struct fuse_notify_delete_out {
diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h
index 50790ae..f6bab08 100644
--- a/include/standard-headers/linux/input-event-codes.h
+++ b/include/standard-headers/linux/input-event-codes.h
@@ -614,6 +614,9 @@
#define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */
#define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */
#define KEY_DICTATE 0x24a /* Start or Stop Voice Dictation Session (HUTRR99) */
+#define KEY_CAMERA_ACCESS_ENABLE 0x24b /* Enables programmatic access to camera devices. (HUTRR72) */
+#define KEY_CAMERA_ACCESS_DISABLE 0x24c /* Disables programmatic access to camera devices. (HUTRR72) */
+#define KEY_CAMERA_ACCESS_TOGGLE 0x24d /* Toggles the current state of the camera access control. (HUTRR72) */
#define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */
#define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */
@@ -862,6 +865,7 @@
#define ABS_TOOL_WIDTH 0x1c
#define ABS_VOLUME 0x20
+#define ABS_PROFILE 0x21
#define ABS_MISC 0x28
diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h
index 57b8e2f..85ab127 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -1058,6 +1058,7 @@
/* Precision Time Measurement */
#define PCI_PTM_CAP 0x04 /* PTM Capability */
#define PCI_PTM_CAP_REQ 0x00000001 /* Requester capable */
+#define PCI_PTM_CAP_RES 0x00000002 /* Responder capable */
#define PCI_PTM_CAP_ROOT 0x00000004 /* Root capable */
#define PCI_PTM_GRANULARITY_MASK 0x0000FF00 /* Clock granularity */
#define PCI_PTM_CTRL 0x08 /* PTM Control */
@@ -1119,6 +1120,7 @@
#define PCI_DOE_STATUS_DATA_OBJECT_READY 0x80000000 /* Data Object Ready */
#define PCI_DOE_WRITE 0x10 /* DOE Write Data Mailbox Register */
#define PCI_DOE_READ 0x14 /* DOE Read Data Mailbox Register */
+#define PCI_DOE_CAP_SIZEOF 0x18 /* Size of DOE register block */
/* DOE Data Object - note not actually registers */
#define PCI_DOE_DATA_OBJECT_HEADER_1_VID 0x0000ffff
diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h
index 2dcc908..e81715c 100644
--- a/include/standard-headers/linux/virtio_blk.h
+++ b/include/standard-headers/linux/virtio_blk.h
@@ -40,6 +40,7 @@
#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */
#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */
+#define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */
/* Legacy feature bits */
#ifndef VIRTIO_BLK_NO_LEGACY
@@ -119,6 +120,21 @@
uint8_t write_zeroes_may_unmap;
uint8_t unused1[3];
+
+ /* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */
+ /*
+ * The maximum secure erase sectors (in 512-byte sectors) for
+ * one segment.
+ */
+ __virtio32 max_secure_erase_sectors;
+ /*
+ * The maximum number of secure erase segments in a
+ * secure erase command.
+ */
+ __virtio32 max_secure_erase_seg;
+ /* Secure erase commands must be aligned to this number of sectors. */
+ __virtio32 secure_erase_sector_alignment;
+
} QEMU_PACKED;
/*
@@ -153,6 +169,9 @@
/* Write zeroes command */
#define VIRTIO_BLK_T_WRITE_ZEROES 13
+/* Secure erase command */
+#define VIRTIO_BLK_T_SECURE_ERASE 14
+
#ifndef VIRTIO_BLK_NO_LEGACY
/* Barrier before this op. */
#define VIRTIO_BLK_T_BARRIER 0x80000000
diff --git a/include/standard-headers/linux/virtio_bt.h b/include/standard-headers/linux/virtio_bt.h
index 245e1ef..a11ecc3 100644
--- a/include/standard-headers/linux/virtio_bt.h
+++ b/include/standard-headers/linux/virtio_bt.h
@@ -9,6 +9,7 @@
#define VIRTIO_BT_F_VND_HCI 0 /* Indicates vendor command support */
#define VIRTIO_BT_F_MSFT_EXT 1 /* Indicates MSFT vendor support */
#define VIRTIO_BT_F_AOSP_EXT 2 /* Indicates AOSP vendor support */
+#define VIRTIO_BT_F_CONFIG_V2 3 /* Use second version configuration */
enum virtio_bt_config_type {
VIRTIO_BT_CONFIG_TYPE_PRIMARY = 0,
@@ -28,4 +29,11 @@
uint16_t msft_opcode;
} QEMU_PACKED;
+struct virtio_bt_config_v2 {
+ uint8_t type;
+ uint8_t alignment;
+ uint16_t vendor;
+ uint16_t msft_opcode;
+};
+
#endif /* _LINUX_VIRTIO_BT_H */
diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h
index 42c68ca..c0e7970 100644
--- a/include/standard-headers/linux/virtio_net.h
+++ b/include/standard-headers/linux/virtio_net.h
@@ -57,6 +57,9 @@
* Steering */
#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */
#define VIRTIO_NET_F_NOTF_COAL 53 /* Device supports notifications coalescing */
+#define VIRTIO_NET_F_GUEST_USO4 54 /* Guest can handle USOv4 in. */
+#define VIRTIO_NET_F_GUEST_USO6 55 /* Guest can handle USOv6 in. */
+#define VIRTIO_NET_F_HOST_USO 56 /* Host can handle USO in. */
#define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */
#define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */
#define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */
@@ -130,6 +133,7 @@
#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */
#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_UDP_L4 5 /* GSO frame, IPv4& IPv6 UDP (USO) */
#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */
uint8_t gso_type;
__virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
index 6858e39..2b6d27d 100644
--- a/include/sysemu/block-backend-global-state.h
+++ b/include/sysemu/block-backend-global-state.h
@@ -23,10 +23,23 @@
*/
BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
-BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
- uint64_t shared_perm, Error **errp);
-BlockBackend *blk_new_open(const char *filename, const char *reference,
- QDict *options, int flags, Error **errp);
+
+BlockBackend * no_coroutine_fn
+blk_new_with_bs(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm,
+ Error **errp);
+
+BlockBackend * coroutine_fn no_co_wrapper
+blk_co_new_with_bs(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm,
+ Error **errp);
+
+BlockBackend * no_coroutine_fn
+blk_new_open(const char *filename, const char *reference, QDict *options,
+ int flags, Error **errp);
+
+BlockBackend * coroutine_fn no_co_wrapper
+blk_co_new_open(const char *filename, const char *reference, QDict *options,
+ int flags, Error **errp);
+
int blk_get_refcnt(BlockBackend *blk);
void blk_ref(BlockBackend *blk);
void blk_unref(BlockBackend *blk);
diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h
index b1196ab..40ab178 100644
--- a/include/sysemu/block-backend-io.h
+++ b/include/sysemu/block-backend-io.h
@@ -55,10 +55,11 @@
void blk_inc_in_flight(BlockBackend *blk);
void blk_dec_in_flight(BlockBackend *blk);
-bool coroutine_fn blk_co_is_inserted(BlockBackend *blk);
-bool co_wrapper_mixed blk_is_inserted(BlockBackend *blk);
+bool coroutine_fn GRAPH_RDLOCK blk_co_is_inserted(BlockBackend *blk);
+bool co_wrapper_mixed_bdrv_rdlock blk_is_inserted(BlockBackend *blk);
-bool blk_is_available(BlockBackend *blk);
+bool coroutine_fn GRAPH_RDLOCK blk_co_is_available(BlockBackend *blk);
+bool co_wrapper_mixed_bdrv_rdlock blk_is_available(BlockBackend *blk);
void coroutine_fn blk_co_lock_medium(BlockBackend *blk, bool locked);
void co_wrapper blk_lock_medium(BlockBackend *blk, bool locked);
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
index 5b38c7b..97d0243 100644
--- a/include/sysemu/os-win32.h
+++ b/include/sysemu/os-win32.h
@@ -51,14 +51,34 @@
extern "C" {
#endif
-#if defined(_WIN64)
-/* On w64, setjmp is implemented by _setjmp which needs a second parameter.
+#if defined(__aarch64__)
+/*
+ * On windows-arm64, setjmp is available in only one variant, and longjmp always
+ * does stack unwinding. This crashes with generated code.
+ * Thus, we use another implementation of setjmp (not the Windows one), coming
+ * from mingw, which never performs stack unwinding.
+ */
+#undef setjmp
+#undef longjmp
+/*
+ * These functions are not declared in setjmp.h because __aarch64__ defines
+ * setjmp to _setjmpex instead. However, they are still defined in libmingwex.a,
+ * which gets linked automatically.
+ */
+extern int __mingw_setjmp(jmp_buf);
+extern void __attribute__((noreturn)) __mingw_longjmp(jmp_buf, int);
+#define setjmp(env) __mingw_setjmp(env)
+#define longjmp(env, val) __mingw_longjmp(env, val)
+#elif defined(_WIN64)
+/*
+ * On windows-x64, setjmp is implemented by _setjmp which needs a second parameter.
* If this parameter is NULL, longjmp does no stack unwinding.
* That is what we need for QEMU. Passing the value of register rsp (default)
- * lets longjmp try a stack unwinding which will crash with generated code. */
+ * lets longjmp try a stack unwinding which will crash with generated code.
+ */
# undef setjmp
# define setjmp(env) _setjmp(env, NULL)
-#endif
+#endif /* __aarch64__ */
/* QEMU uses sigsetjmp()/siglongjmp() as the portable way to specify
* "longjmp and don't touch the signal masks". Since we know that the
* savemask parameter will always be zero we can safely define these
diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index 7ec0882..6e5ab09 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -72,7 +72,7 @@
/*! Closes replay log file and frees other resources. */
void replay_finish(void);
/*! Adds replay blocker with the specified error description */
-void replay_add_blocker(Error *reason);
+void replay_add_blocker(const char *feature);
/* Returns name of the replay log file */
const char *replay_get_filename(void);
/*
diff --git a/include/ui/console.h b/include/ui/console.h
index 8e6cf78..1cb53ac 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -4,7 +4,6 @@
#include "ui/qemu-pixman.h"
#include "qom/object.h"
#include "qemu/notify.h"
-#include "qemu/error-report.h"
#include "qapi/qapi-types-ui.h"
#ifdef CONFIG_OPENGL
diff --git a/io/channel-tls.c b/io/channel-tls.c
index c730cb8..8052945 100644
--- a/io/channel-tls.c
+++ b/io/channel-tls.c
@@ -389,12 +389,76 @@
qio_channel_set_aio_fd_handler(tioc->master, ctx, io_read, io_write, opaque);
}
+typedef struct QIOChannelTLSSource QIOChannelTLSSource;
+struct QIOChannelTLSSource {
+ GSource parent;
+ QIOChannelTLS *tioc;
+};
+
+static gboolean
+qio_channel_tls_source_check(GSource *source)
+{
+ QIOChannelTLSSource *tsource = (QIOChannelTLSSource *)source;
+
+ return qcrypto_tls_session_check_pending(tsource->tioc->session) > 0;
+}
+
+static gboolean
+qio_channel_tls_source_prepare(GSource *source, gint *timeout)
+{
+ *timeout = -1;
+ return qio_channel_tls_source_check(source);
+}
+
+static gboolean
+qio_channel_tls_source_dispatch(GSource *source, GSourceFunc callback,
+ gpointer user_data)
+{
+ return G_SOURCE_CONTINUE;
+}
+
+static void
+qio_channel_tls_source_finalize(GSource *source)
+{
+ QIOChannelTLSSource *tsource = (QIOChannelTLSSource *)source;
+
+ object_unref(OBJECT(tsource->tioc));
+}
+
+static GSourceFuncs qio_channel_tls_source_funcs = {
+ qio_channel_tls_source_prepare,
+ qio_channel_tls_source_check,
+ qio_channel_tls_source_dispatch,
+ qio_channel_tls_source_finalize
+};
+
+static void
+qio_channel_tls_read_watch(QIOChannelTLS *tioc, GSource *source)
+{
+ GSource *child;
+ QIOChannelTLSSource *tlssource;
+
+ child = g_source_new(&qio_channel_tls_source_funcs,
+ sizeof(QIOChannelTLSSource));
+ tlssource = (QIOChannelTLSSource *)child;
+
+ tlssource->tioc = tioc;
+ object_ref(OBJECT(tioc));
+
+ g_source_add_child_source(source, child);
+}
+
static GSource *qio_channel_tls_create_watch(QIOChannel *ioc,
GIOCondition condition)
{
QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
+ GSource *source = qio_channel_create_watch(tioc->master, condition);
- return qio_channel_create_watch(tioc->master, condition);
+ if (condition & G_IO_IN) {
+ qio_channel_tls_read_watch(tioc, source);
+ }
+
+ return source;
}
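
With the child source in place, an ordinary read watch on the TLS channel also
fires when decoded data is already buffered inside the TLS session, not only
when the master channel's fd becomes readable. A hedged sketch of such a
caller; my_read_handler, opaque and ctx are placeholders, not names from this
patch:

    GSource *src = qio_channel_create_watch(QIO_CHANNEL(tioc), G_IO_IN);
    g_source_set_callback(src, (GSourceFunc)my_read_handler, opaque, NULL);
    g_source_attach(src, ctx);
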
QCryptoTLSSession *
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index 4bf2d72..a7cfefb 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -43,6 +43,7 @@
#define __KVM_HAVE_VCPU_EVENTS
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
#define KVM_REG_SIZE(id) \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
diff --git a/linux-headers/asm-generic/hugetlb_encode.h b/linux-headers/asm-generic/hugetlb_encode.h
index 4f3d5aa..de68700 100644
--- a/linux-headers/asm-generic/hugetlb_encode.h
+++ b/linux-headers/asm-generic/hugetlb_encode.h
@@ -20,18 +20,18 @@
#define HUGETLB_FLAG_ENCODE_SHIFT 26
#define HUGETLB_FLAG_ENCODE_MASK 0x3f
-#define HUGETLB_FLAG_ENCODE_16KB (14 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16KB (14U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_64KB (16U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512KB (19U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1MB (20U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2MB (21U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_8MB (23U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16MB (24U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_32MB (25U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_256MB (28U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512MB (29U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1GB (30U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2GB (31U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16GB (34U << HUGETLB_FLAG_ENCODE_SHIFT)
#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */
diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h
index 6c1aa92..6ce1f1c 100644
--- a/linux-headers/asm-generic/mman-common.h
+++ b/linux-headers/asm-generic/mman-common.h
@@ -77,6 +77,8 @@
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
+#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */
+
/* compatibility flags */
#define MAP_FILE 0
diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h
index 1be4286..c6e1fc7 100644
--- a/linux-headers/asm-mips/mman.h
+++ b/linux-headers/asm-mips/mman.h
@@ -103,6 +103,8 @@
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
+#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */
+
/* compatibility flags */
#define MAP_FILE 0
diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h
index 7351417..92af6f3 100644
--- a/linux-headers/asm-riscv/kvm.h
+++ b/linux-headers/asm-riscv/kvm.h
@@ -48,6 +48,10 @@
/* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
struct kvm_riscv_config {
unsigned long isa;
+ unsigned long zicbom_block_size;
+ unsigned long mvendorid;
+ unsigned long marchid;
+ unsigned long mimpid;
};
/* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
@@ -98,6 +102,9 @@
KVM_RISCV_ISA_EXT_M,
KVM_RISCV_ISA_EXT_SVPBMT,
KVM_RISCV_ISA_EXT_SSTC,
+ KVM_RISCV_ISA_EXT_SVINVAL,
+ KVM_RISCV_ISA_EXT_ZIHINTPAUSE,
+ KVM_RISCV_ISA_EXT_ZICBOM,
KVM_RISCV_ISA_EXT_MAX,
};
diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index 46de10a..2747d2c 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -53,14 +53,6 @@
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
-struct kvm_memory_alias {
- __u32 slot; /* this has a different namespace than memory slots */
- __u32 flags;
- __u64 guest_phys_addr;
- __u64 memory_size;
- __u64 target_phys_addr;
-};
-
/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
struct kvm_pic_state {
__u8 last_irr; /* edge detection */
@@ -214,6 +206,8 @@
struct kvm_msr_filter_range {
#define KVM_MSR_FILTER_READ (1 << 0)
#define KVM_MSR_FILTER_WRITE (1 << 1)
+#define KVM_MSR_FILTER_RANGE_VALID_MASK (KVM_MSR_FILTER_READ | \
+ KVM_MSR_FILTER_WRITE)
__u32 flags;
__u32 nmsrs; /* number of msrs in bitmap */
__u32 base; /* MSR index the bitmap starts at */
@@ -224,6 +218,7 @@
struct kvm_msr_filter {
#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0)
+#define KVM_MSR_FILTER_VALID_MASK (KVM_MSR_FILTER_DEFAULT_DENY)
__u32 flags;
struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
};
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index ebdafa5..1e2c16c 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -86,14 +86,6 @@
/* *** End of deprecated interfaces *** */
-/* for KVM_CREATE_MEMORY_REGION */
-struct kvm_memory_region {
- __u32 slot;
- __u32 flags;
- __u64 guest_phys_addr;
- __u64 memory_size; /* bytes */
-};
-
/* for KVM_SET_USER_MEMORY_REGION */
struct kvm_userspace_memory_region {
__u32 slot;
@@ -104,9 +96,9 @@
};
/*
- * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace,
- * other bits are reserved for kvm internal use which are defined in
- * include/linux/kvm_host.h.
+ * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for
+ * userspace, other bits are reserved for kvm internal use which are defined
+ * in include/linux/kvm_host.h.
*/
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
#define KVM_MEM_READONLY (1UL << 1)
@@ -483,6 +475,9 @@
#define KVM_MSR_EXIT_REASON_INVAL (1 << 0)
#define KVM_MSR_EXIT_REASON_UNKNOWN (1 << 1)
#define KVM_MSR_EXIT_REASON_FILTER (1 << 2)
+#define KVM_MSR_EXIT_REASON_VALID_MASK (KVM_MSR_EXIT_REASON_INVAL | \
+ KVM_MSR_EXIT_REASON_UNKNOWN | \
+ KVM_MSR_EXIT_REASON_FILTER)
__u32 reason; /* kernel -> user */
__u32 index; /* kernel -> user */
__u64 data; /* kernel <-> user */
@@ -1175,6 +1170,9 @@
#define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220
#define KVM_CAP_S390_ZPCI_OP 221
#define KVM_CAP_S390_CPU_TOPOLOGY 222
+#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
+#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
+#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1264,6 +1262,7 @@
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
struct kvm_xen_hvm_config {
__u32 flags;
@@ -1435,17 +1434,11 @@
};
/*
- * ioctls for VM fds
- */
-#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region)
-/*
* KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
* a vcpu fd.
*/
#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
-/* KVM_SET_MEMORY_ALIAS is obsolete: */
-#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44)
#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45)
#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
@@ -1737,6 +1730,8 @@
KVM_PV_UNSHARE_ALL,
KVM_PV_INFO,
KVM_PV_DUMP,
+ KVM_PV_ASYNC_CLEANUP_PREPARE,
+ KVM_PV_ASYNC_CLEANUP_PERFORM,
};
struct kvm_pv_cmd {
@@ -1767,8 +1762,10 @@
union {
__u8 long_mode;
__u8 vector;
+ __u8 runstate_update_flag;
struct {
__u64 gfn;
+#define KVM_XEN_INVALID_GFN ((__u64)-1)
} shared_info;
struct {
__u32 send_port;
@@ -1800,6 +1797,7 @@
} u;
};
+
/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0
#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
@@ -1807,6 +1805,8 @@
/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3
#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
+#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
/* Per-vCPU Xen attributes */
#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
@@ -1823,6 +1823,7 @@
__u16 pad[3];
union {
__u64 gpa;
+#define KVM_XEN_INVALID_GPA ((__u64)-1)
__u64 pad[8];
struct {
__u64 state;
diff --git a/linux-headers/linux/psci.h b/linux-headers/linux/psci.h
index 213b2a0..74f3cb5 100644
--- a/linux-headers/linux/psci.h
+++ b/linux-headers/linux/psci.h
@@ -48,12 +48,26 @@
#define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7)
#define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10)
+#define PSCI_1_0_FN_CPU_FREEZE PSCI_0_2_FN(11)
+#define PSCI_1_0_FN_CPU_DEFAULT_SUSPEND PSCI_0_2_FN(12)
+#define PSCI_1_0_FN_NODE_HW_STATE PSCI_0_2_FN(13)
#define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14)
#define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15)
-#define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18)
+#define PSCI_1_0_FN_STAT_RESIDENCY PSCI_0_2_FN(16)
+#define PSCI_1_0_FN_STAT_COUNT PSCI_0_2_FN(17)
+#define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18)
+#define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19)
+#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(20)
+
+#define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12)
+#define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13)
#define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14)
+#define PSCI_1_0_FN64_STAT_RESIDENCY PSCI_0_2_FN64(16)
+#define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17)
+
#define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18)
+#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(20)
/* PSCI v0.2 power state encoding for CPU_SUSPEND function */
#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff
diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h
index a3a377c..ba5d0df 100644
--- a/linux-headers/linux/userfaultfd.h
+++ b/linux-headers/linux/userfaultfd.h
@@ -12,6 +12,10 @@
#include <linux/types.h>
+/* ioctls for /dev/userfaultfd */
+#define USERFAULTFD_IOC 0xAA
+#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00)
+
/*
* If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and
* UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In
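The new USERFAULTFD_IOC_NEW ioctl on /dev/userfaultfd returns a fresh userfaultfd, as an alternative to the userfaultfd() syscall. A minimal sketch, assuming an installed header that carries this addition; the ioctl argument takes the usual userfaultfd() flags:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/userfaultfd.h>   /* USERFAULTFD_IOC, USERFAULTFD_IOC_NEW */

    /* Obtain a userfaultfd via the character device rather than the syscall. */
    static int open_uffd_via_dev(void)
    {
        int dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
        if (dev < 0) {
            perror("open /dev/userfaultfd");
            return -1;
        }
        int uffd = ioctl(dev, USERFAULTFD_IOC_NEW, O_CLOEXEC);
        close(dev);
        if (uffd < 0) {
            perror("USERFAULTFD_IOC_NEW");
            return -1;
        }
        return uffd;   /* still needs the usual UFFDIO_API handshake before use */
    }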
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index ede44b5..c59692c 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -819,12 +819,20 @@
* VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P means that RUNNING_P2P
* is supported in addition to the STOP_COPY states.
*
+ * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY means that
+ * PRE_COPY is supported in addition to the STOP_COPY states.
+ *
+ * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY
+ * means that RUNNING_P2P, PRE_COPY and PRE_COPY_P2P are supported
+ * in addition to the STOP_COPY states.
+ *
* Other combinations of flags have behavior to be defined in the future.
*/
struct vfio_device_feature_migration {
__aligned_u64 flags;
#define VFIO_MIGRATION_STOP_COPY (1 << 0)
#define VFIO_MIGRATION_P2P (1 << 1)
+#define VFIO_MIGRATION_PRE_COPY (1 << 2)
};
#define VFIO_DEVICE_FEATURE_MIGRATION 1
@@ -875,8 +883,13 @@
* RESUMING - The device is stopped and is loading a new internal state
* ERROR - The device has failed and must be reset
*
- * And 1 optional state to support VFIO_MIGRATION_P2P:
+ * And optional states to support VFIO_MIGRATION_P2P:
* RUNNING_P2P - RUNNING, except the device cannot do peer to peer DMA
+ * And VFIO_MIGRATION_PRE_COPY:
+ * PRE_COPY - The device is running normally but tracking internal state
+ * changes
+ * And VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY:
+ * PRE_COPY_P2P - PRE_COPY, except the device cannot do peer to peer DMA
*
* The FSM takes actions on the arcs between FSM states. The driver implements
* the following behavior for the FSM arcs:
@@ -908,20 +921,48 @@
*
* To abort a RESUMING session the device must be reset.
*
+ * PRE_COPY -> RUNNING
* RUNNING_P2P -> RUNNING
* While in RUNNING the device is fully operational, the device may generate
* interrupts, DMA, respond to MMIO, all vfio device regions are functional,
* and the device may advance its internal state.
*
+ * The PRE_COPY arc will terminate a data transfer session.
+ *
+ * PRE_COPY_P2P -> RUNNING_P2P
* RUNNING -> RUNNING_P2P
* STOP -> RUNNING_P2P
* While in RUNNING_P2P the device is partially running in the P2P quiescent
* state defined below.
*
- * STOP -> STOP_COPY
- * This arc begin the process of saving the device state and will return a
- * new data_fd.
+ * The PRE_COPY_P2P arc will terminate a data transfer session.
*
+ * RUNNING -> PRE_COPY
+ * RUNNING_P2P -> PRE_COPY_P2P
+ * STOP -> STOP_COPY
+ * PRE_COPY, PRE_COPY_P2P and STOP_COPY form the "saving group" of states
+ * which share a data transfer session. Moving between these states alters
+ * what is streamed in session, but does not terminate or otherwise affect
+ * the associated fd.
+ *
+ * These arcs begin the process of saving the device state and will return a
+ * new data_fd. The migration driver may perform actions such as enabling
+ * dirty logging of device state when entering PRE_COPY or PRE_COPY_P2P.
+ *
+ * Each arc does not change the device operation, the device remains
+ * RUNNING, P2P quiesced or in STOP. The STOP_COPY state is described below
+ * in PRE_COPY_P2P -> STOP_COPY.
+ *
+ * PRE_COPY -> PRE_COPY_P2P
+ * Entering PRE_COPY_P2P continues all the behaviors of PRE_COPY above.
+ * However, while in the PRE_COPY_P2P state, the device is partially running
+ * in the P2P quiescent state defined below, like RUNNING_P2P.
+ *
+ * PRE_COPY_P2P -> PRE_COPY
+ * This arc allows returning the device to a full RUNNING behavior while
+ * continuing all the behaviors of PRE_COPY.
+ *
+ * PRE_COPY_P2P -> STOP_COPY
* While in the STOP_COPY state the device has the same behavior as STOP
* with the addition that the data transfers session continues to stream the
* migration state. End of stream on the FD indicates the entire device
@@ -939,6 +980,13 @@
* device state for this arc if required to prepare the device to receive the
* migration data.
*
+ * STOP_COPY -> PRE_COPY
+ * STOP_COPY -> PRE_COPY_P2P
+ * These arcs are not permitted and return error if requested. Future
+ * revisions of this API may define behaviors for these arcs, in this case
+ * support will be discoverable by a new flag in
+ * VFIO_DEVICE_FEATURE_MIGRATION.
+ *
* any -> ERROR
* ERROR cannot be specified as a device state, however any transition request
* can be failed with an errno return and may then move the device_state into
@@ -950,7 +998,7 @@
* The optional peer to peer (P2P) quiescent state is intended to be a quiescent
* state for the device for the purposes of managing multiple devices within a
* user context where peer-to-peer DMA between devices may be active. The
- * RUNNING_P2P states must prevent the device from initiating
+ * RUNNING_P2P and PRE_COPY_P2P states must prevent the device from initiating
* any new P2P DMA transactions. If the device can identify P2P transactions
* then it can stop only P2P DMA, otherwise it must stop all DMA. The migration
* driver must complete any such outstanding operations prior to completing the
@@ -963,6 +1011,8 @@
* above FSM arcs. As there are multiple paths through the FSM arcs the path
* should be selected based on the following rules:
* - Select the shortest path.
+ * - The path cannot have saving group states as interior arcs, only
+ * starting/end states.
* Refer to vfio_mig_get_next_state() for the result of the algorithm.
*
* The automatic transit through the FSM arcs that make up the combination
@@ -976,6 +1026,9 @@
* support them. The user can discover if these states are supported by using
* VFIO_DEVICE_FEATURE_MIGRATION. By using combination transitions the user can
* avoid knowing about these optional states if the kernel driver supports them.
+ *
+ * Arcs touching PRE_COPY and PRE_COPY_P2P are removed if support for PRE_COPY
+ * is not present.
*/
enum vfio_device_mig_state {
VFIO_DEVICE_STATE_ERROR = 0,
@@ -984,8 +1037,225 @@
VFIO_DEVICE_STATE_STOP_COPY = 3,
VFIO_DEVICE_STATE_RESUMING = 4,
VFIO_DEVICE_STATE_RUNNING_P2P = 5,
+ VFIO_DEVICE_STATE_PRE_COPY = 6,
+ VFIO_DEVICE_STATE_PRE_COPY_P2P = 7,
};
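A minimal sketch of requesting the new PRE_COPY state from userspace. It assumes the struct vfio_device_feature container, the VFIO_DEVICE_FEATURE ioctl and VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE defined earlier in this header (not shown in this hunk); device_fd is a placeholder for an open VFIO device fd:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/vfio.h>

    /* Move the device into PRE_COPY and return the data_fd of the saving group. */
    static int enter_pre_copy(int device_fd)
    {
        uint64_t buf[(sizeof(struct vfio_device_feature) +
                      sizeof(struct vfio_device_feature_mig_state) + 7) / 8];
        struct vfio_device_feature *feature = (void *)buf;
        struct vfio_device_feature_mig_state *mig = (void *)feature->data;

        memset(buf, 0, sizeof(buf));
        feature->argsz = sizeof(buf);
        feature->flags = VFIO_DEVICE_FEATURE_SET |
                         VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
        mig->device_state = VFIO_DEVICE_STATE_PRE_COPY;

        if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature) < 0) {
            perror("VFIO_DEVICE_FEATURE(MIG_DEVICE_STATE)");
            return -1;
        }
        return mig->data_fd;   /* stream precopy data from this fd */
    }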
+/**
+ * VFIO_MIG_GET_PRECOPY_INFO - _IO(VFIO_TYPE, VFIO_BASE + 21)
+ *
+ * This ioctl is used on the migration data FD in the precopy phase of the
+ * migration data transfer. It returns an estimate of the current data sizes
+ * remaining to be transferred. It allows the user to judge when it is
+ * appropriate to leave PRE_COPY for STOP_COPY.
+ *
+ * This ioctl is valid only in the PRE_COPY states and the kernel driver should
+ * return -EINVAL in any other migration state.
+ *
+ * The vfio_precopy_info data structure returned by this ioctl provides
+ * estimates of data available from the device during the PRE_COPY states.
+ * This estimate is split into two categories, initial_bytes and
+ * dirty_bytes.
+ *
+ * The initial_bytes field indicates the amount of initial precopy
+ * data available from the device. This field should have a non-zero initial
+ * value and decrease as migration data is read from the device.
+ * It is recommended to leave PRE_COPY for STOP_COPY only after this field
+ * reaches zero. Leaving PRE_COPY earlier might make things slower.
+ *
+ * The dirty_bytes field tracks device state changes relative to data
+ * previously retrieved. This field starts at zero and may increase as
+ * the internal device state is modified or decrease as that modified
+ * state is read from the device.
+ *
+ * Userspace may use the combination of these fields to estimate the
+ * potential data size available during the PRE_COPY phases, as well as
+ * trends relative to the rate the device is dirtying its internal
+ * state, but these fields are not required to have any bearing relative
+ * to the data size available during the STOP_COPY phase.
+ *
+ * Drivers have a lot of flexibility in when and what they transfer during the
+ * PRE_COPY phase, and how they report this from VFIO_MIG_GET_PRECOPY_INFO.
+ *
+ * During pre-copy the migration data FD has a temporary "end of stream" that is
+ * reached when both initial_bytes and dirty_bytes are zero. For instance, this
+ * may indicate that the device is idle and not currently dirtying any internal
+ * state. When read() is done on this temporary end of stream the kernel driver
+ * should return ENOMSG from read(). Userspace can wait for more data (which may
+ * never come) by using poll.
+ *
+ * Once in STOP_COPY the migration data FD has a permanent end of stream
+ * signaled in the usual way by read() always returning 0 and poll always
+ * returning readable. ENOMSG may not be returned in STOP_COPY.
+ * Support for this ioctl is mandatory if a driver claims to support
+ * VFIO_MIGRATION_PRE_COPY.
+ *
+ * Return: 0 on success, -1 and errno set on failure.
+ */
+struct vfio_precopy_info {
+ __u32 argsz;
+ __u32 flags;
+ __aligned_u64 initial_bytes;
+ __aligned_u64 dirty_bytes;
+};
+
+#define VFIO_MIG_GET_PRECOPY_INFO _IO(VFIO_TYPE, VFIO_BASE + 21)
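A minimal sketch of the sizing loop described above, issued on the migration data fd obtained when entering the saving group; data_fd is a placeholder:

    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/vfio.h>

    /* Returns 1 when initial_bytes has drained to zero, 0 to keep precopying. */
    static int precopy_ready_for_stop_copy(int data_fd)
    {
        struct vfio_precopy_info info;

        memset(&info, 0, sizeof(info));
        info.argsz = sizeof(info);

        if (ioctl(data_fd, VFIO_MIG_GET_PRECOPY_INFO, &info) < 0) {
            perror("VFIO_MIG_GET_PRECOPY_INFO");   /* EINVAL outside PRE_COPY */
            return -1;
        }
        printf("initial_bytes=%llu dirty_bytes=%llu\n",
               (unsigned long long)info.initial_bytes,
               (unsigned long long)info.dirty_bytes);
        return info.initial_bytes == 0;
    }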
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power
+ * state with the platform-based power management. Device use of lower power
+ * states depends on factors managed by the runtime power management core,
+ * including system level support and coordinating support among dependent
+ * devices. Enabling device low power entry does not guarantee lower power
+ * usage by the device, nor is a mechanism provided through this feature to
+ * know the current power state of the device. If any device access happens
+ * (either from the host or through the vfio uAPI) when the device is in the
+ * low power state, then the host will move the device out of the low power
+ * state as necessary prior to the access. Once the access is completed, the
+ * device may re-enter the low power state. For single shot low power support
+ * with wake-up notification, see
+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP below. Access to mmap'd
+ * device regions is disabled on LOW_POWER_ENTRY and may only be resumed after
+ * calling LOW_POWER_EXIT.
+ */
+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY 3
+
+/*
+ * This device feature has the same behavior as
+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY with the exception that the user
+ * provides an eventfd for wake-up notification. When the device moves out of
+ * the low power state for the wake-up, the host will not allow the device to
+ * re-enter a low power state without a subsequent user call to one of the low
+ * power entry device feature IOCTLs. Access to mmap'd device regions is
+ * disabled on LOW_POWER_ENTRY_WITH_WAKEUP and may only be resumed after the
+ * low power exit. The low power exit can happen either through LOW_POWER_EXIT
+ * or through any other access (where the wake-up notification has been
+ * generated). The access to mmap'd device regions will not trigger low power
+ * exit.
+ *
+ * The notification through the provided eventfd will be generated only when
+ * the device has entered and is resumed from a low power state after
+ * calling this device feature IOCTL. A device that has not entered low power
+ * state, as managed through the runtime power management core, will not
+ * generate a notification through the provided eventfd on access. Calling the
+ * LOW_POWER_EXIT feature is optional in the case where notification has been
+ * signaled on the provided eventfd that a resume from low power has occurred.
+ */
+struct vfio_device_low_power_entry_with_wakeup {
+ __s32 wakeup_eventfd;
+ __u32 reserved;
+};
+
+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP 4
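A minimal sketch of arming low power entry with a wake-up eventfd, again assuming the struct vfio_device_feature container and the VFIO_DEVICE_FEATURE ioctl defined earlier in this header; device_fd is a placeholder:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/eventfd.h>
    #include <sys/ioctl.h>
    #include <linux/vfio.h>

    /* Returns the wake-up eventfd on success; poll/read it to see the wake-up. */
    static int low_power_entry_with_wakeup(int device_fd)
    {
        int evt = eventfd(0, EFD_CLOEXEC);
        if (evt < 0) {
            perror("eventfd");
            return -1;
        }

        uint64_t buf[(sizeof(struct vfio_device_feature) +
                      sizeof(struct vfio_device_low_power_entry_with_wakeup) + 7) / 8];
        struct vfio_device_feature *feature = (void *)buf;
        struct vfio_device_low_power_entry_with_wakeup *entry = (void *)feature->data;

        memset(buf, 0, sizeof(buf));
        feature->argsz = sizeof(buf);
        feature->flags = VFIO_DEVICE_FEATURE_SET |
                         VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP;
        entry->wakeup_eventfd = evt;

        if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature) < 0) {
            perror("VFIO_DEVICE_FEATURE(LOW_POWER_ENTRY_WITH_WAKEUP)");
            return -1;
        }
        return evt;
    }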
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET, disallow use of device low power states as
+ * previously enabled via VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY or
+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP device features.
+ * This device feature IOCTL may itself generate a wakeup eventfd notification
+ * in the latter case if the device had previously entered a low power state.
+ */
+#define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET start/stop device DMA logging.
+ * VFIO_DEVICE_FEATURE_PROBE can be used to detect if the device supports
+ * DMA logging.
+ *
+ * DMA logging allows a device to internally record what DMAs the device is
+ * initiating and report them back to userspace. It is part of the VFIO
+ * migration infrastructure that allows implementing dirty page tracking
+ * during the pre copy phase of live migration. Only DMA WRITEs are logged,
+ * and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE.
+ *
+ * When DMA logging is started a range of IOVAs to monitor is provided and the
+ * device can optimize its logging to cover only the IOVA range given. Each
+ * DMA that the device initiates inside the range will be logged by the device
+ * for later retrieval.
+ *
+ * page_size is an input that hints what tracking granularity the device
+ * should try to achieve. If the device cannot do the hinted page size then
+ * it is the driver's choice which page size to pick based on what it supports.
+ * On output the device will return the page size it selected.
+ *
+ * ranges is a pointer to an array of
+ * struct vfio_device_feature_dma_logging_range.
+ *
+ * The core kernel code guarantees to support at minimum a num_ranges that fits
+ * into a single kernel page. User space can try higher values but should give
+ * up if that can't be achieved due to driver limitations.
+ *
+ * A single call to start device DMA logging can be issued and a matching stop
+ * should follow at the end. Another start is not allowed in the meantime.
+ */
+struct vfio_device_feature_dma_logging_control {
+ __aligned_u64 page_size;
+ __u32 num_ranges;
+ __u32 __reserved;
+ __aligned_u64 ranges;
+};
+
+struct vfio_device_feature_dma_logging_range {
+ __aligned_u64 iova;
+ __aligned_u64 length;
+};
+
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6
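A minimal sketch of starting DMA write logging over a single IOVA range, assuming the struct vfio_device_feature container and the VFIO_DEVICE_FEATURE ioctl from earlier in this header; the 4 KiB page_size is only a hint and the driver may pick another granularity:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/vfio.h>

    /* Start dirty tracking for [iova, iova + length); pair with LOGGING_STOP. */
    static int dma_logging_start(int device_fd, uint64_t iova, uint64_t length)
    {
        struct vfio_device_feature_dma_logging_range range = {
            .iova = iova,
            .length = length,
        };
        uint64_t buf[(sizeof(struct vfio_device_feature) +
                      sizeof(struct vfio_device_feature_dma_logging_control) + 7) / 8];
        struct vfio_device_feature *feature = (void *)buf;
        struct vfio_device_feature_dma_logging_control *ctrl = (void *)feature->data;

        memset(buf, 0, sizeof(buf));
        feature->argsz = sizeof(buf);
        feature->flags = VFIO_DEVICE_FEATURE_SET |
                         VFIO_DEVICE_FEATURE_DMA_LOGGING_START;
        ctrl->page_size = 4096;             /* hint only */
        ctrl->num_ranges = 1;
        ctrl->ranges = (uintptr_t)&range;   /* user pointer passed as u64 */

        if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature) < 0) {
            perror("VFIO_DEVICE_FEATURE(DMA_LOGGING_START)");
            return -1;
        }
        return 0;
    }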
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET stop device DMA logging that was started
+ * by VFIO_DEVICE_FEATURE_DMA_LOGGING_START
+ */
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_GET read back and clear the device DMA log
+ *
+ * Query the device's DMA log for written pages within the given IOVA range.
+ * During querying the log is cleared for the IOVA range.
+ *
+ * bitmap is a pointer to an array of u64s that will hold the output bitmap
+ * with 1 bit reporting a page_size unit of IOVA. The mapping of IOVA to bits
+ * is given by:
+ * bitmap[(addr - iova)/page_size] & (1ULL << (addr % 64))
+ *
+ * The input page_size can be any power of two value and does not have to
+ * match the value given to VFIO_DEVICE_FEATURE_DMA_LOGGING_START. The driver
+ * will format its internal logging to match the reporting page size, possibly
+ * by replicating bits if the internal page size is lower than requested.
+ *
+ * The LOGGING_REPORT will only set bits in the bitmap and never clear or
+ * perform any initialization of the user provided bitmap.
+ *
+ * If any error is returned userspace should assume that the dirty log is
+ * corrupted. Error recovery is to consider all memory dirty and try to
+ * restart the dirty tracking, or to abort/restart the whole migration.
+ *
+ * If DMA logging is not enabled, an error will be returned.
+ *
+ */
+struct vfio_device_feature_dma_logging_report {
+ __aligned_u64 iova;
+ __aligned_u64 length;
+ __aligned_u64 page_size;
+ __aligned_u64 bitmap;
+};
+
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8
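A minimal sketch of reading back and decoding the dirty log for one range, using the one-bit-per-page_size layout described above; the struct vfio_device_feature container, the VFIO_DEVICE_FEATURE ioctl and the VFIO_DEVICE_FEATURE_GET flag are assumed from earlier in this header:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/vfio.h>

    /* Print the IOVA of every page reported dirty in [iova, iova + length). */
    static int dma_logging_report(int device_fd, uint64_t iova,
                                  uint64_t length, uint64_t page_size)
    {
        uint64_t npages = length / page_size;
        uint64_t *bitmap = calloc((npages + 63) / 64, sizeof(uint64_t));
        if (!bitmap) {
            return -1;
        }

        uint64_t buf[(sizeof(struct vfio_device_feature) +
                      sizeof(struct vfio_device_feature_dma_logging_report) + 7) / 8];
        struct vfio_device_feature *feature = (void *)buf;
        struct vfio_device_feature_dma_logging_report *report = (void *)feature->data;

        memset(buf, 0, sizeof(buf));
        feature->argsz = sizeof(buf);
        feature->flags = VFIO_DEVICE_FEATURE_GET |
                         VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
        report->iova = iova;
        report->length = length;
        report->page_size = page_size;
        report->bitmap = (uintptr_t)bitmap;

        if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature) < 0) {
            perror("VFIO_DEVICE_FEATURE(DMA_LOGGING_REPORT)");
            free(bitmap);
            return -1;   /* on error, treat the whole range as dirty */
        }
        for (uint64_t i = 0; i < npages; i++) {
            if (bitmap[i / 64] & (1ULL << (i % 64))) {
                printf("dirty page at IOVA 0x%llx\n",
                       (unsigned long long)(iova + i * page_size));
            }
        }
        free(bitmap);
        return 0;
    }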
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_GET read back the estimated data length that will
+ * be required to complete stop copy.
+ *
+ * Note: Can be called in any device state.
+ */
+
+struct vfio_device_feature_mig_data_size {
+ __aligned_u64 stop_copy_length;
+};
+
+#define VFIO_DEVICE_FEATURE_MIG_DATA_SIZE 9
+
/* -------- API for Type1 VFIO IOMMU -------- */
/**
diff --git a/linux-user/arm/cpu_loop.c b/linux-user/arm/cpu_loop.c
index c0790f3..a992423 100644
--- a/linux-user/arm/cpu_loop.c
+++ b/linux-user/arm/cpu_loop.c
@@ -356,7 +356,7 @@
break;
case EXCP_SWI:
{
- env->eabi = 1;
+ env->eabi = true;
/* system call */
if (env->thumb) {
/* Thumb is always EABI style with syscall number in r7 */
@@ -382,7 +382,7 @@
* > 0xfffff and are handled below as out-of-range.
*/
n ^= ARM_SYSCALL_BASE;
- env->eabi = 0;
+ env->eabi = false;
}
}
diff --git a/linux-user/main.c b/linux-user/main.c
index 4290651..4ff30ff 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -161,13 +161,15 @@
}
qemu_init_cpu_list();
gdbserver_fork(thread_cpu);
- /* qemu_init_cpu_list() takes care of reinitializing the
- * exclusive state, so we don't need to end_exclusive() here.
- */
} else {
cpu_list_unlock();
- end_exclusive();
}
+ /*
+ * qemu_init_cpu_list() reinitialized the child exclusive state, but we
+ * also need to keep current_cpu consistent, so call end_exclusive() for
+ * both child and parent.
+ */
+ end_exclusive();
}
__thread CPUState *thread_cpu;
diff --git a/linux-user/microblaze/cpu_loop.c b/linux-user/microblaze/cpu_loop.c
index 5ccf9e9..212e62d 100644
--- a/linux-user/microblaze/cpu_loop.c
+++ b/linux-user/microblaze/cpu_loop.c
@@ -25,8 +25,8 @@
void cpu_loop(CPUMBState *env)
{
+ int trapnr, ret, si_code, sig;
CPUState *cs = env_cpu(env);
- int trapnr, ret, si_code;
while (1) {
cpu_exec_start(cs);
@@ -76,6 +76,7 @@
env->iflags &= ~(IMM_FLAG | D_FLAG);
switch (env->esr & 31) {
case ESR_EC_DIVZERO:
+ sig = TARGET_SIGFPE;
si_code = TARGET_FPE_INTDIV;
break;
case ESR_EC_FPU:
@@ -84,6 +85,7 @@
* if there's no recognized bit set. Possibly this
* implies that si_code is 0, but follow the structure.
*/
+ sig = TARGET_SIGFPE;
si_code = env->fsr;
if (si_code & FSR_IO) {
si_code = TARGET_FPE_FLTINV;
@@ -97,13 +99,17 @@
si_code = TARGET_FPE_FLTRES;
}
break;
+ case ESR_EC_PRIVINSN:
+ sig = SIGILL;
+ si_code = ILL_PRVOPC;
+ break;
default:
fprintf(stderr, "Unhandled hw-exception: 0x%x\n",
env->esr & ESR_EC_MASK);
cpu_dump_state(cs, stderr, 0);
exit(EXIT_FAILURE);
}
- force_sig_fault(TARGET_SIGFPE, si_code, env->pc);
+ force_sig_fault(sig, si_code, env->pc);
break;
case EXCP_DEBUG:
diff --git a/linux-user/sparc/cpu_loop.c b/linux-user/sparc/cpu_loop.c
index 434c90a..c120c42 100644
--- a/linux-user/sparc/cpu_loop.c
+++ b/linux-user/sparc/cpu_loop.c
@@ -248,6 +248,14 @@
cpu_exec_step_atomic(cs);
break;
default:
+ /*
+ * Most software trap numbers vector to BAD_TRAP.
+ * Handle anything not explicitly matched above.
+ */
+ if (trapnr >= TT_TRAP && trapnr <= TT_TRAP + 0x7f) {
+ force_sig_fault(TARGET_SIGILL, ILL_ILLTRP, env->pc);
+ break;
+ }
fprintf(stderr, "Unhandled trap: 0x%x\n", trapnr);
cpu_dump_state(cs, stderr, 0);
exit(EXIT_FAILURE);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 1e868e9..a6c426d 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -6752,6 +6752,7 @@
cpu_clone_regs_parent(env, flags);
fork_end(0);
}
+ g_assert(!cpu_in_exclusive_context(cpu));
}
return ret;
}
diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
index 0280e76..3576da4 100644
--- a/linux-user/user-internals.h
+++ b/linux-user/user-internals.h
@@ -135,7 +135,7 @@
#ifdef TARGET_ARM
static inline int regpairs_aligned(CPUArchState *cpu_env, int num)
{
- return cpu_env->eabi == 1;
+ return cpu_env->eabi;
}
#elif defined(TARGET_MIPS) && defined(TARGET_ABI_MIPSO32)
static inline int regpairs_aligned(CPUArchState *cpu_env, int num) { return 1; }
diff --git a/meson.build b/meson.build
index 4ba3bf3..6cb2b1a 100644
--- a/meson.build
+++ b/meson.build
@@ -215,10 +215,6 @@
# Specify linker-script with add_project_link_arguments so that it is not placed
# within a linker --start-group/--end-group pair
if get_option('fuzzing')
- add_project_link_arguments(['-Wl,-T,',
- (meson.current_source_dir() / 'tests/qtest/fuzz/fork_fuzz.ld')],
- native: false, language: all_languages)
-
# Specify a filter to only instrument code that is directly related to
# virtual-devices.
configure_file(output: 'instrumentation-filter',
@@ -1649,10 +1645,14 @@
endif
# libdw
-libdw = dependency('libdw',
- method: 'pkg-config',
- kwargs: static_kwargs,
- required: false)
+libdw = not_found
+if not get_option('libdw').auto() or \
+ (not enable_static and (have_system or have_user))
+ libdw = dependency('libdw',
+ method: 'pkg-config',
+ kwargs: static_kwargs,
+ required: get_option('libdw'))
+endif
#################
# config-host.h #
@@ -2129,6 +2129,18 @@
pthread_create(&thread, 0, f, 0);
return 0;
}''', dependencies: threads))
+config_host_data.set('CONFIG_PTHREAD_SET_NAME_NP', cc.links(gnu_source_prefix + '''
+ #include <pthread.h>
+ #include <pthread_np.h>
+
+ static void *f(void *p) { return NULL; }
+ int main(void)
+ {
+ pthread_t thread;
+ pthread_create(&thread, 0, f, 0);
+ pthread_set_name_np(thread, "QEMU");
+ return 0;
+ }''', dependencies: threads))
config_host_data.set('CONFIG_PTHREAD_CONDATTR_SETCLOCK', cc.links(gnu_source_prefix + '''
#include <pthread.h>
#include <time.h>
@@ -2351,6 +2363,22 @@
int main(int argc, char *argv[]) { return bar(argv[argc - 1]); }
'''), error_message: 'AVX512F not available').allowed())
+config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
+ .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \
+ .require(cc.links('''
+ #pragma GCC push_options
+ #pragma GCC target("avx512bw")
+ #include <cpuid.h>
+ #include <immintrin.h>
+ static int bar(void *a) {
+
+ __m512i *x = a;
+ __m512i res= _mm512_abs_epi8(*x);
+ return res[1];
+ }
+ int main(int argc, char *argv[]) { return bar(argv[0]); }
+ '''), error_message: 'AVX512BW not available').allowed())
+
have_pvrdma = get_option('pvrdma') \
.require(rdma.found(), error_message: 'PVRDMA requires OpenFabrics libraries') \
.require(cc.compiles(gnu_source_prefix + '''
@@ -2450,6 +2478,27 @@
}''', name: '_lock_file and _unlock_file'))
endif
+if targetos == 'windows'
+ mingw_has_setjmp_longjmp = cc.links('''
+ #include <setjmp.h>
+ int main(void) {
+ /*
+ * These functions are not available in setjmp header, but may be
+ * available at link time, from libmingwex.a.
+ */
+ extern int __mingw_setjmp(jmp_buf);
+ extern void __attribute__((noreturn)) __mingw_longjmp(jmp_buf, int);
+ jmp_buf env;
+ __mingw_setjmp(env);
+ __mingw_longjmp(env, 0);
+ }
+ ''', name: 'mingw setjmp and longjmp')
+
+ if cpu == 'aarch64' and not mingw_has_setjmp_longjmp
+ error('mingw must provide setjmp/longjmp for windows-arm64')
+ endif
+endif
+
########################
# Target configuration #
########################
@@ -3783,8 +3832,14 @@
summary_info += {'mutex debugging': get_option('debug_mutex')}
summary_info += {'memory allocator': get_option('malloc')}
summary_info += {'avx2 optimization': config_host_data.get('CONFIG_AVX2_OPT')}
+summary_info += {'avx512bw optimization': config_host_data.get('CONFIG_AVX512BW_OPT')}
summary_info += {'avx512f optimization': config_host_data.get('CONFIG_AVX512F_OPT')}
-summary_info += {'gprof enabled': get_option('gprof')}
+if get_option('gprof')
+ gprof_info = 'YES (deprecated)'
+else
+ gprof_info = get_option('gprof')
+endif
+summary_info += {'gprof': gprof_info}
summary_info += {'gcov': get_option('b_coverage')}
summary_info += {'thread sanitizer': config_host.has_key('CONFIG_TSAN')}
summary_info += {'CFI support': get_option('cfi')}
@@ -3853,7 +3908,6 @@
summary_info += {'Block whitelist (ro)': get_option('block_drv_ro_whitelist')}
summary_info += {'Use block whitelist in tools': get_option('block_drv_whitelist_in_tools')}
summary_info += {'VirtFS support': have_virtfs}
- summary_info += {'build virtiofs daemon': have_virtiofsd}
summary_info += {'Live block migration': config_host_data.get('CONFIG_LIVE_BLOCK_MIGRATION')}
summary_info += {'replication support': config_host_data.get('CONFIG_REPLICATION')}
summary_info += {'bochs support': get_option('bochs').allowed()}
diff --git a/meson_options.txt b/meson_options.txt
index 559a571..6b09002 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -104,6 +104,8 @@
description: 'AVX2 optimizations')
option('avx512f', type: 'feature', value: 'disabled',
description: 'AVX512F optimizations')
+option('avx512bw', type: 'feature', value: 'auto',
+ description: 'AVX512BW optimizations')
option('keyring', type: 'feature', value: 'auto',
description: 'Linux keyring support')
@@ -129,6 +131,8 @@
description: 'use libgio for D-Bus support')
option('glusterfs', type : 'feature', value : 'auto',
description: 'Glusterfs block device driver')
+option('libdw', type : 'feature', value : 'auto',
+ description: 'debuginfo support')
option('libiscsi', type : 'feature', value : 'auto',
description: 'libiscsi userspace initiator')
option('libnfs', type : 'feature', value : 'auto',
@@ -268,8 +272,6 @@
description: 'build vhost-user-blk server')
option('virtfs', type: 'feature', value: 'auto',
description: 'virtio-9p support')
-option('virtiofsd', type: 'feature', value: 'auto',
- description: 'build virtiofs daemon (virtiofsd)')
option('libvduse', type: 'feature', value: 'auto',
description: 'build VDUSE Library')
option('vduse_blk_export', type: 'feature', value: 'auto',
@@ -316,7 +318,8 @@
option('qom_cast_debug', type: 'boolean', value: false,
description: 'cast debugging support')
option('gprof', type: 'boolean', value: false,
- description: 'QEMU profiling with gprof')
+ description: 'QEMU profiling with gprof',
+ deprecated: true)
option('profiler', type: 'boolean', value: false,
description: 'profiler support')
option('slirp_smbd', type : 'feature', value : 'auto',
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 5a62141..fe73aa9 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -763,9 +763,8 @@
}
static void dirty_bitmap_state_pending(void *opaque,
- uint64_t *res_precopy_only,
- uint64_t *res_compatible,
- uint64_t *res_postcopy_only)
+ uint64_t *must_precopy,
+ uint64_t *can_postcopy)
{
DBMSaveState *s = &((DBMState *)opaque)->save;
SaveBitmapState *dbms;
@@ -785,7 +784,7 @@
trace_dirty_bitmap_state_pending(pending);
- *res_postcopy_only += pending;
+ *can_postcopy += pending;
}
/* First occurrence of this bitmap. It should be created if doesn't exist */
diff --git a/migration/block.c b/migration/block.c
index 29f6902..426a25b 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -42,16 +42,6 @@
#define MAX_IO_BUFFERS 512
#define MAX_PARALLEL_IO 16
-/* #define DEBUG_BLK_MIGRATION */
-
-#ifdef DEBUG_BLK_MIGRATION
-#define DPRINTF(fmt, ...) \
- do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
- do { } while (0)
-#endif
-
typedef struct BlkMigDevState {
/* Written during setup phase. Can be read without a lock. */
BlockBackend *blk;
@@ -502,7 +492,7 @@
block_mig_state.prev_progress = progress;
qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
| BLK_MIG_FLAG_PROGRESS);
- DPRINTF("Completed %d %%\r", progress);
+ trace_migration_block_progression(progress);
}
return ret;
@@ -863,10 +853,8 @@
return 0;
}
-static void block_state_pending(void *opaque,
- uint64_t *res_precopy_only,
- uint64_t *res_compatible,
- uint64_t *res_postcopy_only)
+static void block_state_pending(void *opaque, uint64_t *must_precopy,
+ uint64_t *can_postcopy)
{
/* Estimate pending number of bytes to send */
uint64_t pending;
@@ -887,7 +875,7 @@
trace_migration_block_state_pending(pending);
/* We don't do postcopy */
- *res_precopy_only += pending;
+ *must_precopy += pending;
}
static int block_load(QEMUFile *f, void *opaque, int version_id)
diff --git a/migration/colo-failover.c b/migration/colo-failover.c
index 4245348..6cb6f90 100644
--- a/migration/colo-failover.c
+++ b/migration/colo-failover.c
@@ -17,7 +17,6 @@
#include "migration.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-migration.h"
-#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "trace.h"
@@ -78,7 +77,7 @@
void qmp_x_colo_lost_heartbeat(Error **errp)
{
if (get_colo_mode() == COLO_MODE_NONE) {
- error_setg(errp, QERR_FEATURE_DISABLED, "colo");
+ error_setg(errp, "VM is not in COLO mode");
return;
}
diff --git a/migration/colo.c b/migration/colo.c
index 232c8d4..0716e64 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -33,7 +33,6 @@
#include "net/colo.h"
#include "block/block.h"
#include "qapi/qapi-events-migration.h"
-#include "qapi/qmp/qerror.h"
#include "sysemu/cpus.h"
#include "sysemu/runstate.h"
#include "net/filter.h"
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index ef25bc8..72519ea 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -23,7 +23,6 @@
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-visit-migration.h"
#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qerror.h"
#include "qapi/string-input-visitor.h"
#include "qapi/string-output-visitor.h"
#include "qemu/cutils.h"
diff --git a/migration/migration.c b/migration/migration.c
index 7a14aa9..ae2025d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -184,16 +184,27 @@
int new_state);
static void migrate_fd_cancel(MigrationState *s);
-static bool migrate_allow_multi_channels = true;
-
-void migrate_protocol_allow_multi_channels(bool allow)
+static bool migration_needs_multiple_sockets(void)
{
- migrate_allow_multi_channels = allow;
+ return migrate_use_multifd() || migrate_postcopy_preempt();
}
-bool migrate_multi_channels_is_allowed(void)
+static bool uri_supports_multi_channels(const char *uri)
{
- return migrate_allow_multi_channels;
+ return strstart(uri, "tcp:", NULL) || strstart(uri, "unix:", NULL) ||
+ strstart(uri, "vsock:", NULL);
+}
+
+static bool
+migration_channels_and_uri_compatible(const char *uri, Error **errp)
+{
+ if (migration_needs_multiple_sockets() &&
+ !uri_supports_multi_channels(uri)) {
+ error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
+ return false;
+ }
+
+ return true;
}
static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
@@ -224,6 +235,8 @@
qemu_sem_init(¤t_incoming->postcopy_pause_sem_dst, 0);
qemu_sem_init(¤t_incoming->postcopy_pause_sem_fault, 0);
qemu_sem_init(¤t_incoming->postcopy_pause_sem_fast_load, 0);
+ qemu_sem_init(¤t_incoming->postcopy_qemufile_dst_done, 0);
+
qemu_mutex_init(¤t_incoming->page_request_mutex);
current_incoming->page_requested = g_tree_new(page_request_addr_cmp);
@@ -302,6 +315,8 @@
{
struct MigrationIncomingState *mis = migration_incoming_get_current();
+ multifd_load_cleanup();
+
if (mis->to_src_file) {
/* Tell source that we are done */
migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
@@ -493,12 +508,15 @@
{
const char *p = NULL;
- migrate_protocol_allow_multi_channels(false); /* reset it anyway */
+ /* URI is not suitable for migration? */
+ if (!migration_channels_and_uri_compatible(uri, errp)) {
+ return;
+ }
+
qapi_event_send_migration(MIGRATION_STATUS_SETUP);
if (strstart(uri, "tcp:", &p) ||
strstart(uri, "unix:", NULL) ||
strstart(uri, "vsock:", NULL)) {
- migrate_protocol_allow_multi_channels(true);
socket_start_incoming_migration(p ? p : uri, errp);
#ifdef CONFIG_RDMA
} else if (strstart(uri, "rdma:", &p)) {
@@ -543,13 +561,7 @@
*/
qemu_announce_self(&mis->announce_timer, migrate_announce_params());
- if (multifd_load_cleanup(&local_err) != 0) {
- error_report_err(local_err);
- autostart = false;
- }
- /* If global state section was not received or we are in running
- state, we need to obey autostart. Any other state is set with
- runstate_set. */
+ multifd_load_shutdown();
dirty_bitmap_mig_before_vm_start();
@@ -649,9 +661,9 @@
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_FAILED);
qemu_fclose(mis->from_src_file);
- if (multifd_load_cleanup(&local_err) != 0) {
- error_report_err(local_err);
- }
+
+ multifd_load_cleanup();
+
exit(EXIT_FAILURE);
}
@@ -723,9 +735,29 @@
migration_incoming_process();
}
-static bool migration_needs_multiple_sockets(void)
+/*
+ * Returns true when we want to start a new incoming migration process,
+ * false otherwise.
+ */
+static bool migration_should_start_incoming(bool main_channel)
{
- return migrate_use_multifd() || migrate_postcopy_preempt();
+ /* Multifd doesn't start unless all channels are established */
+ if (migrate_use_multifd()) {
+ return migration_has_all_channels();
+ }
+
+ /* Preempt channel only starts when the main channel is created */
+ if (migrate_postcopy_preempt()) {
+ return main_channel;
+ }
+
+ /*
+ * For all the rest types of migration, we should only reach here when
+ * it's the main channel that's being created, and we should always
+ * proceed with this channel.
+ */
+ assert(main_channel);
+ return true;
}
void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
@@ -789,7 +821,7 @@
}
}
- if (migration_has_all_channels()) {
+ if (migration_should_start_incoming(default_channel)) {
/* If it's a recovery, we're done */
if (postcopy_try_recover()) {
return;
@@ -1378,15 +1410,6 @@
}
#endif
-
- /* incoming side only */
- if (runstate_check(RUN_STATE_INMIGRATE) &&
- !migrate_multi_channels_is_allowed() &&
- cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
- error_setg(errp, "multifd is not supported by current protocol");
- return false;
- }
-
if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) {
if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
error_setg(errp, "Postcopy preempt requires postcopy-ram");
@@ -2471,6 +2494,11 @@
MigrationState *s = migrate_get_current();
const char *p = NULL;
+ /* URI is not suitable for migration? */
+ if (!migration_channels_and_uri_compatible(uri, errp)) {
+ return;
+ }
+
if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
has_resume && resume, errp)) {
/* Error detected, put into errp */
@@ -2483,11 +2511,9 @@
}
}
- migrate_protocol_allow_multi_channels(false);
if (strstart(uri, "tcp:", &p) ||
strstart(uri, "unix:", NULL) ||
strstart(uri, "vsock:", NULL)) {
- migrate_protocol_allow_multi_channels(true);
socket_start_outgoing_migration(s, p ? p : uri, &local_err);
#ifdef CONFIG_RDMA
} else if (strstart(uri, "rdma:", &p)) {
@@ -3022,6 +3048,7 @@
case MIG_RP_MSG_PONG:
tmp32 = ldl_be_p(buf);
trace_source_return_path_thread_pong(tmp32);
+ qemu_sem_post(&ms->rp_state.rp_pong_acks);
break;
case MIG_RP_MSG_REQ_PAGES:
@@ -3155,6 +3182,13 @@
return ms->rp_state.error;
}
+static inline void
+migration_wait_main_channel(MigrationState *ms)
+{
+ /* Wait until one PONG message received */
+ qemu_sem_wait(&ms->rp_state.rp_pong_acks);
+}
+
/*
* Switch from normal iteration to postcopy
* Returns non-0 on error
@@ -3169,9 +3203,12 @@
bool restart_block = false;
int cur_state = MIGRATION_STATUS_ACTIVE;
- if (postcopy_preempt_wait_channel(ms)) {
- migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
- return -1;
+ if (migrate_postcopy_preempt()) {
+ migration_wait_main_channel(ms);
+ if (postcopy_preempt_establish_channel(ms)) {
+ migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
+ return -1;
+ }
}
if (!migrate_pause_before_switchover()) {
@@ -3583,6 +3620,20 @@
}
/*
+ * If preempt is enabled, re-establish the preempt channel. Note that
+ * we do it after resume prepare to make sure the main channel will be
+ * created before the preempt channel. E.g. with weak network, the
+ * dest QEMU may get messed up with the preempt and main channels on
+ * the order of connection setup. This guarantees the correct order.
+ */
+ ret = postcopy_preempt_establish_channel(s);
+ if (ret) {
+ error_report("%s: postcopy_preempt_establish_channel(): %d",
+ __func__, ret);
+ return ret;
+ }
+
+ /*
* Last handshake with destination on the resume (destination will
* switch to postcopy-active afterwards)
*/
@@ -3643,14 +3694,6 @@
if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
/* Woken up by a recover procedure. Give it a shot */
- if (postcopy_preempt_wait_channel(s)) {
- /*
- * Preempt enabled, and new channel create failed; loop
- * back to wait for another recovery.
- */
- continue;
- }
-
/*
* Firstly, let's wake up the return path now, with a new
* return path channel.
@@ -3820,20 +3863,18 @@
*/
static MigIterateState migration_iteration_run(MigrationState *s)
{
- uint64_t pend_pre, pend_compat, pend_post;
+ uint64_t must_precopy, can_postcopy;
bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
- qemu_savevm_state_pending_estimate(&pend_pre, &pend_compat, &pend_post);
- uint64_t pending_size = pend_pre + pend_compat + pend_post;
+ qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy);
+ uint64_t pending_size = must_precopy + can_postcopy;
- trace_migrate_pending_estimate(pending_size,
- pend_pre, pend_compat, pend_post);
+ trace_migrate_pending_estimate(pending_size, must_precopy, can_postcopy);
- if (pend_pre + pend_compat <= s->threshold_size) {
- qemu_savevm_state_pending_exact(&pend_pre, &pend_compat, &pend_post);
- pending_size = pend_pre + pend_compat + pend_post;
- trace_migrate_pending_exact(pending_size,
- pend_pre, pend_compat, pend_post);
+ if (must_precopy <= s->threshold_size) {
+ qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy);
+ pending_size = must_precopy + can_postcopy;
+ trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy);
}
if (!pending_size || pending_size < s->threshold_size) {
@@ -3843,7 +3884,7 @@
}
/* Still a significant amount to transfer */
- if (!in_postcopy && pend_pre <= s->threshold_size &&
+ if (!in_postcopy && must_precopy <= s->threshold_size &&
qatomic_read(&s->start_postcopy)) {
if (postcopy_start(s)) {
error_report("%s: postcopy failed to start", __func__);
@@ -4343,15 +4384,6 @@
}
}
- /* This needs to be done before resuming a postcopy */
- if (postcopy_preempt_setup(s, &local_err)) {
- error_report_err(local_err);
- migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
- MIGRATION_STATUS_FAILED);
- migrate_fd_cleanup(s);
- return;
- }
-
if (resume) {
/* Wakeup the main migration thread to do the recovery */
migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
@@ -4525,6 +4557,7 @@
qemu_sem_destroy(&ms->postcopy_pause_sem);
qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
qemu_sem_destroy(&ms->rp_state.rp_sem);
+ qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
error_free(ms->error);
}
@@ -4571,6 +4604,7 @@
qemu_sem_init(&ms->postcopy_pause_sem, 0);
qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
qemu_sem_init(&ms->rp_state.rp_sem, 0);
+ qemu_sem_init(&ms->rp_state.rp_pong_acks, 0);
qemu_sem_init(&ms->rate_limit_sem, 0);
qemu_sem_init(&ms->wait_unplug_sem, 0);
qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
diff --git a/migration/migration.h b/migration/migration.h
index 66511ce..2da2f8a 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -116,6 +116,12 @@
unsigned int postcopy_channels;
/* QEMUFile for postcopy only; it'll be handled by a separate thread */
QEMUFile *postcopy_qemufile_dst;
+ /*
+ * When postcopy_qemufile_dst is properly setup, this sem is posted.
+ * One can wait on this semaphore to wait until the preempt channel is
+ * properly setup.
+ */
+ QemuSemaphore postcopy_qemufile_dst_done;
/* Postcopy priority thread is used to receive postcopy requested pages */
QemuThread postcopy_prio_thread;
bool postcopy_prio_thread_created;
@@ -276,6 +282,12 @@
*/
bool rp_thread_created;
QemuSemaphore rp_sem;
+ /*
+ * We post to this when we got one PONG from dest. So far it's an
+ * easy way to know the main channel has successfully established
+ * on dest QEMU.
+ */
+ QemuSemaphore rp_pong_acks;
} rp_state;
double mbps;
@@ -474,7 +486,4 @@
void populate_vfio_info(MigrationInfo *info);
void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page);
-bool migrate_multi_channels_is_allowed(void);
-void migrate_protocol_allow_multi_channels(bool allow);
-
#endif
diff --git a/migration/multifd.c b/migration/multifd.c
index b7ad700..5e85c3e 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -516,7 +516,7 @@
{
int i;
- if (!migrate_use_multifd() || !migrate_multi_channels_is_allowed()) {
+ if (!migrate_use_multifd()) {
return;
}
multifd_send_terminate_threads(NULL);
@@ -843,30 +843,29 @@
ioc, object_get_typename(OBJECT(ioc)),
migrate_get_current()->hostname, error);
- if (!error) {
- if (migrate_channel_requires_tls_upgrade(ioc)) {
- multifd_tls_channel_connect(p, ioc, &error);
- if (!error) {
- /*
- * tls_channel_connect will call back to this
- * function after the TLS handshake,
- * so we mustn't call multifd_send_thread until then
- */
- return true;
- } else {
- return false;
- }
- } else {
- migration_ioc_register_yank(ioc);
- p->registered_yank = true;
- p->c = ioc;
- qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
- QEMU_THREAD_JOINABLE);
- }
- return true;
+ if (error) {
+ return false;
}
-
- return false;
+ if (migrate_channel_requires_tls_upgrade(ioc)) {
+ multifd_tls_channel_connect(p, ioc, &error);
+ if (!error) {
+ /*
+ * tls_channel_connect will call back to this
+ * function after the TLS handshake,
+ * so we mustn't call multifd_send_thread until then
+ */
+ return true;
+ } else {
+ return false;
+ }
+ } else {
+ migration_ioc_register_yank(ioc);
+ p->registered_yank = true;
+ p->c = ioc;
+ qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
+ QEMU_THREAD_JOINABLE);
+ }
+ return true;
}
static void multifd_new_send_channel_cleanup(MultiFDSendParams *p,
@@ -893,19 +892,15 @@
Error *local_err = NULL;
trace_multifd_new_send_channel_async(p->id);
- if (qio_task_propagate_error(task, &local_err)) {
- goto cleanup;
- } else {
+ if (!qio_task_propagate_error(task, &local_err)) {
p->c = QIO_CHANNEL(sioc);
qio_channel_set_delay(p->c, false);
p->running = true;
- if (!multifd_channel_connect(p, sioc, local_err)) {
- goto cleanup;
+ if (multifd_channel_connect(p, sioc, local_err)) {
+ return;
}
- return;
}
-cleanup:
multifd_new_send_channel_cleanup(p, sioc, local_err);
}
@@ -918,10 +913,6 @@
if (!migrate_use_multifd()) {
return 0;
}
- if (!migrate_multi_channels_is_allowed()) {
- error_setg(errp, "multifd is not supported by current protocol");
- return -1;
- }
thread_count = migrate_multifd_channels();
multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
@@ -1022,26 +1013,33 @@
}
}
-int multifd_load_cleanup(Error **errp)
+void multifd_load_shutdown(void)
+{
+ if (migrate_use_multifd()) {
+ multifd_recv_terminate_threads(NULL);
+ }
+}
+
+void multifd_load_cleanup(void)
{
int i;
- if (!migrate_use_multifd() || !migrate_multi_channels_is_allowed()) {
- return 0;
+ if (!migrate_use_multifd()) {
+ return;
}
multifd_recv_terminate_threads(NULL);
for (i = 0; i < migrate_multifd_channels(); i++) {
MultiFDRecvParams *p = &multifd_recv_state->params[i];
if (p->running) {
- p->quit = true;
/*
* multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code,
* however try to wakeup it without harm in cleanup phase.
*/
qemu_sem_post(&p->sem_sync);
- qemu_thread_join(&p->thread);
}
+
+ qemu_thread_join(&p->thread);
}
for (i = 0; i < migrate_multifd_channels(); i++) {
MultiFDRecvParams *p = &multifd_recv_state->params[i];
@@ -1067,8 +1065,6 @@
multifd_recv_state->params = NULL;
g_free(multifd_recv_state);
multifd_recv_state = NULL;
-
- return 0;
}
void multifd_recv_sync_main(void)
@@ -1116,10 +1112,7 @@
ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
p->packet_len, &local_err);
- if (ret == 0) { /* EOF */
- break;
- }
- if (ret == -1) { /* Error */
+ if (ret == 0 || ret == -1) { /* 0: EOF -1: Error */
break;
}
@@ -1180,10 +1173,6 @@
return 0;
}
- if (!migrate_multi_channels_is_allowed()) {
- error_setg(errp, "multifd is not supported by current protocol");
- return -1;
- }
thread_count = migrate_multifd_channels();
multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
diff --git a/migration/multifd.c.orig b/migration/multifd.c.orig
deleted file mode 100644
index ad89293..0000000
--- a/migration/multifd.c.orig
+++ /dev/null
@@ -1,1274 +0,0 @@
-/*
- * Multifd common code
- *
- * Copyright (c) 2019-2020 Red Hat Inc
- *
- * Authors:
- * Juan Quintela <quintela@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "qemu/rcu.h"
-#include "exec/target_page.h"
-#include "sysemu/sysemu.h"
-#include "exec/ramblock.h"
-#include "qemu/error-report.h"
-#include "qapi/error.h"
-#include "ram.h"
-#include "migration.h"
-#include "socket.h"
-#include "tls.h"
-#include "qemu-file.h"
-#include "trace.h"
-#include "multifd.h"
-
-#include "qemu/yank.h"
-#include "io/channel-socket.h"
-#include "yank_functions.h"
-
-/* Multiple fd's */
-
-#define MULTIFD_MAGIC 0x11223344U
-#define MULTIFD_VERSION 1
-
-typedef struct {
- uint32_t magic;
- uint32_t version;
- unsigned char uuid[16]; /* QemuUUID */
- uint8_t id;
- uint8_t unused1[7]; /* Reserved for future use */
- uint64_t unused2[4]; /* Reserved for future use */
-} __attribute__((packed)) MultiFDInit_t;
-
-/* Multifd without compression */
-
-/**
- * nocomp_send_setup: setup send side
- *
- * For no compression this function does nothing.
- *
- * Returns 0 for success or -1 for error
- *
- * @p: Params for the channel that we are using
- * @errp: pointer to an error
- */
-static int nocomp_send_setup(MultiFDSendParams *p, Error **errp)
-{
- return 0;
-}
-
-/**
- * nocomp_send_cleanup: cleanup send side
- *
- * For no compression this function does nothing.
- *
- * @p: Params for the channel that we are using
- * @errp: pointer to an error
- */
-static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
-{
- return;
-}
-
-/**
- * nocomp_send_prepare: prepare date to be able to send
- *
- * For no compression we just have to calculate the size of the
- * packet.
- *
- * Returns 0 for success or -1 for error
- *
- * @p: Params for the channel that we are using
- * @errp: pointer to an error
- */
-static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
-{
- MultiFDPages_t *pages = p->pages;
-
- for (int i = 0; i < p->normal_num; i++) {
- p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i];
- p->iov[p->iovs_num].iov_len = p->page_size;
- p->iovs_num++;
- }
-
- p->next_packet_size = p->normal_num * p->page_size;
- p->flags |= MULTIFD_FLAG_NOCOMP;
- return 0;
-}
-
-/**
- * nocomp_recv_setup: setup receive side
- *
- * For no compression this function does nothing.
- *
- * Returns 0 for success or -1 for error
- *
- * @p: Params for the channel that we are using
- * @errp: pointer to an error
- */
-static int nocomp_recv_setup(MultiFDRecvParams *p, Error **errp)
-{
- return 0;
-}
-
-/**
- * nocomp_recv_cleanup: setup receive side
- *
- * For no compression this function does nothing.
- *
- * @p: Params for the channel that we are using
- */
-static void nocomp_recv_cleanup(MultiFDRecvParams *p)
-{
-}
-
-/**
- * nocomp_recv_pages: read the data from the channel into actual pages
- *
- * For no compression we just need to read things into the correct place.
- *
- * Returns 0 for success or -1 for error
- *
- * @p: Params for the channel that we are using
- * @errp: pointer to an error
- */
-static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp)
-{
- uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
-
- if (flags != MULTIFD_FLAG_NOCOMP) {
- error_setg(errp, "multifd %u: flags received %x flags expected %x",
- p->id, flags, MULTIFD_FLAG_NOCOMP);
- return -1;
- }
- for (int i = 0; i < p->normal_num; i++) {
- p->iov[i].iov_base = p->host + p->normal[i];
- p->iov[i].iov_len = p->page_size;
- }
- return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
-}
-
-static MultiFDMethods multifd_nocomp_ops = {
- .send_setup = nocomp_send_setup,
- .send_cleanup = nocomp_send_cleanup,
- .send_prepare = nocomp_send_prepare,
- .recv_setup = nocomp_recv_setup,
- .recv_cleanup = nocomp_recv_cleanup,
- .recv_pages = nocomp_recv_pages
-};
-
-static MultiFDMethods *multifd_ops[MULTIFD_COMPRESSION__MAX] = {
- [MULTIFD_COMPRESSION_NONE] = &multifd_nocomp_ops,
-};
-
-void multifd_register_ops(int method, MultiFDMethods *ops)
-{
- assert(0 < method && method < MULTIFD_COMPRESSION__MAX);
- multifd_ops[method] = ops;
-}
-
-static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
-{
- MultiFDInit_t msg = {};
- int ret;
-
- msg.magic = cpu_to_be32(MULTIFD_MAGIC);
- msg.version = cpu_to_be32(MULTIFD_VERSION);
- msg.id = p->id;
- memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));
-
- ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
- if (ret != 0) {
- return -1;
- }
- return 0;
-}
-
-static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
-{
- MultiFDInit_t msg;
- int ret;
-
- ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
- if (ret != 0) {
- return -1;
- }
-
- msg.magic = be32_to_cpu(msg.magic);
- msg.version = be32_to_cpu(msg.version);
-
- if (msg.magic != MULTIFD_MAGIC) {
- error_setg(errp, "multifd: received packet magic %x "
- "expected %x", msg.magic, MULTIFD_MAGIC);
- return -1;
- }
-
- if (msg.version != MULTIFD_VERSION) {
- error_setg(errp, "multifd: received packet version %u "
- "expected %u", msg.version, MULTIFD_VERSION);
- return -1;
- }
-
- if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
- char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
- char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);
-
- error_setg(errp, "multifd: received uuid '%s' and expected "
- "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
- g_free(uuid);
- g_free(msg_uuid);
- return -1;
- }
-
- if (msg.id > migrate_multifd_channels()) {
- error_setg(errp, "multifd: received channel version %u "
- "expected %u", msg.version, MULTIFD_VERSION);
- return -1;
- }
-
- return msg.id;
-}
-
-static MultiFDPages_t *multifd_pages_init(size_t size)
-{
- MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);
-
- pages->allocated = size;
- pages->offset = g_new0(ram_addr_t, size);
-
- return pages;
-}
-
-static void multifd_pages_clear(MultiFDPages_t *pages)
-{
- pages->num = 0;
- pages->allocated = 0;
- pages->packet_num = 0;
- pages->block = NULL;
- g_free(pages->offset);
- pages->offset = NULL;
- g_free(pages);
-}
-
-static void multifd_send_fill_packet(MultiFDSendParams *p)
-{
- MultiFDPacket_t *packet = p->packet;
- int i;
-
- packet->flags = cpu_to_be32(p->flags);
- packet->pages_alloc = cpu_to_be32(p->pages->allocated);
- packet->normal_pages = cpu_to_be32(p->normal_num);
- packet->next_packet_size = cpu_to_be32(p->next_packet_size);
- packet->packet_num = cpu_to_be64(p->packet_num);
-
- if (p->pages->block) {
- strncpy(packet->ramblock, p->pages->block->idstr, 256);
- }
-
- for (i = 0; i < p->normal_num; i++) {
- /* there are architectures where ram_addr_t is 32 bit */
- uint64_t temp = p->normal[i];
-
- packet->offset[i] = cpu_to_be64(temp);
- }
-}
-
-static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
-{
- MultiFDPacket_t *packet = p->packet;
- RAMBlock *block;
- int i;
-
- packet->magic = be32_to_cpu(packet->magic);
- if (packet->magic != MULTIFD_MAGIC) {
- error_setg(errp, "multifd: received packet "
- "magic %x and expected magic %x",
- packet->magic, MULTIFD_MAGIC);
- return -1;
- }
-
- packet->version = be32_to_cpu(packet->version);
- if (packet->version != MULTIFD_VERSION) {
- error_setg(errp, "multifd: received packet "
- "version %u and expected version %u",
- packet->version, MULTIFD_VERSION);
- return -1;
- }
-
- p->flags = be32_to_cpu(packet->flags);
-
- packet->pages_alloc = be32_to_cpu(packet->pages_alloc);
- /*
- * If we received a packet that is 100 times bigger than expected
- * just stop migration. It is a magic number.
- */
- if (packet->pages_alloc > p->page_count) {
- error_setg(errp, "multifd: received packet "
- "with size %u and expected a size of %u",
- packet->pages_alloc, p->page_count) ;
- return -1;
- }
-
- p->normal_num = be32_to_cpu(packet->normal_pages);
- if (p->normal_num > packet->pages_alloc) {
- error_setg(errp, "multifd: received packet "
- "with %u pages and expected maximum pages are %u",
- p->normal_num, packet->pages_alloc);
- return -1;
- }
-
- p->next_packet_size = be32_to_cpu(packet->next_packet_size);
- p->packet_num = be64_to_cpu(packet->packet_num);
-
- if (p->normal_num == 0) {
- return 0;
- }
-
- /* make sure that ramblock is 0 terminated */
- packet->ramblock[255] = 0;
- block = qemu_ram_block_by_name(packet->ramblock);
- if (!block) {
- error_setg(errp, "multifd: unknown ram block %s",
- packet->ramblock);
- return -1;
- }
-
- p->host = block->host;
- for (i = 0; i < p->normal_num; i++) {
- uint64_t offset = be64_to_cpu(packet->offset[i]);
-
- if (offset > (block->used_length - p->page_size)) {
- error_setg(errp, "multifd: offset too long %" PRIu64
- " (max " RAM_ADDR_FMT ")",
- offset, block->used_length);
- return -1;
- }
- p->normal[i] = offset;
- }
-
- return 0;
-}
-
-struct {
- MultiFDSendParams *params;
- /* array of pages to be sent */
- MultiFDPages_t *pages;
- /* global number of generated multifd packets */
- uint64_t packet_num;
- /* send channels ready */
- QemuSemaphore channels_ready;
- /*
- * Have we already run the terminate-threads code? There is a race
- * where an error can arrive while we are already exiting, so we use
- * atomic operations; the only valid values are 0 and 1.
- */
- int exiting;
- /* multifd ops */
- MultiFDMethods *ops;
-} *multifd_send_state;
-
-/*
- * How do we use multifd_send_state->pages and channel->pages?
- *
- * We create a "pages" struct for each channel, plus a main one. Each
- * time we need to send a batch of pages we exchange the main one with
- * the one owned by the channel that is going to send it. There are
- * two reasons for that:
- * - to avoid doing so many mallocs during migration
- * - to make it easier to know what to free at the end of migration
- *
- * This way we always know who owns each "pages" struct, and we don't
- * need any locking. It belongs either to the migration thread or to
- * the channel thread. Switching is safe because the migration thread
- * holds the channel mutex while changing it, and the channel thread
- * must have finished with its own copy, otherwise pending_job can't
- * be false.
- */
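
A minimal standalone sketch of the hand-off described above (plain pthreads, illustrative names, not QEMU code): the producer swaps its filled buffer for the worker's empty one under the worker's lock, then kicks the worker's semaphore.

/* Sketch only: the buffer is handed to the worker by swapping pointers,
 * never by copying page data, so each buffer always has one owner. */
#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct Pages { size_t num; } Pages;

typedef struct Worker {
    pthread_mutex_t lock;
    sem_t kick;             /* producer -> worker: a job was queued */
    Pages *pages;           /* owned by the worker between kicks    */
    int pending_job;
} Worker;

static Pages *main_pages;   /* owned by the producer                */

static void hand_over(Worker *w)
{
    pthread_mutex_lock(&w->lock);
    Pages *empty = w->pages;   /* worker has finished with this one */
    w->pages = main_pages;     /* worker now owns the filled buffer */
    main_pages = empty;        /* producer refills the empty one    */
    w->pending_job++;
    pthread_mutex_unlock(&w->lock);
    sem_post(&w->kick);        /* wake the worker thread            */
}

int main(void)
{
    Worker w = { .pending_job = 0 };
    pthread_mutex_init(&w.lock, NULL);
    sem_init(&w.kick, 0, 0);
    w.pages = calloc(1, sizeof(Pages));
    main_pages = calloc(1, sizeof(Pages));
    main_pages->num = 128;     /* pretend the producer filled it    */
    hand_over(&w);
    printf("worker now holds %zu pages\n", w.pages->num);
    return 0;
}
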
-
-static int multifd_send_pages(QEMUFile *f)
-{
- int i;
- static int next_channel;
- MultiFDSendParams *p = NULL; /* make gcc happy */
- MultiFDPages_t *pages = multifd_send_state->pages;
- uint64_t transferred;
-
- if (qatomic_read(&multifd_send_state->exiting)) {
- return -1;
- }
-
- qemu_sem_wait(&multifd_send_state->channels_ready);
- /*
- * next_channel can remain from a previous migration that was
- * using more channels, so ensure it doesn't overflow if the
- * limit is lower now.
- */
- next_channel %= migrate_multifd_channels();
- for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
- p = &multifd_send_state->params[i];
-
- qemu_mutex_lock(&p->mutex);
- if (p->quit) {
- error_report("%s: channel %d has already quit!", __func__, i);
- qemu_mutex_unlock(&p->mutex);
- return -1;
- }
- if (!p->pending_job) {
- p->pending_job++;
- next_channel = (i + 1) % migrate_multifd_channels();
- break;
- }
- qemu_mutex_unlock(&p->mutex);
- }
- assert(!p->pages->num);
- assert(!p->pages->block);
-
- p->packet_num = multifd_send_state->packet_num++;
- multifd_send_state->pages = p->pages;
- p->pages = pages;
- transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len;
- qemu_file_acct_rate_limit(f, transferred);
- ram_counters.multifd_bytes += transferred;
- stat64_add(&ram_atomic_counters.transferred, transferred);
- qemu_mutex_unlock(&p->mutex);
- qemu_sem_post(&p->sem);
-
- return 1;
-}
-
-int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
-{
- MultiFDPages_t *pages = multifd_send_state->pages;
- bool changed = false;
-
- if (!pages->block) {
- pages->block = block;
- }
-
- if (pages->block == block) {
- pages->offset[pages->num] = offset;
- pages->num++;
-
- if (pages->num < pages->allocated) {
- return 1;
- }
- } else {
- changed = true;
- }
-
- if (multifd_send_pages(f) < 0) {
- return -1;
- }
-
- if (changed) {
- return multifd_queue_page(f, block, offset);
- }
-
- return 1;
-}
-
-static void multifd_send_terminate_threads(Error *err)
-{
- int i;
-
- trace_multifd_send_terminate_threads(err != NULL);
-
- if (err) {
- MigrationState *s = migrate_get_current();
- migrate_set_error(s, err);
- if (s->state == MIGRATION_STATUS_SETUP ||
- s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
- s->state == MIGRATION_STATUS_DEVICE ||
- s->state == MIGRATION_STATUS_ACTIVE) {
- migrate_set_state(&s->state, s->state,
- MIGRATION_STATUS_FAILED);
- }
- }
-
- /*
- * We don't want to exit the threads twice. Depending on where we
- * get the error, or if there are two independent errors in two
- * threads at the same time, we can end up calling this function
- * twice.
- */
- if (qatomic_xchg(&multifd_send_state->exiting, 1)) {
- return;
- }
-
- for (i = 0; i < migrate_multifd_channels(); i++) {
- MultiFDSendParams *p = &multifd_send_state->params[i];
-
- qemu_mutex_lock(&p->mutex);
- p->quit = true;
- qemu_sem_post(&p->sem);
- if (p->c) {
- qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
- }
- qemu_mutex_unlock(&p->mutex);
- }
-}
-
-void multifd_save_cleanup(void)
-{
- int i;
-
- if (!migrate_use_multifd() || !migrate_multi_channels_is_allowed()) {
- return;
- }
- multifd_send_terminate_threads(NULL);
- for (i = 0; i < migrate_multifd_channels(); i++) {
- MultiFDSendParams *p = &multifd_send_state->params[i];
-
- if (p->running) {
- qemu_thread_join(&p->thread);
- }
- }
- for (i = 0; i < migrate_multifd_channels(); i++) {
- MultiFDSendParams *p = &multifd_send_state->params[i];
- Error *local_err = NULL;
-
- if (p->registered_yank) {
- migration_ioc_unregister_yank(p->c);
- }
- socket_send_channel_destroy(p->c);
- p->c = NULL;
- qemu_mutex_destroy(&p->mutex);
- qemu_sem_destroy(&p->sem);
- qemu_sem_destroy(&p->sem_sync);
- g_free(p->name);
- p->name = NULL;
- multifd_pages_clear(p->pages);
- p->pages = NULL;
- p->packet_len = 0;
- g_free(p->packet);
- p->packet = NULL;
- g_free(p->iov);
- p->iov = NULL;
- g_free(p->normal);
- p->normal = NULL;
- multifd_send_state->ops->send_cleanup(p, &local_err);
- if (local_err) {
- migrate_set_error(migrate_get_current(), local_err);
- error_free(local_err);
- }
- }
- qemu_sem_destroy(&multifd_send_state->channels_ready);
- g_free(multifd_send_state->params);
- multifd_send_state->params = NULL;
- multifd_pages_clear(multifd_send_state->pages);
- multifd_send_state->pages = NULL;
- g_free(multifd_send_state);
- multifd_send_state = NULL;
-}
-
-static int multifd_zero_copy_flush(QIOChannel *c)
-{
- int ret;
- Error *err = NULL;
-
- ret = qio_channel_flush(c, &err);
- if (ret < 0) {
- error_report_err(err);
- return -1;
- }
- if (ret == 1) {
- dirty_sync_missed_zero_copy();
- }
-
- return ret;
-}
-
-int multifd_send_sync_main(QEMUFile *f)
-{
- int i;
- bool flush_zero_copy;
-
- if (!migrate_use_multifd()) {
- return 0;
- }
- if (multifd_send_state->pages->num) {
- if (multifd_send_pages(f) < 0) {
- error_report("%s: multifd_send_pages fail", __func__);
- return -1;
- }
- }
-
- /*
- * When using zero-copy, it's necessary to flush the pages before any of
- * the pages can be sent again, so we'll make sure the new version of the
- * pages will always arrive _later_ than the old pages.
- *
- * Currently we achieve this by flushing the zero-copy requested writes
- * per ram iteration, but in the future we could potentially optimize it
- * to be less frequent, e.g. only after we finished one whole scanning of
- * all the dirty bitmaps.
- */
-
- flush_zero_copy = migrate_use_zero_copy_send();
-
- for (i = 0; i < migrate_multifd_channels(); i++) {
- MultiFDSendParams *p = &multifd_send_state->params[i];
-
- trace_multifd_send_sync_main_signal(p->id);
-
- qemu_mutex_lock(&p->mutex);
-
- if (p->quit) {
- error_report("%s: channel %d has already quit", __func__, i);
- qemu_mutex_unlock(&p->mutex);
- return -1;
- }
-
- p->packet_num = multifd_send_state->packet_num++;
- p->flags |= MULTIFD_FLAG_SYNC;
- p->pending_job++;
- qemu_file_acct_rate_limit(f, p->packet_len);
- ram_counters.multifd_bytes += p->packet_len;
- stat64_add(&ram_atomic_counters.transferred, p->packet_len);
- qemu_mutex_unlock(&p->mutex);
- qemu_sem_post(&p->sem);
-
- if (flush_zero_copy && p->c && (multifd_zero_copy_flush(p->c) < 0)) {
- return -1;
- }
- }
- for (i = 0; i < migrate_multifd_channels(); i++) {
- MultiFDSendParams *p = &multifd_send_state->params[i];
-
- trace_multifd_send_sync_main_wait(p->id);
- qemu_sem_wait(&p->sem_sync);
- }
- trace_multifd_send_sync_main(multifd_send_state->packet_num);
-
- return 0;
-}
-
-static void *multifd_send_thread(void *opaque)
-{
- MultiFDSendParams *p = opaque;
- Error *local_err = NULL;
- int ret = 0;
- bool use_zero_copy_send = migrate_use_zero_copy_send();
-
- trace_multifd_send_thread_start(p->id);
- rcu_register_thread();
-
- if (multifd_send_initial_packet(p, &local_err) < 0) {
- ret = -1;
- goto out;
- }
- /* initial packet */
- p->num_packets = 1;
-
- while (true) {
- qemu_sem_wait(&p->sem);
-
- if (qatomic_read(&multifd_send_state->exiting)) {
- break;
- }
- qemu_mutex_lock(&p->mutex);
-
- if (p->pending_job) {
- uint64_t packet_num = p->packet_num;
- uint32_t flags = p->flags;
- p->normal_num = 0;
-
- if (use_zero_copy_send) {
- p->iovs_num = 0;
- } else {
- p->iovs_num = 1;
- }
-
- for (int i = 0; i < p->pages->num; i++) {
- p->normal[p->normal_num] = p->pages->offset[i];
- p->normal_num++;
- }
-
- if (p->normal_num) {
- ret = multifd_send_state->ops->send_prepare(p, &local_err);
- if (ret != 0) {
- qemu_mutex_unlock(&p->mutex);
- break;
- }
- }
- multifd_send_fill_packet(p);
- p->flags = 0;
- p->num_packets++;
- p->total_normal_pages += p->normal_num;
- p->pages->num = 0;
- p->pages->block = NULL;
- qemu_mutex_unlock(&p->mutex);
-
- trace_multifd_send(p->id, packet_num, p->normal_num, flags,
- p->next_packet_size);
-
- if (use_zero_copy_send) {
- /* Send header first, without zerocopy */
- ret = qio_channel_write_all(p->c, (void *)p->packet,
- p->packet_len, &local_err);
- if (ret != 0) {
- break;
- }
- } else {
- /* Send header using the same writev call */
- p->iov[0].iov_len = p->packet_len;
- p->iov[0].iov_base = p->packet;
- }
-
- ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL,
- 0, p->write_flags, &local_err);
- if (ret != 0) {
- break;
- }
-
- qemu_mutex_lock(&p->mutex);
- p->pending_job--;
- qemu_mutex_unlock(&p->mutex);
-
- if (flags & MULTIFD_FLAG_SYNC) {
- qemu_sem_post(&p->sem_sync);
- }
- qemu_sem_post(&multifd_send_state->channels_ready);
- } else if (p->quit) {
- qemu_mutex_unlock(&p->mutex);
- break;
- } else {
- qemu_mutex_unlock(&p->mutex);
- /* sometimes there are spurious wakeups */
- }
- }
-
-out:
- if (local_err) {
- trace_multifd_send_error(p->id);
- multifd_send_terminate_threads(local_err);
- error_free(local_err);
- }
-
- /*
- * An error happened; we are about to exit, but we can't just
- * leave; wake up anyone who is waiting on us first.
- */
- if (ret != 0) {
- qemu_sem_post(&p->sem_sync);
- qemu_sem_post(&multifd_send_state->channels_ready);
- }
-
- qemu_mutex_lock(&p->mutex);
- p->running = false;
- qemu_mutex_unlock(&p->mutex);
-
- rcu_unregister_thread();
- trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages);
-
- return NULL;
-}
-
-static bool multifd_channel_connect(MultiFDSendParams *p,
- QIOChannel *ioc,
- Error *error);
-
-static void multifd_tls_outgoing_handshake(QIOTask *task,
- gpointer opaque)
-{
- MultiFDSendParams *p = opaque;
- QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task));
- Error *err = NULL;
-
- if (qio_task_propagate_error(task, &err)) {
- trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err));
- } else {
- trace_multifd_tls_outgoing_handshake_complete(ioc);
- }
-
- if (!multifd_channel_connect(p, ioc, err)) {
- /*
- * An error happened; mark the multifd_send_thread status as 'quit'
- * even though the thread was never created, then wake up anyone
- * waiting on it.
- */
- p->quit = true;
- qemu_sem_post(&multifd_send_state->channels_ready);
- qemu_sem_post(&p->sem_sync);
- }
-}
-
-static void *multifd_tls_handshake_thread(void *opaque)
-{
- MultiFDSendParams *p = opaque;
- QIOChannelTLS *tioc = QIO_CHANNEL_TLS(p->c);
-
- qio_channel_tls_handshake(tioc,
- multifd_tls_outgoing_handshake,
- p,
- NULL,
- NULL);
- return NULL;
-}
-
-static void multifd_tls_channel_connect(MultiFDSendParams *p,
- QIOChannel *ioc,
- Error **errp)
-{
- MigrationState *s = migrate_get_current();
- const char *hostname = s->hostname;
- QIOChannelTLS *tioc;
-
- tioc = migration_tls_client_create(s, ioc, hostname, errp);
- if (!tioc) {
- return;
- }
-
- object_unref(OBJECT(ioc));
- trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname);
- qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing");
- p->c = QIO_CHANNEL(tioc);
- qemu_thread_create(&p->thread, "multifd-tls-handshake-worker",
- multifd_tls_handshake_thread, p,
- QEMU_THREAD_JOINABLE);
-}
-
-static bool multifd_channel_connect(MultiFDSendParams *p,
- QIOChannel *ioc,
- Error *error)
-{
- trace_multifd_set_outgoing_channel(
- ioc, object_get_typename(OBJECT(ioc)),
- migrate_get_current()->hostname, error);
-
- if (!error) {
- if (migrate_channel_requires_tls_upgrade(ioc)) {
- multifd_tls_channel_connect(p, ioc, &error);
- if (!error) {
- /*
- * tls_channel_connect will call back to this
- * function after the TLS handshake,
- * so we mustn't call multifd_send_thread until then
- */
- return true;
- } else {
- return false;
- }
- } else {
- migration_ioc_register_yank(ioc);
- p->registered_yank = true;
- p->c = ioc;
- qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
- QEMU_THREAD_JOINABLE);
- }
- return true;
- }
-
- return false;
-}
-
-static void multifd_new_send_channel_cleanup(MultiFDSendParams *p,
- QIOChannel *ioc, Error *err)
-{
- migrate_set_error(migrate_get_current(), err);
- /* An error happened; wake up anyone who is waiting on this channel */
- qemu_sem_post(&multifd_send_state->channels_ready);
- qemu_sem_post(&p->sem_sync);
- /*
- * Although multifd_send_thread was never created, the main migration
- * thread still needs to know whether it is running, so mark its
- * status accordingly.
- */
- p->quit = true;
- object_unref(OBJECT(ioc));
- error_free(err);
-}
-
-static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
-{
- MultiFDSendParams *p = opaque;
- QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
- Error *local_err = NULL;
-
- trace_multifd_new_send_channel_async(p->id);
- if (qio_task_propagate_error(task, &local_err)) {
- goto cleanup;
- } else {
- p->c = QIO_CHANNEL(sioc);
- qio_channel_set_delay(p->c, false);
- p->running = true;
- if (!multifd_channel_connect(p, sioc, local_err)) {
- goto cleanup;
- }
- return;
- }
-
-cleanup:
- multifd_new_send_channel_cleanup(p, sioc, local_err);
-}
-
-int multifd_save_setup(Error **errp)
-{
- int thread_count;
- uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
- uint8_t i;
-
- if (!migrate_use_multifd()) {
- return 0;
- }
- if (!migrate_multi_channels_is_allowed()) {
- error_setg(errp, "multifd is not supported by current protocol");
- return -1;
- }
-
- thread_count = migrate_multifd_channels();
- multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
- multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
- multifd_send_state->pages = multifd_pages_init(page_count);
- qemu_sem_init(&multifd_send_state->channels_ready, 0);
- qatomic_set(&multifd_send_state->exiting, 0);
- multifd_send_state->ops = multifd_ops[migrate_multifd_compression()];
-
- for (i = 0; i < thread_count; i++) {
- MultiFDSendParams *p = &multifd_send_state->params[i];
-
- qemu_mutex_init(&p->mutex);
- qemu_sem_init(&p->sem, 0);
- qemu_sem_init(&p->sem_sync, 0);
- p->quit = false;
- p->pending_job = 0;
- p->id = i;
- p->pages = multifd_pages_init(page_count);
- p->packet_len = sizeof(MultiFDPacket_t)
- + sizeof(uint64_t) * page_count;
- p->packet = g_malloc0(p->packet_len);
- p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
- p->packet->version = cpu_to_be32(MULTIFD_VERSION);
- p->name = g_strdup_printf("multifdsend_%d", i);
- /* We need one extra place for the packet header */
- p->iov = g_new0(struct iovec, page_count + 1);
- p->normal = g_new0(ram_addr_t, page_count);
- p->page_size = qemu_target_page_size();
- p->page_count = page_count;
-
- if (migrate_use_zero_copy_send()) {
- p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
- } else {
- p->write_flags = 0;
- }
-
- socket_send_channel_create(multifd_new_send_channel_async, p);
- }
-
- for (i = 0; i < thread_count; i++) {
- MultiFDSendParams *p = &multifd_send_state->params[i];
- Error *local_err = NULL;
- int ret;
-
- ret = multifd_send_state->ops->send_setup(p, &local_err);
- if (ret) {
- error_propagate(errp, local_err);
- return ret;
- }
- }
- return 0;
-}
-
-struct {
- MultiFDRecvParams *params;
- /* number of created threads */
- int count;
- /* syncs main thread and channels */
- QemuSemaphore sem_sync;
- /* global number of generated multifd packets */
- uint64_t packet_num;
- /* multifd ops */
- MultiFDMethods *ops;
-} *multifd_recv_state;
-
-static void multifd_recv_terminate_threads(Error *err)
-{
- int i;
-
- trace_multifd_recv_terminate_threads(err != NULL);
-
- if (err) {
- MigrationState *s = migrate_get_current();
- migrate_set_error(s, err);
- if (s->state == MIGRATION_STATUS_SETUP ||
- s->state == MIGRATION_STATUS_ACTIVE) {
- migrate_set_state(&s->state, s->state,
- MIGRATION_STATUS_FAILED);
- }
- }
-
- for (i = 0; i < migrate_multifd_channels(); i++) {
- MultiFDRecvParams *p = &multifd_recv_state->params[i];
-
- qemu_mutex_lock(&p->mutex);
- p->quit = true;
- /*
- * We could arrive here for two reasons:
- * - normal quit, i.e. everything went fine, just finished
- * - error quit: We close the channels so the channel threads
- * finish the qio_channel_read_all_eof()
- */
- if (p->c) {
- qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
- }
- qemu_mutex_unlock(&p->mutex);
- }
-}
-
-int multifd_load_cleanup(Error **errp)
-{
- int i;
-
- if (!migrate_use_multifd() || !migrate_multi_channels_is_allowed()) {
- return 0;
- }
- multifd_recv_terminate_threads(NULL);
- for (i = 0; i < migrate_multifd_channels(); i++) {
- MultiFDRecvParams *p = &multifd_recv_state->params[i];
-
- if (p->running) {
- p->quit = true;
- /*
- * multifd_recv_thread may be blocked in the MULTIFD_FLAG_SYNC
- * handling code; waking it up here is harmless during the cleanup
- * phase.
- */
- qemu_sem_post(&p->sem_sync);
- qemu_thread_join(&p->thread);
- }
- }
- for (i = 0; i < migrate_multifd_channels(); i++) {
- MultiFDRecvParams *p = &multifd_recv_state->params[i];
-
- migration_ioc_unregister_yank(p->c);
- object_unref(OBJECT(p->c));
- p->c = NULL;
- qemu_mutex_destroy(&p->mutex);
- qemu_sem_destroy(&p->sem_sync);
- g_free(p->name);
- p->name = NULL;
- p->packet_len = 0;
- g_free(p->packet);
- p->packet = NULL;
- g_free(p->iov);
- p->iov = NULL;
- g_free(p->normal);
- p->normal = NULL;
- multifd_recv_state->ops->recv_cleanup(p);
- }
- qemu_sem_destroy(&multifd_recv_state->sem_sync);
- g_free(multifd_recv_state->params);
- multifd_recv_state->params = NULL;
- g_free(multifd_recv_state);
- multifd_recv_state = NULL;
-
- return 0;
-}
-
-void multifd_recv_sync_main(void)
-{
- int i;
-
- if (!migrate_use_multifd()) {
- return;
- }
- for (i = 0; i < migrate_multifd_channels(); i++) {
- MultiFDRecvParams *p = &multifd_recv_state->params[i];
-
- trace_multifd_recv_sync_main_wait(p->id);
- qemu_sem_wait(&multifd_recv_state->sem_sync);
- }
- for (i = 0; i < migrate_multifd_channels(); i++) {
- MultiFDRecvParams *p = &multifd_recv_state->params[i];
-
- WITH_QEMU_LOCK_GUARD(&p->mutex) {
- if (multifd_recv_state->packet_num < p->packet_num) {
- multifd_recv_state->packet_num = p->packet_num;
- }
- }
- trace_multifd_recv_sync_main_signal(p->id);
- qemu_sem_post(&p->sem_sync);
- }
- trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
-}
-
-static void *multifd_recv_thread(void *opaque)
-{
- MultiFDRecvParams *p = opaque;
- Error *local_err = NULL;
- int ret;
-
- trace_multifd_recv_thread_start(p->id);
- rcu_register_thread();
-
- while (true) {
- uint32_t flags;
-
- if (p->quit) {
- break;
- }
-
- ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
- p->packet_len, &local_err);
- if (ret == 0) { /* EOF */
- break;
- }
- if (ret == -1) { /* Error */
- break;
- }
-
- qemu_mutex_lock(&p->mutex);
- ret = multifd_recv_unfill_packet(p, &local_err);
- if (ret) {
- qemu_mutex_unlock(&p->mutex);
- break;
- }
-
- flags = p->flags;
- /* recv methods don't know how to handle the SYNC flag */
- p->flags &= ~MULTIFD_FLAG_SYNC;
- trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags,
- p->next_packet_size);
- p->num_packets++;
- p->total_normal_pages += p->normal_num;
- qemu_mutex_unlock(&p->mutex);
-
- if (p->normal_num) {
- ret = multifd_recv_state->ops->recv_pages(p, &local_err);
- if (ret != 0) {
- break;
- }
- }
-
- if (flags & MULTIFD_FLAG_SYNC) {
- qemu_sem_post(&multifd_recv_state->sem_sync);
- qemu_sem_wait(&p->sem_sync);
- }
- }
-
- if (local_err) {
- multifd_recv_terminate_threads(local_err);
- error_free(local_err);
- }
- qemu_mutex_lock(&p->mutex);
- p->running = false;
- qemu_mutex_unlock(&p->mutex);
-
- rcu_unregister_thread();
- trace_multifd_recv_thread_end(p->id, p->num_packets, p->total_normal_pages);
-
- return NULL;
-}
-
-int multifd_load_setup(Error **errp)
-{
- int thread_count;
- uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
- uint8_t i;
-
- /*
- * Return successfully if multiFD recv state is already initialised
- * or multiFD is not enabled.
- */
- if (multifd_recv_state || !migrate_use_multifd()) {
- return 0;
- }
-
- if (!migrate_multi_channels_is_allowed()) {
- error_setg(errp, "multifd is not supported by current protocol");
- return -1;
- }
- thread_count = migrate_multifd_channels();
- multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
- multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
- qatomic_set(&multifd_recv_state->count, 0);
- qemu_sem_init(&multifd_recv_state->sem_sync, 0);
- multifd_recv_state->ops = multifd_ops[migrate_multifd_compression()];
-
- for (i = 0; i < thread_count; i++) {
- MultiFDRecvParams *p = &multifd_recv_state->params[i];
-
- qemu_mutex_init(&p->mutex);
- qemu_sem_init(&p->sem_sync, 0);
- p->quit = false;
- p->id = i;
- p->packet_len = sizeof(MultiFDPacket_t)
- + sizeof(uint64_t) * page_count;
- p->packet = g_malloc0(p->packet_len);
- p->name = g_strdup_printf("multifdrecv_%d", i);
- p->iov = g_new0(struct iovec, page_count);
- p->normal = g_new0(ram_addr_t, page_count);
- p->page_count = page_count;
- p->page_size = qemu_target_page_size();
- }
-
- for (i = 0; i < thread_count; i++) {
- MultiFDRecvParams *p = &multifd_recv_state->params[i];
- Error *local_err = NULL;
- int ret;
-
- ret = multifd_recv_state->ops->recv_setup(p, &local_err);
- if (ret) {
- error_propagate(errp, local_err);
- return ret;
- }
- }
- return 0;
-}
-
-bool multifd_recv_all_channels_created(void)
-{
- int thread_count = migrate_multifd_channels();
-
- if (!migrate_use_multifd()) {
- return true;
- }
-
- if (!multifd_recv_state) {
- /* Called before any connections created */
- return false;
- }
-
- return thread_count == qatomic_read(&multifd_recv_state->count);
-}
-
-/*
- * Try to receive all multifd channels to get ready for the migration.
- * Sets @errp when failing to receive the current channel.
- */
-void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
-{
- MultiFDRecvParams *p;
- Error *local_err = NULL;
- int id;
-
- id = multifd_recv_initial_packet(ioc, &local_err);
- if (id < 0) {
- multifd_recv_terminate_threads(local_err);
- error_propagate_prepend(errp, local_err,
- "failed to receive packet"
- " via multifd channel %d: ",
- qatomic_read(&multifd_recv_state->count));
- return;
- }
- trace_multifd_recv_new_channel(id);
-
- p = &multifd_recv_state->params[id];
- if (p->c != NULL) {
- error_setg(&local_err, "multifd: received id '%d' already setup'",
- id);
- multifd_recv_terminate_threads(local_err);
- error_propagate(errp, local_err);
- return;
- }
- p->c = ioc;
- object_ref(OBJECT(ioc));
- /* initial packet */
- p->num_packets = 1;
-
- p->running = true;
- qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
- QEMU_THREAD_JOINABLE);
- qatomic_inc(&multifd_recv_state->count);
-}
diff --git a/migration/multifd.h b/migration/multifd.h
index ff3aa2e..7cfc265 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -16,7 +16,8 @@
int multifd_save_setup(Error **errp);
void multifd_save_cleanup(void);
int multifd_load_setup(Error **errp);
-int multifd_load_cleanup(Error **errp);
+void multifd_load_cleanup(void);
+void multifd_load_shutdown(void);
bool multifd_recv_all_channels_created(void);
void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
void multifd_recv_sync_main(void);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 53299b7..f54f44d 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -1198,6 +1198,11 @@
if (migrate_postcopy_preempt()) {
/*
+ * The preempt channel is established asynchronously; wait for it
+ * to complete.
+ */
+ qemu_sem_wait(&mis->postcopy_qemufile_dst_done);
+ /*
* This thread needs to be created after the temp pages because
* it'll fetch RAM_CHANNEL_POSTCOPY PostcopyTmpPage immediately.
*/
@@ -1544,6 +1549,7 @@
*/
qemu_file_set_blocking(file, true);
mis->postcopy_qemufile_dst = file;
+ qemu_sem_post(&mis->postcopy_qemufile_dst_done);
trace_postcopy_preempt_new_channel();
}
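
The ordering being enforced here is a plain "wait for an asynchronous completion" handshake: the connect path posts a semaphore and the consumer blocks on it before touching the channel. A standalone sketch with POSIX semaphores (illustrative names, not QEMU's QemuSemaphore):

#include <semaphore.h>
#include <stdio.h>

static sem_t channel_ready;
static int channel_fd = -1;

/* Runs in some other thread once the asynchronous connect finishes. */
static void on_channel_connected(int fd)
{
    channel_fd = fd;
    sem_post(&channel_ready);    /* wake whoever waits in start_listener() */
}

static void start_listener(void)
{
    /* The channel is set up asynchronously; wait for its completion
     * before using channel_fd, mirroring the ordering above. */
    sem_wait(&channel_ready);
    printf("channel fd %d is ready\n", channel_fd);
}

int main(void)
{
    sem_init(&channel_ready, 0, 0);
    on_channel_connected(3);     /* pretend the async connect completed */
    start_listener();
    return 0;
}
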
@@ -1612,14 +1618,21 @@
postcopy_preempt_send_channel_done(s, ioc, local_err);
}
-/* Returns 0 if channel established, -1 for error. */
-int postcopy_preempt_wait_channel(MigrationState *s)
+/*
+ * This function will kick off an async task to establish the preempt
+ * channel, and wait until the connection setup has completed. Returns 0
+ * if the channel is established, -1 on error.
+ */
+int postcopy_preempt_establish_channel(MigrationState *s)
{
/* If preempt not enabled, no need to wait */
if (!migrate_postcopy_preempt()) {
return 0;
}
+ /* Kick off async task to establish preempt channel */
+ postcopy_preempt_setup(s);
+
/*
* We need the postcopy preempt channel to be established before
* starting doing anything.
@@ -1629,22 +1642,10 @@
return s->postcopy_qemufile_src ? 0 : -1;
}
-int postcopy_preempt_setup(MigrationState *s, Error **errp)
+void postcopy_preempt_setup(MigrationState *s)
{
- if (!migrate_postcopy_preempt()) {
- return 0;
- }
-
- if (!migrate_multi_channels_is_allowed()) {
- error_setg(errp, "Postcopy preempt is not supported as current "
- "migration stream does not support multi-channels.");
- return -1;
- }
-
/* Kick an async task to connect */
socket_send_channel_create(postcopy_preempt_send_channel_new, s);
-
- return 0;
}
static void postcopy_pause_ram_fast_load(MigrationIncomingState *mis)
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 25881c4..b4867a3 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -191,7 +191,7 @@
};
void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
-int postcopy_preempt_setup(MigrationState *s, Error **errp);
-int postcopy_preempt_wait_channel(MigrationState *s);
+void postcopy_preempt_setup(MigrationState *s);
+int postcopy_preempt_establish_channel(MigrationState *s);
#endif
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 2d5f74f..102ab3b 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -940,3 +940,37 @@
{
return file->ioc;
}
+
+/*
+ * Read size bytes from QEMUFile f and write them to fd.
+ */
+int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size)
+{
+ while (size) {
+ size_t pending = f->buf_size - f->buf_index;
+ ssize_t rc;
+
+ if (!pending) {
+ rc = qemu_fill_buffer(f);
+ if (rc < 0) {
+ return rc;
+ }
+ if (rc == 0) {
+ return -EIO;
+ }
+ continue;
+ }
+
+ rc = write(fd, f->buf + f->buf_index, MIN(pending, size));
+ if (rc < 0) {
+ return -errno;
+ }
+ if (rc == 0) {
+ return -EIO;
+ }
+ f->buf_index += rc;
+ size -= rc;
+ }
+
+ return 0;
+}
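
A hedged example of how a caller might use the new helper; only qemu_file_get_to_fd() and its signature come from the hunk above, the function name and the idea of dumping to a plain file are made up, and the rest is POSIX:

/* Hypothetical caller; assumes QEMU's qemu-file.h for the QEMUFile type. */
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

static int dump_stream_to_file(QEMUFile *f, const char *path, size_t size)
{
    int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0600);
    int ret;

    if (fd < 0) {
        return -errno;
    }
    ret = qemu_file_get_to_fd(f, fd, size);   /* drain size bytes to fd */
    close(fd);
    return ret;
}
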
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index fa13d04..9d0155a 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -148,6 +148,7 @@
QEMUFile *qemu_file_get_return_path(QEMUFile *f);
void qemu_fflush(QEMUFile *f);
void qemu_file_set_blocking(QEMUFile *f, bool block);
+int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size);
void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
diff --git a/migration/ram.c b/migration/ram.c
index b966e14..96e8a19 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -67,21 +67,53 @@
/***********************************************************/
/* ram save/restore */
-/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
- * worked for pages that where filled with the same char. We switched
+/*
+ * RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
+ * worked for pages that were filled with the same char. We switched
* it to only search for the zero value. And to avoid confusion with
- * RAM_SSAVE_FLAG_COMPRESS_PAGE just rename it.
+ * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
*/
-
-#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
+/*
+ * RAM_SAVE_FLAG_FULL was obsoleted in 2009; it can be reused now.
+ */
+#define RAM_SAVE_FLAG_FULL 0x01
#define RAM_SAVE_FLAG_ZERO 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE 0x08
#define RAM_SAVE_FLAG_EOS 0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE 0x40
-/* 0x80 is reserved in migration.h start with 0x100 next */
+/* 0x80 is reserved in qemu-file.h for RAM_SAVE_FLAG_HOOK */
#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
+/* We can't use any flag that is bigger than 0x200 */
+
+int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
+ uint8_t *, int) = xbzrle_encode_buffer;
+#if defined(CONFIG_AVX512BW_OPT)
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+ unsigned max = __get_cpuid_max(0, NULL);
+ int a, b, c, d;
+ if (max >= 1) {
+ __cpuid(1, a, b, c, d);
+ /* We must check that AVX is not just available, but usable. */
+ if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+ int bv;
+ __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+ __cpuid_count(7, 0, a, b, c, d);
+ /* 0xe6:
+ * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+ * and ZMM16-ZMM31 state are enabled by OS)
+ * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+ */
+ if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+ xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
+ }
+ }
+ }
+}
+#endif
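
The constructor above probes cpuid by hand so it can also verify, via XGETBV, that the OS saves the wider register state. A shorter sketch of the same load-time dispatch idea using GCC's builtin probe (illustrative names; note that the builtin alone does not prove the OS enables the ZMM state):

#include <stdint.h>

static int encode_generic(const uint8_t *o, const uint8_t *n, int len)
{
    (void)o; (void)n; return len;   /* stand-in for the scalar encoder */
}

static int encode_avx512(const uint8_t *o, const uint8_t *n, int len)
{
    (void)o; (void)n; return len;   /* stand-in for the AVX512 encoder */
}

static int (*encode)(const uint8_t *, const uint8_t *, int) = encode_generic;

static void __attribute__((constructor)) pick_encoder(void)
{
    __builtin_cpu_init();
    if (__builtin_cpu_supports("avx512bw")) {
        encode = encode_avx512;     /* swap in the wide encoder once */
    }
}

int main(void)
{
    uint8_t a[4] = {0}, b[4] = {0};
    return encode(a, b, 4) == 4 ? 0 : 1;
}
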
XBZRLECacheStats xbzrle_counters;
@@ -330,6 +362,8 @@
PageSearchStatus pss[RAM_CHANNEL_MAX];
/* UFFD file descriptor, used in 'write-tracking' migration */
int uffdio_fd;
+ /* total ram size in bytes */
+ uint64_t ram_bytes_total;
/* Last block that we have visited searching for dirty pages */
RAMBlock *last_seen_block;
/* Last dirty target page we have sent */
@@ -450,6 +484,13 @@
ram_counters.dirty_sync_missed_zero_copy++;
}
+struct MigrationOps {
+ int (*ram_save_target_page)(RAMState *rs, PageSearchStatus *pss);
+};
+typedef struct MigrationOps MigrationOps;
+
+MigrationOps *migration_ops;
+
CompressionStats compression_counters;
struct CompressParam {
@@ -797,9 +838,9 @@
memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
/* XBZRLE encoding (if there is no overflow) */
- encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
- TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
- TARGET_PAGE_SIZE);
+ encoded_len = xbzrle_encode_buffer_func(prev_cached_page, XBZRLE.current_buf,
+ TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+ TARGET_PAGE_SIZE);
/*
* Update the cache contents, so that it corresponds to the data
@@ -1546,17 +1587,23 @@
return pages;
}
+#define PAGE_ALL_CLEAN 0
+#define PAGE_TRY_AGAIN 1
+#define PAGE_DIRTY_FOUND 2
/**
* find_dirty_block: find the next dirty page and update any state
* associated with the search process.
*
- * Returns true if a page is found
+ * Returns:
+ * PAGE_ALL_CLEAN: no dirty page found, give up
+ * PAGE_TRY_AGAIN: no dirty page found, retry for next block
+ * PAGE_DIRTY_FOUND: dirty page found
*
* @rs: current RAM state
* @pss: data about the state of the current dirty page scan
* @again: set to false if the search has scanned the whole of RAM
*/
-static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
+static int find_dirty_block(RAMState *rs, PageSearchStatus *pss)
{
/* Update pss->page for the next dirty bit in ramblock */
pss_find_next_dirty(pss);
@@ -1567,8 +1614,7 @@
* We've been once around the RAM and haven't found anything.
* Give up.
*/
- *again = false;
- return false;
+ return PAGE_ALL_CLEAN;
}
if (!offset_in_ramblock(pss->block,
((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) {
@@ -1597,13 +1643,10 @@
}
}
/* Didn't find anything this time, but try again on the new block */
- *again = true;
- return false;
+ return PAGE_TRY_AGAIN;
} else {
- /* Can go around again, but... */
- *again = true;
- /* We've found something so probably don't need to */
- return true;
+ /* We've found something */
+ return PAGE_DIRTY_FOUND;
}
}
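
The three return values replace the old bool-plus-*again contract. A standalone sketch of how a caller consumes them (illustrative names; the real consumer is the rewritten loop in ram_find_and_save_block() further down in this patch):

#include <stdio.h>

enum { PAGE_ALL_CLEAN, PAGE_TRY_AGAIN, PAGE_DIRTY_FOUND };

/* Toy stand-in for find_dirty_block(): pretend every third position
 * in a tiny address space is dirty. */
static int find_dirty(int *cursor)
{
    if (*cursor >= 9) {
        return PAGE_ALL_CLEAN;      /* scanned everything, give up   */
    }
    (*cursor)++;
    return (*cursor % 3 == 0) ? PAGE_DIRTY_FOUND : PAGE_TRY_AGAIN;
}

int main(void)
{
    int cursor = 0;

    for (;;) {
        int res = find_dirty(&cursor);

        if (res == PAGE_ALL_CLEAN) {
            break;                  /* nothing left to send          */
        }
        if (res == PAGE_TRY_AGAIN) {
            continue;               /* moved on, keep scanning       */
        }
        printf("dirty page at %d\n", cursor);   /* PAGE_DIRTY_FOUND  */
    }
    return 0;
}
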
@@ -2291,14 +2334,14 @@
}
/**
- * ram_save_target_page: save one target page
+ * ram_save_target_page_legacy: save one target page
*
* Returns the number of pages written
*
* @rs: current RAM state
* @pss: data about the page we want to send
*/
-static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
+static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
{
RAMBlock *block = pss->block;
ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
@@ -2424,7 +2467,7 @@
if (page_dirty) {
/* Be strict to return code; it must be 1, or what else? */
- if (ram_save_target_page(rs, pss) != 1) {
+ if (migration_ops->ram_save_target_page(rs, pss) != 1) {
error_report_once("%s: ram_save_target_page failed", __func__);
ret = -1;
goto out;
@@ -2493,7 +2536,7 @@
if (preempt_active) {
qemu_mutex_unlock(&rs->bitmap_mutex);
}
- tmppages = ram_save_target_page(rs, pss);
+ tmppages = migration_ops->ram_save_target_page(rs, pss);
if (tmppages >= 0) {
pages += tmppages;
/*
@@ -2542,10 +2585,9 @@
{
PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY];
int pages = 0;
- bool again, found;
/* No dirty page as there is zero RAM */
- if (!ram_bytes_total()) {
+ if (!rs->ram_bytes_total) {
return pages;
}
@@ -2563,19 +2605,23 @@
pss_init(pss, rs->last_seen_block, rs->last_page);
- do {
- again = true;
- found = get_queued_page(rs, pss);
-
- if (!found) {
+ while (true) {
+ if (!get_queued_page(rs, pss)) {
/* priority queue empty, so just search for something dirty */
- found = find_dirty_block(rs, pss, &again);
+ int res = find_dirty_block(rs, pss);
+ if (res != PAGE_DIRTY_FOUND) {
+ if (res == PAGE_ALL_CLEAN) {
+ break;
+ } else if (res == PAGE_TRY_AGAIN) {
+ continue;
+ }
+ }
}
-
- if (found) {
- pages = ram_save_host_page(rs, pss);
+ pages = ram_save_host_page(rs, pss);
+ if (pages) {
+ break;
}
- } while (!pages && again);
+ }
rs->last_seen_block = pss->block;
rs->last_page = pss->page;
@@ -2596,28 +2642,30 @@
}
}
-static uint64_t ram_bytes_total_common(bool count_ignored)
+static uint64_t ram_bytes_total_with_ignored(void)
{
RAMBlock *block;
uint64_t total = 0;
RCU_READ_LOCK_GUARD();
- if (count_ignored) {
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
- total += block->used_length;
- }
- } else {
- RAMBLOCK_FOREACH_NOT_IGNORED(block) {
- total += block->used_length;
- }
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ total += block->used_length;
}
return total;
}
uint64_t ram_bytes_total(void)
{
- return ram_bytes_total_common(false);
+ RAMBlock *block;
+ uint64_t total = 0;
+
+ RCU_READ_LOCK_GUARD();
+
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+ total += block->used_length;
+ }
+ return total;
}
static void xbzrle_load_setup(void)
@@ -2688,6 +2736,8 @@
xbzrle_cleanup();
compress_threads_save_cleanup();
ram_state_cleanup(rsp);
+ g_free(migration_ops);
+ migration_ops = NULL;
}
static void ram_state_reset(RAMState *rs)
@@ -3002,13 +3052,14 @@
qemu_mutex_init(&(*rsp)->bitmap_mutex);
qemu_mutex_init(&(*rsp)->src_page_req_mutex);
QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
+ (*rsp)->ram_bytes_total = ram_bytes_total();
/*
* Count the total number of pages used by ram blocks not including any
* gaps due to alignment or unplugs.
* This must match with the initial values of dirty bitmap.
*/
- (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
+ (*rsp)->migration_dirty_pages = (*rsp)->ram_bytes_total >> TARGET_PAGE_BITS;
ram_state_reset(*rsp);
return 0;
@@ -3222,7 +3273,8 @@
(*rsp)->pss[RAM_CHANNEL_PRECOPY].pss_channel = f;
WITH_RCU_READ_LOCK_GUARD() {
- qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
+ qemu_put_be64(f, ram_bytes_total_with_ignored()
+ | RAM_SAVE_FLAG_MEM_SIZE);
RAMBLOCK_FOREACH_MIGRATABLE(block) {
qemu_put_byte(f, strlen(block->idstr));
@@ -3241,6 +3293,8 @@
ram_control_before_iterate(f, RAM_CONTROL_SETUP);
ram_control_after_iterate(f, RAM_CONTROL_SETUP);
+ migration_ops = g_malloc0(sizeof(MigrationOps));
+ migration_ops->ram_save_target_page = ram_save_target_page_legacy;
ret = multifd_send_sync_main(f);
if (ret < 0) {
return ret;
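
migration_ops is a one-member vtable, so the per-page save hook can be swapped at setup time. A standalone sketch of that pattern (illustrative names, not QEMU's):

#include <stdio.h>
#include <stdlib.h>

typedef struct SaveOps {
    int (*save_target_page)(int page);
} SaveOps;

static int save_target_page_legacy(int page)
{
    printf("saving page %d the legacy way\n", page);
    return 1;                        /* one page written */
}

static SaveOps *save_ops;

int main(void)
{
    save_ops = calloc(1, sizeof(*save_ops));
    save_ops->save_target_page = save_target_page_legacy;
    return save_ops->save_target_page(42) == 1 ? 0 : 1;
}
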
@@ -3435,10 +3489,8 @@
return 0;
}
-static void ram_state_pending_estimate(void *opaque,
- uint64_t *res_precopy_only,
- uint64_t *res_compatible,
- uint64_t *res_postcopy_only)
+static void ram_state_pending_estimate(void *opaque, uint64_t *must_precopy,
+ uint64_t *can_postcopy)
{
RAMState **temp = opaque;
RAMState *rs = *temp;
@@ -3447,16 +3499,14 @@
if (migrate_postcopy_ram()) {
/* We can do postcopy, and all the data is postcopiable */
- *res_postcopy_only += remaining_size;
+ *can_postcopy += remaining_size;
} else {
- *res_precopy_only += remaining_size;
+ *must_precopy += remaining_size;
}
}
-static void ram_state_pending_exact(void *opaque,
- uint64_t *res_precopy_only,
- uint64_t *res_compatible,
- uint64_t *res_postcopy_only)
+static void ram_state_pending_exact(void *opaque, uint64_t *must_precopy,
+ uint64_t *can_postcopy)
{
RAMState **temp = opaque;
RAMState *rs = *temp;
@@ -3474,9 +3524,9 @@
if (migrate_postcopy_ram()) {
/* We can do postcopy, and all the data is postcopiable */
- *res_compatible += remaining_size;
+ *can_postcopy += remaining_size;
} else {
- *res_precopy_only += remaining_size;
+ *must_precopy += remaining_size;
}
}
diff --git a/migration/savevm.c b/migration/savevm.c
index e9cf499..aa54a67 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1541,40 +1541,13 @@
* the result is split into the amount for units that can and
* for units that can't do postcopy.
*/
-void qemu_savevm_state_pending_estimate(uint64_t *res_precopy_only,
- uint64_t *res_compatible,
- uint64_t *res_postcopy_only)
+void qemu_savevm_state_pending_estimate(uint64_t *must_precopy,
+ uint64_t *can_postcopy)
{
SaveStateEntry *se;
- *res_precopy_only = 0;
- *res_compatible = 0;
- *res_postcopy_only = 0;
-
- QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
- if (!se->ops || !se->ops->state_pending_exact) {
- continue;
- }
- if (se->ops->is_active) {
- if (!se->ops->is_active(se->opaque)) {
- continue;
- }
- }
- se->ops->state_pending_exact(se->opaque,
- res_precopy_only, res_compatible,
- res_postcopy_only);
- }
-}
-
-void qemu_savevm_state_pending_exact(uint64_t *res_precopy_only,
- uint64_t *res_compatible,
- uint64_t *res_postcopy_only)
-{
- SaveStateEntry *se;
-
- *res_precopy_only = 0;
- *res_compatible = 0;
- *res_postcopy_only = 0;
+ *must_precopy = 0;
+ *can_postcopy = 0;
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops || !se->ops->state_pending_estimate) {
@@ -1585,9 +1558,28 @@
continue;
}
}
- se->ops->state_pending_estimate(se->opaque,
- res_precopy_only, res_compatible,
- res_postcopy_only);
+ se->ops->state_pending_estimate(se->opaque, must_precopy, can_postcopy);
+ }
+}
+
+void qemu_savevm_state_pending_exact(uint64_t *must_precopy,
+ uint64_t *can_postcopy)
+{
+ SaveStateEntry *se;
+
+ *must_precopy = 0;
+ *can_postcopy = 0;
+
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+ if (!se->ops || !se->ops->state_pending_exact) {
+ continue;
+ }
+ if (se->ops->is_active) {
+ if (!se->ops->is_active(se->opaque)) {
+ continue;
+ }
+ }
+ se->ops->state_pending_exact(se->opaque, must_precopy, can_postcopy);
}
}
@@ -2200,7 +2192,11 @@
qemu_sem_post(&mis->postcopy_pause_sem_fault);
if (migrate_postcopy_preempt()) {
- /* The channel should already be setup again; make sure of it */
+ /*
+ * The preempt channel is created asynchronously; wait here to make
+ * sure it has been created.
+ */
+ qemu_sem_wait(&mis->postcopy_qemufile_dst_done);
assert(mis->postcopy_qemufile_dst);
/* Kick the fast ram load thread too */
qemu_sem_post(&mis->postcopy_pause_sem_fast_load);
diff --git a/migration/savevm.h b/migration/savevm.h
index b1901e6..fb63673 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -40,12 +40,10 @@
void qemu_savevm_state_complete_postcopy(QEMUFile *f);
int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
bool inactivate_disks);
-void qemu_savevm_state_pending_exact(uint64_t *res_precopy_only,
- uint64_t *res_compatible,
- uint64_t *res_postcopy_only);
-void qemu_savevm_state_pending_estimate(uint64_t *res_precopy_only,
- uint64_t *res_compatible,
- uint64_t *res_postcopy_only);
+void qemu_savevm_state_pending_exact(uint64_t *must_precopy,
+ uint64_t *can_postcopy);
+void qemu_savevm_state_pending_estimate(uint64_t *must_precopy,
+ uint64_t *can_postcopy);
void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
void qemu_savevm_send_open_return_path(QEMUFile *f);
int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
diff --git a/migration/trace-events b/migration/trace-events
index 67b65a7..92161ee 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -150,8 +150,8 @@
migrate_fd_error(const char *error_desc) "error=%s"
migrate_fd_cancel(void) ""
migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at 0x%zx len 0x%zx"
-migrate_pending_exact(uint64_t size, uint64_t pre, uint64_t compat, uint64_t post) "exact pending size %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
-migrate_pending_estimate(uint64_t size, uint64_t pre, uint64_t compat, uint64_t post) "estimate pending size %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
+migrate_pending_exact(uint64_t size, uint64_t pre, uint64_t post) "exact pending size %" PRIu64 " (pre = %" PRIu64 " post=%" PRIu64 ")"
+migrate_pending_estimate(uint64_t size, uint64_t pre, uint64_t post) "estimate pending size %" PRIu64 " (pre = %" PRIu64 " post=%" PRIu64 ")"
migrate_send_rp_message(int msg_type, uint16_t len) "%d: len %d"
migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi64
migration_completion_file_err(void) ""
@@ -357,6 +357,7 @@
migration_block_save(const char *mig_stage, int submitted, int transferred) "Enter save live %s submitted %d transferred %d"
migration_block_save_complete(void) "Block migration completed"
migration_block_state_pending(uint64_t pending) "Enter save live pending %" PRIu64
+migration_block_progression(unsigned percent) "Completed %u%%"
# page_cache.c
migration_pagecache_init(int64_t max_num_items) "Setting cache buckets to %" PRId64
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
index 1ba482d..05366e8 100644
--- a/migration/xbzrle.c
+++ b/migration/xbzrle.c
@@ -174,3 +174,127 @@
return d;
}
+
+#if defined(CONFIG_AVX512BW_OPT)
+#pragma GCC push_options
+#pragma GCC target("avx512bw")
+#include <immintrin.h>
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+ uint8_t *dst, int dlen)
+{
+ uint32_t zrun_len = 0, nzrun_len = 0;
+ int d = 0, i = 0, num = 0;
+ uint8_t *nzrun_start = NULL;
+ /* add 1 to include residual part in main loop */
+ uint32_t count512s = (slen >> 6) + 1;
+ /* count_residual is the tail of the data, i.e., count_residual = slen % 64 */
+ uint32_t count_residual = slen & 0b111111;
+ bool never_same = true;
+ uint64_t mask_residual = 1;
+ mask_residual <<= count_residual;
+ mask_residual -= 1;
+ __m512i r = _mm512_set1_epi32(0);
+
+ while (count512s) {
+ if (d + 2 > dlen) {
+ return -1;
+ }
+
+ int bytes_to_check = 64;
+ uint64_t mask = 0xffffffffffffffff;
+ if (count512s == 1) {
+ bytes_to_check = count_residual;
+ mask = mask_residual;
+ }
+ __m512i old_data = _mm512_mask_loadu_epi8(r,
+ mask, old_buf + i);
+ __m512i new_data = _mm512_mask_loadu_epi8(r,
+ mask, new_buf + i);
+ uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
+ count512s--;
+
+ bool is_same = (comp & 0x1);
+ while (bytes_to_check) {
+ if (is_same) {
+ if (nzrun_len) {
+ d += uleb128_encode_small(dst + d, nzrun_len);
+ if (d + nzrun_len > dlen) {
+ return -1;
+ }
+ nzrun_start = new_buf + i - nzrun_len;
+ memcpy(dst + d, nzrun_start, nzrun_len);
+ d += nzrun_len;
+ nzrun_len = 0;
+ }
+ /* 64 bytes at a time for speed */
+ if (count512s && (comp == 0xffffffffffffffff)) {
+ i += 64;
+ zrun_len += 64;
+ break;
+ }
+ never_same = false;
+ num = __builtin_ctzll(~comp);
+ num = (num < bytes_to_check) ? num : bytes_to_check;
+ zrun_len += num;
+ bytes_to_check -= num;
+ comp >>= num;
+ i += num;
+ if (bytes_to_check) {
+ /* still has different data after same data */
+ d += uleb128_encode_small(dst + d, zrun_len);
+ zrun_len = 0;
+ } else {
+ break;
+ }
+ }
+ if (never_same || zrun_len) {
+ /*
+ * never_same only acts if
+ * data begins with diff in first count512s
+ */
+ d += uleb128_encode_small(dst + d, zrun_len);
+ zrun_len = 0;
+ never_same = false;
+ }
+ /* has differences, 64 bytes at a time for speed */
+ if ((bytes_to_check == 64) && (comp == 0x0)) {
+ i += 64;
+ nzrun_len += 64;
+ break;
+ }
+ num = __builtin_ctzll(comp);
+ num = (num < bytes_to_check) ? num : bytes_to_check;
+ nzrun_len += num;
+ bytes_to_check -= num;
+ comp >>= num;
+ i += num;
+ if (bytes_to_check) {
+ /* mask like 111000 */
+ d += uleb128_encode_small(dst + d, nzrun_len);
+ /* overflow */
+ if (d + nzrun_len > dlen) {
+ return -1;
+ }
+ nzrun_start = new_buf + i - nzrun_len;
+ memcpy(dst + d, nzrun_start, nzrun_len);
+ d += nzrun_len;
+ nzrun_len = 0;
+ is_same = true;
+ }
+ }
+ }
+
+ if (nzrun_len != 0) {
+ d += uleb128_encode_small(dst + d, nzrun_len);
+ /* overflow */
+ if (d + nzrun_len > dlen) {
+ return -1;
+ }
+ nzrun_start = new_buf + i - nzrun_len;
+ memcpy(dst + d, nzrun_start, nzrun_len);
+ d += nzrun_len;
+ }
+ return d;
+}
+#pragma GCC pop_options
+#endif
diff --git a/migration/xbzrle.h b/migration/xbzrle.h
index a0db507..6feb491 100644
--- a/migration/xbzrle.h
+++ b/migration/xbzrle.h
@@ -18,4 +18,8 @@
uint8_t *dst, int dlen);
int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
+#if defined(CONFIG_AVX512BW_OPT)
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+ uint8_t *dst, int dlen);
+#endif
#endif
diff --git a/net/l2tpv3.c b/net/l2tpv3.c
index 53b2d32..b5547cb 100644
--- a/net/l2tpv3.c
+++ b/net/l2tpv3.c
@@ -42,7 +42,7 @@
*/
#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
-#define BUFFER_SIZE 2048
+#define BUFFER_SIZE 16384
#define IOVSIZE 2
#define MAX_L2TPV3_MSGCNT 64
#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
diff --git a/net/net.c b/net/net.c
index 251fc5a..ebc7ce0 100644
--- a/net/net.c
+++ b/net/net.c
@@ -899,6 +899,40 @@
return -1;
}
+GPtrArray *qemu_get_nic_models(const char *device_type)
+{
+ GPtrArray *nic_models = g_ptr_array_new();
+ GSList *list = object_class_get_list_sorted(device_type, false);
+
+ while (list) {
+ DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, list->data,
+ TYPE_DEVICE);
+ GSList *next;
+ if (test_bit(DEVICE_CATEGORY_NETWORK, dc->categories) &&
+ dc->user_creatable) {
+ const char *name = object_class_get_name(list->data);
+ /*
+ * A network device might also be something other than a NIC; see
+ * e.g. the "rocker" device. Thus we have to look for the "netdev"
+ * property, too. Unfortunately, some devices like virtio-net only
+ * create this property during instance_init, so we have to create
+ * a temporary instance here to be able to check it.
+ */
+ Object *obj = object_new_with_class(OBJECT_CLASS(dc));
+ if (object_property_find(obj, "netdev")) {
+ g_ptr_array_add(nic_models, (gpointer)name);
+ }
+ object_unref(obj);
+ }
+ next = list->next;
+ g_slist_free_1(list);
+ list = next;
+ }
+ g_ptr_array_add(nic_models, NULL);
+
+ return nic_models;
+}
+
int qemu_show_nic_models(const char *arg, const char *const *models)
{
int i;
@@ -907,7 +941,7 @@
return 0;
}
- printf("Supported NIC models:\n");
+ printf("Available NIC models:\n");
for (i = 0 ; models[i]; i++) {
printf("%s\n", models[i]);
}
@@ -1508,8 +1542,18 @@
const char *type;
type = qemu_opt_get(opts, "type");
- if (type && g_str_equal(type, "none")) {
- return 0; /* Nothing to do, default_net is cleared in vl.c */
+ if (type) {
+ if (g_str_equal(type, "none")) {
+ return 0; /* Nothing to do, default_net is cleared in vl.c */
+ }
+ if (is_help_option(type)) {
+ GPtrArray *nic_models = qemu_get_nic_models(TYPE_DEVICE);
+ show_netdevs();
+ printf("\n");
+ qemu_show_nic_models(type, (const char **)nic_models->pdata);
+ g_ptr_array_free(nic_models, true);
+ exit(0);
+ }
}
idx = nic_get_free_idx();
diff --git a/net/stream.c b/net/stream.c
index 37ff727..9204b4c 100644
--- a/net/stream.c
+++ b/net/stream.c
@@ -39,6 +39,8 @@
#include "io/channel-socket.h"
#include "io/net-listener.h"
#include "qapi/qapi-events-net.h"
+#include "qapi/qapi-visit-sockets.h"
+#include "qapi/clone-visitor.h"
typedef struct NetStreamState {
NetClientState nc;
@@ -49,11 +51,15 @@
guint ioc_write_tag;
SocketReadState rs;
unsigned int send_index; /* number of bytes sent*/
+ uint32_t reconnect;
+ guint timer_tag;
+ SocketAddress *addr;
} NetStreamState;
static void net_stream_listen(QIONetListener *listener,
QIOChannelSocket *cioc,
void *opaque);
+static void net_stream_arm_reconnect(NetStreamState *s);
static gboolean net_stream_writable(QIOChannel *ioc,
GIOCondition condition,
@@ -170,6 +176,7 @@
qemu_set_info_str(&s->nc, "%s", "");
qapi_event_send_netdev_stream_disconnected(s->nc.name);
+ net_stream_arm_reconnect(s);
return G_SOURCE_REMOVE;
}
@@ -187,6 +194,14 @@
static void net_stream_cleanup(NetClientState *nc)
{
NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc);
+ if (s->timer_tag) {
+ g_source_remove(s->timer_tag);
+ s->timer_tag = 0;
+ }
+ if (s->addr) {
+ qapi_free_SocketAddress(s->addr);
+ s->addr = NULL;
+ }
if (s->ioc) {
if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) {
if (s->ioc_read_tag) {
@@ -346,12 +361,37 @@
error:
object_unref(OBJECT(s->ioc));
s->ioc = NULL;
+ net_stream_arm_reconnect(s);
+}
+
+static gboolean net_stream_reconnect(gpointer data)
+{
+ NetStreamState *s = data;
+ QIOChannelSocket *sioc;
+
+ s->timer_tag = 0;
+
+ sioc = qio_channel_socket_new();
+ s->ioc = QIO_CHANNEL(sioc);
+ qio_channel_socket_connect_async(sioc, s->addr,
+ net_stream_client_connected, s,
+ NULL, NULL);
+ return G_SOURCE_REMOVE;
+}
+
+static void net_stream_arm_reconnect(NetStreamState *s)
+{
+ if (s->reconnect && s->timer_tag == 0) {
+ s->timer_tag = g_timeout_add_seconds(s->reconnect,
+ net_stream_reconnect, s);
+ }
}
static int net_stream_client_init(NetClientState *peer,
const char *model,
const char *name,
SocketAddress *addr,
+ uint32_t reconnect,
Error **errp)
{
NetStreamState *s;
@@ -364,6 +404,10 @@
s->ioc = QIO_CHANNEL(sioc);
s->nc.link_down = true;
+ s->reconnect = reconnect;
+ if (reconnect) {
+ s->addr = QAPI_CLONE(SocketAddress, addr);
+ }
qio_channel_socket_connect_async(sioc, addr,
net_stream_client_connected, s,
NULL, NULL);
@@ -380,7 +424,14 @@
sock = &netdev->u.stream;
if (!sock->has_server || !sock->server) {
- return net_stream_client_init(peer, "stream", name, sock->addr, errp);
+ return net_stream_client_init(peer, "stream", name, sock->addr,
+ sock->has_reconnect ? sock->reconnect : 0,
+ errp);
+ }
+ if (sock->has_reconnect) {
+ error_setg(errp, "'reconnect' option is incompatible with "
+ "socket in server mode");
+ return -1;
}
return net_stream_server_init(peer, "stream", name, sock->addr, errp);
}
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 1a13a34..de5ed8f 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -384,7 +384,7 @@
g_strerror(errno), errno);
return -1;
}
- if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) ||
+ if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
return 0;
}
diff --git a/net/vmnet-common.m b/net/vmnet-common.m
index 2cb60b9..2958283 100644
--- a/net/vmnet-common.m
+++ b/net/vmnet-common.m
@@ -17,6 +17,7 @@
#include "clients.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
+#include "sysemu/runstate.h"
#include <vmnet/vmnet.h>
#include <dispatch/dispatch.h>
@@ -242,6 +243,35 @@
}
}
+/**
+ * Called on state change to un-register/re-register handlers
+ */
+static void vmnet_vm_state_change_cb(void *opaque, bool running, RunState state)
+{
+ VmnetState *s = opaque;
+
+ if (running) {
+ vmnet_interface_set_event_callback(
+ s->vmnet_if,
+ VMNET_INTERFACE_PACKETS_AVAILABLE,
+ s->if_queue,
+ ^(interface_event_t event_id, xpc_object_t event) {
+ assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE);
+ /*
+ * This function is being called from a non qemu thread, so
+ * we only schedule a BH, and do the rest of the io completion
+ * handling from vmnet_send_bh() which runs in a qemu context.
+ */
+ qemu_bh_schedule(s->send_bh);
+ });
+ } else {
+ vmnet_interface_set_event_callback(
+ s->vmnet_if,
+ VMNET_INTERFACE_PACKETS_AVAILABLE,
+ NULL,
+ NULL);
+ }
+}
int vmnet_if_create(NetClientState *nc,
xpc_object_t if_desc,
@@ -329,19 +359,9 @@
s->packets_send_current_pos = 0;
s->packets_send_end_pos = 0;
- vmnet_interface_set_event_callback(
- s->vmnet_if,
- VMNET_INTERFACE_PACKETS_AVAILABLE,
- s->if_queue,
- ^(interface_event_t event_id, xpc_object_t event) {
- assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE);
- /*
- * This function is being called from a non qemu thread, so
- * we only schedule a BH, and do the rest of the io completion
- * handling from vmnet_send_bh() which runs in a qemu context.
- */
- qemu_bh_schedule(s->send_bh);
- });
+ vmnet_vm_state_change_cb(s, 1, RUN_STATE_RUNNING);
+
+ s->change = qemu_add_vm_change_state_handler(vmnet_vm_state_change_cb, s);
return 0;
}
@@ -356,6 +376,8 @@
return;
}
+ vmnet_vm_state_change_cb(s, 0, RUN_STATE_SHUTDOWN);
+ qemu_del_vm_change_state_handler(s->change);
if_stopped_sem = dispatch_semaphore_create(0);
vmnet_stop_interface(
s->vmnet_if,
diff --git a/net/vmnet_int.h b/net/vmnet_int.h
index d0b9059..a8a033d 100644
--- a/net/vmnet_int.h
+++ b/net/vmnet_int.h
@@ -45,6 +45,8 @@
int packets_send_end_pos;
struct iovec iov_buf[VMNET_PACKETS_LIMIT];
+
+ VMChangeStateEntry *change;
} VmnetState;
const char *vmnet_status_map_str(vmnet_return_t status);
diff --git a/pc-bios/README b/pc-bios/README
index b94f3fb..3702ed4 100644
--- a/pc-bios/README
+++ b/pc-bios/README
@@ -20,12 +20,6 @@
-machine pseries,x-vof=on. When enabled, the firmware acts as a slim shim and
QEMU implements parts of the IEEE 1275 Open Firmware interface.
-- sgabios (the Serial Graphics Adapter option ROM) provides a means for
- legacy x86 software to communicate with an attached serial console as
- if a video card were attached. The master sources reside in a subversion
- repository at http://sgabios.googlecode.com/svn/trunk. A git mirror is
- available at https://gitlab.com/qemu-project/sgabios.git.
-
- The PXE roms come from the iPXE project. Built with BANNER_TIME 0.
Sources available at http://ipxe.org. Vendor:Device ID -> ROM mapping:
diff --git a/pc-bios/meson.build b/pc-bios/meson.build
index 388e0db..a7224ef 100644
--- a/pc-bios/meson.build
+++ b/pc-bios/meson.build
@@ -28,7 +28,6 @@
'bios-256k.bin',
'bios-microvm.bin',
'qboot.rom',
- 'sgabios.bin',
'vgabios.bin',
'vgabios-cirrus.bin',
'vgabios-stdvga.bin',
diff --git a/pc-bios/sgabios.bin b/pc-bios/sgabios.bin
deleted file mode 100644
index 6308f2e..0000000
--- a/pc-bios/sgabios.bin
+++ /dev/null
Binary files differ
diff --git a/python/.gitignore b/python/.gitignore
index 904f324..c3ceb1c 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -11,8 +11,8 @@
.idea/
.vscode/
-# virtual environments (pipenv et al)
-.venv/
+# virtual environments
+.min-venv/
.tox/
.dev-venv/
diff --git a/python/Makefile b/python/Makefile
index b170708..c5bd6ff 100644
--- a/python/Makefile
+++ b/python/Makefile
@@ -1,15 +1,16 @@
QEMU_VENV_DIR=.dev-venv
+QEMU_MINVENV_DIR=.min-venv
QEMU_TOX_EXTRA_ARGS ?=
.PHONY: help
help:
@echo "python packaging help:"
@echo ""
- @echo "make check-pipenv:"
- @echo " Run tests in pipenv's virtual environment."
+ @echo "make check-minreqs:"
+ @echo " Run tests in the minreqs virtual environment."
@echo " These tests use the oldest dependencies."
- @echo " Requires: Python 3.6 and pipenv."
- @echo " Hint (Fedora): 'sudo dnf install python3.6 pipenv'"
+ @echo " Requires: Python 3.6"
+ @echo " Hint (Fedora): 'sudo dnf install python3.6'"
@echo ""
@echo "make check-tox:"
@echo " Run tests against multiple python versions."
@@ -33,8 +34,8 @@
@echo " and install the qemu package in editable mode."
@echo " (Can be used in or outside of a venv.)"
@echo ""
- @echo "make pipenv"
- @echo " Creates pipenv's virtual environment (.venv)"
+ @echo "make min-venv"
+ @echo " Creates the minreqs virtual environment ($(QEMU_MINVENV_DIR))"
@echo ""
@echo "make dev-venv"
@echo " Creates a simple venv for check-dev. ($(QEMU_VENV_DIR))"
@@ -43,21 +44,38 @@
@echo " Remove package build output."
@echo ""
@echo "make distclean:"
- @echo " remove pipenv/venv files, qemu package forwarder,"
+ @echo " remove venv files, qemu package forwarder,"
@echo " built distribution files, and everything from 'make clean'."
@echo ""
@echo -e "Have a nice day ^_^\n"
-.PHONY: pipenv
-pipenv: .venv
-.venv: Pipfile.lock
- @PIPENV_VENV_IN_PROJECT=1 pipenv sync --dev --keep-outdated
- rm -f pyproject.toml
- @touch .venv
+.PHONY: pipenv check-pipenv
+pipenv check-pipenv:
+ @echo "pipenv was dropped; try 'make check-minreqs' or 'make min-venv'"
+ @exit 1
-.PHONY: check-pipenv
-check-pipenv: pipenv
- @pipenv run make check
+.PHONY: min-venv
+min-venv: $(QEMU_MINVENV_DIR) $(QEMU_MINVENV_DIR)/bin/activate
+$(QEMU_MINVENV_DIR) $(QEMU_MINVENV_DIR)/bin/activate: setup.cfg tests/minreqs.txt
+ @echo "VENV $(QEMU_MINVENV_DIR)"
+ @python3.6 -m venv $(QEMU_MINVENV_DIR)
+ @( \
+ echo "ACTIVATE $(QEMU_MINVENV_DIR)"; \
+ . $(QEMU_MINVENV_DIR)/bin/activate; \
+ echo "INSTALL -r tests/minreqs.txt $(QEMU_MINVENV_DIR)";\
+ pip install -r tests/minreqs.txt 1>/dev/null; \
+ echo "INSTALL -e qemu $(QEMU_MINVENV_DIR)"; \
+ pip install -e . 1>/dev/null; \
+ )
+ @touch $(QEMU_MINVENV_DIR)
+
+.PHONY: check-minreqs
+check-minreqs: min-venv
+ @( \
+ echo "ACTIVATE $(QEMU_MINVENV_DIR)"; \
+ . $(QEMU_MINVENV_DIR)/bin/activate; \
+ make check; \
+ )
.PHONY: dev-venv
dev-venv: $(QEMU_VENV_DIR) $(QEMU_VENV_DIR)/bin/activate
@@ -106,6 +124,7 @@
.PHONY: distclean
distclean: clean
- rm -rf qemu.egg-info/ .venv/ .tox/ $(QEMU_VENV_DIR) dist/
+ rm -rf qemu.egg-info/ .eggs/ dist/
+ rm -rf $(QEMU_VENV_DIR) $(QEMU_MINVENV_DIR) .tox/
rm -f .coverage .coverage.*
rm -rf htmlcov/
diff --git a/python/Pipfile b/python/Pipfile
deleted file mode 100644
index e7acb8c..0000000
--- a/python/Pipfile
+++ /dev/null
@@ -1,13 +0,0 @@
-[[source]]
-name = "pypi"
-url = "https://pypi.org/simple"
-verify_ssl = true
-
-[dev-packages]
-qemu = {editable = true, extras = ["devel"], path = "."}
-
-[packages]
-qemu = {editable = true,path = "."}
-
-[requires]
-python_version = "3.6"
diff --git a/python/Pipfile.lock b/python/Pipfile.lock
deleted file mode 100644
index ce46404..0000000
--- a/python/Pipfile.lock
+++ /dev/null
@@ -1,347 +0,0 @@
-{
- "_meta": {
- "hash": {
- "sha256": "f1a25654d884a5b450e38d78b1f2e3ebb9073e421cc4358d4bbb83ac251a5670"
- },
- "pipfile-spec": 6,
- "requires": {
- "python_version": "3.6"
- },
- "sources": [
- {
- "name": "pypi",
- "url": "https://pypi.org/simple",
- "verify_ssl": true
- }
- ]
- },
- "default": {
- "qemu": {
- "editable": true,
- "path": "."
- }
- },
- "develop": {
- "appdirs": {
- "hashes": [
- "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41",
- "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"
- ],
- "version": "==1.4.4"
- },
- "astroid": {
- "hashes": [
- "sha256:09bdb456e02564731f8b5957cdd0c98a7f01d2db5e90eb1d794c353c28bfd705",
- "sha256:6a8a51f64dae307f6e0c9db752b66a7951e282389d8362cc1d39a56f3feeb31d"
- ],
- "index": "pypi",
- "version": "==2.6.0"
- },
- "avocado-framework": {
- "hashes": [
- "sha256:244cb569f8eb4e50a22ac82e1a2b2bba2458999f4281efbe2651bd415d59c65b",
- "sha256:6f15998b67ecd0e7dde790c4de4dd249d6df52dfe6d5cc4e2dd6596df51c3583"
- ],
- "index": "pypi",
- "version": "==90.0"
- },
- "distlib": {
- "hashes": [
- "sha256:106fef6dc37dd8c0e2c0a60d3fca3e77460a48907f335fa28420463a6f799736",
- "sha256:23e223426b28491b1ced97dc3bbe183027419dfc7982b4fa2f05d5f3ff10711c"
- ],
- "index": "pypi",
- "version": "==0.3.2"
- },
- "filelock": {
- "hashes": [
- "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59",
- "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"
- ],
- "index": "pypi",
- "version": "==3.0.12"
- },
- "flake8": {
- "hashes": [
- "sha256:6a35f5b8761f45c5513e3405f110a86bea57982c3b75b766ce7b65217abe1670",
- "sha256:c01f8a3963b3571a8e6bd7a4063359aff90749e160778e03817cd9b71c9e07d2"
- ],
- "index": "pypi",
- "version": "==3.6.0"
- },
- "fusepy": {
- "hashes": [
- "sha256:10f5c7f5414241bffecdc333c4d3a725f1d6605cae6b4eaf86a838ff49cdaf6c",
- "sha256:a9f3a3699080ddcf0919fd1eb2cf743e1f5859ca54c2018632f939bdfac269ee"
- ],
- "index": "pypi",
- "version": "==2.0.4"
- },
- "importlib-metadata": {
- "hashes": [
- "sha256:90bb658cdbbf6d1735b6341ce708fc7024a3e14e99ffdc5783edea9f9b077f83",
- "sha256:dc15b2969b4ce36305c51eebe62d418ac7791e9a157911d58bfb1f9ccd8e2070"
- ],
- "markers": "python_version < '3.8'",
- "version": "==1.7.0"
- },
- "importlib-resources": {
- "hashes": [
- "sha256:54161657e8ffc76596c4ede7080ca68cb02962a2e074a2586b695a93a925d36e",
- "sha256:e962bff7440364183203d179d7ae9ad90cb1f2b74dcb84300e88ecc42dca3351"
- ],
- "index": "pypi",
- "version": "==5.1.4"
- },
- "isort": {
- "hashes": [
- "sha256:408e4d75d84f51b64d0824894afee44469eba34a4caee621dc53799f80d71ccc",
- "sha256:64022dea6a06badfa09b300b4dfe8ba968114a737919e8ed50aea1c288f078aa"
- ],
- "index": "pypi",
- "version": "==5.1.2"
- },
- "lazy-object-proxy": {
- "hashes": [
- "sha256:17e0967ba374fc24141738c69736da90e94419338fd4c7c7bef01ee26b339653",
- "sha256:1fee665d2638491f4d6e55bd483e15ef21f6c8c2095f235fef72601021e64f61",
- "sha256:22ddd618cefe54305df49e4c069fa65715be4ad0e78e8d252a33debf00f6ede2",
- "sha256:24a5045889cc2729033b3e604d496c2b6f588c754f7a62027ad4437a7ecc4837",
- "sha256:410283732af311b51b837894fa2f24f2c0039aa7f220135192b38fcc42bd43d3",
- "sha256:4732c765372bd78a2d6b2150a6e99d00a78ec963375f236979c0626b97ed8e43",
- "sha256:489000d368377571c6f982fba6497f2aa13c6d1facc40660963da62f5c379726",
- "sha256:4f60460e9f1eb632584c9685bccea152f4ac2130e299784dbaf9fae9f49891b3",
- "sha256:5743a5ab42ae40caa8421b320ebf3a998f89c85cdc8376d6b2e00bd12bd1b587",
- "sha256:85fb7608121fd5621cc4377a8961d0b32ccf84a7285b4f1d21988b2eae2868e8",
- "sha256:9698110e36e2df951c7c36b6729e96429c9c32b3331989ef19976592c5f3c77a",
- "sha256:9d397bf41caad3f489e10774667310d73cb9c4258e9aed94b9ec734b34b495fd",
- "sha256:b579f8acbf2bdd9ea200b1d5dea36abd93cabf56cf626ab9c744a432e15c815f",
- "sha256:b865b01a2e7f96db0c5d12cfea590f98d8c5ba64ad222300d93ce6ff9138bcad",
- "sha256:bf34e368e8dd976423396555078def5cfc3039ebc6fc06d1ae2c5a65eebbcde4",
- "sha256:c6938967f8528b3668622a9ed3b31d145fab161a32f5891ea7b84f6b790be05b",
- "sha256:d1c2676e3d840852a2de7c7d5d76407c772927addff8d742b9808fe0afccebdf",
- "sha256:d7124f52f3bd259f510651450e18e0fd081ed82f3c08541dffc7b94b883aa981",
- "sha256:d900d949b707778696fdf01036f58c9876a0d8bfe116e8d220cfd4b15f14e741",
- "sha256:ebfd274dcd5133e0afae738e6d9da4323c3eb021b3e13052d8cbd0e457b1256e",
- "sha256:ed361bb83436f117f9917d282a456f9e5009ea12fd6de8742d1a4752c3017e93",
- "sha256:f5144c75445ae3ca2057faac03fda5a902eff196702b0a24daf1d6ce0650514b"
- ],
- "index": "pypi",
- "version": "==1.6.0"
- },
- "mccabe": {
- "hashes": [
- "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
- "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
- ],
- "version": "==0.6.1"
- },
- "mypy": {
- "hashes": [
- "sha256:00cb1964a7476e871d6108341ac9c1a857d6bd20bf5877f4773ac5e9d92cd3cd",
- "sha256:127de5a9b817a03a98c5ae8a0c46a20dc44442af6dcfa2ae7f96cb519b312efa",
- "sha256:1f3976a945ad7f0a0727aafdc5651c2d3278e3c88dee94e2bf75cd3386b7b2f4",
- "sha256:2f8c098f12b402c19b735aec724cc9105cc1a9eea405d08814eb4b14a6fb1a41",
- "sha256:4ef13b619a289aa025f2273e05e755f8049bb4eaba6d703a425de37d495d178d",
- "sha256:5d142f219bf8c7894dfa79ebfb7d352c4c63a325e75f10dfb4c3db9417dcd135",
- "sha256:62eb5dd4ea86bda8ce386f26684f7f26e4bfe6283c9f2b6ca6d17faf704dcfad",
- "sha256:64c36eb0936d0bfb7d8da49f92c18e312ad2e3ed46e5548ae4ca997b0d33bd59",
- "sha256:75eed74d2faf2759f79c5f56f17388defd2fc994222312ec54ee921e37b31ad4",
- "sha256:974bebe3699b9b46278a7f076635d219183da26e1a675c1f8243a69221758273",
- "sha256:a5e5bb12b7982b179af513dddb06fca12285f0316d74f3964078acbfcf4c68f2",
- "sha256:d31291df31bafb997952dc0a17ebb2737f802c754aed31dd155a8bfe75112c57",
- "sha256:d3b4941de44341227ece1caaf5b08b23e42ad4eeb8b603219afb11e9d4cfb437",
- "sha256:eadb865126da4e3c4c95bdb47fe1bb087a3e3ea14d39a3b13224b8a4d9f9a102"
- ],
- "index": "pypi",
- "version": "==0.780"
- },
- "mypy-extensions": {
- "hashes": [
- "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d",
- "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"
- ],
- "version": "==0.4.3"
- },
- "packaging": {
- "hashes": [
- "sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5",
- "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a"
- ],
- "index": "pypi",
- "version": "==20.9"
- },
- "pluggy": {
- "hashes": [
- "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0",
- "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"
- ],
- "index": "pypi",
- "version": "==0.13.1"
- },
- "py": {
- "hashes": [
- "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3",
- "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"
- ],
- "index": "pypi",
- "version": "==1.10.0"
- },
- "pycodestyle": {
- "hashes": [
- "sha256:74abc4e221d393ea5ce1f129ea6903209940c1ecd29e002e8c6933c2b21026e0",
- "sha256:cbc619d09254895b0d12c2c691e237b2e91e9b2ecf5e84c26b35400f93dcfb83",
- "sha256:cbfca99bd594a10f674d0cd97a3d802a1fdef635d4361e1a2658de47ed261e3a"
- ],
- "version": "==2.4.0"
- },
- "pyflakes": {
- "hashes": [
- "sha256:9a7662ec724d0120012f6e29d6248ae3727d821bba522a0e6b356eff19126a49",
- "sha256:f661252913bc1dbe7fcfcbf0af0db3f42ab65aabd1a6ca68fe5d466bace94dae"
- ],
- "version": "==2.0.0"
- },
- "pygments": {
- "hashes": [
- "sha256:a18f47b506a429f6f4b9df81bb02beab9ca21d0a5fee38ed15aef65f0545519f",
- "sha256:d66e804411278594d764fc69ec36ec13d9ae9147193a1740cd34d272ca383b8e"
- ],
- "index": "pypi",
- "version": "==2.9.0"
- },
- "pylint": {
- "hashes": [
- "sha256:082a6d461b54f90eea49ca90fff4ee8b6e45e8029e5dbd72f6107ef84f3779c0",
- "sha256:a01cd675eccf6e25b3bdb42be184eb46aaf89187d612ba0fb5f93328ed6b0fd5"
- ],
- "index": "pypi",
- "version": "==2.8.0"
- },
- "pyparsing": {
- "hashes": [
- "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1",
- "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"
- ],
- "index": "pypi",
- "version": "==2.4.7"
- },
- "qemu": {
- "editable": true,
- "path": "."
- },
- "setuptools": {
- "hashes": [
- "sha256:22c7348c6d2976a52632c67f7ab0cdf40147db7789f9aed18734643fe9cf3373",
- "sha256:4ce92f1e1f8f01233ee9952c04f6b81d1e02939d6e1b488428154974a4d0783e"
- ],
- "markers": "python_version >= '3.6'",
- "version": "==59.6.0"
- },
- "six": {
- "hashes": [
- "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
- "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
- ],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==1.16.0"
- },
- "toml": {
- "hashes": [
- "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b",
- "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"
- ],
- "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==0.10.2"
- },
- "tox": {
- "hashes": [
- "sha256:c60692d92fe759f46c610ac04c03cf0169432d1ff8e981e8ae63e068d0954fc3",
- "sha256:f179cb4043d7dc1339425dd49ab1dd8c916246b0d9173143c1b0af7498a03ab0"
- ],
- "index": "pypi",
- "version": "==3.18.0"
- },
- "typed-ast": {
- "hashes": [
- "sha256:01ae5f73431d21eead5015997ab41afa53aa1fbe252f9da060be5dad2c730ace",
- "sha256:067a74454df670dcaa4e59349a2e5c81e567d8d65458d480a5b3dfecec08c5ff",
- "sha256:0fb71b8c643187d7492c1f8352f2c15b4c4af3f6338f21681d3681b3dc31a266",
- "sha256:1b3ead4a96c9101bef08f9f7d1217c096f31667617b58de957f690c92378b528",
- "sha256:2068531575a125b87a41802130fa7e29f26c09a2833fea68d9a40cf33902eba6",
- "sha256:209596a4ec71d990d71d5e0d312ac935d86930e6eecff6ccc7007fe54d703808",
- "sha256:2c726c276d09fc5c414693a2de063f521052d9ea7c240ce553316f70656c84d4",
- "sha256:398e44cd480f4d2b7ee8d98385ca104e35c81525dd98c519acff1b79bdaac363",
- "sha256:52b1eb8c83f178ab787f3a4283f68258525f8d70f778a2f6dd54d3b5e5fb4341",
- "sha256:5feca99c17af94057417d744607b82dd0a664fd5e4ca98061480fd8b14b18d04",
- "sha256:7538e495704e2ccda9b234b82423a4038f324f3a10c43bc088a1636180f11a41",
- "sha256:760ad187b1041a154f0e4d0f6aae3e40fdb51d6de16e5c99aedadd9246450e9e",
- "sha256:777a26c84bea6cd934422ac2e3b78863a37017618b6e5c08f92ef69853e765d3",
- "sha256:95431a26309a21874005845c21118c83991c63ea800dd44843e42a916aec5899",
- "sha256:9ad2c92ec681e02baf81fdfa056fe0d818645efa9af1f1cd5fd6f1bd2bdfd805",
- "sha256:9c6d1a54552b5330bc657b7ef0eae25d00ba7ffe85d9ea8ae6540d2197a3788c",
- "sha256:aee0c1256be6c07bd3e1263ff920c325b59849dc95392a05f258bb9b259cf39c",
- "sha256:af3d4a73793725138d6b334d9d247ce7e5f084d96284ed23f22ee626a7b88e39",
- "sha256:b36b4f3920103a25e1d5d024d155c504080959582b928e91cb608a65c3a49e1a",
- "sha256:b9574c6f03f685070d859e75c7f9eeca02d6933273b5e69572e5ff9d5e3931c3",
- "sha256:bff6ad71c81b3bba8fa35f0f1921fb24ff4476235a6e94a26ada2e54370e6da7",
- "sha256:c190f0899e9f9f8b6b7863debfb739abcb21a5c054f911ca3596d12b8a4c4c7f",
- "sha256:c907f561b1e83e93fad565bac5ba9c22d96a54e7ea0267c708bffe863cbe4075",
- "sha256:cae53c389825d3b46fb37538441f75d6aecc4174f615d048321b716df2757fb0",
- "sha256:dd4a21253f42b8d2b48410cb31fe501d32f8b9fbeb1f55063ad102fe9c425e40",
- "sha256:dde816ca9dac1d9c01dd504ea5967821606f02e510438120091b84e852367428",
- "sha256:f2362f3cb0f3172c42938946dbc5b7843c2a28aec307c49100c8b38764eb6927",
- "sha256:f328adcfebed9f11301eaedfa48e15bdece9b519fb27e6a8c01aa52a17ec31b3",
- "sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f",
- "sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65"
- ],
- "markers": "python_version < '3.8' and implementation_name == 'cpython'",
- "version": "==1.4.3"
- },
- "typing-extensions": {
- "hashes": [
- "sha256:0ac0f89795dd19de6b97debb0c6af1c70987fd80a2d62d1958f7e56fcc31b497",
- "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342",
- "sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84"
- ],
- "index": "pypi",
- "version": "==3.10.0.0"
- },
- "urwid": {
- "hashes": [
- "sha256:588bee9c1cb208d0906a9f73c613d2bd32c3ed3702012f51efe318a3f2127eae"
- ],
- "index": "pypi",
- "version": "==2.1.2"
- },
- "urwid-readline": {
- "hashes": [
- "sha256:018020cbc864bb5ed87be17dc26b069eae2755cb29f3a9c569aac3bded1efaf4"
- ],
- "index": "pypi",
- "version": "==0.13"
- },
- "virtualenv": {
- "hashes": [
- "sha256:14fdf849f80dbb29a4eb6caa9875d476ee2a5cf76a5f5415fa2f1606010ab467",
- "sha256:2b0126166ea7c9c3661f5b8e06773d28f83322de7a3ff7d06f0aed18c9de6a76"
- ],
- "index": "pypi",
- "version": "==20.4.7"
- },
- "wrapt": {
- "hashes": [
- "sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7"
- ],
- "version": "==1.12.1"
- },
- "zipp": {
- "hashes": [
- "sha256:3607921face881ba3e026887d8150cca609d517579abe052ac81fc5aeffdbd76",
- "sha256:51cb66cc54621609dd593d1787f286ee42a5c0adbb4b29abea5a63edc3e03098"
- ],
- "index": "pypi",
- "version": "==3.4.1"
- }
- }
-}
diff --git a/python/README.rst b/python/README.rst
index 9c1fcea..d62e715 100644
--- a/python/README.rst
+++ b/python/README.rst
@@ -77,9 +77,6 @@
- ``MANIFEST.in`` is read by python setuptools, it specifies additional files
that should be included by a source distribution.
- ``PACKAGE.rst`` is used as the README file that is visible on PyPI.org.
-- ``Pipfile`` is used by Pipenv to generate ``Pipfile.lock``.
-- ``Pipfile.lock`` is a set of pinned package dependencies that this package
- is tested under in our CI suite. It is used by ``make check-pipenv``.
- ``README.rst`` you are here!
- ``VERSION`` contains the PEP-440 compliant version used to describe
this package; it is referenced by ``setup.cfg``.
diff --git a/python/qemu/qmp/protocol.py b/python/qemu/qmp/protocol.py
index 6d3d739..22e6029 100644
--- a/python/qemu/qmp/protocol.py
+++ b/python/qemu/qmp/protocol.py
@@ -207,7 +207,7 @@
logger = logging.getLogger(__name__)
# Maximum allowable size of read buffer
- _limit = (64 * 1024)
+ _limit = 64 * 1024
# -------------------------
# Section: Public interface
diff --git a/python/qemu/qmp/qmp_client.py b/python/qemu/qmp/qmp_client.py
index b5772e7..9d73ae6 100644
--- a/python/qemu/qmp/qmp_client.py
+++ b/python/qemu/qmp/qmp_client.py
@@ -198,7 +198,7 @@
logger = logging.getLogger(__name__)
# Read buffer limit; 10MB like libvirt default
- _limit = (10 * 1024 * 1024)
+ _limit = 10 * 1024 * 1024
# Type alias for pending execute() result items
_PendingT = Union[Message, ExecInterruptedError]
diff --git a/python/qemu/utils/qemu_ga_client.py b/python/qemu/utils/qemu_ga_client.py
index 8c38a7a..d8411bb 100644
--- a/python/qemu/utils/qemu_ga_client.py
+++ b/python/qemu/utils/qemu_ga_client.py
@@ -155,7 +155,7 @@
def fsfreeze(self, cmd: str) -> object:
if cmd not in ['status', 'freeze', 'thaw']:
- raise Exception('Invalid command: ' + cmd)
+ raise ValueError('Invalid command: ' + cmd)
# Can be int (freeze, thaw) or GuestFsfreezeStatus (status)
return getattr(self.qga, 'fsfreeze' + '_' + cmd)()
@@ -167,7 +167,7 @@
def suspend(self, mode: str) -> None:
if mode not in ['disk', 'ram', 'hybrid']:
- raise Exception('Invalid mode: ' + mode)
+ raise ValueError('Invalid mode: ' + mode)
try:
getattr(self.qga, 'suspend' + '_' + mode)()
@@ -178,7 +178,7 @@
def shutdown(self, mode: str = 'powerdown') -> None:
if mode not in ['powerdown', 'halt', 'reboot']:
- raise Exception('Invalid mode: ' + mode)
+ raise ValueError('Invalid mode: ' + mode)
try:
self.qga.shutdown(mode=mode)
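These guest-agent helpers now raise ValueError for an unknown fsfreeze/suspend/shutdown argument instead of a bare Exception, so callers can catch the bad-argument case specifically. A minimal sketch, assuming the QemuGuestAgentClient class from this module with its usual single-argument constructor taking the agent's socket path (the path below is a placeholder):

    from qemu.utils.qemu_ga_client import QemuGuestAgentClient

    client = QemuGuestAgentClient('/tmp/qga.sock')   # placeholder socket path

    try:
        print(client.fsfreeze('status'))   # 'status', 'freeze' or 'thaw'
        client.fsfreeze('defrost')         # typo: now raises ValueError
    except ValueError as err:
        print('bad guest agent request:', err)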
diff --git a/python/setup.cfg b/python/setup.cfg
index 5641815..9e923d9 100644
--- a/python/setup.cfg
+++ b/python/setup.cfg
@@ -33,9 +33,7 @@
* = py.typed
[options.extras_require]
-# For the devel group, When adding new dependencies or bumping the minimum
-# version, use e.g. "pipenv install --dev pylint==3.0.0".
-# Subsequently, edit 'Pipfile' to remove e.g. 'pylint = "==3.0.0'.
+# Remember to update tests/minreqs.txt if changing anything below:
devel =
avocado-framework >= 90.0
flake8 >= 3.6.0
diff --git a/python/tests/minreqs.txt b/python/tests/minreqs.txt
new file mode 100644
index 0000000..dfb8abb
--- /dev/null
+++ b/python/tests/minreqs.txt
@@ -0,0 +1,45 @@
+# This file lists the ***oldest possible dependencies*** needed to run
+# "make check" successfully under ***Python 3.6***. It is used primarily
+# by GitLab CI to ensure that our stated minimum versions in setup.cfg
+# are truthful and regularly validated.
+#
+# This file should not contain any dependencies that are not expressed
+# by the [devel] section of setup.cfg, except for transitive
+# dependencies which must be enumerated here explicitly to eliminate
+# dependency resolution ambiguity.
+#
+# When adding new dependencies, pin the very oldest non-yanked version
+# on PyPI that allows the test suite to pass.
+
+# Dependencies for the TUI addon (Required for successful linting)
+urwid==2.1.2
+urwid-readline==0.13
+Pygments==2.9.0
+
+# Dependencies for FUSE support for qom-fuse
+fusepy==2.0.4
+
+# Test-runners, utilities, etc.
+avocado-framework==90.0
+
+# Linters
+flake8==3.6.0
+isort==5.1.2
+mypy==0.780
+pylint==2.8.0
+
+# Transitive flake8 dependencies
+mccabe==0.6.0
+pycodestyle==2.4.0
+pyflakes==2.0.0
+
+# Transitive mypy dependencies
+mypy-extensions==0.4.3
+typed-ast==1.4.0
+typing-extensions==3.7.4
+
+# Transitive pylint dependencies
+astroid==2.5.4
+lazy-object-proxy==1.4.0
+toml==0.10.0
+wrapt==1.12.1
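The pins above are consumed by the min-venv/check-minreqs Makefile targets earlier in this series; the environment setup they perform boils down to the steps below. A rough Python equivalent, run from the python/ directory (the directory name and the Python 3.6 expectation come from the Makefile; treat this as a sketch, not a supported interface):

    #!/usr/bin/env python3
    """Recreate the minreqs venv roughly the way python/Makefile does."""
    import subprocess
    import venv

    VENV_DIR = '.min-venv'   # QEMU_MINVENV_DIR in the Makefile

    # 1. Create the venv; the Makefile insists on python3.6 for fidelity.
    venv.EnvBuilder(with_pip=True).create(VENV_DIR)

    # 2. Install the pinned minimum requirements, then the qemu package itself.
    pip = f'{VENV_DIR}/bin/pip'
    subprocess.run([pip, 'install', '-r', 'tests/minreqs.txt'], check=True)
    subprocess.run([pip, 'install', '-e', '.'], check=True)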
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 7f331eb..c05ad0c 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3922,10 +3922,12 @@
##
# @RbdImageEncryptionFormat:
#
+# @luks-any: Used for opening either luks or luks2 (Since 8.0)
+#
# Since: 6.1
##
{ 'enum': 'RbdImageEncryptionFormat',
- 'data': [ 'luks', 'luks2' ] }
+ 'data': [ 'luks', 'luks2', 'luks-any' ] }
##
# @RbdEncryptionOptionsLUKSBase:
@@ -3968,6 +3970,15 @@
'data': { } }
##
+# @RbdEncryptionOptionsLUKSAny:
+#
+# Since: 8.0
+##
+{ 'struct': 'RbdEncryptionOptionsLUKSAny',
+ 'base': 'RbdEncryptionOptionsLUKSBase',
+ 'data': { } }
+
+##
# @RbdEncryptionCreateOptionsLUKS:
#
# Since: 6.1
@@ -3988,13 +3999,23 @@
##
# @RbdEncryptionOptions:
#
+# @format: Encryption format.
+#
+# @parent: Parent image encryption options (for cloned images).
+# Can be left unspecified if this cloned image is encrypted
+# using the same format and secret as its parent image (i.e.
+# not explicitly formatted) or if its parent image is not
+# encrypted. (Since 8.0)
+#
# Since: 6.1
##
{ 'union': 'RbdEncryptionOptions',
- 'base': { 'format': 'RbdImageEncryptionFormat' },
+ 'base': { 'format': 'RbdImageEncryptionFormat',
+ '*parent': 'RbdEncryptionOptions' },
'discriminator': 'format',
'data': { 'luks': 'RbdEncryptionOptionsLUKS',
- 'luks2': 'RbdEncryptionOptionsLUKS2' } }
+ 'luks2': 'RbdEncryptionOptionsLUKS2',
+ 'luks-any': 'RbdEncryptionOptionsLUKSAny'} }
##
# @RbdEncryptionCreateOptions:
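With 'luks-any' in the format enum and the new optional 'parent' member, a cloned RBD image can be opened without caring whether its header is LUKS1 or LUKS2, and the parent's encryption can be described separately. A hedged sketch using the in-tree qemu.qmp package; the node name, pool/image names, passphrases and socket path are placeholders, and the exact blockdev-add layout should be double-checked against the rbd driver documentation:

    import asyncio
    from qemu.qmp import QMPClient

    async def attach_cloned_rbd():
        qmp = QMPClient('rbd-demo')
        await qmp.connect('/tmp/qmp.sock')          # placeholder QMP socket

        for sec_id in ('sec-clone', 'sec-parent'):
            await qmp.execute('object-add', {
                'qom-type': 'secret', 'id': sec_id, 'data': 'passphrase',
            })

        await qmp.execute('blockdev-add', {
            'driver': 'rbd', 'node-name': 'clone0',
            'pool': 'rbd', 'image': 'clone',
            'encrypt': {
                # 'luks-any' opens either LUKS or LUKS2 headers (since 8.0).
                'format': 'luks-any', 'key-secret': 'sec-clone',
                # Only needed when the clone was explicitly formatted with a
                # different secret than its (encrypted) parent image.
                'parent': {'format': 'luks-any', 'key-secret': 'sec-parent'},
            },
        })
        await qmp.disconnect()

    asyncio.run(attach_cloned_rbd())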
diff --git a/qapi/net.json b/qapi/net.json
index 522ac58..d6eb300 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -585,6 +585,10 @@
# @addr: socket address to listen on (server=true)
# or connect to (server=false)
# @server: create server socket (default: false)
+# @reconnect: For a client socket, if the socket is disconnected,
+#             then attempt a reconnect after the given number of seconds.
+#             Setting this to zero disables reconnection. (default: 0)
+#             (since 8.0)
#
# Only SocketAddress types 'unix', 'inet' and 'fd' are supported.
#
@@ -593,7 +597,8 @@
{ 'struct': 'NetdevStreamOptions',
'data': {
'addr': 'SocketAddress',
- '*server': 'bool' } }
+ '*server': 'bool',
+ '*reconnect': 'uint32' } }
##
# @NetdevDgramOptions:
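The same option can also be exercised at runtime through netdev_add; a minimal, hedged sketch with the qemu.qmp client again (socket path, addresses and the five-second interval are arbitrary placeholders):

    import asyncio
    from qemu.qmp import QMPClient

    async def add_stream_netdev():
        qmp = QMPClient()
        await qmp.connect('/tmp/qmp.sock')          # placeholder QMP socket
        await qmp.execute('netdev_add', {
            'type': 'stream', 'id': 'net0', 'server': False,
            'addr': {'type': 'inet', 'host': '127.0.0.1', 'port': '1234'},
            'reconnect': 5,   # retry every 5 seconds after a disconnect; 0 disables
        })
        await qmp.disconnect()

    asyncio.run(add_stream_netdev())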
diff --git a/qemu-img.c b/qemu-img.c
index 7c05931..9aeac69 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1991,7 +1991,9 @@
qemu_co_mutex_unlock(&s->lock);
break;
}
- n = convert_iteration_sectors(s, s->sector_num);
+ WITH_GRAPH_RDLOCK_GUARD() {
+ n = convert_iteration_sectors(s, s->sector_num);
+ }
if (n < 0) {
qemu_co_mutex_unlock(&s->lock);
s->ret = n;
@@ -2039,7 +2041,9 @@
if (s->ret == -EINPROGRESS) {
if (copy_range) {
- ret = convert_co_copy_range(s, sector_num, n);
+ WITH_GRAPH_RDLOCK_GUARD() {
+ ret = convert_co_copy_range(s, sector_num, n);
+ }
if (ret) {
s->copy_range = false;
goto retry;
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index a061031..e7a02f5 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -338,7 +338,8 @@
*/
#define MISALIGN_OFFSET 16
-static void *qemu_io_alloc(BlockBackend *blk, size_t len, int pattern)
+static void *qemu_io_alloc(BlockBackend *blk, size_t len, int pattern,
+ bool register_buf)
{
void *buf;
@@ -347,16 +348,24 @@
}
buf = blk_blockalign(blk, len);
memset(buf, pattern, len);
+ if (register_buf) {
+ blk_register_buf(blk, buf, len, &error_abort);
+ }
if (qemuio_misalign) {
buf += MISALIGN_OFFSET;
}
return buf;
}
-static void qemu_io_free(void *p)
+static void qemu_io_free(BlockBackend *blk, void *p, size_t len,
+ bool unregister_buf)
{
if (qemuio_misalign) {
p -= MISALIGN_OFFSET;
+ len += MISALIGN_OFFSET;
+ }
+ if (unregister_buf) {
+ blk_unregister_buf(blk, p, len);
}
qemu_vfree(p);
}
@@ -371,14 +380,16 @@
* @blk - the block backend where the buffer content is going to be written to
* @len - the buffer length
* @file_name - the file to read the content from
+ * @register_buf - call blk_register_buf()
*
* Returns: the buffer pointer on success
* NULL on error
*/
static void *qemu_io_alloc_from_file(BlockBackend *blk, size_t len,
- const char *file_name)
+ const char *file_name, bool register_buf)
{
- char *buf, *buf_origin;
+ size_t alloc_len = len + (qemuio_misalign ? MISALIGN_OFFSET : 0);
+ char *alloc_buf, *buf, *end;
FILE *f = fopen(file_name, "r");
int pattern_len;
@@ -387,19 +398,13 @@
return NULL;
}
- if (qemuio_misalign) {
- len += MISALIGN_OFFSET;
- }
-
- buf_origin = buf = blk_blockalign(blk, len);
+ alloc_buf = buf = blk_blockalign(blk, alloc_len);
if (qemuio_misalign) {
- buf_origin += MISALIGN_OFFSET;
buf += MISALIGN_OFFSET;
- len -= MISALIGN_OFFSET;
}
- pattern_len = fread(buf_origin, 1, len, f);
+ pattern_len = fread(buf, 1, len, f);
if (ferror(f)) {
perror(file_name);
@@ -414,24 +419,23 @@
fclose(f);
f = NULL;
- if (len > pattern_len) {
- len -= pattern_len;
- buf += pattern_len;
-
- while (len > 0) {
- size_t len_to_copy = MIN(pattern_len, len);
-
- memcpy(buf, buf_origin, len_to_copy);
-
- len -= len_to_copy;
- buf += len_to_copy;
- }
+ if (register_buf) {
+ blk_register_buf(blk, alloc_buf, alloc_len, &error_abort);
}
- return buf_origin;
+ end = buf + len;
+ for (char *p = buf + pattern_len; p < end; p += pattern_len) {
+ memcpy(p, buf, MIN(pattern_len, end - p));
+ }
+
+ return buf;
error:
- qemu_io_free(buf_origin);
+ /*
+ * This code path is only taken before blk_register_buf() is called, so
+ * hardcode the qemu_io_free() unregister_buf argument to false.
+ */
+ qemu_io_free(blk, alloc_buf, alloc_len, false);
if (f) {
fclose(f);
}
@@ -490,7 +494,7 @@
*/
static void *
create_iovec(BlockBackend *blk, QEMUIOVector *qiov, char **argv, int nr_iov,
- int pattern)
+ int pattern, bool register_buf)
{
size_t *sizes = g_new0(size_t, nr_iov);
size_t count = 0;
@@ -526,7 +530,7 @@
qemu_iovec_init(qiov, nr_iov);
- buf = p = qemu_io_alloc(blk, count, pattern);
+ buf = p = qemu_io_alloc(blk, count, pattern, register_buf);
for (i = 0; i < nr_iov; i++) {
qemu_iovec_add(qiov, p, sizes[i]);
@@ -539,7 +543,7 @@
}
static int do_pread(BlockBackend *blk, char *buf, int64_t offset,
- int64_t bytes, int64_t *total)
+ int64_t bytes, BdrvRequestFlags flags, int64_t *total)
{
int ret;
@@ -547,7 +551,7 @@
return -ERANGE;
}
- ret = blk_pread(blk, offset, bytes, (uint8_t *)buf, 0);
+ ret = blk_pread(blk, offset, bytes, (uint8_t *)buf, flags);
if (ret < 0) {
return ret;
}
@@ -556,7 +560,7 @@
}
static int do_pwrite(BlockBackend *blk, char *buf, int64_t offset,
- int64_t bytes, int flags, int64_t *total)
+ int64_t bytes, BdrvRequestFlags flags, int64_t *total)
{
int ret;
@@ -573,7 +577,8 @@
}
static int do_pwrite_zeroes(BlockBackend *blk, int64_t offset,
- int64_t bytes, int flags, int64_t *total)
+ int64_t bytes, BdrvRequestFlags flags,
+ int64_t *total)
{
int ret = blk_pwrite_zeroes(blk, offset, bytes,
flags | BDRV_REQ_ZERO_WRITE);
@@ -637,11 +642,11 @@
}
static int do_aio_readv(BlockBackend *blk, QEMUIOVector *qiov,
- int64_t offset, int *total)
+ int64_t offset, BdrvRequestFlags flags, int *total)
{
int async_ret = NOT_DONE;
- blk_aio_preadv(blk, offset, qiov, 0, aio_rw_done, &async_ret);
+ blk_aio_preadv(blk, offset, qiov, flags, aio_rw_done, &async_ret);
while (async_ret == NOT_DONE) {
main_loop_wait(false);
}
@@ -651,7 +656,7 @@
}
static int do_aio_writev(BlockBackend *blk, QEMUIOVector *qiov,
- int64_t offset, int flags, int *total)
+ int64_t offset, BdrvRequestFlags flags, int *total)
{
int async_ret = NOT_DONE;
@@ -681,6 +686,7 @@
" -p, -- ignored for backwards compatibility\n"
" -P, -- use a pattern to verify read data\n"
" -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
" -s, -- start offset for pattern verification (only with -P)\n"
" -v, -- dump buffer to standard output\n"
"\n");
@@ -694,7 +700,7 @@
.cfunc = read_f,
.argmin = 2,
.argmax = -1,
- .args = "[-abCqv] [-P pattern [-s off] [-l len]] off len",
+ .args = "[-abCqrv] [-P pattern [-s off] [-l len]] off len",
.oneline = "reads a number of bytes at a specified offset",
.help = read_help,
};
@@ -712,8 +718,9 @@
int64_t total = 0;
int pattern = 0;
int64_t pattern_offset = 0, pattern_count = 0;
+ BdrvRequestFlags flags = 0;
- while ((c = getopt(argc, argv, "bCl:pP:qs:v")) != -1) {
+ while ((c = getopt(argc, argv, "bCl:pP:qrs:v")) != -1) {
switch (c) {
case 'b':
bflag = true;
@@ -742,6 +749,9 @@
case 'q':
qflag = true;
break;
+ case 'r':
+ flags |= BDRV_REQ_REGISTERED_BUF;
+ break;
case 's':
sflag = true;
pattern_offset = cvtnum(optarg);
@@ -806,15 +816,20 @@
count);
return -EINVAL;
}
+ if (flags & BDRV_REQ_REGISTERED_BUF) {
+ printf("I/O buffer registration is not supported when reading "
+ "from vmstate\n");
+ return -EINVAL;
+ }
}
- buf = qemu_io_alloc(blk, count, 0xab);
+ buf = qemu_io_alloc(blk, count, 0xab, flags & BDRV_REQ_REGISTERED_BUF);
clock_gettime(CLOCK_MONOTONIC, &t1);
if (bflag) {
ret = do_load_vmstate(blk, buf, offset, count, &total);
} else {
- ret = do_pread(blk, buf, offset, count, &total);
+ ret = do_pread(blk, buf, offset, count, flags, &total);
}
clock_gettime(CLOCK_MONOTONIC, &t2);
@@ -851,7 +866,7 @@
print_report("read", &t2, offset, count, total, cnt, Cflag);
out:
- qemu_io_free(buf);
+ qemu_io_free(blk, buf, count, flags & BDRV_REQ_REGISTERED_BUF);
return ret;
}
@@ -869,8 +884,9 @@
" Uses multiple iovec buffers if more than one byte range is specified.\n"
" -C, -- report statistics in a machine parsable format\n"
" -P, -- use a pattern to verify read data\n"
-" -v, -- dump buffer to standard output\n"
" -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
+" -v, -- dump buffer to standard output\n"
"\n");
}
@@ -881,7 +897,7 @@
.cfunc = readv_f,
.argmin = 2,
.argmax = -1,
- .args = "[-Cqv] [-P pattern] off len [len..]",
+ .args = "[-Cqrv] [-P pattern] off len [len..]",
.oneline = "reads a number of bytes at a specified offset",
.help = readv_help,
};
@@ -899,8 +915,9 @@
QEMUIOVector qiov;
int pattern = 0;
bool Pflag = false;
+ BdrvRequestFlags flags = 0;
- while ((c = getopt(argc, argv, "CP:qv")) != -1) {
+ while ((c = getopt(argc, argv, "CP:qrv")) != -1) {
switch (c) {
case 'C':
Cflag = true;
@@ -915,6 +932,9 @@
case 'q':
qflag = true;
break;
+ case 'r':
+ flags |= BDRV_REQ_REGISTERED_BUF;
+ break;
case 'v':
vflag = true;
break;
@@ -938,13 +958,14 @@
optind++;
nr_iov = argc - optind;
- buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, 0xab);
+ buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, 0xab,
+ flags & BDRV_REQ_REGISTERED_BUF);
if (buf == NULL) {
return -EINVAL;
}
clock_gettime(CLOCK_MONOTONIC, &t1);
- ret = do_aio_readv(blk, &qiov, offset, &total);
+ ret = do_aio_readv(blk, &qiov, offset, flags, &total);
clock_gettime(CLOCK_MONOTONIC, &t2);
if (ret < 0) {
@@ -979,8 +1000,8 @@
print_report("read", &t2, offset, qiov.size, total, cnt, Cflag);
out:
+ qemu_io_free(blk, buf, qiov.size, flags & BDRV_REQ_REGISTERED_BUF);
qemu_iovec_destroy(&qiov);
- qemu_io_free(buf);
return ret;
}
@@ -997,13 +1018,14 @@
" filled with a set pattern (0xcdcdcdcd).\n"
" -b, -- write to the VM state rather than the virtual disk\n"
" -c, -- write compressed data with blk_write_compressed\n"
+" -C, -- report statistics in a machine parsable format\n"
" -f, -- use Force Unit Access semantics\n"
" -n, -- with -z, don't allow slow fallback\n"
" -p, -- ignored for backwards compatibility\n"
" -P, -- use different pattern to fill file\n"
-" -s, -- use a pattern file to fill the write buffer\n"
-" -C, -- report statistics in a machine parsable format\n"
" -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
+" -s, -- use a pattern file to fill the write buffer\n"
" -u, -- with -z, allow unmapping\n"
" -z, -- write zeroes using blk_pwrite_zeroes\n"
"\n");
@@ -1018,7 +1040,7 @@
.perm = BLK_PERM_WRITE,
.argmin = 2,
.argmax = -1,
- .args = "[-bcCfnquz] [-P pattern | -s source_file] off len",
+ .args = "[-bcCfnqruz] [-P pattern | -s source_file] off len",
.oneline = "writes a number of bytes at a specified offset",
.help = write_help,
};
@@ -1028,7 +1050,7 @@
struct timespec t1, t2;
bool Cflag = false, qflag = false, bflag = false;
bool Pflag = false, zflag = false, cflag = false, sflag = false;
- int flags = 0;
+ BdrvRequestFlags flags = 0;
int c, cnt, ret;
char *buf = NULL;
int64_t offset;
@@ -1038,7 +1060,7 @@
int pattern = 0xcd;
const char *file_name = NULL;
- while ((c = getopt(argc, argv, "bcCfnpP:qs:uz")) != -1) {
+ while ((c = getopt(argc, argv, "bcCfnpP:qrs:uz")) != -1) {
switch (c) {
case 'b':
bflag = true;
@@ -1068,6 +1090,9 @@
case 'q':
qflag = true;
break;
+ case 'r':
+ flags |= BDRV_REQ_REGISTERED_BUF;
+ break;
case 's':
sflag = true;
file_name = optarg;
@@ -1147,14 +1172,21 @@
}
}
- if (!zflag) {
+ if (zflag) {
+ if (flags & BDRV_REQ_REGISTERED_BUF) {
+ printf("cannot combine zero write with registered I/O buffer\n");
+ return -EINVAL;
+ }
+ } else {
if (sflag) {
- buf = qemu_io_alloc_from_file(blk, count, file_name);
+ buf = qemu_io_alloc_from_file(blk, count, file_name,
+ flags & BDRV_REQ_REGISTERED_BUF);
if (!buf) {
return -EINVAL;
}
} else {
- buf = qemu_io_alloc(blk, count, pattern);
+ buf = qemu_io_alloc(blk, count, pattern,
+ flags & BDRV_REQ_REGISTERED_BUF);
}
}
@@ -1188,7 +1220,7 @@
out:
if (!zflag) {
- qemu_io_free(buf);
+ qemu_io_free(blk, buf, count, flags & BDRV_REQ_REGISTERED_BUF);
}
return ret;
}
@@ -1205,10 +1237,11 @@
"\n"
" Writes into a segment of the currently open file, using a buffer\n"
" filled with a set pattern (0xcdcdcdcd).\n"
-" -P, -- use different pattern to fill file\n"
" -C, -- report statistics in a machine parsable format\n"
" -f, -- use Force Unit Access semantics\n"
+" -P, -- use different pattern to fill file\n"
" -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
"\n");
}
@@ -1220,7 +1253,7 @@
.perm = BLK_PERM_WRITE,
.argmin = 2,
.argmax = -1,
- .args = "[-Cfq] [-P pattern] off len [len..]",
+ .args = "[-Cfqr] [-P pattern] off len [len..]",
.oneline = "writes a number of bytes at a specified offset",
.help = writev_help,
};
@@ -1229,7 +1262,7 @@
{
struct timespec t1, t2;
bool Cflag = false, qflag = false;
- int flags = 0;
+ BdrvRequestFlags flags = 0;
int c, cnt, ret;
char *buf;
int64_t offset;
@@ -1239,7 +1272,7 @@
int pattern = 0xcd;
QEMUIOVector qiov;
- while ((c = getopt(argc, argv, "CfqP:")) != -1) {
+ while ((c = getopt(argc, argv, "CfP:qr")) != -1) {
switch (c) {
case 'C':
Cflag = true;
@@ -1250,6 +1283,9 @@
case 'q':
qflag = true;
break;
+ case 'r':
+ flags |= BDRV_REQ_REGISTERED_BUF;
+ break;
case 'P':
pattern = parse_pattern(optarg);
if (pattern < 0) {
@@ -1275,7 +1311,8 @@
optind++;
nr_iov = argc - optind;
- buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, pattern);
+ buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, pattern,
+ flags & BDRV_REQ_REGISTERED_BUF);
if (buf == NULL) {
return -EINVAL;
}
@@ -1300,8 +1337,8 @@
t2 = tsub(t2, t1);
print_report("wrote", &t2, offset, qiov.size, total, cnt, Cflag);
out:
+ qemu_io_free(blk, buf, qiov.size, flags & BDRV_REQ_REGISTERED_BUF);
qemu_iovec_destroy(&qiov);
- qemu_io_free(buf);
return ret;
}
@@ -1317,6 +1354,7 @@
bool zflag;
BlockAcctCookie acct;
int pattern;
+ BdrvRequestFlags flags;
struct timespec t1;
};
@@ -1346,7 +1384,8 @@
ctx->qiov.size, 1, ctx->Cflag);
out:
if (!ctx->zflag) {
- qemu_io_free(ctx->buf);
+ qemu_io_free(ctx->blk, ctx->buf, ctx->qiov.size,
+ ctx->flags & BDRV_REQ_REGISTERED_BUF);
qemu_iovec_destroy(&ctx->qiov);
}
g_free(ctx);
@@ -1391,7 +1430,8 @@
print_report("read", &t2, ctx->offset, ctx->qiov.size,
ctx->qiov.size, 1, ctx->Cflag);
out:
- qemu_io_free(ctx->buf);
+ qemu_io_free(ctx->blk, ctx->buf, ctx->qiov.size,
+ ctx->flags & BDRV_REQ_REGISTERED_BUF);
qemu_iovec_destroy(&ctx->qiov);
g_free(ctx);
}
@@ -1413,10 +1453,11 @@
" considered successful once the request is submitted, independently\n"
" of potential I/O errors or pattern mismatches.\n"
" -C, -- report statistics in a machine parsable format\n"
-" -P, -- use a pattern to verify read data\n"
" -i, -- treat request as invalid, for exercising stats\n"
-" -v, -- dump buffer to standard output\n"
+" -P, -- use a pattern to verify read data\n"
" -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
+" -v, -- dump buffer to standard output\n"
"\n");
}
@@ -1427,7 +1468,7 @@
.cfunc = aio_read_f,
.argmin = 2,
.argmax = -1,
- .args = "[-Ciqv] [-P pattern] off len [len..]",
+ .args = "[-Ciqrv] [-P pattern] off len [len..]",
.oneline = "asynchronously reads a number of bytes",
.help = aio_read_help,
};
@@ -1438,7 +1479,7 @@
struct aio_ctx *ctx = g_new0(struct aio_ctx, 1);
ctx->blk = blk;
- while ((c = getopt(argc, argv, "CP:iqv")) != -1) {
+ while ((c = getopt(argc, argv, "CiP:qrv")) != -1) {
switch (c) {
case 'C':
ctx->Cflag = true;
@@ -1459,6 +1500,9 @@
case 'q':
ctx->qflag = true;
break;
+ case 'r':
+ ctx->flags |= BDRV_REQ_REGISTERED_BUF;
+ break;
case 'v':
ctx->vflag = true;
break;
@@ -1485,7 +1529,8 @@
optind++;
nr_iov = argc - optind;
- ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov, 0xab);
+ ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov, 0xab,
+ ctx->flags & BDRV_REQ_REGISTERED_BUF);
if (ctx->buf == NULL) {
block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_READ);
g_free(ctx);
@@ -1495,7 +1540,8 @@
clock_gettime(CLOCK_MONOTONIC, &ctx->t1);
block_acct_start(blk_get_stats(blk), &ctx->acct, ctx->qiov.size,
BLOCK_ACCT_READ);
- blk_aio_preadv(blk, ctx->offset, &ctx->qiov, 0, aio_read_done, ctx);
+ blk_aio_preadv(blk, ctx->offset, &ctx->qiov, ctx->flags, aio_read_done,
+ ctx);
return 0;
}
@@ -1516,11 +1562,12 @@
" Note that due to its asynchronous nature, this command will be\n"
" considered successful once the request is submitted, independently\n"
" of potential I/O errors or pattern mismatches.\n"
-" -P, -- use different pattern to fill file\n"
" -C, -- report statistics in a machine parsable format\n"
" -f, -- use Force Unit Access semantics\n"
" -i, -- treat request as invalid, for exercising stats\n"
+" -P, -- use different pattern to fill file\n"
" -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
" -u, -- with -z, allow unmapping\n"
" -z, -- write zeroes using blk_aio_pwrite_zeroes\n"
"\n");
@@ -1534,7 +1581,7 @@
.perm = BLK_PERM_WRITE,
.argmin = 2,
.argmax = -1,
- .args = "[-Cfiquz] [-P pattern] off len [len..]",
+ .args = "[-Cfiqruz] [-P pattern] off len [len..]",
.oneline = "asynchronously writes a number of bytes",
.help = aio_write_help,
};
@@ -1544,22 +1591,24 @@
int nr_iov, c;
int pattern = 0xcd;
struct aio_ctx *ctx = g_new0(struct aio_ctx, 1);
- int flags = 0;
ctx->blk = blk;
- while ((c = getopt(argc, argv, "CfiqP:uz")) != -1) {
+ while ((c = getopt(argc, argv, "CfiP:qruz")) != -1) {
switch (c) {
case 'C':
ctx->Cflag = true;
break;
case 'f':
- flags |= BDRV_REQ_FUA;
+ ctx->flags |= BDRV_REQ_FUA;
break;
case 'q':
ctx->qflag = true;
break;
+ case 'r':
+ ctx->flags |= BDRV_REQ_REGISTERED_BUF;
+ break;
case 'u':
- flags |= BDRV_REQ_MAY_UNMAP;
+ ctx->flags |= BDRV_REQ_MAY_UNMAP;
break;
case 'P':
pattern = parse_pattern(optarg);
@@ -1595,7 +1644,7 @@
return -EINVAL;
}
- if ((flags & BDRV_REQ_MAY_UNMAP) && !ctx->zflag) {
+ if ((ctx->flags & BDRV_REQ_MAY_UNMAP) && !ctx->zflag) {
printf("-u requires -z to be specified\n");
g_free(ctx);
return -EINVAL;
@@ -1607,6 +1656,12 @@
return -EINVAL;
}
+ if (ctx->zflag && (ctx->flags & BDRV_REQ_REGISTERED_BUF)) {
+ printf("cannot combine zero write with registered I/O buffer\n");
+ g_free(ctx);
+ return -EINVAL;
+ }
+
ctx->offset = cvtnum(argv[optind]);
if (ctx->offset < 0) {
int ret = ctx->offset;
@@ -1625,12 +1680,12 @@
}
ctx->qiov.size = count;
- blk_aio_pwrite_zeroes(blk, ctx->offset, count, flags, aio_write_done,
- ctx);
+ blk_aio_pwrite_zeroes(blk, ctx->offset, count, ctx->flags,
+ aio_write_done, ctx);
} else {
nr_iov = argc - optind;
ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov,
- pattern);
+ pattern, ctx->flags & BDRV_REQ_REGISTERED_BUF);
if (ctx->buf == NULL) {
block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_WRITE);
g_free(ctx);
@@ -1641,8 +1696,8 @@
block_acct_start(blk_get_stats(blk), &ctx->acct, ctx->qiov.size,
BLOCK_ACCT_WRITE);
- blk_aio_pwritev(blk, ctx->offset, &ctx->qiov, flags, aio_write_done,
- ctx);
+ blk_aio_pwritev(blk, ctx->offset, &ctx->qiov, ctx->flags,
+ aio_write_done, ctx);
}
return 0;
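The new -r flag on the read/write/readv/writev/aio_read/aio_write commands maps to BDRV_REQ_REGISTERED_BUF and makes qemu-io register its I/O buffer with the block layer before issuing the request. A small, hedged way to exercise it from Python (the raw image path is a placeholder and must exist beforehand, e.g. created with qemu-img create -f raw test.img 1M):

    import subprocess

    # Write a pattern with a registered buffer, then read it back and verify.
    subprocess.run(['qemu-io', '-f', 'raw',
                    '-c', 'write -r -P 0xcd 0 64k',
                    '-c', 'read -r -P 0xcd 0 64k',
                    'test.img'], check=True)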
diff --git a/qemu-options.hx b/qemu-options.hx
index 88e93c6..beeb447 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1892,8 +1892,8 @@
ERST
DEF("iscsi", HAS_ARG, QEMU_OPTION_iscsi,
- "-iscsi [user=user][,password=password]\n"
- " [,header-digest=CRC32C|CR32C-NONE|NONE-CRC32C|NONE\n"
+ "-iscsi [user=user][,password=password][,password-secret=secret-id]\n"
+ " [,header-digest=CRC32C|CR32C-NONE|NONE-CRC32C|NONE]\n"
" [,initiator-name=initiator-iqn][,id=target-iqn]\n"
" [,timeout=timeout]\n"
" iSCSI session parameters\n", QEMU_ARCH_ALL)
@@ -2135,7 +2135,7 @@
" [,tls-channel=[main|display|cursor|inputs|record|playback]]\n"
" [,plaintext-channel=[main|display|cursor|inputs|record|playback]]\n"
" [,sasl=on|off][,disable-ticketing=on|off]\n"
- " [,password=<string>][,password-secret=<secret-id>]\n"
+ " [,password-secret=<secret-id>]\n"
" [,image-compression=[auto_glz|auto_lz|quic|glz|lz|off]]\n"
" [,jpeg-wan-compression=[auto|never|always]]\n"
" [,zlib-glz-wan-compression=[auto|never|always]]\n"
@@ -2161,13 +2161,6 @@
``ipv4=on|off``; \ ``ipv6=on|off``; \ ``unix=on|off``
Force using the specified IP version.
- ``password=<string>``
- Set the password you need to authenticate.
-
- This option is deprecated and insecure because it leaves the
- password visible in the process listing. Use ``password-secret``
- instead.
-
``password-secret=<secret-id>``
Set the ID of the ``secret`` object containing the password
you need to authenticate.
@@ -2769,9 +2762,9 @@
"-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n"
" configure a network backend to connect to another network\n"
" using an UDP tunnel\n"
- "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off]\n"
- "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off]\n"
- "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n"
+ "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]\n"
+ "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]\n"
+ "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]\n"
" configure a network backend to connect to another network\n"
" using a socket connection in stream mode.\n"
"-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n"
diff --git a/qga/commands.c b/qga/commands.c
index 3600773..172826f 100644
--- a/qga/commands.c
+++ b/qga/commands.c
@@ -32,9 +32,8 @@
#define GUEST_FILE_READ_COUNT_MAX (48 * MiB)
/* Note: in some situations, like with the fsfreeze, logging may be
- * temporarilly disabled. if it is necessary that a command be able
- * to log for accounting purposes, check ga_logging_enabled() beforehand,
- * and use the QERR_QGA_LOGGING_DISABLED to generate an error
+ * temporarily disabled. if it is necessary that a command be able
+ * to log for accounting purposes, check ga_logging_enabled() beforehand.
*/
void slog(const gchar *fmt, ...)
{
diff --git a/qga/main.c b/qga/main.c
index 85b7d6c..2b992a5 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -24,7 +24,6 @@
#include "qapi/qmp/qjson.h"
#include "guest-agent-core.h"
#include "qga-qapi-init-commands.h"
-#include "qapi/qmp/qerror.h"
#include "qapi/error.h"
#include "channel.h"
#include "qemu/cutils.h"
diff --git a/replay/replay.c b/replay/replay.c
index 9a0dc1c..c39156c 100644
--- a/replay/replay.c
+++ b/replay/replay.c
@@ -376,8 +376,12 @@
replay_mode = REPLAY_MODE_NONE;
}
-void replay_add_blocker(Error *reason)
+void replay_add_blocker(const char *feature)
{
+ Error *reason = NULL;
+
+ error_setg(&reason, "Record/replay feature is not supported for '%s'",
+ feature);
replay_blockers = g_slist_prepend(replay_blockers, reason);
}
diff --git a/replay/stubs-system.c b/replay/stubs-system.c
index 5c262b0..50cefdb 100644
--- a/replay/stubs-system.c
+++ b/replay/stubs-system.c
@@ -12,7 +12,7 @@
qemu_input_event_sync_impl();
}
-void replay_add_blocker(Error *reason)
+void replay_add_blocker(const char *feature)
{
}
void replay_audio_in(size_t *recorded, void *samples, size_t *wpos, size_t size)
diff --git a/roms/Makefile b/roms/Makefile
index 5e44d97..955f922 100644
--- a/roms/Makefile
+++ b/roms/Makefile
@@ -57,7 +57,6 @@
@echo "available build targets:"
@echo " bios -- update bios.bin (seabios)"
@echo " vgabios -- update vgabios binaries (seabios)"
- @echo " sgabios -- update sgabios binaries"
@echo " pxerom -- update nic roms (bios only)"
@echo " efirom -- update nic roms (bios+efi)"
@echo " slof -- update slof.bin"
@@ -102,11 +101,7 @@
OUT=$(CURDIR)/seabios/builds/$*/ all
-.PHONY: sgabios skiboot qboot
-sgabios:
- $(MAKE) -C sgabios
- cp sgabios/sgabios.bin ../pc-bios
-
+.PHONY: skiboot qboot
pxerom: $(patsubst %,pxe-rom-%,$(pxerom_variants))
@@ -199,8 +194,6 @@
clean:
rm -rf seabios/.config seabios/out seabios/builds
- $(MAKE) -C sgabios clean
- rm -f sgabios/.depend
$(MAKE) -C ipxe/src veryclean
$(MAKE) -C edk2/BaseTools clean
$(MAKE) -C SLOF clean
diff --git a/roms/sgabios b/roms/sgabios
deleted file mode 160000
index cbaee52..0000000
--- a/roms/sgabios
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit cbaee52287e5f32373181cff50a00b6c4ac9015a
diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py
index e82b648..60e9b31 100644
--- a/scripts/block-coroutine-wrapper.py
+++ b/scripts/block-coroutine-wrapper.py
@@ -63,8 +63,8 @@
class FuncDecl:
- def __init__(self, return_type: str, name: str, args: str,
- variant: str) -> None:
+ def __init__(self, wrapper_type: str, return_type: str, name: str,
+ args: str, variant: str) -> None:
self.return_type = return_type.strip()
self.name = name.strip()
self.struct_name = snake_to_camel(self.name)
@@ -72,8 +72,21 @@
self.create_only_co = 'mixed' not in variant
self.graph_rdlock = 'bdrv_rdlock' in variant
- subsystem, subname = self.name.split('_', 1)
- self.co_name = f'{subsystem}_co_{subname}'
+ self.wrapper_type = wrapper_type
+
+ if wrapper_type == 'co':
+ subsystem, subname = self.name.split('_', 1)
+ self.target_name = f'{subsystem}_co_{subname}'
+ else:
+ assert wrapper_type == 'no_co'
+ subsystem, co_infix, subname = self.name.split('_', 2)
+ if co_infix != 'co':
+ raise ValueError(f"Invalid no_co function name: {self.name}")
+ if not self.create_only_co:
+ raise ValueError(f"no_co function can't be mixed: {self.name}")
+ if self.graph_rdlock:
+ raise ValueError(f"no_co function can't be rdlock: {self.name}")
+ self.target_name = f'{subsystem}_{subname}'
t = self.args[0].type
if t == 'BlockDriverState *':
@@ -105,7 +118,8 @@
# Match wrappers declared with a co_wrapper mark
func_decl_re = re.compile(r'^(?P<return_type>[a-zA-Z][a-zA-Z0-9_]* [\*]?)'
- r'\s*co_wrapper'
+ r'(\s*coroutine_fn)?'
+ r'\s*(?P<wrapper_type>(no_)?co)_wrapper'
r'(?P<variant>(_[a-z][a-z0-9_]*)?)\s*'
r'(?P<wrapper_name>[a-z][a-z0-9_]*)'
r'\((?P<args>[^)]*)\);$', re.MULTILINE)
@@ -113,7 +127,8 @@
def func_decl_iter(text: str) -> Iterator:
for m in func_decl_re.finditer(text):
- yield FuncDecl(return_type=m.group('return_type'),
+ yield FuncDecl(wrapper_type=m.group('wrapper_type'),
+ return_type=m.group('return_type'),
name=m.group('wrapper_name'),
args=m.group('args'),
variant=m.group('variant'))
@@ -133,7 +148,7 @@
"""
Checks if we are already in coroutine
"""
- name = func.co_name
+ name = func.target_name
struct_name = func.struct_name
graph_assume_lock = 'assume_graph_lock();' if func.graph_rdlock else ''
@@ -163,7 +178,7 @@
"""
Assumes we are not in coroutine, and creates one
"""
- name = func.co_name
+ name = func.target_name
struct_name = func.struct_name
return f"""\
{func.return_type} {func.name}({ func.gen_list('{decl}') })
@@ -183,10 +198,11 @@
}}"""
-def gen_wrapper(func: FuncDecl) -> str:
+def gen_co_wrapper(func: FuncDecl) -> str:
assert not '_co_' in func.name
+ assert func.wrapper_type == 'co'
- name = func.co_name
+ name = func.target_name
struct_name = func.struct_name
graph_lock=''
@@ -225,11 +241,56 @@
{creation_function(func)}"""
+def gen_no_co_wrapper(func: FuncDecl) -> str:
+ assert '_co_' in func.name
+ assert func.wrapper_type == 'no_co'
+
+ name = func.target_name
+ struct_name = func.struct_name
+
+ return f"""\
+/*
+ * Wrappers for {name}
+ */
+
+typedef struct {struct_name} {{
+ Coroutine *co;
+ {func.return_field}
+{ func.gen_block(' {decl};') }
+}} {struct_name};
+
+static void {name}_bh(void *opaque)
+{{
+ {struct_name} *s = opaque;
+
+ {func.get_result}{name}({ func.gen_list('s->{name}') });
+
+ aio_co_wake(s->co);
+}}
+
+{func.return_type} coroutine_fn {func.name}({ func.gen_list('{decl}') })
+{{
+ {struct_name} s = {{
+ .co = qemu_coroutine_self(),
+{ func.gen_block(' .{name} = {name},') }
+ }};
+ assert(qemu_in_coroutine());
+
+ aio_bh_schedule_oneshot(qemu_get_aio_context(), {name}_bh, &s);
+ qemu_coroutine_yield();
+
+ {func.ret}
+}}"""
+
+
def gen_wrappers(input_code: str) -> str:
res = ''
for func in func_decl_iter(input_code):
res += '\n\n\n'
- res += gen_wrapper(func)
+ if func.wrapper_type == 'co':
+ res += gen_co_wrapper(func)
+ else:
+ res += gen_no_co_wrapper(func)
return res
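The declaration regex now captures a wrapper_type group so that both co_wrapper and the new no_co_wrapper marks are recognized, with an optional coroutine_fn allowed in front of the mark. A quick self-contained check of that pattern, using two made-up declarations that follow the naming rules enforced by FuncDecl:

    import re

    # Same pattern as scripts/block-coroutine-wrapper.py after this change.
    func_decl_re = re.compile(r'^(?P<return_type>[a-zA-Z][a-zA-Z0-9_]* [\*]?)'
                              r'(\s*coroutine_fn)?'
                              r'\s*(?P<wrapper_type>(no_)?co)_wrapper'
                              r'(?P<variant>(_[a-z][a-z0-9_]*)?)\s*'
                              r'(?P<wrapper_name>[a-z][a-z0-9_]*)'
                              r'\((?P<args>[^)]*)\);$', re.MULTILINE)

    header = """\
    int co_wrapper_mixed bdrv_test_func(BlockDriverState *bs);
    void coroutine_fn no_co_wrapper bdrv_co_other_func(BlockDriverState *bs);
    """

    for m in func_decl_re.finditer(header):
        print(m.group('wrapper_type'), m.group('wrapper_name'))
    # Prints:
    #   co bdrv_test_func
    #   no_co bdrv_co_other_func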
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
index 65eacf3..6e8983f 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -138,7 +138,6 @@
--disable-vhost-vdpa \
--disable-virglrenderer \
--disable-virtfs \
---disable-virtiofsd \
--disable-vnc \
--disable-vnc-jpeg \
--disable-png \
@@ -191,7 +190,6 @@
--enable-tpm \
--enable-trace-backends=dtrace \
--enable-usb-redir \
---enable-virtiofsd \
--enable-vhost-kernel \
--enable-vhost-net \
--enable-vhost-user \
diff --git a/scripts/ci/org.centos/stream/8/x86_64/test-avocado b/scripts/ci/org.centos/stream/8/x86_64/test-avocado
index 7aeecbc..f403e4e 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/test-avocado
+++ b/scripts/ci/org.centos/stream/8/x86_64/test-avocado
@@ -14,13 +14,6 @@
# * Require machine type "x-remote":
# - tests/avocado/multiprocess.py:Multiprocess.test_multiprocess_x86_64
#
-# * Needs superuser privileges:
-# - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_pre_virtiofsd_set_up
-# - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_pre_launch_set_up
-# - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_post_launch_set_up
-# - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_post_mount_set_up
-# - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_two_runs
-#
# * Requires display type "egl-headless":
# - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_virtio_vga_virgl
# - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_vhost_user_vga_virgl
diff --git a/scripts/coverity-scan/COMPONENTS.md b/scripts/coverity-scan/COMPONENTS.md
index 0e6ab49..639dcee 100644
--- a/scripts/coverity-scan/COMPONENTS.md
+++ b/scripts/coverity-scan/COMPONENTS.md
@@ -132,9 +132,6 @@
xen
~ (/qemu)?(.*/xen.*)
-virtiofsd
- ~ (/qemu)?(/tools/virtiofsd/.*)
-
(headers)
~ (/qemu)?(/include/.*)
diff --git a/scripts/meson-buildoptions.py b/scripts/meson-buildoptions.py
index 3e2b478..a04dcc7 100755
--- a/scripts/meson-buildoptions.py
+++ b/scripts/meson-buildoptions.py
@@ -61,7 +61,10 @@
# Convert the default value of an option to the string used in
# the help message
-def value_to_help(value):
+def get_help(opt):
+ if opt["name"] == "libdir":
+ return 'system default'
+ value = opt["value"]
if isinstance(value, list):
return ",".join(value)
if isinstance(value, bool):
@@ -88,7 +91,7 @@
def help_line(left, opt, indent, long):
right = f'{opt["description"]}'
if long:
- value = value_to_help(opt["value"])
+ value = get_help(opt)
if value != "auto" and value != "":
right += f" [{value}]"
if "choices" in opt and long:
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 0f71e92..5d969a9 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -49,7 +49,7 @@
printf "%s\n" ' --includedir=VALUE Header file directory [include]'
printf "%s\n" ' --interp-prefix=VALUE where to find shared libraries etc., use %M for'
printf "%s\n" ' cpu name [/usr/gnemul/qemu-%M]'
- printf "%s\n" ' --libdir=VALUE Library directory [lib64]'
+ printf "%s\n" ' --libdir=VALUE Library directory [system default]'
printf "%s\n" ' --libexecdir=VALUE Library executable directory [libexec]'
printf "%s\n" ' --localedir=VALUE Locale data directory [share/locale]'
printf "%s\n" ' --localstatedir=VALUE Localstate data directory [/var/local]'
@@ -70,6 +70,7 @@
printf "%s\n" ' attr attr/xattr support'
printf "%s\n" ' auth-pam PAM access control'
printf "%s\n" ' avx2 AVX2 optimizations'
+ printf "%s\n" ' avx512bw AVX512BW optimizations'
printf "%s\n" ' avx512f AVX512F optimizations'
printf "%s\n" ' blkio libblkio block device driver'
printf "%s\n" ' bochs bochs image format support'
@@ -108,6 +109,7 @@
printf "%s\n" ' kvm KVM acceleration support'
printf "%s\n" ' l2tpv3 l2tpv3 network backend support'
printf "%s\n" ' libdaxctl libdaxctl support'
+ printf "%s\n" ' libdw debuginfo support'
printf "%s\n" ' libiscsi libiscsi userspace initiator'
printf "%s\n" ' libnfs libnfs block device driver'
printf "%s\n" ' libpmem libpmem support'
@@ -173,7 +175,6 @@
printf "%s\n" ' vhost-vdpa vhost-vdpa kernel backend support'
printf "%s\n" ' virglrenderer virgl rendering support'
printf "%s\n" ' virtfs virtio-9p support'
- printf "%s\n" ' virtiofsd build virtiofs daemon (virtiofsd)'
printf "%s\n" ' vmnet vmnet.framework network backend support'
printf "%s\n" ' vnc VNC server'
printf "%s\n" ' vnc-jpeg JPEG lossy compression for VNC server'
@@ -198,6 +199,8 @@
--disable-auth-pam) printf "%s" -Dauth_pam=disabled ;;
--enable-avx2) printf "%s" -Davx2=enabled ;;
--disable-avx2) printf "%s" -Davx2=disabled ;;
+ --enable-avx512bw) printf "%s" -Davx512bw=enabled ;;
+ --disable-avx512bw) printf "%s" -Davx512bw=disabled ;;
--enable-avx512f) printf "%s" -Davx512f=enabled ;;
--disable-avx512f) printf "%s" -Davx512f=disabled ;;
--enable-gcov) printf "%s" -Db_coverage=true ;;
@@ -309,6 +312,8 @@
--enable-libdaxctl) printf "%s" -Dlibdaxctl=enabled ;;
--disable-libdaxctl) printf "%s" -Dlibdaxctl=disabled ;;
--libdir=*) quote_sh "-Dlibdir=$2" ;;
+ --enable-libdw) printf "%s" -Dlibdw=enabled ;;
+ --disable-libdw) printf "%s" -Dlibdw=disabled ;;
--libexecdir=*) quote_sh "-Dlibexecdir=$2" ;;
--enable-libiscsi) printf "%s" -Dlibiscsi=enabled ;;
--disable-libiscsi) printf "%s" -Dlibiscsi=disabled ;;
@@ -455,8 +460,6 @@
--disable-virglrenderer) printf "%s" -Dvirglrenderer=disabled ;;
--enable-virtfs) printf "%s" -Dvirtfs=enabled ;;
--disable-virtfs) printf "%s" -Dvirtfs=disabled ;;
- --enable-virtiofsd) printf "%s" -Dvirtiofsd=enabled ;;
- --disable-virtiofsd) printf "%s" -Dvirtiofsd=disabled ;;
--enable-vmnet) printf "%s" -Dvmnet=enabled ;;
--disable-vmnet) printf "%s" -Dvmnet=disabled ;;
--enable-vnc) printf "%s" -Dvnc=enabled ;;
diff --git a/scripts/qapi/.flake8 b/scripts/qapi/.flake8
index 6b158c6..a873ff6 100644
--- a/scripts/qapi/.flake8
+++ b/scripts/qapi/.flake8
@@ -1,2 +1,3 @@
[flake8]
-extend-ignore = E722 # Prefer pylint's bare-except checks to flake8's
+# Prefer pylint's bare-except checks to flake8's
+extend-ignore = E722
diff --git a/scripts/qapi/expr.py b/scripts/qapi/expr.py
index 5a1782b..ca01ea6 100644
--- a/scripts/qapi/expr.py
+++ b/scripts/qapi/expr.py
@@ -33,7 +33,6 @@
import re
from typing import (
- Collection,
Dict,
Iterable,
List,
@@ -44,18 +43,10 @@
from .common import c_name
from .error import QAPISemError
-from .parser import QAPIDoc
+from .parser import QAPIExpression
from .source import QAPISourceInfo
-# Deserialized JSON objects as returned by the parser.
-# The values of this mapping are not necessary to exhaustively type
-# here (and also not practical as long as mypy lacks recursive
-# types), because the purpose of this module is to interrogate that
-# type.
-_JSONObject = Dict[str, object]
-
-
# See check_name_str(), below.
valid_name = re.compile(r'(__[a-z0-9.-]+_)?'
r'(x-)?'
@@ -192,11 +183,11 @@
info, "%s name should not end in 'List'" % meta)
-def check_keys(value: _JSONObject,
+def check_keys(value: Dict[str, object],
info: QAPISourceInfo,
source: str,
- required: Collection[str],
- optional: Collection[str]) -> None:
+ required: List[str],
+ optional: List[str]) -> None:
"""
Ensure that a dict has a specific set of keys.
@@ -229,12 +220,11 @@
pprint(unknown), pprint(allowed)))
-def check_flags(expr: _JSONObject, info: QAPISourceInfo) -> None:
+def check_flags(expr: QAPIExpression) -> None:
"""
Ensure flag members (if present) have valid values.
:param expr: The expression to validate.
- :param info: QAPI schema source file information.
:raise QAPISemError:
When certain flags have an invalid value, or when
@@ -243,21 +233,22 @@
for key in ('gen', 'success-response'):
if key in expr and expr[key] is not False:
raise QAPISemError(
- info, "flag '%s' may only use false value" % key)
+ expr.info, "flag '%s' may only use false value" % key)
for key in ('boxed', 'allow-oob', 'allow-preconfig', 'coroutine'):
if key in expr and expr[key] is not True:
raise QAPISemError(
- info, "flag '%s' may only use true value" % key)
+ expr.info, "flag '%s' may only use true value" % key)
if 'allow-oob' in expr and 'coroutine' in expr:
# This is not necessarily a fundamental incompatibility, but
# we don't have a use case and the desired semantics isn't
# obvious. The simplest solution is to forbid it until we get
# a use case for it.
- raise QAPISemError(info, "flags 'allow-oob' and 'coroutine' "
- "are incompatible")
+ raise QAPISemError(
+ expr.info, "flags 'allow-oob' and 'coroutine' are incompatible")
-def check_if(expr: _JSONObject, info: QAPISourceInfo, source: str) -> None:
+def check_if(expr: Dict[str, object],
+ info: QAPISourceInfo, source: str) -> None:
"""
Validate the ``if`` member of an object.
@@ -447,12 +438,11 @@
check_if(feat, info, source)
-def check_enum(expr: _JSONObject, info: QAPISourceInfo) -> None:
+def check_enum(expr: QAPIExpression) -> None:
"""
Normalize and validate this expression as an ``enum`` definition.
:param expr: The expression to validate.
- :param info: QAPI schema source file information.
:raise QAPISemError: When ``expr`` is not a valid ``enum``.
:return: None, ``expr`` is normalized in-place as needed.
@@ -460,6 +450,7 @@
name = expr['enum']
members = expr['data']
prefix = expr.get('prefix')
+ info = expr.info
if not isinstance(members, list):
raise QAPISemError(info, "'data' must be an array")
@@ -486,12 +477,11 @@
check_features(member.get('features'), info)
-def check_struct(expr: _JSONObject, info: QAPISourceInfo) -> None:
+def check_struct(expr: QAPIExpression) -> None:
"""
Normalize and validate this expression as a ``struct`` definition.
:param expr: The expression to validate.
- :param info: QAPI schema source file information.
:raise QAPISemError: When ``expr`` is not a valid ``struct``.
:return: None, ``expr`` is normalized in-place as needed.
@@ -499,16 +489,15 @@
name = cast(str, expr['struct']) # Checked in check_exprs
members = expr['data']
- check_type(members, info, "'data'", allow_dict=name)
- check_type(expr.get('base'), info, "'base'")
+ check_type(members, expr.info, "'data'", allow_dict=name)
+ check_type(expr.get('base'), expr.info, "'base'")
-def check_union(expr: _JSONObject, info: QAPISourceInfo) -> None:
+def check_union(expr: QAPIExpression) -> None:
"""
Normalize and validate this expression as a ``union`` definition.
:param expr: The expression to validate.
- :param info: QAPI schema source file information.
:raise QAPISemError: when ``expr`` is not a valid ``union``.
:return: None, ``expr`` is normalized in-place as needed.
@@ -517,6 +506,7 @@
base = expr['base']
discriminator = expr['discriminator']
members = expr['data']
+ info = expr.info
check_type(base, info, "'base'", allow_dict=name)
check_name_is_str(discriminator, info, "'discriminator'")
@@ -531,17 +521,17 @@
check_type(value['type'], info, source, allow_array=not base)
-def check_alternate(expr: _JSONObject, info: QAPISourceInfo) -> None:
+def check_alternate(expr: QAPIExpression) -> None:
"""
Normalize and validate this expression as an ``alternate`` definition.
:param expr: The expression to validate.
- :param info: QAPI schema source file information.
:raise QAPISemError: When ``expr`` is not a valid ``alternate``.
:return: None, ``expr`` is normalized in-place as needed.
"""
members = expr['data']
+ info = expr.info
if not members:
raise QAPISemError(info, "'data' must not be empty")
@@ -557,12 +547,11 @@
check_type(value['type'], info, source, allow_array=True)
-def check_command(expr: _JSONObject, info: QAPISourceInfo) -> None:
+def check_command(expr: QAPIExpression) -> None:
"""
Normalize and validate this expression as a ``command`` definition.
:param expr: The expression to validate.
- :param info: QAPI schema source file information.
:raise QAPISemError: When ``expr`` is not a valid ``command``.
:return: None, ``expr`` is normalized in-place as needed.
@@ -572,17 +561,16 @@
boxed = expr.get('boxed', False)
if boxed and args is None:
- raise QAPISemError(info, "'boxed': true requires 'data'")
- check_type(args, info, "'data'", allow_dict=not boxed)
- check_type(rets, info, "'returns'", allow_array=True)
+ raise QAPISemError(expr.info, "'boxed': true requires 'data'")
+ check_type(args, expr.info, "'data'", allow_dict=not boxed)
+ check_type(rets, expr.info, "'returns'", allow_array=True)
-def check_event(expr: _JSONObject, info: QAPISourceInfo) -> None:
+def check_event(expr: QAPIExpression) -> None:
"""
Normalize and validate this expression as an ``event`` definition.
:param expr: The expression to validate.
- :param info: QAPI schema source file information.
:raise QAPISemError: When ``expr`` is not a valid ``event``.
:return: None, ``expr`` is normalized in-place as needed.
@@ -591,11 +579,11 @@
boxed = expr.get('boxed', False)
if boxed and args is None:
- raise QAPISemError(info, "'boxed': true requires 'data'")
- check_type(args, info, "'data'", allow_dict=not boxed)
+ raise QAPISemError(expr.info, "'boxed': true requires 'data'")
+ check_type(args, expr.info, "'data'", allow_dict=not boxed)
-def check_exprs(exprs: List[_JSONObject]) -> List[_JSONObject]:
+def check_exprs(exprs: List[QAPIExpression]) -> List[QAPIExpression]:
"""
Validate and normalize a list of parsed QAPI schema expressions.
@@ -607,21 +595,9 @@
:raise QAPISemError: When any expression fails validation.
:return: The same list of expressions (now modified).
"""
- for expr_elem in exprs:
- # Expression
- assert isinstance(expr_elem['expr'], dict)
- for key in expr_elem['expr'].keys():
- assert isinstance(key, str)
- expr: _JSONObject = expr_elem['expr']
-
- # QAPISourceInfo
- assert isinstance(expr_elem['info'], QAPISourceInfo)
- info: QAPISourceInfo = expr_elem['info']
-
- # Optional[QAPIDoc]
- tmp = expr_elem.get('doc')
- assert tmp is None or isinstance(tmp, QAPIDoc)
- doc: Optional[QAPIDoc] = tmp
+ for expr in exprs:
+ info = expr.info
+ doc = expr.doc
if 'include' in expr:
continue
@@ -653,24 +629,24 @@
if meta == 'enum':
check_keys(expr, info, meta,
['enum', 'data'], ['if', 'features', 'prefix'])
- check_enum(expr, info)
+ check_enum(expr)
elif meta == 'union':
check_keys(expr, info, meta,
['union', 'base', 'discriminator', 'data'],
['if', 'features'])
normalize_members(expr.get('base'))
normalize_members(expr['data'])
- check_union(expr, info)
+ check_union(expr)
elif meta == 'alternate':
check_keys(expr, info, meta,
['alternate', 'data'], ['if', 'features'])
normalize_members(expr['data'])
- check_alternate(expr, info)
+ check_alternate(expr)
elif meta == 'struct':
check_keys(expr, info, meta,
['struct', 'data'], ['base', 'if', 'features'])
normalize_members(expr['data'])
- check_struct(expr, info)
+ check_struct(expr)
elif meta == 'command':
check_keys(expr, info, meta,
['command'],
@@ -678,17 +654,17 @@
'gen', 'success-response', 'allow-oob',
'allow-preconfig', 'coroutine'])
normalize_members(expr.get('data'))
- check_command(expr, info)
+ check_command(expr)
elif meta == 'event':
check_keys(expr, info, meta,
['event'], ['data', 'boxed', 'if', 'features'])
normalize_members(expr.get('data'))
- check_event(expr, info)
+ check_event(expr)
else:
assert False, 'unexpected meta type'
check_if(expr, info, meta)
check_features(expr.get('features'), info)
- check_flags(expr, info)
+ check_flags(expr)
return exprs
diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
index 1b006cd..878f90b 100644
--- a/scripts/qapi/parser.py
+++ b/scripts/qapi/parser.py
@@ -21,6 +21,7 @@
TYPE_CHECKING,
Dict,
List,
+ Mapping,
Optional,
Set,
Union,
@@ -37,15 +38,19 @@
from .schema import QAPISchemaFeature, QAPISchemaMember
-#: Represents a single Top Level QAPI schema expression.
-TopLevelExpr = Dict[str, object]
-
# Return value alias for get_expr().
_ExprValue = Union[List[object], Dict[str, object], str, bool]
-# FIXME: Consolidate and centralize definitions for TopLevelExpr,
-# _ExprValue, _JSONValue, and _JSONObject; currently scattered across
-# several modules.
+
+class QAPIExpression(Dict[str, object]):
+ # pylint: disable=too-few-public-methods
+ def __init__(self,
+ data: Mapping[str, object],
+ info: QAPISourceInfo,
+ doc: Optional['QAPIDoc'] = None):
+ super().__init__(data)
+ self.info = info
+ self.doc: Optional['QAPIDoc'] = doc
class QAPIParseError(QAPISourceError):
@@ -100,7 +105,7 @@
self.line_pos = 0
# Parser output:
- self.exprs: List[Dict[str, object]] = []
+ self.exprs: List[QAPIExpression] = []
self.docs: List[QAPIDoc] = []
# Showtime!
@@ -147,8 +152,7 @@
"value of 'include' must be a string")
incl_fname = os.path.join(os.path.dirname(self._fname),
include)
- self.exprs.append({'expr': {'include': incl_fname},
- 'info': info})
+ self._add_expr(OrderedDict({'include': incl_fname}), info)
exprs_include = self._include(include, info, incl_fname,
self._included)
if exprs_include:
@@ -165,17 +169,18 @@
for name, value in pragma.items():
self._pragma(name, value, info)
else:
- expr_elem = {'expr': expr,
- 'info': info}
- if cur_doc:
- if not cur_doc.symbol:
- raise QAPISemError(
- cur_doc.info, "definition documentation required")
- expr_elem['doc'] = cur_doc
- self.exprs.append(expr_elem)
+ if cur_doc and not cur_doc.symbol:
+ raise QAPISemError(
+ cur_doc.info, "definition documentation required")
+ self._add_expr(expr, info, cur_doc)
cur_doc = None
self.reject_expr_doc(cur_doc)
+ def _add_expr(self, expr: Mapping[str, object],
+ info: QAPISourceInfo,
+ doc: Optional['QAPIDoc'] = None) -> None:
+ self.exprs.append(QAPIExpression(expr, info, doc))
+
@staticmethod
def reject_expr_doc(doc: Optional['QAPIDoc']) -> None:
if doc and doc.symbol:
@@ -784,7 +789,7 @@
% feature.name)
self.features[feature.name].connect(feature)
- def check_expr(self, expr: TopLevelExpr) -> None:
+ def check_expr(self, expr: QAPIExpression) -> None:
if self.has_section('Returns') and 'command' not in expr:
raise QAPISemError(self.info,
"'Returns:' is only valid for commands")
diff --git a/scripts/qapi/pylintrc b/scripts/qapi/pylintrc
index a724628..90546df 100644
--- a/scripts/qapi/pylintrc
+++ b/scripts/qapi/pylintrc
@@ -23,6 +23,7 @@
too-many-statements,
too-many-instance-attributes,
consider-using-f-string,
+ useless-option-value,
[REPORTS]
diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py
index cd86611..207e4d7 100644
--- a/scripts/qapi/schema.py
+++ b/scripts/qapi/schema.py
@@ -17,7 +17,7 @@
from collections import OrderedDict
import os
import re
-from typing import Optional
+from typing import List, Optional
from .common import (
POINTER_SUFFIX,
@@ -29,7 +29,7 @@
)
from .error import QAPIError, QAPISemError, QAPISourceError
from .expr import check_exprs
-from .parser import QAPISchemaParser
+from .parser import QAPIExpression, QAPISchemaParser
class QAPISchemaIfCond:
@@ -964,10 +964,11 @@
name = self._module_name(fname)
return self._module_dict[name]
- def _def_include(self, expr, info, doc):
+ def _def_include(self, expr: QAPIExpression):
include = expr['include']
- assert doc is None
- self._def_entity(QAPISchemaInclude(self._make_module(include), info))
+ assert expr.doc is None
+ self._def_entity(
+ QAPISchemaInclude(self._make_module(include), expr.info))
def _def_builtin_type(self, name, json_type, c_type):
self._def_entity(QAPISchemaBuiltinType(name, json_type, c_type))
@@ -1045,14 +1046,15 @@
name, info, None, ifcond, None, None, members, None))
return name
- def _def_enum_type(self, expr, info, doc):
+ def _def_enum_type(self, expr: QAPIExpression):
name = expr['enum']
data = expr['data']
prefix = expr.get('prefix')
ifcond = QAPISchemaIfCond(expr.get('if'))
+ info = expr.info
features = self._make_features(expr.get('features'), info)
self._def_entity(QAPISchemaEnumType(
- name, info, doc, ifcond, features,
+ name, info, expr.doc, ifcond, features,
self._make_enum_members(data, info), prefix))
def _make_member(self, name, typ, ifcond, features, info):
@@ -1072,14 +1074,15 @@
value.get('features'), info)
for (key, value) in data.items()]
- def _def_struct_type(self, expr, info, doc):
+ def _def_struct_type(self, expr: QAPIExpression):
name = expr['struct']
base = expr.get('base')
data = expr['data']
+ info = expr.info
ifcond = QAPISchemaIfCond(expr.get('if'))
features = self._make_features(expr.get('features'), info)
self._def_entity(QAPISchemaObjectType(
- name, info, doc, ifcond, features, base,
+ name, info, expr.doc, ifcond, features, base,
self._make_members(data, info),
None))
@@ -1089,11 +1092,13 @@
typ = self._make_array_type(typ[0], info)
return QAPISchemaVariant(case, info, typ, ifcond)
- def _def_union_type(self, expr, info, doc):
+ def _def_union_type(self, expr: QAPIExpression):
name = expr['union']
base = expr['base']
tag_name = expr['discriminator']
data = expr['data']
+ assert isinstance(data, dict)
+ info = expr.info
ifcond = QAPISchemaIfCond(expr.get('if'))
features = self._make_features(expr.get('features'), info)
if isinstance(base, dict):
@@ -1105,17 +1110,19 @@
QAPISchemaIfCond(value.get('if')),
info)
for (key, value) in data.items()]
- members = []
+ members: List[QAPISchemaObjectTypeMember] = []
self._def_entity(
- QAPISchemaObjectType(name, info, doc, ifcond, features,
+ QAPISchemaObjectType(name, info, expr.doc, ifcond, features,
base, members,
QAPISchemaVariants(
tag_name, info, None, variants)))
- def _def_alternate_type(self, expr, info, doc):
+ def _def_alternate_type(self, expr: QAPIExpression):
name = expr['alternate']
data = expr['data']
+ assert isinstance(data, dict)
ifcond = QAPISchemaIfCond(expr.get('if'))
+ info = expr.info
features = self._make_features(expr.get('features'), info)
variants = [
self._make_variant(key, value['type'],
@@ -1124,11 +1131,11 @@
for (key, value) in data.items()]
tag_member = QAPISchemaObjectTypeMember('type', info, 'QType', False)
self._def_entity(
- QAPISchemaAlternateType(name, info, doc, ifcond, features,
- QAPISchemaVariants(
- None, info, tag_member, variants)))
+ QAPISchemaAlternateType(
+ name, info, expr.doc, ifcond, features,
+ QAPISchemaVariants(None, info, tag_member, variants)))
- def _def_command(self, expr, info, doc):
+ def _def_command(self, expr: QAPIExpression):
name = expr['command']
data = expr.get('data')
rets = expr.get('returns')
@@ -1139,6 +1146,7 @@
allow_preconfig = expr.get('allow-preconfig', False)
coroutine = expr.get('coroutine', False)
ifcond = QAPISchemaIfCond(expr.get('if'))
+ info = expr.info
features = self._make_features(expr.get('features'), info)
if isinstance(data, OrderedDict):
data = self._make_implicit_object_type(
@@ -1147,44 +1155,42 @@
if isinstance(rets, list):
assert len(rets) == 1
rets = self._make_array_type(rets[0], info)
- self._def_entity(QAPISchemaCommand(name, info, doc, ifcond, features,
- data, rets,
+ self._def_entity(QAPISchemaCommand(name, info, expr.doc, ifcond,
+ features, data, rets,
gen, success_response,
boxed, allow_oob, allow_preconfig,
coroutine))
- def _def_event(self, expr, info, doc):
+ def _def_event(self, expr: QAPIExpression):
name = expr['event']
data = expr.get('data')
boxed = expr.get('boxed', False)
ifcond = QAPISchemaIfCond(expr.get('if'))
+ info = expr.info
features = self._make_features(expr.get('features'), info)
if isinstance(data, OrderedDict):
data = self._make_implicit_object_type(
name, info, ifcond,
'arg', self._make_members(data, info))
- self._def_entity(QAPISchemaEvent(name, info, doc, ifcond, features,
- data, boxed))
+ self._def_entity(QAPISchemaEvent(name, info, expr.doc, ifcond,
+ features, data, boxed))
def _def_exprs(self, exprs):
- for expr_elem in exprs:
- expr = expr_elem['expr']
- info = expr_elem['info']
- doc = expr_elem.get('doc')
+ for expr in exprs:
if 'enum' in expr:
- self._def_enum_type(expr, info, doc)
+ self._def_enum_type(expr)
elif 'struct' in expr:
- self._def_struct_type(expr, info, doc)
+ self._def_struct_type(expr)
elif 'union' in expr:
- self._def_union_type(expr, info, doc)
+ self._def_union_type(expr)
elif 'alternate' in expr:
- self._def_alternate_type(expr, info, doc)
+ self._def_alternate_type(expr)
elif 'command' in expr:
- self._def_command(expr, info, doc)
+ self._def_command(expr)
elif 'event' in expr:
- self._def_event(expr, info, doc)
+ self._def_event(expr)
elif 'include' in expr:
- self._def_include(expr, info, doc)
+ self._def_include(expr)
else:
assert False
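
A brief orientation on the QAPI refactor above: the parser now hands each top-level schema expression to its consumers as a QAPIExpression, a dict subclass that carries its QAPISourceInfo and optional documentation with it, which is why the check_*() and _def_*() helpers lost their separate info/doc parameters. A minimal standalone sketch of the idea (using a made-up Info stand-in rather than the real QAPISourceInfo):

    # Illustrative only: mirrors the QAPIExpression wrapper added in parser.py,
    # with a stand-in Info class instead of the real QAPISourceInfo.
    from typing import Mapping, Optional

    class Info:                         # stand-in for QAPISourceInfo
        def __init__(self, fname: str, line: int):
            self.fname, self.line = fname, line

    class Expression(dict):             # same shape as QAPIExpression
        def __init__(self, data: Mapping[str, object], info: Info,
                     doc: Optional[object] = None):
            super().__init__(data)
            self.info = info
            self.doc = doc

    expr = Expression({'enum': 'Status', 'data': ['ok', 'bad']},
                      Info('example.json', 3))
    assert expr['enum'] == 'Status'     # still usable as a plain dict
    assert expr.info.line == 3          # but its source location travels with it
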
diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
index 7820fec..2463964 100644
--- a/softmmu/dma-helpers.c
+++ b/softmmu/dma-helpers.c
@@ -113,17 +113,19 @@
static void dma_blk_cb(void *opaque, int ret)
{
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
+ AioContext *ctx = dbs->ctx;
dma_addr_t cur_addr, cur_len;
void *mem;
trace_dma_blk_cb(dbs, ret);
+ aio_context_acquire(ctx);
dbs->acb = NULL;
dbs->offset += dbs->iov.size;
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
dma_complete(dbs, ret);
- return;
+ goto out;
}
dma_blk_unmap(dbs);
@@ -164,9 +166,9 @@
if (dbs->iov.size == 0) {
trace_dma_map_wait(dbs);
- dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
+ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
cpu_register_map_client(dbs->bh);
- return;
+ goto out;
}
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
@@ -174,11 +176,11 @@
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
}
- aio_context_acquire(dbs->ctx);
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
dma_blk_cb, dbs, dbs->io_func_opaque);
- aio_context_release(dbs->ctx);
assert(dbs->acb);
+out:
+ aio_context_release(ctx);
}
static void dma_aio_cancel(BlockAIOCB *acb)
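
The dma_blk_cb() change above takes the AioContext pointer once, acquires it on entry, and funnels every early exit through the new out: label so the context is released on all paths. A loose Python analogue of that shape, purely for illustration (the lock and callback names below are invented, not QEMU APIs):

    # Rough analogue of the acquire-once/release-at-'out' structure, not QEMU code.
    import threading

    ctx_lock = threading.Lock()          # stands in for the AioContext lock

    def dma_cb(finished: bool, error: bool) -> None:
        ctx_lock.acquire()               # aio_context_acquire(ctx)
        try:
            if finished or error:
                return                   # early exits still release via finally
            # ... resubmit the next chunk of I/O here ...
        finally:
            ctx_lock.release()           # aio_context_release(ctx) at the out: label

    dma_cb(finished=True, error=False)   # lock is released on the early-return path
    assert not ctx_lock.locked()
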
diff --git a/softmmu/qtest.c b/softmmu/qtest.c
index d3e0ab4..34bd2a3 100644
--- a/softmmu/qtest.c
+++ b/softmmu/qtest.c
@@ -28,7 +28,6 @@
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "qemu/cutils.h"
-#include "qapi/qmp/qerror.h"
#include "qom/object_interfaces.h"
#include CONFIG_DEVICES
#ifdef CONFIG_PSERIES
diff --git a/softmmu/rtc.c b/softmmu/rtc.c
index f7114be..4b2bf75 100644
--- a/softmmu/rtc.c
+++ b/softmmu/rtc.c
@@ -152,11 +152,8 @@
if (!strcmp(value, "utc")) {
rtc_base_type = RTC_BASE_UTC;
} else if (!strcmp(value, "localtime")) {
- Error *blocker = NULL;
rtc_base_type = RTC_BASE_LOCALTIME;
- error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED,
- "-rtc base=localtime");
- replay_add_blocker(blocker);
+ replay_add_blocker("-rtc base=localtime");
} else {
rtc_base_type = RTC_BASE_DATETIME;
configure_rtc_base_datetime(value);
diff --git a/softmmu/vl.c b/softmmu/vl.c
index b2ee3fe..6e526d9 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -1852,9 +1852,7 @@
}
if (current_machine->smp.cpus > 1) {
- Error *blocker = NULL;
- error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp");
- replay_add_blocker(blocker);
+ replay_add_blocker("smp");
}
}
@@ -2204,14 +2202,18 @@
int ret;
bool qtest_with_kvm;
+ if (!acc) {
+ error_setg(errp, QERR_MISSING_PARAMETER, "accel");
+ goto bad;
+ }
+
qtest_with_kvm = g_str_equal(acc, "kvm") && qtest_chrdev != NULL;
if (!ac) {
- *p_init_failed = true;
if (!qtest_with_kvm) {
error_report("invalid accelerator %s", acc);
}
- return 0;
+ goto bad;
}
accel = ACCEL(object_new_with_class(OBJECT_CLASS(ac)));
object_apply_compat_props(OBJECT(accel));
@@ -2221,14 +2223,17 @@
ret = accel_init_machine(accel, current_machine);
if (ret < 0) {
- *p_init_failed = true;
if (!qtest_with_kvm || ret != -ENOENT) {
error_report("failed to initialize %s: %s", acc, strerror(-ret));
}
- return 0;
+ goto bad;
}
return 1;
+
+bad:
+ *p_init_failed = true;
+ return 0;
}
static void configure_accelerators(const char *progname)
@@ -2767,13 +2772,8 @@
drive_add(IF_PFLASH, -1, optarg, PFLASH_OPTS);
break;
case QEMU_OPTION_snapshot:
- {
- Error *blocker = NULL;
- snapshot = 1;
- error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED,
- "-snapshot");
- replay_add_blocker(blocker);
- }
+ snapshot = 1;
+ replay_add_blocker("-snapshot");
break;
case QEMU_OPTION_numa:
opts = qemu_opts_parse_noisily(qemu_find_opts("numa"),
diff --git a/stubs/meson.build b/stubs/meson.build
index 981585c..7657467 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -45,7 +45,6 @@
stub_ss.add(files('target-monitor-defs.c'))
stub_ss.add(files('trace-control.c'))
stub_ss.add(files('uuid.c'))
-stub_ss.add(files('vmgenid.c'))
stub_ss.add(files('vmstate.c'))
stub_ss.add(files('vm-stop.c'))
stub_ss.add(files('win32-kbd-hook.c'))
diff --git a/stubs/vmgenid.c b/stubs/vmgenid.c
deleted file mode 100644
index bfad656..0000000
--- a/stubs/vmgenid.c
+++ /dev/null
@@ -1,10 +0,0 @@
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qapi/qapi-commands-machine.h"
-#include "qapi/qmp/qerror.h"
-
-GuidInfo *qmp_query_vm_generation_id(Error **errp)
-{
- error_setg(errp, QERR_UNSUPPORTED);
- return NULL;
-}
diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h
index efcf918..1ee64e9 100644
--- a/target/arm/cpregs.h
+++ b/target/arm/cpregs.h
@@ -121,6 +121,104 @@
};
/*
+ * Interface for defining coprocessor registers.
+ * Registers are defined in tables of arm_cp_reginfo structs
+ * which are passed to define_arm_cp_regs().
+ */
+
+/*
+ * When looking up a coprocessor register we look for it
+ * via an integer which encodes all of:
+ * coprocessor number
+ * Crn, Crm, opc1, opc2 fields
+ * 32 or 64 bit register (i.e. is it accessed via MRC/MCR
+ * or via MRRC/MCRR?)
+ * non-secure/secure bank (AArch32 only)
+ * We allow 4 bits for opc1 because MRRC/MCRR have a 4 bit field.
+ * (In this case crn and opc2 should be zero.)
+ * For AArch64, there is no 32/64 bit size distinction;
+ * instead all registers have a 2 bit op0, 3 bit op1 and op2,
+ * and 4 bit CRn and CRm. The encoding patterns are chosen
+ * to be easy to convert to and from the KVM encodings, and also
+ * so that the hashtable can contain both AArch32 and AArch64
+ * registers (to allow for interprocessing where we might run
+ * 32 bit code on a 64 bit core).
+ */
+/*
+ * This bit is private to our hashtable cpreg; in KVM register
+ * IDs the AArch64/32 distinction is the KVM_REG_ARM/ARM64
+ * in the upper bits of the 64 bit ID.
+ */
+#define CP_REG_AA64_SHIFT 28
+#define CP_REG_AA64_MASK (1 << CP_REG_AA64_SHIFT)
+
+/*
+ * To enable banking of coprocessor registers depending on ns-bit we
+ * add a bit to distinguish between secure and non-secure cpregs in the
+ * hashtable.
+ */
+#define CP_REG_NS_SHIFT 29
+#define CP_REG_NS_MASK (1 << CP_REG_NS_SHIFT)
+
+#define ENCODE_CP_REG(cp, is64, ns, crn, crm, opc1, opc2) \
+ ((ns) << CP_REG_NS_SHIFT | ((cp) << 16) | ((is64) << 15) | \
+ ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2))
+
+#define ENCODE_AA64_CP_REG(cp, crn, crm, op0, op1, op2) \
+ (CP_REG_AA64_MASK | \
+ ((cp) << CP_REG_ARM_COPROC_SHIFT) | \
+ ((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \
+ ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \
+ ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \
+ ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) | \
+ ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT))
+
+/*
+ * Convert a full 64 bit KVM register ID to the truncated 32 bit
+ * version used as a key for the coprocessor register hashtable
+ */
+static inline uint32_t kvm_to_cpreg_id(uint64_t kvmid)
+{
+ uint32_t cpregid = kvmid;
+ if ((kvmid & CP_REG_ARCH_MASK) == CP_REG_ARM64) {
+ cpregid |= CP_REG_AA64_MASK;
+ } else {
+ if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) {
+ cpregid |= (1 << 15);
+ }
+
+ /*
+ * KVM is always non-secure so add the NS flag on AArch32 register
+ * entries.
+ */
+ cpregid |= 1 << CP_REG_NS_SHIFT;
+ }
+ return cpregid;
+}
+
+/*
+ * Convert a truncated 32 bit hashtable key into the full
+ * 64 bit KVM register ID.
+ */
+static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
+{
+ uint64_t kvmid;
+
+ if (cpregid & CP_REG_AA64_MASK) {
+ kvmid = cpregid & ~CP_REG_AA64_MASK;
+ kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM64;
+ } else {
+ kvmid = cpregid & ~(1 << 15);
+ if (cpregid & (1 << 15)) {
+ kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM;
+ } else {
+ kvmid |= CP_REG_SIZE_U32 | CP_REG_ARM;
+ }
+ }
+ return kvmid;
+}
+
+/*
* Valid values for ARMCPRegInfo state field, indicating which of
* the AArch32 and AArch64 execution states this register is visible in.
* If the reginfo doesn't explicitly specify then it is AArch32 only.
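
For reference, the AArch32 key layout described in the comment block moved into cpregs.h packs opc2 into the low bits, opc1 at bit 3, crm at bit 7, crn at bit 11, the 32/64-bit flag at bit 15, the coprocessor number at bit 16, and the NS bit at bit 29. A small Python restatement of ENCODE_CP_REG using those shifts (illustrative only; the AArch64 variant depends on the CP_REG_ARM64_SYSREG_* shifts from the KVM headers and is not reproduced here):

    # Mirrors ENCODE_CP_REG from cpregs.h for the AArch32 case; the shift
    # values are taken from the patch above.
    CP_REG_NS_SHIFT = 29

    def encode_cp_reg(cp, is64, ns, crn, crm, opc1, opc2):
        return ((ns << CP_REG_NS_SHIFT) | (cp << 16) | (is64 << 15) |
                (crn << 11) | (crm << 7) | (opc1 << 3) | opc2)

    # Example: a non-secure 32-bit cp15 register with crn=1, crm=0,
    # opc1=0, opc2=0 (the classic SCTLR encoding).
    key = encode_cp_reg(15, 0, 1, 1, 0, 0, 0)
    assert key == (1 << 29) | (15 << 16) | (1 << 11)
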
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 5f63316..876ab8f 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -36,7 +36,10 @@
#if !defined(CONFIG_USER_ONLY)
#include "hw/loader.h"
#include "hw/boards.h"
-#endif
+#ifdef CONFIG_TCG
+#include "hw/intc/armv7m_nvic.h"
+#endif /* CONFIG_TCG */
+#endif /* !CONFIG_USER_ONLY */
#include "sysemu/tcg.h"
#include "sysemu/qtest.h"
#include "sysemu/hw_accel.h"
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 7bc97fe..12b1082 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -227,6 +227,8 @@
typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
+typedef struct NVICState NVICState;
+
typedef struct CPUArchState {
/* Regs for current mode. */
uint32_t regs[16];
@@ -721,11 +723,6 @@
ARMVectorReg zarray[ARM_MAX_VQ * 16];
#endif
-#if defined(CONFIG_USER_ONLY)
- /* For usermode syscall translation. */
- int eabi;
-#endif
-
struct CPUBreakpoint *cpu_breakpoint[16];
struct CPUWatchpoint *cpu_watchpoint[16];
@@ -772,10 +769,15 @@
uint32_t ctrl;
} sau;
- void *nvic;
+#if !defined(CONFIG_USER_ONLY)
+ NVICState *nvic;
const struct arm_boot_info *boot_info;
/* Store GICv3CPUState to access from this struct */
void *gicv3state;
+#else /* CONFIG_USER_ONLY */
+ /* For usermode syscall translation. */
+ bool eabi;
+#endif /* CONFIG_USER_ONLY */
#ifdef TARGET_TAGGED_ADDRESSES
/* Linux syscall tagged address support */
@@ -2557,220 +2559,6 @@
uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
uint32_t cur_el, bool secure);
-/* Interface between CPU and Interrupt controller. */
-#ifndef CONFIG_USER_ONLY
-bool armv7m_nvic_can_take_pending_exception(void *opaque);
-#else
-static inline bool armv7m_nvic_can_take_pending_exception(void *opaque)
-{
- return true;
-}
-#endif
-/**
- * armv7m_nvic_set_pending: mark the specified exception as pending
- * @opaque: the NVIC
- * @irq: the exception number to mark pending
- * @secure: false for non-banked exceptions or for the nonsecure
- * version of a banked exception, true for the secure version of a banked
- * exception.
- *
- * Marks the specified exception as pending. Note that we will assert()
- * if @secure is true and @irq does not specify one of the fixed set
- * of architecturally banked exceptions.
- */
-void armv7m_nvic_set_pending(void *opaque, int irq, bool secure);
-/**
- * armv7m_nvic_set_pending_derived: mark this derived exception as pending
- * @opaque: the NVIC
- * @irq: the exception number to mark pending
- * @secure: false for non-banked exceptions or for the nonsecure
- * version of a banked exception, true for the secure version of a banked
- * exception.
- *
- * Similar to armv7m_nvic_set_pending(), but specifically for derived
- * exceptions (exceptions generated in the course of trying to take
- * a different exception).
- */
-void armv7m_nvic_set_pending_derived(void *opaque, int irq, bool secure);
-/**
- * armv7m_nvic_set_pending_lazyfp: mark this lazy FP exception as pending
- * @opaque: the NVIC
- * @irq: the exception number to mark pending
- * @secure: false for non-banked exceptions or for the nonsecure
- * version of a banked exception, true for the secure version of a banked
- * exception.
- *
- * Similar to armv7m_nvic_set_pending(), but specifically for exceptions
- * generated in the course of lazy stacking of FP registers.
- */
-void armv7m_nvic_set_pending_lazyfp(void *opaque, int irq, bool secure);
-/**
- * armv7m_nvic_get_pending_irq_info: return highest priority pending
- * exception, and whether it targets Secure state
- * @opaque: the NVIC
- * @pirq: set to pending exception number
- * @ptargets_secure: set to whether pending exception targets Secure
- *
- * This function writes the number of the highest priority pending
- * exception (the one which would be made active by
- * armv7m_nvic_acknowledge_irq()) to @pirq, and sets @ptargets_secure
- * to true if the current highest priority pending exception should
- * be taken to Secure state, false for NS.
- */
-void armv7m_nvic_get_pending_irq_info(void *opaque, int *pirq,
- bool *ptargets_secure);
-/**
- * armv7m_nvic_acknowledge_irq: make highest priority pending exception active
- * @opaque: the NVIC
- *
- * Move the current highest priority pending exception from the pending
- * state to the active state, and update v7m.exception to indicate that
- * it is the exception currently being handled.
- */
-void armv7m_nvic_acknowledge_irq(void *opaque);
-/**
- * armv7m_nvic_complete_irq: complete specified interrupt or exception
- * @opaque: the NVIC
- * @irq: the exception number to complete
- * @secure: true if this exception was secure
- *
- * Returns: -1 if the irq was not active
- * 1 if completing this irq brought us back to base (no active irqs)
- * 0 if there is still an irq active after this one was completed
- * (Ignoring -1, this is the same as the RETTOBASE value before completion.)
- */
-int armv7m_nvic_complete_irq(void *opaque, int irq, bool secure);
-/**
- * armv7m_nvic_get_ready_status(void *opaque, int irq, bool secure)
- * @opaque: the NVIC
- * @irq: the exception number to mark pending
- * @secure: false for non-banked exceptions or for the nonsecure
- * version of a banked exception, true for the secure version of a banked
- * exception.
- *
- * Return whether an exception is "ready", i.e. whether the exception is
- * enabled and is configured at a priority which would allow it to
- * interrupt the current execution priority. This controls whether the
- * RDY bit for it in the FPCCR is set.
- */
-bool armv7m_nvic_get_ready_status(void *opaque, int irq, bool secure);
-/**
- * armv7m_nvic_raw_execution_priority: return the raw execution priority
- * @opaque: the NVIC
- *
- * Returns: the raw execution priority as defined by the v8M architecture.
- * This is the execution priority minus the effects of AIRCR.PRIS,
- * and minus any PRIMASK/FAULTMASK/BASEPRI priority boosting.
- * (v8M ARM ARM I_PKLD.)
- */
-int armv7m_nvic_raw_execution_priority(void *opaque);
-/**
- * armv7m_nvic_neg_prio_requested: return true if the requested execution
- * priority is negative for the specified security state.
- * @opaque: the NVIC
- * @secure: the security state to test
- * This corresponds to the pseudocode IsReqExecPriNeg().
- */
-#ifndef CONFIG_USER_ONLY
-bool armv7m_nvic_neg_prio_requested(void *opaque, bool secure);
-#else
-static inline bool armv7m_nvic_neg_prio_requested(void *opaque, bool secure)
-{
- return false;
-}
-#endif
-
-/* Interface for defining coprocessor registers.
- * Registers are defined in tables of arm_cp_reginfo structs
- * which are passed to define_arm_cp_regs().
- */
-
-/* When looking up a coprocessor register we look for it
- * via an integer which encodes all of:
- * coprocessor number
- * Crn, Crm, opc1, opc2 fields
- * 32 or 64 bit register (ie is it accessed via MRC/MCR
- * or via MRRC/MCRR?)
- * non-secure/secure bank (AArch32 only)
- * We allow 4 bits for opc1 because MRRC/MCRR have a 4 bit field.
- * (In this case crn and opc2 should be zero.)
- * For AArch64, there is no 32/64 bit size distinction;
- * instead all registers have a 2 bit op0, 3 bit op1 and op2,
- * and 4 bit CRn and CRm. The encoding patterns are chosen
- * to be easy to convert to and from the KVM encodings, and also
- * so that the hashtable can contain both AArch32 and AArch64
- * registers (to allow for interprocessing where we might run
- * 32 bit code on a 64 bit core).
- */
-/* This bit is private to our hashtable cpreg; in KVM register
- * IDs the AArch64/32 distinction is the KVM_REG_ARM/ARM64
- * in the upper bits of the 64 bit ID.
- */
-#define CP_REG_AA64_SHIFT 28
-#define CP_REG_AA64_MASK (1 << CP_REG_AA64_SHIFT)
-
-/* To enable banking of coprocessor registers depending on ns-bit we
- * add a bit to distinguish between secure and non-secure cpregs in the
- * hashtable.
- */
-#define CP_REG_NS_SHIFT 29
-#define CP_REG_NS_MASK (1 << CP_REG_NS_SHIFT)
-
-#define ENCODE_CP_REG(cp, is64, ns, crn, crm, opc1, opc2) \
- ((ns) << CP_REG_NS_SHIFT | ((cp) << 16) | ((is64) << 15) | \
- ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2))
-
-#define ENCODE_AA64_CP_REG(cp, crn, crm, op0, op1, op2) \
- (CP_REG_AA64_MASK | \
- ((cp) << CP_REG_ARM_COPROC_SHIFT) | \
- ((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \
- ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \
- ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \
- ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) | \
- ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT))
-
-/* Convert a full 64 bit KVM register ID to the truncated 32 bit
- * version used as a key for the coprocessor register hashtable
- */
-static inline uint32_t kvm_to_cpreg_id(uint64_t kvmid)
-{
- uint32_t cpregid = kvmid;
- if ((kvmid & CP_REG_ARCH_MASK) == CP_REG_ARM64) {
- cpregid |= CP_REG_AA64_MASK;
- } else {
- if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) {
- cpregid |= (1 << 15);
- }
-
- /* KVM is always non-secure so add the NS flag on AArch32 register
- * entries.
- */
- cpregid |= 1 << CP_REG_NS_SHIFT;
- }
- return cpregid;
-}
-
-/* Convert a truncated 32 bit hashtable key into the full
- * 64 bit KVM register ID.
- */
-static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
-{
- uint64_t kvmid;
-
- if (cpregid & CP_REG_AA64_MASK) {
- kvmid = cpregid & ~CP_REG_AA64_MASK;
- kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM64;
- } else {
- kvmid = cpregid & ~(1 << 15);
- if (cpregid & (1 << 15)) {
- kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM;
- } else {
- kvmid |= CP_REG_SIZE_U32 | CP_REG_ARM;
- }
- }
- return kvmid;
-}
-
/* Return the highest implemented Exception Level */
static inline int arm_highest_el(CPUARMState *env)
{
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index ccde508..df0c45e 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -19,6 +19,9 @@
#include "hw/boards.h"
#endif
#include "cpregs.h"
+#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
+#include "hw/intc/armv7m_nvic.h"
+#endif
/* Share AArch32 -cpu max features with AArch64. */
diff --git a/target/arm/helper.c b/target/arm/helper.c
index c62ed05..07d4100 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -22,6 +22,7 @@
#include "hw/irq.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/kvm.h"
+#include "sysemu/tcg.h"
#include "qapi/qapi-commands-machine-target.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"
@@ -7021,6 +7022,7 @@
}
}
+#ifndef CONFIG_USER_ONLY
/*
* We don't know until after realize whether there's a GICv3
* attached, and that is what registers the gicv3 sysregs.
@@ -7038,7 +7040,6 @@
return pfr1;
}
-#ifndef CONFIG_USER_ONLY
static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
ARMCPU *cpu = env_archcpu(env);
@@ -7998,8 +7999,16 @@
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_NO_RAW,
.accessfn = access_aa32_tid3,
+#ifdef CONFIG_USER_ONLY
+ .type = ARM_CP_CONST,
+ .resetvalue = cpu->isar.id_pfr1,
+#else
+ .type = ARM_CP_NO_RAW,
+ .accessfn = access_aa32_tid3,
.readfn = id_pfr1_read,
- .writefn = arm_cp_write_ignore },
+ .writefn = arm_cp_write_ignore
+#endif
+ },
{ .name = "ID_DFR0", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
@@ -10818,11 +10827,13 @@
unsigned int cur_el = arm_current_el(env);
int rt;
- /*
- * Note that new_el can never be 0. If cur_el is 0, then
- * el0_a64 is is_a64(), else el0_a64 is ignored.
- */
- aarch64_sve_change_el(env, cur_el, new_el, is_a64(env));
+ if (tcg_enabled()) {
+ /*
+ * Note that new_el can never be 0. If cur_el is 0, then
+ * el0_a64 is is_a64(), else el0_a64 is ignored.
+ */
+ aarch64_sve_change_el(env, cur_el, new_el, is_a64(env));
+ }
if (cur_el < new_el) {
/*
@@ -11006,7 +11017,7 @@
* trapped to the hypervisor in KVM.
*/
#ifdef CONFIG_TCG
-static void handle_semihosting(CPUState *cs)
+static void tcg_handle_semihosting(CPUState *cs)
{
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
@@ -11055,7 +11066,7 @@
env->exception.syndrome);
}
- if (arm_is_psci_call(cpu, cs->exception_index)) {
+ if (tcg_enabled() && arm_is_psci_call(cpu, cs->exception_index)) {
arm_handle_psci_call(cpu);
qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n");
return;
@@ -11068,7 +11079,7 @@
*/
#ifdef CONFIG_TCG
if (cs->exception_index == EXCP_SEMIHOST) {
- handle_semihosting(cs);
+ tcg_handle_semihosting(cs);
return;
}
#endif
diff --git a/target/arm/internals.h b/target/arm/internals.h
index e1e018d..759b70c 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -597,20 +597,6 @@
int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx);
-/*
- * Return the MMU index for a v7M CPU with all relevant information
- * manually specified.
- */
-ARMMMUIdx arm_v7m_mmu_idx_all(CPUARMState *env,
- bool secstate, bool priv, bool negpri);
-
-/*
- * Return the MMU index for a v7M CPU in the specified security and
- * privilege state.
- */
-ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
- bool secstate, bool priv);
-
/* Return the MMU index for a v7M CPU in the specified security state */
ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate);
diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c
index e7e746e..f94e87e 100644
--- a/target/arm/m_helper.c
+++ b/target/arm/m_helper.c
@@ -18,6 +18,9 @@
#include "exec/cpu_ldst.h"
#include "semihosting/common-semi.h"
#endif
+#if !defined(CONFIG_USER_ONLY)
+#include "hw/intc/armv7m_nvic.h"
+#endif
static void v7m_msr_xpsr(CPUARMState *env, uint32_t mask,
uint32_t reg, uint32_t val)
@@ -150,7 +153,49 @@
return 0;
}
-#else
+ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
+{
+ return ARMMMUIdx_MUser;
+}
+
+#else /* !CONFIG_USER_ONLY */
+
+static ARMMMUIdx arm_v7m_mmu_idx_all(CPUARMState *env,
+ bool secstate, bool priv, bool negpri)
+{
+ ARMMMUIdx mmu_idx = ARM_MMU_IDX_M;
+
+ if (priv) {
+ mmu_idx |= ARM_MMU_IDX_M_PRIV;
+ }
+
+ if (negpri) {
+ mmu_idx |= ARM_MMU_IDX_M_NEGPRI;
+ }
+
+ if (secstate) {
+ mmu_idx |= ARM_MMU_IDX_M_S;
+ }
+
+ return mmu_idx;
+}
+
+static ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
+ bool secstate, bool priv)
+{
+ bool negpri = armv7m_nvic_neg_prio_requested(env->nvic, secstate);
+
+ return arm_v7m_mmu_idx_all(env, secstate, priv, negpri);
+}
+
+/* Return the MMU index for a v7M CPU in the specified security state */
+ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
+{
+ bool priv = arm_v7m_is_handler_mode(env) ||
+ !(env->v7m.control[secstate] & 1);
+
+ return arm_v7m_mmu_idx_for_secstate_and_priv(env, secstate, priv);
+}
/*
* What kind of stack write are we doing? This affects how exceptions
@@ -973,7 +1018,7 @@
* that we will need later in order to do lazy FP reg stacking.
*/
bool is_secure = env->v7m.secure;
- void *nvic = env->nvic;
+ NVICState *nvic = env->nvic;
/*
* Some bits are unbanked and live always in fpccr[M_REG_S]; some bits
* are banked and we want to update the bit in the bank for the
@@ -2855,40 +2900,3 @@
}
#endif /* !CONFIG_USER_ONLY */
-
-ARMMMUIdx arm_v7m_mmu_idx_all(CPUARMState *env,
- bool secstate, bool priv, bool negpri)
-{
- ARMMMUIdx mmu_idx = ARM_MMU_IDX_M;
-
- if (priv) {
- mmu_idx |= ARM_MMU_IDX_M_PRIV;
- }
-
- if (negpri) {
- mmu_idx |= ARM_MMU_IDX_M_NEGPRI;
- }
-
- if (secstate) {
- mmu_idx |= ARM_MMU_IDX_M_S;
- }
-
- return mmu_idx;
-}
-
-ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
- bool secstate, bool priv)
-{
- bool negpri = armv7m_nvic_neg_prio_requested(env->nvic, secstate);
-
- return arm_v7m_mmu_idx_all(env, secstate, priv, negpri);
-}
-
-/* Return the MMU index for a v7M CPU in the specified security state */
-ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
-{
- bool priv = arm_v7m_is_handler_mode(env) ||
- !(env->v7m.control[secstate] & 1);
-
- return arm_v7m_mmu_idx_for_secstate_and_priv(env, secstate, priv);
-}
diff --git a/target/arm/machine.c b/target/arm/machine.c
index 5f26152..b4c3850 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -839,6 +839,15 @@
}
}
+ /*
+ * Misaligned thumb pc is architecturally impossible. Fail the
+ * incoming migration. For TCG it would trigger the assert in
+ * thumb_tr_translate_insn().
+ */
+ if (!is_a64(env) && env->thumb && (env->regs[15] & 1)) {
+ return -1;
+ }
+
hw_breakpoint_update_all(cpu);
hw_watchpoint_update_all(cpu);
@@ -856,15 +865,6 @@
}
}
- /*
- * Misaligned thumb pc is architecturally impossible.
- * We have an assert in thumb_tr_translate_insn to verify this.
- * Fail an incoming migrate to avoid this assert.
- */
- if (!is_a64(env) && env->thumb && (env->regs[15] & 1)) {
- return -1;
- }
-
if (!kvm_enabled()) {
pmu_op_finish(&cpu->env);
}
diff --git a/target/i386/gdbstub.c b/target/i386/gdbstub.c
index c3a2cf6..7869712 100644
--- a/target/i386/gdbstub.c
+++ b/target/i386/gdbstub.c
@@ -121,7 +121,9 @@
return gdb_get_reg32(mem_buf, env->regs[gpr_map32[n]]);
}
} else if (n >= IDX_FP_REGS && n < IDX_FP_REGS + 8) {
- floatx80 *fp = (floatx80 *) &env->fpregs[n - IDX_FP_REGS];
+ int st_index = n - IDX_FP_REGS;
+ int r_index = (st_index + env->fpstt) % 8;
+ floatx80 *fp = &env->fpregs[r_index].d;
int len = gdb_get_reg64(mem_buf, cpu_to_le64(fp->low));
len += gdb_get_reg16(mem_buf, cpu_to_le16(fp->high));
return len;
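
The gdbstub fix above is needed because GDB asks for the stack-relative registers ST(0)..ST(7), while env->fpregs[] is indexed by physical register, with env->fpstt pointing at the current top of stack; the patch therefore computes the physical index as (st_index + fpstt) % 8 instead of using st_index directly. A quick worked example of that mapping (plain Python, illustrative only):

    # ST(i) -> physical x87 register index, as computed in the patch above.
    def st_to_phys(st_index: int, fpstt: int) -> int:
        return (st_index + fpstt) % 8

    # With top-of-stack at physical register 6, ST(0) is reg 6, ST(2) wraps to 0.
    assert st_to_phys(0, 6) == 6
    assert st_to_phys(2, 6) == 0
    assert [st_to_phys(i, 3) for i in range(8)] == [3, 4, 5, 6, 7, 0, 1, 2]
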
diff --git a/target/i386/monitor.c b/target/i386/monitor.c
index ad5b7b8..6512846 100644
--- a/target/i386/monitor.c
+++ b/target/i386/monitor.c
@@ -28,7 +28,6 @@
#include "monitor/hmp-target.h"
#include "monitor/hmp.h"
#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qerror.h"
#include "sysemu/kvm.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-misc-target.h"
diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c
index 7a29295..96e1c15 100644
--- a/target/i386/sev-sysemu-stub.c
+++ b/target/i386/sev-sysemu-stub.c
@@ -15,7 +15,6 @@
#include "monitor/monitor.h"
#include "monitor/hmp-target.h"
#include "qapi/qapi-commands-misc-target.h"
-#include "qapi/qmp/qerror.h"
#include "qapi/error.h"
#include "sev.h"
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 32f7dba..0ec9704 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -34,7 +34,6 @@
#include "monitor/monitor.h"
#include "monitor/hmp-target.h"
#include "qapi/qapi-commands-misc-target.h"
-#include "qapi/qmp/qerror.h"
#include "exec/confidential-guest-support.h"
#include "hw/i386/pc.h"
#include "exec/address-spaces.h"
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 7037ff9..e61ae9a 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1015,6 +1015,7 @@
static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
{
+ int opposite_cc_op;
TCGv carry_in = NULL;
TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2);
TCGv zero;
@@ -1022,14 +1023,8 @@
if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) {
/* Re-use the carry-out from a previous round. */
carry_in = carry_out;
- cc_op = s->cc_op;
- } else if (s->cc_op == CC_OP_ADCX || s->cc_op == CC_OP_ADOX) {
- /* Merge with the carry-out from the opposite instruction. */
- cc_op = CC_OP_ADCOX;
- }
-
- /* If we don't have a carry-in, get it out of EFLAGS. */
- if (!carry_in) {
+ } else {
+ /* We don't have a carry-in, get it out of EFLAGS. */
if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
gen_compute_eflags(s);
}
@@ -1042,6 +1037,8 @@
#ifdef TARGET_X86_64
case MO_32:
/* If TL is 64-bit just do everything in 64-bit arithmetic. */
+ tcg_gen_ext32u_tl(s->T0, s->T0);
+ tcg_gen_ext32u_tl(s->T1, s->T1);
tcg_gen_add_i64(s->T0, s->T0, s->T1);
tcg_gen_add_i64(s->T0, s->T0, carry_in);
tcg_gen_shri_i64(carry_out, s->T0, 32);
@@ -1053,7 +1050,14 @@
tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero);
break;
}
- set_cc_op(s, cc_op);
+
+ opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX;
+ if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) {
+ /* Merge with the carry-out from the opposite instruction. */
+ set_cc_op(s, CC_OP_ADCOX);
+ } else {
+ set_cc_op(s, cc_op);
+ }
}
static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
@@ -1078,30 +1082,30 @@
static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
- TCGv bound, zero;
+ TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
+ TCGv zero = tcg_constant_tl(0);
+ TCGv mone = tcg_constant_tl(-1);
/*
* Extract START, and shift the operand.
* Shifts larger than operand size get zeros.
*/
tcg_gen_ext8u_tl(s->A0, s->T1);
+ if (TARGET_LONG_BITS == 64 && ot == MO_32) {
+ tcg_gen_ext32u_tl(s->T0, s->T0);
+ }
tcg_gen_shr_tl(s->T0, s->T0, s->A0);
- bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
- zero = tcg_constant_tl(0);
tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero);
/*
- * Extract the LEN into a mask. Lengths larger than
- * operand size get all ones.
+ * Extract the LEN into an inverse mask. Lengths larger than
+ * operand size get all zeros, length 0 gets all ones.
*/
tcg_gen_extract_tl(s->A0, s->T1, 8, 8);
- tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, s->A0, bound);
-
- tcg_gen_movi_tl(s->T1, 1);
- tcg_gen_shl_tl(s->T1, s->T1, s->A0);
- tcg_gen_subi_tl(s->T1, s->T1, 1);
- tcg_gen_and_tl(s->T0, s->T0, s->T1);
+ tcg_gen_shl_tl(s->T1, mone, s->A0);
+ tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero);
+ tcg_gen_andc_tl(s->T0, s->T0, s->T1);
gen_op_update1_cc(s);
set_cc_op(s, CC_OP_LOGICB + ot);
@@ -1111,6 +1115,7 @@
{
MemOp ot = decode->op[0].ot;
+ tcg_gen_mov_tl(cpu_cc_src, s->T0);
tcg_gen_neg_tl(s->T1, s->T0);
tcg_gen_and_tl(s->T0, s->T0, s->T1);
tcg_gen_mov_tl(cpu_cc_dst, s->T0);
@@ -1121,6 +1126,7 @@
{
MemOp ot = decode->op[0].ot;
+ tcg_gen_mov_tl(cpu_cc_src, s->T0);
tcg_gen_subi_tl(s->T1, s->T0, 1);
tcg_gen_xor_tl(s->T0, s->T0, s->T1);
tcg_gen_mov_tl(cpu_cc_dst, s->T0);
@@ -1131,6 +1137,7 @@
{
MemOp ot = decode->op[0].ot;
+ tcg_gen_mov_tl(cpu_cc_src, s->T0);
tcg_gen_subi_tl(s->T1, s->T0, 1);
tcg_gen_and_tl(s->T0, s->T0, s->T1);
tcg_gen_mov_tl(cpu_cc_dst, s->T0);
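
The gen_BEXTR() rewrite above builds an inverse mask: shifting -1 left by LEN leaves the low LEN bits clear, a length beyond the operand size selects an all-zero inverse mask, and the final and-not keeps exactly the requested field. A simplified 32-bit model of that identity (illustrative Python, not the TCG code):

    # BEXTR-style extraction using the inverse-mask trick from the patch above.
    MASK32 = 0xffffffff

    def bextr32(src: int, start: int, length: int) -> int:
        shifted = (src >> start) & MASK32 if start <= 31 else 0
        inv_mask = (-1 << length) & MASK32 if length <= 31 else 0
        return shifted & ~inv_mask & MASK32      # andc: keep the low `length` bits

    assert bextr32(0xdeadbeef, 8, 12) == 0xdbe
    assert bextr32(0xdeadbeef, 0, 40) == 0xdeadbeef   # length > 31 keeps all bits
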
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
index 817681f..a2d2f5c 100644
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@@ -28,6 +28,7 @@
#include "qemu/module.h"
#include "hw/qdev-properties.h"
#include "exec/exec-all.h"
+#include "exec/gdbstub.h"
#include "fpu/softfloat-helpers.h"
static const struct {
@@ -294,6 +295,9 @@
CPUMBState *env = &cpu->env;
cpu_set_cpustate_pointers(cpu);
+ gdb_register_coprocessor(CPU(cpu), mb_cpu_gdb_read_stack_protect,
+ mb_cpu_gdb_write_stack_protect, 2,
+ "microblaze-stack-protect.xml", 0);
set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
@@ -422,7 +426,8 @@
cc->sysemu_ops = &mb_sysemu_ops;
#endif
device_class_set_props(dc, mb_properties);
- cc->gdb_num_core_regs = 32 + 27;
+ cc->gdb_num_core_regs = 32 + 25;
+ cc->gdb_core_xml_file = "microblaze-core.xml";
cc->disas_set_info = mb_disas_set_info;
cc->tcg_ops = &mb_tcg_ops;
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
index 1e84dd8..e541fbb 100644
--- a/target/microblaze/cpu.h
+++ b/target/microblaze/cpu.h
@@ -367,6 +367,8 @@
MemTxAttrs *attrs);
int mb_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int mb_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+int mb_cpu_gdb_read_stack_protect(CPUArchState *cpu, GByteArray *buf, int reg);
+int mb_cpu_gdb_write_stack_protect(CPUArchState *cpu, uint8_t *buf, int reg);
static inline uint32_t mb_cpu_read_msr(const CPUMBState *env)
{
diff --git a/target/microblaze/gdbstub.c b/target/microblaze/gdbstub.c
index 2e6e070..8143fca 100644
--- a/target/microblaze/gdbstub.c
+++ b/target/microblaze/gdbstub.c
@@ -39,8 +39,11 @@
GDB_PVR0 = 32 + 6,
GDB_PVR11 = 32 + 17,
GDB_EDR = 32 + 18,
- GDB_SLR = 32 + 25,
- GDB_SHR = 32 + 26,
+};
+
+enum {
+ GDB_SP_SHL,
+ GDB_SP_SHR,
};
int mb_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
@@ -83,12 +86,6 @@
case GDB_EDR:
val = env->edr;
break;
- case GDB_SLR:
- val = env->slr;
- break;
- case GDB_SHR:
- val = env->shr;
- break;
default:
/* Other SRegs aren't modeled, so report a value of 0 */
val = 0;
@@ -97,6 +94,23 @@
return gdb_get_reg32(mem_buf, val);
}
+int mb_cpu_gdb_read_stack_protect(CPUMBState *env, GByteArray *mem_buf, int n)
+{
+ uint32_t val;
+
+ switch (n) {
+ case GDB_SP_SHL:
+ val = env->slr;
+ break;
+ case GDB_SP_SHR:
+ val = env->shr;
+ break;
+ default:
+ return 0;
+ }
+ return gdb_get_reg32(mem_buf, val);
+}
+
int mb_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
{
MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
@@ -135,12 +149,21 @@
case GDB_EDR:
env->edr = tmp;
break;
- case GDB_SLR:
- env->slr = tmp;
+ }
+ return 4;
+}
+
+int mb_cpu_gdb_write_stack_protect(CPUMBState *env, uint8_t *mem_buf, int n)
+{
+ switch (n) {
+ case GDB_SP_SHL:
+ env->slr = ldl_p(mem_buf);
break;
- case GDB_SHR:
- env->shr = tmp;
+ case GDB_SP_SHR:
+ env->shr = ldl_p(mem_buf);
break;
+ default:
+ return 0;
}
return 4;
}
diff --git a/tests/avocado/avocado_qemu/__init__.py b/tests/avocado/avocado_qemu/__init__.py
index 25a5468..a313e88 100644
--- a/tests/avocado/avocado_qemu/__init__.py
+++ b/tests/avocado/avocado_qemu/__init__.py
@@ -274,6 +274,10 @@
super().setUp('qemu-system-')
+ accel_required = self._get_unique_tag_val('accel')
+ if accel_required:
+ self.require_accelerator(accel_required)
+
self.machine = self.params.get('machine',
default=self._get_unique_tag_val('machine'))
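
With the setUp() hunk above, an avocado test that carries a single accel tag is cancelled automatically when that accelerator is unavailable, rather than each test calling require_accelerator() by hand. A hedged sketch of a test relying on it (the class name and test body are made up for illustration):

    # Hypothetical avocado test relying on the new accel tag handling.
    from avocado_qemu import QemuSystemTest

    class ExampleKvmOnlyTest(QemuSystemTest):
        """
        :avocado: tags=arch:x86_64
        :avocado: tags=accel:kvm
        """

        def test_boots(self):
            # setUp() has already called self.require_accelerator('kvm')
            # because of the accel tag above; no explicit check needed here.
            self.vm.add_args('-accel', 'kvm')
            self.vm.launch()
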
diff --git a/tests/avocado/boot_linux.py b/tests/avocado/boot_linux.py
index b3e58fa..fe0bb18 100644
--- a/tests/avocado/boot_linux.py
+++ b/tests/avocado/boot_linux.py
@@ -58,52 +58,16 @@
self.launch_and_wait(set_up_ssh_connection=False)
-# For Aarch64 we only boot KVM tests in CI as the TCG tests are very
-# heavyweight. There are lighter weight distros which we use in the
-# machine_aarch64_virt.py tests.
+# For Aarch64 we only boot KVM tests in CI as booting the current
+# Fedora OS in TCG tests is very heavyweight. There are lighter weight
+# distros which we use in the machine_aarch64_virt.py tests.
class BootLinuxAarch64(LinuxTest):
"""
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
- :avocado: tags=machine:gic-version=2
"""
timeout = 720
- def add_common_args(self):
- self.vm.add_args('-bios',
- os.path.join(BUILD_DIR, 'pc-bios',
- 'edk2-aarch64-code.fd'))
- self.vm.add_args('-device', 'virtio-rng-pci,rng=rng0')
- self.vm.add_args('-object', 'rng-random,id=rng0,filename=/dev/urandom')
-
- @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
- def test_fedora_cloud_tcg_gicv2(self):
- """
- :avocado: tags=accel:tcg
- :avocado: tags=cpu:max
- :avocado: tags=device:gicv2
- """
- self.require_accelerator("tcg")
- self.vm.add_args("-accel", "tcg")
- self.vm.add_args("-cpu", "max,lpa2=off")
- self.vm.add_args("-machine", "virt,gic-version=2")
- self.add_common_args()
- self.launch_and_wait(set_up_ssh_connection=False)
-
- @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
- def test_fedora_cloud_tcg_gicv3(self):
- """
- :avocado: tags=accel:tcg
- :avocado: tags=cpu:max
- :avocado: tags=device:gicv3
- """
- self.require_accelerator("tcg")
- self.vm.add_args("-accel", "tcg")
- self.vm.add_args("-cpu", "max,lpa2=off")
- self.vm.add_args("-machine", "virt,gic-version=3")
- self.add_common_args()
- self.launch_and_wait(set_up_ssh_connection=False)
-
def test_virt_kvm(self):
"""
:avocado: tags=accel:kvm
@@ -112,7 +76,11 @@
self.require_accelerator("kvm")
self.vm.add_args("-accel", "kvm")
self.vm.add_args("-machine", "virt,gic-version=host")
- self.add_common_args()
+ self.vm.add_args('-bios',
+ os.path.join(BUILD_DIR, 'pc-bios',
+ 'edk2-aarch64-code.fd'))
+ self.vm.add_args('-device', 'virtio-rng-pci,rng=rng0')
+ self.vm.add_args('-object', 'rng-random,id=rng0,filename=/dev/urandom')
self.launch_and_wait(set_up_ssh_connection=False)
diff --git a/tests/avocado/boot_linux_console.py b/tests/avocado/boot_linux_console.py
index be60f8c..574609b 100644
--- a/tests/avocado/boot_linux_console.py
+++ b/tests/avocado/boot_linux_console.py
@@ -997,6 +997,7 @@
def test_aarch64_raspi3_atf(self):
"""
+ :avocado: tags=accel:tcg
:avocado: tags=arch:aarch64
:avocado: tags=machine:raspi3b
:avocado: tags=cpu:cortex-a53
diff --git a/tests/avocado/machine_aarch64_virt.py b/tests/avocado/machine_aarch64_virt.py
index c2b2ba2..25dab8d 100644
--- a/tests/avocado/machine_aarch64_virt.py
+++ b/tests/avocado/machine_aarch64_virt.py
@@ -10,11 +10,14 @@
import time
import os
+import logging
from avocado_qemu import QemuSystemTest
from avocado_qemu import wait_for_console_pattern
from avocado_qemu import exec_command
from avocado_qemu import BUILD_DIR
+from avocado.utils import process
+from avocado.utils.path import find_command
class Aarch64VirtMachine(QemuSystemTest):
KERNEL_COMMON_COMMAND_LINE = 'printk.time=0 '
@@ -65,16 +68,15 @@
self.wait_for_console_pattern('Welcome to Alpine Linux 3.16')
- def test_aarch64_virt(self):
+ def common_aarch64_virt(self, machine):
"""
- :avocado: tags=arch:aarch64
- :avocado: tags=machine:virt
- :avocado: tags=accel:tcg
- :avocado: tags=cpu:max
+ Common code to launch basic virt machine with kernel+initrd
+ and a scratch disk.
"""
+ logger = logging.getLogger('aarch64_virt')
+
kernel_url = ('https://fileserver.linaro.org/s/'
'z6B2ARM7DQT3HWN/download')
-
kernel_hash = 'ed11daab50c151dde0e1e9c9cb8b2d9bd3215347'
kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
@@ -83,13 +85,62 @@
'console=ttyAMA0')
self.require_accelerator("tcg")
self.vm.add_args('-cpu', 'max,pauth-impdef=on',
+ '-machine', machine,
'-accel', 'tcg',
'-kernel', kernel_path,
'-append', kernel_command_line)
+
+ # A RNG offers an easy way to generate a few IRQs
+ self.vm.add_args('-device', 'virtio-rng-pci,rng=rng0')
+ self.vm.add_args('-object',
+ 'rng-random,id=rng0,filename=/dev/urandom')
+
+ # Also add a scratch block device
+ logger.info('creating scratch qcow2 image')
+ image_path = os.path.join(self.workdir, 'scratch.qcow2')
+ qemu_img = os.path.join(BUILD_DIR, 'qemu-img')
+ if not os.path.exists(qemu_img):
+ qemu_img = find_command('qemu-img', False)
+ if qemu_img is False:
+ self.cancel('Could not find "qemu-img", which is required to '
+ 'create the temporary qcow2 image')
+ cmd = '%s create -f qcow2 %s 8M' % (qemu_img, image_path)
+ process.run(cmd)
+
+ # Add the device
+ self.vm.add_args('-blockdev',
+ f"driver=qcow2,file.driver=file,file.filename={image_path},node-name=scratch")
+ self.vm.add_args('-device',
+ 'virtio-blk-device,drive=scratch')
+
self.vm.launch()
self.wait_for_console_pattern('Welcome to Buildroot')
time.sleep(0.1)
exec_command(self, 'root')
time.sleep(0.1)
+ exec_command(self, 'dd if=/dev/hwrng of=/dev/vda bs=512 count=4')
+ time.sleep(0.1)
+ exec_command(self, 'md5sum /dev/vda')
+ time.sleep(0.1)
+ exec_command(self, 'cat /proc/interrupts')
+ time.sleep(0.1)
exec_command(self, 'cat /proc/self/maps')
time.sleep(0.1)
+
+ def test_aarch64_virt_gicv3(self):
+ """
+ :avocado: tags=arch:aarch64
+ :avocado: tags=machine:virt
+ :avocado: tags=accel:tcg
+ :avocado: tags=cpu:max
+ """
+ self.common_aarch64_virt("virt,gic_version=3")
+
+ def test_aarch64_virt_gicv2(self):
+ """
+ :avocado: tags=arch:aarch64
+ :avocado: tags=machine:virt
+ :avocado: tags=accel:tcg
+ :avocado: tags=cpu:max
+ """
+ self.common_aarch64_virt("virt,gic-version=2")
diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py
index d2921e7..680c314 100644
--- a/tests/avocado/reverse_debugging.py
+++ b/tests/avocado/reverse_debugging.py
@@ -173,6 +173,10 @@
vm.shutdown()
class ReverseDebugging_X86_64(ReverseDebugging):
+ """
+ :avocado: tags=accel:tcg
+ """
+
REG_PC = 0x10
REG_CS = 0x12
def get_pc(self, g):
@@ -190,6 +194,10 @@
self.reverse_debugging()
class ReverseDebugging_AArch64(ReverseDebugging):
+ """
+ :avocado: tags=accel:tcg
+ """
+
REG_PC = 32
# unidentified gitlab timeout problem
diff --git a/tests/avocado/virtiofs_submounts.py b/tests/avocado/virtiofs_submounts.py
deleted file mode 100644
index e6dc32f..0000000
--- a/tests/avocado/virtiofs_submounts.py
+++ /dev/null
@@ -1,217 +0,0 @@
-import logging
-import re
-import os
-import subprocess
-import time
-
-from avocado import skipUnless
-from avocado_qemu import LinuxTest, BUILD_DIR
-from avocado_qemu import has_cmds
-from avocado_qemu import run_cmd
-from avocado_qemu import wait_for_console_pattern
-from avocado.utils import ssh
-
-
-class VirtiofsSubmountsTest(LinuxTest):
- """
- :avocado: tags=arch:x86_64
- :avocado: tags=accel:kvm
- """
-
- def run(self, args, ignore_error=False):
- stdout, stderr, ret = run_cmd(args)
-
- if ret != 0:
- cmdline = ' '.join(args)
- if not ignore_error:
- self.fail(f'{cmdline}: Returned {ret}: {stderr}')
- else:
- self.log.warn(f'{cmdline}: Returned {ret}: {stderr}')
-
- return (stdout, stderr, ret)
-
- def set_up_shared_dir(self):
- self.shared_dir = os.path.join(self.workdir, 'virtiofs-shared')
-
- os.mkdir(self.shared_dir)
-
- self.run(('cp', self.get_data('guest.sh'),
- os.path.join(self.shared_dir, 'check.sh')))
-
- self.run(('cp', self.get_data('guest-cleanup.sh'),
- os.path.join(self.shared_dir, 'cleanup.sh')))
-
- def set_up_virtiofs(self):
- attmp = os.getenv('AVOCADO_TESTS_COMMON_TMPDIR')
- self.vfsdsock = os.path.join(attmp, 'vfsdsock')
-
- self.run(('sudo', '-n', 'rm', '-f', self.vfsdsock), ignore_error=True)
-
- self.virtiofsd = \
- subprocess.Popen(('sudo', '-n',
- 'tools/virtiofsd/virtiofsd',
- f'--socket-path={self.vfsdsock}',
- '-o', f'source={self.shared_dir}',
- '-o', 'cache=always',
- '-o', 'xattr',
- '-o', 'announce_submounts',
- '-f'),
- stdout=subprocess.DEVNULL,
- stderr=subprocess.PIPE,
- universal_newlines=True)
-
- while not os.path.exists(self.vfsdsock):
- if self.virtiofsd.poll() is not None:
- self.fail('virtiofsd exited prematurely: ' +
- self.virtiofsd.communicate()[1])
- time.sleep(0.1)
-
- self.run(('sudo', '-n', 'chmod', 'go+rw', self.vfsdsock))
-
- self.vm.add_args('-chardev',
- f'socket,id=vfsdsock,path={self.vfsdsock}',
- '-device',
- 'vhost-user-fs-pci,queue-size=1024,chardev=vfsdsock' \
- ',tag=host',
- '-object',
- 'memory-backend-file,id=mem,size=1G,' \
- 'mem-path=/dev/shm,share=on',
- '-numa',
- 'node,memdev=mem')
-
- def set_up_nested_mounts(self):
- scratch_dir = os.path.join(self.shared_dir, 'scratch')
- try:
- os.mkdir(scratch_dir)
- except FileExistsError:
- pass
-
- args = ['bash', self.get_data('host.sh'), scratch_dir]
- if self.seed:
- args += [self.seed]
-
- out, _, _ = self.run(args)
- seed = re.search(r'^Seed: \d+', out)
- self.log.info(seed[0])
-
- def mount_in_guest(self):
- self.ssh_command('mkdir -p /mnt/host')
- self.ssh_command('mount -t virtiofs host /mnt/host')
-
- def check_in_guest(self):
- self.ssh_command('bash /mnt/host/check.sh /mnt/host/scratch/share')
-
- def live_cleanup(self):
- self.ssh_command('bash /mnt/host/cleanup.sh /mnt/host/scratch')
-
- # It would be nice if the above was sufficient to make virtiofsd clear
- # all references to the mounted directories (so they can be unmounted
- # on the host), but unfortunately it is not. To do so, we have to
- # resort to a remount.
- self.ssh_command('mount -o remount /mnt/host')
-
- scratch_dir = os.path.join(self.shared_dir, 'scratch')
- self.run(('bash', self.get_data('cleanup.sh'), scratch_dir))
-
- @skipUnless(*has_cmds(('sudo -n', ('sudo', '-n', 'true')),
- 'ssh-keygen', 'bash', 'losetup', 'mkfs.xfs', 'mount'))
- def setUp(self):
- vmlinuz = self.params.get('vmlinuz')
- if vmlinuz is None:
- """
- The Linux kernel supports FUSE auto-submounts only as of 5.10.
- boot_linux.py currently provides Fedora 31, whose kernel is too
- old, so this test cannot pass with the on-image kernel (you are
- welcome to try, hence the option to force such a test with
- -p vmlinuz=''). Therefore, for now the user must provide a
- sufficiently new custom kernel, or effectively explicitly
- request failure with -p vmlinuz=''.
- Once an image with a sufficiently new kernel is available
- (probably Fedora 34), we can make -p vmlinuz='' the default, so
- that this parameter no longer needs to be specified.
- """
- self.cancel('vmlinuz parameter not set; you must point it to a '
- 'Linux kernel binary to test (to run this test with ' \
- 'the on-image kernel, set it to an empty string)')
-
- self.seed = self.params.get('seed')
-
- self.ssh_key = os.path.join(self.workdir, 'id_ed25519')
-
- self.run(('ssh-keygen', '-N', '', '-t', 'ed25519', '-f', self.ssh_key))
-
- pubkey = self.ssh_key + '.pub'
-
- super(VirtiofsSubmountsTest, self).setUp(pubkey)
-
- if vmlinuz:
- self.vm.add_args('-kernel', vmlinuz,
- '-append', 'console=ttyS0 root=/dev/sda1')
-
- self.require_accelerator("kvm")
- self.vm.add_args('-accel', 'kvm')
-
- def tearDown(self):
- try:
- self.vm.shutdown()
- except:
- pass
-
- scratch_dir = os.path.join(self.shared_dir, 'scratch')
- self.run(('bash', self.get_data('cleanup.sh'), scratch_dir),
- ignore_error=True)
-
- def test_pre_virtiofsd_set_up(self):
- self.set_up_shared_dir()
-
- self.set_up_nested_mounts()
-
- self.set_up_virtiofs()
- self.launch_and_wait()
- self.mount_in_guest()
- self.check_in_guest()
-
- def test_pre_launch_set_up(self):
- self.set_up_shared_dir()
- self.set_up_virtiofs()
-
- self.set_up_nested_mounts()
-
- self.launch_and_wait()
- self.mount_in_guest()
- self.check_in_guest()
-
- def test_post_launch_set_up(self):
- self.set_up_shared_dir()
- self.set_up_virtiofs()
- self.launch_and_wait()
-
- self.set_up_nested_mounts()
-
- self.mount_in_guest()
- self.check_in_guest()
-
- def test_post_mount_set_up(self):
- self.set_up_shared_dir()
- self.set_up_virtiofs()
- self.launch_and_wait()
- self.mount_in_guest()
-
- self.set_up_nested_mounts()
-
- self.check_in_guest()
-
- def test_two_runs(self):
- self.set_up_shared_dir()
-
- self.set_up_nested_mounts()
-
- self.set_up_virtiofs()
- self.launch_and_wait()
- self.mount_in_guest()
- self.check_in_guest()
-
- self.live_cleanup()
- self.set_up_nested_mounts()
-
- self.check_in_guest()
diff --git a/tests/bench/meson.build b/tests/bench/meson.build
index 279a8fc..7477a1f 100644
--- a/tests/bench/meson.build
+++ b/tests/bench/meson.build
@@ -3,6 +3,12 @@
sources: 'qht-bench.c',
dependencies: [qemuutil])
+if have_system
+xbzrle_bench = executable('xbzrle-bench',
+ sources: 'xbzrle-bench.c',
+ dependencies: [qemuutil,migration])
+endif
+
executable('atomic_add-bench',
sources: files('atomic_add-bench.c'),
dependencies: [qemuutil],
diff --git a/tests/bench/xbzrle-bench.c b/tests/bench/xbzrle-bench.c
new file mode 100644
index 0000000..8848a3a
--- /dev/null
+++ b/tests/bench/xbzrle-bench.c
@@ -0,0 +1,469 @@
+/*
+ * Xor Based Zero Run Length Encoding benchmarks.
+ *
+ * Copyright 2013 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ * Orit Wasserman <owasserm@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "../migration/xbzrle.h"
+
+#if defined(CONFIG_AVX512BW_OPT)
+#define XBZRLE_PAGE_SIZE 4096
+static bool is_cpu_support_avx512bw;
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+ unsigned max = __get_cpuid_max(0, NULL);
+ int a, b, c, d;
+ is_cpu_support_avx512bw = false;
+ if (max >= 1) {
+ __cpuid(1, a, b, c, d);
+ /* We must check that AVX is not just available, but usable. */
+ if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+ int bv;
+ __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+ __cpuid_count(7, 0, a, b, c, d);
+ /* 0xe6:
+ * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+ * and ZMM16-ZMM31 state are enabled by OS)
+ * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+ */
+ if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+ is_cpu_support_avx512bw = true;
+ }
+ }
+ }
+ return;
+}
+
+struct ResTime {
+ float t_raw;
+ float t_512;
+};
+
+
+/* Function prototypes
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+ uint8_t *dst, int dlen);
+*/
+static void encode_decode_zero(struct ResTime *res)
+{
+ uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ int i = 0;
+ int dlen = 0, dlen512 = 0;
+ int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+ for (i = diff_len; i > 0; i--) {
+ buffer[1000 + i] = i;
+ buffer512[1000 + i] = i;
+ }
+
+ buffer[1000 + diff_len + 3] = 103;
+ buffer[1000 + diff_len + 5] = 105;
+
+ buffer512[1000 + diff_len + 3] = 103;
+ buffer512[1000 + diff_len + 5] = 105;
+
+ /* encode zero page */
+ time_t t_start, t_end, t_start512, t_end512;
+ t_start = clock();
+ dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
+ XBZRLE_PAGE_SIZE);
+ t_end = clock();
+ float time_val = difftime(t_end, t_start);
+ g_assert(dlen == 0);
+
+ t_start512 = clock();
+ dlen512 = xbzrle_encode_buffer_avx512(buffer512, buffer512, XBZRLE_PAGE_SIZE,
+ compressed512, XBZRLE_PAGE_SIZE);
+ t_end512 = clock();
+ float time_val512 = difftime(t_end512, t_start512);
+ g_assert(dlen512 == 0);
+
+ res->t_raw = time_val;
+ res->t_512 = time_val512;
+
+ g_free(buffer);
+ g_free(compressed);
+ g_free(buffer512);
+ g_free(compressed512);
+
+}
+
+static void test_encode_decode_zero_avx512(void)
+{
+ int i;
+ float time_raw = 0.0, time_512 = 0.0;
+ struct ResTime res;
+ for (i = 0; i < 10000; i++) {
+ encode_decode_zero(&res);
+ time_raw += res.t_raw;
+ time_512 += res.t_512;
+ }
+ printf("Zero test:\n");
+ printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+ printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_unchanged(struct ResTime *res)
+{
+ uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ int i = 0;
+ int dlen = 0, dlen512 = 0;
+ int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+ for (i = diff_len; i > 0; i--) {
+ test[1000 + i] = i + 4;
+ test512[1000 + i] = i + 4;
+ }
+
+ test[1000 + diff_len + 3] = 107;
+ test[1000 + diff_len + 5] = 109;
+
+ test512[1000 + diff_len + 3] = 107;
+ test512[1000 + diff_len + 5] = 109;
+
+ /* test unchanged buffer */
+ time_t t_start, t_end, t_start512, t_end512;
+ t_start = clock();
+ dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed,
+ XBZRLE_PAGE_SIZE);
+ t_end = clock();
+ float time_val = difftime(t_end, t_start);
+ g_assert(dlen == 0);
+
+ t_start512 = clock();
+ dlen512 = xbzrle_encode_buffer_avx512(test512, test512, XBZRLE_PAGE_SIZE,
+ compressed512, XBZRLE_PAGE_SIZE);
+ t_end512 = clock();
+ float time_val512 = difftime(t_end512, t_start512);
+ g_assert(dlen512 == 0);
+
+ res->t_raw = time_val;
+ res->t_512 = time_val512;
+
+ g_free(test);
+ g_free(compressed);
+ g_free(test512);
+ g_free(compressed512);
+
+}
+
+static void test_encode_decode_unchanged_avx512(void)
+{
+ int i;
+ float time_raw = 0.0, time_512 = 0.0;
+ struct ResTime res;
+ for (i = 0; i < 10000; i++) {
+ encode_decode_unchanged(&res);
+ time_raw += res.t_raw;
+ time_512 += res.t_512;
+ }
+ printf("Unchanged test:\n");
+ printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+ printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_1_byte(struct ResTime *res)
+{
+ uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+ uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+ int dlen = 0, rc = 0, dlen512 = 0, rc512 = 0;
+ uint8_t buf[2];
+ uint8_t buf512[2];
+
+ test[XBZRLE_PAGE_SIZE - 1] = 1;
+ test512[XBZRLE_PAGE_SIZE - 1] = 1;
+
+ time_t t_start, t_end, t_start512, t_end512;
+ t_start = clock();
+ dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+ XBZRLE_PAGE_SIZE);
+ t_end = clock();
+ float time_val = difftime(t_end, t_start);
+ g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
+
+ rc = xbzrle_decode_buffer(compressed, dlen, buffer, XBZRLE_PAGE_SIZE);
+ g_assert(rc == XBZRLE_PAGE_SIZE);
+ g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
+
+ t_start512 = clock();
+ dlen512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
+ compressed512, XBZRLE_PAGE_SIZE);
+ t_end512 = clock();
+ float time_val512 = difftime(t_end512, t_start512);
+ g_assert(dlen512 == (uleb128_encode_small(&buf512[0], 4095) + 2));
+
+ rc512 = xbzrle_decode_buffer(compressed512, dlen512, buffer512,
+ XBZRLE_PAGE_SIZE);
+ g_assert(rc512 == XBZRLE_PAGE_SIZE);
+ g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
+
+ res->t_raw = time_val;
+ res->t_512 = time_val512;
+
+ g_free(buffer);
+ g_free(compressed);
+ g_free(test);
+ g_free(buffer512);
+ g_free(compressed512);
+ g_free(test512);
+
+}
+
+static void test_encode_decode_1_byte_avx512(void)
+{
+ int i;
+ float time_raw = 0.0, time_512 = 0.0;
+ struct ResTime res;
+ for (i = 0; i < 10000; i++) {
+ encode_decode_1_byte(&res);
+ time_raw += res.t_raw;
+ time_512 += res.t_512;
+ }
+ printf("1 byte test:\n");
+ printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+ printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_overflow(struct ResTime *res)
+{
+ uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ int i = 0, rc = 0, rc512 = 0;
+
+ for (i = 0; i < XBZRLE_PAGE_SIZE / 2 - 1; i++) {
+ test[i * 2] = 1;
+ test512[i * 2] = 1;
+ }
+
+ /* encode overflow */
+ time_t t_start, t_end, t_start512, t_end512;
+ t_start = clock();
+ rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+ XBZRLE_PAGE_SIZE);
+ t_end = clock();
+ float time_val = difftime(t_end, t_start);
+ g_assert(rc == -1);
+
+ t_start512 = clock();
+ rc512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
+ compressed512, XBZRLE_PAGE_SIZE);
+ t_end512 = clock();
+ float time_val512 = difftime(t_end512, t_start512);
+ g_assert(rc512 == -1);
+
+ res->t_raw = time_val;
+ res->t_512 = time_val512;
+
+ g_free(buffer);
+ g_free(compressed);
+ g_free(test);
+ g_free(buffer512);
+ g_free(compressed512);
+ g_free(test512);
+
+}
+
+static void test_encode_decode_overflow_avx512(void)
+{
+ int i;
+ float time_raw = 0.0, time_512 = 0.0;
+ struct ResTime res;
+ for (i = 0; i < 10000; i++) {
+ encode_decode_overflow(&res);
+ time_raw += res.t_raw;
+ time_512 += res.t_512;
+ }
+ printf("Overflow test:\n");
+ printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+ printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_range_avx512(struct ResTime *res)
+{
+ uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+ uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+ uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ int i = 0, rc = 0, rc512 = 0;
+ int dlen = 0, dlen512 = 0;
+
+ int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+ for (i = diff_len; i > 0; i--) {
+ buffer[1000 + i] = i;
+ test[1000 + i] = i + 4;
+ buffer512[1000 + i] = i;
+ test512[1000 + i] = i + 4;
+ }
+
+ buffer[1000 + diff_len + 3] = 103;
+ test[1000 + diff_len + 3] = 107;
+
+ buffer[1000 + diff_len + 5] = 105;
+ test[1000 + diff_len + 5] = 109;
+
+ buffer512[1000 + diff_len + 3] = 103;
+ test512[1000 + diff_len + 3] = 107;
+
+ buffer512[1000 + diff_len + 5] = 105;
+ test512[1000 + diff_len + 5] = 109;
+
+ /* test encode/decode */
+ time_t t_start, t_end, t_start512, t_end512;
+ t_start = clock();
+ dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+ XBZRLE_PAGE_SIZE);
+ t_end = clock();
+ float time_val = difftime(t_end, t_start);
+ rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
+ g_assert(rc < XBZRLE_PAGE_SIZE);
+ g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
+
+ t_start512 = clock();
+ dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
+ compressed512, XBZRLE_PAGE_SIZE);
+ t_end512 = clock();
+ float time_val512 = difftime(t_end512, t_start512);
+ rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
+ g_assert(rc512 < XBZRLE_PAGE_SIZE);
+ g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
+
+ res->t_raw = time_val;
+ res->t_512 = time_val512;
+
+ g_free(buffer);
+ g_free(compressed);
+ g_free(test);
+ g_free(buffer512);
+ g_free(compressed512);
+ g_free(test512);
+
+}
+
+static void test_encode_decode_avx512(void)
+{
+ int i;
+ float time_raw = 0.0, time_512 = 0.0;
+ struct ResTime res;
+ for (i = 0; i < 10000; i++) {
+ encode_decode_range_avx512(&res);
+ time_raw += res.t_raw;
+ time_512 += res.t_512;
+ }
+ printf("Encode decode test:\n");
+ printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+ printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_random(struct ResTime *res)
+{
+ uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+ uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+ uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+ int i = 0, rc = 0, rc512 = 0;
+ int dlen = 0, dlen512 = 0;
+
+ int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
+ /* store the index of diff */
+ int dirty_index[diff_len];
+ for (int j = 0; j < diff_len; j++) {
+ dirty_index[j] = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
+ }
+ for (i = diff_len - 1; i >= 0; i--) {
+ buffer[dirty_index[i]] = i;
+ test[dirty_index[i]] = i + 4;
+ buffer512[dirty_index[i]] = i;
+ test512[dirty_index[i]] = i + 4;
+ }
+
+ time_t t_start, t_end, t_start512, t_end512;
+ t_start = clock();
+ dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+ XBZRLE_PAGE_SIZE);
+ t_end = clock();
+ float time_val = difftime(t_end, t_start);
+ rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
+ g_assert(rc < XBZRLE_PAGE_SIZE);
+
+ t_start512 = clock();
+ dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
+ compressed512, XBZRLE_PAGE_SIZE);
+ t_end512 = clock();
+ float time_val512 = difftime(t_end512, t_start512);
+ rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
+ g_assert(rc512 < XBZRLE_PAGE_SIZE);
+
+ res->t_raw = time_val;
+ res->t_512 = time_val512;
+
+ g_free(buffer);
+ g_free(compressed);
+ g_free(test);
+ g_free(buffer512);
+ g_free(compressed512);
+ g_free(test512);
+
+}
+
+static void test_encode_decode_random_avx512(void)
+{
+ int i;
+ float time_raw = 0.0, time_512 = 0.0;
+ struct ResTime res;
+ for (i = 0; i < 10000; i++) {
+ encode_decode_random(&res);
+ time_raw += res.t_raw;
+ time_512 += res.t_512;
+ }
+ printf("Random test:\n");
+ printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+ printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+#endif
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+ g_test_rand_int();
+ #if defined(CONFIG_AVX512BW_OPT)
+ if (likely(is_cpu_support_avx512bw)) {
+ g_test_add_func("/xbzrle/encode_decode_zero", test_encode_decode_zero_avx512);
+ g_test_add_func("/xbzrle/encode_decode_unchanged",
+ test_encode_decode_unchanged_avx512);
+ g_test_add_func("/xbzrle/encode_decode_1_byte", test_encode_decode_1_byte_avx512);
+ g_test_add_func("/xbzrle/encode_decode_overflow",
+ test_encode_decode_overflow_avx512);
+ g_test_add_func("/xbzrle/encode_decode", test_encode_decode_avx512);
+ g_test_add_func("/xbzrle/encode_decode_random", test_encode_decode_random_avx512);
+ }
+ #endif
+ return g_test_run();
+}
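
A note on the timing approach used in the new benchmark above: each encoder call is bracketed with clock() and the per-call deltas are summed over 10,000 iterations, but difftime() is applied to clock_t values, so the figures printed as "ms" are effectively raw clock-tick differences on most platforms. A minimal standalone sketch of the same measurement pattern (hypothetical, not part of the patch; the encode call is replaced by a memcmp placeholder so it builds without the migration headers) that converts ticks to milliseconds explicitly:

#include <stdio.h>
#include <string.h>
#include <time.h>

#define PAGE_SIZE   4096
#define ITERATIONS  10000

int main(void)
{
    static unsigned char old_page[PAGE_SIZE], new_page[PAGE_SIZE];
    double total_ms = 0.0;

    for (int i = 0; i < ITERATIONS; i++) {
        new_page[i % PAGE_SIZE] ^= 1;   /* perturb the page slightly */

        clock_t start = clock();
        /* stand-in for xbzrle_encode_buffer(); any page-sized work will do */
        volatile int unused = memcmp(old_page, new_page, PAGE_SIZE);
        clock_t end = clock();
        (void)unused;

        /* convert clock ticks to milliseconds explicitly */
        total_ms += (double)(end - start) * 1000.0 / CLOCKS_PER_SEC;
    }

    printf("placeholder encode time: %f ms over %d runs\n", total_ms, ITERATIONS);
    return 0;
}
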
diff --git a/tests/docker/dockerfiles/python.docker b/tests/docker/dockerfiles/python.docker
index 56d8841..175c10a 100644
--- a/tests/docker/dockerfiles/python.docker
+++ b/tests/docker/dockerfiles/python.docker
@@ -7,7 +7,6 @@
ENV PACKAGES \
gcc \
make \
- pipenv \
python3 \
python3-pip \
python3-tox \
diff --git a/tests/migration/guestperf/engine.py b/tests/migration/guestperf/engine.py
index cc06fac..e69d16a 100644
--- a/tests/migration/guestperf/engine.py
+++ b/tests/migration/guestperf/engine.py
@@ -337,7 +337,7 @@
argv.extend(self._get_qemu_serial_args())
if self._debug:
- argv.extend(["-device", "sga"])
+ argv.extend(["-machine", "graphics=off"])
if hardware._prealloc_pages:
argv_source += ["-mem-path", "/dev/shm",
diff --git a/tests/qemu-iotests/186 b/tests/qemu-iotests/186
index 072e54e..eaf13c7 100755
--- a/tests/qemu-iotests/186
+++ b/tests/qemu-iotests/186
@@ -40,6 +40,7 @@
_supported_fmt qcow2
_supported_proto file fuse
_require_drivers null-co
+_require_devices virtio-scsi-pci
if [ "$QEMU_DEFAULT_MACHINE" != "pc" ]; then
_notrun "Requires a PC machine"
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 94aeb3f..3e82c63 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -720,7 +720,7 @@
signal.setitimer(signal.ITIMER_REAL, 0)
return False
def timeout(self, signum, frame):
- raise Exception(self.errmsg)
+ raise TimeoutError(self.errmsg)
def file_pattern(name):
return "{0}-{1}".format(os.getpid(), name)
@@ -804,7 +804,7 @@
elif imgproto == 'ssh':
return "ssh://%s@127.0.0.1:22%s" % (os.environ.get('USER'), path)
else:
- raise Exception("Protocol %s not supported" % (imgproto))
+ raise ValueError("Protocol %s not supported" % (imgproto))
class VM(qtest.QEMUQtestMachine):
'''A QEMU VM'''
diff --git a/tests/qemu-iotests/tests/detect-zeroes-registered-buf b/tests/qemu-iotests/tests/detect-zeroes-registered-buf
new file mode 100755
index 0000000..edb5f2c
--- /dev/null
+++ b/tests/qemu-iotests/tests/detect-zeroes-registered-buf
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+# group: rw auto quick
+#
+# Check that detect-zeroes=unmap works on writes with registered I/O buffers.
+# This is a regression test for
+# https://gitlab.com/qemu-project/qemu/-/issues/1404 where I/O requests failed
+# unexpectedly.
+#
+# Copyright Red Hat
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=stefanha@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+cd ..
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto generic
+
+size=128M
+_make_test_img $size
+IMGSPEC="driver=$IMGFMT,file.filename=$TEST_IMG,discard=unmap,detect-zeroes=unmap"
+
+echo
+echo "== writing zero buffer to image =="
+QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO -c "write -r -P 0 0 4k" --image-opts "$IMGSPEC" | _filter_qemu_io
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/tests/detect-zeroes-registered-buf.out b/tests/qemu-iotests/tests/detect-zeroes-registered-buf.out
new file mode 100644
index 0000000..42c56fc
--- /dev/null
+++ b/tests/qemu-iotests/tests/detect-zeroes-registered-buf.out
@@ -0,0 +1,7 @@
+QA output created by detect-zeroes-registered-buf
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+
+== writing zero buffer to image ==
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
diff --git a/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test b/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test
index fc9c4b4..dda55fa 100755
--- a/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test
+++ b/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test
@@ -84,7 +84,7 @@
e['vm'] = 'SRC'
for e in self.vm_b_events:
e['vm'] = 'DST'
- events = (self.vm_a_events + self.vm_b_events)
+ events = self.vm_a_events + self.vm_b_events
events = [(e['timestamp']['seconds'],
e['timestamp']['microseconds'],
e['vm'],
diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
index 8691802..1cb0813 100644
--- a/tests/qtest/arm-cpu-features.c
+++ b/tests/qtest/arm-cpu-features.c
@@ -21,7 +21,7 @@
#define SVE_MAX_VQ 16
#define MACHINE "-machine virt,gic-version=max -accel tcg "
-#define MACHINE_KVM "-machine virt,gic-version=max -accel kvm -accel tcg "
+#define MACHINE_KVM "-machine virt,gic-version=max -accel kvm "
#define QUERY_HEAD "{ 'execute': 'query-cpu-model-expansion', " \
" 'arguments': { 'type': 'full', "
#define QUERY_TAIL "}}"
@@ -607,31 +607,39 @@
{
g_test_init(&argc, &argv, NULL);
- qtest_add_data_func("/arm/query-cpu-model-expansion",
- NULL, test_query_cpu_model_expansion);
+ if (qtest_has_accel("tcg")) {
+ qtest_add_data_func("/arm/query-cpu-model-expansion",
+ NULL, test_query_cpu_model_expansion);
+ }
+
+ if (!g_str_equal(qtest_get_arch(), "aarch64")) {
+ goto out;
+ }
/*
* For now we only run KVM specific tests with AArch64 QEMU in
 * order to avoid attempting to run an AArch32 QEMU with KVM on
* AArch64 hosts. That won't work and isn't easy to detect.
*/
- if (g_str_equal(qtest_get_arch(), "aarch64") && qtest_has_accel("kvm")) {
+ if (qtest_has_accel("kvm")) {
/*
 * These tests target the 'host' CPU type, so register them only if
* KVM is available.
*/
qtest_add_data_func("/arm/kvm/query-cpu-model-expansion",
NULL, test_query_cpu_model_expansion_kvm);
- }
- if (g_str_equal(qtest_get_arch(), "aarch64")) {
- qtest_add_data_func("/arm/max/query-cpu-model-expansion/sve-max-vq-8",
- NULL, sve_tests_sve_max_vq_8);
- qtest_add_data_func("/arm/max/query-cpu-model-expansion/sve-off",
- NULL, sve_tests_sve_off);
qtest_add_data_func("/arm/kvm/query-cpu-model-expansion/sve-off",
NULL, sve_tests_sve_off_kvm);
}
+ if (qtest_has_accel("tcg")) {
+ qtest_add_data_func("/arm/max/query-cpu-model-expansion/sve-max-vq-8",
+ NULL, sve_tests_sve_max_vq_8);
+ qtest_add_data_func("/arm/max/query-cpu-model-expansion/sve-off",
+ NULL, sve_tests_sve_off);
+ }
+
+out:
return g_test_run();
}
diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index d8c8cda..d29a4e4 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -1008,6 +1008,12 @@
.machine = MACHINE_Q35,
.variant = ".multi-bridge",
};
+
+ if (!qtest_has_device("pcie-root-port")) {
+ g_test_skip("Device pcie-root-port is not available");
+ goto out;
+ }
+
test_vm_prepare("-S"
" -device virtio-balloon,id=balloon0,addr=0x4.0x2"
" -device pcie-root-port,id=rp0,multifunction=on,"
@@ -1043,6 +1049,7 @@
/* check that reboot/reset doesn't change any ACPI tables */
qtest_qmp_send(data.qts, "{'execute':'system_reset' }");
process_acpi_tables(&data);
+out:
free_test_data(&data);
}
@@ -1396,6 +1403,11 @@
{
test_data data;
+ if (!qtest_has_device("nvdimm")) {
+ g_test_skip("Device nvdimm is not available");
+ return;
+ }
+
memset(&data, 0, sizeof(data));
data.machine = machine;
data.variant = ".dimmpxm";
@@ -1444,6 +1456,11 @@
.scan_len = 256ULL * 1024 * 1024,
};
+ if (!qtest_has_device("nvdimm")) {
+ g_test_skip("Device nvdimm is not available");
+ goto out;
+ }
+
data.variant = ".memhp";
test_acpi_one(" -machine nvdimm=on"
" -cpu cortex-a57"
@@ -1457,7 +1474,7 @@
" -device pc-dimm,id=dimm0,memdev=ram2,node=0"
" -device nvdimm,id=dimm1,memdev=nvm0,node=1",
&data);
-
+out:
free_test_data(&data);
}
@@ -1475,6 +1492,11 @@
{
test_data data;
+ if (!qtest_has_device("virtio-blk-device")) {
+ g_test_skip("Device virtio-blk-device is not available");
+ return;
+ }
+
test_acpi_microvm_prepare(&data);
test_acpi_one(" -machine microvm,acpi=on,ioapic2=off,rtc=off",
&data);
@@ -1485,6 +1507,11 @@
{
test_data data;
+ if (!qtest_has_device("virtio-blk-device")) {
+ g_test_skip("Device virtio-blk-device is not available");
+ return;
+ }
+
test_acpi_microvm_prepare(&data);
data.variant = ".usb";
test_acpi_one(" -machine microvm,acpi=on,ioapic2=off,usb=on,rtc=off",
@@ -1496,6 +1523,11 @@
{
test_data data;
+ if (!qtest_has_device("virtio-blk-device")) {
+ g_test_skip("Device virtio-blk-device is not available");
+ return;
+ }
+
test_acpi_microvm_prepare(&data);
data.variant = ".rtc";
test_acpi_one(" -machine microvm,acpi=on,ioapic2=off,rtc=on",
@@ -1507,6 +1539,11 @@
{
test_data data;
+ if (!qtest_has_device("virtio-blk-device")) {
+ g_test_skip("Device virtio-blk-device is not available");
+ return;
+ }
+
test_acpi_microvm_prepare(&data);
data.variant = ".pcie";
data.tcg_only = true; /* need constant host-phys-bits */
@@ -1519,6 +1556,11 @@
{
test_data data;
+ if (!qtest_has_device("virtio-blk-device")) {
+ g_test_skip("Device virtio-blk-device is not available");
+ return;
+ }
+
test_acpi_microvm_prepare(&data);
data.variant = ".ioapic2";
test_acpi_one(" -machine microvm,acpi=on,ioapic2=on,rtc=off",
@@ -1558,6 +1600,12 @@
.ram_start = 0x40000000ULL,
.scan_len = 128ULL * 1024 * 1024,
};
+
+ if (!qtest_has_device("pcie-root-port")) {
+ g_test_skip("Device pcie-root-port is not available");
+ goto out;
+ }
+
/*
 * While using -cdrom, the cdrom would be auto-plugged into pxb-pcie;
 * the reason is that the bus of pxb-pcie is also a root bus, which would lead
@@ -1576,7 +1624,7 @@
" -cpu cortex-a57"
" -device pxb-pcie,bus_nr=128",
&data);
-
+out:
free_test_data(&data);
}
@@ -1764,6 +1812,12 @@
gchar *params;
test_data data;
+ if (!qtest_has_device("virtio-blk-device")) {
+ g_test_skip("Device virtio-blk-device is not available");
+ g_free(tmp_path);
+ return;
+ }
+
test_acpi_microvm_prepare(&data);
data.variant = ".pcie";
data.tcg_only = true; /* need constant host-phys-bits */
@@ -1824,6 +1878,11 @@
.variant = ".viot",
};
+ if (!qtest_has_device("virtio-iommu")) {
+ g_test_skip("Device virtio-iommu is not available");
+ goto out;
+ }
+
/*
* To keep things interesting, two buses bypass the IOMMU.
* VIOT should only describes the other two buses.
@@ -1834,6 +1893,7 @@
"-device pxb-pcie,bus_nr=0x20,id=pcie.200,bus=pcie.0,bypass_iommu=on "
"-device pxb-pcie,bus_nr=0x30,id=pcie.300,bus=pcie.0",
&data);
+out:
free_test_data(&data);
}
@@ -1894,8 +1954,10 @@
.scan_len = 128ULL * 1024 * 1024,
};
- test_acpi_one("-cpu cortex-a57 "
- "-device virtio-iommu-pci", &data);
+ if (qtest_has_device("virtio-iommu")) {
+ test_acpi_one("-cpu cortex-a57 "
+ "-device virtio-iommu-pci", &data);
+ }
free_test_data(&data);
}
@@ -2004,6 +2066,11 @@
test_data data;
char *args;
+ if (!qtest_has_device("virtio-blk-device")) {
+ g_test_skip("Device virtio-blk-device is not available");
+ return;
+ }
+
test_acpi_microvm_prepare(&data);
args = test_acpi_create_args(&data,
diff --git a/tests/qtest/device-plug-test.c b/tests/qtest/device-plug-test.c
index 5a6afa2..01cecd6 100644
--- a/tests/qtest/device-plug-test.c
+++ b/tests/qtest/device-plug-test.c
@@ -64,15 +64,21 @@
static void test_pci_unplug_request(void)
{
+ QTestState *qtest;
const char *arch = qtest_get_arch();
const char *machine_addition = "";
+ if (!qtest_has_device("virtio-mouse-pci")) {
+ g_test_skip("Device virtio-mouse-pci not available");
+ return;
+ }
+
if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
machine_addition = "-machine pc";
}
- QTestState *qtest = qtest_initf("%s -device virtio-mouse-pci,id=dev0",
- machine_addition);
+ qtest = qtest_initf("%s -device virtio-mouse-pci,id=dev0",
+ machine_addition);
process_device_remove(qtest, "dev0");
@@ -81,11 +87,17 @@
static void test_q35_pci_unplug_request(void)
{
+ QTestState *qtest;
- QTestState *qtest = qtest_initf("-machine q35 "
- "-device pcie-root-port,id=p1 "
- "-device pcie-pci-bridge,bus=p1,id=b1 "
- "-device virtio-mouse-pci,bus=b1,id=dev0");
+ if (!qtest_has_device("virtio-mouse-pci")) {
+ g_test_skip("Device virtio-mouse-pci not available");
+ return;
+ }
+
+ qtest = qtest_initf("-machine q35 "
+ "-device pcie-root-port,id=p1 "
+ "-device pcie-pci-bridge,bus=p1,id=b1 "
+ "-device virtio-mouse-pci,bus=b1,id=dev0");
process_device_remove(qtest, "dev0");
@@ -94,14 +106,20 @@
static void test_pci_unplug_json_request(void)
{
+ QTestState *qtest;
const char *arch = qtest_get_arch();
const char *machine_addition = "";
+ if (!qtest_has_device("virtio-mouse-pci")) {
+ g_test_skip("Device virtio-mouse-pci not available");
+ return;
+ }
+
if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
machine_addition = "-machine pc";
}
- QTestState *qtest = qtest_initf(
+ qtest = qtest_initf(
"%s -device \"{'driver': 'virtio-mouse-pci', 'id': 'dev0'}\"",
machine_addition);
@@ -112,6 +130,7 @@
static void test_q35_pci_unplug_json_request(void)
{
+ QTestState *qtest;
const char *port = "-device \"{'driver': 'pcie-root-port', "
"'id': 'p1'}\"";
@@ -123,8 +142,12 @@
"'bus': 'b1', "
"'id': 'dev0'}\"";
- QTestState *qtest = qtest_initf("-machine q35 %s %s %s",
- port, bridge, device);
+ if (!qtest_has_device("virtio-mouse-pci")) {
+ g_test_skip("Device virtio-mouse-pci not available");
+ return;
+ }
+
+ qtest = qtest_initf("-machine q35 %s %s %s", port, bridge, device);
process_device_remove(qtest, "dev0");
diff --git a/tests/qtest/drive_del-test.c b/tests/qtest/drive_del-test.c
index 9a75039..8a6f3ac 100644
--- a/tests/qtest/drive_del-test.c
+++ b/tests/qtest/drive_del-test.c
@@ -16,6 +16,8 @@
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qlist.h"
+static const char *qvirtio_get_dev_type(void);
+
static bool look_for_drive0(QTestState *qts, const char *command, const char *key)
{
QDict *response;
@@ -40,6 +42,19 @@
return found;
}
+/*
+ * This covers the possible absence of a device due to QEMU build
+ * options.
+ */
+static bool has_device_builtin(const char *dev)
+{
+ gchar *device = g_strdup_printf("%s-%s", dev, qvirtio_get_dev_type());
+ bool rc = qtest_has_device(device);
+
+ g_free(device);
+ return rc;
+}
+
static bool has_drive(QTestState *qts)
{
return look_for_drive0(qts, "query-block", "device");
@@ -208,6 +223,11 @@
{
QTestState *qts;
+ if (!has_device_builtin("virtio-scsi")) {
+ g_test_skip("Device virtio-scsi is not available");
+ return;
+ }
+
/* Start with a drive used by a device that unplugs instantaneously */
qts = qtest_initf("-drive if=none,id=drive0,file=null-co://,"
"file.read-zeroes=on,format=raw"
@@ -232,6 +252,11 @@
const char *arch = qtest_get_arch();
const char *machine_addition = "";
+ if (!has_device_builtin("virtio-blk")) {
+ g_test_skip("Device virtio-blk is not available");
+ return;
+ }
+
if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
machine_addition = "-machine pc";
}
@@ -256,6 +281,11 @@
{
QTestState *qts;
+ if (!has_device_builtin("virtio-blk")) {
+ g_test_skip("Device virtio-blk is not available");
+ return;
+ }
+
/*
* -drive/-device and device_del. Start with a drive used by a
* device that unplugs after reset.
@@ -277,6 +307,11 @@
{
QTestState *qts;
+ if (!has_device_builtin("virtio-scsi")) {
+ g_test_skip("Device virtio-scsi is not available");
+ return;
+ }
+
/* device_del with no drive plugged. */
qts = qtest_initf("-device virtio-scsi-%s -device scsi-cd,id=dev0",
qvirtio_get_dev_type());
@@ -291,6 +326,11 @@
const char *arch = qtest_get_arch();
const char *machine_addition = "";
+ if (!has_device_builtin("virtio-blk")) {
+ g_test_skip("Device virtio-blk is not available");
+ return;
+ }
+
if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
machine_addition = "-machine pc";
}
@@ -330,6 +370,11 @@
{
QTestState *qts;
+ if (!has_device_builtin("virtio-blk")) {
+ g_test_skip("Device virtio-blk is not available");
+ return;
+ }
+
/*
* -drive/device_add and device_del. Start with a drive used by a
* device that unplugs after reset.
@@ -352,6 +397,11 @@
const char *arch = qtest_get_arch();
const char *machine_addition = "";
+ if (!has_device_builtin("virtio-blk")) {
+ g_test_skip("Device virtio-blk is not available");
+ return;
+ }
+
if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
machine_addition = "-machine pc";
}
@@ -374,6 +424,11 @@
{
QTestState *qts;
+ if (!has_device_builtin("virtio-blk")) {
+ g_test_skip("Device virtio-blk is not available");
+ return;
+ }
+
qts = qtest_init("-machine q35 -device pcie-root-port,id=p1 "
"-device pcie-pci-bridge,bus=p1,id=b1");
@@ -395,6 +450,11 @@
const char *arch = qtest_get_arch();
const char *machine_addition = "";
+ if (!has_device_builtin("virtio-blk")) {
+ g_test_skip("Device virtio-blk is not available");
+ return;
+ }
+
if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
machine_addition = "-machine pc";
}
@@ -417,6 +477,11 @@
{
QTestState *qts;
+ if (!has_device_builtin("virtio-blk")) {
+ g_test_skip("Device virtio-blk is not available");
+ return;
+ }
+
qts = qtest_init("-machine q35 -device pcie-root-port,id=p1 "
"-device pcie-pci-bridge,bus=p1,id=b1");
diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c
index 392a7ae..a9254b4 100644
--- a/tests/qtest/fuzz-lsi53c895a-test.c
+++ b/tests/qtest/fuzz-lsi53c895a-test.c
@@ -112,6 +112,10 @@
int main(int argc, char **argv)
{
+ if (!qtest_has_device("lsi53c895a")) {
+ return 0;
+ }
+
g_test_init(&argc, &argv, NULL);
qtest_add_func("fuzz/lsi53c895a/lsi_do_dma_empty_queue",
diff --git a/tests/qtest/fuzz/fork_fuzz.c b/tests/qtest/fuzz/fork_fuzz.c
deleted file mode 100644
index 6ffb2a7..0000000
--- a/tests/qtest/fuzz/fork_fuzz.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Fork-based fuzzing helpers
- *
- * Copyright Red Hat Inc., 2019
- *
- * Authors:
- * Alexander Bulekov <alxndr@bu.edu>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "fork_fuzz.h"
-
-
-void counter_shm_init(void)
-{
- /* Copy what's in the counter region to a temporary buffer.. */
- void *copy = malloc(&__FUZZ_COUNTERS_END - &__FUZZ_COUNTERS_START);
- memcpy(copy,
- &__FUZZ_COUNTERS_START,
- &__FUZZ_COUNTERS_END - &__FUZZ_COUNTERS_START);
-
- /* Map a shared region over the counter region */
- if (mmap(&__FUZZ_COUNTERS_START,
- &__FUZZ_COUNTERS_END - &__FUZZ_COUNTERS_START,
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED | MAP_ANONYMOUS,
- 0, 0) == MAP_FAILED) {
- perror("Error: ");
- exit(1);
- }
-
- /* Copy the original data back to the counter-region */
- memcpy(&__FUZZ_COUNTERS_START, copy,
- &__FUZZ_COUNTERS_END - &__FUZZ_COUNTERS_START);
- free(copy);
-}
-
-
diff --git a/tests/qtest/fuzz/fork_fuzz.h b/tests/qtest/fuzz/fork_fuzz.h
deleted file mode 100644
index 9ecb8b5..0000000
--- a/tests/qtest/fuzz/fork_fuzz.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Fork-based fuzzing helpers
- *
- * Copyright Red Hat Inc., 2019
- *
- * Authors:
- * Alexander Bulekov <alxndr@bu.edu>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef FORK_FUZZ_H
-#define FORK_FUZZ_H
-
-extern uint8_t __FUZZ_COUNTERS_START;
-extern uint8_t __FUZZ_COUNTERS_END;
-
-void counter_shm_init(void);
-
-#endif
-
diff --git a/tests/qtest/fuzz/fork_fuzz.ld b/tests/qtest/fuzz/fork_fuzz.ld
deleted file mode 100644
index cfb88b7..0000000
--- a/tests/qtest/fuzz/fork_fuzz.ld
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * We adjust linker script modification to place all of the stuff that needs to
- * persist across fuzzing runs into a contiguous section of memory. Then, it is
- * easy to re-map the counter-related memory as shared.
- */
-
-SECTIONS
-{
- .data.fuzz_start : ALIGN(4K)
- {
- __FUZZ_COUNTERS_START = .;
- __start___sancov_cntrs = .;
- *(_*sancov_cntrs);
- __stop___sancov_cntrs = .;
-
- /* Lowest stack counter */
- *(__sancov_lowest_stack);
- }
-}
-INSERT AFTER .data;
-
-SECTIONS
-{
- .data.fuzz_ordered :
- {
- /*
- * Coverage counters. They're not necessary for fuzzing, but are useful
- * for analyzing the fuzzing performance
- */
- __start___llvm_prf_cnts = .;
- *(*llvm_prf_cnts);
- __stop___llvm_prf_cnts = .;
-
- /* Internal Libfuzzer TracePC object which contains the ValueProfileMap */
- FuzzerTracePC*(.bss*);
- /*
- * In case the above line fails, explicitly specify the (mangled) name of
- * the object we care about
- */
- *(.bss._ZN6fuzzer3TPCE);
- }
-}
-INSERT AFTER .data.fuzz_start;
-
-SECTIONS
-{
- .data.fuzz_end : ALIGN(4K)
- {
- __FUZZ_COUNTERS_END = .;
- }
-}
-/*
- * Don't overwrite the SECTIONS in the default linker script. Instead insert the
- * above into the default script
- */
-INSERT AFTER .data.fuzz_ordered;
diff --git a/tests/qtest/fuzz/fuzz.c b/tests/qtest/fuzz/fuzz.c
index eb75205..3bedb81 100644
--- a/tests/qtest/fuzz/fuzz.c
+++ b/tests/qtest/fuzz/fuzz.c
@@ -51,6 +51,12 @@
}
}
+void fuzz_reset(QTestState *s)
+{
+ qemu_system_reset(SHUTDOWN_CAUSE_GUEST_RESET);
+ main_loop_wait(true);
+}
+
static QTestState *qtest_setup(void)
{
qtest_server_set_send_handler(&qtest_client_inproc_recv, &fuzz_qts);
diff --git a/tests/qtest/fuzz/fuzz.h b/tests/qtest/fuzz/fuzz.h
index 327c1c5..21d1362 100644
--- a/tests/qtest/fuzz/fuzz.h
+++ b/tests/qtest/fuzz/fuzz.h
@@ -103,7 +103,7 @@
} FuzzTarget;
void flush_events(QTestState *);
-void reboot(QTestState *);
+void fuzz_reset(QTestState *);
/* Use the QTest ASCII protocol or call address_space API directly?*/
void fuzz_qtest_set_serialize(bool option);
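
The reboot() to fuzz_reset() rename above is the pivot for the rest of this series: the fork-per-input isolation is dropped, and each fuzz callback is now expected to reset the single long-lived VM itself once it is done with an input. A hypothetical standalone illustration of that pattern (plain libFuzzer, no QEMU internals; reset_state() plays the role of fuzz_reset()):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Stand-in for the long-lived guest/device state that the old scheme
 * threw away by letting the forked child exit.
 */
static uint8_t device_state[64];

static void reset_state(void)
{
    /* analogue of fuzz_reset(): put shared state back to a known baseline */
    memset(device_state, 0, sizeof(device_state));
}

static void exercise_state(const uint8_t *data, size_t size)
{
    /* mutate the shared state according to the fuzzer input */
    for (size_t i = 0; i < size; i++) {
        device_state[data[i] % sizeof(device_state)] ^= data[i];
    }
}

int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
{
    exercise_state(data, size);
    reset_state();      /* the next input starts from a clean baseline */
    return 0;
}

Built with clang -fsanitize=fuzzer, libFuzzer supplies main() and feeds inputs to LLVMFuzzerTestOneInput() in a single process, which matches the execution model the in-tree targets adopt once fork_fuzz.c is removed.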
diff --git a/tests/qtest/fuzz/generic_fuzz.c b/tests/qtest/fuzz/generic_fuzz.c
index 7326f68..c525d22 100644
--- a/tests/qtest/fuzz/generic_fuzz.c
+++ b/tests/qtest/fuzz/generic_fuzz.c
@@ -18,7 +18,6 @@
#include "tests/qtest/libqtest.h"
#include "tests/qtest/libqos/pci-pc.h"
#include "fuzz.h"
-#include "fork_fuzz.h"
#include "string.h"
#include "exec/memory.h"
#include "exec/ramblock.h"
@@ -29,6 +28,8 @@
#include "generic_fuzz_configs.h"
#include "hw/mem/sparse-mem.h"
+static void pci_enum(gpointer pcidev, gpointer bus);
+
/*
* SEPARATOR is used to separate "operations" in the fuzz input
*/
@@ -47,10 +48,10 @@
OP_CLOCK_STEP,
};
-#define DEFAULT_TIMEOUT_US 100000
#define USEC_IN_SEC 1000000000
#define MAX_DMA_FILL_SIZE 0x10000
+#define MAX_TOTAL_DMA_SIZE 0x10000000
#define PCI_HOST_BRIDGE_CFG 0xcf8
#define PCI_HOST_BRIDGE_DATA 0xcfc
@@ -60,9 +61,8 @@
ram_addr_t size; /* The number of bytes until the end of the I/O region */
} address_range;
-static useconds_t timeout = DEFAULT_TIMEOUT_US;
-
static bool qtest_log_enabled;
+size_t dma_bytes_written;
MemoryRegion *sparse_mem_mr;
@@ -196,6 +196,7 @@
*/
if (dma_patterns->len == 0
|| len == 0
+ || dma_bytes_written + len > MAX_TOTAL_DMA_SIZE
|| (mr != current_machine->ram && mr != sparse_mem_mr)) {
return;
}
@@ -268,6 +269,7 @@
fflush(stderr);
}
qtest_memwrite(qts_global, addr, buf, l);
+ dma_bytes_written += l;
}
len -= l;
buf += l;
@@ -589,30 +591,6 @@
pci_disabled = true;
}
-static void handle_timeout(int sig)
-{
- if (qtest_log_enabled) {
- fprintf(stderr, "[Timeout]\n");
- fflush(stderr);
- }
-
- /*
- * If there is a crash, libfuzzer/ASAN forks a child to run an
- * "llvm-symbolizer" process for printing out a pretty stacktrace. It
- * communicates with this child using a pipe. If we timeout+Exit, while
- * libfuzzer is still communicating with the llvm-symbolizer child, we will
- * be left with an orphan llvm-symbolizer process. Sometimes, this appears
- * to lead to a deadlock in the forkserver. Use waitpid to check if there
- * are any waitable children. If so, exit out of the signal-handler, and
- * let libfuzzer finish communicating with the child, and exit, on its own.
- */
- if (waitpid(-1, NULL, WNOHANG) == 0) {
- return;
- }
-
- _Exit(0);
-}
-
/*
* Here, we interpret random bytes from the fuzzer, as a sequence of commands.
* Some commands can be variable-width, so we use a separator, SEPARATOR, to
@@ -669,64 +647,33 @@
size_t cmd_len;
uint8_t op;
- if (fork() == 0) {
- struct sigaction sact;
- struct itimerval timer;
- sigset_t set;
- /*
- * Sometimes the fuzzer will find inputs that take quite a long time to
- * process. Often times, these inputs do not result in new coverage.
- * Even if these inputs might be interesting, they can slow down the
- * fuzzer, overall. Set a timeout for each command to avoid hurting
- * performance, too much
- */
- if (timeout) {
+ op_clear_dma_patterns(s, NULL, 0);
+ pci_disabled = false;
+ dma_bytes_written = 0;
- sigemptyset(&sact.sa_mask);
- sact.sa_flags = SA_NODEFER;
- sact.sa_handler = handle_timeout;
- sigaction(SIGALRM, &sact, NULL);
+ QPCIBus *pcibus = qpci_new_pc(s, NULL);
+ g_ptr_array_foreach(fuzzable_pci_devices, pci_enum, pcibus);
+ qpci_free_pc(pcibus);
- sigemptyset(&set);
- sigaddset(&set, SIGALRM);
- pthread_sigmask(SIG_UNBLOCK, &set, NULL);
+ while (cmd && Size) {
+ /* Get the length until the next command or end of input */
+ nextcmd = memmem(cmd, Size, SEPARATOR, strlen(SEPARATOR));
+ cmd_len = nextcmd ? nextcmd - cmd : Size;
- memset(&timer, 0, sizeof(timer));
- timer.it_value.tv_sec = timeout / USEC_IN_SEC;
- timer.it_value.tv_usec = timeout % USEC_IN_SEC;
+ if (cmd_len > 0) {
+ /* Interpret the first byte of the command as an opcode */
+ op = *cmd % (sizeof(ops) / sizeof((ops)[0]));
+ ops[op](s, cmd + 1, cmd_len - 1);
+
+ /* Run the main loop */
+ flush_events(s);
}
-
- op_clear_dma_patterns(s, NULL, 0);
- pci_disabled = false;
-
- while (cmd && Size) {
- /* Reset the timeout, each time we run a new command */
- if (timeout) {
- setitimer(ITIMER_REAL, &timer, NULL);
- }
-
- /* Get the length until the next command or end of input */
- nextcmd = memmem(cmd, Size, SEPARATOR, strlen(SEPARATOR));
- cmd_len = nextcmd ? nextcmd - cmd : Size;
-
- if (cmd_len > 0) {
- /* Interpret the first byte of the command as an opcode */
- op = *cmd % (sizeof(ops) / sizeof((ops)[0]));
- ops[op](s, cmd + 1, cmd_len - 1);
-
- /* Run the main loop */
- flush_events(s);
- }
- /* Advance to the next command */
- cmd = nextcmd ? nextcmd + sizeof(SEPARATOR) - 1 : nextcmd;
- Size = Size - (cmd_len + sizeof(SEPARATOR) - 1);
- g_array_set_size(dma_regions, 0);
- }
- _Exit(0);
- } else {
- flush_events(s);
- wait(0);
+ /* Advance to the next command */
+ cmd = nextcmd ? nextcmd + sizeof(SEPARATOR) - 1 : nextcmd;
+ Size = Size - (cmd_len + sizeof(SEPARATOR) - 1);
+ g_array_set_size(dma_regions, 0);
}
+ fuzz_reset(s);
}
static void usage(void)
@@ -738,8 +685,6 @@
printf("Optionally: QEMU_AVOID_DOUBLE_FETCH= "
"Try to avoid racy DMA double fetch bugs? %d by default\n",
avoid_double_fetches);
- printf("Optionally: QEMU_FUZZ_TIMEOUT= Specify a custom timeout (us). "
- "0 to disable. %d by default\n", timeout);
exit(0);
}
@@ -825,7 +770,6 @@
{
GHashTableIter iter;
MemoryRegion *mr;
- QPCIBus *pcibus;
char **result;
GString *name_pattern;
@@ -838,9 +782,6 @@
if (getenv("QEMU_AVOID_DOUBLE_FETCH")) {
avoid_double_fetches = 1;
}
- if (getenv("QEMU_FUZZ_TIMEOUT")) {
- timeout = g_ascii_strtoll(getenv("QEMU_FUZZ_TIMEOUT"), NULL, 0);
- }
qts_global = s;
/*
@@ -883,12 +824,6 @@
printf("No fuzzable memory regions found...\n");
exit(1);
}
-
- pcibus = qpci_new_pc(s, NULL);
- g_ptr_array_foreach(fuzzable_pci_devices, pci_enum, pcibus);
- qpci_free_pc(pcibus);
-
- counter_shm_init();
}
/*
diff --git a/tests/qtest/fuzz/i440fx_fuzz.c b/tests/qtest/fuzz/i440fx_fuzz.c
index b17fc72..155fe01 100644
--- a/tests/qtest/fuzz/i440fx_fuzz.c
+++ b/tests/qtest/fuzz/i440fx_fuzz.c
@@ -18,7 +18,6 @@
#include "tests/qtest/libqos/pci-pc.h"
#include "fuzz.h"
#include "qos_fuzz.h"
-#include "fork_fuzz.h"
#define I440FX_PCI_HOST_BRIDGE_CFG 0xcf8
@@ -89,6 +88,7 @@
size_t Size)
{
ioport_fuzz_qtest(s, Data, Size);
+ fuzz_reset(s);
}
static void pciconfig_fuzz_qos(QTestState *s, QPCIBus *bus,
@@ -145,17 +145,6 @@
pciconfig_fuzz_qos(s, bus, Data, Size);
}
-static void i440fx_fuzz_qos_fork(QTestState *s,
- const unsigned char *Data, size_t Size) {
- if (fork() == 0) {
- i440fx_fuzz_qos(s, Data, Size);
- _Exit(0);
- } else {
- flush_events(s);
- wait(NULL);
- }
-}
-
static const char *i440fx_qtest_argv = TARGET_NAME " -machine accel=qtest"
" -m 0 -display none";
static GString *i440fx_argv(FuzzTarget *t)
@@ -163,10 +152,6 @@
return g_string_new(i440fx_qtest_argv);
}
-static void fork_init(void)
-{
- counter_shm_init();
-}
static void register_pci_fuzz_targets(void)
{
@@ -178,16 +163,6 @@
.get_init_cmdline = i440fx_argv,
.fuzz = i440fx_fuzz_qtest});
- /* Uses libqos and forks to prevent state leakage */
- fuzz_add_qos_target(&(FuzzTarget){
- .name = "i440fx-qos-fork-fuzz",
- .description = "Fuzz the i440fx using raw qtest commands and "
- "rebooting after each run",
- .pre_vm_init = &fork_init,
- .fuzz = i440fx_fuzz_qos_fork,},
- "i440FX-pcihost",
- &(QOSGraphTestOptions){}
- );
/*
* Uses libqos. Doesn't do anything to reset state. Note that if we were to
diff --git a/tests/qtest/fuzz/meson.build b/tests/qtest/fuzz/meson.build
index 189901d..4d10b47 100644
--- a/tests/qtest/fuzz/meson.build
+++ b/tests/qtest/fuzz/meson.build
@@ -2,7 +2,7 @@
subdir_done()
endif
-specific_fuzz_ss.add(files('fuzz.c', 'fork_fuzz.c', 'qos_fuzz.c',
+specific_fuzz_ss.add(files('fuzz.c', 'qos_fuzz.c',
'qtest_wrappers.c'), qos)
# Targets
@@ -12,7 +12,7 @@
specific_fuzz_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio_blk_fuzz.c'))
specific_fuzz_ss.add(files('generic_fuzz.c'))
-fork_fuzz = declare_dependency(
+fuzz_ld = declare_dependency(
link_args: fuzz_exe_ldflags +
['-Wl,-wrap,qtest_inb',
'-Wl,-wrap,qtest_inw',
@@ -35,4 +35,4 @@
'-Wl,-wrap,qtest_memset']
)
-specific_fuzz_ss.add(fork_fuzz)
+specific_fuzz_ss.add(fuzz_ld)
diff --git a/tests/qtest/fuzz/virtio_blk_fuzz.c b/tests/qtest/fuzz/virtio_blk_fuzz.c
index a9fb9ec..651fd4f 100644
--- a/tests/qtest/fuzz/virtio_blk_fuzz.c
+++ b/tests/qtest/fuzz/virtio_blk_fuzz.c
@@ -19,7 +19,6 @@
#include "standard-headers/linux/virtio_pci.h"
#include "standard-headers/linux/virtio_blk.h"
#include "fuzz.h"
-#include "fork_fuzz.h"
#include "qos_fuzz.h"
#define TEST_IMAGE_SIZE (64 * 1024 * 1024)
@@ -128,48 +127,24 @@
}
}
-static void virtio_blk_fork_fuzz(QTestState *s,
- const unsigned char *Data, size_t Size)
-{
- QVirtioBlk *blk = fuzz_qos_obj;
- static QVirtioBlkQueues *queues;
- if (!queues) {
- queues = qvirtio_blk_init(blk->vdev, 0);
- }
- if (fork() == 0) {
- virtio_blk_fuzz(s, queues, Data, Size);
- flush_events(s);
- _Exit(0);
- } else {
- flush_events(s);
- wait(NULL);
- }
-}
-
static void virtio_blk_with_flag_fuzz(QTestState *s,
const unsigned char *Data, size_t Size)
{
QVirtioBlk *blk = fuzz_qos_obj;
static QVirtioBlkQueues *queues;
- if (fork() == 0) {
- if (Size >= sizeof(uint64_t)) {
- queues = qvirtio_blk_init(blk->vdev, *(uint64_t *)Data);
- virtio_blk_fuzz(s, queues,
- Data + sizeof(uint64_t), Size - sizeof(uint64_t));
- flush_events(s);
- }
- _Exit(0);
- } else {
+ if (Size >= sizeof(uint64_t)) {
+ queues = qvirtio_blk_init(blk->vdev, *(uint64_t *)Data);
+ virtio_blk_fuzz(s, queues,
+ Data + sizeof(uint64_t), Size - sizeof(uint64_t));
flush_events(s);
- wait(NULL);
}
+ fuzz_reset(s);
}
static void virtio_blk_pre_fuzz(QTestState *s)
{
qos_init_path(s);
- counter_shm_init();
}
static void drive_destroy(void *path)
@@ -209,21 +184,9 @@
static void register_virtio_blk_fuzz_targets(void)
{
fuzz_add_qos_target(&(FuzzTarget){
- .name = "virtio-blk-fuzz",
- .description = "Fuzz the virtio-blk virtual queues, forking "
- "for each fuzz run",
- .pre_vm_init = &counter_shm_init,
- .pre_fuzz = &virtio_blk_pre_fuzz,
- .fuzz = virtio_blk_fork_fuzz,},
- "virtio-blk",
- &(QOSGraphTestOptions){.before = virtio_blk_test_setup}
- );
-
- fuzz_add_qos_target(&(FuzzTarget){
.name = "virtio-blk-flags-fuzz",
- .description = "Fuzz the virtio-blk virtual queues, forking "
- "for each fuzz run (also fuzzes the virtio flags)",
- .pre_vm_init = &counter_shm_init,
+ .description = "Fuzz the virtio-blk virtual queues. "
+ "Also fuzzes the virtio flags)",
.pre_fuzz = &virtio_blk_pre_fuzz,
.fuzz = virtio_blk_with_flag_fuzz,},
"virtio-blk",
diff --git a/tests/qtest/fuzz/virtio_net_fuzz.c b/tests/qtest/fuzz/virtio_net_fuzz.c
index c2c15f0..e239875 100644
--- a/tests/qtest/fuzz/virtio_net_fuzz.c
+++ b/tests/qtest/fuzz/virtio_net_fuzz.c
@@ -16,7 +16,6 @@
#include "tests/qtest/libqtest.h"
#include "tests/qtest/libqos/virtio-net.h"
#include "fuzz.h"
-#include "fork_fuzz.h"
#include "qos_fuzz.h"
@@ -115,36 +114,18 @@
}
}
-static void virtio_net_fork_fuzz(QTestState *s,
- const unsigned char *Data, size_t Size)
-{
- if (fork() == 0) {
- virtio_net_fuzz_multi(s, Data, Size, false);
- flush_events(s);
- _Exit(0);
- } else {
- flush_events(s);
- wait(NULL);
- }
-}
-static void virtio_net_fork_fuzz_check_used(QTestState *s,
+static void virtio_net_fuzz_check_used(QTestState *s,
const unsigned char *Data, size_t Size)
{
- if (fork() == 0) {
- virtio_net_fuzz_multi(s, Data, Size, true);
- flush_events(s);
- _Exit(0);
- } else {
- flush_events(s);
- wait(NULL);
- }
+ virtio_net_fuzz_multi(s, Data, Size, true);
+ flush_events(s);
+ fuzz_reset(s);
}
static void virtio_net_pre_fuzz(QTestState *s)
{
qos_init_path(s);
- counter_shm_init();
}
static void *virtio_net_test_setup_socket(GString *cmd_line, void *arg)
@@ -158,23 +139,8 @@
return arg;
}
-static void *virtio_net_test_setup_user(GString *cmd_line, void *arg)
-{
- g_string_append_printf(cmd_line, " -netdev user,id=hs0 ");
- return arg;
-}
-
static void register_virtio_net_fuzz_targets(void)
{
- fuzz_add_qos_target(&(FuzzTarget){
- .name = "virtio-net-socket",
- .description = "Fuzz the virtio-net virtual queues. Fuzz incoming "
- "traffic using the socket backend",
- .pre_fuzz = &virtio_net_pre_fuzz,
- .fuzz = virtio_net_fork_fuzz,},
- "virtio-net",
- &(QOSGraphTestOptions){.before = virtio_net_test_setup_socket}
- );
fuzz_add_qos_target(&(FuzzTarget){
.name = "virtio-net-socket-check-used",
@@ -182,20 +148,10 @@
"descriptors to be used. Timeout may indicate improperly handled "
"input",
.pre_fuzz = &virtio_net_pre_fuzz,
- .fuzz = virtio_net_fork_fuzz_check_used,},
+ .fuzz = virtio_net_fuzz_check_used,},
"virtio-net",
&(QOSGraphTestOptions){.before = virtio_net_test_setup_socket}
);
- fuzz_add_qos_target(&(FuzzTarget){
- .name = "virtio-net-slirp",
- .description = "Fuzz the virtio-net virtual queues with the slirp "
- " backend. Warning: May result in network traffic emitted from the "
- " process. Run in an isolated network environment.",
- .pre_fuzz = &virtio_net_pre_fuzz,
- .fuzz = virtio_net_fork_fuzz,},
- "virtio-net",
- &(QOSGraphTestOptions){.before = virtio_net_test_setup_user}
- );
}
fuzz_target_init(register_virtio_net_fuzz_targets);
diff --git a/tests/qtest/fuzz/virtio_scsi_fuzz.c b/tests/qtest/fuzz/virtio_scsi_fuzz.c
index b3220ef..b6268ef 100644
--- a/tests/qtest/fuzz/virtio_scsi_fuzz.c
+++ b/tests/qtest/fuzz/virtio_scsi_fuzz.c
@@ -20,7 +20,6 @@
#include "standard-headers/linux/virtio_pci.h"
#include "standard-headers/linux/virtio_scsi.h"
#include "fuzz.h"
-#include "fork_fuzz.h"
#include "qos_fuzz.h"
#define PCI_SLOT 0x02
@@ -132,48 +131,24 @@
}
}
-static void virtio_scsi_fork_fuzz(QTestState *s,
- const unsigned char *Data, size_t Size)
-{
- QVirtioSCSI *scsi = fuzz_qos_obj;
- static QVirtioSCSIQueues *queues;
- if (!queues) {
- queues = qvirtio_scsi_init(scsi->vdev, 0);
- }
- if (fork() == 0) {
- virtio_scsi_fuzz(s, queues, Data, Size);
- flush_events(s);
- _Exit(0);
- } else {
- flush_events(s);
- wait(NULL);
- }
-}
-
static void virtio_scsi_with_flag_fuzz(QTestState *s,
const unsigned char *Data, size_t Size)
{
QVirtioSCSI *scsi = fuzz_qos_obj;
static QVirtioSCSIQueues *queues;
- if (fork() == 0) {
- if (Size >= sizeof(uint64_t)) {
- queues = qvirtio_scsi_init(scsi->vdev, *(uint64_t *)Data);
- virtio_scsi_fuzz(s, queues,
- Data + sizeof(uint64_t), Size - sizeof(uint64_t));
- flush_events(s);
- }
- _Exit(0);
- } else {
+ if (Size >= sizeof(uint64_t)) {
+ queues = qvirtio_scsi_init(scsi->vdev, *(uint64_t *)Data);
+ virtio_scsi_fuzz(s, queues,
+ Data + sizeof(uint64_t), Size - sizeof(uint64_t));
flush_events(s);
- wait(NULL);
}
+ fuzz_reset(s);
}
static void virtio_scsi_pre_fuzz(QTestState *s)
{
qos_init_path(s);
- counter_shm_init();
}
static void *virtio_scsi_test_setup(GString *cmd_line, void *arg)
@@ -190,21 +165,9 @@
static void register_virtio_scsi_fuzz_targets(void)
{
fuzz_add_qos_target(&(FuzzTarget){
- .name = "virtio-scsi-fuzz",
- .description = "Fuzz the virtio-scsi virtual queues, forking "
- "for each fuzz run",
- .pre_vm_init = &counter_shm_init,
- .pre_fuzz = &virtio_scsi_pre_fuzz,
- .fuzz = virtio_scsi_fork_fuzz,},
- "virtio-scsi",
- &(QOSGraphTestOptions){.before = virtio_scsi_test_setup}
- );
-
- fuzz_add_qos_target(&(FuzzTarget){
.name = "virtio-scsi-flags-fuzz",
- .description = "Fuzz the virtio-scsi virtual queues, forking "
- "for each fuzz run (also fuzzes the virtio flags)",
- .pre_vm_init = &counter_shm_init,
+ .description = "Fuzz the virtio-scsi virtual queues. "
+ "Also fuzzes the virtio flags",
.pre_fuzz = &virtio_scsi_pre_fuzz,
.fuzz = virtio_scsi_with_flag_fuzz,},
"virtio-scsi",
diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c
index 4a76280..5aa258a 100644
--- a/tests/qtest/hd-geo-test.c
+++ b/tests/qtest/hd-geo-test.c
@@ -1090,30 +1090,42 @@
qtest_add_func("hd-geo/override/ide", test_override_ide);
if (qtest_has_device("lsi53c895a")) {
qtest_add_func("hd-geo/override/scsi", test_override_scsi);
- qtest_add_func("hd-geo/override/scsi_2_controllers",
- test_override_scsi_2_controllers);
+ if (qtest_has_device("virtio-scsi-pci")) {
+ qtest_add_func("hd-geo/override/scsi_2_controllers",
+ test_override_scsi_2_controllers);
+ }
}
- qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk);
qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs);
- qtest_add_func("hd-geo/override/scsi_hot_unplug",
- test_override_scsi_hot_unplug);
- qtest_add_func("hd-geo/override/virtio_hot_unplug",
- test_override_virtio_hot_unplug);
+ if (qtest_has_device("virtio-scsi-pci")) {
+ qtest_add_func("hd-geo/override/scsi_hot_unplug",
+ test_override_scsi_hot_unplug);
+ }
+ if (qtest_has_device("virtio-blk-pci")) {
+ qtest_add_func("hd-geo/override/virtio_hot_unplug",
+ test_override_virtio_hot_unplug);
+ qtest_add_func("hd-geo/override/virtio_blk",
+ test_override_virtio_blk);
+ }
if (qtest_has_machine("q35")) {
qtest_add_func("hd-geo/override/sata", test_override_sata);
- qtest_add_func("hd-geo/override/virtio_blk_q35",
- test_override_virtio_blk_q35);
qtest_add_func("hd-geo/override/zero_chs_q35",
test_override_zero_chs_q35);
if (qtest_has_device("lsi53c895a")) {
qtest_add_func("hd-geo/override/scsi_q35",
test_override_scsi_q35);
}
- qtest_add_func("hd-geo/override/scsi_hot_unplug_q35",
- test_override_scsi_hot_unplug_q35);
- qtest_add_func("hd-geo/override/virtio_hot_unplug_q35",
- test_override_virtio_hot_unplug_q35);
+ if (qtest_has_device("virtio-scsi-pci")) {
+ qtest_add_func("hd-geo/override/scsi_hot_unplug_q35",
+ test_override_scsi_hot_unplug_q35);
+ }
+ if (qtest_has_device("virtio-blk-pci")) {
+ qtest_add_func("hd-geo/override/virtio_hot_unplug_q35",
+ test_override_virtio_hot_unplug_q35);
+ qtest_add_func("hd-geo/override/virtio_blk_q35",
+ test_override_virtio_blk_q35);
+ }
+
}
} else {
g_test_message("QTEST_QEMU_IMG not set or qemu-img missing; "
diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c
index d658222..2bfd460 100644
--- a/tests/qtest/libqtest.c
+++ b/tests/qtest/libqtest.c
@@ -158,6 +158,7 @@
CloseHandle((HANDLE)pid);
#endif
s->qemu_pid = -1;
+ qtest_remove_abrt_handler(s);
}
return false;
}
@@ -169,6 +170,8 @@
static void qtest_check_status(QTestState *s)
{
+ assert(s->qemu_pid == -1);
+
/*
* Check whether qemu exited with expected exit status; anything else is
* fishy and should be logged with as much detail as possible.
@@ -202,36 +205,40 @@
void qtest_wait_qemu(QTestState *s)
{
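+    /* Reap the child only once: qemu_pid is reset to -1 after it has been waited for */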
+ if (s->qemu_pid != -1) {
#ifndef _WIN32
- pid_t pid;
- uint64_t end;
+ pid_t pid;
+ uint64_t end;
- /* poll for a while until sending SIGKILL */
- end = g_get_monotonic_time() + WAITPID_TIMEOUT * G_TIME_SPAN_SECOND;
+ /* poll for a while until sending SIGKILL */
+ end = g_get_monotonic_time() + WAITPID_TIMEOUT * G_TIME_SPAN_SECOND;
- do {
- pid = waitpid(s->qemu_pid, &s->wstatus, WNOHANG);
- if (pid != 0) {
- break;
+ do {
+ pid = waitpid(s->qemu_pid, &s->wstatus, WNOHANG);
+ if (pid != 0) {
+ break;
+ }
+ g_usleep(100 * 1000);
+ } while (g_get_monotonic_time() < end);
+
+ if (pid == 0) {
+ kill(s->qemu_pid, SIGKILL);
+ pid = RETRY_ON_EINTR(waitpid(s->qemu_pid, &s->wstatus, 0));
}
- g_usleep(100 * 1000);
- } while (g_get_monotonic_time() < end);
- if (pid == 0) {
- kill(s->qemu_pid, SIGKILL);
- pid = RETRY_ON_EINTR(waitpid(s->qemu_pid, &s->wstatus, 0));
- }
-
- assert(pid == s->qemu_pid);
+ assert(pid == s->qemu_pid);
#else
- DWORD ret;
+ DWORD ret;
- ret = WaitForSingleObject((HANDLE)s->qemu_pid, INFINITE);
- assert(ret == WAIT_OBJECT_0);
- GetExitCodeProcess((HANDLE)s->qemu_pid, &s->exit_code);
- CloseHandle((HANDLE)s->qemu_pid);
+ ret = WaitForSingleObject((HANDLE)s->qemu_pid, INFINITE);
+ assert(ret == WAIT_OBJECT_0);
+ GetExitCodeProcess((HANDLE)s->qemu_pid, &s->exit_code);
+ CloseHandle((HANDLE)s->qemu_pid);
#endif
+ s->qemu_pid = -1;
+ qtest_remove_abrt_handler(s);
+ }
qtest_check_status(s);
}
@@ -245,7 +252,6 @@
TerminateProcess((HANDLE)s->qemu_pid, s->expected_status);
#endif
qtest_wait_qemu(s);
- s->qemu_pid = -1;
return;
}
@@ -307,6 +313,11 @@
void qtest_remove_abrt_handler(void *data)
{
GHook *hook = g_hook_find_data(&abrt_hooks, TRUE, data);
+
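+    /* Nothing to do if the handler was already removed for this state */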
+ if (!hook) {
+ return;
+ }
+
g_hook_destroy_link(&abrt_hooks, hook);
/* Uninstall SIGABRT handler on last instance */
@@ -360,60 +371,25 @@
}
#endif /* _WIN32 */
-QTestState *qtest_init_without_qmp_handshake(const char *extra_args)
+static QTestState *G_GNUC_PRINTF(1, 2) qtest_spawn_qemu(const char *fmt, ...)
{
- QTestState *s;
- int sock, qmpsock, i;
- gchar *socket_path;
- gchar *qmp_socket_path;
- gchar *command;
- const char *qemu_binary = qtest_qemu_binary();
+ va_list ap;
+ QTestState *s = g_new0(QTestState, 1);
const char *trace = g_getenv("QTEST_TRACE");
g_autofree char *tracearg = trace ?
g_strdup_printf("-trace %s ", trace) : g_strdup("");
+ g_autoptr(GString) command = g_string_new("");
- s = g_new(QTestState, 1);
-
- socket_path = g_strdup_printf("%s/qtest-%d.sock",
- g_get_tmp_dir(), getpid());
- qmp_socket_path = g_strdup_printf("%s/qtest-%d.qmp",
- g_get_tmp_dir(), getpid());
-
- /* It's possible that if an earlier test run crashed it might
- * have left a stale unix socket lying around. Delete any
- * stale old socket to avoid spurious test failures with
- * tests/libqtest.c:70:init_socket: assertion failed (ret != -1): (-1 != -1)
- */
- unlink(socket_path);
- unlink(qmp_socket_path);
-
- socket_init();
- sock = init_socket(socket_path);
- qmpsock = init_socket(qmp_socket_path);
-
- qtest_client_set_rx_handler(s, qtest_client_socket_recv_line);
- qtest_client_set_tx_handler(s, qtest_client_socket_send);
+ va_start(ap, fmt);
+ g_string_append_printf(command, CMD_EXEC "%s %s",
+ qtest_qemu_binary(), tracearg);
+ g_string_append_vprintf(command, fmt, ap);
+ va_end(ap);
qtest_add_abrt_handler(kill_qemu_hook_func, s);
- command = g_strdup_printf(CMD_EXEC "%s %s"
- "-qtest unix:%s "
- "-qtest-log %s "
- "-chardev socket,path=%s,id=char0 "
- "-mon chardev=char0,mode=control "
- "-display none "
- "%s"
- " -accel qtest",
- qemu_binary, tracearg, socket_path,
- getenv("QTEST_LOG") ? DEV_STDERR : DEV_NULL,
- qmp_socket_path,
- extra_args ?: "");
+ g_test_message("starting QEMU: %s", command->str);
- g_test_message("starting QEMU: %s", command);
-
- s->pending_events = NULL;
- s->wstatus = 0;
- s->expected_status = 0;
#ifndef _WIN32
s->qemu_pid = fork();
if (s->qemu_pid == 0) {
@@ -434,14 +410,56 @@
if (!g_setenv("QEMU_AUDIO_DRV", "none", true)) {
exit(1);
}
- execlp("/bin/sh", "sh", "-c", command, NULL);
+ execlp("/bin/sh", "sh", "-c", command->str, NULL);
exit(1);
}
#else
- s->qemu_pid = qtest_create_process(command);
+ s->qemu_pid = qtest_create_process(command->str);
#endif /* _WIN32 */
- g_free(command);
+ return s;
+}
+
+QTestState *qtest_init_without_qmp_handshake(const char *extra_args)
+{
+ QTestState *s;
+ int sock, qmpsock, i;
+ gchar *socket_path;
+ gchar *qmp_socket_path;
+
+ socket_path = g_strdup_printf("%s/qtest-%d.sock",
+ g_get_tmp_dir(), getpid());
+ qmp_socket_path = g_strdup_printf("%s/qtest-%d.qmp",
+ g_get_tmp_dir(), getpid());
+
+ /*
+ * It's possible that if an earlier test run crashed it might
+ * have left a stale unix socket lying around. Delete any
+ * stale old socket to avoid spurious test failures with
+ * tests/libqtest.c:70:init_socket: assertion failed (ret != -1): (-1 != -1)
+ */
+ unlink(socket_path);
+ unlink(qmp_socket_path);
+
+ socket_init();
+ sock = init_socket(socket_path);
+ qmpsock = init_socket(qmp_socket_path);
+
+ s = qtest_spawn_qemu("-qtest unix:%s "
+ "-qtest-log %s "
+ "-chardev socket,path=%s,id=char0 "
+ "-mon chardev=char0,mode=control "
+ "-display none "
+ "%s"
+ " -accel qtest",
+ socket_path,
+ getenv("QTEST_LOG") ? DEV_STDERR : DEV_NULL,
+ qmp_socket_path,
+ extra_args ?: "");
+
+ qtest_client_set_rx_handler(s, qtest_client_socket_recv_line);
+ qtest_client_set_tx_handler(s, qtest_client_socket_send);
+
s->fd = socket_accept(sock);
if (s->fd >= 0) {
s->qmp_fd = socket_accept(qmpsock);
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index e97616d..29a4efb 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -73,11 +73,14 @@
(config_all_devices.has_key('CONFIG_ESP_PCI') ? ['am53c974-test'] : []) + \
(config_host.has_key('CONFIG_POSIX') and \
config_all_devices.has_key('CONFIG_ACPI_ERST') ? ['erst-test'] : []) + \
- (config_all_devices.has_key('CONFIG_VIRTIO_NET') and \
+ (config_all_devices.has_key('CONFIG_PCIE_PORT') and \
+ config_all_devices.has_key('CONFIG_VIRTIO_NET') and \
config_all_devices.has_key('CONFIG_Q35') and \
config_all_devices.has_key('CONFIG_VIRTIO_PCI') and \
slirp.found() ? ['virtio-net-failover'] : []) + \
- (unpack_edk2_blobs ? ['bios-tables-test'] : []) + \
+ (unpack_edk2_blobs and \
+ config_all_devices.has_key('CONFIG_HPET') and \
+ config_all_devices.has_key('CONFIG_PARALLEL') ? ['bios-tables-test'] : []) + \
qtests_pci + \
qtests_cxl + \
['fdc-test',
@@ -196,17 +199,17 @@
(config_all_devices.has_key('CONFIG_PFLASH_CFI02') ? ['pflash-cfi02-test'] : []) + \
(config_all_devices.has_key('CONFIG_ASPEED_SOC') ? qtests_aspeed : []) + \
(config_all_devices.has_key('CONFIG_NPCM7XX') ? qtests_npcm7xx : []) + \
+ (config_all_devices.has_key('CONFIG_GENERIC_LOADER') ? ['hexloader-test'] : []) + \
['arm-cpu-features',
'microbit-test',
'test-arm-mptimer',
- 'boot-serial-test',
- 'hexloader-test']
+ 'boot-serial-test']
# TODO: once aarch64 TCG is fixed on ARM 32 bit host, make bios-tables-test unconditional
qtests_aarch64 = \
(cpu != 'arm' and unpack_edk2_blobs ? ['bios-tables-test'] : []) + \
- (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-test'] : []) + \
- (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-swtpm-test'] : []) + \
+ (config_all.has_key('CONFIG_TCG') and config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? \
+ ['tpm-tis-device-test', 'tpm-tis-device-swtpm-test'] : []) + \
(config_all_devices.has_key('CONFIG_XLNX_ZYNQMP_ARM') ? ['xlnx-can-test', 'fuzz-xlnx-dp-test'] : []) + \
(config_all_devices.has_key('CONFIG_RASPI') ? ['bcm2835-dma-test'] : []) + \
['arm-cpu-features',
@@ -254,10 +257,14 @@
'virtio-net-test.c',
'virtio-rng-test.c',
'virtio-scsi-test.c',
- 'virtio-serial-test.c',
'virtio-iommu-test.c',
'vmxnet3-test.c',
)
+
+if config_all_devices.has_key('CONFIG_VIRTIO_SERIAL')
+ qos_test_ss.add(files('virtio-serial-test.c'))
+endif
+
if config_host.has_key('CONFIG_POSIX')
qos_test_ss.add(files('e1000e-test.c'))
endif
diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c
index 1d98dca..270e424 100644
--- a/tests/qtest/netdev-socket.c
+++ b/tests/qtest/netdev-socket.c
@@ -11,6 +11,10 @@
#include <glib/gstdio.h>
#include "../unit/socket-helpers.h"
#include "libqtest.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/sockets.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qapi-visit-sockets.h"
#define CONNECTION_TIMEOUT 60
@@ -142,6 +146,101 @@
qtest_quit(qts0);
}
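+/*
+ * Wait for the NETDEV_STREAM_CONNECTED event for netdev @id and return the
+ * peer address reported in the event data via @addr.
+ */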
+static void wait_stream_connected(QTestState *qts, const char *id,
+ SocketAddress **addr)
+{
+ QDict *resp, *data;
+ QString *qstr;
+ QObject *obj;
+ Visitor *v = NULL;
+
+ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_CONNECTED");
+ g_assert_nonnull(resp);
+ data = qdict_get_qdict(resp, "data");
+ g_assert_nonnull(data);
+
+ qstr = qobject_to(QString, qdict_get(data, "netdev-id"));
+    g_assert_nonnull(qstr);
+
+ g_assert(!strcmp(qstring_get_str(qstr), id));
+
+ obj = qdict_get(data, "addr");
+
+ v = qobject_input_visitor_new(obj);
+ visit_type_SocketAddress(v, NULL, addr, NULL);
+ visit_free(v);
+ qobject_unref(resp);
+}
+
+static void wait_stream_disconnected(QTestState *qts, const char *id)
+{
+ QDict *resp, *data;
+ QString *qstr;
+
+ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_DISCONNECTED");
+ g_assert_nonnull(resp);
+ data = qdict_get_qdict(resp, "data");
+ g_assert_nonnull(data);
+
+ qstr = qobject_to(QString, qdict_get(data, "netdev-id"));
+    g_assert_nonnull(qstr);
+
+ g_assert(!strcmp(qstring_get_str(qstr), id));
+ qobject_unref(resp);
+}
+
+static void test_stream_inet_reconnect(void)
+{
+ QTestState *qts0, *qts1;
+ int port;
+ SocketAddress *addr;
+
+ port = inet_get_free_port(false);
+ qts0 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,id=st0,server=true,addr.type=inet,"
+ "addr.ipv4=on,addr.ipv6=off,"
+ "addr.host=127.0.0.1,addr.port=%d", port);
+
+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0);
+
+ qts1 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,server=false,id=st0,addr.type=inet,"
+ "addr.ipv4=on,addr.ipv6=off,reconnect=1,"
+ "addr.host=127.0.0.1,addr.port=%d", port);
+
+ wait_stream_connected(qts0, "st0", &addr);
+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET);
+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1");
+ qapi_free_SocketAddress(addr);
+
+ /* kill server */
+ qtest_quit(qts0);
+
+ /* check client has been disconnected */
+ wait_stream_disconnected(qts1, "st0");
+
+ /* restart server */
+ qts0 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,id=st0,server=true,addr.type=inet,"
+ "addr.ipv4=on,addr.ipv6=off,"
+ "addr.host=127.0.0.1,addr.port=%d", port);
+
+    /* wait for connection events */
+ wait_stream_connected(qts0, "st0", &addr);
+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET);
+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1");
+ qapi_free_SocketAddress(addr);
+
+ wait_stream_connected(qts1, "st0", &addr);
+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET);
+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1");
+ g_assert_cmpint(atoi(addr->u.inet.port), ==, port);
+ qapi_free_SocketAddress(addr);
+
+ qtest_quit(qts1);
+ qtest_quit(qts0);
+}
+
static void test_stream_inet_ipv6(void)
{
QTestState *qts0, *qts1;
@@ -418,6 +517,8 @@
#ifndef _WIN32
qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast);
#endif
+ qtest_add_func("/netdev/stream/inet/reconnect",
+ test_stream_inet_reconnect);
}
if (has_ipv6) {
qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6);
diff --git a/tests/qtest/npcm7xx_pwm-test.c b/tests/qtest/npcm7xx_pwm-test.c
index e320a62..ea4ca1d 100644
--- a/tests/qtest/npcm7xx_pwm-test.c
+++ b/tests/qtest/npcm7xx_pwm-test.c
@@ -20,6 +20,8 @@
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qnum.h"
+static int verbosity_level;
+
#define REF_HZ 25000000
/* Register field definitions. */
@@ -221,7 +223,9 @@
QDict *response;
uint64_t val;
- g_test_message("Getting properties %s from %s", name, path);
+ if (verbosity_level >= 2) {
+ g_test_message("Getting properties %s from %s", name, path);
+ }
response = qtest_qmp(qts, "{ 'execute': 'qom-get',"
" 'arguments': { 'path': %s, 'property': %s}}",
path, name);
@@ -260,8 +264,10 @@
QDict *response;
char *path = g_strdup_printf("/machine/soc/mft[%d]", index);
- g_test_message("Setting properties %s of mft[%d] with value %u",
- name, index, value);
+ if (verbosity_level >= 2) {
+ g_test_message("Setting properties %s of mft[%d] with value %u",
+ name, index, value);
+ }
response = qtest_qmp(qts, "{ 'execute': 'qom-set',"
" 'arguments': { 'path': %s, "
" 'property': %s, 'value': %u}}",
@@ -506,9 +512,12 @@
int32_t expected_cnt = mft_compute_cnt(rpm, clk);
qtest_irq_intercept_in(qts, "/machine/soc/a9mpcore/gic");
- g_test_message(
- "verifying rpm for mft[%d]: clk: %" PRIu64 ", duty: %" PRIu64 ", rpm: %u, cnt: %d",
- index, clk, duty, rpm, expected_cnt);
+ if (verbosity_level >= 2) {
+ g_test_message(
+ "verifying rpm for mft[%d]: clk: %" PRIu64 ", duty: %" PRIu64
+ ", rpm: %u, cnt: %d",
+ index, clk, duty, rpm, expected_cnt);
+ }
/* Verify rpm for fan A */
/* Stop capture */
@@ -670,6 +679,12 @@
{
TestData test_data_list[ARRAY_SIZE(pwm_module_list) * ARRAY_SIZE(pwm_list)];
+ char *v_env = getenv("V");
+
+ if (v_env) {
+ verbosity_level = atoi(v_env);
+ }
+
g_test_init(&argc, &argv, NULL);
for (int i = 0; i < ARRAY_SIZE(pwm_module_list); ++i) {
diff --git a/tests/qtest/pxe-test.c b/tests/qtest/pxe-test.c
index 52f0b5c..62b6eef 100644
--- a/tests/qtest/pxe-test.c
+++ b/tests/qtest/pxe-test.c
@@ -108,6 +108,10 @@
const testdef_t *test = &tests[i];
char *testname;
+ if (!qtest_has_device(test->model)) {
+ continue;
+ }
+
testname = g_strdup_printf("pxe/ipv4/%s/%s",
test->machine, test->model);
qtest_add_data_func(testname, test, test_pxe_ipv4);
diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
index 81831ca..bafd8c2 100644
--- a/tests/tcg/i386/Makefile.target
+++ b/tests/tcg/i386/Makefile.target
@@ -14,7 +14,7 @@
I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c))
ALL_X86_TESTS=$(I386_SRCS:.c=)
SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx
-X86_64_TESTS:=$(filter test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
+X86_64_TESTS:=$(filter test-i386-adcox test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse
run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max
@@ -28,6 +28,10 @@
run-test-i386-bmi2: QEMU_OPTS += -cpu max
run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max
+test-i386-adcox: CFLAGS=-O2
+run-test-i386-adcox: QEMU_OPTS += -cpu max
+run-plugin-test-i386-adcox-%: QEMU_OPTS += -cpu max
+
#
# hello-i386 is a barebones app
#
diff --git a/tests/tcg/i386/test-i386-adcox.c b/tests/tcg/i386/test-i386-adcox.c
new file mode 100644
index 0000000..16169ef
--- /dev/null
+++ b/tests/tcg/i386/test-i386-adcox.c
@@ -0,0 +1,75 @@
+/* See if the ADCX and ADOX instructions give expected results */
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define CC_C 1
+#define CC_O (1 << 11)
+
+#ifdef __x86_64__
+#define REG uint64_t
+#else
+#define REG uint32_t
+#endif
+
+void test_adox_adcx(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand)
+{
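+    /*
+     * Load CF/OF with the requested input values via push/popf, run adox
+     * before adcx with a -1 addend, then verify the sums and output flags.
+     */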
+ REG flags;
+ REG out_adcx, out_adox;
+
+ asm("pushf; pop %0" : "=r"(flags));
+ flags &= ~(CC_C | CC_O);
+ flags |= (in_c ? CC_C : 0);
+ flags |= (in_o ? CC_O : 0);
+
+ out_adcx = adcx_operand;
+ out_adox = adox_operand;
+ asm("push %0; popf;"
+ "adox %3, %2;"
+ "adcx %3, %1;"
+ "pushf; pop %0"
+ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
+ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
+
+ assert(out_adcx == in_c + adcx_operand - 1);
+ assert(out_adox == in_o + adox_operand - 1);
+ assert(!!(flags & CC_C) == (in_c || adcx_operand));
+ assert(!!(flags & CC_O) == (in_o || adox_operand));
+}
+
+void test_adcx_adox(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand)
+{
+ REG flags;
+ REG out_adcx, out_adox;
+
+ asm("pushf; pop %0" : "=r"(flags));
+ flags &= ~(CC_C | CC_O);
+ flags |= (in_c ? CC_C : 0);
+ flags |= (in_o ? CC_O : 0);
+
+ out_adcx = adcx_operand;
+ out_adox = adox_operand;
+ asm("push %0; popf;"
+ "adcx %3, %1;"
+ "adox %3, %2;"
+ "pushf; pop %0"
+ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
+ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
+
+ assert(out_adcx == in_c + adcx_operand - 1);
+ assert(out_adox == in_o + adox_operand - 1);
+ assert(!!(flags & CC_C) == (in_c || adcx_operand));
+ assert(!!(flags & CC_O) == (in_o || adox_operand));
+}
+
+int main(int argc, char *argv[]) {
+ /* try all combinations of input CF, input OF, CF from op1+op2, OF from op2+op1 */
+ int i;
+ for (i = 0; i <= 15; i++) {
+ printf("%d\n", i);
+ test_adcx_adox(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8));
+ test_adox_adcx(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8));
+ }
+ return 0;
+}
+
diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c
index 5fadf47..982d4ab 100644
--- a/tests/tcg/i386/test-i386-bmi2.c
+++ b/tests/tcg/i386/test-i386-bmi2.c
@@ -3,34 +3,40 @@
#include <stdint.h>
#include <stdio.h>
+#ifdef __x86_64
+typedef uint64_t reg_t;
+#else
+typedef uint32_t reg_t;
+#endif
+
#define insn1q(name, arg0) \
-static inline uint64_t name##q(uint64_t arg0) \
+static inline reg_t name##q(reg_t arg0) \
{ \
- uint64_t result64; \
+ reg_t result64; \
asm volatile (#name "q %1, %0" : "=r"(result64) : "rm"(arg0)); \
return result64; \
}
#define insn1l(name, arg0) \
-static inline uint32_t name##l(uint32_t arg0) \
+static inline reg_t name##l(reg_t arg0) \
{ \
- uint32_t result32; \
+ reg_t result32; \
asm volatile (#name "l %k1, %k0" : "=r"(result32) : "rm"(arg0)); \
return result32; \
}
#define insn2q(name, arg0, c0, arg1, c1) \
-static inline uint64_t name##q(uint64_t arg0, uint64_t arg1) \
+static inline reg_t name##q(reg_t arg0, reg_t arg1) \
{ \
- uint64_t result64; \
+ reg_t result64; \
asm volatile (#name "q %2, %1, %0" : "=r"(result64) : c0(arg0), c1(arg1)); \
return result64; \
}
#define insn2l(name, arg0, c0, arg1, c1) \
-static inline uint32_t name##l(uint32_t arg0, uint32_t arg1) \
+static inline reg_t name##l(reg_t arg0, reg_t arg1) \
{ \
- uint32_t result32; \
+ reg_t result32; \
asm volatile (#name "l %k2, %k1, %k0" : "=r"(result32) : c0(arg0), c1(arg1)); \
return result32; \
}
@@ -65,130 +71,140 @@
int main(int argc, char *argv[]) {
uint64_t ehlo = 0x202020204f4c4845ull;
uint64_t mask = 0xa080800302020001ull;
- uint32_t result32;
+ reg_t result;
#ifdef __x86_64
- uint64_t result64;
-
/* 64 bits */
- result64 = andnq(mask, ehlo);
- assert(result64 == 0x002020204d4c4844);
+ result = andnq(mask, ehlo);
+ assert(result == 0x002020204d4c4844);
- result64 = pextq(ehlo, mask);
- assert(result64 == 133);
+ result = pextq(ehlo, mask);
+ assert(result == 133);
- result64 = pdepq(result64, mask);
- assert(result64 == (ehlo & mask));
+ result = pdepq(result, mask);
+ assert(result == (ehlo & mask));
- result64 = pextq(-1ull, mask);
- assert(result64 == 511); /* mask has 9 bits set */
+ result = pextq(-1ull, mask);
+ assert(result == 511); /* mask has 9 bits set */
- result64 = pdepq(-1ull, mask);
- assert(result64 == mask);
+ result = pdepq(-1ull, mask);
+ assert(result == mask);
- result64 = bextrq(mask, 0x3f00);
- assert(result64 == (mask & ~INT64_MIN));
+ result = bextrq(mask, 0x3f00);
+ assert(result == (mask & ~INT64_MIN));
- result64 = bextrq(mask, 0x1038);
- assert(result64 == 0xa0);
+ result = bextrq(mask, 0x1038);
+ assert(result == 0xa0);
- result64 = bextrq(mask, 0x10f8);
- assert(result64 == 0);
+ result = bextrq(mask, 0x10f8);
+ assert(result == 0);
- result64 = blsiq(0x30);
- assert(result64 == 0x10);
+ result = bextrq(0xfedcba9876543210ull, 0x7f00);
+ assert(result == 0xfedcba9876543210ull);
- result64 = blsiq(0x30ull << 32);
- assert(result64 == 0x10ull << 32);
+ result = blsiq(0x30);
+ assert(result == 0x10);
- result64 = blsmskq(0x30);
- assert(result64 == 0x1f);
+ result = blsiq(0x30ull << 32);
+ assert(result == 0x10ull << 32);
- result64 = blsrq(0x30);
- assert(result64 == 0x20);
+ result = blsmskq(0x30);
+ assert(result == 0x1f);
- result64 = blsrq(0x30ull << 32);
- assert(result64 == 0x20ull << 32);
+ result = blsrq(0x30);
+ assert(result == 0x20);
- result64 = bzhiq(mask, 0x3f);
- assert(result64 == (mask & ~INT64_MIN));
+ result = blsrq(0x30ull << 32);
+ assert(result == 0x20ull << 32);
- result64 = bzhiq(mask, 0x1f);
- assert(result64 == (mask & ~(-1 << 30)));
+ result = bzhiq(mask, 0x3f);
+ assert(result == (mask & ~INT64_MIN));
- result64 = rorxq(0x2132435465768798, 8);
- assert(result64 == 0x9821324354657687);
+ result = bzhiq(mask, 0x1f);
+ assert(result == (mask & ~(-1 << 30)));
- result64 = sarxq(0xffeeddccbbaa9988, 8);
- assert(result64 == 0xffffeeddccbbaa99);
+ result = rorxq(0x2132435465768798, 8);
+ assert(result == 0x9821324354657687);
- result64 = sarxq(0x77eeddccbbaa9988, 8 | 64);
- assert(result64 == 0x0077eeddccbbaa99);
+ result = sarxq(0xffeeddccbbaa9988, 8);
+ assert(result == 0xffffeeddccbbaa99);
- result64 = shrxq(0xffeeddccbbaa9988, 8);
- assert(result64 == 0x00ffeeddccbbaa99);
+ result = sarxq(0x77eeddccbbaa9988, 8 | 64);
+ assert(result == 0x0077eeddccbbaa99);
- result64 = shrxq(0x77eeddccbbaa9988, 8 | 192);
- assert(result64 == 0x0077eeddccbbaa99);
+ result = shrxq(0xffeeddccbbaa9988, 8);
+ assert(result == 0x00ffeeddccbbaa99);
- result64 = shlxq(0xffeeddccbbaa9988, 8);
- assert(result64 == 0xeeddccbbaa998800);
+ result = shrxq(0x77eeddccbbaa9988, 8 | 192);
+ assert(result == 0x0077eeddccbbaa99);
+
+ result = shlxq(0xffeeddccbbaa9988, 8);
+ assert(result == 0xeeddccbbaa998800);
#endif
/* 32 bits */
- result32 = andnl(mask, ehlo);
- assert(result32 == 0x04d4c4844);
+ result = andnl(mask, ehlo);
+ assert(result == 0x04d4c4844);
- result32 = pextl((uint32_t) ehlo, mask);
- assert(result32 == 5);
+ result = pextl((uint32_t) ehlo, mask);
+ assert(result == 5);
- result32 = pdepl(result32, mask);
- assert(result32 == (uint32_t)(ehlo & mask));
+ result = pdepl(result, mask);
+ assert(result == (uint32_t)(ehlo & mask));
- result32 = pextl(-1u, mask);
- assert(result32 == 7); /* mask has 3 bits set */
+ result = pextl(-1u, mask);
+ assert(result == 7); /* mask has 3 bits set */
- result32 = pdepl(-1u, mask);
- assert(result32 == (uint32_t)mask);
+ result = pdepl(-1u, mask);
+ assert(result == (uint32_t)mask);
- result32 = bextrl(mask, 0x1f00);
- assert(result32 == (mask & ~INT32_MIN));
+ result = bextrl(mask, 0x1f00);
+ assert(result == (mask & ~INT32_MIN));
- result32 = bextrl(ehlo, 0x1018);
- assert(result32 == 0x4f);
+ result = bextrl(ehlo, 0x1018);
+ assert(result == 0x4f);
- result32 = bextrl(mask, 0x1038);
- assert(result32 == 0);
+ result = bextrl(mask, 0x1038);
+ assert(result == 0);
- result32 = blsil(0xffff);
- assert(result32 == 1);
+ result = bextrl((reg_t)0x8f635a775ad3b9b4ull, 0x3018);
+ assert(result == 0x5a);
- result32 = blsmskl(0x300);
- assert(result32 == 0x1ff);
+ result = bextrl((reg_t)0xfedcba9876543210ull, 0x7f00);
+ assert(result == 0x76543210u);
- result32 = blsrl(0xffc);
- assert(result32 == 0xff8);
+ result = bextrl(-1, 0);
+ assert(result == 0);
- result32 = bzhil(mask, 0xf);
- assert(result32 == 1);
+ result = blsil(0xffff);
+ assert(result == 1);
- result32 = rorxl(0x65768798, 8);
- assert(result32 == 0x98657687);
+ result = blsmskl(0x300);
+ assert(result == 0x1ff);
- result32 = sarxl(0xffeeddcc, 8);
- assert(result32 == 0xffffeedd);
+ result = blsrl(0xffc);
+ assert(result == 0xff8);
- result32 = sarxl(0x77eeddcc, 8 | 32);
- assert(result32 == 0x0077eedd);
+ result = bzhil(mask, 0xf);
+ assert(result == 1);
- result32 = shrxl(0xffeeddcc, 8);
- assert(result32 == 0x00ffeedd);
+ result = rorxl(0x65768798, 8);
+ assert(result == 0x98657687);
- result32 = shrxl(0x77eeddcc, 8 | 128);
- assert(result32 == 0x0077eedd);
+ result = sarxl(0xffeeddcc, 8);
+ assert(result == 0xffffeedd);
- result32 = shlxl(0xffeeddcc, 8);
- assert(result32 == 0xeeddcc00);
+ result = sarxl(0x77eeddcc, 8 | 32);
+ assert(result == 0x0077eedd);
+
+ result = shrxl(0xffeeddcc, 8);
+ assert(result == 0x00ffeedd);
+
+ result = shrxl(0x77eeddcc, 8 | 128);
+ assert(result == 0x0077eedd);
+
+ result = shlxl(0xffeeddcc, 8);
+ assert(result == 0xeeddcc00);
return 0;
}
diff --git a/tests/tcg/s390x/Makefile.softmmu-target b/tests/tcg/s390x/Makefile.softmmu-target
index a34fa68..50c1b88 100644
--- a/tests/tcg/s390x/Makefile.softmmu-target
+++ b/tests/tcg/s390x/Makefile.softmmu-target
@@ -3,7 +3,7 @@
QEMU_OPTS=-action panic=exit-failure -kernel
%: %.S
- $(CC) -march=z13 -m64 -nostartfiles -static -Wl,-Ttext=0 \
+ $(CC) -march=z13 -m64 -nostdlib -static -Wl,-Ttext=0 \
-Wl,--build-id=none $< -o $@
TESTS += unaligned-lowcore
diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target
index 4eac782..e64aab1 100644
--- a/tests/tcg/x86_64/Makefile.target
+++ b/tests/tcg/x86_64/Makefile.target
@@ -12,11 +12,14 @@
X86_64_TESTS += vsyscall
X86_64_TESTS += noexec
X86_64_TESTS += cmpxchg
+X86_64_TESTS += adox
TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64
else
TESTS=$(MULTIARCH_TESTS)
endif
+adox: CFLAGS=-O2
+
run-test-i386-ssse3: QEMU_OPTS += -cpu max
run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max
diff --git a/tests/tcg/x86_64/adox.c b/tests/tcg/x86_64/adox.c
new file mode 100644
index 0000000..36be644
--- /dev/null
+++ b/tests/tcg/x86_64/adox.c
@@ -0,0 +1,69 @@
+/* See if ADOX gives expected results */
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+static uint64_t adoxq(bool *c_out, uint64_t a, uint64_t b, bool c)
+{
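+    /*
+     * Adding 0x7fffffff to c (0 or 1) overflows exactly when c is 1,
+     * which seeds OF, the carry-in consumed by adox.
+     */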
+ asm ("addl $0x7fffffff, %k1\n\t"
+ "adoxq %2, %0\n\t"
+ "seto %b1"
+ : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c));
+ *c_out = c;
+ return a;
+}
+
+static uint64_t adoxl(bool *c_out, uint64_t a, uint64_t b, bool c)
+{
+ asm ("addl $0x7fffffff, %k1\n\t"
+ "adoxl %k2, %k0\n\t"
+ "seto %b1"
+ : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c));
+ *c_out = c;
+ return a;
+}
+
+int main()
+{
+ uint64_t r;
+ bool c;
+
+ r = adoxq(&c, 0, 0, 0);
+ assert(r == 0);
+ assert(c == 0);
+
+ r = adoxl(&c, 0, 0, 0);
+ assert(r == 0);
+ assert(c == 0);
+
+ r = adoxl(&c, 0x100000000, 0, 0);
+ assert(r == 0);
+ assert(c == 0);
+
+ r = adoxq(&c, 0, 0, 1);
+ assert(r == 1);
+ assert(c == 0);
+
+ r = adoxl(&c, 0, 0, 1);
+ assert(r == 1);
+ assert(c == 0);
+
+ r = adoxq(&c, -1, -1, 0);
+ assert(r == -2);
+ assert(c == 1);
+
+ r = adoxl(&c, -1, -1, 0);
+ assert(r == 0xfffffffe);
+ assert(c == 1);
+
+ r = adoxq(&c, -1, -1, 1);
+ assert(r == -1);
+ assert(c == 1);
+
+ r = adoxl(&c, -1, -1, 1);
+ assert(r == 0xffffffff);
+ assert(c == 1);
+
+ return 0;
+}
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 4fed8b7..d9d3807 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -933,10 +933,9 @@
}
}
-static int coroutine_fn bdrv_test_top_co_preadv(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_test_top_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
BDRVTestTopState *tts = bs->opaque;
return bdrv_co_preadv(tts->wait_child, offset, bytes, qiov, flags);
@@ -967,6 +966,8 @@
void *buffer = g_malloc(65536);
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buffer, 65536);
+ GRAPH_RDLOCK_GUARD();
+
/* Pretend some internal write operation from parent to child.
* Important: We have to read from the child, not from the parent!
* Draining works by first propagating it all up the tree to the
@@ -1698,11 +1699,9 @@
* Otherwise:
* Set .has_read to true and return success.
*/
-static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs,
- int64_t offset,
- int64_t bytes,
- QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_replace_test_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
BDRVReplaceTestState *s = bs->opaque;
@@ -1778,7 +1777,10 @@
int ret;
/* Queue a read request post-drain */
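+    /*
+     * bdrv_replace_test_co_preadv() is marked GRAPH_RDLOCK above, so take
+     * the graph reader lock around the call.
+     */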
+ bdrv_graph_co_rdlock();
ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
+ bdrv_graph_co_rdunlock();
+
g_assert(ret >= 0);
bdrv_dec_in_flight(bs);
}
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index 6dfac64..3a5e1eb 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -312,7 +312,8 @@
g_assert_cmpint(ret, ==, -EINVAL);
}
-static void test_sync_op_block_status(BdrvChild *c)
+/* Disable TSA to make bdrv_test.bdrv_co_block_status writable */
+static void TSA_NO_TSA test_sync_op_block_status(BdrvChild *c)
{
int ret;
int64_t n;
diff --git a/tests/unit/test-xbzrle.c b/tests/unit/test-xbzrle.c
index ef951b6..547046d 100644
--- a/tests/unit/test-xbzrle.c
+++ b/tests/unit/test-xbzrle.c
@@ -16,6 +16,35 @@
#define XBZRLE_PAGE_SIZE 4096
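+/*
+ * Encoder under test: defaults to the scalar implementation and is switched
+ * to the AVX512 version by the constructor below when the CPU and OS allow it.
+ */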
+int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
+ uint8_t *, int) = xbzrle_encode_buffer;
+#if defined(CONFIG_AVX512BW_OPT)
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+ unsigned max = __get_cpuid_max(0, NULL);
+ int a, b, c, d;
+ if (max >= 1) {
+ __cpuid(1, a, b, c, d);
+ /* We must check that AVX is not just available, but usable. */
+ if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+ int bv;
+ __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+ __cpuid_count(7, 0, a, b, c, d);
+ /* 0xe6:
+ * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+ * and ZMM16-ZMM31 state are enabled by OS)
+ * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+ */
+ if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+ xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
+ }
+ }
+ }
+    return;
+}
+#endif
+
static void test_uleb(void)
{
uint32_t i, val;
@@ -54,7 +83,7 @@
buffer[1000 + diff_len + 5] = 105;
/* encode zero page */
- dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
+ dlen = xbzrle_encode_buffer_func(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
g_assert(dlen == 0);
@@ -78,7 +107,7 @@
test[1000 + diff_len + 5] = 109;
/* test unchanged buffer */
- dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed,
+ dlen = xbzrle_encode_buffer_func(test, test, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
g_assert(dlen == 0);
@@ -96,7 +125,7 @@
test[XBZRLE_PAGE_SIZE - 1] = 1;
- dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+ dlen = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
@@ -121,7 +150,7 @@
}
/* encode overflow */
- rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+ rc = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
g_assert(rc == -1);
@@ -152,7 +181,7 @@
test[1000 + diff_len + 5] = 109;
/* test encode/decode */
- dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+ dlen = xbzrle_encode_buffer_func(test, buffer, XBZRLE_PAGE_SIZE, compressed,
XBZRLE_PAGE_SIZE);
rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
diff --git a/tools/meson.build b/tools/meson.build
index 10eb3a0..e69de29 100644
--- a/tools/meson.build
+++ b/tools/meson.build
@@ -1,13 +0,0 @@
-have_virtiofsd = get_option('virtiofsd') \
- .require(targetos == 'linux',
- error_message: 'virtiofsd requires Linux') \
- .require(seccomp.found() and libcap_ng.found(),
- error_message: 'virtiofsd requires libcap-ng-devel and seccomp-devel') \
- .require(have_vhost_user,
- error_message: 'virtiofsd needs vhost-user-support') \
- .disable_auto_if(not have_tools and not have_system) \
- .allowed()
-
-if have_virtiofsd
- subdir('virtiofsd')
-endif
diff --git a/tools/virtiofsd/50-qemu-virtiofsd.json.in b/tools/virtiofsd/50-qemu-virtiofsd.json.in
deleted file mode 100644
index 9bcd86f..0000000
--- a/tools/virtiofsd/50-qemu-virtiofsd.json.in
+++ /dev/null
@@ -1,5 +0,0 @@
-{
- "description": "QEMU virtiofsd vhost-user-fs",
- "type": "fs",
- "binary": "@libexecdir@/virtiofsd"
-}
diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c
deleted file mode 100644
index b5f04be..0000000
--- a/tools/virtiofsd/buffer.c
+++ /dev/null
@@ -1,350 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2010 Miklos Szeredi <miklos@szeredi.hu>
- *
- * Functions for dealing with `struct fuse_buf` and `struct
- * fuse_bufvec`.
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#include "qemu/osdep.h"
-#include "fuse_i.h"
-#include "fuse_lowlevel.h"
-
-size_t fuse_buf_size(const struct fuse_bufvec *bufv)
-{
- size_t i;
- size_t size = 0;
-
- for (i = 0; i < bufv->count; i++) {
- if (bufv->buf[i].size == SIZE_MAX) {
- size = SIZE_MAX;
- } else {
- size += bufv->buf[i].size;
- }
- }
-
- return size;
-}
-
-static ssize_t fuse_buf_writev(struct fuse_buf *out_buf,
- struct fuse_bufvec *in_buf)
-{
- ssize_t res, i, j;
- size_t iovcnt = in_buf->count;
- struct iovec *iov;
- int fd = out_buf->fd;
-
- iov = g_try_new0(struct iovec, iovcnt);
- if (!iov) {
- return -ENOMEM;
- }
-
- for (i = 0, j = 0; i < iovcnt; i++) {
- /* Skip the buf with 0 size */
- if (in_buf->buf[i].size) {
- iov[j].iov_base = in_buf->buf[i].mem;
- iov[j].iov_len = in_buf->buf[i].size;
- j++;
- }
- }
-
- if (out_buf->flags & FUSE_BUF_FD_SEEK) {
- res = pwritev(fd, iov, iovcnt, out_buf->pos);
- } else {
- res = writev(fd, iov, iovcnt);
- }
-
- if (res == -1) {
- res = -errno;
- }
-
- g_free(iov);
- return res;
-}
-
-static size_t min_size(size_t s1, size_t s2)
-{
- return s1 < s2 ? s1 : s2;
-}
-
-static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off,
- const struct fuse_buf *src, size_t src_off,
- size_t len)
-{
- ssize_t res = 0;
- size_t copied = 0;
-
- while (len) {
- if (dst->flags & FUSE_BUF_FD_SEEK) {
- res = pwrite(dst->fd, (char *)src->mem + src_off, len,
- dst->pos + dst_off);
- } else {
- res = write(dst->fd, (char *)src->mem + src_off, len);
- }
- if (res == -1) {
- if (!copied) {
- return -errno;
- }
- break;
- }
- if (res == 0) {
- break;
- }
-
- copied += res;
- if (!(dst->flags & FUSE_BUF_FD_RETRY)) {
- break;
- }
-
- src_off += res;
- dst_off += res;
- len -= res;
- }
-
- return copied;
-}
-
-static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off,
- const struct fuse_buf *src, size_t src_off,
- size_t len)
-{
- ssize_t res = 0;
- size_t copied = 0;
-
- while (len) {
- if (src->flags & FUSE_BUF_FD_SEEK) {
- res = pread(src->fd, (char *)dst->mem + dst_off, len,
- src->pos + src_off);
- } else {
- res = read(src->fd, (char *)dst->mem + dst_off, len);
- }
- if (res == -1) {
- if (!copied) {
- return -errno;
- }
- break;
- }
- if (res == 0) {
- break;
- }
-
- copied += res;
- if (!(src->flags & FUSE_BUF_FD_RETRY)) {
- break;
- }
-
- dst_off += res;
- src_off += res;
- len -= res;
- }
-
- return copied;
-}
-
-static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off,
- const struct fuse_buf *src, size_t src_off,
- size_t len)
-{
- char buf[4096];
- struct fuse_buf tmp = {
- .size = sizeof(buf),
- .flags = 0,
- };
- ssize_t res;
- size_t copied = 0;
-
- tmp.mem = buf;
-
- while (len) {
- size_t this_len = min_size(tmp.size, len);
- size_t read_len;
-
- res = fuse_buf_read(&tmp, 0, src, src_off, this_len);
- if (res < 0) {
- if (!copied) {
- return res;
- }
- break;
- }
- if (res == 0) {
- break;
- }
-
- read_len = res;
- res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len);
- if (res < 0) {
- if (!copied) {
- return res;
- }
- break;
- }
- if (res == 0) {
- break;
- }
-
- copied += res;
-
- if (res < this_len) {
- break;
- }
-
- dst_off += res;
- src_off += res;
- len -= res;
- }
-
- return copied;
-}
-
-static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off,
- const struct fuse_buf *src, size_t src_off,
- size_t len)
-{
- int src_is_fd = src->flags & FUSE_BUF_IS_FD;
- int dst_is_fd = dst->flags & FUSE_BUF_IS_FD;
-
- if (!src_is_fd && !dst_is_fd) {
- char *dstmem = (char *)dst->mem + dst_off;
- char *srcmem = (char *)src->mem + src_off;
-
- if (dstmem != srcmem) {
- if (dstmem + len <= srcmem || srcmem + len <= dstmem) {
- memcpy(dstmem, srcmem, len);
- } else {
- memmove(dstmem, srcmem, len);
- }
- }
-
- return len;
- } else if (!src_is_fd) {
- return fuse_buf_write(dst, dst_off, src, src_off, len);
- } else if (!dst_is_fd) {
- return fuse_buf_read(dst, dst_off, src, src_off, len);
- } else {
- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len);
- }
-}
-
-static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv)
-{
- if (bufv->idx < bufv->count) {
- return &bufv->buf[bufv->idx];
- } else {
- return NULL;
- }
-}
-
-static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len)
-{
- const struct fuse_buf *buf = fuse_bufvec_current(bufv);
-
- if (!buf) {
- return 0;
- }
-
- bufv->off += len;
- assert(bufv->off <= buf->size);
- if (bufv->off == buf->size) {
- assert(bufv->idx < bufv->count);
- bufv->idx++;
- if (bufv->idx == bufv->count) {
- return 0;
- }
- bufv->off = 0;
- }
- return 1;
-}
-
-ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv)
-{
- size_t copied = 0, i;
-
- if (dstv == srcv) {
- return fuse_buf_size(dstv);
- }
-
- /*
- * use writev to improve bandwidth when all the
- * src buffers already mapped by the daemon
- * process
- */
- for (i = 0; i < srcv->count; i++) {
- if (srcv->buf[i].flags & FUSE_BUF_IS_FD) {
- break;
- }
- }
- if ((i == srcv->count) && (dstv->count == 1) &&
- (dstv->idx == 0) &&
- (dstv->buf[0].flags & FUSE_BUF_IS_FD)) {
- dstv->buf[0].pos += dstv->off;
- return fuse_buf_writev(&dstv->buf[0], srcv);
- }
-
- for (;;) {
- const struct fuse_buf *src = fuse_bufvec_current(srcv);
- const struct fuse_buf *dst = fuse_bufvec_current(dstv);
- size_t src_len;
- size_t dst_len;
- size_t len;
- ssize_t res;
-
- if (src == NULL || dst == NULL) {
- break;
- }
-
- src_len = src->size - srcv->off;
- dst_len = dst->size - dstv->off;
- len = min_size(src_len, dst_len);
-
- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len);
- if (res < 0) {
- if (!copied) {
- return res;
- }
- break;
- }
- copied += res;
-
- if (!fuse_bufvec_advance(srcv, res) ||
- !fuse_bufvec_advance(dstv, res)) {
- break;
- }
-
- if (res < len) {
- break;
- }
- }
-
- return copied;
-}
-
-void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len)
-{
- void *ptr;
-
- if (len > iter->size - iter->pos) {
- return NULL;
- }
-
- ptr = iter->mem + iter->pos;
- iter->pos += len;
- return ptr;
-}
-
-const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter)
-{
- const char *str = iter->mem + iter->pos;
- size_t remaining = iter->size - iter->pos;
- size_t i;
-
- for (i = 0; i < remaining; i++) {
- if (str[i] == '\0') {
- iter->pos += i + 1;
- return str;
- }
- }
- return NULL;
-}
diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
deleted file mode 100644
index bf46954..0000000
--- a/tools/virtiofsd/fuse_common.h
+++ /dev/null
@@ -1,837 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB.
- */
-
-/** @file */
-
-#if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_)
-#error \
- "Never include <fuse_common.h> directly; use <fuse.h> or <fuse_lowlevel.h> instead."
-#endif
-
-#ifndef FUSE_COMMON_H_
-#define FUSE_COMMON_H_
-
-#include "fuse_log.h"
-#include "fuse_opt.h"
-
-/** Major version of FUSE library interface */
-#define FUSE_MAJOR_VERSION 3
-
-/** Minor version of FUSE library interface */
-#define FUSE_MINOR_VERSION 2
-
-#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min))
-#define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION)
-
-/**
- * Information about an open file.
- *
- * File Handles are created by the open, opendir, and create methods and closed
- * by the release and releasedir methods. Multiple file handles may be
- * concurrently open for the same file. Generally, a client will create one
- * file handle per file descriptor, though in some cases multiple file
- * descriptors can share a single file handle.
- */
-struct fuse_file_info {
- /** Open flags. Available in open() and release() */
- int flags;
-
- /*
- * In case of a write operation indicates if this was caused
- * by a delayed write from the page cache. If so, then the
- * context's pid, uid, and gid fields will not be valid, and
- * the *fh* value may not match the *fh* value that would
- * have been sent with the corresponding individual write
- * requests if write caching had been disabled.
- */
- unsigned int writepage:1;
-
- /** Can be filled in by open, to use direct I/O on this file. */
- unsigned int direct_io:1;
-
- /*
- * Can be filled in by open. It signals the kernel that any
- * currently cached file data (ie., data that the filesystem
- * provided the last time the file was open) need not be
- * invalidated. Has no effect when set in other contexts (in
- * particular it does nothing when set by opendir()).
- */
- unsigned int keep_cache:1;
-
- /*
- * Indicates a flush operation. Set in flush operation, also
- * maybe set in highlevel lock operation and lowlevel release
- * operation.
- */
- unsigned int flush:1;
-
- /*
- * Can be filled in by open, to indicate that the file is not
- * seekable.
- */
- unsigned int nonseekable:1;
-
- /*
- * Indicates that flock locks for this file should be
- * released. If set, lock_owner shall contain a valid value.
- * May only be set in ->release().
- */
- unsigned int flock_release:1;
-
- /*
- * Can be filled in by opendir. It signals the kernel to
- * enable caching of entries returned by readdir(). Has no
- * effect when set in other contexts (in particular it does
- * nothing when set by open()).
- */
- unsigned int cache_readdir:1;
-
- /* Indicates that suid/sgid bits should be removed upon write */
- unsigned int kill_priv:1;
-
-
- /** Padding. Reserved for future use*/
- unsigned int padding:24;
- unsigned int padding2:32;
-
- /*
- * File handle id. May be filled in by filesystem in create,
- * open, and opendir(). Available in most other file operations on the
- * same file handle.
- */
- uint64_t fh;
-
- /** Lock owner id. Available in locking operations and flush */
- uint64_t lock_owner;
-
- /*
- * Requested poll events. Available in ->poll. Only set on kernels
- * which support it. If unsupported, this field is set to zero.
- */
- uint32_t poll_events;
-};
-
-/*
- * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want'
- */
-
-/**
- * Indicates that the filesystem supports asynchronous read requests.
- *
- * If this capability is not requested/available, the kernel will
- * ensure that there is at most one pending read request per
- * file-handle at any time, and will attempt to order read requests by
- * increasing offset.
- *
- * This feature is enabled by default when supported by the kernel.
- */
-#define FUSE_CAP_ASYNC_READ (1 << 0)
-
-/**
- * Indicates that the filesystem supports "remote" locking.
- *
- * This feature is enabled by default when supported by the kernel,
- * and if getlk() and setlk() handlers are implemented.
- */
-#define FUSE_CAP_POSIX_LOCKS (1 << 1)
-
-/**
- * Indicates that the filesystem supports the O_TRUNC open flag. If
- * disabled, and an application specifies O_TRUNC, fuse first calls
- * truncate() and then open() with O_TRUNC filtered out.
- *
- * This feature is enabled by default when supported by the kernel.
- */
-#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3)
-
-/**
- * Indicates that the filesystem supports lookups of "." and "..".
- *
- * This feature is disabled by default.
- */
-#define FUSE_CAP_EXPORT_SUPPORT (1 << 4)
-
-/**
- * Indicates that the kernel should not apply the umask to the
- * file mode on create operations.
- *
- * This feature is disabled by default.
- */
-#define FUSE_CAP_DONT_MASK (1 << 6)
-
-/**
- * Indicates that libfuse should try to use splice() when writing to
- * the fuse device. This may improve performance.
- *
- * This feature is disabled by default.
- */
-#define FUSE_CAP_SPLICE_WRITE (1 << 7)
-
-/**
- * Indicates that libfuse should try to move pages instead of copying when
- * writing to / reading from the fuse device. This may improve performance.
- *
- * This feature is disabled by default.
- */
-#define FUSE_CAP_SPLICE_MOVE (1 << 8)
-
-/**
- * Indicates that libfuse should try to use splice() when reading from
- * the fuse device. This may improve performance.
- *
- * This feature is enabled by default when supported by the kernel and
- * if the filesystem implements a write_buf() handler.
- */
-#define FUSE_CAP_SPLICE_READ (1 << 9)
-
-/**
- * If set, the calls to flock(2) will be emulated using POSIX locks and must
- * then be handled by the filesystem's setlock() handler.
- *
- * If not set, flock(2) calls will be handled by the FUSE kernel module
- * internally (so any access that does not go through the kernel cannot be taken
- * into account).
- *
- * This feature is enabled by default when supported by the kernel and
- * if the filesystem implements a flock() handler.
- */
-#define FUSE_CAP_FLOCK_LOCKS (1 << 10)
-
-/**
- * Indicates that the filesystem supports ioctl's on directories.
- *
- * This feature is enabled by default when supported by the kernel.
- */
-#define FUSE_CAP_IOCTL_DIR (1 << 11)
-
-/**
- * Traditionally, while a file is open the FUSE kernel module only
- * asks the filesystem for an update of the file's attributes when a
- * client attempts to read beyond EOF. This is unsuitable for
- * e.g. network filesystems, where the file contents may change
- * without the kernel knowing about it.
- *
- * If this flag is set, FUSE will check the validity of the attributes
- * on every read. If the attributes are no longer valid (i.e., if the
- * *attr_timeout* passed to fuse_reply_attr() or set in `struct
- * fuse_entry_param` has passed), it will first issue a `getattr`
- * request. If the new mtime differs from the previous value, any
- * cached file *contents* will be invalidated as well.
- *
- * This flag should always be set when available. If all file changes
- * go through the kernel, *attr_timeout* should be set to a very large
- * number to avoid unnecessary getattr() calls.
- *
- * This feature is enabled by default when supported by the kernel.
- */
-#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12)
-
-/**
- * Indicates that the filesystem supports readdirplus.
- *
- * This feature is enabled by default when supported by the kernel and if the
- * filesystem implements a readdirplus() handler.
- */
-#define FUSE_CAP_READDIRPLUS (1 << 13)
-
-/**
- * Indicates that the filesystem supports adaptive readdirplus.
- *
- * If FUSE_CAP_READDIRPLUS is not set, this flag has no effect.
- *
- * If FUSE_CAP_READDIRPLUS is set and this flag is not set, the kernel
- * will always issue readdirplus() requests to retrieve directory
- * contents.
- *
- * If FUSE_CAP_READDIRPLUS is set and this flag is set, the kernel
- * will issue both readdir() and readdirplus() requests, depending on
- * how much information is expected to be required.
- *
- * As of Linux 4.20, the algorithm is as follows: when userspace
- * starts to read directory entries, issue a READDIRPLUS request to
- * the filesystem. If any entry attributes have been looked up by the
- * time userspace requests the next batch of entries continue with
- * READDIRPLUS, otherwise switch to plain READDIR. This will reasult
- * in eg plain "ls" triggering READDIRPLUS first then READDIR after
- * that because it doesn't do lookups. "ls -l" should result in all
- * READDIRPLUS, except if dentries are already cached.
- *
- * This feature is enabled by default when supported by the kernel and
- * if the filesystem implements both a readdirplus() and a readdir()
- * handler.
- */
-#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14)
-
-/**
- * Indicates that the filesystem supports asynchronous direct I/O submission.
- *
- * If this capability is not requested/available, the kernel will ensure that
- * there is at most one pending read and one pending write request per direct
- * I/O file-handle at any time.
- *
- * This feature is enabled by default when supported by the kernel.
- */
-#define FUSE_CAP_ASYNC_DIO (1 << 15)
-
-/**
- * Indicates that writeback caching should be enabled. This means that
- * individual write request may be buffered and merged in the kernel
- * before they are send to the filesystem.
- *
- * This feature is disabled by default.
- */
-#define FUSE_CAP_WRITEBACK_CACHE (1 << 16)
-
-/**
- * Indicates support for zero-message opens. If this flag is set in
- * the `capable` field of the `fuse_conn_info` structure, then the
- * filesystem may return `ENOSYS` from the open() handler to indicate
- * success. Further attempts to open files will be handled in the
- * kernel. (If this flag is not set, returning ENOSYS will be treated
- * as an error and signaled to the caller).
- *
- * Setting (or unsetting) this flag in the `want` field has *no
- * effect*.
- */
-#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17)
-
-/**
- * Indicates support for parallel directory operations. If this flag
- * is unset, the FUSE kernel module will ensure that lookup() and
- * readdir() requests are never issued concurrently for the same
- * directory.
- *
- * This feature is enabled by default when supported by the kernel.
- */
-#define FUSE_CAP_PARALLEL_DIROPS (1 << 18)
-
-/**
- * Indicates support for POSIX ACLs.
- *
- * If this feature is enabled, the kernel will cache and have
- * responsibility for enforcing ACLs. ACL will be stored as xattrs and
- * passed to userspace, which is responsible for updating the ACLs in
- * the filesystem, keeping the file mode in sync with the ACL, and
- * ensuring inheritance of default ACLs when new filesystem nodes are
- * created. Note that this requires that the file system is able to
- * parse and interpret the xattr representation of ACLs.
- *
- * Enabling this feature implicitly turns on the
- * ``default_permissions`` mount option (even if it was not passed to
- * mount(2)).
- *
- * This feature is disabled by default.
- */
-#define FUSE_CAP_POSIX_ACL (1 << 19)
-
-/**
- * Indicates that the filesystem is responsible for unsetting
- * setuid and setgid bits when a file is written, truncated, or
- * its owner is changed.
- *
- * This feature is enabled by default when supported by the kernel.
- */
-#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20)
-
-/**
- * Indicates support for zero-message opendirs. If this flag is set in
- * the `capable` field of the `fuse_conn_info` structure, then the filesystem
- * may return `ENOSYS` from the opendir() handler to indicate success. Further
- * opendir and releasedir messages will be handled in the kernel. (If this
- * flag is not set, returning ENOSYS will be treated as an error and signalled
- * to the caller.)
- *
- * Setting (or unsetting) this flag in the `want` field has *no effect*.
- */
-#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24)
-
-/**
- * Indicates that the kernel supports the FUSE_ATTR_SUBMOUNT flag.
- *
- * Setting (or unsetting) this flag in the `want` field has *no effect*.
- */
-#define FUSE_CAP_SUBMOUNTS (1 << 27)
-
-/**
- * Indicates that the filesystem is responsible for clearing
- * security.capability xattr and clearing setuid and setgid bits. Following
- * are the rules.
- * - clear "security.capability" on write, truncate and chown unconditionally
- * - clear suid/sgid if following is true. Note, sgid is cleared only if
- * group executable bit is set.
- * o setattr has FATTR_SIZE and FATTR_KILL_SUIDGID set.
- * o setattr has FATTR_UID or FATTR_GID
- * o open has O_TRUNC and FUSE_OPEN_KILL_SUIDGID
- * o create has O_TRUNC and FUSE_OPEN_KILL_SUIDGID flag set.
- * o write has FUSE_WRITE_KILL_SUIDGID
- */
-#define FUSE_CAP_HANDLE_KILLPRIV_V2 (1 << 28)
-
-/**
- * Indicates that file server supports extended struct fuse_setxattr_in
- */
-#define FUSE_CAP_SETXATTR_EXT (1 << 29)
-
-/**
- * Indicates that file server supports creating file security context
- */
-#define FUSE_CAP_SECURITY_CTX (1ULL << 32)
-
-/**
- * Ioctl flags
- *
- * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
- * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
- * FUSE_IOCTL_RETRY: retry with new iovecs
- * FUSE_IOCTL_DIR: is a directory
- *
- * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
- */
-#define FUSE_IOCTL_COMPAT (1 << 0)
-#define FUSE_IOCTL_UNRESTRICTED (1 << 1)
-#define FUSE_IOCTL_RETRY (1 << 2)
-#define FUSE_IOCTL_DIR (1 << 4)
-
-#define FUSE_IOCTL_MAX_IOV 256
-
-/**
- * Connection information, passed to the ->init() method
- *
- * Some of the elements are read-write, these can be changed to
- * indicate the value requested by the filesystem. The requested
- * value must usually be smaller than the indicated value.
- */
-struct fuse_conn_info {
- /**
- * Major version of the protocol (read-only)
- */
- unsigned proto_major;
-
- /**
- * Minor version of the protocol (read-only)
- */
- unsigned proto_minor;
-
- /**
- * Maximum size of the write buffer
- */
- unsigned max_write;
-
- /**
- * Maximum size of read requests. A value of zero indicates no
- * limit. However, even if the filesystem does not specify a
- * limit, the maximum size of read requests will still be
- * limited by the kernel.
- *
- * NOTE: For the time being, the maximum size of read requests
- * must be set both here *and* passed to fuse_session_new()
- * using the ``-o max_read=<n>`` mount option. At some point
- * in the future, specifying the mount option will no longer
- * be necessary.
- */
- unsigned max_read;
-
- /**
- * Maximum readahead
- */
- unsigned max_readahead;
-
- /**
- * Capability flags that the kernel supports (read-only)
- */
- uint64_t capable;
-
- /**
- * Capability flags that the filesystem wants to enable.
- *
- * libfuse attempts to initialize this field with
- * reasonable default values before calling the init() handler.
- */
- uint64_t want;
-
- /**
- * Maximum number of pending "background" requests. A
- * background request is any type of request for which the
- * total number is not limited by other means. As of kernel
- * 4.8, only two types of requests fall into this category:
- *
- * 1. Read-ahead requests
- * 2. Asynchronous direct I/O requests
- *
- * Read-ahead requests are generated (if max_readahead is
- * non-zero) by the kernel to preemptively fill its caches
- * when it anticipates that userspace will soon read more
- * data.
- *
- * Asynchronous direct I/O requests are generated if
- * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large
- * direct I/O request. In this case the kernel will internally
- * split it up into multiple smaller requests and submit them
- * to the filesystem concurrently.
- *
- * Note that the following requests are *not* background
- * requests: writeback requests (limited by the kernel's
- * flusher algorithm), regular (i.e., synchronous and
- * buffered) userspace read/write requests (limited to one per
- * thread), asynchronous read requests (Linux's io_submit(2)
- * call actually blocks, so these are also limited to one per
- * thread).
- */
- unsigned max_background;
-
- /**
- * Kernel congestion threshold parameter. If the number of pending
- * background requests exceeds this number, the FUSE kernel module will
- * mark the filesystem as "congested". This instructs the kernel to
- * expect that queued requests will take some time to complete, and to
- * adjust its algorithms accordingly (e.g. by putting a waiting thread
- * to sleep instead of using a busy-loop).
- */
- unsigned congestion_threshold;
-
- /**
- * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible
- * for updating mtime and ctime when write requests are received. The
- * updated values are passed to the filesystem with setattr() requests.
- * However, if the filesystem does not support the full resolution of
- * the kernel timestamps (nanoseconds), the mtime and ctime values used
- * by kernel and filesystem will differ (and result in an apparent
- * change of times after a cache flush).
- *
- * To prevent this problem, this variable can be used to inform the
- * kernel about the timestamp granularity supported by the filesystem.
- * The value should be a power of 10. The default is 1, i.e. full
- * nano-second resolution. Filesystems supporting only second resolution
- * should set this to 1000000000.
- */
- unsigned time_gran;
-
- /**
- * For future use.
- */
- unsigned reserved[22];
-};
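[Editor's sketch of a minimal init() handler using this structure; handler name and limits are illustrative, not taken from virtiofsd. It follows the read-modify pattern implied above: request only capabilities reported in `capable`, then advertise the backend's timestamp granularity and a background-request cap.]

#include "fuse_lowlevel.h"

/* Sketch: negotiate connection parameters from the init() callback. */
static void example_init(void *userdata, struct fuse_conn_info *conn)
{
    (void)userdata;

    /* Only request capabilities the kernel actually offers. */
    if (conn->capable & FUSE_CAP_WRITEBACK_CACHE) {
        conn->want |= FUSE_CAP_WRITEBACK_CACHE;
    }
    if (conn->capable & FUSE_CAP_READDIRPLUS) {
        conn->want |= FUSE_CAP_READDIRPLUS;
    }

    /* This (hypothetical) backend only stores second-resolution timestamps. */
    conn->time_gran = 1000000000;

    /* Keep the read-ahead / async direct I/O queues modest. */
    if (conn->max_background > 64) {
        conn->max_background = 64;
    }
}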
-
-struct fuse_session;
-struct fuse_pollhandle;
-struct fuse_conn_info_opts;
-
-/**
- * This function parses several command-line options that can be used
- * to override elements of struct fuse_conn_info. The pointer returned
- * by this function should be passed to the
- * fuse_apply_conn_info_opts() method by the file system's init()
- * handler.
- *
- * Before using this function, think twice if you really want these
- * parameters to be adjustable from the command line. In most cases,
- * they should be determined by the file system internally.
- *
- * The following options are recognized:
- *
- * -o max_write=N sets conn->max_write
- * -o max_readahead=N sets conn->max_readahead
- * -o max_background=N sets conn->max_background
- * -o congestion_threshold=N sets conn->congestion_threshold
- * -o async_read sets FUSE_CAP_ASYNC_READ in conn->want
- * -o sync_read unsets FUSE_CAP_ASYNC_READ in conn->want
- * -o atomic_o_trunc sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want
- * -o no_remote_lock          Equivalent to -o no_remote_flock,no_remote_posix_lock
- * -o no_remote_flock         Unsets FUSE_CAP_FLOCK_LOCKS in conn->want
- * -o no_remote_posix_lock    Unsets FUSE_CAP_POSIX_LOCKS in conn->want
- * -o [no_]splice_write       (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want
- * -o [no_]splice_move        (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want
- * -o [no_]splice_read        (un-)sets FUSE_CAP_SPLICE_READ in conn->want
- * -o [no_]auto_inval_data    (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want
- * -o readdirplus=no          unsets FUSE_CAP_READDIRPLUS in conn->want
- * -o readdirplus=yes         sets FUSE_CAP_READDIRPLUS and unsets
- *                            FUSE_CAP_READDIRPLUS_AUTO in conn->want
- * -o readdirplus=auto        sets FUSE_CAP_READDIRPLUS and
- *                            FUSE_CAP_READDIRPLUS_AUTO in conn->want
- * -o [no_]async_dio          (un-)sets FUSE_CAP_ASYNC_DIO in conn->want
- * -o [no_]writeback_cache    (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want
- * -o time_gran=N             sets conn->time_gran
- *
- * Known options will be removed from *args*, unknown options will be
- * passed through unchanged.
- *
- * @param args argument vector (input+output)
- * @return parsed options
- **/
-struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args);
-
-/**
- * This function applies the (parsed) parameters in *opts* to the
- * *conn* pointer. It may modify the following fields: wants,
- * max_write, max_readahead, congestion_threshold, max_background,
- * time_gran. A field is only set (or unset) if the corresponding
- * option has been explicitly set.
- */
-void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts,
- struct fuse_conn_info *conn);
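[Editor's sketch of the pairing the two declarations above imply: parse the options at startup, apply them from the init() handler. This is generic libfuse usage, not virtiofsd's actual startup path; FUSE_ARGS_INIT comes from fuse_opt.h and the names are illustrative.]

#include "fuse_lowlevel.h"
#include "fuse_opt.h"

static struct fuse_conn_info_opts *conn_opts;    /* illustrative global */

static void example_init(void *userdata, struct fuse_conn_info *conn)
{
    (void)userdata;
    /* Apply only the -o options the user explicitly passed. */
    fuse_apply_conn_info_opts(conn_opts, conn);
}

int main(int argc, char *argv[])
{
    struct fuse_args args = FUSE_ARGS_INIT(argc, argv);

    /* Recognized options are consumed from args; the rest pass through. */
    conn_opts = fuse_parse_conn_info_opts(&args);
    if (!conn_opts) {
        return 1;
    }
    /* ... create the session with the remaining args and run it ... */
    return 0;
}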
-
-/**
- * Go into the background
- *
- * @param foreground if true, stay in the foreground
- * @return 0 on success, -1 on failure
- */
-int fuse_daemonize(int foreground);
-
-/**
- * Get the version of the library
- *
- * @return the version
- */
-int fuse_version(void);
-
-/**
- * Get the full package version string of the library
- *
- * @return the package version
- */
-const char *fuse_pkgversion(void);
-
-/**
- * Destroy poll handle
- *
- * @param ph the poll handle
- */
-void fuse_pollhandle_destroy(struct fuse_pollhandle *ph);
-
-/*
- * Data buffer
- */
-
-/**
- * Buffer flags
- */
-enum fuse_buf_flags {
- /**
- * Buffer contains a file descriptor
- *
- * If this flag is set, the .fd field is valid, otherwise the
- * .mem field is valid.
- */
- FUSE_BUF_IS_FD = (1 << 1),
-
- /**
- * Seek on the file descriptor
- *
- * If this flag is set then the .pos field is valid and is
- * used to seek to the given offset before performing
- * operation on file descriptor.
- */
- FUSE_BUF_FD_SEEK = (1 << 2),
-
- /**
- * Retry operation on file descriptor
- *
- * If this flag is set then retry operation on file descriptor
- * until .size bytes have been copied or an error or EOF is
- * detected.
- */
- FUSE_BUF_FD_RETRY = (1 << 3),
-};
-
-/**
- * Single data buffer
- *
- * Generic data buffer for I/O, extended attributes, etc... Data may
- * be supplied as a memory pointer or as a file descriptor
- */
-struct fuse_buf {
- /**
- * Size of data in bytes
- */
- size_t size;
-
- /**
- * Buffer flags
- */
- enum fuse_buf_flags flags;
-
- /**
- * Memory pointer
- *
- * Used unless FUSE_BUF_IS_FD flag is set.
- */
- void *mem;
-
- /**
- * File descriptor
- *
- * Used if FUSE_BUF_IS_FD flag is set.
- */
- int fd;
-
- /**
- * File position
- *
- * Used if FUSE_BUF_FD_SEEK flag is set.
- */
- off_t pos;
-};
-
-/**
- * Data buffer vector
- *
- * An array of data buffers, each containing a memory pointer or a
- * file descriptor.
- *
- * Allocate dynamically to add more than one buffer.
- */
-struct fuse_bufvec {
- /**
- * Number of buffers in the array
- */
- size_t count;
-
- /**
- * Index of current buffer within the array
- */
- size_t idx;
-
- /**
- * Current offset within the current buffer
- */
- size_t off;
-
- /**
- * Array of buffers
- */
- struct fuse_buf buf[1];
-};
-
-/* Initialize bufvec with a single buffer of given size */
-#define FUSE_BUFVEC_INIT(size__) \
- ((struct fuse_bufvec){ /* .count= */ 1, \
- /* .idx = */ 0, \
- /* .off = */ 0, /* .buf = */ \
- { /* [0] = */ { \
- /* .size = */ (size__), \
- /* .flags = */ (enum fuse_buf_flags)0, \
- /* .mem = */ NULL, \
- /* .fd = */ -1, \
- /* .pos = */ 0, \
- } } })
-
-/**
- * Get total size of data in a fuse buffer vector
- *
- * @param bufv buffer vector
- * @return size of data
- */
-size_t fuse_buf_size(const struct fuse_bufvec *bufv);
-
-/**
- * Copy data from one buffer vector to another
- *
- * @param dst destination buffer vector
- * @param src source buffer vector
- * @return actual number of bytes copied or -errno on error
- */
-ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src);
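[Editor's sketch combining FUSE_BUFVEC_INIT(), the buffer flags above and fuse_buf_copy(): reading len bytes at off from an open descriptor into caller-provided memory. Error handling is abbreviated and the function name is illustrative.]

#include "fuse_common.h"

/* Sketch: fd-backed source, memory-backed destination. */
static ssize_t example_read_to_mem(int fd, void *dst_mem, size_t len, off_t off)
{
    struct fuse_bufvec src = FUSE_BUFVEC_INIT(len);
    struct fuse_bufvec dst = FUSE_BUFVEC_INIT(len);

    src.buf[0].flags = (enum fuse_buf_flags)(FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK);
    src.buf[0].fd = fd;
    src.buf[0].pos = off;

    dst.buf[0].mem = dst_mem;

    /* Returns the number of bytes copied or -errno. */
    return fuse_buf_copy(&dst, &src);
}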
-
-/**
- * Memory buffer iterator
- *
- */
-struct fuse_mbuf_iter {
- /**
- * Data pointer
- */
- void *mem;
-
- /**
- * Total length, in bytes
- */
- size_t size;
-
- /**
- * Offset from start of buffer
- */
- size_t pos;
-};
-
-/* Initialize memory buffer iterator from a fuse_buf */
-#define FUSE_MBUF_ITER_INIT(fbuf) \
- ((struct fuse_mbuf_iter){ \
- .mem = fbuf->mem, \
- .size = fbuf->size, \
- .pos = 0, \
- })
-
-/**
- * Consume bytes from a memory buffer iterator
- *
- * @param iter memory buffer iterator
- * @param len number of bytes to consume
- * @return pointer to start of consumed bytes or
- * NULL if advancing beyond end of buffer
- */
-void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len);
-
-/**
- * Consume a NUL-terminated string from a memory buffer iterator
- *
- * @param iter memory buffer iterator
- * @return pointer to the string or
- * NULL if advancing beyond end of buffer or there is no NUL-terminator
- */
-const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter);
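[Editor's sketch of the parsing pattern the request handlers in fuse_lowlevel.c build on: a fixed-size argument struct followed by NUL-terminated strings, with NULL returns rejecting truncated messages. In the real handlers the iterator has already consumed the fuse_in_header; names here are illustrative.]

#include <stdbool.h>
#include "fuse_common.h"
#include "standard-headers/linux/fuse.h"    /* struct fuse_mknod_in */

/* Sketch: parse a MKNOD-style payload from a memory buffer. */
static bool example_parse_mknod(const struct fuse_buf *fbuf,
                                struct fuse_mknod_in **arg,
                                const char **name)
{
    struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(fbuf);

    *arg = fuse_mbuf_iter_advance(&iter, sizeof(**arg));
    *name = fuse_mbuf_iter_advance_str(&iter);

    return *arg != NULL && *name != NULL;
}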
-
-/*
- * Signal handling
- */
-/**
- * Exit session on HUP, TERM and INT signals and ignore PIPE signal
- *
- * Stores session in a global variable. May only be called once per
- * process until fuse_remove_signal_handlers() is called.
- *
- * Once either of the POSIX signals arrives, the signal handler calls
- * fuse_session_exit().
- *
- * @param se the session to exit
- * @return 0 on success, -1 on failure
- *
- * See also:
- * fuse_remove_signal_handlers()
- */
-int fuse_set_signal_handlers(struct fuse_session *se);
-
-/**
- * Restore default signal handlers
- *
- * Resets global session. After this fuse_set_signal_handlers() may
- * be called again.
- *
- * @param se the same session as given in fuse_set_signal_handlers()
- *
- * See also:
- * fuse_set_signal_handlers()
- */
-void fuse_remove_signal_handlers(struct fuse_session *se);
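[Editor's sketch of the usual pairing of these two helpers around the main loop; run_main_loop() is a placeholder for whatever drives the session (for example virtiofsd's virtio request loop), not a real API.]

#include "fuse_lowlevel.h"

static int run_main_loop(struct fuse_session *se);    /* placeholder */

/* Sketch: install the handlers before the loop, remove them afterwards. */
static int example_run(struct fuse_session *se)
{
    int ret;

    if (fuse_set_signal_handlers(se) != 0) {
        return -1;
    }

    ret = run_main_loop(se);

    fuse_remove_signal_handlers(se);
    return ret;
}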
-
-/*
- * Compatibility stuff
- */
-
-#if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30
-#error only API version 30 or greater is supported
-#endif
-
-
-/*
- * This interface uses 64 bit off_t.
- *
- * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags!
- */
-QEMU_BUILD_BUG_ON(sizeof(off_t) != 8);
-
-#endif /* FUSE_COMMON_H_ */
diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h
deleted file mode 100644
index a5572fa..0000000
--- a/tools/virtiofsd/fuse_i.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#ifndef FUSE_I_H
-#define FUSE_I_H
-
-#define FUSE_USE_VERSION 31
-#include "fuse_lowlevel.h"
-
-struct fv_VuDev;
-struct fv_QueueInfo;
-
-struct fuse_security_context {
- const char *name;
- uint32_t ctxlen;
- const void *ctx;
-};
-
-struct fuse_req {
- struct fuse_session *se;
- uint64_t unique;
- int ctr;
- pthread_mutex_t lock;
- struct fuse_ctx ctx;
- struct fuse_chan *ch;
- int interrupted;
- unsigned int ioctl_64bit:1;
- union {
- struct {
- uint64_t unique;
- } i;
- struct {
- fuse_interrupt_func_t func;
- void *data;
- } ni;
- } u;
- struct fuse_req *next;
- struct fuse_req *prev;
- struct fuse_security_context secctx;
-};
-
-struct fuse_notify_req {
- uint64_t unique;
- void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t,
- const void *, const struct fuse_buf *);
- struct fuse_notify_req *next;
- struct fuse_notify_req *prev;
-};
-
-struct fuse_session {
- char *mountpoint;
- volatile int exited;
- int fd;
- int debug;
- int deny_others;
- struct fuse_lowlevel_ops op;
- int got_init;
- struct cuse_data *cuse_data;
- void *userdata;
- uid_t owner;
- struct fuse_conn_info conn;
- struct fuse_req list;
- struct fuse_req interrupts;
- pthread_mutex_t lock;
- pthread_rwlock_t init_rwlock;
- int got_destroy;
- int broken_splice_nonblock;
- uint64_t notify_ctr;
- struct fuse_notify_req notify_list;
- size_t bufsize;
- int error;
- char *vu_socket_path;
- char *vu_socket_group;
- int vu_listen_fd;
- int vu_socketfd;
- struct fv_VuDev *virtio_dev;
- int thread_pool_size;
-};
-
-struct fuse_chan {
- pthread_mutex_t lock;
- int ctr;
- int fd;
- struct fv_QueueInfo *qi;
-};
-
-int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov,
- int count);
-void fuse_free_req(fuse_req_t req);
-
-void fuse_session_process_buf_int(struct fuse_session *se,
- struct fuse_bufvec *bufv,
- struct fuse_chan *ch);
-
-
-#define FUSE_MAX_MAX_PAGES 256
-#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
-
-/* room needed in buffer to accommodate header */
-#define FUSE_BUFFER_HEADER_SIZE 0x1000
-
-#endif
diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c
deleted file mode 100644
index 2de3f48..0000000
--- a/tools/virtiofsd/fuse_log.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2019 Red Hat, Inc.
- *
- * Logging API.
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#include "qemu/osdep.h"
-#include "fuse_log.h"
-
-
-G_GNUC_PRINTF(2, 0)
-static void default_log_func(__attribute__((unused)) enum fuse_log_level level,
- const char *fmt, va_list ap)
-{
- vfprintf(stderr, fmt, ap);
-}
-
-static fuse_log_func_t log_func = default_log_func;
-
-void fuse_set_log_func(fuse_log_func_t func)
-{
- if (!func) {
- func = default_log_func;
- }
-
- log_func = func;
-}
-
-void fuse_log(enum fuse_log_level level, const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- log_func(level, fmt, ap);
- va_end(ap);
-}
diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h
deleted file mode 100644
index e5c2967..0000000
--- a/tools/virtiofsd/fuse_log.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2019 Red Hat, Inc.
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB.
- */
-
-#ifndef FUSE_LOG_H_
-#define FUSE_LOG_H_
-
-/** @file
- *
- * This file defines the logging interface of FUSE
- */
-
-
-/**
- * Log severity level
- *
- * These levels correspond to syslog(2) log levels since they are widely used.
- */
-enum fuse_log_level {
- FUSE_LOG_EMERG,
- FUSE_LOG_ALERT,
- FUSE_LOG_CRIT,
- FUSE_LOG_ERR,
- FUSE_LOG_WARNING,
- FUSE_LOG_NOTICE,
- FUSE_LOG_INFO,
- FUSE_LOG_DEBUG
-};
-
-/**
- * Log message handler function.
- *
- * This function must be thread-safe. It may be called from any libfuse
- * function, including fuse_parse_cmdline() and other functions invoked before
- * a FUSE filesystem is created.
- *
- * Install a custom log message handler function using fuse_set_log_func().
- *
- * @param level log severity level
- * @param fmt sprintf-style format string including newline
- * @param ap format string arguments
- */
-typedef void (*fuse_log_func_t)(enum fuse_log_level level, const char *fmt,
- va_list ap)
- G_GNUC_PRINTF(2, 0);
-
-/**
- * Install a custom log handler function.
- *
- * Log messages are emitted by libfuse functions to report errors and debug
- * information. Messages are printed to stderr by default but this can be
- * overridden by installing a custom log message handler function.
- *
- * The log message handler function is global and affects all FUSE filesystems
- * created within this process.
- *
- * @param func a custom log message handler function or NULL to revert to
- * the default
- */
-void fuse_set_log_func(fuse_log_func_t func);
-
-/**
- * Emit a log message
- *
- * @param level severity level (FUSE_LOG_ERR, FUSE_LOG_DEBUG, etc)
- * @param fmt sprintf-style format string including newline
- */
-void fuse_log(enum fuse_log_level level, const char *fmt, ...)
- G_GNUC_PRINTF(2, 3);
-
-#endif /* FUSE_LOG_H_ */
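[Editor's sketch of a custom handler installed with fuse_set_log_func(); since fuse_log_level values mirror syslog's levels, they can be forwarded directly. The program name, facility and function names are illustrative.]

#include <stdarg.h>
#include <syslog.h>
#include "fuse_log.h"

/* Sketch: forward libfuse log messages to syslog. */
static void example_syslog_func(enum fuse_log_level level, const char *fmt,
                                va_list ap)
{
    vsyslog((int)level, fmt, ap);
}

static void example_install_logging(void)
{
    openlog("virtiofsd-example", LOG_PID, LOG_DAEMON);
    fuse_set_log_func(example_syslog_func);
}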
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
deleted file mode 100644
index 194a1b8..0000000
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ /dev/null
@@ -1,2732 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
- *
- * Implementation of (most of) the low-level FUSE API. The session loop
- * functions are implemented in separate files.
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#include "qemu/osdep.h"
-#include "fuse_i.h"
-#include "standard-headers/linux/fuse.h"
-#include "fuse_misc.h"
-#include "fuse_opt.h"
-#include "fuse_virtio.h"
-
-#include <sys/file.h>
-
-#define THREAD_POOL_SIZE 0
-
-#define OFFSET_MAX 0x7fffffffffffffffLL
-
-struct fuse_pollhandle {
- uint64_t kh;
- struct fuse_session *se;
-};
-
-static size_t pagesize;
-
-static __attribute__((constructor)) void fuse_ll_init_pagesize(void)
-{
- pagesize = getpagesize();
-}
-
-static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr)
-{
- *attr = (struct fuse_attr){
- .ino = stbuf->st_ino,
- .mode = stbuf->st_mode,
- .nlink = stbuf->st_nlink,
- .uid = stbuf->st_uid,
- .gid = stbuf->st_gid,
- .rdev = stbuf->st_rdev,
- .size = stbuf->st_size,
- .blksize = stbuf->st_blksize,
- .blocks = stbuf->st_blocks,
- .atime = stbuf->st_atime,
- .mtime = stbuf->st_mtime,
- .ctime = stbuf->st_ctime,
- .atimensec = ST_ATIM_NSEC(stbuf),
- .mtimensec = ST_MTIM_NSEC(stbuf),
- .ctimensec = ST_CTIM_NSEC(stbuf),
- };
-}
-
-static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf)
-{
- stbuf->st_mode = attr->mode;
- stbuf->st_uid = attr->uid;
- stbuf->st_gid = attr->gid;
- stbuf->st_size = attr->size;
- stbuf->st_atime = attr->atime;
- stbuf->st_mtime = attr->mtime;
- stbuf->st_ctime = attr->ctime;
- ST_ATIM_NSEC_SET(stbuf, attr->atimensec);
- ST_MTIM_NSEC_SET(stbuf, attr->mtimensec);
- ST_CTIM_NSEC_SET(stbuf, attr->ctimensec);
-}
-
-static size_t iov_length(const struct iovec *iov, size_t count)
-{
- size_t seg;
- size_t ret = 0;
-
- for (seg = 0; seg < count; seg++) {
- ret += iov[seg].iov_len;
- }
- return ret;
-}
-
-static void list_init_req(struct fuse_req *req)
-{
- req->next = req;
- req->prev = req;
-}
-
-static void list_del_req(struct fuse_req *req)
-{
- struct fuse_req *prev = req->prev;
- struct fuse_req *next = req->next;
- prev->next = next;
- next->prev = prev;
-}
-
-static void list_add_req(struct fuse_req *req, struct fuse_req *next)
-{
- struct fuse_req *prev = next->prev;
- req->next = next;
- req->prev = prev;
- prev->next = req;
- next->prev = req;
-}
-
-static void destroy_req(fuse_req_t req)
-{
- pthread_mutex_destroy(&req->lock);
- g_free(req);
-}
-
-void fuse_free_req(fuse_req_t req)
-{
- int ctr;
- struct fuse_session *se = req->se;
-
- pthread_mutex_lock(&se->lock);
- req->u.ni.func = NULL;
- req->u.ni.data = NULL;
- list_del_req(req);
- ctr = --req->ctr;
- req->ch = NULL;
- pthread_mutex_unlock(&se->lock);
- if (!ctr) {
- destroy_req(req);
- }
-}
-
-static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se)
-{
- struct fuse_req *req;
-
- req = g_try_new0(struct fuse_req, 1);
- if (req == NULL) {
- fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n");
- } else {
- req->se = se;
- req->ctr = 1;
- list_init_req(req);
- fuse_mutex_init(&req->lock);
- }
-
- return req;
-}
-
-/* Send data. If *ch* is NULL, send via session master fd */
-static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch,
- struct iovec *iov, int count)
-{
- struct fuse_out_header *out = iov[0].iov_base;
-
- out->len = iov_length(iov, count);
- if (out->unique == 0) {
- fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error,
- out->len);
- } else if (out->error) {
- fuse_log(FUSE_LOG_DEBUG,
- " unique: %llu, error: %i (%s), outsize: %i\n",
- (unsigned long long)out->unique, out->error,
- strerror(-out->error), out->len);
- } else {
- fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n",
- (unsigned long long)out->unique, out->len);
- }
-
- if (fuse_lowlevel_is_virtio(se)) {
- return virtio_send_msg(se, ch, iov, count);
- }
-
- abort(); /* virtio should have taken it before here */
- return 0;
-}
-
-
-int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov,
- int count)
-{
- struct fuse_out_header out = {
- .unique = req->unique,
- .error = error,
- };
-
- if (error <= -1000 || error > 0) {
- fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error);
- out.error = -ERANGE;
- }
-
- iov[0].iov_base = &out;
- iov[0].iov_len = sizeof(struct fuse_out_header);
-
- return fuse_send_msg(req->se, req->ch, iov, count);
-}
-
-static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov,
- int count)
-{
- int res;
-
- res = fuse_send_reply_iov_nofree(req, error, iov, count);
- fuse_free_req(req);
- return res;
-}
-
-static int send_reply(fuse_req_t req, int error, const void *arg,
- size_t argsize)
-{
- struct iovec iov[2];
- int count = 1;
- if (argsize) {
- iov[1].iov_base = (void *)arg;
- iov[1].iov_len = argsize;
- count++;
- }
- return send_reply_iov(req, error, iov, count);
-}
-
-int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count)
-{
- g_autofree struct iovec *padded_iov = NULL;
-
- padded_iov = g_try_new(struct iovec, count + 1);
- if (padded_iov == NULL) {
- return fuse_reply_err(req, ENOMEM);
- }
-
- memcpy(padded_iov + 1, iov, count * sizeof(struct iovec));
- count++;
-
- return send_reply_iov(req, 0, padded_iov, count);
-}
-
-
-/*
- * `buf` is allowed to be empty so that the proper size may be
- * allocated by the caller
- */
-size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize,
- const char *name, const struct stat *stbuf, off_t off)
-{
- (void)req;
- size_t namelen;
- size_t entlen;
- size_t entlen_padded;
- struct fuse_dirent *dirent;
-
- namelen = strlen(name);
- entlen = FUSE_NAME_OFFSET + namelen;
- entlen_padded = FUSE_DIRENT_ALIGN(entlen);
-
- if ((buf == NULL) || (entlen_padded > bufsize)) {
- return entlen_padded;
- }
-
- dirent = (struct fuse_dirent *)buf;
- dirent->ino = stbuf->st_ino;
- dirent->off = off;
- dirent->namelen = namelen;
- dirent->type = (stbuf->st_mode & S_IFMT) >> 12;
- memcpy(dirent->name, name, namelen);
- memset(dirent->name + namelen, 0, entlen_padded - entlen);
-
- return entlen_padded;
-}
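[Editor's sketch of the two-pass usage the comment above describes: call with a NULL (or too small) buffer to learn the padded entry size, then call again to emit the entry. It assumes the includes already present in this file; the helper name is illustrative.]

/* Sketch: append one entry to the reply buffer a readdir handler is
 * assembling; returns the entry size, or 0 if it no longer fits. */
static size_t example_emit_dirent(fuse_req_t req, char *buf, size_t bufsize,
                                  size_t used, const char *name,
                                  const struct stat *st, off_t next_off)
{
    size_t entsize = fuse_add_direntry(req, NULL, 0, name, st, next_off);

    if (used + entsize > bufsize) {
        return 0;    /* caller replies with what it already has */
    }
    fuse_add_direntry(req, buf + used, bufsize - used, name, st, next_off);
    return entsize;
}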
-
-static void convert_statfs(const struct statvfs *stbuf,
- struct fuse_kstatfs *kstatfs)
-{
- *kstatfs = (struct fuse_kstatfs){
- .bsize = stbuf->f_bsize,
- .frsize = stbuf->f_frsize,
- .blocks = stbuf->f_blocks,
- .bfree = stbuf->f_bfree,
- .bavail = stbuf->f_bavail,
- .files = stbuf->f_files,
- .ffree = stbuf->f_ffree,
- .namelen = stbuf->f_namemax,
- };
-}
-
-static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize)
-{
- return send_reply(req, 0, arg, argsize);
-}
-
-int fuse_reply_err(fuse_req_t req, int err)
-{
- return send_reply(req, -err, NULL, 0);
-}
-
-void fuse_reply_none(fuse_req_t req)
-{
- fuse_free_req(req);
-}
-
-static unsigned long calc_timeout_sec(double t)
-{
- if (t > (double)ULONG_MAX) {
- return ULONG_MAX;
- } else if (t < 0.0) {
- return 0;
- } else {
- return (unsigned long)t;
- }
-}
-
-static unsigned int calc_timeout_nsec(double t)
-{
- double f = t - (double)calc_timeout_sec(t);
- if (f < 0.0) {
- return 0;
- } else if (f >= 0.999999999) {
- return 999999999;
- } else {
- return (unsigned int)(f * 1.0e9);
- }
-}
-
-static void fill_entry(struct fuse_entry_out *arg,
- const struct fuse_entry_param *e)
-{
- *arg = (struct fuse_entry_out){
- .nodeid = e->ino,
- .generation = e->generation,
- .entry_valid = calc_timeout_sec(e->entry_timeout),
- .entry_valid_nsec = calc_timeout_nsec(e->entry_timeout),
- .attr_valid = calc_timeout_sec(e->attr_timeout),
- .attr_valid_nsec = calc_timeout_nsec(e->attr_timeout),
- };
- convert_stat(&e->attr, &arg->attr);
-
- arg->attr.flags = e->attr_flags;
-}
-
-/*
- * `buf` is allowed to be empty so that the proper size may be
- * allocated by the caller
- */
-size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize,
- const char *name,
- const struct fuse_entry_param *e, off_t off)
-{
- (void)req;
- size_t namelen;
- size_t entlen;
- size_t entlen_padded;
-
- namelen = strlen(name);
- entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen;
- entlen_padded = FUSE_DIRENT_ALIGN(entlen);
- if ((buf == NULL) || (entlen_padded > bufsize)) {
- return entlen_padded;
- }
-
- struct fuse_direntplus *dp = (struct fuse_direntplus *)buf;
- memset(&dp->entry_out, 0, sizeof(dp->entry_out));
- fill_entry(&dp->entry_out, e);
-
- struct fuse_dirent *dirent = &dp->dirent;
- *dirent = (struct fuse_dirent){
- .ino = e->attr.st_ino,
- .off = off,
- .namelen = namelen,
- .type = (e->attr.st_mode & S_IFMT) >> 12,
- };
- memcpy(dirent->name, name, namelen);
- memset(dirent->name + namelen, 0, entlen_padded - entlen);
-
- return entlen_padded;
-}
-
-static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f)
-{
- arg->fh = f->fh;
- if (f->direct_io) {
- arg->open_flags |= FOPEN_DIRECT_IO;
- }
- if (f->keep_cache) {
- arg->open_flags |= FOPEN_KEEP_CACHE;
- }
- if (f->cache_readdir) {
- arg->open_flags |= FOPEN_CACHE_DIR;
- }
- if (f->nonseekable) {
- arg->open_flags |= FOPEN_NONSEEKABLE;
- }
-}
-
-int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e)
-{
- struct fuse_entry_out arg;
- size_t size = sizeof(arg);
-
- memset(&arg, 0, sizeof(arg));
- fill_entry(&arg, e);
- return send_reply_ok(req, &arg, size);
-}
-
-int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e,
- const struct fuse_file_info *f)
-{
- char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)];
- size_t entrysize = sizeof(struct fuse_entry_out);
- struct fuse_entry_out *earg = (struct fuse_entry_out *)buf;
- struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize);
-
- memset(buf, 0, sizeof(buf));
- fill_entry(earg, e);
- fill_open(oarg, f);
- return send_reply_ok(req, buf, entrysize + sizeof(struct fuse_open_out));
-}
-
-int fuse_reply_attr(fuse_req_t req, const struct stat *attr,
- double attr_timeout)
-{
- struct fuse_attr_out arg;
- size_t size = sizeof(arg);
-
- memset(&arg, 0, sizeof(arg));
- arg.attr_valid = calc_timeout_sec(attr_timeout);
- arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout);
- convert_stat(attr, &arg.attr);
-
- return send_reply_ok(req, &arg, size);
-}
-
-int fuse_reply_readlink(fuse_req_t req, const char *linkname)
-{
- return send_reply_ok(req, linkname, strlen(linkname));
-}
-
-int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f)
-{
- struct fuse_open_out arg;
-
- memset(&arg, 0, sizeof(arg));
- fill_open(&arg, f);
- return send_reply_ok(req, &arg, sizeof(arg));
-}
-
-int fuse_reply_write(fuse_req_t req, size_t count)
-{
- struct fuse_write_out arg;
-
- memset(&arg, 0, sizeof(arg));
- arg.size = count;
-
- return send_reply_ok(req, &arg, sizeof(arg));
-}
-
-int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size)
-{
- return send_reply_ok(req, buf, size);
-}
-
-static int fuse_send_data_iov_fallback(struct fuse_session *se,
- struct fuse_chan *ch, struct iovec *iov,
- int iov_count, struct fuse_bufvec *buf,
- size_t len)
-{
- /* Optimize common case */
- if (buf->count == 1 && buf->idx == 0 && buf->off == 0 &&
- !(buf->buf[0].flags & FUSE_BUF_IS_FD)) {
- /*
- * FIXME: also avoid memory copy if there are multiple buffers
- * but none of them contain an fd
- */
-
- iov[iov_count].iov_base = buf->buf[0].mem;
- iov[iov_count].iov_len = len;
- iov_count++;
- return fuse_send_msg(se, ch, iov, iov_count);
- }
-
- if (fuse_lowlevel_is_virtio(se) && buf->count == 1 &&
- buf->buf[0].flags == (FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK)) {
- return virtio_send_data_iov(se, ch, iov, iov_count, buf, len);
- }
-
- abort(); /* Will have taken vhost path */
- return 0;
-}
-
-static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
- struct iovec *iov, int iov_count,
- struct fuse_bufvec *buf)
-{
- size_t len = fuse_buf_size(buf);
-
- return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len);
-}
-
-int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv)
-{
- struct iovec iov[2];
- struct fuse_out_header out = {
- .unique = req->unique,
- };
- int res;
-
- iov[0].iov_base = &out;
- iov[0].iov_len = sizeof(struct fuse_out_header);
-
- res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv);
- if (res <= 0) {
- fuse_free_req(req);
- return res;
- } else {
- return fuse_reply_err(req, res);
- }
-}
-
-int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf)
-{
- struct fuse_statfs_out arg;
- size_t size = sizeof(arg);
-
- memset(&arg, 0, sizeof(arg));
- convert_statfs(stbuf, &arg.st);
-
- return send_reply_ok(req, &arg, size);
-}
-
-int fuse_reply_xattr(fuse_req_t req, size_t count)
-{
- struct fuse_getxattr_out arg;
-
- memset(&arg, 0, sizeof(arg));
- arg.size = count;
-
- return send_reply_ok(req, &arg, sizeof(arg));
-}
-
-int fuse_reply_lock(fuse_req_t req, const struct flock *lock)
-{
- struct fuse_lk_out arg;
-
- memset(&arg, 0, sizeof(arg));
- arg.lk.type = lock->l_type;
- if (lock->l_type != F_UNLCK) {
- arg.lk.start = lock->l_start;
- if (lock->l_len == 0) {
- arg.lk.end = OFFSET_MAX;
- } else {
- arg.lk.end = lock->l_start + lock->l_len - 1;
- }
- }
- arg.lk.pid = lock->l_pid;
- return send_reply_ok(req, &arg, sizeof(arg));
-}
-
-int fuse_reply_bmap(fuse_req_t req, uint64_t idx)
-{
- struct fuse_bmap_out arg;
-
- memset(&arg, 0, sizeof(arg));
- arg.block = idx;
-
- return send_reply_ok(req, &arg, sizeof(arg));
-}
-
-static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov,
- size_t count)
-{
- struct fuse_ioctl_iovec *fiov;
- size_t i;
-
- fiov = g_try_new(struct fuse_ioctl_iovec, count);
- if (!fiov) {
- return NULL;
- }
-
- for (i = 0; i < count; i++) {
- fiov[i].base = (uintptr_t)iov[i].iov_base;
- fiov[i].len = iov[i].iov_len;
- }
-
- return fiov;
-}
-
-int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov,
- size_t in_count, const struct iovec *out_iov,
- size_t out_count)
-{
- struct fuse_ioctl_out arg;
- g_autofree struct fuse_ioctl_iovec *in_fiov = NULL;
- g_autofree struct fuse_ioctl_iovec *out_fiov = NULL;
- struct iovec iov[4];
- size_t count = 1;
-
- memset(&arg, 0, sizeof(arg));
- arg.flags |= FUSE_IOCTL_RETRY;
- arg.in_iovs = in_count;
- arg.out_iovs = out_count;
- iov[count].iov_base = &arg;
- iov[count].iov_len = sizeof(arg);
- count++;
-
- /* Can't handle non-compat 64bit ioctls on 32bit */
- if (sizeof(void *) == 4 && req->ioctl_64bit) {
- return fuse_reply_err(req, EINVAL);
- }
-
- if (in_count) {
- in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count);
- if (!in_fiov) {
- return fuse_reply_err(req, ENOMEM);
- }
-
- iov[count].iov_base = (void *)in_fiov;
- iov[count].iov_len = sizeof(in_fiov[0]) * in_count;
- count++;
- }
- if (out_count) {
- out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count);
- if (!out_fiov) {
- return fuse_reply_err(req, ENOMEM);
- }
-
- iov[count].iov_base = (void *)out_fiov;
- iov[count].iov_len = sizeof(out_fiov[0]) * out_count;
- count++;
- }
-
- return send_reply_iov(req, 0, iov, count);
-}
-
-int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size)
-{
- struct fuse_ioctl_out arg;
- struct iovec iov[3];
- size_t count = 1;
-
- memset(&arg, 0, sizeof(arg));
- arg.result = result;
- iov[count].iov_base = &arg;
- iov[count].iov_len = sizeof(arg);
- count++;
-
- if (size) {
- iov[count].iov_base = (char *)buf;
- iov[count].iov_len = size;
- count++;
- }
-
- return send_reply_iov(req, 0, iov, count);
-}
-
-int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov,
- int count)
-{
- g_autofree struct iovec *padded_iov = NULL;
- struct fuse_ioctl_out arg;
-
- padded_iov = g_try_new(struct iovec, count + 2);
- if (padded_iov == NULL) {
- return fuse_reply_err(req, ENOMEM);
- }
-
- memset(&arg, 0, sizeof(arg));
- arg.result = result;
- padded_iov[1].iov_base = &arg;
- padded_iov[1].iov_len = sizeof(arg);
-
- memcpy(&padded_iov[2], iov, count * sizeof(struct iovec));
-
- return send_reply_iov(req, 0, padded_iov, count + 2);
-}
-
-int fuse_reply_poll(fuse_req_t req, unsigned revents)
-{
- struct fuse_poll_out arg;
-
- memset(&arg, 0, sizeof(arg));
- arg.revents = revents;
-
- return send_reply_ok(req, &arg, sizeof(arg));
-}
-
-int fuse_reply_lseek(fuse_req_t req, off_t off)
-{
- struct fuse_lseek_out arg;
-
- memset(&arg, 0, sizeof(arg));
- arg.offset = off;
-
- return send_reply_ok(req, &arg, sizeof(arg));
-}
-
-static void do_lookup(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- const char *name = fuse_mbuf_iter_advance_str(iter);
- if (!name) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (req->se->op.lookup) {
- req->se->op.lookup(req, nodeid, name);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_forget(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_forget_in *arg;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (req->se->op.forget) {
- req->se->op.forget(req, nodeid, arg->nlookup);
- } else {
- fuse_reply_none(req);
- }
-}
-
-static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_batch_forget_in *arg;
- struct fuse_forget_data *forgets;
- size_t scount;
-
- (void)nodeid;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_none(req);
- return;
- }
-
- /*
- * Prevent integer overflow. The compiler emits the following warning
- * unless we use the scount local variable:
- *
- * error: comparison is always false due to limited range of data type
- * [-Werror=type-limits]
- *
- * This may be true on 64-bit hosts but we need this check for 32-bit
- * hosts.
- */
- scount = arg->count;
- if (scount > SIZE_MAX / sizeof(forgets[0])) {
- fuse_reply_none(req);
- return;
- }
-
- forgets = fuse_mbuf_iter_advance(iter, arg->count * sizeof(forgets[0]));
- if (!forgets) {
- fuse_reply_none(req);
- return;
- }
-
- if (req->se->op.forget_multi) {
- req->se->op.forget_multi(req, arg->count, forgets);
- } else if (req->se->op.forget) {
- unsigned int i;
-
- for (i = 0; i < arg->count; i++) {
- struct fuse_req *dummy_req;
-
- dummy_req = fuse_ll_alloc_req(req->se);
- if (dummy_req == NULL) {
- break;
- }
-
- dummy_req->unique = req->unique;
- dummy_req->ctx = req->ctx;
- dummy_req->ch = NULL;
-
- req->se->op.forget(dummy_req, forgets[i].ino, forgets[i].nlookup);
- }
- fuse_reply_none(req);
- } else {
- fuse_reply_none(req);
- }
-}
-
-static void do_getattr(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_file_info *fip = NULL;
- struct fuse_file_info fi;
-
- struct fuse_getattr_in *arg;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (arg->getattr_flags & FUSE_GETATTR_FH) {
- memset(&fi, 0, sizeof(fi));
- fi.fh = arg->fh;
- fip = &fi;
- }
-
- if (req->se->op.getattr) {
- req->se->op.getattr(req, nodeid, fip);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_setattr(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- if (req->se->op.setattr) {
- struct fuse_setattr_in *arg;
- struct fuse_file_info *fi = NULL;
- struct fuse_file_info fi_store;
- struct stat stbuf;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- memset(&stbuf, 0, sizeof(stbuf));
- convert_attr(arg, &stbuf);
- if (arg->valid & FATTR_FH) {
- arg->valid &= ~FATTR_FH;
- memset(&fi_store, 0, sizeof(fi_store));
- fi = &fi_store;
- fi->fh = arg->fh;
- }
- arg->valid &= FUSE_SET_ATTR_MODE | FUSE_SET_ATTR_UID |
- FUSE_SET_ATTR_GID | FUSE_SET_ATTR_SIZE |
- FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME |
- FUSE_SET_ATTR_ATIME_NOW | FUSE_SET_ATTR_MTIME_NOW |
- FUSE_SET_ATTR_CTIME | FUSE_SET_ATTR_KILL_SUIDGID;
-
- req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_access(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_access_in *arg;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (req->se->op.access) {
- req->se->op.access(req, nodeid, arg->mask);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_readlink(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- (void)iter;
-
- if (req->se->op.readlink) {
- req->se->op.readlink(req, nodeid);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static int parse_secctx_fill_req(fuse_req_t req, struct fuse_mbuf_iter *iter)
-{
- struct fuse_secctx_header *fsecctx_header;
- struct fuse_secctx *fsecctx;
- const void *secctx;
- const char *name;
-
- fsecctx_header = fuse_mbuf_iter_advance(iter, sizeof(*fsecctx_header));
- if (!fsecctx_header) {
- return -EINVAL;
- }
-
- /*
- * As of now, a maximum of one security context is supported. This
- * may change in the future.
- */
- if (fsecctx_header->nr_secctx > 1) {
- return -EINVAL;
- }
-
- /* No security context sent. Maybe no LSM supports it */
- if (!fsecctx_header->nr_secctx) {
- return 0;
- }
-
- fsecctx = fuse_mbuf_iter_advance(iter, sizeof(*fsecctx));
- if (!fsecctx) {
- return -EINVAL;
- }
-
- /* struct fsecctx with zero sized context is not expected */
- if (!fsecctx->size) {
- return -EINVAL;
- }
- name = fuse_mbuf_iter_advance_str(iter);
- if (!name) {
- return -EINVAL;
- }
-
- secctx = fuse_mbuf_iter_advance(iter, fsecctx->size);
- if (!secctx) {
- return -EINVAL;
- }
-
- req->secctx.name = name;
- req->secctx.ctx = secctx;
- req->secctx.ctxlen = fsecctx->size;
- return 0;
-}
-
-static void do_mknod(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_mknod_in *arg;
- const char *name;
- bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX;
- int err;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- name = fuse_mbuf_iter_advance_str(iter);
- if (!arg || !name) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- req->ctx.umask = arg->umask;
-
- if (secctx_enabled) {
- err = parse_secctx_fill_req(req, iter);
- if (err) {
- fuse_reply_err(req, -err);
- return;
- }
- }
-
- if (req->se->op.mknod) {
- req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_mkdir_in *arg;
- const char *name;
- bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX;
- int err;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- name = fuse_mbuf_iter_advance_str(iter);
- if (!arg || !name) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- req->ctx.umask = arg->umask;
-
- if (secctx_enabled) {
- err = parse_secctx_fill_req(req, iter);
- if (err) {
- fuse_reply_err(req, err);
- return;
- }
- }
-
- if (req->se->op.mkdir) {
- req->se->op.mkdir(req, nodeid, name, arg->mode);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_unlink(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- const char *name = fuse_mbuf_iter_advance_str(iter);
-
- if (!name) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (req->se->op.unlink) {
- req->se->op.unlink(req, nodeid, name);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- const char *name = fuse_mbuf_iter_advance_str(iter);
-
- if (!name) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (req->se->op.rmdir) {
- req->se->op.rmdir(req, nodeid, name);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_symlink(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- const char *name = fuse_mbuf_iter_advance_str(iter);
- const char *linkname = fuse_mbuf_iter_advance_str(iter);
- bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX;
- int err;
-
- if (!name || !linkname) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (secctx_enabled) {
- err = parse_secctx_fill_req(req, iter);
- if (err) {
- fuse_reply_err(req, err);
- return;
- }
- }
-
- if (req->se->op.symlink) {
- req->se->op.symlink(req, linkname, nodeid, name);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_rename(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_rename_in *arg;
- const char *oldname;
- const char *newname;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- oldname = fuse_mbuf_iter_advance_str(iter);
- newname = fuse_mbuf_iter_advance_str(iter);
- if (!arg || !oldname || !newname) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (req->se->op.rename) {
- req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_rename2(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_rename2_in *arg;
- const char *oldname;
- const char *newname;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- oldname = fuse_mbuf_iter_advance_str(iter);
- newname = fuse_mbuf_iter_advance_str(iter);
- if (!arg || !oldname || !newname) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (req->se->op.rename) {
- req->se->op.rename(req, nodeid, oldname, arg->newdir, newname,
- arg->flags);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_link(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_link_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- const char *name = fuse_mbuf_iter_advance_str(iter);
-
- if (!arg || !name) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (req->se->op.link) {
- req->se->op.link(req, arg->oldnodeid, nodeid, name);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_create(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX;
-
- if (req->se->op.create) {
- struct fuse_create_in *arg;
- struct fuse_file_info fi;
- const char *name;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- name = fuse_mbuf_iter_advance_str(iter);
- if (!arg || !name) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (secctx_enabled) {
- int err;
- err = parse_secctx_fill_req(req, iter);
- if (err) {
- fuse_reply_err(req, err);
- return;
- }
- }
-
- memset(&fi, 0, sizeof(fi));
- fi.flags = arg->flags;
- fi.kill_priv = arg->open_flags & FUSE_OPEN_KILL_SUIDGID;
-
- req->ctx.umask = arg->umask;
-
- req->se->op.create(req, nodeid, name, arg->mode, &fi);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_open(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_open_in *arg;
- struct fuse_file_info fi;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- /* File creation is handled by do_create() or do_mknod() */
- if (arg->flags & (O_CREAT | O_TMPFILE)) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- memset(&fi, 0, sizeof(fi));
- fi.flags = arg->flags;
- fi.kill_priv = arg->open_flags & FUSE_OPEN_KILL_SUIDGID;
-
- if (req->se->op.open) {
- req->se->op.open(req, nodeid, &fi);
- } else {
- fuse_reply_open(req, &fi);
- }
-}
-
-static void do_read(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- if (req->se->op.read) {
- struct fuse_read_in *arg;
- struct fuse_file_info fi;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- memset(&fi, 0, sizeof(fi));
- fi.fh = arg->fh;
- fi.lock_owner = arg->lock_owner;
- fi.flags = arg->flags;
- req->se->op.read(req, nodeid, arg->size, arg->offset, &fi);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_write(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_write_in *arg;
- struct fuse_file_info fi;
- const char *param;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- param = fuse_mbuf_iter_advance(iter, arg->size);
- if (!param) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- memset(&fi, 0, sizeof(fi));
- fi.fh = arg->fh;
- fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0;
- fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV);
-
- fi.lock_owner = arg->lock_owner;
- fi.flags = arg->flags;
-
- if (req->se->op.write) {
- req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter, struct fuse_bufvec *ibufv)
-{
- struct fuse_session *se = req->se;
- struct fuse_bufvec *pbufv = ibufv;
- struct fuse_bufvec tmpbufv = {
- .buf[0] = ibufv->buf[0],
- .count = 1,
- };
- struct fuse_write_in *arg;
- size_t arg_size = sizeof(*arg);
- struct fuse_file_info fi;
-
- memset(&fi, 0, sizeof(fi));
-
- arg = fuse_mbuf_iter_advance(iter, arg_size);
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- fi.lock_owner = arg->lock_owner;
- fi.flags = arg->flags;
- fi.fh = arg->fh;
- fi.writepage = !!(arg->write_flags & FUSE_WRITE_CACHE);
- fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV);
-
- if (ibufv->count == 1) {
- assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD));
- tmpbufv.buf[0].mem = ((char *)arg) + arg_size;
- tmpbufv.buf[0].size -= sizeof(struct fuse_in_header) + arg_size;
- pbufv = &tmpbufv;
- } else {
- /*
- * Input bufv contains the headers in the first element
- * and the data in the rest; we need to skip that first element
- */
- ibufv->buf[0].size = 0;
- }
-
- if (fuse_buf_size(pbufv) != arg->size) {
- fuse_log(FUSE_LOG_ERR,
- "fuse: do_write_buf: buffer size doesn't match arg->size\n");
- fuse_reply_err(req, EIO);
- return;
- }
-
- se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi);
-}
-
-static void do_flush(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_flush_in *arg;
- struct fuse_file_info fi;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- memset(&fi, 0, sizeof(fi));
- fi.fh = arg->fh;
- fi.flush = 1;
- fi.lock_owner = arg->lock_owner;
-
- if (req->se->op.flush) {
- req->se->op.flush(req, nodeid, &fi);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_release(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_release_in *arg;
- struct fuse_file_info fi;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- memset(&fi, 0, sizeof(fi));
- fi.flags = arg->flags;
- fi.fh = arg->fh;
- fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0;
- fi.lock_owner = arg->lock_owner;
-
- if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) {
- fi.flock_release = 1;
- }
-
- if (req->se->op.release) {
- req->se->op.release(req, nodeid, &fi);
- } else {
- fuse_reply_err(req, 0);
- }
-}
-
-static void do_fsync(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_fsync_in *arg;
- struct fuse_file_info fi;
- int datasync;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
- datasync = arg->fsync_flags & 1;
-
- memset(&fi, 0, sizeof(fi));
- fi.fh = arg->fh;
-
- if (req->se->op.fsync) {
- if (fi.fh == (uint64_t)-1) {
- req->se->op.fsync(req, nodeid, datasync, NULL);
- } else {
- req->se->op.fsync(req, nodeid, datasync, &fi);
- }
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_opendir(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_open_in *arg;
- struct fuse_file_info fi;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- memset(&fi, 0, sizeof(fi));
- fi.flags = arg->flags;
-
- if (req->se->op.opendir) {
- req->se->op.opendir(req, nodeid, &fi);
- } else {
- fuse_reply_open(req, &fi);
- }
-}
-
-static void do_readdir(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_read_in *arg;
- struct fuse_file_info fi;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- memset(&fi, 0, sizeof(fi));
- fi.fh = arg->fh;
-
- if (req->se->op.readdir) {
- req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_read_in *arg;
- struct fuse_file_info fi;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- memset(&fi, 0, sizeof(fi));
- fi.fh = arg->fh;
-
- if (req->se->op.readdirplus) {
- req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_release_in *arg;
- struct fuse_file_info fi;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- memset(&fi, 0, sizeof(fi));
- fi.flags = arg->flags;
- fi.fh = arg->fh;
-
- if (req->se->op.releasedir) {
- req->se->op.releasedir(req, nodeid, &fi);
- } else {
- fuse_reply_err(req, 0);
- }
-}
-
-static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_fsync_in *arg;
- struct fuse_file_info fi;
- int datasync;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
- datasync = arg->fsync_flags & 1;
-
- memset(&fi, 0, sizeof(fi));
- fi.fh = arg->fh;
-
- if (req->se->op.fsyncdir) {
- req->se->op.fsyncdir(req, nodeid, datasync, &fi);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_statfs(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- (void)nodeid;
- (void)iter;
-
- if (req->se->op.statfs) {
- req->se->op.statfs(req, nodeid);
- } else {
- struct statvfs buf = {
- .f_namemax = 255,
- .f_bsize = 512,
- };
- fuse_reply_statfs(req, &buf);
- }
-}
-
-static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_setxattr_in *arg;
- const char *name;
- const char *value;
- bool setxattr_ext = req->se->conn.want & FUSE_CAP_SETXATTR_EXT;
-
- if (setxattr_ext) {
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- } else {
- arg = fuse_mbuf_iter_advance(iter, FUSE_COMPAT_SETXATTR_IN_SIZE);
- }
- name = fuse_mbuf_iter_advance_str(iter);
- if (!arg || !name) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- value = fuse_mbuf_iter_advance(iter, arg->size);
- if (!value) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (req->se->op.setxattr) {
- uint32_t setxattr_flags = setxattr_ext ? arg->setxattr_flags : 0;
- req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags,
- setxattr_flags);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_getxattr_in *arg;
- const char *name;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- name = fuse_mbuf_iter_advance_str(iter);
- if (!arg || !name) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (req->se->op.getxattr) {
- req->se->op.getxattr(req, nodeid, name, arg->size);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_getxattr_in *arg;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (req->se->op.listxattr) {
- req->se->op.listxattr(req, nodeid, arg->size);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- const char *name = fuse_mbuf_iter_advance_str(iter);
-
- if (!name) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (req->se->op.removexattr) {
- req->se->op.removexattr(req, nodeid, name);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void convert_fuse_file_lock(struct fuse_file_lock *fl,
- struct flock *flock)
-{
- memset(flock, 0, sizeof(struct flock));
- flock->l_type = fl->type;
- flock->l_whence = SEEK_SET;
- flock->l_start = fl->start;
- if (fl->end == OFFSET_MAX) {
- flock->l_len = 0;
- } else {
- flock->l_len = fl->end - fl->start + 1;
- }
- flock->l_pid = fl->pid;
-}
-
-static void do_getlk(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_lk_in *arg;
- struct fuse_file_info fi;
- struct flock flock;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- memset(&fi, 0, sizeof(fi));
- fi.fh = arg->fh;
- fi.lock_owner = arg->owner;
-
- convert_fuse_file_lock(&arg->lk, &flock);
- if (req->se->op.getlk) {
- req->se->op.getlk(req, nodeid, &fi, &flock);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter, int sleep)
-{
- struct fuse_lk_in *arg;
- struct fuse_file_info fi;
- struct flock flock;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- memset(&fi, 0, sizeof(fi));
- fi.fh = arg->fh;
- fi.lock_owner = arg->owner;
-
- if (arg->lk_flags & FUSE_LK_FLOCK) {
- int op = 0;
-
- switch (arg->lk.type) {
- case F_RDLCK:
- op = LOCK_SH;
- break;
- case F_WRLCK:
- op = LOCK_EX;
- break;
- case F_UNLCK:
- op = LOCK_UN;
- break;
- }
- if (!sleep) {
- op |= LOCK_NB;
- }
-
- if (req->se->op.flock) {
- req->se->op.flock(req, nodeid, &fi, op);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
- } else {
- convert_fuse_file_lock(&arg->lk, &flock);
- if (req->se->op.setlk) {
- req->se->op.setlk(req, nodeid, &fi, &flock, sleep);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
- }
-}
-
-static void do_setlk(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- do_setlk_common(req, nodeid, iter, 0);
-}
-
-static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- do_setlk_common(req, nodeid, iter, 1);
-}
-
-static int find_interrupted(struct fuse_session *se, struct fuse_req *req)
-{
- struct fuse_req *curr;
-
- for (curr = se->list.next; curr != &se->list; curr = curr->next) {
- if (curr->unique == req->u.i.unique) {
- fuse_interrupt_func_t func;
- void *data;
-
- curr->ctr++;
- pthread_mutex_unlock(&se->lock);
-
- /* Ugh, ugly locking */
- pthread_mutex_lock(&curr->lock);
- pthread_mutex_lock(&se->lock);
- curr->interrupted = 1;
- func = curr->u.ni.func;
- data = curr->u.ni.data;
- pthread_mutex_unlock(&se->lock);
- if (func) {
- func(curr, data);
- }
- pthread_mutex_unlock(&curr->lock);
-
- pthread_mutex_lock(&se->lock);
- curr->ctr--;
- if (!curr->ctr) {
- destroy_req(curr);
- }
-
- return 1;
- }
- }
- for (curr = se->interrupts.next; curr != &se->interrupts;
- curr = curr->next) {
- if (curr->u.i.unique == req->u.i.unique) {
- return 1;
- }
- }
- return 0;
-}
-
-static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_interrupt_in *arg;
- struct fuse_session *se = req->se;
-
- (void)nodeid;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n",
- (unsigned long long)arg->unique);
-
- req->u.i.unique = arg->unique;
-
- pthread_mutex_lock(&se->lock);
- if (find_interrupted(se, req)) {
- destroy_req(req);
- } else {
- list_add_req(req, &se->interrupts);
- }
- pthread_mutex_unlock(&se->lock);
-}
-
-static struct fuse_req *check_interrupt(struct fuse_session *se,
- struct fuse_req *req)
-{
- struct fuse_req *curr;
-
- for (curr = se->interrupts.next; curr != &se->interrupts;
- curr = curr->next) {
- if (curr->u.i.unique == req->unique) {
- req->interrupted = 1;
- list_del_req(curr);
- g_free(curr);
- return NULL;
- }
- }
- curr = se->interrupts.next;
- if (curr != &se->interrupts) {
- list_del_req(curr);
- list_init_req(curr);
- return curr;
- } else {
- return NULL;
- }
-}
-
-static void do_bmap(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_bmap_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
-
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- if (req->se->op.bmap) {
- req->se->op.bmap(req, nodeid, arg->blocksize, arg->block);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_ioctl_in *arg;
- unsigned int flags;
- void *in_buf = NULL;
- struct fuse_file_info fi;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- flags = arg->flags;
- if (flags & FUSE_IOCTL_DIR && !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) {
- fuse_reply_err(req, ENOTTY);
- return;
- }
-
- if (arg->in_size) {
- in_buf = fuse_mbuf_iter_advance(iter, arg->in_size);
- if (!in_buf) {
- fuse_reply_err(req, EINVAL);
- return;
- }
- }
-
- memset(&fi, 0, sizeof(fi));
- fi.fh = arg->fh;
-
- if (sizeof(void *) == 4 && !(flags & FUSE_IOCTL_32BIT)) {
- req->ioctl_64bit = 1;
- }
-
- if (req->se->op.ioctl) {
- req->se->op.ioctl(req, nodeid, arg->cmd, (void *)(uintptr_t)arg->arg,
- &fi, flags, in_buf, arg->in_size, arg->out_size);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-void fuse_pollhandle_destroy(struct fuse_pollhandle *ph)
-{
- free(ph);
-}
-
-static void do_poll(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_poll_in *arg;
- struct fuse_file_info fi;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- memset(&fi, 0, sizeof(fi));
- fi.fh = arg->fh;
- fi.poll_events = arg->events;
-
- if (req->se->op.poll) {
- struct fuse_pollhandle *ph = NULL;
-
- if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) {
- ph = malloc(sizeof(struct fuse_pollhandle));
- if (ph == NULL) {
- fuse_reply_err(req, ENOMEM);
- return;
- }
- ph->kh = arg->kh;
- ph->se = req->se;
- }
-
- req->se->op.poll(req, nodeid, &fi, ph);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_fallocate_in *arg;
- struct fuse_file_info fi;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- memset(&fi, 0, sizeof(fi));
- fi.fh = arg->fh;
-
- if (req->se->op.fallocate) {
- req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length,
- &fi);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_copy_file_range_in *arg;
- struct fuse_file_info fi_in, fi_out;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- memset(&fi_in, 0, sizeof(fi_in));
- fi_in.fh = arg->fh_in;
-
- memset(&fi_out, 0, sizeof(fi_out));
- fi_out.fh = arg->fh_out;
-
- if (req->se->op.copy_file_range) {
- req->se->op.copy_file_range(req, nodeid_in, arg->off_in, &fi_in,
- arg->nodeid_out, arg->off_out, &fi_out,
- arg->len, arg->flags);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_lseek(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_lseek_in *arg;
- struct fuse_file_info fi;
-
- arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
- memset(&fi, 0, sizeof(fi));
- fi.fh = arg->fh;
-
- if (req->se->op.lseek) {
- req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_syncfs(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- if (req->se->op.syncfs) {
- req->se->op.syncfs(req, nodeid);
- } else {
- fuse_reply_err(req, ENOSYS);
- }
-}
-
-static void do_init(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- size_t compat_size = offsetof(struct fuse_init_in, max_readahead);
- size_t compat2_size = offsetof(struct fuse_init_in, flags) +
- sizeof(uint32_t);
- /* Fuse structure extended with minor version 36 */
- size_t compat3_size = endof(struct fuse_init_in, unused);
- struct fuse_init_in *arg;
- struct fuse_init_out outarg;
- struct fuse_session *se = req->se;
- size_t bufsize = se->bufsize;
- size_t outargsize = sizeof(outarg);
- uint64_t flags = 0;
-
- (void)nodeid;
-
- /* First consume the old fields... */
- arg = fuse_mbuf_iter_advance(iter, compat_size);
- if (!arg) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- /* ...and now consume the new fields. */
- if (arg->major == 7 && arg->minor >= 6) {
- if (!fuse_mbuf_iter_advance(iter, compat2_size - compat_size)) {
- fuse_reply_err(req, EINVAL);
- return;
- }
- flags |= arg->flags;
- }
-
- /*
- * fuse_init_in was extended again with minor version 36. Just read up
- * to the currently known size of fuse_init_in so that future extensions
- * and header rebases do not cause breakage.
- */
- if (sizeof(*arg) > compat2_size && (arg->flags & FUSE_INIT_EXT)) {
- if (!fuse_mbuf_iter_advance(iter, compat3_size - compat2_size)) {
- fuse_reply_err(req, EINVAL);
- return;
- }
- flags |= (uint64_t) arg->flags2 << 32;
- }
-
- fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor);
- if (arg->major == 7 && arg->minor >= 6) {
- fuse_log(FUSE_LOG_DEBUG, "flags=0x%016" PRIx64 "\n", flags);
- fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", arg->max_readahead);
- }
- se->conn.proto_major = arg->major;
- se->conn.proto_minor = arg->minor;
- se->conn.capable = 0;
- se->conn.want = 0;
-
- memset(&outarg, 0, sizeof(outarg));
- outarg.major = FUSE_KERNEL_VERSION;
- outarg.minor = FUSE_KERNEL_MINOR_VERSION;
-
- if (arg->major < 7 || (arg->major == 7 && arg->minor < 31)) {
- fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n",
- arg->major, arg->minor);
- fuse_reply_err(req, EPROTO);
- return;
- }
-
- if (arg->major > 7) {
- /* Wait for a second INIT request with a 7.X version */
- send_reply_ok(req, &outarg, sizeof(outarg));
- return;
- }
-
- if (arg->max_readahead < se->conn.max_readahead) {
- se->conn.max_readahead = arg->max_readahead;
- }
- if (flags & FUSE_ASYNC_READ) {
- se->conn.capable |= FUSE_CAP_ASYNC_READ;
- }
- if (flags & FUSE_POSIX_LOCKS) {
- se->conn.capable |= FUSE_CAP_POSIX_LOCKS;
- }
- if (flags & FUSE_ATOMIC_O_TRUNC) {
- se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC;
- }
- if (flags & FUSE_EXPORT_SUPPORT) {
- se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT;
- }
- if (flags & FUSE_DONT_MASK) {
- se->conn.capable |= FUSE_CAP_DONT_MASK;
- }
- if (flags & FUSE_FLOCK_LOCKS) {
- se->conn.capable |= FUSE_CAP_FLOCK_LOCKS;
- }
- if (flags & FUSE_AUTO_INVAL_DATA) {
- se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA;
- }
- if (flags & FUSE_DO_READDIRPLUS) {
- se->conn.capable |= FUSE_CAP_READDIRPLUS;
- }
- if (flags & FUSE_READDIRPLUS_AUTO) {
- se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO;
- }
- if (flags & FUSE_ASYNC_DIO) {
- se->conn.capable |= FUSE_CAP_ASYNC_DIO;
- }
- if (flags & FUSE_WRITEBACK_CACHE) {
- se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE;
- }
- if (flags & FUSE_NO_OPEN_SUPPORT) {
- se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT;
- }
- if (flags & FUSE_PARALLEL_DIROPS) {
- se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS;
- }
- if (flags & FUSE_POSIX_ACL) {
- se->conn.capable |= FUSE_CAP_POSIX_ACL;
- }
- if (flags & FUSE_HANDLE_KILLPRIV) {
- se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV;
- }
- if (flags & FUSE_NO_OPENDIR_SUPPORT) {
- se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT;
- }
- if (!(flags & FUSE_MAX_PAGES)) {
- size_t max_bufsize = FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() +
- FUSE_BUFFER_HEADER_SIZE;
- if (bufsize > max_bufsize) {
- bufsize = max_bufsize;
- }
- }
- if (flags & FUSE_SUBMOUNTS) {
- se->conn.capable |= FUSE_CAP_SUBMOUNTS;
- }
- if (flags & FUSE_HANDLE_KILLPRIV_V2) {
- se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV_V2;
- }
- if (flags & FUSE_SETXATTR_EXT) {
- se->conn.capable |= FUSE_CAP_SETXATTR_EXT;
- }
- if (flags & FUSE_SECURITY_CTX) {
- se->conn.capable |= FUSE_CAP_SECURITY_CTX;
- }
-#ifdef HAVE_SPLICE
-#ifdef HAVE_VMSPLICE
- se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
-#endif
- se->conn.capable |= FUSE_CAP_SPLICE_READ;
-#endif
- se->conn.capable |= FUSE_CAP_IOCTL_DIR;
-
- /*
- * Default settings for modern filesystems.
- *
- * Most of these capabilities were disabled by default in
- * libfuse2 for backwards compatibility reasons. In libfuse3,
- * we can finally enable them by default (as long as they're
- * supported by the kernel).
- */
-#define LL_SET_DEFAULT(cond, cap) \
- if ((cond) && (se->conn.capable & (cap))) \
- se->conn.want |= (cap)
- LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ);
- LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS);
- LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA);
- LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV);
- LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO);
- LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR);
- LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC);
- LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ);
- LL_SET_DEFAULT(se->op.getlk && se->op.setlk, FUSE_CAP_POSIX_LOCKS);
- LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS);
- LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS);
- LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir,
- FUSE_CAP_READDIRPLUS_AUTO);
- se->conn.time_gran = 1;
-
- if (bufsize < FUSE_MIN_READ_BUFFER) {
- fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n",
- bufsize);
- bufsize = FUSE_MIN_READ_BUFFER;
- }
- se->bufsize = bufsize;
-
- if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) {
- se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE;
- }
-
- se->got_init = 1;
- se->got_destroy = 0;
- if (se->op.init) {
- se->op.init(se->userdata, &se->conn);
- }
-
- if (se->conn.want & (~se->conn.capable)) {
- fuse_log(FUSE_LOG_ERR,
- "fuse: error: filesystem requested capabilities "
- "0x%" PRIx64 " that are not supported by kernel, aborting.\n",
- se->conn.want & (~se->conn.capable));
- fuse_reply_err(req, EPROTO);
- se->error = -EPROTO;
- fuse_session_exit(se);
- return;
- }
-
- if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) {
- se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE;
- }
- if (flags & FUSE_MAX_PAGES) {
- outarg.flags |= FUSE_MAX_PAGES;
- outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1;
- }
-
- /*
- * Always enable big writes; this is superseded
- * by the max_write option
- */
- outarg.flags |= FUSE_BIG_WRITES;
-
- if (se->conn.want & FUSE_CAP_ASYNC_READ) {
- outarg.flags |= FUSE_ASYNC_READ;
- }
- if (se->conn.want & FUSE_CAP_PARALLEL_DIROPS) {
- outarg.flags |= FUSE_PARALLEL_DIROPS;
- }
- if (se->conn.want & FUSE_CAP_POSIX_LOCKS) {
- outarg.flags |= FUSE_POSIX_LOCKS;
- }
- if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) {
- outarg.flags |= FUSE_ATOMIC_O_TRUNC;
- }
- if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) {
- outarg.flags |= FUSE_EXPORT_SUPPORT;
- }
- if (se->conn.want & FUSE_CAP_DONT_MASK) {
- outarg.flags |= FUSE_DONT_MASK;
- }
- if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) {
- outarg.flags |= FUSE_FLOCK_LOCKS;
- }
- if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) {
- outarg.flags |= FUSE_AUTO_INVAL_DATA;
- }
- if (se->conn.want & FUSE_CAP_READDIRPLUS) {
- outarg.flags |= FUSE_DO_READDIRPLUS;
- }
- if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) {
- outarg.flags |= FUSE_READDIRPLUS_AUTO;
- }
- if (se->conn.want & FUSE_CAP_ASYNC_DIO) {
- outarg.flags |= FUSE_ASYNC_DIO;
- }
- if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) {
- outarg.flags |= FUSE_WRITEBACK_CACHE;
- }
- if (se->conn.want & FUSE_CAP_POSIX_ACL) {
- outarg.flags |= FUSE_POSIX_ACL;
- }
- outarg.max_readahead = se->conn.max_readahead;
- outarg.max_write = se->conn.max_write;
- if (se->conn.max_background >= (1 << 16)) {
- se->conn.max_background = (1 << 16) - 1;
- }
- if (se->conn.congestion_threshold > se->conn.max_background) {
- se->conn.congestion_threshold = se->conn.max_background;
- }
- if (!se->conn.congestion_threshold) {
- se->conn.congestion_threshold = se->conn.max_background * 3 / 4;
- }
-
- outarg.max_background = se->conn.max_background;
- outarg.congestion_threshold = se->conn.congestion_threshold;
- outarg.time_gran = se->conn.time_gran;
-
- if (se->conn.want & FUSE_CAP_HANDLE_KILLPRIV_V2) {
- outarg.flags |= FUSE_HANDLE_KILLPRIV_V2;
- }
-
- if (se->conn.want & FUSE_CAP_SETXATTR_EXT) {
- outarg.flags |= FUSE_SETXATTR_EXT;
- }
-
- if (se->conn.want & FUSE_CAP_SECURITY_CTX) {
- /* bits 32..63 get shifted down 32 bits into the flags2 field */
- outarg.flags2 |= FUSE_SECURITY_CTX >> 32;
- }
-
- fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor);
- fuse_log(FUSE_LOG_DEBUG, " flags2=0x%08x flags=0x%08x\n", outarg.flags2,
- outarg.flags);
- fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", outarg.max_readahead);
- fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write);
- fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", outarg.max_background);
- fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n",
- outarg.congestion_threshold);
- fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran);
-
- send_reply_ok(req, &outarg, outargsize);
-}
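For reference, the negotiation above is what a filesystem's init() callback sees: do_init() fills se->conn.capable from the INIT flags, applies the LL_SET_DEFAULT defaults, calls op.init(), and then aborts the session if want requests anything outside capable. A minimal, purely illustrative callback (example_init is not taken from any in-tree filesystem) might look like this:

/* Sketch only: assumes fuse_lowlevel.h (struct fuse_conn_info, FUSE_CAP_*) */
static void example_init(void *userdata, struct fuse_conn_info *conn)
{
    (void)userdata;

    /* Opt in to writeback caching only if the kernel offered it */
    if (conn->capable & FUSE_CAP_WRITEBACK_CACHE) {
        conn->want |= FUSE_CAP_WRITEBACK_CACHE;
    }

    /*
     * Requesting a capability the kernel did not advertise makes
     * do_init() reply with EPROTO and exit the session, so never let
     * want grow beyond capable.
     */
    conn->want &= conn->capable;
}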
-
-static void do_destroy(fuse_req_t req, fuse_ino_t nodeid,
- struct fuse_mbuf_iter *iter)
-{
- struct fuse_session *se = req->se;
-
- (void)nodeid;
- (void)iter;
-
- se->got_destroy = 1;
- se->got_init = 0;
- if (se->op.destroy) {
- se->op.destroy(se->userdata);
- }
-
- send_reply_ok(req, NULL, 0);
-}
-
-int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino,
- off_t offset, struct fuse_bufvec *bufv)
-{
- struct fuse_out_header out = {
- .error = FUSE_NOTIFY_STORE,
- };
- struct fuse_notify_store_out outarg = {
- .nodeid = ino,
- .offset = offset,
- .size = fuse_buf_size(bufv),
- };
- struct iovec iov[3];
- int res;
-
- if (!se) {
- return -EINVAL;
- }
-
- iov[0].iov_base = &out;
- iov[0].iov_len = sizeof(out);
- iov[1].iov_base = &outarg;
- iov[1].iov_len = sizeof(outarg);
-
- res = fuse_send_data_iov(se, NULL, iov, 2, bufv);
- if (res > 0) {
- res = -res;
- }
-
- return res;
-}
-
-void *fuse_req_userdata(fuse_req_t req)
-{
- return req->se->userdata;
-}
-
-const struct fuse_ctx *fuse_req_ctx(fuse_req_t req)
-{
- return &req->ctx;
-}
-
-void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func,
- void *data)
-{
- pthread_mutex_lock(&req->lock);
- pthread_mutex_lock(&req->se->lock);
- req->u.ni.func = func;
- req->u.ni.data = data;
- pthread_mutex_unlock(&req->se->lock);
- if (req->interrupted && func) {
- func(req, data);
- }
- pthread_mutex_unlock(&req->lock);
-}
-
-int fuse_req_interrupted(fuse_req_t req)
-{
- int interrupted;
-
- pthread_mutex_lock(&req->se->lock);
- interrupted = req->interrupted;
- pthread_mutex_unlock(&req->se->lock);
-
- return interrupted;
-}
-
-static struct {
- void (*func)(fuse_req_t, fuse_ino_t, struct fuse_mbuf_iter *);
- const char *name;
-} fuse_ll_ops[] = {
- [FUSE_LOOKUP] = { do_lookup, "LOOKUP" },
- [FUSE_FORGET] = { do_forget, "FORGET" },
- [FUSE_GETATTR] = { do_getattr, "GETATTR" },
- [FUSE_SETATTR] = { do_setattr, "SETATTR" },
- [FUSE_READLINK] = { do_readlink, "READLINK" },
- [FUSE_SYMLINK] = { do_symlink, "SYMLINK" },
- [FUSE_MKNOD] = { do_mknod, "MKNOD" },
- [FUSE_MKDIR] = { do_mkdir, "MKDIR" },
- [FUSE_UNLINK] = { do_unlink, "UNLINK" },
- [FUSE_RMDIR] = { do_rmdir, "RMDIR" },
- [FUSE_RENAME] = { do_rename, "RENAME" },
- [FUSE_LINK] = { do_link, "LINK" },
- [FUSE_OPEN] = { do_open, "OPEN" },
- [FUSE_READ] = { do_read, "READ" },
- [FUSE_WRITE] = { do_write, "WRITE" },
- [FUSE_STATFS] = { do_statfs, "STATFS" },
- [FUSE_RELEASE] = { do_release, "RELEASE" },
- [FUSE_FSYNC] = { do_fsync, "FSYNC" },
- [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" },
- [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" },
- [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" },
- [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" },
- [FUSE_FLUSH] = { do_flush, "FLUSH" },
- [FUSE_INIT] = { do_init, "INIT" },
- [FUSE_OPENDIR] = { do_opendir, "OPENDIR" },
- [FUSE_READDIR] = { do_readdir, "READDIR" },
- [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" },
- [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" },
- [FUSE_GETLK] = { do_getlk, "GETLK" },
- [FUSE_SETLK] = { do_setlk, "SETLK" },
- [FUSE_SETLKW] = { do_setlkw, "SETLKW" },
- [FUSE_ACCESS] = { do_access, "ACCESS" },
- [FUSE_CREATE] = { do_create, "CREATE" },
- [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" },
- [FUSE_BMAP] = { do_bmap, "BMAP" },
- [FUSE_IOCTL] = { do_ioctl, "IOCTL" },
- [FUSE_POLL] = { do_poll, "POLL" },
- [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" },
- [FUSE_DESTROY] = { do_destroy, "DESTROY" },
- [FUSE_NOTIFY_REPLY] = { NULL, "NOTIFY_REPLY" },
- [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" },
- [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" },
- [FUSE_RENAME2] = { do_rename2, "RENAME2" },
- [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" },
- [FUSE_LSEEK] = { do_lseek, "LSEEK" },
- [FUSE_SYNCFS] = { do_syncfs, "SYNCFS" },
-};
-
-#define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0]))
-
-static const char *opname(enum fuse_opcode opcode)
-{
- if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) {
- return "???";
- } else {
- return fuse_ll_ops[opcode].name;
- }
-}
-
-void fuse_session_process_buf(struct fuse_session *se,
- const struct fuse_buf *buf)
-{
- struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 };
- fuse_session_process_buf_int(se, &bufv, NULL);
-}
-
-/*
- * Restriction:
- * bufv is normally a single-entry buffer, except for a write request
- * where, if the data is in memory, the bufv may have multiple entries:
- * the first entry contains all the headers and the subsequent entries
- * contain the data.
- * bufv must not use any offsets etc. to make the data anything other
- * than contiguous, starting from 0.
- */
-void fuse_session_process_buf_int(struct fuse_session *se,
- struct fuse_bufvec *bufv,
- struct fuse_chan *ch)
-{
- const struct fuse_buf *buf = bufv->buf;
- struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf);
- struct fuse_in_header *in;
- struct fuse_req *req;
- int err;
-
- /* The first buffer must be a memory buffer */
- assert(!(buf->flags & FUSE_BUF_IS_FD));
-
- in = fuse_mbuf_iter_advance(&iter, sizeof(*in));
- assert(in); /* caller guarantees the input buffer is large enough */
-
- fuse_log(
- FUSE_LOG_DEBUG,
- "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n",
- (unsigned long long)in->unique, opname((enum fuse_opcode)in->opcode),
- in->opcode, (unsigned long long)in->nodeid, buf->size, in->pid);
-
- req = fuse_ll_alloc_req(se);
- if (req == NULL) {
- struct fuse_out_header out = {
- .unique = in->unique,
- .error = -ENOMEM,
- };
- struct iovec iov = {
- .iov_base = &out,
- .iov_len = sizeof(struct fuse_out_header),
- };
-
- fuse_send_msg(se, ch, &iov, 1);
- return;
- }
-
- req->unique = in->unique;
- req->ctx.uid = in->uid;
- req->ctx.gid = in->gid;
- req->ctx.pid = in->pid;
- req->ch = ch;
-
- /*
- * INIT and DESTROY requests are serialized, all other request types
- * run in parallel. This prevents races between FUSE_INIT and ordinary
- * requests, FUSE_INIT and FUSE_INIT, FUSE_INIT and FUSE_DESTROY, and
- * FUSE_DESTROY and FUSE_DESTROY.
- */
- if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT ||
- in->opcode == FUSE_DESTROY) {
- pthread_rwlock_wrlock(&se->init_rwlock);
- } else {
- pthread_rwlock_rdlock(&se->init_rwlock);
- }
-
- err = EIO;
- if (!se->got_init) {
- enum fuse_opcode expected;
-
- expected = se->cuse_data ? CUSE_INIT : FUSE_INIT;
- if (in->opcode != expected) {
- goto reply_err;
- }
- } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) {
- if (fuse_lowlevel_is_virtio(se)) {
- /*
- * TODO: This is after a hard reboot typically, we need to do
- * a destroy, but we can't reply to this request yet so
- * we can't use do_destroy
- */
- fuse_log(FUSE_LOG_DEBUG, "%s: reinit\n", __func__);
- se->got_destroy = 1;
- se->got_init = 0;
- if (se->op.destroy) {
- se->op.destroy(se->userdata);
- }
- } else {
- goto reply_err;
- }
- }
-
- err = EACCES;
- /* Implement -o allow_root */
- if (se->deny_others && in->uid != se->owner && in->uid != 0 &&
- in->opcode != FUSE_INIT && in->opcode != FUSE_READ &&
- in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC &&
- in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR &&
- in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR &&
- in->opcode != FUSE_NOTIFY_REPLY && in->opcode != FUSE_READDIRPLUS) {
- goto reply_err;
- }
-
- err = ENOSYS;
- if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) {
- goto reply_err;
- }
- if (in->opcode != FUSE_INTERRUPT) {
- struct fuse_req *intr;
- pthread_mutex_lock(&se->lock);
- intr = check_interrupt(se, req);
- list_add_req(req, &se->list);
- pthread_mutex_unlock(&se->lock);
- if (intr) {
- fuse_reply_err(intr, EAGAIN);
- }
- }
-
- if (in->opcode == FUSE_WRITE && se->op.write_buf) {
- do_write_buf(req, in->nodeid, &iter, bufv);
- } else {
- fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter);
- }
-
- pthread_rwlock_unlock(&se->init_rwlock);
- return;
-
-reply_err:
- fuse_reply_err(req, err);
- pthread_rwlock_unlock(&se->init_rwlock);
-}
-
-#define LL_OPTION(n, o, v) \
- { \
- n, offsetof(struct fuse_session, o), v \
- }
-
-static const struct fuse_opt fuse_ll_opts[] = {
- LL_OPTION("debug", debug, 1),
- LL_OPTION("-d", debug, 1),
- LL_OPTION("--debug", debug, 1),
- LL_OPTION("allow_root", deny_others, 1),
- LL_OPTION("--socket-path=%s", vu_socket_path, 0),
- LL_OPTION("--socket-group=%s", vu_socket_group, 0),
- LL_OPTION("--fd=%d", vu_listen_fd, 0),
- LL_OPTION("--thread-pool-size=%d", thread_pool_size, 0),
- FUSE_OPT_END
-};
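The LL_OPTION entries work because fuse_opt_parse() stores each matched value (or the %s/%d conversion) at the given offsetof() position inside struct fuse_session. The same pattern can be reused for any private option struct; the example_opts structure and EXAMPLE_OPT macro below are hypothetical, and only struct fuse_opt, FUSE_OPT_END and fuse_opt_parse() are taken from the code above:

/* Sketch only: assumes fuse_opt.h and <stddef.h> for offsetof() */
struct example_opts {
    int debug;
    char *tag;
};

#define EXAMPLE_OPT(templ, field, value) \
    { templ, offsetof(struct example_opts, field), value }

static const struct fuse_opt example_opt_spec[] = {
    EXAMPLE_OPT("-d", debug, 1),
    EXAMPLE_OPT("--tag=%s", tag, 0),
    FUSE_OPT_END
};

/* Typical call site: fuse_opt_parse(&args, &opts, example_opt_spec, NULL); */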
-
-void fuse_lowlevel_version(void)
-{
- printf("using FUSE kernel interface version %i.%i\n", FUSE_KERNEL_VERSION,
- FUSE_KERNEL_MINOR_VERSION);
-}
-
-void fuse_lowlevel_help(void)
-{
- /*
- * These are not all options, but the ones that are
- * potentially of interest to an end-user
- */
- printf(
- " -o allow_root allow access by root\n"
- " --socket-path=PATH path for the vhost-user socket\n"
- " --socket-group=GRNAME name of group for the vhost-user socket\n"
- " --fd=FDNUM fd number of vhost-user socket\n"
- " --thread-pool-size=NUM thread pool size limit (default %d)\n",
- THREAD_POOL_SIZE);
-}
-
-void fuse_session_destroy(struct fuse_session *se)
-{
- if (se->got_init && !se->got_destroy) {
- if (se->op.destroy) {
- se->op.destroy(se->userdata);
- }
- }
- pthread_rwlock_destroy(&se->init_rwlock);
- pthread_mutex_destroy(&se->lock);
- free(se->cuse_data);
- if (se->fd != -1) {
- close(se->fd);
- }
-
- if (fuse_lowlevel_is_virtio(se)) {
- virtio_session_close(se);
- }
-
- free(se->vu_socket_path);
- se->vu_socket_path = NULL;
-
- g_free(se);
-}
-
-
-struct fuse_session *fuse_session_new(struct fuse_args *args,
- const struct fuse_lowlevel_ops *op,
- size_t op_size, void *userdata)
-{
- struct fuse_session *se;
-
- if (sizeof(struct fuse_lowlevel_ops) < op_size) {
- fuse_log(
- FUSE_LOG_ERR,
- "fuse: warning: library too old, some operations may not work\n");
- op_size = sizeof(struct fuse_lowlevel_ops);
- }
-
- if (args->argc == 0) {
- fuse_log(FUSE_LOG_ERR,
- "fuse: empty argv passed to fuse_session_new().\n");
- return NULL;
- }
-
- se = g_try_new0(struct fuse_session, 1);
- if (se == NULL) {
- fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n");
- goto out1;
- }
- se->fd = -1;
- se->vu_listen_fd = -1;
- se->thread_pool_size = THREAD_POOL_SIZE;
- se->conn.max_write = UINT_MAX;
- se->conn.max_readahead = UINT_MAX;
-
- /* Parse options */
- if (fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) {
- goto out2;
- }
- if (args->argc == 1 && args->argv[0][0] == '-') {
- fuse_log(FUSE_LOG_ERR,
- "fuse: warning: argv[0] looks like an option, but "
- "will be ignored\n");
- } else if (args->argc != 1) {
- int i;
- fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `");
- for (i = 1; i < args->argc - 1; i++) {
- fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]);
- }
- fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]);
- goto out4;
- }
-
- if (!se->vu_socket_path && se->vu_listen_fd < 0) {
- fuse_log(FUSE_LOG_ERR, "fuse: missing --socket-path or --fd option\n");
- goto out4;
- }
- if (se->vu_socket_path && se->vu_listen_fd >= 0) {
- fuse_log(FUSE_LOG_ERR,
- "fuse: --socket-path and --fd cannot be given together\n");
- goto out4;
- }
- if (se->vu_socket_group && !se->vu_socket_path) {
- fuse_log(FUSE_LOG_ERR,
- "fuse: --socket-group can only be used with --socket-path\n");
- goto out4;
- }
-
- se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE;
-
- list_init_req(&se->list);
- list_init_req(&se->interrupts);
- fuse_mutex_init(&se->lock);
- pthread_rwlock_init(&se->init_rwlock, NULL);
-
- memcpy(&se->op, op, op_size);
- se->owner = getuid();
- se->userdata = userdata;
-
- return se;
-
-out4:
- fuse_opt_free_args(args);
-out2:
- g_free(se);
-out1:
- return NULL;
-}
-
-int fuse_session_mount(struct fuse_session *se)
-{
- return virtio_session_mount(se);
-}
-
-int fuse_session_fd(struct fuse_session *se)
-{
- return se->fd;
-}
-
-void fuse_session_unmount(struct fuse_session *se)
-{
-}
-
-int fuse_lowlevel_is_virtio(struct fuse_session *se)
-{
- return !!se->virtio_dev;
-}
-
-void fuse_session_exit(struct fuse_session *se)
-{
- se->exited = 1;
-}
-
-void fuse_session_reset(struct fuse_session *se)
-{
- se->exited = 0;
- se->error = 0;
-}
-
-int fuse_session_exited(struct fuse_session *se)
-{
- return se->exited;
-}
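Taken together, the session entry points above form the usual lifecycle: build a fuse_args, create the session, mount it, feed request buffers to fuse_session_process_buf(), then tear everything down. The sketch below is hypothetical glue code, with the transport's event loop elided:

/* Sketch only; example_main is not part of virtiofsd */
#define FUSE_USE_VERSION 31
#include "fuse_lowlevel.h"

int example_main(int argc, char *argv[])
{
    struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
    struct fuse_lowlevel_ops ops = { 0 };      /* fill in handlers here */
    struct fuse_session *se;
    int ret = 1;

    se = fuse_session_new(&args, &ops, sizeof(ops), NULL);
    if (!se) {
        goto out_args;
    }
    if (fuse_session_mount(se) != 0) {
        goto out_session;
    }

    /*
     * The transport's event loop would run here; each incoming request
     * buffer is handed to fuse_session_process_buf(se, &buf).
     */

    fuse_session_unmount(se);
    ret = 0;
out_session:
    fuse_session_destroy(se);
out_args:
    fuse_opt_free_args(&args);
    return ret;
}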
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
deleted file mode 100644
index b889dae..0000000
--- a/tools/virtiofsd/fuse_lowlevel.h
+++ /dev/null
@@ -1,1988 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB.
- */
-
-#ifndef FUSE_LOWLEVEL_H_
-#define FUSE_LOWLEVEL_H_
-
-/**
- * @file
- *
- * Low level API
- *
- * IMPORTANT: you should define FUSE_USE_VERSION before including this
- * header. To use the newest API define it to 31 (recommended for any
- * new application).
- */
-
-#ifndef FUSE_USE_VERSION
-#error FUSE_USE_VERSION not defined
-#endif
-
-#include "fuse_common.h"
-
-#include <sys/statvfs.h>
-#include <sys/uio.h>
-#include <utime.h>
-
-/*
- * Miscellaneous definitions
- */
-
-/** The node ID of the root inode */
-#define FUSE_ROOT_ID 1
-
-/** Inode number type */
-typedef uint64_t fuse_ino_t;
-
-/** Request pointer type */
-typedef struct fuse_req *fuse_req_t;
-
-/**
- * Session
- *
- * This provides hooks for processing requests, and exiting
- */
-struct fuse_session;
-
-/** Directory entry parameters supplied to fuse_reply_entry() */
-struct fuse_entry_param {
- /**
- * Unique inode number
- *
- * In lookup, zero means negative entry (from version 2.5).
- * Returning ENOENT also means a negative entry, but by setting a zero
- * ino the kernel may cache negative entries for entry_timeout
- * seconds.
- */
- fuse_ino_t ino;
-
- /**
- * Generation number for this entry.
- *
- * If the file system will be exported over NFS, the
- * ino/generation pairs need to be unique over the file
- * system's lifetime (rather than just the mount time). So if
- * the file system reuses an inode after it has been deleted,
- * it must assign a new, previously unused generation number
- * to the inode at the same time.
- *
- */
- uint64_t generation;
-
- /**
- * Inode attributes.
- *
- * Even if attr_timeout == 0, attr must be correct. For example,
- * for open(), FUSE uses attr.st_size from lookup() to determine
- * how many bytes to request. If this value is not correct,
- * incorrect data will be returned.
- */
- struct stat attr;
-
- /**
- * Validity timeout (in seconds) for inode attributes. If
- * attributes only change as a result of requests that come
- * through the kernel, this should be set to a very large
- * value.
- */
- double attr_timeout;
-
- /**
- * Validity timeout (in seconds) for the name. If directory
- * entries are changed/deleted only as a result of requests
- * that come through the kernel, this should be set to a very
- * large value.
- */
- double entry_timeout;
-
- /**
- * Flags for fuse_attr.flags that do not fit into attr.
- */
- uint32_t attr_flags;
-};
-
-/**
- * Additional context associated with requests.
- *
- * Note that the reported client uid, gid and pid may be zero in some
- * situations. For example, if the FUSE file system is running in a
- * PID or user namespace but then accessed from outside the namespace,
- * there is no valid uid/pid/gid that could be reported.
- */
-struct fuse_ctx {
- /** User ID of the calling process */
- uid_t uid;
-
- /** Group ID of the calling process */
- gid_t gid;
-
- /** Thread ID of the calling process */
- pid_t pid;
-
- /** Umask of the calling process */
- mode_t umask;
-};
-
-struct fuse_forget_data {
- fuse_ino_t ino;
- uint64_t nlookup;
-};
-
-/* 'to_set' flags in setattr */
-#define FUSE_SET_ATTR_MODE (1 << 0)
-#define FUSE_SET_ATTR_UID (1 << 1)
-#define FUSE_SET_ATTR_GID (1 << 2)
-#define FUSE_SET_ATTR_SIZE (1 << 3)
-#define FUSE_SET_ATTR_ATIME (1 << 4)
-#define FUSE_SET_ATTR_MTIME (1 << 5)
-#define FUSE_SET_ATTR_ATIME_NOW (1 << 7)
-#define FUSE_SET_ATTR_MTIME_NOW (1 << 8)
-#define FUSE_SET_ATTR_CTIME (1 << 10)
-#define FUSE_SET_ATTR_KILL_SUIDGID (1 << 11)
-
-/*
- * Request methods and replies
- */
-
-/**
- * Low level filesystem operations
- *
- * Most of the methods (with the exception of init and destroy)
- * receive a request handle (fuse_req_t) as their first argument.
- * This handle must be passed to one of the specified reply functions.
- *
- * This may be done inside the method invocation, or after the call
- * has returned. The request handle is valid until one of the reply
- * functions is called.
- *
- * Other pointer arguments (name, fuse_file_info, etc) are not valid
- * after the call has returned, so if they are needed later, their
- * contents have to be copied.
- *
- * In general, all methods are expected to perform any necessary
- * permission checking. However, a filesystem may delegate this task
- * to the kernel by passing the `default_permissions` mount option to
- * `fuse_session_new()`. In this case, methods will only be called if
- * the kernel's permission check has succeeded.
- *
- * The filesystem sometimes needs to handle a return value of -ENOENT
- * from the reply function, which means that the request was
- * interrupted and the reply discarded. For example, if
- * fuse_reply_open() returns -ENOENT, the release method for this file
- * will not be called.
- */
-struct fuse_lowlevel_ops {
- /**
- * Initialize filesystem
- *
- * This function is called when libfuse establishes
- * communication with the FUSE kernel module. The file system
- * should use this method to inspect and/or modify the
- * connection parameters provided in the `conn` structure.
- *
- * Note that some parameters may be overwritten by options
- * passed to fuse_session_new() which take precedence over the
- * values set in this handler.
- *
- * There's no reply to this function
- *
- * @param userdata the user data passed to fuse_session_new()
- */
- void (*init)(void *userdata, struct fuse_conn_info *conn);
-
- /**
- * Clean up filesystem.
- *
- * Called on filesystem exit. When this method is called, the
- * connection to the kernel may be gone already, so that eg. calls
- * to fuse_lowlevel_notify_* will fail.
- *
- * There's no reply to this function
- *
- * @param userdata the user data passed to fuse_session_new()
- */
- void (*destroy)(void *userdata);
-
- /**
- * Look up a directory entry by name and get its attributes.
- *
- * Valid replies:
- * fuse_reply_entry
- * fuse_reply_err
- *
- * @param req request handle
- * @param parent inode number of the parent directory
- * @param name the name to look up
- */
- void (*lookup)(fuse_req_t req, fuse_ino_t parent, const char *name);
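As an illustration of this contract, a lookup() handler fills a struct fuse_entry_param and answers with fuse_reply_entry(), or with fuse_reply_err(ENOENT) for a negative entry. resolve_child() and the one-second timeouts below are hypothetical; only the entry_param fields and the reply calls come from this header:

/* Sketch only: assumes fuse_lowlevel.h, <string.h> and <errno.h> */
static int resolve_child(fuse_ino_t parent, const char *name,
                         fuse_ino_t *ino, struct stat *attr); /* hypothetical */

static void example_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
{
    struct fuse_entry_param e;

    memset(&e, 0, sizeof(e));
    if (resolve_child(parent, name, &e.ino, &e.attr) != 0) {
        fuse_reply_err(req, ENOENT);       /* negative entry */
        return;
    }
    e.generation = 1;                      /* must change if ino is reused */
    e.attr_timeout = 1.0;                  /* illustrative timeouts */
    e.entry_timeout = 1.0;
    fuse_reply_entry(req, &e);             /* bumps the inode's lookup count */
}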
-
- /**
- * Forget about an inode
- *
- * This function is called when the kernel removes an inode
- * from its internal caches.
- *
- * The inode's lookup count increases by one for every call to
- * fuse_reply_entry and fuse_reply_create. The nlookup parameter
- * indicates by how much the lookup count should be decreased.
- *
- * Inodes with a non-zero lookup count may receive requests from
- * the kernel even after calls to unlink, rmdir or (when
- * overwriting an existing file) rename. Filesystems must handle
- * such requests properly and it is recommended to defer removal
- * of the inode until the lookup count reaches zero. Calls to
- * unlink, rmdir or rename will be followed closely by forget
- * unless the file or directory is open, in which case the
- * kernel issues forget only after the release or releasedir
- * calls.
- *
- * Note that if a file system will be exported over NFS, the
- * inode's lifetime must extend even beyond forget. See the
- * generation field in struct fuse_entry_param above.
- *
- * On unmount the lookup count for all inodes implicitly drops
- * to zero. It is not guaranteed that the file system will
- * receive corresponding forget messages for the affected
- * inodes.
- *
- * Valid replies:
- * fuse_reply_none
- *
- * @param req request handle
- * @param ino the inode number
- * @param nlookup the number of lookups to forget
- */
- void (*forget)(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup);
-
- /**
- * Get file attributes.
- *
- * If writeback caching is enabled, the kernel may have a
- * better idea of a file's length than the FUSE file system
- * (eg if there has been a write that extended the file size,
- * but that has not yet been passed to the filesystem).
- *
- * In this case, the st_size value provided by the file system
- * will be ignored.
- *
- * Valid replies:
- * fuse_reply_attr
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param fi for future use, currently always NULL
- */
- void (*getattr)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi);
-
- /**
- * Set file attributes
- *
- * In the 'attr' argument only members indicated by the 'to_set'
- * bitmask contain valid values. Other members contain undefined
- * values.
- *
- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is
- * expected to reset the setuid and setgid bits if the file
- * size or owner is being changed.
- *
- * If the setattr was invoked from the ftruncate() system call
- * under Linux kernel versions 2.6.15 or later, the fi->fh will
- * contain the value set by the open method or will be undefined
- * if the open method didn't set any value. Otherwise (not an
- * ftruncate call, or a kernel version earlier than 2.6.15) the fi
- * parameter will be NULL.
- *
- * Valid replies:
- * fuse_reply_attr
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param attr the attributes
- * @param to_set bit mask of attributes which should be set
- * @param fi file information, or NULL
- */
- void (*setattr)(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
- int to_set, struct fuse_file_info *fi);
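A handler typically walks the to_set mask bit by bit, as in the hedged sketch below; do_chmod(), do_truncate() and refresh_attr() are hypothetical helpers, while the FUSE_SET_ATTR_* bits and fuse_reply_attr()/fuse_reply_err() are the API documented here:

/* Hypothetical helpers, not part of this API */
static int do_chmod(fuse_ino_t ino, mode_t mode);
static int do_truncate(fuse_ino_t ino, struct fuse_file_info *fi, off_t size);
static int refresh_attr(fuse_ino_t ino, struct stat *st);

static void example_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
                            int to_set, struct fuse_file_info *fi)
{
    struct stat st;

    if ((to_set & FUSE_SET_ATTR_MODE) && do_chmod(ino, attr->st_mode) != 0) {
        fuse_reply_err(req, errno);
        return;
    }
    if ((to_set & FUSE_SET_ATTR_SIZE) &&
        do_truncate(ino, fi, attr->st_size) != 0) {
        fuse_reply_err(req, errno);
        return;
    }
    /* ...the remaining FUSE_SET_ATTR_* bits are handled the same way... */

    if (refresh_attr(ino, &st) != 0) {
        fuse_reply_err(req, errno);
        return;
    }
    fuse_reply_attr(req, &st, 1.0);        /* 1s attr timeout, illustrative */
}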
-
- /**
- * Read symbolic link
- *
- * Valid replies:
- * fuse_reply_readlink
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- */
- void (*readlink)(fuse_req_t req, fuse_ino_t ino);
-
- /**
- * Create file node
- *
- * Create a regular file, character device, block device, fifo or
- * socket node.
- *
- * Valid replies:
- * fuse_reply_entry
- * fuse_reply_err
- *
- * @param req request handle
- * @param parent inode number of the parent directory
- * @param name to create
- * @param mode file type and mode with which to create the new file
- * @param rdev the device number (only valid if created file is a device)
- */
- void (*mknod)(fuse_req_t req, fuse_ino_t parent, const char *name,
- mode_t mode, dev_t rdev);
-
- /**
- * Create a directory
- *
- * Valid replies:
- * fuse_reply_entry
- * fuse_reply_err
- *
- * @param req request handle
- * @param parent inode number of the parent directory
- * @param name to create
- * @param mode with which to create the new file
- */
- void (*mkdir)(fuse_req_t req, fuse_ino_t parent, const char *name,
- mode_t mode);
-
- /**
- * Remove a file
- *
- * If the file's inode's lookup count is non-zero, the file
- * system is expected to postpone any removal of the inode
- * until the lookup count reaches zero (see description of the
- * forget function).
- *
- * Valid replies:
- * fuse_reply_err
- *
- * @param req request handle
- * @param parent inode number of the parent directory
- * @param name to remove
- */
- void (*unlink)(fuse_req_t req, fuse_ino_t parent, const char *name);
-
- /**
- * Remove a directory
- *
- * If the directory's inode's lookup count is non-zero, the
- * file system is expected to postpone any removal of the
- * inode until the lookup count reaches zero (see description
- * of the forget function).
- *
- * Valid replies:
- * fuse_reply_err
- *
- * @param req request handle
- * @param parent inode number of the parent directory
- * @param name to remove
- */
- void (*rmdir)(fuse_req_t req, fuse_ino_t parent, const char *name);
-
- /**
- * Create a symbolic link
- *
- * Valid replies:
- * fuse_reply_entry
- * fuse_reply_err
- *
- * @param req request handle
- * @param link the contents of the symbolic link
- * @param parent inode number of the parent directory
- * @param name to create
- */
- void (*symlink)(fuse_req_t req, const char *link, fuse_ino_t parent,
- const char *name);
-
- /**
- * Rename a file
- *
- * If the target exists it should be atomically replaced. If
- * the target's inode's lookup count is non-zero, the file
- * system is expected to postpone any removal of the inode
- * until the lookup count reaches zero (see description of the
- * forget function).
- *
- * If this request is answered with an error code of ENOSYS, this is
- * treated as a permanent failure with error code EINVAL, i.e. all
- * future rename requests will fail with EINVAL without being
- * sent to the filesystem process.
- *
- * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If
- * RENAME_NOREPLACE is specified, the filesystem must not
- * overwrite *newname* if it exists and return an error
- * instead. If `RENAME_EXCHANGE` is specified, the filesystem
- * must atomically exchange the two files, i.e. both must
- * exist and neither may be deleted.
- *
- * Valid replies:
- * fuse_reply_err
- *
- * @param req request handle
- * @param parent inode number of the old parent directory
- * @param name old name
- * @param newparent inode number of the new parent directory
- * @param newname new name
- */
- void (*rename)(fuse_req_t req, fuse_ino_t parent, const char *name,
- fuse_ino_t newparent, const char *newname,
- unsigned int flags);
-
- /**
- * Create a hard link
- *
- * Valid replies:
- * fuse_reply_entry
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the old inode number
- * @param newparent inode number of the new parent directory
- * @param newname new name to create
- */
- void (*link)(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent,
- const char *newname);
-
- /**
- * Open a file
- *
- * Open flags are available in fi->flags. The following rules
- * apply.
- *
- * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be
- * filtered out / handled by the kernel.
- *
- * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used
- * by the filesystem to check if the operation is
- * permitted. If the ``-o default_permissions`` mount
- * option is given, this check is already done by the
- * kernel before calling open() and may thus be omitted by
- * the filesystem.
- *
- * - When writeback caching is enabled, the kernel may send
- * read requests even for files opened with O_WRONLY. The
- * filesystem should be prepared to handle this.
- *
- * - When writeback caching is disabled, the filesystem is
- * expected to properly handle the O_APPEND flag and ensure
- * that each write is appending to the end of the file.
- *
- * - When writeback caching is enabled, the kernel will
- * handle O_APPEND. However, unless all changes to the file
- * come through the kernel this will not work reliably. The
- * filesystem should thus either ignore the O_APPEND flag
- * (and let the kernel handle it), or return an error
- * (indicating that reliable O_APPEND is not available).
- *
- * Filesystem may store an arbitrary file handle (pointer,
- * index, etc) in fi->fh, and use this in all other file
- * operations (read, write, flush, release, fsync).
- *
- * Filesystem may also implement stateless file I/O and not store
- * anything in fi->fh.
- *
- * There are also some flags (direct_io, keep_cache) which the
- * filesystem may set in fi, to change the way the file is opened.
- * See fuse_file_info structure in <fuse_common.h> for more details.
- *
- * If this request is answered with an error code of ENOSYS
- * and FUSE_CAP_NO_OPEN_SUPPORT is set in
- * `fuse_conn_info.capable`, this is treated as success and
- * future calls to open and release will also succeed without being
- * sent to the filesystem process.
- *
- * Valid replies:
- * fuse_reply_open
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param fi file information
- */
- void (*open)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi);
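The common stateful pattern is to stash a host file descriptor in fi->fh and return it with fuse_reply_open(); inode_path() is a hypothetical inode-to-path translation, and error handling is reduced to the minimum:

/* Sketch only: assumes fuse_lowlevel.h, <fcntl.h> and <errno.h> */
static const char *inode_path(fuse_ino_t ino);   /* hypothetical */

static void example_open(fuse_req_t req, fuse_ino_t ino,
                         struct fuse_file_info *fi)
{
    int fd = open(inode_path(ino), fi->flags);

    if (fd == -1) {
        fuse_reply_err(req, errno);
        return;
    }
    fi->fh = fd;               /* reused by read/write/flush/release below */
    fuse_reply_open(req, fi);
}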
-
- /**
- * Read data
- *
- * Read should send exactly the number of bytes requested except
- * on EOF or error, otherwise the rest of the data will be
- * substituted with zeroes. An exception to this is when the file
- * has been opened in 'direct_io' mode, in which case the return
- * value of the read system call will reflect the return value of
- * this operation.
- *
- * fi->fh will contain the value set by the open method, or will
- * be undefined if the open method didn't set any value.
- *
- * Valid replies:
- * fuse_reply_buf
- * fuse_reply_iov
- * fuse_reply_data
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param size number of bytes to read
- * @param off offset to read from
- * @param fi file information
- */
- void (*read)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
- struct fuse_file_info *fi);
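Paired with the open() sketch above, a read() handler that follows the "exactly size bytes except at EOF" rule can pread() from fi->fh and reply with fuse_reply_buf(); this is a minimal sketch, not how virtiofsd itself implements read:

/* Sketch only: assumes fuse_lowlevel.h, <stdlib.h>, <unistd.h>, <errno.h> */
static void example_read(fuse_req_t req, fuse_ino_t ino, size_t size,
                         off_t off, struct fuse_file_info *fi)
{
    char *buf = malloc(size);
    ssize_t n;

    (void)ino;
    if (!buf) {
        fuse_reply_err(req, ENOMEM);
        return;
    }
    n = pread((int)fi->fh, buf, size, off);
    if (n == -1) {
        fuse_reply_err(req, errno);
    } else {
        fuse_reply_buf(req, buf, n);       /* a short reply signals EOF */
    }
    free(buf);
}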
-
- /**
- * Write data
- *
- * Write should return exactly the number of bytes requested
- * except on error. An exception to this is when the file has
- * been opened in 'direct_io' mode, in which case the return value
- * of the write system call will reflect the return value of this
- * operation.
- *
- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is
- * expected to reset the setuid and setgid bits.
- *
- * fi->fh will contain the value set by the open method, or will
- * be undefined if the open method didn't set any value.
- *
- * Valid replies:
- * fuse_reply_write
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param buf data to write
- * @param size number of bytes to write
- * @param off offset to write to
- * @param fi file information
- */
- void (*write)(fuse_req_t req, fuse_ino_t ino, const char *buf, size_t size,
- off_t off, struct fuse_file_info *fi);
-
- /**
- * Flush method
- *
- * This is called on each close() of the opened file.
- *
- * Since file descriptors can be duplicated (dup, dup2, fork), for
- * one open call there may be many flush calls.
- *
- * Filesystems shouldn't assume that flush will always be called
- * after some writes, or that it will be called at all.
- *
- * fi->fh will contain the value set by the open method, or will
- * be undefined if the open method didn't set any value.
- *
- * NOTE: the name of the method is misleading, since (unlike
- * fsync) the filesystem is not forced to flush pending writes.
- * One reason to flush data is if the filesystem wants to return
- * write errors during close. However, such use is non-portable
- * because POSIX does not require [close] to wait for delayed I/O to
- * complete.
- *
- * If the filesystem supports file locking operations (setlk,
- * getlk) it should remove all locks belonging to 'fi->owner'.
- *
- * If this request is answered with an error code of ENOSYS,
- * this is treated as success and future calls to flush() will
- * succeed automatically without being sent to the filesystem
- * process.
- *
- * Valid replies:
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param fi file information
- *
- * [close]:
- * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html
- */
- void (*flush)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi);
-
- /**
- * Release an open file
- *
- * Release is called when there are no more references to an open
- * file: all file descriptors are closed and all memory mappings
- * are unmapped.
- *
- * For every open call there will be exactly one release call (unless
- * the filesystem is force-unmounted).
- *
- * The filesystem may reply with an error, but error values are
- * not returned to close() or munmap() which triggered the
- * release.
- *
- * fi->fh will contain the value set by the open method, or will
- * be undefined if the open method didn't set any value.
- * fi->flags will contain the same flags as for open.
- *
- * Valid replies:
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param fi file information
- */
- void (*release)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi);
-
- /**
- * Synchronize file contents
- *
- * If the datasync parameter is non-zero, then only the user data
- * should be flushed, not the meta data.
- *
- * If this request is answered with an error code of ENOSYS,
- * this is treated as success and future calls to fsync() will
- * succeed automatically without being sent to the filesystem
- * process.
- *
- * Valid replies:
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param datasync flag indicating if only data should be flushed
- * @param fi file information
- */
- void (*fsync)(fuse_req_t req, fuse_ino_t ino, int datasync,
- struct fuse_file_info *fi);
-
- /**
- * Open a directory
- *
- * Filesystem may store an arbitrary file handle (pointer, index,
- * etc) in fi->fh, and use this in all other directory
- * stream operations (readdir, releasedir, fsyncdir).
- *
- * If this request is answered with an error code of ENOSYS and
- * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`,
- * this is treated as success and future calls to opendir and
- * releasedir will also succeed without being sent to the filesystem
- * process. In addition, the kernel will cache readdir results
- * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR.
- *
- * Valid replies:
- * fuse_reply_open
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param fi file information
- */
- void (*opendir)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi);
-
- /**
- * Read directory
- *
- * Send a buffer filled using fuse_add_direntry(), with size not
- * exceeding the requested size. Send an empty buffer on end of
- * stream.
- *
- * fi->fh will contain the value set by the opendir method, or
- * will be undefined if the opendir method didn't set any value.
- *
- * Returning a directory entry from readdir() does not affect
- * its lookup count.
- *
- * If off_t is non-zero, then it will correspond to one of the off_t
- * values that was previously returned by readdir() for the same
- * directory handle. In this case, readdir() should skip over entries
- * coming before the position defined by the off_t value. If entries
- * are added or removed while the directory handle is open, the filesystem
- * may still include the entries that have been removed, and may not
- * report the entries that have been created. However, addition or
- * removal of entries must never cause readdir() to skip over unrelated
- * entries or to report them more than once. This means
- * that off_t can not be a simple index that enumerates the entries
- * that have been returned but must contain sufficient information to
- * uniquely determine the next directory entry to return even when the
- * set of entries is changing.
- *
- * The function does not have to report the '.' and '..'
- * entries, but is allowed to do so. Note that, if readdir does
- * not return '.' or '..', they will not be implicitly returned,
- * and this behavior is observable by the caller.
- *
- * Valid replies:
- * fuse_reply_buf
- * fuse_reply_data
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param size maximum number of bytes to send
- * @param off offset to continue reading the directory stream
- * @param fi file information
- */
- void (*readdir)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
- struct fuse_file_info *fi);
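A sketch of the fuse_add_direntry() filling loop described above; struct example_dirent and next_dirent() are hypothetical stand-ins for a real directory cursor, and only fuse_add_direntry(), fuse_reply_buf() and fuse_reply_err() are assumed from this API:

/* Hypothetical directory cursor; only the FUSE calls are the real API */
struct example_dirent {
    const char *name;
    fuse_ino_t ino;
    unsigned type;                          /* DT_* value */
    off_t next_off;
};
static struct example_dirent *next_dirent(uint64_t fh, off_t off);

static void example_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
                            off_t off, struct fuse_file_info *fi)
{
    char *buf = calloc(1, size);
    size_t used = 0;
    struct example_dirent *d;

    (void)ino;
    if (!buf) {
        fuse_reply_err(req, ENOMEM);
        return;
    }
    while ((d = next_dirent(fi->fh, off)) != NULL) {
        /* st_ino and the file-type bits of st_mode are what matters here */
        struct stat st = {
            .st_ino = d->ino,
            .st_mode = d->type << 12,
        };
        size_t entlen = fuse_add_direntry(req, buf + used, size - used,
                                          d->name, &st, d->next_off);
        if (entlen > size - used) {
            break;                          /* entry did not fit */
        }
        used += entlen;
        off = d->next_off;
    }
    fuse_reply_buf(req, buf, used);         /* empty buffer == end of stream */
    free(buf);
}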
-
- /**
- * Release an open directory
- *
- * For every opendir call there will be exactly one releasedir
- * call (unless the filesystem is force-unmounted).
- *
- * fi->fh will contain the value set by the opendir method, or
- * will be undefined if the opendir method didn't set any value.
- *
- * Valid replies:
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param fi file information
- */
- void (*releasedir)(fuse_req_t req, fuse_ino_t ino,
- struct fuse_file_info *fi);
-
- /**
- * Synchronize directory contents
- *
- * If the datasync parameter is non-zero, then only the directory
- * contents should be flushed, not the meta data.
- *
- * fi->fh will contain the value set by the opendir method, or
- * will be undefined if the opendir method didn't set any value.
- *
- * If this request is answered with an error code of ENOSYS,
- * this is treated as success and future calls to fsyncdir() will
- * succeed automatically without being sent to the filesystem
- * process.
- *
- * Valid replies:
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param datasync flag indicating if only data should be flushed
- * @param fi file information
- */
- void (*fsyncdir)(fuse_req_t req, fuse_ino_t ino, int datasync,
- struct fuse_file_info *fi);
-
- /**
- * Get file system statistics
- *
- * Valid replies:
- * fuse_reply_statfs
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number, zero means "undefined"
- */
- void (*statfs)(fuse_req_t req, fuse_ino_t ino);
-
- /**
- * Set an extended attribute
- *
- * If this request is answered with an error code of ENOSYS, this is
- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
- * future setxattr() requests will fail with EOPNOTSUPP without being
- * sent to the filesystem process.
- *
- * Valid replies:
- * fuse_reply_err
- */
- void (*setxattr)(fuse_req_t req, fuse_ino_t ino, const char *name,
- const char *value, size_t size, int flags,
- uint32_t setxattr_flags);
-
- /**
- * Get an extended attribute
- *
- * If size is zero, the size of the value should be sent with
- * fuse_reply_xattr.
- *
- * If the size is non-zero, and the value fits in the buffer, the
- * value should be sent with fuse_reply_buf.
- *
- * If the size is too small for the value, the ERANGE error should
- * be sent.
- *
- * If this request is answered with an error code of ENOSYS, this is
- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
- * future getxattr() requests will fail with EOPNOTSUPP without being
- * sent to the filesystem process.
- *
- * Valid replies:
- * fuse_reply_buf
- * fuse_reply_data
- * fuse_reply_xattr
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param name of the extended attribute
- * @param size maximum size of the value to send
- */
- void (*getxattr)(fuse_req_t req, fuse_ino_t ino, const char *name,
- size_t size);
-
- /**
- * List extended attribute names
- *
- * If size is zero, the total size of the attribute list should be
- * sent with fuse_reply_xattr.
- *
- * If the size is non-zero, and the null character separated
- * attribute list fits in the buffer, the list should be sent with
- * fuse_reply_buf.
- *
- * If the size is too small for the list, the ERANGE error should
- * be sent.
- *
- * If this request is answered with an error code of ENOSYS, this is
- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
- * future listxattr() requests will fail with EOPNOTSUPP without being
- * sent to the filesystem process.
- *
- * Valid replies:
- * fuse_reply_buf
- * fuse_reply_data
- * fuse_reply_xattr
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param size maximum size of the list to send
- */
- void (*listxattr)(fuse_req_t req, fuse_ino_t ino, size_t size);
-
- /**
- * Remove an extended attribute
- *
- * If this request is answered with an error code of ENOSYS, this is
- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
- * future removexattr() requests will fail with EOPNOTSUPP without being
- * sent to the filesystem process.
- *
- * Valid replies:
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param name of the extended attribute
- */
- void (*removexattr)(fuse_req_t req, fuse_ino_t ino, const char *name);
-
- /**
- * Check file access permissions
- *
- * This will be called for the access() and chdir() system
- * calls. If the 'default_permissions' mount option is given,
- * this method is not called.
- *
- * This method is not called under Linux kernel versions 2.4.x
- *
- * If this request is answered with an error code of ENOSYS, this is
- * treated as a permanent success, i.e. this and all future access()
- * requests will succeed without being sent to the filesystem process.
- *
- * Valid replies:
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param mask requested access mode
- */
- void (*access)(fuse_req_t req, fuse_ino_t ino, int mask);
-
- /**
- * Create and open a file
- *
- * If the file does not exist, first create it with the specified
- * mode, and then open it.
- *
- * See the description of the open handler for more
- * information.
- *
- * If this method is not implemented or under Linux kernel
- * versions earlier than 2.6.15, the mknod() and open() methods
- * will be called instead.
- *
- * If this request is answered with an error code of ENOSYS, the handler
- * is treated as not implemented (i.e., for this and future requests the
- * mknod() and open() handlers will be called instead).
- *
- * Valid replies:
- * fuse_reply_create
- * fuse_reply_err
- *
- * @param req request handle
- * @param parent inode number of the parent directory
- * @param name to create
- * @param mode file type and mode with which to create the new file
- * @param fi file information
- */
- void (*create)(fuse_req_t req, fuse_ino_t parent, const char *name,
- mode_t mode, struct fuse_file_info *fi);
-
- /**
- * Test for a POSIX file lock
- *
- * Valid replies:
- * fuse_reply_lock
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param fi file information
- * @param lock the region/type to test
- */
- void (*getlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
- struct flock *lock);
-
- /**
- * Acquire, modify or release a POSIX file lock
- *
- * For POSIX threads (NPTL) there's a 1-1 relation between pid and
- * owner, but otherwise this is not always the case. For checking
- * lock ownership, 'fi->owner' must be used. The l_pid field in
- * 'struct flock' should only be used to fill in this field in
- * getlk().
- *
- * Note: if the locking methods are not implemented, the kernel
- * will still allow file locking to work locally. Hence these are
- * only interesting for network filesystems and similar.
- *
- * Valid replies:
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param fi file information
- * @param lock the region/type to set
- * @param sleep locking operation may sleep
- */
- void (*setlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
- struct flock *lock, int sleep);
-
- /**
- * Map block index within file to block index within device
- *
- * Note: This makes sense only for block device backed filesystems
- * mounted with the 'blkdev' option
- *
- * If this request is answered with an error code of ENOSYS, this is
- * treated as a permanent failure, i.e. all future bmap() requests will
- * fail with the same error code without being sent to the filesystem
- * process.
- *
- * Valid replies:
- * fuse_reply_bmap
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param blocksize unit of block index
- * @param idx block index within file
- */
- void (*bmap)(fuse_req_t req, fuse_ino_t ino, size_t blocksize,
- uint64_t idx);
-
- /**
- * Ioctl
- *
- * Note: For unrestricted ioctls (not allowed for FUSE
- * servers), data in and out areas can be discovered by giving
- * iovs and setting FUSE_IOCTL_RETRY in *flags*. For
- * restricted ioctls, the kernel prepares the in/out data area
- * according to the information encoded in cmd.
- *
- * Valid replies:
- * fuse_reply_ioctl_retry
- * fuse_reply_ioctl
- * fuse_reply_ioctl_iov
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param cmd ioctl command
- * @param arg ioctl argument
- * @param fi file information
- * @param flags for FUSE_IOCTL_* flags
- * @param in_buf data fetched from the caller
- * @param in_bufsz number of fetched bytes
- * @param out_bufsz maximum size of output data
- *
- * Note: the unsigned long request submitted by the application
- * is truncated to 32 bits.
- */
- void (*ioctl)(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg,
- struct fuse_file_info *fi, unsigned flags, const void *in_buf,
- size_t in_bufsz, size_t out_bufsz);
-
- /**
- * Poll for IO readiness
- *
- * Note: If ph is non-NULL, the client should notify
- * when IO readiness events occur by calling
- * fuse_lowlevel_notify_poll() with the specified ph.
- *
- * Regardless of the number of times poll with a non-NULL ph
- * is received, a single notification is enough to clear all of them.
- * Notifying more times incurs overhead but doesn't harm
- * correctness.
- *
- * The callee is responsible for destroying ph with
- * fuse_pollhandle_destroy() when no longer in use.
- *
- * If this request is answered with an error code of ENOSYS, this is
- * treated as success (with a kernel-defined default poll-mask) and
- * future calls to poll() will succeed the same way without being sent
- * to the filesystem process.
- *
- * Valid replies:
- * fuse_reply_poll
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param fi file information
- * @param ph poll handle to be used for notification
- */
- void (*poll)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
- struct fuse_pollhandle *ph);
-
- /**
- * Write data made available in a buffer
- *
- * This is a more generic version of the ->write() method. If
- * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the
- * kernel supports splicing from the fuse device, then the
- * data will be made available in pipe for supporting zero
- * copy data transfer.
- *
- * buf->count is guaranteed to be one (and thus buf->idx is
- * always zero). The write_buf handler must ensure that
- * bufv->off is correctly updated (reflecting the number of
- * bytes read from bufv->buf[0]).
- *
- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is
- * expected to reset the setuid and setgid bits.
- *
- * Valid replies:
- * fuse_reply_write
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param bufv buffer containing the data
- * @param off offset to write to
- * @param fi file information
- */
- void (*write_buf)(fuse_req_t req, fuse_ino_t ino, struct fuse_bufvec *bufv,
- off_t off, struct fuse_file_info *fi);
-
- /**
- * Forget about multiple inodes
- *
- * See description of the forget function for more
- * information.
- *
- * Valid replies:
- * fuse_reply_none
- *
- * @param req request handle
- */
- void (*forget_multi)(fuse_req_t req, size_t count,
- struct fuse_forget_data *forgets);
-
- /**
- * Acquire, modify or release a BSD file lock
- *
- * Note: if the locking methods are not implemented, the kernel
- * will still allow file locking to work locally. Hence these are
- * only interesting for network filesystems and similar.
- *
- * Valid replies:
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param fi file information
- * @param op the locking operation, see flock(2)
- */
- void (*flock)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
- int op);
-
- /**
- * Allocate requested space. If this function returns success then
- * subsequent writes to the specified range shall not fail due to the lack
- * of free space on the file system storage media.
- *
- * If this request is answered with an error code of ENOSYS, this is
- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
- * future fallocate() requests will fail with EOPNOTSUPP without being
- * sent to the filesystem process.
- *
- * Valid replies:
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param offset starting point for allocated region
- * @param length size of allocated region
- * @param mode determines the operation to be performed on the given range,
- * see fallocate(2)
- */
- void (*fallocate)(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset,
- off_t length, struct fuse_file_info *fi);
-
- /**
- * Read directory with attributes
- *
- * Send a buffer filled using fuse_add_direntry_plus(), with size not
- * exceeding the requested size. Send an empty buffer on end of
- * stream.
- *
- * fi->fh will contain the value set by the opendir method, or
- * will be undefined if the opendir method didn't set any value.
- *
- * In contrast to readdir() (which does not affect the lookup counts),
- * the lookup count of every entry returned by readdirplus(), except "."
- * and "..", is incremented by one.
- *
- * Valid replies:
- * fuse_reply_buf
- * fuse_reply_data
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param size maximum number of bytes to send
- * @param off offset to continue reading the directory stream
- * @param fi file information
- */
- void (*readdirplus)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
- struct fuse_file_info *fi);
-
- /**
- * Copy a range of data from one file to another
- *
- * Performs an optimized copy between two file descriptors without the
- * additional cost of transferring data through the FUSE kernel module
- * to user space (glibc) and then back into the FUSE filesystem again.
- *
- * In case this method is not implemented, glibc falls back to reading
- * data from the source and writing to the destination, effectively
- * doing an inefficient copy of the data.
- *
- * If this request is answered with an error code of ENOSYS, this is
- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
- * future copy_file_range() requests will fail with EOPNOTSUPP without
- * being sent to the filesystem process.
- *
- * Valid replies:
- * fuse_reply_write
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino_in the inode number of the source file
- * @param off_in starting point from where the data should be read
- * @param fi_in file information of the source file
- * @param ino_out the inode number of the destination file
- * @param off_out starting point where the data should be written
- * @param fi_out file information of the destination file
- * @param len maximum size of the data to copy
- * @param flags passed along with the copy_file_range() syscall
- */
- void (*copy_file_range)(fuse_req_t req, fuse_ino_t ino_in, off_t off_in,
- struct fuse_file_info *fi_in, fuse_ino_t ino_out,
- off_t off_out, struct fuse_file_info *fi_out,
- size_t len, int flags);
-
- /**
- * Find next data or hole after the specified offset
- *
- * If this request is answered with an error code of ENOSYS, this is
- * treated as a permanent failure, i.e. all future lseek() requests will
- * fail with the same error code without being sent to the filesystem
- * process.
- *
- * Valid replies:
- * fuse_reply_lseek
- * fuse_reply_err
- *
- * @param req request handle
- * @param ino the inode number
- * @param off offset to start search from
- * @param whence either SEEK_DATA or SEEK_HOLE
- * @param fi file information
- */
- void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
- struct fuse_file_info *fi);
-
- /**
- * Synchronize file system content
- *
- * If this request is answered with an error code of ENOSYS,
- * this is treated as success and future calls to syncfs() will
- * succeed automatically without being sent to the filesystem
- * process.
- *
- * @param req request handle
- * @param ino the inode number
- */
- void (*syncfs)(fuse_req_t req, fuse_ino_t ino);
-};
-
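A minimal sketch of how handlers from the operations table above are wired up
(the xmp_* names are hypothetical and only for illustration; fuse_reply_err()
is declared just below):

static void xmp_access(fuse_req_t req, fuse_ino_t ino, int mask)
{
    /* sketch: grant everything; a real handler would check 'mask' for 'ino' */
    fuse_reply_err(req, 0);
}

static const struct fuse_lowlevel_ops xmp_ops = {
    .access = xmp_access,
    /* ... further handlers such as .lookup, .getattr and .read ... */
};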
-/**
- * Reply with an error code or success.
- *
- * Possible requests:
- * all except forget
- *
- * Wherever possible, error codes should be chosen from the list of
- * documented error conditions in the corresponding system call's
- * manpage.
- *
- * An error code of ENOSYS is sometimes treated specially. This is
- * indicated in the documentation of the affected handler functions.
- *
- * The following requests may be answered with a zero error code:
- * unlink, rmdir, rename, flush, release, fsync, fsyncdir, setxattr,
- * removexattr, setlk.
- *
- * @param req request handle
- * @param err the positive error value, or zero for success
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_err(fuse_req_t req, int err);
-
-/**
- * Don't send reply
- *
- * Possible requests:
- * forget
- * forget_multi
- * retrieve_reply
- *
- * @param req request handle
- */
-void fuse_reply_none(fuse_req_t req);
-
-/**
- * Reply with a directory entry
- *
- * Possible requests:
- * lookup, mknod, mkdir, symlink, link
- *
- * Side effects:
- * increments the lookup count on success
- *
- * @param req request handle
- * @param e the entry parameters
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e);
-
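As a minimal sketch (the inode number and attributes are illustrative, and
<string.h> and <sys/stat.h> are assumed), a lookup handler fills a
struct fuse_entry_param and hands it to fuse_reply_entry():

static void xmp_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
{
    struct fuse_entry_param e;

    memset(&e, 0, sizeof(e));
    e.ino = 2;                      /* inode resolved for 'name' under 'parent' */
    e.attr.st_ino = e.ino;
    e.attr.st_mode = S_IFREG | 0644;
    e.attr_timeout = 1.0;           /* cache attributes for one second */
    e.entry_timeout = 1.0;          /* cache the name lookup for one second */

    fuse_reply_entry(req, &e);      /* increments the lookup count */
}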
-/**
- * Reply with a directory entry and open parameters
- *
- * Currently the following members of 'fi' are used:
- * fh, direct_io, keep_cache
- *
- * Possible requests:
- * create
- *
- * Side effects:
- * increments the lookup count on success
- *
- * @param req request handle
- * @param e the entry parameters
- * @param fi file information
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e,
- const struct fuse_file_info *fi);
-
-/**
- * Reply with attributes
- *
- * Possible requests:
- * getattr, setattr
- *
- * @param req request handle
- * @param attr the attributes
- * @param attr_timeout validity timeout (in seconds) for the attributes
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_attr(fuse_req_t req, const struct stat *attr,
- double attr_timeout);
-
-/**
- * Reply with the contents of a symbolic link
- *
- * Possible requests:
- * readlink
- *
- * @param req request handle
- * @param link symbolic link contents
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_readlink(fuse_req_t req, const char *link);
-
-/**
- * Reply with open parameters
- *
- * Currently the following members of 'fi' are used:
- * fh, direct_io, keep_cache
- *
- * Possible requests:
- * open, opendir
- *
- * @param req request handle
- * @param fi file information
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *fi);
-
-/**
- * Reply with number of bytes written
- *
- * Possible requests:
- * write
- *
- * @param req request handle
- * @param count the number of bytes written
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_write(fuse_req_t req, size_t count);
-
-/**
- * Reply with data
- *
- * Possible requests:
- * read, readdir, getxattr, listxattr
- *
- * @param req request handle
- * @param buf buffer containing data
- * @param size the size of data in bytes
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size);
-
-/**
- * Reply with data copied/moved from buffer(s)
- *
- * Possible requests:
- * read, readdir, getxattr, listxattr
- *
- * Side effects:
- * when used to return data from a readdirplus() (but not readdir())
- * call, increments the lookup count of each returned entry by one
- * on success.
- *
- * @param req request handle
- * @param bufv buffer vector
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv);
-
-/**
- * Reply with data vector
- *
- * Possible requests:
- * read, readdir, getxattr, listxattr
- *
- * @param req request handle
- * @param iov the vector containing the data
- * @param count the size of vector
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count);
-
-/**
- * Reply with filesystem statistics
- *
- * Possible requests:
- * statfs
- *
- * @param req request handle
- * @param stbuf filesystem statistics
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf);
-
-/**
- * Reply with needed buffer size
- *
- * Possible requests:
- * getxattr, listxattr
- *
- * @param req request handle
- * @param count the buffer size needed in bytes
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_xattr(fuse_req_t req, size_t count);
-
-/**
- * Reply with file lock information
- *
- * Possible requests:
- * getlk
- *
- * @param req request handle
- * @param lock the lock information
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_lock(fuse_req_t req, const struct flock *lock);
-
-/**
- * Reply with block index
- *
- * Possible requests:
- * bmap
- *
- * @param req request handle
- * @param idx block index within device
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_bmap(fuse_req_t req, uint64_t idx);
-
-/*
- * Filling a buffer in readdir
- */
-
-/**
- * Add a directory entry to the buffer
- *
- * Buffer needs to be large enough to hold the entry. If it's not,
- * then the entry is not filled in but the size of the entry is still
- * returned. The caller can check this by comparing the bufsize
- * parameter with the returned entry size. If the entry size is
- * larger than the buffer size, the operation failed.
- *
- * From the 'stbuf' argument the st_ino field and bits 12-15 of the
- * st_mode field are used. The other fields are ignored.
- *
- * *off* should be any non-zero value that the filesystem can use to
- * identify the current point in the directory stream. It does not
- * need to be the actual physical position. A value of zero is
- * reserved to mean "from the beginning", and should therefore never
- * be used (the first call to fuse_add_direntry should be passed the
- * offset of the second directory entry).
- *
- * @param req request handle
- * @param buf the point where the new entry will be added to the buffer
- * @param bufsize remaining size of the buffer
- * @param name the name of the entry
- * @param stbuf the file attributes
- * @param off the offset of the next entry
- * @return the space needed for the entry
- */
-size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize,
- const char *name, const struct stat *stbuf, off_t off);
-
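A sketch of the buffer-filling pattern described above, assuming the usual
<stdlib.h>, <errno.h> and <sys/stat.h> includes; the single "." entry and its
offset are illustrative:

static void xmp_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
                        off_t off, struct fuse_file_info *fi)
{
    char *buf = calloc(1, size);
    size_t used = 0, entsize;
    struct stat st = { .st_ino = 1, .st_mode = S_IFDIR };

    if (!buf) {
        fuse_reply_err(req, ENOMEM);
        return;
    }
    if (off < 1) {
        /* offset 0 means "from the beginning"; 1 identifies the next entry */
        entsize = fuse_add_direntry(req, buf + used, size - used, ".", &st, 1);
        if (entsize <= size - used) {
            used += entsize;
        }
    }
    /* further entries are added the same way, stopping once one no longer fits */
    fuse_reply_buf(req, buf, used);
    free(buf);
}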
-/**
- * Add a directory entry to the buffer with the attributes
- *
- * See documentation of `fuse_add_direntry()` for more details.
- *
- * @param req request handle
- * @param buf the point where the new entry will be added to the buffer
- * @param bufsize remaining size of the buffer
- * @param name the name of the entry
- * @param e the directory entry
- * @param off the offset of the next entry
- * @return the space needed for the entry
- */
-size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize,
- const char *name,
- const struct fuse_entry_param *e, off_t off);
-
-/**
- * Reply to ask for data fetch and output buffer preparation. ioctl
- * will be retried with the specified input data fetched and output
- * buffer prepared.
- *
- * Possible requests:
- * ioctl
- *
- * @param req request handle
- * @param in_iov iovec specifying data to fetch from the caller
- * @param in_count number of entries in in_iov
- * @param out_iov iovec specifying addresses to write output to
- * @param out_count number of entries in out_iov
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov,
- size_t in_count, const struct iovec *out_iov,
- size_t out_count);
-
-/**
- * Reply to finish ioctl
- *
- * Possible requests:
- * ioctl
- *
- * @param req request handle
- * @param result result to be passed to the caller
- * @param buf buffer containing output data
- * @param size length of output data
- */
-int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size);
-
-/**
- * Reply to finish ioctl with iov buffer
- *
- * Possible requests:
- * ioctl
- *
- * @param req request handle
- * @param result result to be passed to the caller
- * @param iov the vector containing the data
- * @param count the size of vector
- */
-int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov,
- int count);
-
-/**
- * Reply with poll result event mask
- *
- * @param req request handle
- * @param revents poll result event mask
- */
-int fuse_reply_poll(fuse_req_t req, unsigned revents);
-
-/**
- * Reply with offset
- *
- * Possible requests:
- * lseek
- *
- * @param req request handle
- * @param off offset of next data or hole
- * @return zero for success, -errno for failure to send reply
- */
-int fuse_reply_lseek(fuse_req_t req, off_t off);
-
-/*
- * Notification
- */
-
-/**
- * Notify IO readiness event
- *
- * For more information, please read comment for poll operation.
- *
- * @param ph poll handle to notify IO readiness event for
- */
-int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph);
-
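A sketch of the interplay between the poll handler and this notification call;
keeping a single saved handle and the xmp_data_arrived() trigger are
illustrative simplifications (<poll.h> is assumed for POLLIN):

static struct fuse_pollhandle *xmp_saved_ph;

static void xmp_poll(fuse_req_t req, fuse_ino_t ino,
                     struct fuse_file_info *fi, struct fuse_pollhandle *ph)
{
    if (ph) {
        if (xmp_saved_ph) {
            fuse_pollhandle_destroy(xmp_saved_ph);
        }
        xmp_saved_ph = ph;              /* keep until we notify or replace it */
    }
    fuse_reply_poll(req, POLLIN);       /* report current readiness */
}

/* called by the filesystem whenever new data becomes available */
static void xmp_data_arrived(void)
{
    if (xmp_saved_ph) {
        fuse_lowlevel_notify_poll(xmp_saved_ph);
        fuse_pollhandle_destroy(xmp_saved_ph);
        xmp_saved_ph = NULL;
    }
}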
-/**
- * Notify to invalidate cache for an inode.
- *
- * Added in FUSE protocol version 7.12. If the kernel does not support
- * this (or a newer) version, the function will return -ENOSYS and do
- * nothing.
- *
- * If the filesystem has writeback caching enabled, invalidating an
- * inode will first trigger a writeback of all dirty pages. The call
- * will block until all writeback requests have completed and the
- * inode has been invalidated. It will, however, not wait for
- * completion of pending writeback requests that have been issued
- * before.
- *
- * If there are no dirty pages, this function will never block.
- *
- * @param se the session object
- * @param ino the inode number
- * @param off the offset in the inode where to start invalidating
- * or negative to invalidate attributes only
- * @param len the amount of cache to invalidate or 0 for all
- * @return zero for success, -errno for failure
- */
-int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino,
- off_t off, off_t len);
-
-/**
- * Notify to invalidate parent attributes and the dentry matching
- * parent/name
- *
- * To avoid a deadlock this function must not be called in the
- * execution path of a related filesystem operation or within any code
- * that could hold a lock that could be needed to execute such an
- * operation. As of kernel 4.18, a "related operation" is a lookup(),
- * symlink(), mknod(), mkdir(), unlink(), rename(), link() or create()
- * request for the parent, and a setattr(), unlink(), rmdir(),
- * rename(), setxattr(), removexattr(), readdir() or readdirplus()
- * request for the inode itself.
- *
- * When called correctly, this function will never block.
- *
- * Added in FUSE protocol version 7.12. If the kernel does not support
- * this (or a newer) version, the function will return -ENOSYS and do
- * nothing.
- *
- * @param se the session object
- * @param parent inode number
- * @param name file name
- * @param namelen strlen() of file name
- * @return zero for success, -errno for failure
- */
-int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent,
- const char *name, size_t namelen);
-
-/**
- * This function behaves like fuse_lowlevel_notify_inval_entry() with
- * the following additional effect (at least as of Linux kernel 4.8):
- *
- * If the provided *child* inode matches the inode that is currently
- * associated with the cached dentry, and if there are any inotify
- * watches registered for the dentry, then the watchers are informed
- * that the dentry has been deleted.
- *
- * To avoid a deadlock this function must not be called while
- * executing a related filesystem operation or while holding a lock
- * that could be needed to execute such an operation (see the
- * description of fuse_lowlevel_notify_inval_entry() for more
- * details).
- *
- * When called correctly, this function will never block.
- *
- * Added in FUSE protocol version 7.18. If the kernel does not support
- * this (or a newer) version, the function will return -ENOSYS and do
- * nothing.
- *
- * @param se the session object
- * @param parent inode number
- * @param child inode number
- * @param name file name
- * @param namelen strlen() of file name
- * @return zero for success, -errno for failure
- */
-int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent,
- fuse_ino_t child, const char *name,
- size_t namelen);
-
-/**
- * Store data to the kernel buffers
- *
- * Synchronously store data in the kernel buffers belonging to the
- * given inode. The stored data is marked up-to-date (no read will be
- * performed against it, unless it's invalidated or evicted from the
- * cache).
- *
- * If the stored data overflows the current file size, then the size
- * is extended, similarly to a write(2) on the filesystem.
- *
- * If this function returns an error, then the store wasn't fully
- * completed, but it may have been partially completed.
- *
- * Added in FUSE protocol version 7.15. If the kernel does not support
- * this (or a newer) version, the function will return -ENOSYS and do
- * nothing.
- *
- * @param se the session object
- * @param ino the inode number
- * @param offset the starting offset into the file to store to
- * @param bufv buffer vector
- * @return zero for success, -errno for failure
- */
-int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino,
- off_t offset, struct fuse_bufvec *bufv);
-
-/*
- * Utility functions
- */
-
-/**
- * Get the userdata from the request
- *
- * @param req request handle
- * @return the user data passed to fuse_session_new()
- */
-void *fuse_req_userdata(fuse_req_t req);
-
-/**
- * Get the context from the request
- *
- * The pointer returned by this function will only be valid for the
- * request's lifetime
- *
- * @param req request handle
- * @return the context structure
- */
-const struct fuse_ctx *fuse_req_ctx(fuse_req_t req);
-
-/**
- * Callback function for an interrupt
- *
- * @param req interrupted request
- * @param data user data
- */
-typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data);
-
-/**
- * Register/unregister callback for an interrupt
- *
- * If an interrupt has already happened, then the callback function is
- * called from within this function, hence it's not possible for
- * interrupts to be lost.
- *
- * @param req request handle
- * @param func the callback function or NULL for unregister
- * @param data user data passed to the callback function
- */
-void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func,
- void *data);
-
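A sketch of how a long-running handler might use this: register a callback
that sets a flag, then check the flag between steps (all names are
illustrative; <errno.h> is assumed for EINTR):

struct xmp_inflight {
    int interrupted;
};

static void xmp_interrupt(fuse_req_t req, void *data)
{
    struct xmp_inflight *fl = data;
    fl->interrupted = 1;                /* observed by the slow handler below */
}

static void xmp_slow_read(fuse_req_t req, fuse_ino_t ino, size_t size,
                          off_t off, struct fuse_file_info *fi)
{
    struct xmp_inflight fl = { 0 };

    fuse_req_interrupt_func(req, xmp_interrupt, &fl);
    /* ... perform the slow work, checking fl.interrupted between steps ... */
    if (fl.interrupted) {
        fuse_reply_err(req, EINTR);
        return;
    }
    fuse_reply_buf(req, NULL, 0);       /* sketch only: reply with no data */
}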
-/**
- * Check if a request has already been interrupted
- *
- * @param req request handle
- * @return 1 if the request has been interrupted, 0 otherwise
- */
-int fuse_req_interrupted(fuse_req_t req);
-
-/**
- * Check if the session is connected via virtio
- *
- * @param se session object
- * @return 1 if the session is a virtio session
- */
-int fuse_lowlevel_is_virtio(struct fuse_session *se);
-
-/*
- * Inquiry functions
- */
-
-/**
- * Print low-level version information to stdout.
- */
-void fuse_lowlevel_version(void);
-
-/**
- * Print available low-level options to stdout. This is not an
- * exhaustive list, but includes only those options that may be of
- * interest to an end-user of a file system.
- */
-void fuse_lowlevel_help(void);
-
-/**
- * Print available options for `fuse_parse_cmdline()`.
- */
-void fuse_cmdline_help(void);
-
-/*
- * Filesystem setup & teardown
- */
-
-struct fuse_cmdline_opts {
- int foreground;
- int debug;
- int nodefault_subtype;
- int show_version;
- int show_help;
- int print_capabilities;
- int syslog;
- int log_level;
- unsigned int max_idle_threads;
- unsigned long rlimit_nofile;
-};
-
-/**
- * Utility function to parse common options for simple file systems
- * using the low-level API. A help text that describes the available
- * options can be printed with `fuse_cmdline_help`. A single
- * non-option argument is treated as the mountpoint. Multiple
- * non-option arguments will result in an error.
- *
- * If neither -o subtype= nor -o fsname= options are given, a new
- * subtype option will be added and set to the basename of the program
- * (the fsname will remain unset, and then defaults to "fuse").
- *
- * Known options will be removed from *args*, unknown options will
- * remain.
- *
- * @param args argument vector (input+output)
- * @param opts output argument for parsed options
- * @return 0 on success, -1 on failure
- */
-int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts);
-
-/**
- * Create a low level session.
- *
- * Returns a session structure suitable for passing to
- * fuse_session_mount() and fuse_session_loop().
- *
- * This function accepts most file-system independent mount options
- * (like context, nodev, ro - see mount(8)), as well as the general
- * fuse mount options listed in mount.fuse(8) (e.g. -o allow_root and
- * -o default_permissions, but not ``-o use_ino``). Instead of `-o
- * debug`, debugging may also be enabled with `-d` or `--debug`.
- *
- * If not all options are known, an error message is written to stderr
- * and the function returns NULL.
- *
- * Option parsing skips argv[0], which is assumed to contain the
- * program name. To prevent accidentally passing an option in
- * argv[0], this element must always be present (even if no options
- * are specified). It may be set to the empty string ('\0') if no
- * reasonable value can be provided.
- *
- * @param args argument vector
- * @param op the (low-level) filesystem operations
- * @param op_size sizeof(struct fuse_lowlevel_ops)
- * @param userdata user data
- *
- * @return the fuse session on success, NULL on failure
- **/
-struct fuse_session *fuse_session_new(struct fuse_args *args,
- const struct fuse_lowlevel_ops *op,
- size_t op_size, void *userdata);
-
-/**
- * Mount a FUSE file system.
- *
- * @param se session object
- *
- * @return 0 on success, -1 on failure.
- **/
-int fuse_session_mount(struct fuse_session *se);
-
-/**
- * Enter a single threaded, blocking event loop.
- *
- * When the event loop terminates because the connection to the FUSE
- * kernel module has been closed, this function returns zero. This
- * happens when the filesystem is unmounted regularly (by the
- * filesystem owner or root running the umount(8) or fusermount(1)
- * command), or if the connection is explicitly severed by writing ``1``
- * to the ``abort`` file in ``/sys/fs/fuse/connections/NNN``. The only
- * way to distinguish between these two conditions is to check if the
- * filesystem is still mounted after the session loop returns.
- *
- * When some error occurs during request processing, the function
- * returns a negated errno(3) value.
- *
- * If the loop has been terminated because of a signal handler
- * installed by fuse_set_signal_handlers(), this function returns the
- * (positive) signal value that triggered the exit.
- *
- * @param se the session
- * @return 0, -errno, or a signal value
- */
-int fuse_session_loop(struct fuse_session *se);
-
-/**
- * Flag a session as terminated.
- *
- * This function is invoked by the POSIX signal handlers, when
- * registered using fuse_set_signal_handlers(). It will cause any
- * running event loops to terminate on the next opportunity.
- *
- * @param se the session
- */
-void fuse_session_exit(struct fuse_session *se);
-
-/**
- * Reset the terminated flag of a session
- *
- * @param se the session
- */
-void fuse_session_reset(struct fuse_session *se);
-
-/**
- * Query the terminated flag of a session
- *
- * @param se the session
- * @return 1 if exited, 0 if not exited
- */
-int fuse_session_exited(struct fuse_session *se);
-
-/**
- * Ensure that file system is unmounted.
- *
- * In regular operation, the file system is typically unmounted by the
- * user calling umount(8) or fusermount(1), which then terminates the
- * FUSE session loop. However, the session loop may also terminate as
- * a result of an explicit call to fuse_session_exit() (e.g. by a
- * signal handler installed by fuse_set_signal_handlers()). In this
- * case the filesystem remains mounted, but any attempt to access it
- * will block (while the filesystem process is still running) or give
- * an ESHUTDOWN error (after the filesystem process has terminated).
- *
- * If the communication channel with the FUSE kernel module is still
- * open (i.e., if the session loop was terminated by an explicit call
- * to fuse_session_exit()), this function will close it and unmount
- * the filesystem. If the communication channel has been closed by the
- * kernel, this method will do (almost) nothing.
- *
- * NOTE: The above semantics mean that if the connection to the kernel
- * is terminated via the ``/sys/fs/fuse/connections/NNN/abort`` file,
- * this method will *not* unmount the filesystem.
- *
- * @param se the session
- */
-void fuse_session_unmount(struct fuse_session *se);
-
-/**
- * Destroy a session
- *
- * @param se the session
- */
-void fuse_session_destroy(struct fuse_session *se);
-
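Putting the setup and teardown calls together, a minimal single-threaded
daemon (a sketch only; xmp_ops stands for a filesystem's hypothetical
struct fuse_lowlevel_ops table) proceeds parse, create, mount, loop, unmount,
destroy:

int main(int argc, char *argv[])
{
    struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
    struct fuse_cmdline_opts opts;
    struct fuse_session *se;
    int ret = 1;

    if (fuse_parse_cmdline(&args, &opts) != 0) {
        return 1;
    }

    se = fuse_session_new(&args, &xmp_ops, sizeof(xmp_ops), NULL);
    if (!se) {
        goto out_args;
    }
    if (fuse_set_signal_handlers(se) != 0) {
        goto out_session;
    }
    if (fuse_session_mount(se) != 0) {
        goto out_signals;
    }

    ret = fuse_session_loop(se);        /* 0, -errno, or a signal value */

    fuse_session_unmount(se);
out_signals:
    fuse_remove_signal_handlers(se);
out_session:
    fuse_session_destroy(se);
out_args:
    fuse_opt_free_args(&args);
    return ret;
}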
-/*
- * Custom event loop support
- */
-
-/**
- * Return file descriptor for communication with kernel.
- *
- * The file descriptor can be used to integrate FUSE with a custom event
- * loop. Whenever data is available for reading on the provided fd,
- * the event loop should call `fuse_session_receive_buf` followed by
- * `fuse_session_process_buf` to process the request.
- *
- * The returned file descriptor is valid until `fuse_session_unmount`
- * is called.
- *
- * @param se the session
- * @return a file descriptor
- */
-int fuse_session_fd(struct fuse_session *se);
-
-/**
- * Process a raw request supplied in a generic buffer
- *
- * The fuse_buf may contain a memory buffer or a pipe file descriptor.
- *
- * @param se the session
- * @param buf the fuse_buf containing the request
- */
-void fuse_session_process_buf(struct fuse_session *se,
- const struct fuse_buf *buf);
-
-/**
- * Read a raw request from the kernel into the supplied buffer.
- *
- * Depending on file system options, system capabilities, and request
- * size the request is either read into a memory buffer or spliced
- * into a temporary pipe.
- *
- * @param se the session
- * @param buf the fuse_buf to store the request in
- * @return the actual size of the raw request, or -errno on error
- */
-int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf);
-
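A sketch of the custom event loop described above, multiplexing the session
descriptor with poll(2); error handling is abbreviated and the usual
<poll.h>, <errno.h> and <stdlib.h> includes are assumed:

static int xmp_custom_loop(struct fuse_session *se)
{
    struct fuse_buf fbuf = { .mem = NULL };
    struct pollfd pfd = { .fd = fuse_session_fd(se), .events = POLLIN };
    int res = 0;

    while (!fuse_session_exited(se)) {
        if (poll(&pfd, 1, -1) == -1) {
            res = -errno;
            break;
        }
        if (!(pfd.revents & POLLIN)) {
            continue;
        }
        res = fuse_session_receive_buf(se, &fbuf);
        if (res == -EINTR) {
            continue;
        }
        if (res <= 0) {
            break;                      /* 0: connection closed, <0: -errno */
        }
        fuse_session_process_buf(se, &fbuf);
        res = 0;
    }
    free(fbuf.mem);
    return res;
}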
-#endif /* FUSE_LOWLEVEL_H_ */
diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h
deleted file mode 100644
index f252baa..0000000
--- a/tools/virtiofsd/fuse_misc.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#include <pthread.h>
-
-/*
- * Versioned symbols cannot be used in some cases because they
- * - confuse the dynamic linker in uClibc
- * - are not supported on MacOSX (in the Mach-O binary format)
- */
-#if (!defined(__UCLIBC__) && !defined(__APPLE__))
-#define FUSE_SYMVER(x) __asm__(x)
-#else
-#define FUSE_SYMVER(x)
-#endif
-
-#ifndef USE_UCLIBC
-#define fuse_mutex_init(mut) pthread_mutex_init(mut, NULL)
-#else
-/* Is this hack still needed? */
-static inline void fuse_mutex_init(pthread_mutex_t *mut)
-{
- pthread_mutexattr_t attr;
- pthread_mutexattr_init(&attr);
- pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
- pthread_mutex_init(mut, &attr);
- pthread_mutexattr_destroy(&attr);
-}
-#endif
-
-#ifdef HAVE_STRUCT_STAT_ST_ATIM
-/* Linux */
-#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec)
-#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec)
-#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec)
-#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atim.tv_nsec = (val)
-#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctim.tv_nsec = (val)
-#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtim.tv_nsec = (val)
-#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC)
-/* FreeBSD */
-#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec)
-#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec)
-#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec)
-#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atimespec.tv_nsec = (val)
-#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctimespec.tv_nsec = (val)
-#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtimespec.tv_nsec = (val)
-#else
-#define ST_ATIM_NSEC(stbuf) 0
-#define ST_CTIM_NSEC(stbuf) 0
-#define ST_MTIM_NSEC(stbuf) 0
-#define ST_ATIM_NSEC_SET(stbuf, val) do { } while (0)
-#define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0)
-#define ST_MTIM_NSEC_SET(stbuf, val) do { } while (0)
-#endif
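A brief illustration of how these macros hide the platform differences when
copying a stat timestamp into a timespec (the helper name is illustrative):

static void xmp_atime_of(const struct stat *stbuf, struct timespec *ts)
{
    ts->tv_sec = stbuf->st_atime;        /* seconds are portable */
    ts->tv_nsec = ST_ATIM_NSEC(stbuf);   /* nanoseconds go through the macro */
}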
diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c
deleted file mode 100644
index 9d37144..0000000
--- a/tools/virtiofsd/fuse_opt.c
+++ /dev/null
@@ -1,446 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
- *
- * Implementation of option parsing routines (dealing with `struct
- * fuse_args`).
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#include "qemu/osdep.h"
-#include "fuse_opt.h"
-#include "fuse_i.h"
-#include "fuse_misc.h"
-
-
-struct fuse_opt_context {
- void *data;
- const struct fuse_opt *opt;
- fuse_opt_proc_t proc;
- int argctr;
- int argc;
- char **argv;
- struct fuse_args outargs;
- char *opts;
- int nonopt;
-};
-
-void fuse_opt_free_args(struct fuse_args *args)
-{
- if (args) {
- if (args->argv && args->allocated) {
- int i;
- for (i = 0; i < args->argc; i++) {
- free(args->argv[i]);
- }
- free(args->argv);
- }
- args->argc = 0;
- args->argv = NULL;
- args->allocated = 0;
- }
-}
-
-static int alloc_failed(void)
-{
- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n");
- return -1;
-}
-
-int fuse_opt_add_arg(struct fuse_args *args, const char *arg)
-{
- char **newargv;
- char *newarg;
-
- assert(!args->argv || args->allocated);
-
- newarg = strdup(arg);
- if (!newarg) {
- return alloc_failed();
- }
-
- newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *));
- if (!newargv) {
- free(newarg);
- return alloc_failed();
- }
-
- args->argv = newargv;
- args->allocated = 1;
- args->argv[args->argc++] = newarg;
- args->argv[args->argc] = NULL;
- return 0;
-}
-
-static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos,
- const char *arg)
-{
- assert(pos <= args->argc);
- if (fuse_opt_add_arg(args, arg) == -1) {
- return -1;
- }
-
- if (pos != args->argc - 1) {
- char *newarg = args->argv[args->argc - 1];
- memmove(&args->argv[pos + 1], &args->argv[pos],
- sizeof(char *) * (args->argc - pos - 1));
- args->argv[pos] = newarg;
- }
- return 0;
-}
-
-int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg)
-{
- return fuse_opt_insert_arg_common(args, pos, arg);
-}
-
-static int next_arg(struct fuse_opt_context *ctx, const char *opt)
-{
- if (ctx->argctr + 1 >= ctx->argc) {
- fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt);
- return -1;
- }
- ctx->argctr++;
- return 0;
-}
-
-static int add_arg(struct fuse_opt_context *ctx, const char *arg)
-{
- return fuse_opt_add_arg(&ctx->outargs, arg);
-}
-
-static int add_opt_common(char **opts, const char *opt, int esc)
-{
- unsigned oldlen = *opts ? strlen(*opts) : 0;
- char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1);
-
- if (!d) {
- return alloc_failed();
- }
-
- *opts = d;
- if (oldlen) {
- d += oldlen;
- *d++ = ',';
- }
-
- for (; *opt; opt++) {
- if (esc && (*opt == ',' || *opt == '\\')) {
- *d++ = '\\';
- }
- *d++ = *opt;
- }
- *d = '\0';
-
- return 0;
-}
-
-int fuse_opt_add_opt(char **opts, const char *opt)
-{
- return add_opt_common(opts, opt, 0);
-}
-
-int fuse_opt_add_opt_escaped(char **opts, const char *opt)
-{
- return add_opt_common(opts, opt, 1);
-}
-
-static int add_opt(struct fuse_opt_context *ctx, const char *opt)
-{
- return add_opt_common(&ctx->opts, opt, 1);
-}
-
-static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key,
- int iso)
-{
- if (key == FUSE_OPT_KEY_DISCARD) {
- return 0;
- }
-
- if (key != FUSE_OPT_KEY_KEEP && ctx->proc) {
- int res = ctx->proc(ctx->data, arg, key, &ctx->outargs);
- if (res == -1 || !res) {
- return res;
- }
- }
- if (iso) {
- return add_opt(ctx, arg);
- } else {
- return add_arg(ctx, arg);
- }
-}
-
-static int match_template(const char *t, const char *arg, unsigned *sepp)
-{
- int arglen = strlen(arg);
- const char *sep = strchr(t, '=');
- sep = sep ? sep : strchr(t, ' ');
- if (sep && (!sep[1] || sep[1] == '%')) {
- int tlen = sep - t;
- if (sep[0] == '=') {
- tlen++;
- }
- if (arglen >= tlen && strncmp(arg, t, tlen) == 0) {
- *sepp = sep - t;
- return 1;
- }
- }
- if (strcmp(t, arg) == 0) {
- *sepp = 0;
- return 1;
- }
- return 0;
-}
-
-static const struct fuse_opt *find_opt(const struct fuse_opt *opt,
- const char *arg, unsigned *sepp)
-{
- for (; opt && opt->templ; opt++) {
- if (match_template(opt->templ, arg, sepp)) {
- return opt;
- }
- }
- return NULL;
-}
-
-int fuse_opt_match(const struct fuse_opt *opts, const char *opt)
-{
- unsigned dummy;
- return find_opt(opts, opt, &dummy) ? 1 : 0;
-}
-
-static int process_opt_param(void *var, const char *format, const char *param,
- const char *arg)
-{
- assert(format[0] == '%');
- if (format[1] == 's') {
- char **s = var;
- char *copy = strdup(param);
- if (!copy) {
- return alloc_failed();
- }
-
- free(*s);
- *s = copy;
- } else {
- if (sscanf(param, format, var) != 1) {
- fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n",
- arg);
- return -1;
- }
- }
- return 0;
-}
-
-static int process_opt(struct fuse_opt_context *ctx, const struct fuse_opt *opt,
- unsigned sep, const char *arg, int iso)
-{
- if (opt->offset == -1U) {
- if (call_proc(ctx, arg, opt->value, iso) == -1) {
- return -1;
- }
- } else {
- void *var = (char *)ctx->data + opt->offset;
- if (sep && opt->templ[sep + 1]) {
- const char *param = arg + sep;
- if (opt->templ[sep] == '=') {
- param++;
- }
- if (process_opt_param(var, opt->templ + sep + 1, param, arg) ==
- -1) {
- return -1;
- }
- } else {
- *(int *)var = opt->value;
- }
- }
- return 0;
-}
-
-static int process_opt_sep_arg(struct fuse_opt_context *ctx,
- const struct fuse_opt *opt, unsigned sep,
- const char *arg, int iso)
-{
- int res;
- char *newarg;
- char *param;
-
- if (next_arg(ctx, arg) == -1) {
- return -1;
- }
-
- param = ctx->argv[ctx->argctr];
- newarg = g_try_malloc(sep + strlen(param) + 1);
- if (!newarg) {
- return alloc_failed();
- }
-
- memcpy(newarg, arg, sep);
- strcpy(newarg + sep, param);
- res = process_opt(ctx, opt, sep, newarg, iso);
- g_free(newarg);
-
- return res;
-}
-
-static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso)
-{
- unsigned sep;
- const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep);
- if (opt) {
- for (; opt; opt = find_opt(opt + 1, arg, &sep)) {
- int res;
- if (sep && opt->templ[sep] == ' ' && !arg[sep]) {
- res = process_opt_sep_arg(ctx, opt, sep, arg, iso);
- } else {
- res = process_opt(ctx, opt, sep, arg, iso);
- }
- if (res == -1) {
- return -1;
- }
- }
- return 0;
- } else {
- return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso);
- }
-}
-
-static int process_real_option_group(struct fuse_opt_context *ctx, char *opts)
-{
- char *s = opts;
- char *d = s;
- int end = 0;
-
- while (!end) {
- if (*s == '\0') {
- end = 1;
- }
- if (*s == ',' || end) {
- int res;
-
- *d = '\0';
- res = process_gopt(ctx, opts, 1);
- if (res == -1) {
- return -1;
- }
- d = opts;
- } else {
- if (s[0] == '\\' && s[1] != '\0') {
- s++;
- if (s[0] >= '0' && s[0] <= '3' && s[1] >= '0' && s[1] <= '7' &&
- s[2] >= '0' && s[2] <= '7') {
- *d++ = (s[0] - '0') * 0100 + (s[1] - '0') * 0010 +
- (s[2] - '0');
- s += 2;
- } else {
- *d++ = *s;
- }
- } else {
- *d++ = *s;
- }
- }
- s++;
- }
-
- return 0;
-}
-
-static int process_option_group(struct fuse_opt_context *ctx, const char *opts)
-{
- int res;
- char *copy = strdup(opts);
-
- if (!copy) {
- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n");
- return -1;
- }
- res = process_real_option_group(ctx, copy);
- free(copy);
- return res;
-}
-
-static int process_one(struct fuse_opt_context *ctx, const char *arg)
-{
- if (ctx->nonopt || arg[0] != '-') {
- return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0);
- } else if (arg[1] == 'o') {
- if (arg[2]) {
- return process_option_group(ctx, arg + 2);
- } else {
- if (next_arg(ctx, arg) == -1) {
- return -1;
- }
-
- return process_option_group(ctx, ctx->argv[ctx->argctr]);
- }
- } else if (arg[1] == '-' && !arg[2]) {
- if (add_arg(ctx, arg) == -1) {
- return -1;
- }
- ctx->nonopt = ctx->outargs.argc;
- return 0;
- } else {
- return process_gopt(ctx, arg, 0);
- }
-}
-
-static int opt_parse(struct fuse_opt_context *ctx)
-{
- if (ctx->argc) {
- if (add_arg(ctx, ctx->argv[0]) == -1) {
- return -1;
- }
- }
-
- for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) {
- if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) {
- return -1;
- }
- }
-
- if (ctx->opts) {
- if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 ||
- fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) {
- return -1;
- }
- }
-
- /* If option separator ("--") is the last argument, remove it */
- if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc &&
- strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) {
- free(ctx->outargs.argv[ctx->outargs.argc - 1]);
- ctx->outargs.argv[--ctx->outargs.argc] = NULL;
- }
-
- return 0;
-}
-
-int fuse_opt_parse(struct fuse_args *args, void *data,
- const struct fuse_opt opts[], fuse_opt_proc_t proc)
-{
- int res;
- struct fuse_opt_context ctx = {
- .data = data,
- .opt = opts,
- .proc = proc,
- };
-
- if (!args || !args->argv || !args->argc) {
- return 0;
- }
-
- ctx.argc = args->argc;
- ctx.argv = args->argv;
-
- res = opt_parse(&ctx);
- if (res != -1) {
- struct fuse_args tmp = *args;
- *args = ctx.outargs;
- ctx.outargs = tmp;
- }
- free(ctx.opts);
- fuse_opt_free_args(&ctx.outargs);
- return res;
-}
diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h
deleted file mode 100644
index 8f59b4d..0000000
--- a/tools/virtiofsd/fuse_opt.h
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB.
- */
-
-#ifndef FUSE_OPT_H_
-#define FUSE_OPT_H_
-
-/** @file
- *
- * This file defines the option parsing interface of FUSE
- */
-
-/**
- * Option description
- *
- * This structure describes a single option and the action associated
- * with it, in case it matches.
- *
- * More than one such match may occur, in which case the action for
- * each match is executed.
- *
- * There are three possible actions in case of a match:
- *
- * i) An integer (int or unsigned) variable determined by 'offset' is
- * set to 'value'
- *
- * ii) The processing function is called, with 'value' as the key
- *
- * iii) An integer (any) or string (char *) variable determined by
- * 'offset' is set to the value of an option parameter
- *
- * 'offset' should normally be either set to
- *
- * - 'offsetof(struct foo, member)' actions i) and iii)
- *
- * - -1 action ii)
- *
- * The 'offsetof()' macro is defined in the <stddef.h> header.
- *
- * The template determines which options match, and also has an
- * effect on the action. Normally the action is either i) or ii), but
- * if a format is present in the template, then action iii) is
- * performed.
- *
- * The types of templates are:
- *
- * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only
- * themselves. Invalid values are "--" and anything beginning
- * with "-o"
- *
- * 2) "foo", "foo-bar", etc. These match "-ofoo", "-ofoo-bar" or
- * the relevant option in a comma separated option list
- *
- * 3) "bar=", "--foo=", etc. These are variations of 1) and 2)
- * which have a parameter
- *
- * 4) "bar=%s", "--foo=%lu", etc. Same matching as above but perform
- * action iii).
- *
- * 5) "-x ", etc. Matches either "-xparam" or "-x param" as
- * two separate arguments
- *
- * 6) "-x %s", etc. Combination of 4) and 5)
- *
- * If the format is "%s", memory is allocated for the string unlike with
- * scanf(). The previous value (if non-NULL) stored at this location is
- * freed.
- */
-struct fuse_opt {
- /** Matching template and optional parameter formatting */
- const char *templ;
-
- /**
- * Offset of variable within 'data' parameter of fuse_opt_parse()
- * or -1
- */
- unsigned long offset;
-
- /**
- * Value to set the variable to, or to be passed as 'key' to the
- * processing function. Ignored if template has a format
- */
- int value;
-};
-
-/**
- * Key option. In case of a match, the processing function will be
- * called with the specified key.
- */
-#define FUSE_OPT_KEY(templ, key) \
- { \
- templ, -1U, key \
- }
-
-/**
- * Last option. An array of 'struct fuse_opt' must end with a NULL
- * template value
- */
-#define FUSE_OPT_END \
- { \
- NULL, 0, 0 \
- }
-
-/**
- * Argument list
- */
-struct fuse_args {
- /** Argument count */
- int argc;
-
- /** Argument vector. NULL terminated */
- char **argv;
-
- /** Is 'argv' allocated? */
- int allocated;
-};
-
-/**
- * Initializer for 'struct fuse_args'
- */
-#define FUSE_ARGS_INIT(argc, argv) \
- { \
- argc, argv, 0 \
- }
-
-/**
- * Key value passed to the processing function if an option did not
- * match any template
- */
-#define FUSE_OPT_KEY_OPT -1
-
-/**
- * Key value passed to the processing function for all non-options
- *
- * Non-options are the arguments beginning with a character other than
- * '-' or all arguments after the special '--' option
- */
-#define FUSE_OPT_KEY_NONOPT -2
-
-/**
- * Special key value for options to keep
- *
- * The argument is not passed to the processing function, but behaves as
- * if the processing function returned 1
- */
-#define FUSE_OPT_KEY_KEEP -3
-
-/**
- * Special key value for options to discard
- *
- * The argument is not passed to the processing function, but behaves as
- * if the processing function returned zero
- */
-#define FUSE_OPT_KEY_DISCARD -4
-
-/**
- * Processing function
- *
- * This function is called if
- * - option did not match any 'struct fuse_opt'
- * - argument is a non-option
- * - option did match and offset was set to -1
- *
- * The 'arg' parameter will always contain the whole argument or
- * option including the parameter if it exists. A two-argument option
- * ("-x foo") is always converted to a single-argument option of the
- * form "-xfoo" before this function is called.
- *
- * Options of the form '-ofoo' are passed to this function without the
- * '-o' prefix.
- *
- * The return value of this function determines whether this argument
- * is to be inserted into the output argument vector, or discarded.
- *
- * @param data is the user data passed to the fuse_opt_parse() function
- * @param arg is the whole argument or option
- * @param key determines why the processing function was called
- * @param outargs the current output argument list
- * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept
- */
-typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key,
- struct fuse_args *outargs);
-
-/**
- * Option parsing function
- *
- * If 'args' was returned from a previous call to fuse_opt_parse() or
- * it was constructed from
- *
- * A NULL 'args' is equivalent to an empty argument vector
- *
- * A NULL 'opts' is equivalent to an 'opts' array containing a single
- * end marker
- *
- * A NULL 'proc' is equivalent to a processing function always
- * returning '1'
- *
- * @param args is the input and output argument list
- * @param data is the user data
- * @param opts is the option description array
- * @param proc is the processing function
- * @return -1 on error, 0 on success
- */
-int fuse_opt_parse(struct fuse_args *args, void *data,
- const struct fuse_opt opts[], fuse_opt_proc_t proc);
-
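A sketch tying the template rules above to fuse_opt_parse(); the xmp_config
structure and option strings are illustrative only (offsetof() comes from
<stddef.h>):

struct xmp_config {
    char *source;        /* filled by "source=%s" (action iii) */
    int readonly;        /* set to 1 by "readonly" (action i) */
};

#define XMP_OPT(templ, member) \
    { templ, offsetof(struct xmp_config, member), 1 }

static const struct fuse_opt xmp_opts[] = {
    XMP_OPT("source=%s", source),
    XMP_OPT("readonly", readonly),
    FUSE_OPT_KEY("-h", 1),              /* handed to the proc callback */
    FUSE_OPT_END
};

static int xmp_opt_proc(void *data, const char *arg, int key,
                        struct fuse_args *outargs)
{
    return 1;                           /* keep arguments that did not match */
}

/*
 * typical call site:
 *   struct xmp_config cfg = { 0 };
 *   if (fuse_opt_parse(&args, &cfg, xmp_opts, xmp_opt_proc) == -1) {
 *       ... handle the error ...
 *   }
 */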
-/**
- * Add an option to a comma separated option list
- *
- * @param opts is a pointer to an option list, may point to a NULL value
- * @param opt is the option to add
- * @return -1 on allocation error, 0 on success
- */
-int fuse_opt_add_opt(char **opts, const char *opt);
-
-/**
- * Add an option, escaping commas, to a comma separated option list
- *
- * @param opts is a pointer to an option list, may point to a NULL value
- * @param opt is the option to add
- * @return -1 on allocation error, 0 on success
- */
-int fuse_opt_add_opt_escaped(char **opts, const char *opt);
-
-/**
- * Add an argument to a NULL terminated argument vector
- *
- * @param args is the structure containing the current argument list
- * @param arg is the new argument to add
- * @return -1 on allocation error, 0 on success
- */
-int fuse_opt_add_arg(struct fuse_args *args, const char *arg);
-
-/**
- * Add an argument at the specified position in a NULL terminated
- * argument vector
- *
- * Adds the argument to the N-th position. This is useful for adding
- * options at the beginning of the array which must not come after the
- * special '--' option.
- *
- * @param args is the structure containing the current argument list
- * @param pos is the position at which to add the argument
- * @param arg is the new argument to add
- * @return -1 on allocation error, 0 on success
- */
-int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg);
-
-/**
- * Free the contents of argument list
- *
- * The structure itself is not freed
- *
- * @param args is the structure containing the argument list
- */
-void fuse_opt_free_args(struct fuse_args *args);
-
-
-/**
- * Check if an option matches
- *
- * @param opts is the option description array
- * @param opt is the option to match
- * @return 1 if a match is found, 0 if not
- */
-int fuse_opt_match(const struct fuse_opt opts[], const char *opt);
-
-#endif /* FUSE_OPT_H_ */
diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c
deleted file mode 100644
index 1de46de..0000000
--- a/tools/virtiofsd/fuse_signals.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
- *
- * Utility functions for setting signal handlers.
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#include "qemu/osdep.h"
-#include "fuse_i.h"
-#include "fuse_lowlevel.h"
-
-
-static struct fuse_session *fuse_instance;
-
-static void exit_handler(int sig)
-{
- if (fuse_instance) {
- fuse_session_exit(fuse_instance);
- if (sig <= 0) {
- fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n");
- abort();
- }
- fuse_instance->error = sig;
- }
-}
-
-static void do_nothing(int sig)
-{
- (void)sig;
-}
-
-static int set_one_signal_handler(int sig, void (*handler)(int), int remove)
-{
- struct sigaction sa;
- struct sigaction old_sa;
-
- memset(&sa, 0, sizeof(struct sigaction));
- sa.sa_handler = remove ? SIG_DFL : handler;
- sigemptyset(&(sa.sa_mask));
- sa.sa_flags = 0;
-
- if (sigaction(sig, NULL, &old_sa) == -1) {
- fuse_log(FUSE_LOG_ERR, "fuse: cannot get old signal handler: %s\n",
- strerror(errno));
- return -1;
- }
-
- if (old_sa.sa_handler == (remove ? handler : SIG_DFL) &&
- sigaction(sig, &sa, NULL) == -1) {
- fuse_log(FUSE_LOG_ERR, "fuse: cannot set signal handler: %s\n",
- strerror(errno));
- return -1;
- }
- return 0;
-}
-
-int fuse_set_signal_handlers(struct fuse_session *se)
-{
- /*
- * If we used SIG_IGN instead of the do_nothing function,
- * then we would be unable to tell if we set SIG_IGN (and
- * thus should reset to SIG_DFL in fuse_remove_signal_handlers)
- * or if it was already set to SIG_IGN (and should be left
- * untouched).
- */
- if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 ||
- set_one_signal_handler(SIGINT, exit_handler, 0) == -1 ||
- set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 ||
- set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) {
- return -1;
- }
-
- fuse_instance = se;
- return 0;
-}
-
-void fuse_remove_signal_handlers(struct fuse_session *se)
-{
- if (fuse_instance != se) {
- fuse_log(FUSE_LOG_ERR,
- "fuse: fuse_remove_signal_handlers: unknown session\n");
- } else {
- fuse_instance = NULL;
- }
-
- set_one_signal_handler(SIGHUP, exit_handler, 1);
- set_one_signal_handler(SIGINT, exit_handler, 1);
- set_one_signal_handler(SIGTERM, exit_handler, 1);
- set_one_signal_handler(SIGPIPE, do_nothing, 1);
-}
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
deleted file mode 100644
index 9368e29..0000000
--- a/tools/virtiofsd/fuse_virtio.c
+++ /dev/null
@@ -1,1081 +0,0 @@
-/*
- * virtio-fs glue for FUSE
- * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- * Dave Gilbert <dgilbert@redhat.com>
- *
- * Implements the glue between libfuse and libvhost-user
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#include "qemu/osdep.h"
-#include "qemu/iov.h"
-#include "qapi/error.h"
-#include "fuse_i.h"
-#include "standard-headers/linux/fuse.h"
-#include "fuse_misc.h"
-#include "fuse_opt.h"
-#include "fuse_virtio.h"
-
-#include <sys/eventfd.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <grp.h>
-
-#include "libvhost-user.h"
-
-struct fv_VuDev;
-struct fv_QueueInfo {
- pthread_t thread;
- /*
- * This lock protects the VuVirtq preventing races between
- * fv_queue_thread() and fv_queue_worker().
- */
- pthread_mutex_t vq_lock;
-
- struct fv_VuDev *virtio_dev;
-
- /* Our queue index, corresponds to array position */
- int qidx;
- int kick_fd;
- int kill_fd; /* For killing the thread */
-};
-
-/* A FUSE request */
-typedef struct {
- VuVirtqElement elem;
- struct fuse_chan ch;
-
- /* Used to complete requests that involve no reply */
- bool reply_sent;
-} FVRequest;
-
-/*
- * We pass the dev element into libvhost-user
- * and then use it to get back to the outer
- * container for other data.
- */
-struct fv_VuDev {
- VuDev dev;
- struct fuse_session *se;
-
- /*
- * Either handle virtqueues or vhost-user protocol messages. Don't do
- * both at the same time since that could lead to race conditions if
- * virtqueues or memory tables change while another thread is accessing
- * them.
- *
- * The assumptions are:
- * 1. fv_queue_thread() reads/writes to virtqueues and only reads VuDev.
- * 2. virtio_loop() reads/writes virtqueues and VuDev.
- */
- pthread_rwlock_t vu_dispatch_rwlock;
-
- /*
- * The following pair of fields are only accessed in the main
- * virtio_loop
- */
- size_t nqueues;
- struct fv_QueueInfo **qi;
-};
-
-/* Callback from libvhost-user */
-static uint64_t fv_get_features(VuDev *dev)
-{
- return 1ULL << VIRTIO_F_VERSION_1;
-}
-
-/* Callback from libvhost-user */
-static void fv_set_features(VuDev *dev, uint64_t features)
-{
-}
-
-/*
- * Callback from libvhost-user if there's a new fd we're supposed to listen
- * to, typically a queue kick?
- */
-static void fv_set_watch(VuDev *dev, int fd, int condition, vu_watch_cb cb,
- void *data)
-{
- fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd);
-}
-
-/*
- * Callback from libvhost-user if we're no longer supposed to listen on an fd
- */
-static void fv_remove_watch(VuDev *dev, int fd)
-{
- fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd);
-}
-
-/* Callback from libvhost-user to panic */
-static void fv_panic(VuDev *dev, const char *err)
-{
- fuse_log(FUSE_LOG_ERR, "%s: libvhost-user: %s\n", __func__, err);
- /* TODO: Allow reconnects?? */
- exit(EXIT_FAILURE);
-}
-
-/*
- * Copy from an iovec into a fuse_buf (memory only)
- * Caller must ensure there is space
- */
-static size_t copy_from_iov(struct fuse_buf *buf, size_t out_num,
- const struct iovec *out_sg,
- size_t max)
-{
- void *dest = buf->mem;
- size_t copied = 0;
-
- while (out_num && max) {
- size_t onelen = out_sg->iov_len;
- onelen = MIN(onelen, max);
- memcpy(dest, out_sg->iov_base, onelen);
- dest += onelen;
- copied += onelen;
- out_sg++;
- out_num--;
- max -= onelen;
- }
-
- return copied;
-}
-
-/*
- * Skip 'skip' bytes in the iov; 'sg_1stindex' is set as
- * the index for the 1st iovec to read data from, and
- * 'sg_1stskip' is the number of bytes to skip in that entry.
- *
- * Returns True if there are at least 'skip' bytes in the iovec
- *
- */
-static bool skip_iov(const struct iovec *sg, size_t sg_size,
- size_t skip,
- size_t *sg_1stindex, size_t *sg_1stskip)
-{
- size_t vec;
-
- for (vec = 0; vec < sg_size; vec++) {
- if (sg[vec].iov_len > skip) {
- *sg_1stskip = skip;
- *sg_1stindex = vec;
-
- return true;
- }
-
- skip -= sg[vec].iov_len;
- }
-
- *sg_1stindex = vec;
- *sg_1stskip = 0;
- return skip == 0;
-}
-
-/*
- * Copy from one iov to another, the given number of bytes
- * The caller must have checked sizes.
- */
-static void copy_iov(struct iovec *src_iov, int src_count,
- struct iovec *dst_iov, int dst_count, size_t to_copy)
-{
- size_t dst_offset = 0;
- /* Outer loop copies 'src' elements */
- while (to_copy) {
- assert(src_count);
- size_t src_len = src_iov[0].iov_len;
- size_t src_offset = 0;
-
- if (src_len > to_copy) {
- src_len = to_copy;
- }
- /* Inner loop copies contents of one 'src' to maybe multiple dst. */
- while (src_len) {
- assert(dst_count);
- size_t dst_len = dst_iov[0].iov_len - dst_offset;
- if (dst_len > src_len) {
- dst_len = src_len;
- }
-
- memcpy(dst_iov[0].iov_base + dst_offset,
- src_iov[0].iov_base + src_offset, dst_len);
- src_len -= dst_len;
- to_copy -= dst_len;
- src_offset += dst_len;
- dst_offset += dst_len;
-
- assert(dst_offset <= dst_iov[0].iov_len);
- if (dst_offset == dst_iov[0].iov_len) {
- dst_offset = 0;
- dst_iov++;
- dst_count--;
- }
- }
- src_iov++;
- src_count--;
- }
-}
-
-/*
- * pthread_rwlock_rdlock() and pthread_rwlock_wrlock can fail if
- * a deadlock condition is detected or the current thread already
- * owns the lock. They can also fail, like pthread_rwlock_unlock(),
- * if the mutex wasn't properly initialized. None of these are ever
- * expected to happen.
- */
-static void vu_dispatch_rdlock(struct fv_VuDev *vud)
-{
- int ret = pthread_rwlock_rdlock(&vud->vu_dispatch_rwlock);
- assert(ret == 0);
-}
-
-static void vu_dispatch_wrlock(struct fv_VuDev *vud)
-{
- int ret = pthread_rwlock_wrlock(&vud->vu_dispatch_rwlock);
- assert(ret == 0);
-}
-
-static void vu_dispatch_unlock(struct fv_VuDev *vud)
-{
- int ret = pthread_rwlock_unlock(&vud->vu_dispatch_rwlock);
- assert(ret == 0);
-}
-
-static void vq_send_element(struct fv_QueueInfo *qi, VuVirtqElement *elem,
- ssize_t len)
-{
- struct fuse_session *se = qi->virtio_dev->se;
- VuDev *dev = &se->virtio_dev->dev;
- VuVirtq *q = vu_get_queue(dev, qi->qidx);
-
- vu_dispatch_rdlock(qi->virtio_dev);
- pthread_mutex_lock(&qi->vq_lock);
- vu_queue_push(dev, q, elem, len);
- vu_queue_notify(dev, q);
- pthread_mutex_unlock(&qi->vq_lock);
- vu_dispatch_unlock(qi->virtio_dev);
-}
-
-/*
- * Called back by ll whenever it wants to send a reply/message back
- * The 1st element of the iov starts with the fuse_out_header
- * 'unique'==0 means it's a notify message.
- */
-int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
- struct iovec *iov, int count)
-{
- FVRequest *req = container_of(ch, FVRequest, ch);
- struct fv_QueueInfo *qi = ch->qi;
- VuVirtqElement *elem = &req->elem;
- int ret = 0;
-
- assert(count >= 1);
- assert(iov[0].iov_len >= sizeof(struct fuse_out_header));
-
- struct fuse_out_header *out = iov[0].iov_base;
- /* TODO: Endianness! */
-
- size_t tosend_len = iov_size(iov, count);
-
- /* unique == 0 is notification, which we don't support */
- assert(out->unique);
- assert(!req->reply_sent);
-
- /* The 'in' part of the elem is to qemu */
- unsigned int in_num = elem->in_num;
- struct iovec *in_sg = elem->in_sg;
- size_t in_len = iov_size(in_sg, in_num);
- fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
- __func__, elem->index, in_num, in_len);
-
- /*
- * The elem should have room for a 'fuse_out_header' (out from fuse)
- * plus the data based on the len in the header.
- */
- if (in_len < sizeof(struct fuse_out_header)) {
- fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
- __func__, elem->index);
- ret = -E2BIG;
- goto err;
- }
- if (in_len < tosend_len) {
- fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
- __func__, elem->index, tosend_len);
- ret = -E2BIG;
- goto err;
- }
-
- copy_iov(iov, count, in_sg, in_num, tosend_len);
-
- vq_send_element(qi, elem, tosend_len);
- req->reply_sent = true;
-
-err:
- return ret;
-}
-
-/*
- * Callback from fuse_send_data_iov_* when it's virtio and the buffer
- * is a single FD with FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK
- * We need to send the iov and then the buffer.
- * Return 0 on success
- */
-int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
- struct iovec *iov, int count, struct fuse_bufvec *buf,
- size_t len)
-{
- FVRequest *req = container_of(ch, FVRequest, ch);
- struct fv_QueueInfo *qi = ch->qi;
- VuVirtqElement *elem = &req->elem;
- int ret = 0;
- g_autofree struct iovec *in_sg_cpy = NULL;
-
- assert(count >= 1);
- assert(iov[0].iov_len >= sizeof(struct fuse_out_header));
-
- struct fuse_out_header *out = iov[0].iov_base;
- /* TODO: Endianness! */
-
- size_t iov_len = iov_size(iov, count);
- size_t tosend_len = iov_len + len;
-
- out->len = tosend_len;
-
- fuse_log(FUSE_LOG_DEBUG, "%s: count=%d len=%zd iov_len=%zd\n", __func__,
- count, len, iov_len);
-
- /* unique == 0 is notification which we don't support */
- assert(out->unique);
-
- assert(!req->reply_sent);
-
- /* The 'in' part of the elem is to qemu */
- unsigned int in_num = elem->in_num;
- struct iovec *in_sg = elem->in_sg;
- size_t in_len = iov_size(in_sg, in_num);
- fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
- __func__, elem->index, in_num, in_len);
-
- /*
- * The elem should have room for a 'fuse_out_header' (out from fuse)
- * plus the data based on the len in the header.
- */
- if (in_len < sizeof(struct fuse_out_header)) {
- fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
- __func__, elem->index);
- return E2BIG;
- }
- if (in_len < tosend_len) {
- fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
- __func__, elem->index, tosend_len);
- return E2BIG;
- }
-
- /* TODO: Limit to 'len' */
-
- /* First copy the header data from iov->in_sg */
- copy_iov(iov, count, in_sg, in_num, iov_len);
-
- /*
- * Build a copy of the in_sg iov so we can skip bits in it,
- * including changing the offsets
- */
- in_sg_cpy = g_new(struct iovec, in_num);
- memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num);
- /* These get updated as we skip */
- struct iovec *in_sg_ptr = in_sg_cpy;
- unsigned int in_sg_cpy_count = in_num;
-
- /* skip over parts of in_sg that contained the header iov */
- iov_discard_front(&in_sg_ptr, &in_sg_cpy_count, iov_len);
-
- do {
- fuse_log(FUSE_LOG_DEBUG, "%s: in_sg_cpy_count=%d len remaining=%zd\n",
- __func__, in_sg_cpy_count, len);
-
- ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count,
- buf->buf[0].pos);
-
- if (ret == -1) {
- ret = errno;
- if (ret == EINTR) {
- continue;
- }
- fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n",
- __func__, len);
- return ret;
- }
-
- if (!ret) {
- /* EOF case? */
- fuse_log(FUSE_LOG_DEBUG, "%s: !ret len remaining=%zd\n", __func__,
- len);
- break;
- }
- fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__,
- ret, len);
-
- len -= ret;
- /* Short read. Retry reading remaining bytes */
- if (len) {
- fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__);
- /* Skip over this much next time around */
- iov_discard_front(&in_sg_ptr, &in_sg_cpy_count, ret);
- buf->buf[0].pos += ret;
- }
- } while (len);
-
- /* Need to fix out->len on EOF */
- if (len) {
- struct fuse_out_header *out_sg = in_sg[0].iov_base;
-
- tosend_len -= len;
- out_sg->len = tosend_len;
- }
-
- vq_send_element(qi, elem, tosend_len);
- req->reply_sent = true;
- return 0;
-}
-
-static __thread bool clone_fs_called;
-
-/* Process one FVRequest in a thread pool */
-static void fv_queue_worker(gpointer data, gpointer user_data)
-{
- struct fv_QueueInfo *qi = user_data;
- struct fuse_session *se = qi->virtio_dev->se;
- FVRequest *req = data;
- VuVirtqElement *elem = &req->elem;
- struct fuse_buf fbuf = {};
- bool allocated_bufv = false;
- struct fuse_bufvec bufv;
- struct fuse_bufvec *pbufv;
- struct fuse_in_header inh;
-
- assert(se->bufsize > sizeof(struct fuse_in_header));
-
- if (!clone_fs_called) {
- int ret;
-
- /* unshare FS for xattr operation */
- ret = unshare(CLONE_FS);
- /* should not fail */
- assert(ret == 0);
-
- clone_fs_called = true;
- }
-
- /*
- * An element contains one request and the space to send our response
- * They're spread over multiple descriptors in a scatter/gather set
- * and we can't trust the guest to keep them still; so copy in/out.
- */
- fbuf.mem = g_malloc(se->bufsize);
-
- fuse_mutex_init(&req->ch.lock);
- req->ch.fd = -1;
- req->ch.qi = qi;
-
- /* The 'out' part of the elem is from qemu */
- unsigned int out_num = elem->out_num;
- struct iovec *out_sg = elem->out_sg;
- size_t out_len = iov_size(out_sg, out_num);
- fuse_log(FUSE_LOG_DEBUG,
- "%s: elem %d: with %d out desc of length %zd\n",
- __func__, elem->index, out_num, out_len);
-
- /*
- * The elem should contain a 'fuse_in_header' (in to fuse)
- * plus the data based on the len in the header.
- */
- if (out_len < sizeof(struct fuse_in_header)) {
- fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n",
- __func__, elem->index);
- assert(0); /* TODO */
- }
- if (out_len > se->bufsize) {
- fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", __func__,
- elem->index);
- assert(0); /* TODO */
- }
- /* Copy just the fuse_in_header and look at it */
- copy_from_iov(&fbuf, out_num, out_sg,
- sizeof(struct fuse_in_header));
- memcpy(&inh, fbuf.mem, sizeof(struct fuse_in_header));
-
- pbufv = NULL; /* Compiler thinks there's an uninitialised path */
- if (inh.opcode == FUSE_WRITE &&
- out_len >= (sizeof(struct fuse_in_header) +
- sizeof(struct fuse_write_in))) {
- /*
- * For a write we don't actually need to copy the
- * data, we can just do it straight out of guest memory
- * but we must still copy the headers in case the guest
- * was nasty and changed them while we were using them.
- */
- fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__);
-
- fbuf.size = copy_from_iov(&fbuf, out_num, out_sg,
- sizeof(struct fuse_in_header) +
- sizeof(struct fuse_write_in));
- /* That copy reread the in_header, make sure we use the original */
- memcpy(fbuf.mem, &inh, sizeof(struct fuse_in_header));
-
- /* Allocate the bufv, with space for the rest of the iov */
- pbufv = g_try_malloc(sizeof(struct fuse_bufvec) +
- sizeof(struct fuse_buf) * out_num);
- if (!pbufv) {
- fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
- __func__);
- goto out;
- }
-
- allocated_bufv = true;
- pbufv->count = 1;
- pbufv->buf[0] = fbuf;
-
- size_t iovindex, pbufvindex, iov_bytes_skip;
- pbufvindex = 1; /* 2 headers, 1 fusebuf */
-
- if (!skip_iov(out_sg, out_num,
- sizeof(struct fuse_in_header) +
- sizeof(struct fuse_write_in),
- &iovindex, &iov_bytes_skip)) {
- fuse_log(FUSE_LOG_ERR, "%s: skip failed\n",
- __func__);
- goto out;
- }
-
- for (; iovindex < out_num; iovindex++, pbufvindex++) {
- pbufv->count++;
- pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
- pbufv->buf[pbufvindex].flags = 0;
- pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
- pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
-
- if (iov_bytes_skip) {
- pbufv->buf[pbufvindex].mem += iov_bytes_skip;
- pbufv->buf[pbufvindex].size -= iov_bytes_skip;
- iov_bytes_skip = 0;
- }
- }
- } else {
- /* Normal (non fast write) path */
-
- copy_from_iov(&fbuf, out_num, out_sg, se->bufsize);
- /* That copy reread the in_header, make sure we use the original */
- memcpy(fbuf.mem, &inh, sizeof(struct fuse_in_header));
- fbuf.size = out_len;
-
- /* TODO! Endianness of header */
-
- /* TODO: Add checks for fuse_session_exited */
- bufv.buf[0] = fbuf;
- bufv.count = 1;
- pbufv = &bufv;
- }
- pbufv->idx = 0;
- pbufv->off = 0;
- fuse_session_process_buf_int(se, pbufv, &req->ch);
-
-out:
- if (allocated_bufv) {
- g_free(pbufv);
- }
-
- /* If the request has no reply, still recycle the virtqueue element */
- if (!req->reply_sent) {
- fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", __func__,
- elem->index);
- vq_send_element(qi, elem, 0);
- }
-
- pthread_mutex_destroy(&req->ch.lock);
- g_free(fbuf.mem);
- free(req);
-}
-
-/* Thread function for individual queues, created when a queue is 'started' */
-static void *fv_queue_thread(void *opaque)
-{
- struct fv_QueueInfo *qi = opaque;
- struct VuDev *dev = &qi->virtio_dev->dev;
- struct VuVirtq *q = vu_get_queue(dev, qi->qidx);
- struct fuse_session *se = qi->virtio_dev->se;
- GThreadPool *pool = NULL;
- GList *req_list = NULL;
-
- if (se->thread_pool_size) {
- fuse_log(FUSE_LOG_DEBUG, "%s: Creating thread pool for Queue %d\n",
- __func__, qi->qidx);
- pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size,
- FALSE, NULL);
- if (!pool) {
- fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__);
- return NULL;
- }
- }
-
- fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__,
- qi->qidx, qi->kick_fd);
- while (1) {
- struct pollfd pf[2];
-
- pf[0].fd = qi->kick_fd;
- pf[0].events = POLLIN;
- pf[0].revents = 0;
- pf[1].fd = qi->kill_fd;
- pf[1].events = POLLIN;
- pf[1].revents = 0;
-
- fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__,
- qi->qidx);
- int poll_res = ppoll(pf, 2, NULL, NULL);
-
- if (poll_res == -1) {
- if (errno == EINTR) {
- fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n",
- __func__);
- continue;
- }
- fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n");
- break;
- }
- assert(poll_res >= 1);
- if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
- fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x Queue %d\n",
- __func__, pf[0].revents, qi->qidx);
- break;
- }
- if (pf[1].revents & (POLLERR | POLLHUP | POLLNVAL)) {
- fuse_log(FUSE_LOG_ERR,
- "%s: Unexpected poll revents %x Queue %d killfd\n",
- __func__, pf[1].revents, qi->qidx);
- break;
- }
- if (pf[1].revents) {
- fuse_log(FUSE_LOG_INFO, "%s: kill event on queue %d - quitting\n",
- __func__, qi->qidx);
- break;
- }
- assert(pf[0].revents & POLLIN);
- fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__,
- qi->qidx);
-
- eventfd_t evalue;
- if (eventfd_read(qi->kick_fd, &evalue)) {
- fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n");
- break;
- }
- /* Mutual exclusion with virtio_loop() */
- vu_dispatch_rdlock(qi->virtio_dev);
- pthread_mutex_lock(&qi->vq_lock);
- /* out is from the guest, in is to the guest */
- unsigned int in_bytes, out_bytes;
- vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0);
-
- fuse_log(FUSE_LOG_DEBUG,
- "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n",
- __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);
-
- while (1) {
- FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest));
- if (!req) {
- break;
- }
-
- req->reply_sent = false;
-
- if (!se->thread_pool_size) {
- req_list = g_list_prepend(req_list, req);
- } else {
- g_thread_pool_push(pool, req, NULL);
- }
- }
-
- pthread_mutex_unlock(&qi->vq_lock);
- vu_dispatch_unlock(qi->virtio_dev);
-
- /* Process all the requests. */
- if (!se->thread_pool_size && req_list != NULL) {
- req_list = g_list_reverse(req_list);
- g_list_foreach(req_list, fv_queue_worker, qi);
- g_list_free(req_list);
- req_list = NULL;
- }
- }
-
- if (pool) {
- g_thread_pool_free(pool, FALSE, TRUE);
- }
-
- return NULL;
-}
-
-static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx)
-{
- int ret;
- struct fv_QueueInfo *ourqi;
-
- assert(qidx < vud->nqueues);
- ourqi = vud->qi[qidx];
-
- /* Kill the thread */
- if (eventfd_write(ourqi->kill_fd, 1)) {
- fuse_log(FUSE_LOG_ERR, "Eventfd_write for queue %d: %s\n",
- qidx, strerror(errno));
- }
- ret = pthread_join(ourqi->thread, NULL);
- if (ret) {
- fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n",
- __func__, qidx, ret);
- }
- pthread_mutex_destroy(&ourqi->vq_lock);
- close(ourqi->kill_fd);
- ourqi->kick_fd = -1;
- g_free(vud->qi[qidx]);
- vud->qi[qidx] = NULL;
-}
-
-static void stop_all_queues(struct fv_VuDev *vud)
-{
- for (int i = 0; i < vud->nqueues; i++) {
- if (!vud->qi[i]) {
- continue;
- }
-
- fuse_log(FUSE_LOG_INFO, "%s: Stopping queue %d thread\n", __func__, i);
- fv_queue_cleanup_thread(vud, i);
- }
-}
-
-/* Callback from libvhost-user on start or stop of a queue */
-static void fv_queue_set_started(VuDev *dev, int qidx, bool started)
-{
- struct fv_VuDev *vud = container_of(dev, struct fv_VuDev, dev);
- struct fv_QueueInfo *ourqi;
-
- fuse_log(FUSE_LOG_INFO, "%s: qidx=%d started=%d\n", __func__, qidx,
- started);
- assert(qidx >= 0);
-
- /*
- * Ignore additional request queues for now. passthrough_ll.c must be
- * audited for thread-safety issues first. It was written with a
- * well-behaved client in mind and may not protect against all types of
- * races yet.
- */
- if (qidx > 1) {
- fuse_log(FUSE_LOG_ERR,
- "%s: multiple request queues not yet implemented, please only "
- "configure 1 request queue\n",
- __func__);
- exit(EXIT_FAILURE);
- }
-
- if (started) {
- /* Fire up a thread to watch this queue */
- if (qidx >= vud->nqueues) {
- vud->qi = g_realloc_n(vud->qi, qidx + 1, sizeof(vud->qi[0]));
- memset(vud->qi + vud->nqueues, 0,
- sizeof(vud->qi[0]) * (1 + (qidx - vud->nqueues)));
- vud->nqueues = qidx + 1;
- }
- if (!vud->qi[qidx]) {
- vud->qi[qidx] = g_new0(struct fv_QueueInfo, 1);
- vud->qi[qidx]->virtio_dev = vud;
- vud->qi[qidx]->qidx = qidx;
- } else {
- /* Shouldn't have been started */
- assert(vud->qi[qidx]->kick_fd == -1);
- }
- ourqi = vud->qi[qidx];
- ourqi->kick_fd = dev->vq[qidx].kick_fd;
-
- ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE);
- assert(ourqi->kill_fd != -1);
- pthread_mutex_init(&ourqi->vq_lock, NULL);
-
- if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) {
- fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n",
- __func__, qidx);
- assert(0);
- }
- } else {
- /*
- * Temporarily drop write-lock taken in virtio_loop() so that
- * the queue thread doesn't block in virtio_send_msg().
- */
- vu_dispatch_unlock(vud);
- fv_queue_cleanup_thread(vud, qidx);
- vu_dispatch_wrlock(vud);
- }
-}
-
-static bool fv_queue_order(VuDev *dev, int qidx)
-{
- return false;
-}
-
-static const VuDevIface fv_iface = {
- .get_features = fv_get_features,
- .set_features = fv_set_features,
-
- /* Don't need process message, we've not got any at vhost-user level */
- .queue_set_started = fv_queue_set_started,
-
- .queue_is_processed_in_order = fv_queue_order,
-};
-
-/*
- * Main loop; this mostly deals with events on the vhost-user
- * socket itself, and not actual fuse data.
- */
-int virtio_loop(struct fuse_session *se)
-{
- fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__);
-
- while (!fuse_session_exited(se)) {
- struct pollfd pf[1];
- bool ok;
- pf[0].fd = se->vu_socketfd;
- pf[0].events = POLLIN;
- pf[0].revents = 0;
-
- fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for VU event\n", __func__);
- int poll_res = ppoll(pf, 1, NULL, NULL);
-
- if (poll_res == -1) {
- if (errno == EINTR) {
- fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n",
- __func__);
- continue;
- }
- fuse_log(FUSE_LOG_ERR, "virtio_loop ppoll: %m\n");
- break;
- }
- assert(poll_res == 1);
- if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
- fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x\n", __func__,
- pf[0].revents);
- break;
- }
- assert(pf[0].revents & POLLIN);
- fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__);
- /* Mutual exclusion with fv_queue_thread() */
- vu_dispatch_wrlock(se->virtio_dev);
-
- ok = vu_dispatch(&se->virtio_dev->dev);
-
- vu_dispatch_unlock(se->virtio_dev);
-
- if (!ok) {
- fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__);
- break;
- }
- }
-
- /*
- * Make sure all fv_queue_thread()s quit on exit, as we're about to
- * free virtio dev and fuse session, no one should access them anymore.
- */
- stop_all_queues(se->virtio_dev);
- fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__);
-
- return 0;
-}
-
-static void strreplace(char *s, char old, char new)
-{
- for (; *s; ++s) {
- if (*s == old) {
- *s = new;
- }
- }
-}
-
-static bool fv_socket_lock(struct fuse_session *se)
-{
- g_autofree gchar *sk_name = NULL;
- g_autofree gchar *pidfile = NULL;
- g_autofree gchar *state = NULL;
- g_autofree gchar *dir = NULL;
- Error *local_err = NULL;
-
- state = qemu_get_local_state_dir();
- dir = g_build_filename(state, "run", "virtiofsd", NULL);
-
- if (g_mkdir_with_parents(dir, S_IRWXU) < 0) {
- fuse_log(FUSE_LOG_ERR, "%s: Failed to create directory %s: %s\n",
- __func__, dir, strerror(errno));
- return false;
- }
-
- sk_name = g_strdup(se->vu_socket_path);
- strreplace(sk_name, '/', '.');
- pidfile = g_strdup_printf("%s/%s.pid", dir, sk_name);
-
- if (!qemu_write_pidfile(pidfile, &local_err)) {
- error_report_err(local_err);
- return false;
- }
-
- return true;
-}
-
-static int fv_create_listen_socket(struct fuse_session *se)
-{
- struct sockaddr_un un;
- mode_t old_umask;
-
- /* Nothing to do if fd is already initialized */
- if (se->vu_listen_fd >= 0) {
- return 0;
- }
-
- if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) {
- fuse_log(FUSE_LOG_ERR, "Socket path too long\n");
- return -1;
- }
-
- if (!strlen(se->vu_socket_path)) {
- fuse_log(FUSE_LOG_ERR, "Socket path is empty\n");
- return -1;
- }
-
- /* Check the vu_socket_path is already used */
- if (!fv_socket_lock(se)) {
- return -1;
- }
-
- /*
- * Create the Unix socket to communicate with qemu
- * based on QEMU's vhost-user-bridge
- */
- unlink(se->vu_socket_path);
- strcpy(un.sun_path, se->vu_socket_path);
- size_t addr_len = sizeof(un);
-
- int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0);
- if (listen_sock == -1) {
- fuse_log(FUSE_LOG_ERR, "vhost socket creation: %m\n");
- return -1;
- }
- un.sun_family = AF_UNIX;
-
- /*
- * Unfortunately bind doesn't let you set the mask on the socket,
- * so set umask appropriately and restore it later.
- */
- if (se->vu_socket_group) {
- old_umask = umask(S_IROTH | S_IWOTH | S_IXOTH);
- } else {
- old_umask = umask(S_IRGRP | S_IWGRP | S_IXGRP |
- S_IROTH | S_IWOTH | S_IXOTH);
- }
- if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) {
- fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n");
- close(listen_sock);
- umask(old_umask);
- return -1;
- }
- if (se->vu_socket_group) {
- struct group *g = getgrnam(se->vu_socket_group);
- if (g) {
- if (chown(se->vu_socket_path, -1, g->gr_gid) == -1) {
- fuse_log(FUSE_LOG_WARNING,
- "vhost socket failed to set group to %s (%d): %m\n",
- se->vu_socket_group, g->gr_gid);
- }
- } else {
- fuse_log(FUSE_LOG_ERR,
- "vhost socket: unable to find group '%s'\n",
- se->vu_socket_group);
- close(listen_sock);
- umask(old_umask);
- return -1;
- }
- }
- umask(old_umask);
-
- if (listen(listen_sock, 1) == -1) {
- fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n");
- close(listen_sock);
- return -1;
- }
-
- se->vu_listen_fd = listen_sock;
- return 0;
-}
-
-int virtio_session_mount(struct fuse_session *se)
-{
- int ret;
-
- /*
- * Test that unshare(CLONE_FS) works. fv_queue_worker() will need it. It's
- * an unprivileged system call but some Docker/Moby versions are known to
- * reject it via seccomp when CAP_SYS_ADMIN is not given.
- *
- * Note that the program is single-threaded here so this syscall has no
- * visible effect and is safe to make.
- */
- ret = unshare(CLONE_FS);
- if (ret == -1 && errno == EPERM) {
- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_FS) failed with EPERM. If "
- "running in a container please check that the container "
- "runtime seccomp policy allows unshare.\n");
- return -1;
- }
-
- ret = fv_create_listen_socket(se);
- if (ret < 0) {
- return ret;
- }
-
- se->fd = -1;
-
- fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n",
- __func__);
- int data_sock = accept(se->vu_listen_fd, NULL, NULL);
- if (data_sock == -1) {
- fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n");
- close(se->vu_listen_fd);
- return -1;
- }
- close(se->vu_listen_fd);
- se->vu_listen_fd = -1;
- fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n",
- __func__);
-
- /* TODO: Some cleanup/deallocation! */
- se->virtio_dev = g_new0(struct fv_VuDev, 1);
-
- se->vu_socketfd = data_sock;
- se->virtio_dev->se = se;
- pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL);
- if (!vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, NULL,
- fv_set_watch, fv_remove_watch, &fv_iface)) {
- fuse_log(FUSE_LOG_ERR, "%s: vu_init failed\n", __func__);
- return -1;
- }
-
- return 0;
-}
-
-void virtio_session_close(struct fuse_session *se)
-{
- close(se->vu_socketfd);
-
- if (!se->virtio_dev) {
- return;
- }
-
- g_free(se->virtio_dev->qi);
- pthread_rwlock_destroy(&se->virtio_dev->vu_dispatch_rwlock);
- g_free(se->virtio_dev);
- se->virtio_dev = NULL;
-}
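
The copy_iov() and skip_iov() helpers in the deleted file above walk two scatter/gather lists in lockstep, advancing whichever iovec element runs out first; that is how FUSE replies are copied into the guest-supplied descriptors. Below is a minimal standalone sketch of the same pattern, assuming nothing beyond POSIX iovecs; sg_copy() and the buffers in main() are illustrative names, not part of the deleted code.

#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

/* Copy 'to_copy' bytes from src_iov[] into dst_iov[]; the caller has already
 * checked that both sides are large enough, as copy_iov()'s callers do. */
static void sg_copy(struct iovec *src_iov, int src_count,
                    struct iovec *dst_iov, int dst_count, size_t to_copy)
{
    size_t src_off = 0, dst_off = 0;

    while (to_copy) {
        assert(src_count && dst_count);
        size_t n = src_iov->iov_len - src_off;     /* bytes left in this src */
        size_t room = dst_iov->iov_len - dst_off;  /* room left in this dst */
        if (n > room) {
            n = room;
        }
        if (n > to_copy) {
            n = to_copy;
        }
        memcpy((char *)dst_iov->iov_base + dst_off,
               (char *)src_iov->iov_base + src_off, n);
        src_off += n;
        dst_off += n;
        to_copy -= n;
        if (src_off == src_iov->iov_len) {         /* source element drained */
            src_iov++;
            src_count--;
            src_off = 0;
        }
        if (dst_off == dst_iov->iov_len) {         /* destination element full */
            dst_iov++;
            dst_count--;
            dst_off = 0;
        }
    }
}

int main(void)
{
    char a[] = "abcd", b[] = "efg";                /* 4 and 3 payload bytes */
    char out1[5] = { 0 }, out2[3] = { 0 };
    struct iovec src[] = { { a, 4 }, { b, 3 } };
    struct iovec dst[] = { { out1, 5 }, { out2, 2 } };

    sg_copy(src, 2, dst, 2, 7);
    printf("%.5s%.2s\n", out1, out2);              /* prints "abcdefg" */
    return 0;
}

Tracking the offsets locally, as copy_iov() does, leaves the caller's iovec arrays untouched, which matters when the same element array is later handed back to vu_queue_push().
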
diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h
deleted file mode 100644
index 1116840..0000000
--- a/tools/virtiofsd/fuse_virtio.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * virtio-fs glue for FUSE
- * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- * Dave Gilbert <dgilbert@redhat.com>
- *
- * Implements the glue between libfuse and libvhost-user
- *
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB
- */
-
-#ifndef FUSE_VIRTIO_H
-#define FUSE_VIRTIO_H
-
-#include "fuse_i.h"
-
-struct fuse_session;
-
-int virtio_session_mount(struct fuse_session *se);
-void virtio_session_close(struct fuse_session *se);
-int virtio_loop(struct fuse_session *se);
-
-
-int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
- struct iovec *iov, int count);
-
-int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
- struct iovec *iov, int count,
- struct fuse_bufvec *buf, size_t len);
-
-#endif
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
deleted file mode 100644
index f5f66f2..0000000
--- a/tools/virtiofsd/helper.c
+++ /dev/null
@@ -1,409 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
- *
- * Helper functions to create (simple) standalone programs. With the
- * aid of these functions it should be possible to create full FUSE
- * file system by implementing nothing but the request handlers.
-
- * This program can be distributed under the terms of the GNU LGPLv2.
- * See the file COPYING.LIB.
- */
-
-#include "qemu/osdep.h"
-#include "fuse_i.h"
-#include "fuse_lowlevel.h"
-#include "fuse_misc.h"
-#include "fuse_opt.h"
-
-#include <sys/param.h>
-#include <sys/resource.h>
-
-#define FUSE_HELPER_OPT(t, p) \
- { \
- t, offsetof(struct fuse_cmdline_opts, p), 1 \
- }
-#define FUSE_HELPER_OPT_VALUE(t, p, v) \
- { \
- t, offsetof(struct fuse_cmdline_opts, p), v \
- }
-
-static const struct fuse_opt fuse_helper_opts[] = {
- FUSE_HELPER_OPT("-h", show_help),
- FUSE_HELPER_OPT("--help", show_help),
- FUSE_HELPER_OPT("-V", show_version),
- FUSE_HELPER_OPT("--version", show_version),
- FUSE_HELPER_OPT("--print-capabilities", print_capabilities),
- FUSE_HELPER_OPT("-d", debug),
- FUSE_HELPER_OPT("debug", debug),
- FUSE_HELPER_OPT("-d", foreground),
- FUSE_HELPER_OPT("debug", foreground),
- FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP),
- FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP),
- FUSE_HELPER_OPT("-f", foreground),
- FUSE_HELPER_OPT_VALUE("--daemonize", foreground, 0),
- FUSE_HELPER_OPT("fsname=", nodefault_subtype),
- FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP),
- FUSE_HELPER_OPT("subtype=", nodefault_subtype),
- FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP),
- FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads),
- FUSE_HELPER_OPT("--rlimit-nofile=%lu", rlimit_nofile),
- FUSE_HELPER_OPT("--syslog", syslog),
- FUSE_HELPER_OPT_VALUE("log_level=debug", log_level, FUSE_LOG_DEBUG),
- FUSE_HELPER_OPT_VALUE("log_level=info", log_level, FUSE_LOG_INFO),
- FUSE_HELPER_OPT_VALUE("log_level=warn", log_level, FUSE_LOG_WARNING),
- FUSE_HELPER_OPT_VALUE("log_level=err", log_level, FUSE_LOG_ERR),
- FUSE_OPT_END
-};
-
-struct fuse_conn_info_opts {
- int atomic_o_trunc;
- int no_remote_posix_lock;
- int no_remote_flock;
- int splice_write;
- int splice_move;
- int splice_read;
- int no_splice_write;
- int no_splice_move;
- int no_splice_read;
- int auto_inval_data;
- int no_auto_inval_data;
- int no_readdirplus;
- int no_readdirplus_auto;
- int async_dio;
- int no_async_dio;
- int writeback_cache;
- int no_writeback_cache;
- int async_read;
- int sync_read;
- unsigned max_write;
- unsigned max_readahead;
- unsigned max_background;
- unsigned congestion_threshold;
- unsigned time_gran;
- int set_max_write;
- int set_max_readahead;
- int set_max_background;
- int set_congestion_threshold;
- int set_time_gran;
-};
-
-#define CONN_OPTION(t, p, v) \
- { \
- t, offsetof(struct fuse_conn_info_opts, p), v \
- }
-static const struct fuse_opt conn_info_opt_spec[] = {
- CONN_OPTION("max_write=%u", max_write, 0),
- CONN_OPTION("max_write=", set_max_write, 1),
- CONN_OPTION("max_readahead=%u", max_readahead, 0),
- CONN_OPTION("max_readahead=", set_max_readahead, 1),
- CONN_OPTION("max_background=%u", max_background, 0),
- CONN_OPTION("max_background=", set_max_background, 1),
- CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0),
- CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1),
- CONN_OPTION("sync_read", sync_read, 1),
- CONN_OPTION("async_read", async_read, 1),
- CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1),
- CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1),
- CONN_OPTION("no_remote_lock", no_remote_flock, 1),
- CONN_OPTION("no_remote_flock", no_remote_flock, 1),
- CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1),
- CONN_OPTION("splice_write", splice_write, 1),
- CONN_OPTION("no_splice_write", no_splice_write, 1),
- CONN_OPTION("splice_move", splice_move, 1),
- CONN_OPTION("no_splice_move", no_splice_move, 1),
- CONN_OPTION("splice_read", splice_read, 1),
- CONN_OPTION("no_splice_read", no_splice_read, 1),
- CONN_OPTION("auto_inval_data", auto_inval_data, 1),
- CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1),
- CONN_OPTION("readdirplus=no", no_readdirplus, 1),
- CONN_OPTION("readdirplus=yes", no_readdirplus, 0),
- CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1),
- CONN_OPTION("readdirplus=auto", no_readdirplus, 0),
- CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0),
- CONN_OPTION("async_dio", async_dio, 1),
- CONN_OPTION("no_async_dio", no_async_dio, 1),
- CONN_OPTION("writeback_cache", writeback_cache, 1),
- CONN_OPTION("no_writeback_cache", no_writeback_cache, 1),
- CONN_OPTION("time_gran=%u", time_gran, 0),
- CONN_OPTION("time_gran=", set_time_gran, 1),
- FUSE_OPT_END
-};
-
-
-void fuse_cmdline_help(void)
-{
- printf(" -h --help print help\n"
- " -V --version print version\n"
- " --print-capabilities print vhost-user.json\n"
- " -d -o debug enable debug output (implies -f)\n"
- " --syslog log to syslog (default stderr)\n"
- " -f foreground operation\n"
- " --daemonize run in background\n"
- " -o cache=<mode> cache mode. could be one of \"auto, "
- "always, none\"\n"
- " default: auto\n"
- " -o flock|no_flock enable/disable flock\n"
- " default: no_flock\n"
- " -o log_level=<level> log level, default to \"info\"\n"
- " level could be one of \"debug, "
- "info, warn, err\"\n"
- " -o max_idle_threads the maximum number of idle worker "
- "threads\n"
- " allowed (default: 10)\n"
- " -o posix_lock|no_posix_lock\n"
- " enable/disable remote posix lock\n"
- " default: no_posix_lock\n"
- " -o readdirplus|no_readdirplus\n"
- " enable/disable readirplus\n"
- " default: readdirplus except with "
- "cache=none\n"
- " -o sandbox=namespace|chroot\n"
- " sandboxing mode:\n"
- " - namespace: mount, pid, and net\n"
- " namespaces with pivot_root(2)\n"
- " into shared directory\n"
- " - chroot: chroot(2) into shared\n"
- " directory (use in containers)\n"
- " default: namespace\n"
- " -o timeout=<number> I/O timeout (seconds)\n"
- " default: depends on cache= option.\n"
- " -o writeback|no_writeback enable/disable writeback cache\n"
- " default: no_writeback\n"
- " -o xattr|no_xattr enable/disable xattr\n"
- " default: no_xattr\n"
- " -o xattrmap=<mapping> Enable xattr mapping (enables xattr)\n"
- " <mapping> is a string consists of a series of rules\n"
- " e.g. -o xattrmap=:map::user.virtiofs.:\n"
- " -o modcaps=CAPLIST Modify the list of capabilities\n"
- " e.g. -o modcaps=+sys_admin:-chown\n"
- " --rlimit-nofile=<num> set maximum number of file descriptors\n"
- " (0 leaves rlimit unchanged)\n"
- " default: min(1000000, fs.file-max - 16384)\n"
- " if the current rlimit is lower\n"
- " -o allow_direct_io|no_allow_direct_io\n"
- " retain/discard O_DIRECT flags passed down\n"
- " to virtiofsd from guest applications.\n"
- " default: no_allow_direct_io\n"
- " -o announce_submounts Announce sub-mount points to the guest\n"
- " -o posix_acl/no_posix_acl Enable/Disable posix_acl. (default: disabled)\n"
- " -o security_label/no_security_label Enable/Disable security label. (default: disabled)\n"
- " -o killpriv_v2/no_killpriv_v2\n"
- " Enable/Disable FUSE_HANDLE_KILLPRIV_V2.\n"
- " (default: enabled as long as client supports it)\n"
- );
-}
-
-static int fuse_helper_opt_proc(void *data, const char *arg, int key,
- struct fuse_args *outargs)
-{
- (void)data;
- (void)outargs;
-
- switch (key) {
- case FUSE_OPT_KEY_NONOPT:
- fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg);
- return -1;
-
- default:
- /* Pass through unknown options */
- return 1;
- }
-}
-
-static unsigned long get_default_rlimit_nofile(void)
-{
- g_autofree gchar *file_max_str = NULL;
- const rlim_t reserved_fds = 16384; /* leave at least this many fds free */
- rlim_t max_fds = 1000000; /* our default RLIMIT_NOFILE target */
- rlim_t file_max;
- struct rlimit rlim;
-
- /*
- * Reduce max_fds below the system-wide maximum, if necessary. This
- * ensures there are fds available for other processes so we don't
- * cause resource exhaustion.
- */
- if (!g_file_get_contents("/proc/sys/fs/file-max", &file_max_str,
- NULL, NULL)) {
- fuse_log(FUSE_LOG_ERR, "can't read /proc/sys/fs/file-max\n");
- exit(1);
- }
- file_max = g_ascii_strtoull(file_max_str, NULL, 10);
- if (file_max < 2 * reserved_fds) {
- fuse_log(FUSE_LOG_ERR,
- "The fs.file-max sysctl is too low (%lu) to allow a "
- "reasonable number of open files.\n",
- (unsigned long)file_max);
- exit(1);
- }
- max_fds = MIN(file_max - reserved_fds, max_fds);
-
- if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
- fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): %m\n");
- exit(1);
- }
-
- if (rlim.rlim_cur >= max_fds) {
- return 0; /* we have more fds available than required! */
- }
- return max_fds;
-}
-
-int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts)
-{
- memset(opts, 0, sizeof(struct fuse_cmdline_opts));
-
- opts->max_idle_threads = 10;
- opts->rlimit_nofile = get_default_rlimit_nofile();
- opts->foreground = 1;
-
- if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) ==
- -1) {
- return -1;
- }
-
- return 0;
-}
-
-
-int fuse_daemonize(int foreground)
-{
- int ret = 0, rett;
- if (!foreground) {
- int nullfd;
- int waiter[2];
- char completed;
-
- if (!g_unix_open_pipe(waiter, FD_CLOEXEC, NULL)) {
- fuse_log(FUSE_LOG_ERR, "fuse_daemonize: pipe: %s\n",
- strerror(errno));
- return -1;
- }
-
- /*
- * daemonize the current process by forking it and killing the
- * parent. This makes the current process a child of 'init'.
- */
- switch (fork()) {
- case -1:
- fuse_log(FUSE_LOG_ERR, "fuse_daemonize: fork: %s\n",
- strerror(errno));
- return -1;
- case 0:
- break;
- default:
- _exit(read(waiter[0], &completed,
- sizeof(completed) != sizeof(completed)));
- }
-
- if (setsid() == -1) {
- fuse_log(FUSE_LOG_ERR, "fuse_daemonize: setsid: %s\n",
- strerror(errno));
- return -1;
- }
-
- ret = chdir("/");
-
- nullfd = open("/dev/null", O_RDWR, 0);
- if (nullfd != -1) {
- rett = dup2(nullfd, 0);
- if (!ret) {
- ret = rett;
- }
- rett = dup2(nullfd, 1);
- if (!ret) {
- ret = rett;
- }
- rett = dup2(nullfd, 2);
- if (!ret) {
- ret = rett;
- }
- if (nullfd > 2) {
- close(nullfd);
- }
- }
-
- /* Propagate completion of daemon initialization */
- completed = 1;
- rett = write(waiter[1], &completed, sizeof(completed));
- if (!ret) {
- ret = rett;
- }
- close(waiter[0]);
- close(waiter[1]);
- } else {
- ret = chdir("/");
- }
- return ret;
-}
-
-void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts,
- struct fuse_conn_info *conn)
-{
- if (opts->set_max_write) {
- conn->max_write = opts->max_write;
- }
- if (opts->set_max_background) {
- conn->max_background = opts->max_background;
- }
- if (opts->set_congestion_threshold) {
- conn->congestion_threshold = opts->congestion_threshold;
- }
- if (opts->set_time_gran) {
- conn->time_gran = opts->time_gran;
- }
- if (opts->set_max_readahead) {
- conn->max_readahead = opts->max_readahead;
- }
-
-#define LL_ENABLE(cond, cap) \
- if (cond) \
- conn->want |= (cap)
-#define LL_DISABLE(cond, cap) \
- if (cond) \
- conn->want &= ~(cap)
-
- LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ);
- LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ);
-
- LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE);
- LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE);
-
- LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE);
- LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE);
-
- LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA);
- LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA);
-
- LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS);
- LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO);
-
- LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO);
- LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO);
-
- LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE);
- LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE);
-
- LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ);
- LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ);
-
- LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS);
- LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS);
-}
-
-struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args)
-{
- struct fuse_conn_info_opts *opts;
-
- opts = calloc(1, sizeof(struct fuse_conn_info_opts));
- if (opts == NULL) {
- fuse_log(FUSE_LOG_ERR, "calloc failed\n");
- return NULL;
- }
- if (fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) {
- free(opts);
- return NULL;
- }
- return opts;
-}
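
The fuse_helper_opts[] and conn_info_opt_spec[] tables above rely on offsetof() so that one generic parser can write each matched flag straight into a struct field, and the same token can appear twice to set two fields (as "-d" does for debug and foreground). The sketch below shows that table-driven pattern in isolation; the flag names are made up and a hand-rolled matcher stands in for fuse_opt_parse().

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct opts {
    int debug;
    int foreground;
};

struct opt_spec {
    const char *flag;   /* exact command-line token */
    size_t offset;      /* offsetof() of the int field to set */
    int value;          /* value written into that field on a match */
};

#define OPT(t, field, v) { t, offsetof(struct opts, field), v }

static const struct opt_spec spec[] = {
    OPT("-d", debug, 1),
    OPT("-d", foreground, 1),         /* one flag can hit two fields */
    OPT("-f", foreground, 1),
    OPT("--daemonize", foreground, 0),
    { NULL, 0, 0 }
};

static void parse(struct opts *o, int argc, char **argv)
{
    for (int i = 1; i < argc; i++) {
        for (const struct opt_spec *s = spec; s->flag; s++) {
            if (strcmp(argv[i], s->flag) == 0) {
                *(int *)((char *)o + s->offset) = s->value;
            }
        }
    }
}

int main(int argc, char **argv)
{
    struct opts o = { 0, 1 };         /* foreground defaults on, like helper.c */

    parse(&o, argc, argv);
    printf("debug=%d foreground=%d\n", o.debug, o.foreground);
    return 0;
}

Initialising foreground to 1 before parsing mirrors fuse_parse_cmdline() above, where --daemonize then clears it.
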
diff --git a/tools/virtiofsd/meson.build b/tools/virtiofsd/meson.build
deleted file mode 100644
index c134ba6..0000000
--- a/tools/virtiofsd/meson.build
+++ /dev/null
@@ -1,18 +0,0 @@
-executable('virtiofsd', files(
- 'buffer.c',
- 'fuse_opt.c',
- 'fuse_log.c',
- 'fuse_lowlevel.c',
- 'fuse_signals.c',
- 'fuse_virtio.c',
- 'helper.c',
- 'passthrough_ll.c',
- 'passthrough_seccomp.c'),
- dependencies: [seccomp, qemuutil, libcap_ng, vhost_user],
- install: true,
- install_dir: get_option('libexecdir'))
-
-configure_file(input: '50-qemu-virtiofsd.json.in',
- output: '50-qemu-virtiofsd.json',
- configuration: { 'libexecdir' : get_option('prefix') / get_option('libexecdir') },
- install_dir: qemu_datadir / 'vhost-user')
diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h
deleted file mode 100644
index 0b98275..0000000
--- a/tools/virtiofsd/passthrough_helpers.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE
- */
-
-/*
- * Creates files on the underlying file system in response to a FUSE_MKNOD
- * operation
- */
-static int mknod_wrapper(int dirfd, const char *path, const char *link,
- int mode, dev_t rdev)
-{
- int res;
-
- if (S_ISREG(mode)) {
- res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode);
- if (res >= 0) {
- res = close(res);
- }
- } else if (S_ISDIR(mode)) {
- res = mkdirat(dirfd, path, mode);
- } else if (S_ISLNK(mode) && link != NULL) {
- res = symlinkat(link, dirfd, path);
- } else if (S_ISFIFO(mode)) {
- res = mkfifoat(dirfd, path, mode);
- } else {
- res = mknodat(dirfd, path, mode, rdev);
- }
-
- return res;
-}
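
mknod_wrapper() above dispatches on the S_IS*() type bits and always uses the *at() family, so every node is created relative to a directory fd rather than an absolute path. A small sketch of the same idea follows, limited to POSIX *at() calls; create_node() and the /tmp target are purely illustrative (device nodes are omitted).

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

/* Create a node of the requested type relative to dirfd, mirroring the
 * S_IS*() dispatch of mknod_wrapper(). */
static int create_node(int dirfd, const char *name, mode_t mode)
{
    if (S_ISDIR(mode)) {
        return mkdirat(dirfd, name, mode & 07777);
    }
    if (S_ISFIFO(mode)) {
        return mkfifoat(dirfd, name, mode & 07777);
    }
    /* Regular file: create it and immediately close it, as the wrapper does */
    int fd = openat(dirfd, name, O_CREAT | O_EXCL | O_WRONLY, mode & 07777);
    if (fd < 0) {
        return -1;
    }
    return close(fd);
}

int main(void)
{
    int dirfd = open("/tmp", O_RDONLY | O_DIRECTORY);

    if (dirfd < 0) {
        perror("open /tmp");
        return 1;
    }
    if (create_node(dirfd, "example-node", S_IFREG | 0600) < 0) {
        perror("create_node");
    }
    close(dirfd);
    return 0;
}
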
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
deleted file mode 100644
index 40ea2ed..0000000
--- a/tools/virtiofsd/passthrough_ll.c
+++ /dev/null
@@ -1,4521 +0,0 @@
-/*
- * FUSE: Filesystem in Userspace
- * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
- *
- * This program can be distributed under the terms of the GNU GPLv2.
- * See the file COPYING.
- */
-
-/*
- *
- * This file system mirrors the existing file system hierarchy of the
- * system, starting at the root file system. This is implemented by
- * just "passing through" all requests to the corresponding user-space
- * libc functions. In contrast to passthrough.c and passthrough_fh.c,
- * this implementation uses the low-level API. Its performance should
- * be the least bad among the three, but many operations are not
- * implemented. In particular, it is not possible to remove files (or
- * directories) because the code necessary to defer actual removal
- * until the file is not opened anymore would make the example much
- * more complicated.
- *
- * When writeback caching is enabled (-o writeback mount option), it
- * is only possible to write to files for which the mounting user has
- * read permissions. This is because the writeback cache requires the
- * kernel to be able to issue read requests for all files (which the
- * passthrough filesystem cannot satisfy if it can't read the file in
- * the underlying filesystem).
- *
- * Compile with:
- *
- * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o
- * passthrough_ll
- *
- * ## Source code ##
- * \include passthrough_ll.c
- */
-
-#include "qemu/osdep.h"
-#include "qemu/timer.h"
-#include "qemu-version.h"
-#include "qemu/help-texts.h"
-#include "fuse_virtio.h"
-#include "fuse_log.h"
-#include "fuse_lowlevel.h"
-#include "standard-headers/linux/fuse.h"
-#include <cap-ng.h>
-#include <dirent.h>
-#include <pthread.h>
-#include <sys/file.h>
-#include <sys/mount.h>
-#include <sys/prctl.h>
-#include <sys/resource.h>
-#include <sys/syscall.h>
-#include <sys/wait.h>
-#include <sys/xattr.h>
-#include <syslog.h>
-#include <grp.h>
-
-#include "qemu/cutils.h"
-#include "passthrough_helpers.h"
-#include "passthrough_seccomp.h"
-
-/* Keep track of inode posix locks for each owner. */
-struct lo_inode_plock {
- uint64_t lock_owner;
- int fd; /* fd for OFD locks */
-};
-
-struct lo_map_elem {
- union {
- struct lo_inode *inode;
- struct lo_dirp *dirp;
- int fd;
- ssize_t freelist;
- };
- bool in_use;
-};
-
-/* Maps FUSE fh or ino values to internal objects */
-struct lo_map {
- struct lo_map_elem *elems;
- size_t nelems;
- ssize_t freelist;
-};
-
-struct lo_key {
- ino_t ino;
- dev_t dev;
- uint64_t mnt_id;
-};
-
-struct lo_inode {
- int fd;
-
- /*
- * Atomic reference count for this object. The nlookup field holds a
- * reference and releases it when nlookup reaches 0.
- */
- gint refcount;
-
- struct lo_key key;
-
- /*
- * This counter keeps the inode alive during the FUSE session.
- * Incremented when the FUSE inode number is sent in a reply
- * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is
- * released by a FUSE_FORGET request.
- *
- * Note that this value is untrusted because the client can manipulate
- * it arbitrarily using FUSE_FORGET requests.
- *
- * Protected by lo->mutex.
- */
- uint64_t nlookup;
-
- fuse_ino_t fuse_ino;
- pthread_mutex_t plock_mutex;
- GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
-
- mode_t filetype;
-};
-
-struct lo_cred {
- uid_t euid;
- gid_t egid;
- mode_t umask;
-};
-
-enum {
- CACHE_NONE,
- CACHE_AUTO,
- CACHE_ALWAYS,
-};
-
-enum {
- SANDBOX_NAMESPACE,
- SANDBOX_CHROOT,
-};
-
-typedef struct xattr_map_entry {
- char *key;
- char *prepend;
- unsigned int flags;
-} XattrMapEntry;
-
-struct lo_data {
- pthread_mutex_t mutex;
- int sandbox;
- int debug;
- int writeback;
- int flock;
- int posix_lock;
- int xattr;
- char *xattrmap;
- char *xattr_security_capability;
- char *source;
- char *modcaps;
- double timeout;
- int cache;
- int timeout_set;
- int readdirplus_set;
- int readdirplus_clear;
- int allow_direct_io;
- int announce_submounts;
- bool use_statx;
- struct lo_inode root;
- GHashTable *inodes; /* protected by lo->mutex */
- struct lo_map ino_map; /* protected by lo->mutex */
- struct lo_map dirp_map; /* protected by lo->mutex */
- struct lo_map fd_map; /* protected by lo->mutex */
- XattrMapEntry *xattr_map_list;
- size_t xattr_map_nentries;
-
- /* An O_PATH file descriptor to /proc/self/fd/ */
- int proc_self_fd;
- /* An O_PATH file descriptor to /proc/self/task/ */
- int proc_self_task;
- int user_killpriv_v2, killpriv_v2;
- /* If set, virtiofsd is responsible for setting umask during creation */
- bool change_umask;
- int user_posix_acl, posix_acl;
- /* Keeps track if /proc/<pid>/attr/fscreate should be used or not */
- bool use_fscreate;
- int user_security_label;
-};
-
-static const struct fuse_opt lo_opts[] = {
- { "sandbox=namespace",
- offsetof(struct lo_data, sandbox),
- SANDBOX_NAMESPACE },
- { "sandbox=chroot",
- offsetof(struct lo_data, sandbox),
- SANDBOX_CHROOT },
- { "writeback", offsetof(struct lo_data, writeback), 1 },
- { "no_writeback", offsetof(struct lo_data, writeback), 0 },
- { "source=%s", offsetof(struct lo_data, source), 0 },
- { "flock", offsetof(struct lo_data, flock), 1 },
- { "no_flock", offsetof(struct lo_data, flock), 0 },
- { "posix_lock", offsetof(struct lo_data, posix_lock), 1 },
- { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 },
- { "xattr", offsetof(struct lo_data, xattr), 1 },
- { "no_xattr", offsetof(struct lo_data, xattr), 0 },
- { "xattrmap=%s", offsetof(struct lo_data, xattrmap), 0 },
- { "modcaps=%s", offsetof(struct lo_data, modcaps), 0 },
- { "timeout=%lf", offsetof(struct lo_data, timeout), 0 },
- { "timeout=", offsetof(struct lo_data, timeout_set), 1 },
- { "cache=none", offsetof(struct lo_data, cache), CACHE_NONE },
- { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO },
- { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
- { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 },
- { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 },
- { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 },
- { "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 },
- { "announce_submounts", offsetof(struct lo_data, announce_submounts), 1 },
- { "killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 1 },
- { "no_killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 0 },
- { "posix_acl", offsetof(struct lo_data, user_posix_acl), 1 },
- { "no_posix_acl", offsetof(struct lo_data, user_posix_acl), 0 },
- { "security_label", offsetof(struct lo_data, user_security_label), 1 },
- { "no_security_label", offsetof(struct lo_data, user_security_label), 0 },
- FUSE_OPT_END
-};
-static bool use_syslog = false;
-static int current_log_level;
-static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
- uint64_t n);
-
-static struct {
- pthread_mutex_t mutex;
- void *saved;
-} cap;
-/* Whether cap-ng state has been loaded in the current thread from the saved copy */
-static __thread bool cap_loaded = 0;
-
-static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st,
- uint64_t mnt_id);
-static int xattr_map_client(const struct lo_data *lo, const char *client_name,
- char **out_name);
-
-#define FCHDIR_NOFAIL(fd) do { \
- int fchdir_res = fchdir(fd); \
- assert(fchdir_res == 0); \
- } while (0)
-
-static bool is_dot_or_dotdot(const char *name)
-{
- return name[0] == '.' &&
- (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'));
-}
-
-/* Is `path` a single path component that is not "." or ".."? */
-static bool is_safe_path_component(const char *path)
-{
- if (strchr(path, '/')) {
- return false;
- }
-
- return !is_dot_or_dotdot(path);
-}
-
-static bool is_empty(const char *name)
-{
- return name[0] == '\0';
-}
-
-static struct lo_data *lo_data(fuse_req_t req)
-{
- return (struct lo_data *)fuse_req_userdata(req);
-}
-
-/*
- * Tries to figure out if /proc/<pid>/attr/fscreate is usable or not. With
- * selinux=0, read from fscreate returns -EINVAL.
- *
- * TODO: Link with libselinux and use is_selinux_enabled() instead down
- * the line. It will probably be a more reliable indicator.
- */
-static bool is_fscreate_usable(struct lo_data *lo)
-{
- char procname[64];
- int fscreate_fd;
- size_t bytes_read;
-
- sprintf(procname, "%ld/attr/fscreate", syscall(SYS_gettid));
- fscreate_fd = openat(lo->proc_self_task, procname, O_RDWR);
- if (fscreate_fd == -1) {
- return false;
- }
-
- bytes_read = read(fscreate_fd, procname, 64);
- close(fscreate_fd);
- if (bytes_read == -1) {
- return false;
- }
- return true;
-}
-
-/* Helpers to set/reset fscreate */
-static int open_set_proc_fscreate(struct lo_data *lo, const void *ctx,
- size_t ctxlen, int *fd)
-{
- char procname[64];
- int fscreate_fd, err = 0;
- size_t written;
-
- sprintf(procname, "%ld/attr/fscreate", syscall(SYS_gettid));
- fscreate_fd = openat(lo->proc_self_task, procname, O_WRONLY);
- err = fscreate_fd == -1 ? errno : 0;
- if (err) {
- return err;
- }
-
- written = write(fscreate_fd, ctx, ctxlen);
- err = written == -1 ? errno : 0;
- if (err) {
- goto out;
- }
-
- *fd = fscreate_fd;
- return 0;
-out:
- close(fscreate_fd);
- return err;
-}
-
-static void close_reset_proc_fscreate(int fd)
-{
- if ((write(fd, NULL, 0)) == -1) {
- fuse_log(FUSE_LOG_WARNING, "Failed to reset fscreate. err=%d\n", errno);
- }
- close(fd);
- return;
-}
-
-/*
- * Load capng's state from our saved state if the current thread
- * hadn't previously been loaded.
- * returns 0 on success
- */
-static int load_capng(void)
-{
- if (!cap_loaded) {
- pthread_mutex_lock(&cap.mutex);
- capng_restore_state(&cap.saved);
- /*
- * restore_state free's the saved copy
- * so make another.
- */
- cap.saved = capng_save_state();
- if (!cap.saved) {
- pthread_mutex_unlock(&cap.mutex);
- fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n");
- return -EINVAL;
- }
- pthread_mutex_unlock(&cap.mutex);
-
- /*
- * We want to use the loaded state for our pid,
- * not the original
- */
- capng_setpid(syscall(SYS_gettid));
- cap_loaded = true;
- }
- return 0;
-}
-
-/*
- * Helpers for dropping and regaining effective capabilities. Returns 0
- * on success, error otherwise
- */
-static int drop_effective_cap(const char *cap_name, bool *cap_dropped)
-{
- int cap, ret;
-
- cap = capng_name_to_capability(cap_name);
- if (cap < 0) {
- ret = errno;
- fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n",
- cap_name, strerror(errno));
- goto out;
- }
-
- if (load_capng()) {
- ret = errno;
- fuse_log(FUSE_LOG_ERR, "load_capng() failed\n");
- goto out;
- }
-
- /* We don't have this capability in the effective set; nothing to drop. */
- if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) {
- ret = 0;
- goto out;
- }
-
- if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) {
- ret = errno;
- fuse_log(FUSE_LOG_ERR, "capng_update(DROP,) failed\n");
- goto out;
- }
-
- if (capng_apply(CAPNG_SELECT_CAPS)) {
- ret = errno;
- fuse_log(FUSE_LOG_ERR, "drop:capng_apply() failed\n");
- goto out;
- }
-
- ret = 0;
- if (cap_dropped) {
- *cap_dropped = true;
- }
-
-out:
- return ret;
-}
-
-static int gain_effective_cap(const char *cap_name)
-{
- int cap;
- int ret = 0;
-
- cap = capng_name_to_capability(cap_name);
- if (cap < 0) {
- ret = errno;
- fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n",
- cap_name, strerror(errno));
- goto out;
- }
-
- if (load_capng()) {
- ret = errno;
- fuse_log(FUSE_LOG_ERR, "load_capng() failed\n");
- goto out;
- }
-
- if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) {
- ret = errno;
- fuse_log(FUSE_LOG_ERR, "capng_update(ADD,) failed\n");
- goto out;
- }
-
- if (capng_apply(CAPNG_SELECT_CAPS)) {
- ret = errno;
- fuse_log(FUSE_LOG_ERR, "gain:capng_apply() failed\n");
- goto out;
- }
- ret = 0;
-
-out:
- return ret;
-}
-
-/*
- * The host kernel normally drops security.capability xattr's on
- * any write, however if we're remapping xattr names we need to drop
- * whatever the clients security.capability is actually stored as.
- */
-static int drop_security_capability(const struct lo_data *lo, int fd)
-{
- if (!lo->xattr_security_capability) {
- /* We didn't remap the name, let the host kernel do it */
- return 0;
- }
- if (!fremovexattr(fd, lo->xattr_security_capability)) {
- /* All good */
- return 0;
- }
-
- switch (errno) {
- case ENODATA:
- /* Attribute didn't exist, that's fine */
- return 0;
-
- case ENOTSUP:
- /* FS didn't support attribute anyway, also fine */
- return 0;
-
- default:
- /* Hmm other error */
- return errno;
- }
-}
-
-static void lo_map_init(struct lo_map *map)
-{
- map->elems = NULL;
- map->nelems = 0;
- map->freelist = -1;
-}
-
-static void lo_map_destroy(struct lo_map *map)
-{
- g_free(map->elems);
-}
-
-static int lo_map_grow(struct lo_map *map, size_t new_nelems)
-{
- struct lo_map_elem *new_elems;
- size_t i;
-
- if (new_nelems <= map->nelems) {
- return 1;
- }
-
- new_elems = g_try_realloc_n(map->elems, new_nelems, sizeof(map->elems[0]));
- if (!new_elems) {
- return 0;
- }
-
- for (i = map->nelems; i < new_nelems; i++) {
- new_elems[i].freelist = i + 1;
- new_elems[i].in_use = false;
- }
- new_elems[new_nelems - 1].freelist = -1;
-
- map->elems = new_elems;
- map->freelist = map->nelems;
- map->nelems = new_nelems;
- return 1;
-}
-
-static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map)
-{
- struct lo_map_elem *elem;
-
- if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) {
- return NULL;
- }
-
- elem = &map->elems[map->freelist];
- map->freelist = elem->freelist;
-
- elem->in_use = true;
-
- return elem;
-}
-
-static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key)
-{
- ssize_t *prev;
-
- if (!lo_map_grow(map, key + 1)) {
- return NULL;
- }
-
- for (prev = &map->freelist; *prev != -1;
- prev = &map->elems[*prev].freelist) {
- if (*prev == key) {
- struct lo_map_elem *elem = &map->elems[key];
-
- *prev = elem->freelist;
- elem->in_use = true;
- return elem;
- }
- }
- return NULL;
-}
-
-static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key)
-{
- if (key >= map->nelems) {
- return NULL;
- }
- if (!map->elems[key].in_use) {
- return NULL;
- }
- return &map->elems[key];
-}
-
-static void lo_map_remove(struct lo_map *map, size_t key)
-{
- struct lo_map_elem *elem;
-
- if (key >= map->nelems) {
- return;
- }
-
- elem = &map->elems[key];
- if (!elem->in_use) {
- return;
- }
-
- elem->in_use = false;
-
- elem->freelist = map->freelist;
- map->freelist = key;
-}
-
-/* Assumes lo->mutex is held */
-static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd)
-{
- struct lo_map_elem *elem;
-
- elem = lo_map_alloc_elem(&lo->fd_map);
- if (!elem) {
- return -1;
- }
-
- elem->fd = fd;
- return elem - lo->fd_map.elems;
-}
-
-/* Assumes lo->mutex is held */
-static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp)
-{
- struct lo_map_elem *elem;
-
- elem = lo_map_alloc_elem(&lo_data(req)->dirp_map);
- if (!elem) {
- return -1;
- }
-
- elem->dirp = dirp;
- return elem - lo_data(req)->dirp_map.elems;
-}
-
-/* Assumes lo->mutex is held */
-static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
-{
- struct lo_map_elem *elem;
-
- elem = lo_map_alloc_elem(&lo_data(req)->ino_map);
- if (!elem) {
- return -1;
- }
-
- elem->inode = inode;
- return elem - lo_data(req)->ino_map.elems;
-}
-
-static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep)
-{
- struct lo_inode *inode = *inodep;
-
- if (!inode) {
- return;
- }
-
- *inodep = NULL;
-
- if (g_atomic_int_dec_and_test(&inode->refcount)) {
- close(inode->fd);
- free(inode);
- }
-}
-
-/* Caller must release refcount using lo_inode_put() */
-static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
-{
- struct lo_data *lo = lo_data(req);
- struct lo_map_elem *elem;
-
- pthread_mutex_lock(&lo->mutex);
- elem = lo_map_get(&lo->ino_map, ino);
- if (elem) {
- g_atomic_int_inc(&elem->inode->refcount);
- }
- pthread_mutex_unlock(&lo->mutex);
-
- if (!elem) {
- return NULL;
- }
-
- return elem->inode;
-}
-
-/*
- * TODO Remove this helper and force callers to hold an inode refcount until
- * they are done with the fd. This will be done in a later patch to make
- * review easier.
- */
-static int lo_fd(fuse_req_t req, fuse_ino_t ino)
-{
- struct lo_inode *inode = lo_inode(req, ino);
- int fd;
-
- if (!inode) {
- return -1;
- }
-
- fd = inode->fd;
- lo_inode_put(lo_data(req), &inode);
- return fd;
-}
-
-/*
- * Open a file descriptor for an inode. Returns -EBADF if the inode is not a
- * regular file or a directory.
- *
- * Use this helper function instead of raw openat(2) to prevent security issues
- * when a malicious client opens special files such as block device nodes.
- * Symlink inodes are also rejected since symlinks must already have been
- * traversed on the client side.
- */
-static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode,
- int open_flags)
-{
- g_autofree char *fd_str = g_strdup_printf("%d", inode->fd);
- int fd;
-
- if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) {
- return -EBADF;
- }
-
- /*
- * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier
- * that the inode is not a special file but if an external process races
- * with us then symlinks are traversed here. It is not possible to escape
- * the shared directory since it is mounted as "/" though.
- */
- fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW);
- if (fd < 0) {
- return -errno;
- }
- return fd;
-}
-
-static void lo_init(void *userdata, struct fuse_conn_info *conn)
-{
- struct lo_data *lo = (struct lo_data *)userdata;
-
- if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) {
- conn->want |= FUSE_CAP_EXPORT_SUPPORT;
- }
-
- if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) {
- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n");
- conn->want |= FUSE_CAP_WRITEBACK_CACHE;
- }
- if (conn->capable & FUSE_CAP_FLOCK_LOCKS) {
- if (lo->flock) {
- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
- conn->want |= FUSE_CAP_FLOCK_LOCKS;
- } else {
- fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling flock locks\n");
- conn->want &= ~FUSE_CAP_FLOCK_LOCKS;
- }
- }
-
- if (conn->capable & FUSE_CAP_POSIX_LOCKS) {
- if (lo->posix_lock) {
- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating posix locks\n");
- conn->want |= FUSE_CAP_POSIX_LOCKS;
- } else {
- fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix locks\n");
- conn->want &= ~FUSE_CAP_POSIX_LOCKS;
- }
- }
-
- if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) ||
- lo->readdirplus_clear) {
- fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n");
- conn->want &= ~FUSE_CAP_READDIRPLUS;
- }
-
- if (!(conn->capable & FUSE_CAP_SUBMOUNTS) && lo->announce_submounts) {
- fuse_log(FUSE_LOG_WARNING, "lo_init: Cannot announce submounts, client "
- "does not support it\n");
- lo->announce_submounts = false;
- }
-
- if (lo->user_killpriv_v2 == 1) {
- /*
- * User explicitly asked for this option. Enable it unconditionally.
- * If connection does not have this capability, it should fail
- * in fuse_lowlevel.c
- */
- fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling killpriv_v2\n");
- conn->want |= FUSE_CAP_HANDLE_KILLPRIV_V2;
- lo->killpriv_v2 = 1;
- } else {
- /*
- * Either the user explicitly disabled killpriv_v2, or did not
- * specify anything. Disable killpriv_v2 in both cases.
- */
- fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling killpriv_v2\n");
- conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2;
- lo->killpriv_v2 = 0;
- }
-
- if (lo->user_posix_acl == 1) {
- /*
- * User explicitly asked for this option. Enable it unconditionally.
- * If connection does not have this capability, print error message
- * now. It will fail later in fuse_lowlevel.c
- */
- if (!(conn->capable & FUSE_CAP_POSIX_ACL) ||
- !(conn->capable & FUSE_CAP_DONT_MASK) ||
- !(conn->capable & FUSE_CAP_SETXATTR_EXT)) {
- fuse_log(FUSE_LOG_ERR, "lo_init: Can not enable posix acl."
- " kernel does not support FUSE_POSIX_ACL, FUSE_DONT_MASK"
- " or FUSE_SETXATTR_EXT capability.\n");
- } else {
- fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling posix acl\n");
- }
-
- conn->want |= FUSE_CAP_POSIX_ACL | FUSE_CAP_DONT_MASK |
- FUSE_CAP_SETXATTR_EXT;
- lo->change_umask = true;
- lo->posix_acl = true;
- } else {
- /* User either did not specify anything or wants it disabled */
- fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix_acl\n");
- conn->want &= ~FUSE_CAP_POSIX_ACL;
- }
-
- if (lo->user_security_label == 1) {
- if (!(conn->capable & FUSE_CAP_SECURITY_CTX)) {
- fuse_log(FUSE_LOG_ERR, "lo_init: Can not enable security label."
- " kernel does not support FUSE_SECURITY_CTX capability.\n");
- }
- conn->want |= FUSE_CAP_SECURITY_CTX;
- } else {
- fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling security label\n");
- conn->want &= ~FUSE_CAP_SECURITY_CTX;
- }
-}
-
-static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
- struct fuse_file_info *fi)
-{
- int res;
- struct stat buf;
- struct lo_data *lo = lo_data(req);
-
- (void)fi;
-
- res =
- fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
- if (res == -1) {
- return (void)fuse_reply_err(req, errno);
- }
-
- fuse_reply_attr(req, &buf, lo->timeout);
-}
-
-static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
-{
- struct lo_data *lo = lo_data(req);
- struct lo_map_elem *elem;
-
- pthread_mutex_lock(&lo->mutex);
- elem = lo_map_get(&lo->fd_map, fi->fh);
- pthread_mutex_unlock(&lo->mutex);
-
- if (!elem) {
- return -1;
- }
-
- return elem->fd;
-}
-
-static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
- int valid, struct fuse_file_info *fi)
-{
- int saverr;
- char procname[64];
- struct lo_data *lo = lo_data(req);
- struct lo_inode *inode;
- int ifd;
- int res;
- int fd = -1;
-
- inode = lo_inode(req, ino);
- if (!inode) {
- fuse_reply_err(req, EBADF);
- return;
- }
-
- ifd = inode->fd;
-
- /* If fi->fh is invalid we'll report EBADF later */
- if (fi) {
- fd = lo_fi_fd(req, fi);
- }
-
- if (valid & FUSE_SET_ATTR_MODE) {
- if (fi) {
- res = fchmod(fd, attr->st_mode);
- } else {
- sprintf(procname, "%i", ifd);
- res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0);
- }
- if (res == -1) {
- saverr = errno;
- goto out_err;
- }
- }
- if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) {
- uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1;
- gid_t gid = (valid & FUSE_SET_ATTR_GID) ? attr->st_gid : (gid_t)-1;
-
- saverr = drop_security_capability(lo, ifd);
- if (saverr) {
- goto out_err;
- }
-
- res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
- if (res == -1) {
- saverr = errno;
- goto out_err;
- }
- }
- if (valid & FUSE_SET_ATTR_SIZE) {
- int truncfd;
- bool kill_suidgid;
- bool cap_fsetid_dropped = false;
-
- kill_suidgid = lo->killpriv_v2 && (valid & FUSE_SET_ATTR_KILL_SUIDGID);
- if (fi) {
- truncfd = fd;
- } else {
- truncfd = lo_inode_open(lo, inode, O_RDWR);
- if (truncfd < 0) {
- saverr = -truncfd;
- goto out_err;
- }
- }
-
- saverr = drop_security_capability(lo, truncfd);
- if (saverr) {
- if (!fi) {
- close(truncfd);
- }
- goto out_err;
- }
-
- if (kill_suidgid) {
- res = drop_effective_cap("FSETID", &cap_fsetid_dropped);
- if (res != 0) {
- saverr = res;
- if (!fi) {
- close(truncfd);
- }
- goto out_err;
- }
- }
-
- res = ftruncate(truncfd, attr->st_size);
- saverr = res == -1 ? errno : 0;
-
- if (cap_fsetid_dropped) {
- if (gain_effective_cap("FSETID")) {
- fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n");
- }
- }
- if (!fi) {
- close(truncfd);
- }
- if (res == -1) {
- goto out_err;
- }
- }
- if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) {
- struct timespec tv[2];
-
- tv[0].tv_sec = 0;
- tv[1].tv_sec = 0;
- tv[0].tv_nsec = UTIME_OMIT;
- tv[1].tv_nsec = UTIME_OMIT;
-
- if (valid & FUSE_SET_ATTR_ATIME_NOW) {
- tv[0].tv_nsec = UTIME_NOW;
- } else if (valid & FUSE_SET_ATTR_ATIME) {
- tv[0] = attr->st_atim;
- }
-
- if (valid & FUSE_SET_ATTR_MTIME_NOW) {
- tv[1].tv_nsec = UTIME_NOW;
- } else if (valid & FUSE_SET_ATTR_MTIME) {
- tv[1] = attr->st_mtim;
- }
-
- if (fi) {
- res = futimens(fd, tv);
- } else {
- sprintf(procname, "%i", inode->fd);
- res = utimensat(lo->proc_self_fd, procname, tv, 0);
- }
- if (res == -1) {
- saverr = errno;
- goto out_err;
- }
- }
- lo_inode_put(lo, &inode);
-
- return lo_getattr(req, ino, fi);
-
-out_err:
- lo_inode_put(lo, &inode);
- fuse_reply_err(req, saverr);
-}
-
-static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st,
- uint64_t mnt_id)
-{
- struct lo_inode *p;
- struct lo_key key = {
- .ino = st->st_ino,
- .dev = st->st_dev,
- .mnt_id = mnt_id,
- };
-
- pthread_mutex_lock(&lo->mutex);
- p = g_hash_table_lookup(lo->inodes, &key);
- if (p) {
- assert(p->nlookup > 0);
- p->nlookup++;
- g_atomic_int_inc(&p->refcount);
- }
- pthread_mutex_unlock(&lo->mutex);
-
- return p;
-}
-
-/* value_destroy_func for posix_locks GHashTable */
-static void posix_locks_value_destroy(gpointer data)
-{
- struct lo_inode_plock *plock = data;
-
- /*
- * We had used open() for locks and had only one fd. So
- * closing this fd should release all OFD locks.
- */
- close(plock->fd);
- free(plock);
-}
-
-static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
- struct stat *statbuf, int flags, uint64_t *mnt_id)
-{
- int res;
-
-#if defined(CONFIG_STATX) && defined(CONFIG_STATX_MNT_ID)
- if (lo->use_statx) {
- struct statx statxbuf;
-
- res = statx(dirfd, pathname, flags, STATX_BASIC_STATS | STATX_MNT_ID,
- &statxbuf);
- if (!res) {
- memset(statbuf, 0, sizeof(*statbuf));
- statbuf->st_dev = makedev(statxbuf.stx_dev_major,
- statxbuf.stx_dev_minor);
- statbuf->st_ino = statxbuf.stx_ino;
- statbuf->st_mode = statxbuf.stx_mode;
- statbuf->st_nlink = statxbuf.stx_nlink;
- statbuf->st_uid = statxbuf.stx_uid;
- statbuf->st_gid = statxbuf.stx_gid;
- statbuf->st_rdev = makedev(statxbuf.stx_rdev_major,
- statxbuf.stx_rdev_minor);
- statbuf->st_size = statxbuf.stx_size;
- statbuf->st_blksize = statxbuf.stx_blksize;
- statbuf->st_blocks = statxbuf.stx_blocks;
- statbuf->st_atim.tv_sec = statxbuf.stx_atime.tv_sec;
- statbuf->st_atim.tv_nsec = statxbuf.stx_atime.tv_nsec;
- statbuf->st_mtim.tv_sec = statxbuf.stx_mtime.tv_sec;
- statbuf->st_mtim.tv_nsec = statxbuf.stx_mtime.tv_nsec;
- statbuf->st_ctim.tv_sec = statxbuf.stx_ctime.tv_sec;
- statbuf->st_ctim.tv_nsec = statxbuf.stx_ctime.tv_nsec;
-
- if (statxbuf.stx_mask & STATX_MNT_ID) {
- *mnt_id = statxbuf.stx_mnt_id;
- } else {
- *mnt_id = 0;
- }
- return 0;
- } else if (errno != ENOSYS) {
- return -1;
- }
- lo->use_statx = false;
- /* fallback */
- }
-#endif
- res = fstatat(dirfd, pathname, statbuf, flags);
- if (res == -1) {
- return -1;
- }
- *mnt_id = 0;
-
- return 0;
-}
-
-/*
- * Increments nlookup on the inode on success. unref_inode_lolocked() must be
- * called eventually to decrement nlookup again. If inodep is non-NULL, the
- * inode pointer is stored and the caller must call lo_inode_put().
- */
-static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
- struct fuse_entry_param *e,
- struct lo_inode **inodep)
-{
- int newfd;
- int res;
- int saverr;
- uint64_t mnt_id;
- struct lo_data *lo = lo_data(req);
- struct lo_inode *inode = NULL;
- struct lo_inode *dir = lo_inode(req, parent);
-
- if (inodep) {
- *inodep = NULL; /* in case there is an error */
- }
-
- /*
- * name_to_handle_at() and open_by_handle_at() can reach here with a
- * FUSE mount point in the guest, but we don't have its inode info in
- * the ino_map.
- */
- if (!dir) {
- return ENOENT;
- }
-
- memset(e, 0, sizeof(*e));
- e->attr_timeout = lo->timeout;
- e->entry_timeout = lo->timeout;
-
- /* Do not allow escaping root directory */
- if (dir == &lo->root && strcmp(name, "..") == 0) {
- name = ".";
- }
-
- newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW);
- if (newfd == -1) {
- goto out_err;
- }
-
- res = do_statx(lo, newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW,
- &mnt_id);
- if (res == -1) {
- goto out_err;
- }
-
- if (S_ISDIR(e->attr.st_mode) && lo->announce_submounts &&
- (e->attr.st_dev != dir->key.dev || mnt_id != dir->key.mnt_id)) {
- e->attr_flags |= FUSE_ATTR_SUBMOUNT;
- }
-
- inode = lo_find(lo, &e->attr, mnt_id);
- if (inode) {
- close(newfd);
- } else {
- inode = calloc(1, sizeof(struct lo_inode));
- if (!inode) {
- goto out_err;
- }
-
- /* cache only filetype */
- inode->filetype = (e->attr.st_mode & S_IFMT);
-
- /*
- * One for the caller and one for nlookup (released in
- * unref_inode_lolocked())
- */
- g_atomic_int_set(&inode->refcount, 2);
-
- inode->nlookup = 1;
- inode->fd = newfd;
- inode->key.ino = e->attr.st_ino;
- inode->key.dev = e->attr.st_dev;
- inode->key.mnt_id = mnt_id;
- if (lo->posix_lock) {
- pthread_mutex_init(&inode->plock_mutex, NULL);
- inode->posix_locks = g_hash_table_new_full(
- g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
- }
- pthread_mutex_lock(&lo->mutex);
- inode->fuse_ino = lo_add_inode_mapping(req, inode);
- g_hash_table_insert(lo->inodes, &inode->key, inode);
- pthread_mutex_unlock(&lo->mutex);
- }
- e->ino = inode->fuse_ino;
-
- /* Transfer ownership of inode pointer to caller or drop it */
- if (inodep) {
- *inodep = inode;
- } else {
- lo_inode_put(lo, &inode);
- }
-
- lo_inode_put(lo, &dir);
-
- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
- name, (unsigned long long)e->ino);
-
- return 0;
-
-out_err:
- saverr = errno;
- if (newfd != -1) {
- close(newfd);
- }
- lo_inode_put(lo, &inode);
- lo_inode_put(lo, &dir);
- return saverr;
-}
-
-static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
-{
- struct fuse_entry_param e;
- int err;
-
- fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", parent,
- name);
-
- if (is_empty(name)) {
- fuse_reply_err(req, ENOENT);
- return;
- }
-
- /*
- * Don't use is_safe_path_component(), allow "." and ".." for NFS export
- * support.
- */
- if (strchr(name, '/')) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- err = lo_do_lookup(req, parent, name, &e, NULL);
- if (err) {
- fuse_reply_err(req, err);
- } else {
- fuse_reply_entry(req, &e);
- }
-}
-
-/*
- * On some archs, setres*id is limited to 2^16 but they
- * provide setres*id32 variants that allow 2^32.
- * Others just let setres*id do 2^32 anyway.
- */
-#ifdef SYS_setresgid32
-#define OURSYS_setresgid SYS_setresgid32
-#else
-#define OURSYS_setresgid SYS_setresgid
-#endif
-
-#ifdef SYS_setresuid32
-#define OURSYS_setresuid SYS_setresuid32
-#else
-#define OURSYS_setresuid SYS_setresuid
-#endif
-
-static void drop_supplementary_groups(void)
-{
- int ret;
-
- ret = getgroups(0, NULL);
- if (ret == -1) {
- fuse_log(FUSE_LOG_ERR, "getgroups() failed with error=%d:%s\n",
- errno, strerror(errno));
- exit(1);
- }
-
- if (!ret) {
- return;
- }
-
- /* Drop all supplementary groups. We should not need them. */
- ret = setgroups(0, NULL);
- if (ret == -1) {
- fuse_log(FUSE_LOG_ERR, "setgroups() failed with error=%d:%s\n",
- errno, strerror(errno));
- exit(1);
- }
-}
-
-/*
- * Change to uid/gid of caller so that file is created with
- * ownership of caller.
- * TODO: What about selinux context?
- */
-static int lo_change_cred(fuse_req_t req, struct lo_cred *old,
- bool change_umask)
-{
- int res;
-
- old->euid = geteuid();
- old->egid = getegid();
-
- res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1);
- if (res == -1) {
- return errno;
- }
-
- res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1);
- if (res == -1) {
- int errno_save = errno;
-
- syscall(OURSYS_setresgid, -1, old->egid, -1);
- return errno_save;
- }
-
- if (change_umask) {
- old->umask = umask(req->ctx.umask);
- }
- return 0;
-}
-
-/* Regain Privileges */
-static void lo_restore_cred(struct lo_cred *old, bool restore_umask)
-{
- int res;
-
- res = syscall(OURSYS_setresuid, -1, old->euid, -1);
- if (res == -1) {
- fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid);
- exit(1);
- }
-
- res = syscall(OURSYS_setresgid, -1, old->egid, -1);
- if (res == -1) {
- fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid);
- exit(1);
- }
-
- if (restore_umask)
- umask(old->umask);
-}
-
-/*
- * A helper to change cred and drop capability. Returns 0 on success and
- * errno on error
- */
-static int lo_drop_cap_change_cred(fuse_req_t req, struct lo_cred *old,
- bool change_umask, const char *cap_name,
- bool *cap_dropped)
-{
- int ret;
- bool __cap_dropped;
-
- assert(cap_name);
-
- ret = drop_effective_cap(cap_name, &__cap_dropped);
- if (ret) {
- return ret;
- }
-
- ret = lo_change_cred(req, old, change_umask);
- if (ret) {
- if (__cap_dropped) {
- if (gain_effective_cap(cap_name)) {
- fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_%s\n", cap_name);
- }
- }
- }
-
- if (cap_dropped) {
- *cap_dropped = __cap_dropped;
- }
- return ret;
-}
-
-static void lo_restore_cred_gain_cap(struct lo_cred *old, bool restore_umask,
- const char *cap_name)
-{
- assert(cap_name);
-
- lo_restore_cred(old, restore_umask);
-
- if (gain_effective_cap(cap_name)) {
- fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_%s\n", cap_name);
- }
-}
-
-static int do_mknod_symlink_secctx(fuse_req_t req, struct lo_inode *dir,
- const char *name, const char *secctx_name)
-{
- int path_fd, err;
- char procname[64];
- struct lo_data *lo = lo_data(req);
-
- if (!req->secctx.ctxlen) {
- return 0;
- }
-
- /* Open newly created element with O_PATH */
- path_fd = openat(dir->fd, name, O_PATH | O_NOFOLLOW);
- err = path_fd == -1 ? errno : 0;
- if (err) {
- return err;
- }
- sprintf(procname, "%i", path_fd);
- FCHDIR_NOFAIL(lo->proc_self_fd);
- /* Set security context. This is not atomic w.r.t file creation */
- err = setxattr(procname, secctx_name, req->secctx.ctx, req->secctx.ctxlen,
- 0);
- if (err) {
- err = errno;
- }
- FCHDIR_NOFAIL(lo->root.fd);
- close(path_fd);
- return err;
-}
-
-static int do_mknod_symlink(fuse_req_t req, struct lo_inode *dir,
- const char *name, mode_t mode, dev_t rdev,
- const char *link)
-{
- int err, fscreate_fd = -1;
- const char *secctx_name = req->secctx.name;
- struct lo_cred old = {};
- struct lo_data *lo = lo_data(req);
- char *mapped_name = NULL;
- bool secctx_enabled = req->secctx.ctxlen;
- bool do_fscreate = false;
-
- if (secctx_enabled && lo->xattrmap) {
- err = xattr_map_client(lo, req->secctx.name, &mapped_name);
- if (err < 0) {
- return -err;
- }
- secctx_name = mapped_name;
- }
-
- /*
- * If the security xattr has not been remapped and SELinux is enabled on
- * the host, set fscreate so there is no need to do a setxattr() after
- * file creation.
- */
- if (secctx_enabled && !mapped_name && lo->use_fscreate) {
- do_fscreate = true;
- err = open_set_proc_fscreate(lo, req->secctx.ctx, req->secctx.ctxlen,
- &fscreate_fd);
- if (err) {
- goto out;
- }
- }
-
- err = lo_change_cred(req, &old, lo->change_umask && !S_ISLNK(mode));
- if (err) {
- goto out;
- }
-
- err = mknod_wrapper(dir->fd, name, link, mode, rdev);
- err = err == -1 ? errno : 0;
- lo_restore_cred(&old, lo->change_umask && !S_ISLNK(mode));
- if (err) {
- goto out;
- }
-
- if (!do_fscreate) {
- err = do_mknod_symlink_secctx(req, dir, name, secctx_name);
- if (err) {
- unlinkat(dir->fd, name, S_ISDIR(mode) ? AT_REMOVEDIR : 0);
- }
- }
-out:
- if (fscreate_fd != -1) {
- close_reset_proc_fscreate(fscreate_fd);
- }
- g_free(mapped_name);
- return err;
-}
-
-static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
- const char *name, mode_t mode, dev_t rdev,
- const char *link)
-{
- int saverr;
- struct lo_data *lo = lo_data(req);
- struct lo_inode *dir;
- struct fuse_entry_param e;
-
- if (is_empty(name)) {
- fuse_reply_err(req, ENOENT);
- return;
- }
-
- if (!is_safe_path_component(name)) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- dir = lo_inode(req, parent);
- if (!dir) {
- fuse_reply_err(req, EBADF);
- return;
- }
-
- saverr = do_mknod_symlink(req, dir, name, mode, rdev, link);
- if (saverr) {
- goto out;
- }
-
- saverr = lo_do_lookup(req, parent, name, &e, NULL);
- if (saverr) {
- goto out;
- }
-
- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
- name, (unsigned long long)e.ino);
-
- fuse_reply_entry(req, &e);
- lo_inode_put(lo, &dir);
- return;
-
-out:
- lo_inode_put(lo, &dir);
- fuse_reply_err(req, saverr);
-}
-
-static void lo_mknod(fuse_req_t req, fuse_ino_t parent, const char *name,
- mode_t mode, dev_t rdev)
-{
- lo_mknod_symlink(req, parent, name, mode, rdev, NULL);
-}
-
-static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name,
- mode_t mode)
-{
- lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL);
-}
-
-static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
- const char *name)
-{
- lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
-}
-
-static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
- const char *name)
-{
- int res;
- struct lo_data *lo = lo_data(req);
- struct lo_inode *parent_inode;
- struct lo_inode *inode;
- struct fuse_entry_param e;
- char procname[64];
- int saverr;
-
- if (is_empty(name)) {
- fuse_reply_err(req, ENOENT);
- return;
- }
-
- if (!is_safe_path_component(name)) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- parent_inode = lo_inode(req, parent);
- inode = lo_inode(req, ino);
- if (!parent_inode || !inode) {
- errno = EBADF;
- goto out_err;
- }
-
- memset(&e, 0, sizeof(struct fuse_entry_param));
- e.attr_timeout = lo->timeout;
- e.entry_timeout = lo->timeout;
-
- sprintf(procname, "%i", inode->fd);
- res = linkat(lo->proc_self_fd, procname, parent_inode->fd, name,
- AT_SYMLINK_FOLLOW);
- if (res == -1) {
- goto out_err;
- }
-
- res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
- if (res == -1) {
- goto out_err;
- }
-
- pthread_mutex_lock(&lo->mutex);
- inode->nlookup++;
- pthread_mutex_unlock(&lo->mutex);
- e.ino = inode->fuse_ino;
-
- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
- name, (unsigned long long)e.ino);
-
- fuse_reply_entry(req, &e);
- lo_inode_put(lo, &parent_inode);
- lo_inode_put(lo, &inode);
- return;
-
-out_err:
- saverr = errno;
- lo_inode_put(lo, &parent_inode);
- lo_inode_put(lo, &inode);
- fuse_reply_err(req, saverr);
-}
-
-/* Increments nlookup and caller must release refcount using lo_inode_put() */
-static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent,
- const char *name)
-{
- int res;
- uint64_t mnt_id;
- struct stat attr;
- struct lo_data *lo = lo_data(req);
- struct lo_inode *dir = lo_inode(req, parent);
-
- if (!dir) {
- return NULL;
- }
-
- res = do_statx(lo, dir->fd, name, &attr, AT_SYMLINK_NOFOLLOW, &mnt_id);
- lo_inode_put(lo, &dir);
- if (res == -1) {
- return NULL;
- }
-
- return lo_find(lo, &attr, mnt_id);
-}
-
-static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name)
-{
- int res;
- struct lo_inode *inode;
- struct lo_data *lo = lo_data(req);
-
- if (is_empty(name)) {
- fuse_reply_err(req, ENOENT);
- return;
- }
-
- if (!is_safe_path_component(name)) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- inode = lookup_name(req, parent, name);
- if (!inode) {
- fuse_reply_err(req, EIO);
- return;
- }
-
- res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR);
-
- fuse_reply_err(req, res == -1 ? errno : 0);
- unref_inode_lolocked(lo, inode, 1);
- lo_inode_put(lo, &inode);
-}
-
-static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
- fuse_ino_t newparent, const char *newname,
- unsigned int flags)
-{
- int res;
- struct lo_inode *parent_inode;
- struct lo_inode *newparent_inode;
- struct lo_inode *oldinode = NULL;
- struct lo_inode *newinode = NULL;
- struct lo_data *lo = lo_data(req);
-
- if (is_empty(name) || is_empty(newname)) {
- fuse_reply_err(req, ENOENT);
- return;
- }
-
- if (!is_safe_path_component(name) || !is_safe_path_component(newname)) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- parent_inode = lo_inode(req, parent);
- newparent_inode = lo_inode(req, newparent);
- if (!parent_inode || !newparent_inode) {
- fuse_reply_err(req, EBADF);
- goto out;
- }
-
- oldinode = lookup_name(req, parent, name);
- newinode = lookup_name(req, newparent, newname);
-
- if (!oldinode) {
- fuse_reply_err(req, EIO);
- goto out;
- }
-
- if (flags) {
-#ifndef SYS_renameat2
- fuse_reply_err(req, EINVAL);
-#else
- res = syscall(SYS_renameat2, parent_inode->fd, name,
- newparent_inode->fd, newname, flags);
- if (res == -1 && errno == ENOSYS) {
- fuse_reply_err(req, EINVAL);
- } else {
- fuse_reply_err(req, res == -1 ? errno : 0);
- }
-#endif
- goto out;
- }
-
- res = renameat(parent_inode->fd, name, newparent_inode->fd, newname);
-
- fuse_reply_err(req, res == -1 ? errno : 0);
-out:
- unref_inode_lolocked(lo, oldinode, 1);
- unref_inode_lolocked(lo, newinode, 1);
- lo_inode_put(lo, &oldinode);
- lo_inode_put(lo, &newinode);
- lo_inode_put(lo, &parent_inode);
- lo_inode_put(lo, &newparent_inode);
-}
-
-static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
-{
- int res;
- struct lo_inode *inode;
- struct lo_data *lo = lo_data(req);
-
- if (is_empty(name)) {
- fuse_reply_err(req, ENOENT);
- return;
- }
-
- if (!is_safe_path_component(name)) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- inode = lookup_name(req, parent, name);
- if (!inode) {
- fuse_reply_err(req, EIO);
- return;
- }
-
- res = unlinkat(lo_fd(req, parent), name, 0);
-
- fuse_reply_err(req, res == -1 ? errno : 0);
- unref_inode_lolocked(lo, inode, 1);
- lo_inode_put(lo, &inode);
-}
-
-/* To be called with lo->mutex held */
-static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n)
-{
- if (!inode) {
- return;
- }
-
- assert(inode->nlookup >= n);
- inode->nlookup -= n;
- if (!inode->nlookup) {
- lo_map_remove(&lo->ino_map, inode->fuse_ino);
- g_hash_table_remove(lo->inodes, &inode->key);
- if (lo->posix_lock) {
- if (g_hash_table_size(inode->posix_locks)) {
- fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n");
- }
- g_hash_table_destroy(inode->posix_locks);
- pthread_mutex_destroy(&inode->plock_mutex);
- }
- /* Drop our refcount from lo_do_lookup() */
- lo_inode_put(lo, &inode);
- }
-}
-
-static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
- uint64_t n)
-{
- if (!inode) {
- return;
- }
-
- pthread_mutex_lock(&lo->mutex);
- unref_inode(lo, inode, n);
- pthread_mutex_unlock(&lo->mutex);
-}
-
-static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
-{
- struct lo_data *lo = lo_data(req);
- struct lo_inode *inode;
-
- inode = lo_inode(req, ino);
- if (!inode) {
- return;
- }
-
- fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n",
- (unsigned long long)ino, (unsigned long long)inode->nlookup,
- (unsigned long long)nlookup);
-
- unref_inode_lolocked(lo, inode, nlookup);
- lo_inode_put(lo, &inode);
-}
-
-static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
-{
- lo_forget_one(req, ino, nlookup);
- fuse_reply_none(req);
-}
-
-static void lo_forget_multi(fuse_req_t req, size_t count,
- struct fuse_forget_data *forgets)
-{
- int i;
-
- for (i = 0; i < count; i++) {
- lo_forget_one(req, forgets[i].ino, forgets[i].nlookup);
- }
- fuse_reply_none(req);
-}
-
-static void lo_readlink(fuse_req_t req, fuse_ino_t ino)
-{
- char buf[PATH_MAX + 1];
- int res;
-
- res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf));
- if (res == -1) {
- return (void)fuse_reply_err(req, errno);
- }
-
- if (res == sizeof(buf)) {
- return (void)fuse_reply_err(req, ENAMETOOLONG);
- }
-
- buf[res] = '\0';
-
- fuse_reply_readlink(req, buf);
-}
-
-struct lo_dirp {
- gint refcount;
- DIR *dp;
- struct dirent *entry;
- off_t offset;
-};
-
-static void lo_dirp_put(struct lo_dirp **dp)
-{
- struct lo_dirp *d = *dp;
-
- if (!d) {
- return;
- }
- *dp = NULL;
-
- if (g_atomic_int_dec_and_test(&d->refcount)) {
- closedir(d->dp);
- free(d);
- }
-}
-
-/* Call lo_dirp_put() on the return value when no longer needed */
-static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi)
-{
- struct lo_data *lo = lo_data(req);
- struct lo_map_elem *elem;
-
- pthread_mutex_lock(&lo->mutex);
- elem = lo_map_get(&lo->dirp_map, fi->fh);
- if (elem) {
- g_atomic_int_inc(&elem->dirp->refcount);
- }
- pthread_mutex_unlock(&lo->mutex);
- if (!elem) {
- return NULL;
- }
-
- return elem->dirp;
-}
-
-static void lo_opendir(fuse_req_t req, fuse_ino_t ino,
- struct fuse_file_info *fi)
-{
- int error = ENOMEM;
- struct lo_data *lo = lo_data(req);
- struct lo_dirp *d;
- int fd;
- ssize_t fh;
-
- d = calloc(1, sizeof(struct lo_dirp));
- if (d == NULL) {
- goto out_err;
- }
-
- fd = openat(lo_fd(req, ino), ".", O_RDONLY);
- if (fd == -1) {
- goto out_errno;
- }
-
- d->dp = fdopendir(fd);
- if (d->dp == NULL) {
- goto out_errno;
- }
-
- d->offset = 0;
- d->entry = NULL;
-
- g_atomic_int_set(&d->refcount, 1); /* paired with lo_releasedir() */
- pthread_mutex_lock(&lo->mutex);
- fh = lo_add_dirp_mapping(req, d);
- pthread_mutex_unlock(&lo->mutex);
- if (fh == -1) {
- goto out_err;
- }
-
- fi->fh = fh;
- if (lo->cache == CACHE_ALWAYS) {
- fi->cache_readdir = 1;
- }
- fuse_reply_open(req, fi);
- return;
-
-out_errno:
- error = errno;
-out_err:
- if (d) {
- if (d->dp) {
- closedir(d->dp);
- } else if (fd != -1) {
- close(fd);
- }
- free(d);
- }
- fuse_reply_err(req, error);
-}
-
-static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
- off_t offset, struct fuse_file_info *fi, int plus)
-{
- struct lo_data *lo = lo_data(req);
- struct lo_dirp *d = NULL;
- struct lo_inode *dinode;
- g_autofree char *buf = NULL;
- char *p;
- size_t rem = size;
- int err = EBADF;
-
- dinode = lo_inode(req, ino);
- if (!dinode) {
- goto error;
- }
-
- d = lo_dirp(req, fi);
- if (!d) {
- goto error;
- }
-
- err = ENOMEM;
- buf = g_try_malloc0(size);
- if (!buf) {
- goto error;
- }
- p = buf;
-
- if (offset != d->offset) {
- seekdir(d->dp, offset);
- d->entry = NULL;
- d->offset = offset;
- }
- while (1) {
- size_t entsize;
- off_t nextoff;
- const char *name;
-
- if (!d->entry) {
- errno = 0;
- d->entry = readdir(d->dp);
- if (!d->entry) {
- if (errno) { /* Error */
- err = errno;
- goto error;
- } else { /* End of stream */
- break;
- }
- }
- }
- nextoff = d->entry->d_off;
- name = d->entry->d_name;
-
- fuse_ino_t entry_ino = 0;
- struct fuse_entry_param e = (struct fuse_entry_param){
- .attr.st_ino = d->entry->d_ino,
- .attr.st_mode = d->entry->d_type << 12,
- };
-
- /* Hide root's parent directory */
- if (dinode == &lo->root && strcmp(name, "..") == 0) {
- e.attr.st_ino = lo->root.key.ino;
- e.attr.st_mode = DT_DIR << 12;
- }
-
- if (plus) {
- if (!is_dot_or_dotdot(name)) {
- err = lo_do_lookup(req, ino, name, &e, NULL);
- if (err) {
- goto error;
- }
- entry_ino = e.ino;
- }
-
- entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff);
- } else {
- entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff);
- }
- if (entsize > rem) {
- if (entry_ino != 0) {
- lo_forget_one(req, entry_ino, 1);
- }
- break;
- }
-
- p += entsize;
- rem -= entsize;
-
- d->entry = NULL;
- d->offset = nextoff;
- }
-
- err = 0;
-error:
- lo_dirp_put(&d);
- lo_inode_put(lo, &dinode);
-
- /*
- * If there's an error, we can only signal it if we haven't stored
- * any entries yet - otherwise we'd end up with wrong lookup
- * counts for the entries that are already in the buffer. So we
- * return what we've collected until that point.
- */
- if (err && rem == size) {
- fuse_reply_err(req, err);
- } else {
- fuse_reply_buf(req, buf, size - rem);
- }
-}
-
-static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
- off_t offset, struct fuse_file_info *fi)
-{
- lo_do_readdir(req, ino, size, offset, fi, 0);
-}
-
-static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size,
- off_t offset, struct fuse_file_info *fi)
-{
- lo_do_readdir(req, ino, size, offset, fi, 1);
-}
-
-static void lo_releasedir(fuse_req_t req, fuse_ino_t ino,
- struct fuse_file_info *fi)
-{
- struct lo_data *lo = lo_data(req);
- struct lo_map_elem *elem;
- struct lo_dirp *d;
-
- (void)ino;
-
- pthread_mutex_lock(&lo->mutex);
- elem = lo_map_get(&lo->dirp_map, fi->fh);
- if (!elem) {
- pthread_mutex_unlock(&lo->mutex);
- fuse_reply_err(req, EBADF);
- return;
- }
-
- d = elem->dirp;
- lo_map_remove(&lo->dirp_map, fi->fh);
- pthread_mutex_unlock(&lo->mutex);
-
- lo_dirp_put(&d); /* paired with lo_opendir() */
-
- fuse_reply_err(req, 0);
-}
-
-static void update_open_flags(int writeback, int allow_direct_io,
- struct fuse_file_info *fi)
-{
- /*
- * With writeback cache, kernel may send read requests even
- * when userspace opened write-only
- */
- if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) {
- fi->flags &= ~O_ACCMODE;
- fi->flags |= O_RDWR;
- }
-
- /*
- * With writeback cache, O_APPEND is handled by the kernel.
- * This breaks atomicity (since the file may change in the
- * underlying filesystem, so that the kernel's idea of the
- * end of the file isn't accurate anymore). In this implementation
- * we just accept that. A more rigorous filesystem may want
- * to return an error here.
- */
- if (writeback && (fi->flags & O_APPEND)) {
- fi->flags &= ~O_APPEND;
- }
-
- /*
- * O_DIRECT in guest should not necessarily mean bypassing page
- * cache on host as well. Therefore, we discard it by default
- * ('-o no_allow_direct_io'). If somebody needs that behavior,
- * the '-o allow_direct_io' option should be set.
- */
- if (!allow_direct_io) {
- fi->flags &= ~O_DIRECT;
- }
-}
-
-/*
- * Open a regular file, set up an fd mapping, and fill out the struct
- * fuse_file_info for it. If existing_fd is not negative, use that fd instead
- * of opening a new one. Takes ownership of existing_fd.
- *
- * Returns 0 on success or a positive errno.
- */
-static int lo_do_open(struct lo_data *lo, struct lo_inode *inode,
- int existing_fd, struct fuse_file_info *fi)
-{
- ssize_t fh;
- int fd = existing_fd;
- int err;
- bool cap_fsetid_dropped = false;
- bool kill_suidgid = lo->killpriv_v2 && fi->kill_priv;
-
- update_open_flags(lo->writeback, lo->allow_direct_io, fi);
-
- if (fd < 0) {
- if (kill_suidgid) {
- err = drop_effective_cap("FSETID", &cap_fsetid_dropped);
- if (err) {
- return err;
- }
- }
-
- fd = lo_inode_open(lo, inode, fi->flags);
-
- if (cap_fsetid_dropped) {
- if (gain_effective_cap("FSETID")) {
- fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n");
- }
- }
- if (fd < 0) {
- return -fd;
- }
- if (fi->flags & (O_TRUNC)) {
- int err = drop_security_capability(lo, fd);
- if (err) {
- close(fd);
- return err;
- }
- }
- }
-
- pthread_mutex_lock(&lo->mutex);
- fh = lo_add_fd_mapping(lo, fd);
- pthread_mutex_unlock(&lo->mutex);
- if (fh == -1) {
- close(fd);
- return ENOMEM;
- }
-
- fi->fh = fh;
- if (lo->cache == CACHE_NONE) {
- fi->direct_io = 1;
- } else if (lo->cache == CACHE_ALWAYS) {
- fi->keep_cache = 1;
- }
- return 0;
-}
-
-static int do_create_nosecctx(fuse_req_t req, struct lo_inode *parent_inode,
- const char *name, mode_t mode,
- struct fuse_file_info *fi, int *open_fd,
- bool tmpfile)
-{
- int err, fd;
- struct lo_cred old = {};
- struct lo_data *lo = lo_data(req);
- int flags;
-
- if (tmpfile) {
- flags = fi->flags | O_TMPFILE;
- /*
- * Don't use O_EXCL as we want to link the file in later. Also clear
- * O_CREAT, otherwise openat() returns -EINVAL.
- */
- flags &= ~(O_CREAT | O_EXCL);
-
- /* O_TMPFILE needs either O_RDWR or O_WRONLY */
- if ((flags & O_ACCMODE) == O_RDONLY) {
- flags |= O_RDWR;
- }
- } else {
- flags = fi->flags | O_CREAT | O_EXCL;
- }
-
- err = lo_change_cred(req, &old, lo->change_umask);
- if (err) {
- return err;
- }
-
- /* Try to create a new file but don't open existing files */
- fd = openat(parent_inode->fd, name, flags, mode);
- err = fd == -1 ? errno : 0;
- lo_restore_cred(&old, lo->change_umask);
- if (!err) {
- *open_fd = fd;
- }
- return err;
-}
-
-static int do_create_secctx_fscreate(fuse_req_t req,
- struct lo_inode *parent_inode,
- const char *name, mode_t mode,
- struct fuse_file_info *fi, int *open_fd)
-{
- int err = 0, fd = -1, fscreate_fd = -1;
- struct lo_data *lo = lo_data(req);
-
- err = open_set_proc_fscreate(lo, req->secctx.ctx, req->secctx.ctxlen,
- &fscreate_fd);
- if (err) {
- return err;
- }
-
- err = do_create_nosecctx(req, parent_inode, name, mode, fi, &fd, false);
-
- close_reset_proc_fscreate(fscreate_fd);
- if (!err) {
- *open_fd = fd;
- }
- return err;
-}
-
-static int do_create_secctx_tmpfile(fuse_req_t req,
- struct lo_inode *parent_inode,
- const char *name, mode_t mode,
- struct fuse_file_info *fi,
- const char *secctx_name, int *open_fd)
-{
- int err, fd = -1;
- struct lo_data *lo = lo_data(req);
- char procname[64];
-
- err = do_create_nosecctx(req, parent_inode, ".", mode, fi, &fd, true);
- if (err) {
- return err;
- }
-
- err = fsetxattr(fd, secctx_name, req->secctx.ctx, req->secctx.ctxlen, 0);
- if (err) {
- err = errno;
- goto out;
- }
-
- /* Security context set on file. Link it in place */
- sprintf(procname, "%d", fd);
- FCHDIR_NOFAIL(lo->proc_self_fd);
- err = linkat(AT_FDCWD, procname, parent_inode->fd, name,
- AT_SYMLINK_FOLLOW);
- err = err == -1 ? errno : 0;
- FCHDIR_NOFAIL(lo->root.fd);
-
-out:
- if (!err) {
- *open_fd = fd;
- } else if (fd != -1) {
- close(fd);
- }
- return err;
-}
-
-static int do_create_secctx_noatomic(fuse_req_t req,
- struct lo_inode *parent_inode,
- const char *name, mode_t mode,
- struct fuse_file_info *fi,
- const char *secctx_name, int *open_fd)
-{
- int err = 0, fd = -1;
-
- err = do_create_nosecctx(req, parent_inode, name, mode, fi, &fd, false);
- if (err) {
- goto out;
- }
-
- /* Set security context. This is not atomic w.r.t file creation */
- err = fsetxattr(fd, secctx_name, req->secctx.ctx, req->secctx.ctxlen, 0);
- err = err == -1 ? errno : 0;
-out:
- if (!err) {
- *open_fd = fd;
- } else {
- if (fd != -1) {
- close(fd);
- unlinkat(parent_inode->fd, name, 0);
- }
- }
- return err;
-}
-
-static int do_lo_create(fuse_req_t req, struct lo_inode *parent_inode,
- const char *name, mode_t mode,
- struct fuse_file_info *fi, int *open_fd)
-{
- struct lo_data *lo = lo_data(req);
- char *mapped_name = NULL;
- int err;
- const char *ctxname = req->secctx.name;
- bool secctx_enabled = req->secctx.ctxlen;
-
- if (secctx_enabled && lo->xattrmap) {
- err = xattr_map_client(lo, req->secctx.name, &mapped_name);
- if (err < 0) {
- return -err;
- }
-
- ctxname = mapped_name;
- }
-
- if (secctx_enabled) {
- /*
- * If security.selinux has not been remapped and SELinux is enabled,
- * use fscreate to set the context before file creation. If not, use
- * the tmpfile method for regular files. Otherwise fall back to the
- * non-atomic method of file creation and xattr setting.
- */
- if (!mapped_name && lo->use_fscreate) {
- err = do_create_secctx_fscreate(req, parent_inode, name, mode, fi,
- open_fd);
- goto out;
- } else if (S_ISREG(mode)) {
- err = do_create_secctx_tmpfile(req, parent_inode, name, mode, fi,
- ctxname, open_fd);
- /*
- * If the filesystem does not support O_TMPFILE, fall back to the
- * non-atomic method.
- */
- if (!err || err != EOPNOTSUPP) {
- goto out;
- }
- }
-
- err = do_create_secctx_noatomic(req, parent_inode, name, mode, fi,
- ctxname, open_fd);
- } else {
- err = do_create_nosecctx(req, parent_inode, name, mode, fi, open_fd,
- false);
- }
-
-out:
- g_free(mapped_name);
- return err;
-}
-
-static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
- mode_t mode, struct fuse_file_info *fi)
-{
- int fd = -1;
- struct lo_data *lo = lo_data(req);
- struct lo_inode *parent_inode;
- struct lo_inode *inode = NULL;
- struct fuse_entry_param e;
- int err;
-
- fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)"
- " kill_priv=%d\n", parent, name, fi->kill_priv);
-
- if (!is_safe_path_component(name)) {
- fuse_reply_err(req, EINVAL);
- return;
- }
-
- parent_inode = lo_inode(req, parent);
- if (!parent_inode) {
- fuse_reply_err(req, EBADF);
- return;
- }
-
- update_open_flags(lo->writeback, lo->allow_direct_io, fi);
-
- err = do_lo_create(req, parent_inode, name, mode, fi, &fd);
-
- /* Ignore the error if file exists and O_EXCL was not given */
- if (err && (err != EEXIST || (fi->flags & O_EXCL))) {
- goto out;
- }
-
- err = lo_do_lookup(req, parent, name, &e, &inode);
- if (err) {
- goto out;
- }
-
- err = lo_do_open(lo, inode, fd, fi);
- fd = -1; /* lo_do_open() takes ownership of fd */
- if (err) {
- /* Undo lo_do_lookup() nlookup ref */
- unref_inode_lolocked(lo, inode, 1);
- }
-
-out:
- lo_inode_put(lo, &inode);
- lo_inode_put(lo, &parent_inode);
-
- if (err) {
- if (fd >= 0) {
- close(fd);
- }
-
- fuse_reply_err(req, err);
- } else {
- fuse_reply_create(req, &e, fi);
- }
-}
-
-/* Should be called with inode->plock_mutex held */
-static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo,
- struct lo_inode *inode,
- uint64_t lock_owner,
- pid_t pid, int *err)
-{
- struct lo_inode_plock *plock;
- int fd;
-
- plock =
- g_hash_table_lookup(inode->posix_locks, GUINT_TO_POINTER(lock_owner));
-
- if (plock) {
- return plock;
- }
-
- plock = malloc(sizeof(struct lo_inode_plock));
- if (!plock) {
- *err = ENOMEM;
- return NULL;
- }
-
- /* Open another instance of file which can be used for ofd locks. */
- /* TODO: What if file is not writable? */
- fd = lo_inode_open(lo, inode, O_RDWR);
- if (fd < 0) {
- *err = -fd;
- free(plock);
- return NULL;
- }
-
- plock->lock_owner = lock_owner;
- plock->fd = fd;
- g_hash_table_insert(inode->posix_locks, GUINT_TO_POINTER(plock->lock_owner),
- plock);
- return plock;
-}
-
-static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
- struct flock *lock)
-{
- struct lo_data *lo = lo_data(req);
- struct lo_inode *inode;
- struct lo_inode_plock *plock;
- int ret, saverr = 0;
-
- fuse_log(FUSE_LOG_DEBUG,
- "lo_getlk(ino=%" PRIu64 ", flags=%d)"
- " owner=0x%" PRIx64 ", l_type=%d l_start=0x%" PRIx64
- " l_len=0x%" PRIx64 "\n",
- ino, fi->flags, fi->lock_owner, lock->l_type,
- (uint64_t)lock->l_start, (uint64_t)lock->l_len);
-
- if (!lo->posix_lock) {
- fuse_reply_err(req, ENOSYS);
- return;
- }
-
- inode = lo_inode(req, ino);
- if (!inode) {
- fuse_reply_err(req, EBADF);
- return;
- }
-
- pthread_mutex_lock(&inode->plock_mutex);
- plock =
- lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
- if (!plock) {
- saverr = ret;
- goto out;
- }
-
- ret = fcntl(plock->fd, F_OFD_GETLK, lock);
- if (ret == -1) {
- saverr = errno;
- }
-
-out:
- pthread_mutex_unlock(&inode->plock_mutex);
- lo_inode_put(lo, &inode);
-
- if (saverr) {
- fuse_reply_err(req, saverr);
- } else {
- fuse_reply_lock(req, lock);
- }
-}
-
-static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
- struct flock *lock, int sleep)
-{
- struct lo_data *lo = lo_data(req);
- struct lo_inode *inode;
- struct lo_inode_plock *plock;
- int ret, saverr = 0;
-
- fuse_log(FUSE_LOG_DEBUG,
- "lo_setlk(ino=%" PRIu64 ", flags=%d)"
- " cmd=%d pid=%d owner=0x%" PRIx64 " sleep=%d l_whence=%d"
- " l_start=0x%" PRIx64 " l_len=0x%" PRIx64 "\n",
- ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep,
- lock->l_whence, (uint64_t)lock->l_start, (uint64_t)lock->l_len);
-
- if (!lo->posix_lock) {
- fuse_reply_err(req, ENOSYS);
- return;
- }
-
- if (sleep) {
- fuse_reply_err(req, EOPNOTSUPP);
- return;
- }
-
- inode = lo_inode(req, ino);
- if (!inode) {
- fuse_reply_err(req, EBADF);
- return;
- }
-
- pthread_mutex_lock(&inode->plock_mutex);
- plock =
- lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
-
- if (!plock) {
- saverr = ret;
- goto out;
- }
-
- /* TODO: Is it alright to modify flock? */
- lock->l_pid = 0;
- ret = fcntl(plock->fd, F_OFD_SETLK, lock);
- if (ret == -1) {
- saverr = errno;
- }
-
-out:
- pthread_mutex_unlock(&inode->plock_mutex);
- lo_inode_put(lo, &inode);
-
- fuse_reply_err(req, saverr);
-}
-
-static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
- struct fuse_file_info *fi)
-{
- int res;
- struct lo_dirp *d;
- int fd;
-
- (void)ino;
-
- d = lo_dirp(req, fi);
- if (!d) {
- fuse_reply_err(req, EBADF);
- return;
- }
-
- fd = dirfd(d->dp);
- if (datasync) {
- res = fdatasync(fd);
- } else {
- res = fsync(fd);
- }
-
- lo_dirp_put(&d);
-
- fuse_reply_err(req, res == -1 ? errno : 0);
-}
-
-static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
-{
- struct lo_data *lo = lo_data(req);
- struct lo_inode *inode = lo_inode(req, ino);
- int err;
-
- fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d, kill_priv=%d)"
- "\n", ino, fi->flags, fi->kill_priv);
-
- if (!inode) {
- fuse_reply_err(req, EBADF);
- return;
- }
-
- err = lo_do_open(lo, inode, -1, fi);
- lo_inode_put(lo, &inode);
- if (err) {
- fuse_reply_err(req, err);
- } else {
- fuse_reply_open(req, fi);
- }
-}
-
-static void lo_release(fuse_req_t req, fuse_ino_t ino,
- struct fuse_file_info *fi)
-{
- struct lo_data *lo = lo_data(req);
- struct lo_map_elem *elem;
- int fd = -1;
-
- (void)ino;
-
- pthread_mutex_lock(&lo->mutex);
- elem = lo_map_get(&lo->fd_map, fi->fh);
- if (elem) {
- fd = elem->fd;
- elem = NULL;
- lo_map_remove(&lo->fd_map, fi->fh);
- }
- pthread_mutex_unlock(&lo->mutex);
-
- close(fd);
- fuse_reply_err(req, 0);
-}
-
-static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
-{
- int res;
- (void)ino;
- struct lo_inode *inode;
- struct lo_data *lo = lo_data(req);
-
- inode = lo_inode(req, ino);
- if (!inode) {
- fuse_reply_err(req, EBADF);
- return;
- }
-
- if (!S_ISREG(inode->filetype)) {
- lo_inode_put(lo, &inode);
- fuse_reply_err(req, EBADF);
- return;
- }
-
- /* An fd is going away. Clean up associated posix locks */
- if (lo->posix_lock) {
- pthread_mutex_lock(&inode->plock_mutex);
- g_hash_table_remove(inode->posix_locks,
- GUINT_TO_POINTER(fi->lock_owner));
- pthread_mutex_unlock(&inode->plock_mutex);
- }
- res = close(dup(lo_fi_fd(req, fi)));
- lo_inode_put(lo, &inode);
- fuse_reply_err(req, res == -1 ? errno : 0);
-}
-
-static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
- struct fuse_file_info *fi)
-{
- struct lo_inode *inode = lo_inode(req, ino);
- struct lo_data *lo = lo_data(req);
- int res;
- int fd;
-
- fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino,
- (void *)fi);
-
- if (!inode) {
- fuse_reply_err(req, EBADF);
- return;
- }
-
- if (!fi) {
- fd = lo_inode_open(lo, inode, O_RDWR);
- if (fd < 0) {
- res = -fd;
- goto out;
- }
- } else {
- fd = lo_fi_fd(req, fi);
- }
-
- if (datasync) {
- res = fdatasync(fd) == -1 ? errno : 0;
- } else {
- res = fsync(fd) == -1 ? errno : 0;
- }
- if (!fi) {
- close(fd);
- }
-out:
- lo_inode_put(lo, &inode);
- fuse_reply_err(req, res);
-}
-
-static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset,
- struct fuse_file_info *fi)
-{
- struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size);
-
- fuse_log(FUSE_LOG_DEBUG,
- "lo_read(ino=%" PRIu64 ", size=%zd, "
- "off=%lu)\n",
- ino, size, (unsigned long)offset);
-
- buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
- buf.buf[0].fd = lo_fi_fd(req, fi);
- buf.buf[0].pos = offset;
-
- fuse_reply_data(req, &buf);
-}
-
-static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
- struct fuse_bufvec *in_buf, off_t off,
- struct fuse_file_info *fi)
-{
- (void)ino;
- ssize_t res;
- struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf));
- bool cap_fsetid_dropped = false;
-
- out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
- out_buf.buf[0].fd = lo_fi_fd(req, fi);
- out_buf.buf[0].pos = off;
-
- fuse_log(FUSE_LOG_DEBUG,
- "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu kill_priv=%d)\n",
- ino, out_buf.buf[0].size, (unsigned long)off, fi->kill_priv);
-
- res = drop_security_capability(lo_data(req), out_buf.buf[0].fd);
- if (res) {
- fuse_reply_err(req, res);
- return;
- }
-
- /*
- * If kill_priv is set, drop CAP_FSETID, which should lead to the kernel
- * clearing setuid/setgid on the file. Note that for WRITE we need to do
- * this even if killpriv_v2 is not enabled; the FUSE direct write path
- * relies on this.
- */
- if (fi->kill_priv) {
- res = drop_effective_cap("FSETID", &cap_fsetid_dropped);
- if (res != 0) {
- fuse_reply_err(req, res);
- return;
- }
- }
-
- res = fuse_buf_copy(&out_buf, in_buf);
- if (res < 0) {
- fuse_reply_err(req, -res);
- } else {
- fuse_reply_write(req, (size_t)res);
- }
-
- if (cap_fsetid_dropped) {
- res = gain_effective_cap("FSETID");
- if (res) {
- fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n");
- }
- }
-}
-
-static void lo_statfs(fuse_req_t req, fuse_ino_t ino)
-{
- int res;
- struct statvfs stbuf;
-
- res = fstatvfs(lo_fd(req, ino), &stbuf);
- if (res == -1) {
- fuse_reply_err(req, errno);
- } else {
- fuse_reply_statfs(req, &stbuf);
- }
-}
-
-static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset,
- off_t length, struct fuse_file_info *fi)
-{
- int err = EOPNOTSUPP;
- (void)ino;
-
-#ifdef CONFIG_FALLOCATE
- err = fallocate(lo_fi_fd(req, fi), mode, offset, length);
- if (err < 0) {
- err = errno;
- }
-
-#elif defined(CONFIG_POSIX_FALLOCATE)
- if (mode) {
- fuse_reply_err(req, EOPNOTSUPP);
- return;
- }
-
- err = posix_fallocate(lo_fi_fd(req, fi), offset, length);
-#endif
-
- fuse_reply_err(req, err);
-}
-
-static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
- int op)
-{
- int res;
- (void)ino;
-
- if (!(op & LOCK_NB)) {
- /*
- * Blocking flock can deadlock as there is only one thread
- * serving the queue.
- */
- fuse_reply_err(req, EOPNOTSUPP);
- return;
- }
-
- res = flock(lo_fi_fd(req, fi), op);
-
- fuse_reply_err(req, res == -1 ? errno : 0);
-}
-
-/* types */
-/*
- * Exit; process attribute unmodified if matched.
- * An empty key applies to all.
- */
-#define XATTR_MAP_FLAG_OK (1 << 0)
-/*
- * The attribute is unwanted;
- * EPERM on write, hidden on read.
- */
-#define XATTR_MAP_FLAG_BAD (1 << 1)
-/*
- * For attrs that start with 'key', prepend 'prepend'.
- * 'key' may be empty to prepend to all attrs.
- * 'key' is defined from the set/remove point of view;
- * it is automatically reversed on read.
- */
-#define XATTR_MAP_FLAG_PREFIX (1 << 2)
-/*
- * The attribute is unsupported;
- * ENOTSUP on write, hidden on read.
- */
-#define XATTR_MAP_FLAG_UNSUPPORTED (1 << 3)
-
-/* scopes */
-/* Apply rule to get/set/remove */
-#define XATTR_MAP_FLAG_CLIENT (1 << 16)
-/* Apply rule to list */
-#define XATTR_MAP_FLAG_SERVER (1 << 17)
-/* Apply rule to all */
-#define XATTR_MAP_FLAG_ALL (XATTR_MAP_FLAG_SERVER | XATTR_MAP_FLAG_CLIENT)
-
-static void add_xattrmap_entry(struct lo_data *lo,
- const XattrMapEntry *new_entry)
-{
- XattrMapEntry *res = g_realloc_n(lo->xattr_map_list,
- lo->xattr_map_nentries + 1,
- sizeof(XattrMapEntry));
- res[lo->xattr_map_nentries++] = *new_entry;
-
- lo->xattr_map_list = res;
-}
-
-static void free_xattrmap(struct lo_data *lo)
-{
- XattrMapEntry *map = lo->xattr_map_list;
- size_t i;
-
- if (!map) {
- return;
- }
-
- for (i = 0; i < lo->xattr_map_nentries; i++) {
- g_free(map[i].key);
- g_free(map[i].prepend);
- };
-
- g_free(map);
- lo->xattr_map_list = NULL;
- lo->xattr_map_nentries = -1;
-}
-
-/*
- * Handle the 'map' type, which is sugar for a set of commands
- * for the common case of prefixing a subset or everything,
- * and allowing anything not prefixed through.
- * It must be the last entry in the stream, although there
- * can be other entries before it.
- * The form is:
- * :map:key:prefix:
- *
- * 'key' may be empty, in which case all entries are prefixed.
- */
-static void parse_xattrmap_map(struct lo_data *lo,
- const char *rule, char sep)
-{
- const char *tmp;
- char *key;
- char *prefix;
- XattrMapEntry tmp_entry;
-
- if (*rule != sep) {
- fuse_log(FUSE_LOG_ERR,
- "%s: Expecting '%c' after 'map' keyword, found '%c'\n",
- __func__, sep, *rule);
- exit(1);
- }
-
- rule++;
-
- /* At start of 'key' field */
- tmp = strchr(rule, sep);
- if (!tmp) {
- fuse_log(FUSE_LOG_ERR,
- "%s: Missing '%c' at end of key field in map rule\n",
- __func__, sep);
- exit(1);
- }
-
- key = g_strndup(rule, tmp - rule);
- rule = tmp + 1;
-
- /* At start of prefix field */
- tmp = strchr(rule, sep);
- if (!tmp) {
- fuse_log(FUSE_LOG_ERR,
- "%s: Missing '%c' at end of prefix field in map rule\n",
- __func__, sep);
- exit(1);
- }
-
- prefix = g_strndup(rule, tmp - rule);
- rule = tmp + 1;
-
- /*
- * This should be the end of the string, we don't allow
- * any more commands after 'map'.
- */
- if (*rule) {
- fuse_log(FUSE_LOG_ERR,
- "%s: Expecting end of command after map, found '%c'\n",
- __func__, *rule);
- exit(1);
- }
-
- /* 1st: Prefix matches/everything */
- tmp_entry.flags = XATTR_MAP_FLAG_PREFIX | XATTR_MAP_FLAG_ALL;
- tmp_entry.key = g_strdup(key);
- tmp_entry.prepend = g_strdup(prefix);
- add_xattrmap_entry(lo, &tmp_entry);
-
- if (!*key) {
- /* Prefix all case */
-
- /* 2nd: Hide any non-prefixed entries on the host */
- tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_ALL;
- tmp_entry.key = g_strdup("");
- tmp_entry.prepend = g_strdup("");
- add_xattrmap_entry(lo, &tmp_entry);
- } else {
- /* Prefix matching case */
-
- /* 2nd: Hide non-prefixed but matching entries on the host */
- tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_SERVER;
- tmp_entry.key = g_strdup(""); /* Not used */
- tmp_entry.prepend = g_strdup(key);
- add_xattrmap_entry(lo, &tmp_entry);
-
- /* 3rd: Stop the client accessing prefixed attributes directly */
- tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_CLIENT;
- tmp_entry.key = g_strdup(prefix);
- tmp_entry.prepend = g_strdup(""); /* Not used */
- add_xattrmap_entry(lo, &tmp_entry);
-
- /* 4th: Everything else is OK */
- tmp_entry.flags = XATTR_MAP_FLAG_OK | XATTR_MAP_FLAG_ALL;
- tmp_entry.key = g_strdup("");
- tmp_entry.prepend = g_strdup("");
- add_xattrmap_entry(lo, &tmp_entry);
- }
-
- g_free(key);
- g_free(prefix);
-}
-
-static void parse_xattrmap(struct lo_data *lo)
-{
- const char *map = lo->xattrmap;
- const char *tmp;
- int ret;
-
- lo->xattr_map_nentries = 0;
- while (*map) {
- XattrMapEntry tmp_entry;
- char sep;
-
- if (isspace(*map)) {
- map++;
- continue;
- }
- /* The separator is the first non-space of the rule */
- sep = *map++;
- if (!sep) {
- break;
- }
-
- tmp_entry.flags = 0;
- /* Start of 'type' */
- if (strstart(map, "prefix", &map)) {
- tmp_entry.flags |= XATTR_MAP_FLAG_PREFIX;
- } else if (strstart(map, "ok", &map)) {
- tmp_entry.flags |= XATTR_MAP_FLAG_OK;
- } else if (strstart(map, "bad", &map)) {
- tmp_entry.flags |= XATTR_MAP_FLAG_BAD;
- } else if (strstart(map, "unsupported", &map)) {
- tmp_entry.flags |= XATTR_MAP_FLAG_UNSUPPORTED;
- } else if (strstart(map, "map", &map)) {
- /*
- * map is sugar that adds a number of rules, and must be
- * the last entry.
- */
- parse_xattrmap_map(lo, map, sep);
- break;
- } else {
- fuse_log(FUSE_LOG_ERR,
- "%s: Unexpected type;"
- "Expecting 'prefix', 'ok', 'bad', 'unsupported' or 'map'"
- " in rule %zu\n", __func__, lo->xattr_map_nentries);
- exit(1);
- }
-
- if (*map++ != sep) {
- fuse_log(FUSE_LOG_ERR,
- "%s: Missing '%c' at end of type field of rule %zu\n",
- __func__, sep, lo->xattr_map_nentries);
- exit(1);
- }
-
- /* Start of 'scope' */
- if (strstart(map, "client", &map)) {
- tmp_entry.flags |= XATTR_MAP_FLAG_CLIENT;
- } else if (strstart(map, "server", &map)) {
- tmp_entry.flags |= XATTR_MAP_FLAG_SERVER;
- } else if (strstart(map, "all", &map)) {
- tmp_entry.flags |= XATTR_MAP_FLAG_ALL;
- } else {
- fuse_log(FUSE_LOG_ERR,
- "%s: Unexpected scope;"
- " Expecting 'client', 'server', or 'all', in rule %zu\n",
- __func__, lo->xattr_map_nentries);
- exit(1);
- }
-
- if (*map++ != sep) {
- fuse_log(FUSE_LOG_ERR,
- "%s: Expecting '%c' found '%c'"
- " after scope in rule %zu\n",
- __func__, sep, *map, lo->xattr_map_nentries);
- exit(1);
- }
-
- /* At start of 'key' field */
- tmp = strchr(map, sep);
- if (!tmp) {
- fuse_log(FUSE_LOG_ERR,
- "%s: Missing '%c' at end of key field of rule %zu",
- __func__, sep, lo->xattr_map_nentries);
- exit(1);
- }
- tmp_entry.key = g_strndup(map, tmp - map);
- map = tmp + 1;
-
- /* At start of 'prepend' field */
- tmp = strchr(map, sep);
- if (!tmp) {
- fuse_log(FUSE_LOG_ERR,
- "%s: Missing '%c' at end of prepend field of rule %zu",
- __func__, sep, lo->xattr_map_nentries);
- exit(1);
- }
- tmp_entry.prepend = g_strndup(map, tmp - map);
- map = tmp + 1;
-
- add_xattrmap_entry(lo, &tmp_entry);
- /* End of rule - go around again for another rule */
- }
-
- if (!lo->xattr_map_nentries) {
- fuse_log(FUSE_LOG_ERR, "Empty xattr map\n");
- exit(1);
- }
-
- ret = xattr_map_client(lo, "security.capability",
- &lo->xattr_security_capability);
- if (ret) {
- fuse_log(FUSE_LOG_ERR, "Failed to map security.capability: %s\n",
- strerror(ret));
- exit(1);
- }
- if (!lo->xattr_security_capability ||
- !strcmp(lo->xattr_security_capability, "security.capability")) {
- /* 1-1 mapping, don't need to do anything */
- free(lo->xattr_security_capability);
- lo->xattr_security_capability = NULL;
- }
-}
-
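As the parsing loop that follows shows, each rule in the xattrmap option string uses a sed-style convention: the first non-space character of the rule is the separator, and the type, scope, key and prepend fields each end with that separator. A minimal standalone tokenizer sketch of that format; the rule string and the "user.virtiofs." prepend value are made up for illustration and carry no special meaning:

/* Sketch of the "first character picks the separator" tokenization. */
#include <stdio.h>
#include <string.h>

int main(void)
{
    /* Made-up rule: type "prefix", scope "all", empty key, prepend string */
    const char *rule = ":prefix:all::user.virtiofs.:";
    char sep = rule[0];
    const char *p = rule + 1;
    const char *field_names[] = { "type", "scope", "key", "prepend" };
    int field = 0;

    while (*p && field < 4) {
        const char *end = strchr(p, sep);

        if (!end) {
            fprintf(stderr, "missing '%c' terminator\n", sep);
            return 1;
        }
        printf("%-7s = '%.*s'\n", field_names[field++], (int)(end - p), p);
        p = end + 1;
    }
    return 0;
}

An empty key field, as in the rule above, is how "match everything" is expressed.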
-/*
- * For use with getxattr/setxattr/removexattr, where the client
- * gives us a name and we may need to choose a different one.
- * Allocates a buffer for the result placing it in *out_name.
- * If there's no change then *out_name is not set.
- * Returns 0 on success
- * Can return -EPERM to indicate we block a given attribute
- * (in which case out_name is not allocated)
- * Can return -ENOMEM to indicate out_name couldn't be allocated.
- */
-static int xattr_map_client(const struct lo_data *lo, const char *client_name,
- char **out_name)
-{
- size_t i;
- for (i = 0; i < lo->xattr_map_nentries; i++) {
- const XattrMapEntry *cur_entry = lo->xattr_map_list + i;
-
- if ((cur_entry->flags & XATTR_MAP_FLAG_CLIENT) &&
- (strstart(client_name, cur_entry->key, NULL))) {
- if (cur_entry->flags & XATTR_MAP_FLAG_BAD) {
- return -EPERM;
- }
- if (cur_entry->flags & XATTR_MAP_FLAG_UNSUPPORTED) {
- return -ENOTSUP;
- }
- if (cur_entry->flags & XATTR_MAP_FLAG_OK) {
- /* Unmodified name */
- return 0;
- }
- if (cur_entry->flags & XATTR_MAP_FLAG_PREFIX) {
- *out_name = g_try_malloc(strlen(client_name) +
- strlen(cur_entry->prepend) + 1);
- if (!*out_name) {
- return -ENOMEM;
- }
- sprintf(*out_name, "%s%s", cur_entry->prepend, client_name);
- return 0;
- }
- }
- }
-
- return -EPERM;
-}
-
-/*
- * For use with listxattr where the server fs gives us a name and we may need
- * to sanitize this for the client.
- * Returns a pointer to the result in *out_name
- * This is always the original string or the current string with some prefix
- * removed; no reallocation is done.
- * Returns 0 on success
- * Can return -ENODATA to indicate the name should be dropped from the list.
- */
-static int xattr_map_server(const struct lo_data *lo, const char *server_name,
- const char **out_name)
-{
- size_t i;
- const char *end;
-
- for (i = 0; i < lo->xattr_map_nentries; i++) {
- const XattrMapEntry *cur_entry = lo->xattr_map_list + i;
-
- if ((cur_entry->flags & XATTR_MAP_FLAG_SERVER) &&
- (strstart(server_name, cur_entry->prepend, &end))) {
- if (cur_entry->flags & XATTR_MAP_FLAG_BAD ||
- cur_entry->flags & XATTR_MAP_FLAG_UNSUPPORTED) {
- return -ENODATA;
- }
- if (cur_entry->flags & XATTR_MAP_FLAG_OK) {
- *out_name = server_name;
- return 0;
- }
- if (cur_entry->flags & XATTR_MAP_FLAG_PREFIX) {
- /* Remove prefix */
- *out_name = end;
- return 0;
- }
- }
- }
-
- return -ENODATA;
-}
-
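The two lookup helpers above are symmetric: the client-facing direction prepends a per-rule string before the name reaches the host, and the server-facing direction strips that same string (or drops the name) before it is returned to the client. A standalone sketch of just that prefix idea, with made-up demo_* helper names and a hypothetical "user.virtiofs." prepend string:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DEMO_PREFIX "user.virtiofs."   /* hypothetical prepend string */

/* Client -> server: prepend the prefix (caller frees the result). */
static char *demo_map_client(const char *client_name)
{
    size_t len = strlen(DEMO_PREFIX) + strlen(client_name) + 1;
    char *out = malloc(len);

    if (out) {
        snprintf(out, len, "%s%s", DEMO_PREFIX, client_name);
    }
    return out;
}

/* Server -> client: strip the prefix, or drop names that lack it. */
static const char *demo_map_server(const char *server_name)
{
    if (strncmp(server_name, DEMO_PREFIX, strlen(DEMO_PREFIX)) == 0) {
        return server_name + strlen(DEMO_PREFIX);
    }
    return NULL; /* the real helper reports -ENODATA here */
}

int main(void)
{
    char *mapped = demo_map_client("security.capability");

    if (!mapped) {
        return 1;
    }
    printf("client -> server: %s\n", mapped);
    printf("server -> client: %s\n", demo_map_server(mapped));
    free(mapped);
    return 0;
}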
-static bool block_xattr(struct lo_data *lo, const char *name)
-{
- /*
- * If user explicitly enabled posix_acl or did not provide any option,
- * do not block acl. Otherwise block system.posix_acl_access and
- * system.posix_acl_default xattrs.
- */
- if (lo->user_posix_acl) {
- return false;
- }
- if (!strcmp(name, "system.posix_acl_access") ||
- !strcmp(name, "system.posix_acl_default"))
- return true;
-
- return false;
-}
-
-/*
- * Returns number of bytes in xattr_list after filtering on success. This
- * could be zero as well if nothing is left after filtering.
- *
- * Returns negative error code on failure.
- * xattr_list is modified in place.
- */
-static int remove_blocked_xattrs(struct lo_data *lo, char *xattr_list,
- unsigned in_size)
-{
- size_t out_index, in_index;
-
- /*
- * As of now we only filter out acl xattrs. If acls are enabled or
- * they have not been explicitly disabled, there is nothing to
- * filter.
- */
- if (lo->user_posix_acl) {
- return in_size;
- }
-
- out_index = 0;
- in_index = 0;
- while (in_index < in_size) {
- char *in_ptr = xattr_list + in_index;
-
- /* Length of current attribute name */
- size_t in_len = strlen(xattr_list + in_index) + 1;
-
- if (!block_xattr(lo, in_ptr)) {
- if (in_index != out_index) {
- memmove(xattr_list + out_index, xattr_list + in_index, in_len);
- }
- out_index += in_len;
- }
- in_index += in_len;
- }
- return out_index;
-}
-
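remove_blocked_xattrs() above relies on the listxattr(2) buffer layout: names are stored back to back, each NUL-terminated, so filtering is an in-place forward copy of the survivors. A self-contained sketch of that compaction; the sample names and the demo_blocked() helper are illustrative only:

#include <stdio.h>
#include <string.h>

static int demo_blocked(const char *name)
{
    return strcmp(name, "system.posix_acl_access") == 0 ||
           strcmp(name, "system.posix_acl_default") == 0;
}

int main(void)
{
    /* Three NUL-terminated names packed back to back, listxattr-style */
    char list[] = "user.a\0system.posix_acl_access\0user.b\0";
    size_t in_size = sizeof(list) - 1;   /* ignore the literal's extra NUL */
    size_t in = 0, out = 0;

    while (in < in_size) {
        size_t len = strlen(list + in) + 1;

        if (!demo_blocked(list + in)) {
            memmove(list + out, list + in, len);
            out += len;
        }
        in += len;
    }
    printf("kept %zu of %zu bytes of xattr names\n", out, in_size);
    return 0;
}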
-static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name,
- size_t size)
-{
- struct lo_data *lo = lo_data(req);
- g_autofree char *value = NULL;
- char procname[64];
- const char *name;
- char *mapped_name;
- struct lo_inode *inode;
- ssize_t ret;
- int saverr;
- int fd = -1;
-
- if (block_xattr(lo, in_name)) {
- fuse_reply_err(req, EOPNOTSUPP);
- return;
- }
-
- mapped_name = NULL;
- name = in_name;
- if (lo->xattrmap) {
- ret = xattr_map_client(lo, in_name, &mapped_name);
- if (ret < 0) {
- if (ret == -EPERM) {
- ret = -ENODATA;
- }
- fuse_reply_err(req, -ret);
- return;
- }
- if (mapped_name) {
- name = mapped_name;
- }
- }
-
- inode = lo_inode(req, ino);
- if (!inode) {
- fuse_reply_err(req, EBADF);
- g_free(mapped_name);
- return;
- }
-
- saverr = ENOSYS;
- if (!lo_data(req)->xattr) {
- goto out;
- }
-
- fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n",
- ino, name, size);
-
- if (size) {
- value = g_try_malloc(size);
- if (!value) {
- goto out_err;
- }
- }
-
- sprintf(procname, "%i", inode->fd);
- /*
- * It is not safe to open() non-regular/non-dir files in file server
- * unless O_PATH is used, so use that method for regular files/dir
- * only (as it seems giving less performance overhead).
- * Otherwise, call fchdir() to avoid open().
- */
- if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
- fd = openat(lo->proc_self_fd, procname, O_RDONLY);
- if (fd < 0) {
- goto out_err;
- }
- ret = fgetxattr(fd, name, value, size);
- saverr = ret == -1 ? errno : 0;
- } else {
- /* fchdir should not fail here */
- FCHDIR_NOFAIL(lo->proc_self_fd);
- ret = getxattr(procname, name, value, size);
- saverr = ret == -1 ? errno : 0;
- FCHDIR_NOFAIL(lo->root.fd);
- }
-
- if (ret == -1) {
- goto out;
- }
- if (size) {
- saverr = 0;
- if (ret == 0) {
- goto out;
- }
- fuse_reply_buf(req, value, ret);
- } else {
- fuse_reply_xattr(req, ret);
- }
-out_free:
- if (fd >= 0) {
- close(fd);
- }
-
- lo_inode_put(lo, &inode);
- return;
-
-out_err:
- saverr = errno;
-out:
- fuse_reply_err(req, saverr);
- g_free(mapped_name);
- goto out_free;
-}
-
-static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size)
-{
- struct lo_data *lo = lo_data(req);
- g_autofree char *value = NULL;
- char procname[64];
- struct lo_inode *inode;
- ssize_t ret;
- int saverr;
- int fd = -1;
-
- inode = lo_inode(req, ino);
- if (!inode) {
- fuse_reply_err(req, EBADF);
- return;
- }
-
- saverr = ENOSYS;
- if (!lo_data(req)->xattr) {
- goto out;
- }
-
- fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino,
- size);
-
- if (size) {
- value = g_try_malloc(size);
- if (!value) {
- goto out_err;
- }
- }
-
- sprintf(procname, "%i", inode->fd);
- if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
- fd = openat(lo->proc_self_fd, procname, O_RDONLY);
- if (fd < 0) {
- goto out_err;
- }
- ret = flistxattr(fd, value, size);
- saverr = ret == -1 ? errno : 0;
- } else {
- /* fchdir should not fail here */
- FCHDIR_NOFAIL(lo->proc_self_fd);
- ret = listxattr(procname, value, size);
- saverr = ret == -1 ? errno : 0;
- FCHDIR_NOFAIL(lo->root.fd);
- }
-
- if (ret == -1) {
- goto out;
- }
- if (size) {
- saverr = 0;
- if (ret == 0) {
- goto out;
- }
-
- if (lo->xattr_map_list) {
- /*
- * Map the names back, some attributes might be dropped,
- * some shortened, but not increased, so we shouldn't
- * run out of room.
- */
- size_t out_index, in_index;
- out_index = 0;
- in_index = 0;
- while (in_index < ret) {
- const char *map_out;
- char *in_ptr = value + in_index;
- /* Length of current attribute name */
- size_t in_len = strlen(value + in_index) + 1;
-
- int mapret = xattr_map_server(lo, in_ptr, &map_out);
- if (mapret != -ENODATA && mapret != 0) {
- /* Shouldn't happen */
- saverr = -mapret;
- goto out;
- }
- if (mapret == 0) {
- /* Either unchanged, or truncated */
- size_t out_len;
- if (map_out != in_ptr) {
- /* +1 copies the NIL */
- out_len = strlen(map_out) + 1;
- } else {
- /* No change */
- out_len = in_len;
- }
- /*
- * Move result along, may still be needed for an unchanged
- * entry if a previous entry was changed.
- */
- memmove(value + out_index, map_out, out_len);
-
- out_index += out_len;
- }
- in_index += in_len;
- }
- ret = out_index;
- if (ret == 0) {
- goto out;
- }
- }
-
- ret = remove_blocked_xattrs(lo, value, ret);
- if (ret <= 0) {
- saverr = -ret;
- goto out;
- }
- fuse_reply_buf(req, value, ret);
- } else {
- /*
- * xattrmap only ever shortens the result,
- * so we don't need to do anything clever with the
- * allocation length here.
- */
- fuse_reply_xattr(req, ret);
- }
-out_free:
- if (fd >= 0) {
- close(fd);
- }
-
- lo_inode_put(lo, &inode);
- return;
-
-out_err:
- saverr = errno;
-out:
- fuse_reply_err(req, saverr);
- goto out_free;
-}
-
-static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name,
- const char *value, size_t size, int flags,
- uint32_t extra_flags)
-{
- char procname[64];
- const char *name;
- char *mapped_name;
- struct lo_data *lo = lo_data(req);
- struct lo_inode *inode;
- ssize_t ret;
- int saverr;
- int fd = -1;
- bool switched_creds = false;
- bool cap_fsetid_dropped = false;
- struct lo_cred old = {};
-
- if (block_xattr(lo, in_name)) {
- fuse_reply_err(req, EOPNOTSUPP);
- return;
- }
-
- mapped_name = NULL;
- name = in_name;
- if (lo->xattrmap) {
- ret = xattr_map_client(lo, in_name, &mapped_name);
- if (ret < 0) {
- fuse_reply_err(req, -ret);
- return;
- }
- if (mapped_name) {
- name = mapped_name;
- }
- }
-
- inode = lo_inode(req, ino);
- if (!inode) {
- fuse_reply_err(req, EBADF);
- g_free(mapped_name);
- return;
- }
-
- saverr = ENOSYS;
- if (!lo_data(req)->xattr) {
- goto out;
- }
-
- fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64
- ", name=%s value=%s size=%zd)\n", ino, name, value, size);
-
- sprintf(procname, "%i", inode->fd);
- /*
- * If we are setting posix access acl and if SGID needs to be
- * cleared, then switch to caller's gid and drop CAP_FSETID
- * and that should make sure host kernel clears SGID.
- *
- * This probably will not work when we support idmapped mounts.
- * In that case we will need to find a non-root gid and switch
- * to it. (Instead of gid in request). Fix it when we support
- * idmapped mounts.
- */
- if (lo->posix_acl && !strcmp(name, "system.posix_acl_access")
- && (extra_flags & FUSE_SETXATTR_ACL_KILL_SGID)) {
- ret = lo_drop_cap_change_cred(req, &old, false, "FSETID",
- &cap_fsetid_dropped);
- if (ret) {
- saverr = ret;
- goto out;
- }
- switched_creds = true;
- }
- if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
- fd = openat(lo->proc_self_fd, procname, O_RDONLY);
- if (fd < 0) {
- saverr = errno;
- goto out;
- }
- ret = fsetxattr(fd, name, value, size, flags);
- saverr = ret == -1 ? errno : 0;
- } else {
- /* fchdir should not fail here */
- FCHDIR_NOFAIL(lo->proc_self_fd);
- ret = setxattr(procname, name, value, size, flags);
- saverr = ret == -1 ? errno : 0;
- FCHDIR_NOFAIL(lo->root.fd);
- }
- if (switched_creds) {
- if (cap_fsetid_dropped)
- lo_restore_cred_gain_cap(&old, false, "FSETID");
- else
- lo_restore_cred(&old, false);
- }
-
-out:
- if (fd >= 0) {
- close(fd);
- }
-
- lo_inode_put(lo, &inode);
- g_free(mapped_name);
- fuse_reply_err(req, saverr);
-}
-
-static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *in_name)
-{
- char procname[64];
- const char *name;
- char *mapped_name;
- struct lo_data *lo = lo_data(req);
- struct lo_inode *inode;
- ssize_t ret;
- int saverr;
- int fd = -1;
-
- if (block_xattr(lo, in_name)) {
- fuse_reply_err(req, EOPNOTSUPP);
- return;
- }
-
- mapped_name = NULL;
- name = in_name;
- if (lo->xattrmap) {
- ret = xattr_map_client(lo, in_name, &mapped_name);
- if (ret < 0) {
- fuse_reply_err(req, -ret);
- return;
- }
- if (mapped_name) {
- name = mapped_name;
- }
- }
-
- inode = lo_inode(req, ino);
- if (!inode) {
- fuse_reply_err(req, EBADF);
- g_free(mapped_name);
- return;
- }
-
- saverr = ENOSYS;
- if (!lo_data(req)->xattr) {
- goto out;
- }
-
- fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino,
- name);
-
- sprintf(procname, "%i", inode->fd);
- if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
- fd = openat(lo->proc_self_fd, procname, O_RDONLY);
- if (fd < 0) {
- saverr = errno;
- goto out;
- }
- ret = fremovexattr(fd, name);
- saverr = ret == -1 ? errno : 0;
- } else {
- /* fchdir should not fail here */
- FCHDIR_NOFAIL(lo->proc_self_fd);
- ret = removexattr(procname, name);
- saverr = ret == -1 ? errno : 0;
- FCHDIR_NOFAIL(lo->root.fd);
- }
-
-out:
- if (fd >= 0) {
- close(fd);
- }
-
- lo_inode_put(lo, &inode);
- g_free(mapped_name);
- fuse_reply_err(req, saverr);
-}
-
-#ifdef HAVE_COPY_FILE_RANGE
-static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in,
- struct fuse_file_info *fi_in, fuse_ino_t ino_out,
- off_t off_out, struct fuse_file_info *fi_out,
- size_t len, int flags)
-{
- int in_fd, out_fd;
- ssize_t res;
-
- in_fd = lo_fi_fd(req, fi_in);
- out_fd = lo_fi_fd(req, fi_out);
-
- fuse_log(FUSE_LOG_DEBUG,
- "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, "
- "off=%ju, ino=%" PRIu64 "/fd=%d, "
- "off=%ju, size=%zd, flags=0x%x)\n",
- ino_in, in_fd, (intmax_t)off_in,
- ino_out, out_fd, (intmax_t)off_out, len, flags);
-
- res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags);
- if (res < 0) {
- fuse_reply_err(req, errno);
- } else {
- fuse_reply_write(req, res);
- }
-}
-#endif
-
-static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
- struct fuse_file_info *fi)
-{
- off_t res;
-
- (void)ino;
- res = lseek(lo_fi_fd(req, fi), off, whence);
- if (res != -1) {
- fuse_reply_lseek(req, res);
- } else {
- fuse_reply_err(req, errno);
- }
-}
-
-static int lo_do_syncfs(struct lo_data *lo, struct lo_inode *inode)
-{
- int fd, ret = 0;
-
- fuse_log(FUSE_LOG_DEBUG, "lo_do_syncfs(ino=%" PRIu64 ")\n",
- inode->fuse_ino);
-
- fd = lo_inode_open(lo, inode, O_RDONLY);
- if (fd < 0) {
- return -fd;
- }
-
- if (syncfs(fd) < 0) {
- ret = errno;
- }
-
- close(fd);
- return ret;
-}
-
-static void lo_syncfs(fuse_req_t req, fuse_ino_t ino)
-{
- struct lo_data *lo = lo_data(req);
- struct lo_inode *inode = lo_inode(req, ino);
- int err;
-
- if (!inode) {
- fuse_reply_err(req, EBADF);
- return;
- }
-
- err = lo_do_syncfs(lo, inode);
- lo_inode_put(lo, &inode);
-
- /*
- * If submounts aren't announced, the client only sends a request to
- * sync the root inode. TODO: Track submounts internally and iterate
- * over them as well.
- */
-
- fuse_reply_err(req, err);
-}
-
-static void lo_destroy(void *userdata)
-{
- struct lo_data *lo = (struct lo_data *)userdata;
-
- pthread_mutex_lock(&lo->mutex);
- while (true) {
- GHashTableIter iter;
- gpointer key, value;
-
- g_hash_table_iter_init(&iter, lo->inodes);
- if (!g_hash_table_iter_next(&iter, &key, &value)) {
- break;
- }
-
- struct lo_inode *inode = value;
- unref_inode(lo, inode, inode->nlookup);
- }
- pthread_mutex_unlock(&lo->mutex);
-}
-
-static struct fuse_lowlevel_ops lo_oper = {
- .init = lo_init,
- .lookup = lo_lookup,
- .mkdir = lo_mkdir,
- .mknod = lo_mknod,
- .symlink = lo_symlink,
- .link = lo_link,
- .unlink = lo_unlink,
- .rmdir = lo_rmdir,
- .rename = lo_rename,
- .forget = lo_forget,
- .forget_multi = lo_forget_multi,
- .getattr = lo_getattr,
- .setattr = lo_setattr,
- .readlink = lo_readlink,
- .opendir = lo_opendir,
- .readdir = lo_readdir,
- .readdirplus = lo_readdirplus,
- .releasedir = lo_releasedir,
- .fsyncdir = lo_fsyncdir,
- .create = lo_create,
- .getlk = lo_getlk,
- .setlk = lo_setlk,
- .open = lo_open,
- .release = lo_release,
- .flush = lo_flush,
- .fsync = lo_fsync,
- .read = lo_read,
- .write_buf = lo_write_buf,
- .statfs = lo_statfs,
- .fallocate = lo_fallocate,
- .flock = lo_flock,
- .getxattr = lo_getxattr,
- .listxattr = lo_listxattr,
- .setxattr = lo_setxattr,
- .removexattr = lo_removexattr,
-#ifdef HAVE_COPY_FILE_RANGE
- .copy_file_range = lo_copy_file_range,
-#endif
- .lseek = lo_lseek,
- .syncfs = lo_syncfs,
- .destroy = lo_destroy,
-};
-
-/* Print vhost-user.json backend program capabilities */
-static void print_capabilities(void)
-{
- printf("{\n");
- printf(" \"type\": \"fs\"\n");
- printf("}\n");
-}
-
-/*
- * Drop all Linux capabilities because the wait parent process only needs to
- * sit in waitpid(2) and terminate.
- */
-static void setup_wait_parent_capabilities(void)
-{
- capng_setpid(syscall(SYS_gettid));
- capng_clear(CAPNG_SELECT_BOTH);
- capng_apply(CAPNG_SELECT_BOTH);
-}
-
-/*
- * Move to a new mount, net, and pid namespaces to isolate this process.
- */
-static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
-{
- pid_t child;
-
- /*
- * Create a new pid namespace for *child* processes. We'll have to
- * fork in order to enter the new pid namespace. A new mount namespace
- * is also needed so that we can remount /proc for the new pid
- * namespace.
- *
- * Our UNIX domain sockets have been created. Now we can move to
- * an empty network namespace to prevent TCP/IP and other network
- * activity in case this process is compromised.
- */
- if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) {
- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n");
- exit(1);
- }
-
- child = fork();
- if (child < 0) {
- fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n");
- exit(1);
- }
- if (child > 0) {
- pid_t waited;
- int wstatus;
-
- setup_wait_parent_capabilities();
-
- /* The parent waits for the child */
- do {
- waited = waitpid(child, &wstatus, 0);
- } while (waited < 0 && errno == EINTR && !se->exited);
-
- /* We were terminated by a signal, see fuse_signals.c */
- if (se->exited) {
- exit(0);
- }
-
- if (WIFEXITED(wstatus)) {
- exit(WEXITSTATUS(wstatus));
- }
-
- exit(1);
- }
-
- /* Send us SIGTERM when the parent thread terminates, see prctl(2) */
- prctl(PR_SET_PDEATHSIG, SIGTERM);
-
- /*
- * If the mounts have shared propagation then we want to opt out so our
- * mount changes don't affect the parent mount namespace.
- */
- if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) {
- fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n");
- exit(1);
- }
-
- /* The child must remount /proc to use the new pid namespace */
- if (mount("proc", "/proc", "proc",
- MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) {
- fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n");
- exit(1);
- }
-
- /* Get the /proc/self/task descriptor */
- lo->proc_self_task = open("/proc/self/task/", O_PATH);
- if (lo->proc_self_task == -1) {
- fuse_log(FUSE_LOG_ERR, "open(/proc/self/task, O_PATH): %m\n");
- exit(1);
- }
-
- lo->use_fscreate = is_fscreate_usable(lo);
-
- /*
- * We only need /proc/self/fd. Prevent ".." from accessing parent
- * directories of /proc/self/fd by bind-mounting it over /proc. Since / was
- * previously remounted with MS_REC | MS_SLAVE this mount change only
- * affects our process.
- */
- if (mount("/proc/self/fd", "/proc", NULL, MS_BIND, NULL) < 0) {
- fuse_log(FUSE_LOG_ERR, "mount(/proc/self/fd, MS_BIND): %m\n");
- exit(1);
- }
-
- /* Get the /proc (actually /proc/self/fd, see above) file descriptor */
- lo->proc_self_fd = open("/proc", O_PATH);
- if (lo->proc_self_fd == -1) {
- fuse_log(FUSE_LOG_ERR, "open(/proc, O_PATH): %m\n");
- exit(1);
- }
-}
-
-/*
- * Capture the capability state, we'll need to restore this for individual
- * threads later; see load_capng.
- */
-static void setup_capng(void)
-{
- /* Note this accesses /proc so has to happen before the sandbox */
- if (capng_get_caps_process()) {
- fuse_log(FUSE_LOG_ERR, "capng_get_caps_process\n");
- exit(1);
- }
- pthread_mutex_init(&cap.mutex, NULL);
- pthread_mutex_lock(&cap.mutex);
- cap.saved = capng_save_state();
- if (!cap.saved) {
- fuse_log(FUSE_LOG_ERR, "capng_save_state\n");
- exit(1);
- }
- pthread_mutex_unlock(&cap.mutex);
-}
-
-static void cleanup_capng(void)
-{
- free(cap.saved);
- cap.saved = NULL;
- pthread_mutex_destroy(&cap.mutex);
-}
-
-
-/*
- * Make the source directory our root so symlinks cannot escape and no other
- * files are accessible. Assumes unshare(CLONE_NEWNS) was already called.
- */
-static void setup_mounts(const char *source)
-{
- int oldroot;
- int newroot;
-
- if (mount(source, source, NULL, MS_BIND | MS_REC, NULL) < 0) {
- fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source);
- exit(1);
- }
-
- /* This magic is based on lxc's lxc_pivot_root() */
- oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
- if (oldroot < 0) {
- fuse_log(FUSE_LOG_ERR, "open(/): %m\n");
- exit(1);
- }
-
- newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
- if (newroot < 0) {
- fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source);
- exit(1);
- }
-
- if (fchdir(newroot) < 0) {
- fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n");
- exit(1);
- }
-
- if (syscall(__NR_pivot_root, ".", ".") < 0) {
- fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n");
- exit(1);
- }
-
- if (fchdir(oldroot) < 0) {
- fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n");
- exit(1);
- }
-
- if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) {
- fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n");
- exit(1);
- }
-
- if (umount2(".", MNT_DETACH) < 0) {
- fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n");
- exit(1);
- }
-
- if (fchdir(newroot) < 0) {
- fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n");
- exit(1);
- }
-
- close(newroot);
- close(oldroot);
-}
-
-/*
- * Only keep capabilities in allowlist that are needed for file system operation
- * The (possibly NULL) modcaps_in string passed in is free'd before exit.
- */
-static void setup_capabilities(char *modcaps_in)
-{
- char *modcaps = modcaps_in;
- pthread_mutex_lock(&cap.mutex);
- capng_restore_state(&cap.saved);
-
- /*
- * Add to allowlist file system-related capabilities that are needed for a
- * file server to act like root. Drop everything else like networking and
- * sysadmin capabilities.
- *
- * Exclusions:
- * 1. CAP_LINUX_IMMUTABLE is not included because it's only used via ioctl
- * and we don't support that.
- * 2. CAP_MAC_OVERRIDE is not included because it only seems to be
- * used by the Smack LSM. Omit it until there is demand for it.
- */
- capng_setpid(syscall(SYS_gettid));
- capng_clear(CAPNG_SELECT_BOTH);
- if (capng_updatev(CAPNG_ADD, CAPNG_PERMITTED | CAPNG_EFFECTIVE,
- CAP_CHOWN,
- CAP_DAC_OVERRIDE,
- CAP_FOWNER,
- CAP_FSETID,
- CAP_SETGID,
- CAP_SETUID,
- CAP_MKNOD,
- CAP_SETFCAP,
- -1)) {
- fuse_log(FUSE_LOG_ERR, "%s: capng_updatev failed\n", __func__);
- exit(1);
- }
-
- /*
- * The modcaps option is a colon separated list of caps,
- * each preceded by either + or -.
- */
- while (modcaps) {
- capng_act_t action;
- int cap;
-
- char *next = strchr(modcaps, ':');
- if (next) {
- *next = '\0';
- next++;
- }
-
- switch (modcaps[0]) {
- case '+':
- action = CAPNG_ADD;
- break;
-
- case '-':
- action = CAPNG_DROP;
- break;
-
- default:
- fuse_log(FUSE_LOG_ERR,
- "%s: Expecting '+'/'-' in modcaps but found '%c'\n",
- __func__, modcaps[0]);
- exit(1);
- }
- cap = capng_name_to_capability(modcaps + 1);
- if (cap < 0) {
- fuse_log(FUSE_LOG_ERR, "%s: Unknown capability '%s'\n", __func__,
- modcaps);
- exit(1);
- }
- if (capng_update(action, CAPNG_PERMITTED | CAPNG_EFFECTIVE, cap)) {
- fuse_log(FUSE_LOG_ERR, "%s: capng_update failed for '%s'\n",
- __func__, modcaps);
- exit(1);
- }
-
- modcaps = next;
- }
- g_free(modcaps_in);
-
- if (capng_apply(CAPNG_SELECT_BOTH)) {
- fuse_log(FUSE_LOG_ERR, "%s: capng_apply failed\n", __func__);
- exit(1);
- }
-
- cap.saved = capng_save_state();
- if (!cap.saved) {
- fuse_log(FUSE_LOG_ERR, "%s: capng_save_state failed\n", __func__);
- exit(1);
- }
- pthread_mutex_unlock(&cap.mutex);
-}
-
-/*
- * Use chroot as a weaker sandbox for environments where the process is
- * launched without CAP_SYS_ADMIN.
- */
-static void setup_chroot(struct lo_data *lo)
-{
- lo->proc_self_fd = open("/proc/self/fd", O_PATH);
- if (lo->proc_self_fd == -1) {
- fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/fd\", O_PATH): %m\n");
- exit(1);
- }
-
- lo->proc_self_task = open("/proc/self/task", O_PATH);
- if (lo->proc_self_fd == -1) {
- fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/task\", O_PATH): %m\n");
- exit(1);
- }
-
- lo->use_fscreate = is_fscreate_usable(lo);
-
- /*
- * Make the shared directory the file system root so that FUSE_OPEN
- * (lo_open()) cannot escape the shared directory by opening a symlink.
- *
- * The chroot(2) syscall is later disabled by seccomp and the
- * CAP_SYS_CHROOT capability is dropped so that tampering with the chroot
- * is not possible.
- *
- * However, it's still possible to escape the chroot via lo->proc_self_fd
- * but that requires first gaining control of the process.
- */
- if (chroot(lo->source) != 0) {
- fuse_log(FUSE_LOG_ERR, "chroot(\"%s\"): %m\n", lo->source);
- exit(1);
- }
-
- /* Move into the chroot */
- if (chdir("/") != 0) {
- fuse_log(FUSE_LOG_ERR, "chdir(\"/\"): %m\n");
- exit(1);
- }
-}
-
-/*
- * Lock down this process to prevent access to other processes or files outside
- * source directory. This reduces the impact of arbitrary code execution bugs.
- */
-static void setup_sandbox(struct lo_data *lo, struct fuse_session *se,
- bool enable_syslog)
-{
- if (lo->sandbox == SANDBOX_NAMESPACE) {
- setup_namespaces(lo, se);
- setup_mounts(lo->source);
- } else {
- setup_chroot(lo);
- }
-
- setup_seccomp(enable_syslog);
- setup_capabilities(g_strdup(lo->modcaps));
-}
-
-/* Set the maximum number of open file descriptors */
-static void setup_nofile_rlimit(unsigned long rlimit_nofile)
-{
- struct rlimit rlim = {
- .rlim_cur = rlimit_nofile,
- .rlim_max = rlimit_nofile,
- };
-
- if (rlimit_nofile == 0) {
- return; /* nothing to do */
- }
-
- if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) {
- /* Ignore SELinux denials */
- if (errno == EPERM) {
- return;
- }
-
- fuse_log(FUSE_LOG_ERR, "setrlimit(RLIMIT_NOFILE): %m\n");
- exit(1);
- }
-}
-
-G_GNUC_PRINTF(2, 0)
-static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
-{
- g_autofree char *localfmt = NULL;
- char buf[64];
-
- if (current_log_level < level) {
- return;
- }
-
- if (current_log_level == FUSE_LOG_DEBUG) {
- if (use_syslog) {
- /* no timestamp needed */
- localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid),
- fmt);
- } else {
- g_autoptr(GDateTime) now = g_date_time_new_now_utc();
- g_autofree char *nowstr = g_date_time_format(now,
- "%Y-%m-%d %H:%M:%S.%%06d%z");
- snprintf(buf, 64, nowstr, g_date_time_get_microsecond(now));
- localfmt = g_strdup_printf("[%s] [ID: %08ld] %s",
- buf, syscall(__NR_gettid), fmt);
- }
- fmt = localfmt;
- }
-
- if (use_syslog) {
- int priority = LOG_ERR;
- switch (level) {
- case FUSE_LOG_EMERG:
- priority = LOG_EMERG;
- break;
- case FUSE_LOG_ALERT:
- priority = LOG_ALERT;
- break;
- case FUSE_LOG_CRIT:
- priority = LOG_CRIT;
- break;
- case FUSE_LOG_ERR:
- priority = LOG_ERR;
- break;
- case FUSE_LOG_WARNING:
- priority = LOG_WARNING;
- break;
- case FUSE_LOG_NOTICE:
- priority = LOG_NOTICE;
- break;
- case FUSE_LOG_INFO:
- priority = LOG_INFO;
- break;
- case FUSE_LOG_DEBUG:
- priority = LOG_DEBUG;
- break;
- }
- vsyslog(priority, fmt, ap);
- } else {
- vfprintf(stderr, fmt, ap);
- }
-}
-
-static void setup_root(struct lo_data *lo, struct lo_inode *root)
-{
- int fd, res;
- struct stat stat;
- uint64_t mnt_id;
-
- fd = open("/", O_PATH);
- if (fd == -1) {
- fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", lo->source);
- exit(1);
- }
-
- res = do_statx(lo, fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW,
- &mnt_id);
- if (res == -1) {
- fuse_log(FUSE_LOG_ERR, "fstatat(%s): %m\n", lo->source);
- exit(1);
- }
-
- root->filetype = S_IFDIR;
- root->fd = fd;
- root->key.ino = stat.st_ino;
- root->key.dev = stat.st_dev;
- root->key.mnt_id = mnt_id;
- root->nlookup = 2;
- g_atomic_int_set(&root->refcount, 2);
- if (lo->posix_lock) {
- pthread_mutex_init(&root->plock_mutex, NULL);
- root->posix_locks = g_hash_table_new_full(
- g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
- }
-}
-
-static guint lo_key_hash(gconstpointer key)
-{
- const struct lo_key *lkey = key;
-
- return (guint)lkey->ino + (guint)lkey->dev + (guint)lkey->mnt_id;
-}
-
-static gboolean lo_key_equal(gconstpointer a, gconstpointer b)
-{
- const struct lo_key *la = a;
- const struct lo_key *lb = b;
-
- return la->ino == lb->ino && la->dev == lb->dev && la->mnt_id == lb->mnt_id;
-}
-
-static void fuse_lo_data_cleanup(struct lo_data *lo)
-{
- if (lo->inodes) {
- g_hash_table_destroy(lo->inodes);
- }
-
- if (lo->root.posix_locks) {
- g_hash_table_destroy(lo->root.posix_locks);
- }
- lo_map_destroy(&lo->fd_map);
- lo_map_destroy(&lo->dirp_map);
- lo_map_destroy(&lo->ino_map);
-
- if (lo->proc_self_fd >= 0) {
- close(lo->proc_self_fd);
- }
-
- if (lo->proc_self_task >= 0) {
- close(lo->proc_self_task);
- }
-
- if (lo->root.fd >= 0) {
- close(lo->root.fd);
- }
-
- free(lo->xattrmap);
- free_xattrmap(lo);
- free(lo->xattr_security_capability);
- free(lo->source);
-}
-
-static void qemu_version(void)
-{
- printf("virtiofsd version " QEMU_FULL_VERSION "\n" QEMU_COPYRIGHT "\n");
-}
-
-int main(int argc, char *argv[])
-{
- struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
- struct fuse_session *se;
- struct fuse_cmdline_opts opts;
- struct lo_data lo = {
- .sandbox = SANDBOX_NAMESPACE,
- .debug = 0,
- .writeback = 0,
- .posix_lock = 0,
- .allow_direct_io = 0,
- .proc_self_fd = -1,
- .proc_self_task = -1,
- .user_killpriv_v2 = -1,
- .user_posix_acl = -1,
- .user_security_label = -1,
- };
- struct lo_map_elem *root_elem;
- struct lo_map_elem *reserve_elem;
- int ret = -1;
-
- /* Initialize time conversion information for localtime_r(). */
- tzset();
-
- /* Don't mask creation mode, kernel already did that */
- umask(0);
-
- qemu_init_exec_dir(argv[0]);
-
- drop_supplementary_groups();
-
- pthread_mutex_init(&lo.mutex, NULL);
- lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal);
- lo.root.fd = -1;
- lo.root.fuse_ino = FUSE_ROOT_ID;
- lo.cache = CACHE_AUTO;
-
- /*
- * Set up the ino map like this:
- * [0] Reserved (will not be used)
- * [1] Root inode
- */
- lo_map_init(&lo.ino_map);
- reserve_elem = lo_map_reserve(&lo.ino_map, 0);
- if (!reserve_elem) {
- fuse_log(FUSE_LOG_ERR, "failed to alloc reserve_elem.\n");
- goto err_out1;
- }
- reserve_elem->in_use = false;
- root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino);
- if (!root_elem) {
- fuse_log(FUSE_LOG_ERR, "failed to alloc root_elem.\n");
- goto err_out1;
- }
- root_elem->inode = &lo.root;
-
- lo_map_init(&lo.dirp_map);
- lo_map_init(&lo.fd_map);
-
- if (fuse_parse_cmdline(&args, &opts) != 0) {
- goto err_out1;
- }
- fuse_set_log_func(log_func);
- use_syslog = opts.syslog;
- if (use_syslog) {
- openlog("virtiofsd", LOG_PID, LOG_DAEMON);
- }
-
- if (opts.show_help) {
- printf("usage: %s [options]\n\n", argv[0]);
- fuse_cmdline_help();
- printf(" -o source=PATH shared directory tree\n");
- fuse_lowlevel_help();
- ret = 0;
- goto err_out1;
- } else if (opts.show_version) {
- qemu_version();
- fuse_lowlevel_version();
- ret = 0;
- goto err_out1;
- } else if (opts.print_capabilities) {
- print_capabilities();
- ret = 0;
- goto err_out1;
- }
-
- if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) {
- goto err_out1;
- }
-
- if (opts.log_level != 0) {
- current_log_level = opts.log_level;
- } else {
- /* default log level is INFO */
- current_log_level = FUSE_LOG_INFO;
- }
- lo.debug = opts.debug;
- if (lo.debug) {
- current_log_level = FUSE_LOG_DEBUG;
- }
- if (lo.source) {
- struct stat stat;
- int res;
-
- res = lstat(lo.source, &stat);
- if (res == -1) {
- fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n",
- lo.source);
- exit(1);
- }
- if (!S_ISDIR(stat.st_mode)) {
- fuse_log(FUSE_LOG_ERR, "source is not a directory\n");
- exit(1);
- }
- } else {
- lo.source = strdup("/");
- if (!lo.source) {
- fuse_log(FUSE_LOG_ERR, "failed to strdup source\n");
- goto err_out1;
- }
- }
-
- if (lo.xattrmap) {
- lo.xattr = 1;
- parse_xattrmap(&lo);
- }
-
- if (!lo.timeout_set) {
- switch (lo.cache) {
- case CACHE_NONE:
- lo.timeout = 0.0;
- break;
-
- case CACHE_AUTO:
- lo.timeout = 1.0;
- break;
-
- case CACHE_ALWAYS:
- lo.timeout = 86400.0;
- break;
- }
- } else if (lo.timeout < 0) {
- fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", lo.timeout);
- exit(1);
- }
-
- if (lo.user_posix_acl == 1 && !lo.xattr) {
- fuse_log(FUSE_LOG_ERR, "Can't enable posix ACLs. xattrs are disabled."
- "\n");
- exit(1);
- }
-
- lo.use_statx = true;
-
- se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo);
- if (se == NULL) {
- goto err_out1;
- }
-
- if (fuse_set_signal_handlers(se) != 0) {
- goto err_out2;
- }
-
- if (fuse_session_mount(se) != 0) {
- goto err_out3;
- }
-
- fuse_daemonize(opts.foreground);
-
- setup_nofile_rlimit(opts.rlimit_nofile);
-
- /* Must be before sandbox since it wants /proc */
- setup_capng();
-
- setup_sandbox(&lo, se, opts.syslog);
-
- setup_root(&lo, &lo.root);
- /* Block until ctrl+c or fusermount -u */
- ret = virtio_loop(se);
-
- fuse_session_unmount(se);
- cleanup_capng();
-err_out3:
- fuse_remove_signal_handlers(se);
-err_out2:
- fuse_session_destroy(se);
-err_out1:
- fuse_opt_free_args(&args);
-
- fuse_lo_data_cleanup(&lo);
-
- return ret ? 1 : 0;
-}
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
deleted file mode 100644
index 0033dab..0000000
--- a/tools/virtiofsd/passthrough_seccomp.c
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Seccomp sandboxing for virtiofsd
- *
- * Copyright (C) 2019 Red Hat, Inc.
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-#include "qemu/osdep.h"
-#include "passthrough_seccomp.h"
-#include "fuse_i.h"
-#include "fuse_log.h"
-#include <seccomp.h>
-
-/* Bodge for libseccomp 2.4.2 which broke ppoll */
-#if !defined(__SNR_ppoll) && defined(__SNR_brk)
-#ifdef __NR_ppoll
-#define __SNR_ppoll __NR_ppoll
-#else
-#define __SNR_ppoll __PNR_ppoll
-#endif
-#endif
-
-static const int syscall_allowlist[] = {
- /* TODO ireg sem*() syscalls */
- SCMP_SYS(brk),
- SCMP_SYS(capget), /* For CAP_FSETID */
- SCMP_SYS(capset),
- SCMP_SYS(clock_gettime),
- SCMP_SYS(clone),
-#ifdef __NR_clone3
- SCMP_SYS(clone3),
-#endif
- SCMP_SYS(close),
- SCMP_SYS(copy_file_range),
- SCMP_SYS(dup),
- SCMP_SYS(eventfd2),
- SCMP_SYS(exit),
- SCMP_SYS(exit_group),
- SCMP_SYS(fallocate),
- SCMP_SYS(fchdir),
- SCMP_SYS(fchmod),
- SCMP_SYS(fchmodat),
- SCMP_SYS(fchownat),
- SCMP_SYS(fcntl),
- SCMP_SYS(fdatasync),
- SCMP_SYS(fgetxattr),
- SCMP_SYS(flistxattr),
- SCMP_SYS(flock),
- SCMP_SYS(fremovexattr),
- SCMP_SYS(fsetxattr),
- SCMP_SYS(fstat),
- SCMP_SYS(fstatfs),
- SCMP_SYS(fstatfs64),
- SCMP_SYS(fsync),
- SCMP_SYS(ftruncate),
- SCMP_SYS(futex),
- SCMP_SYS(getdents),
- SCMP_SYS(getdents64),
- SCMP_SYS(getegid),
- SCMP_SYS(geteuid),
- SCMP_SYS(getpid),
- SCMP_SYS(gettid),
- SCMP_SYS(gettimeofday),
- SCMP_SYS(getxattr),
- SCMP_SYS(linkat),
- SCMP_SYS(listxattr),
- SCMP_SYS(lseek),
- SCMP_SYS(_llseek), /* For POWER */
- SCMP_SYS(madvise),
- SCMP_SYS(mkdirat),
- SCMP_SYS(mknodat),
- SCMP_SYS(mmap),
- SCMP_SYS(mprotect),
- SCMP_SYS(mremap),
- SCMP_SYS(munmap),
- SCMP_SYS(newfstatat),
- SCMP_SYS(statx),
- SCMP_SYS(open),
- SCMP_SYS(openat),
- SCMP_SYS(ppoll),
- SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */
- SCMP_SYS(preadv),
- SCMP_SYS(pread64),
- SCMP_SYS(pwritev),
- SCMP_SYS(pwrite64),
- SCMP_SYS(read),
- SCMP_SYS(readlinkat),
- SCMP_SYS(recvmsg),
- SCMP_SYS(renameat),
- SCMP_SYS(renameat2),
- SCMP_SYS(removexattr),
- SCMP_SYS(restart_syscall),
-#ifdef __NR_rseq
- SCMP_SYS(rseq), /* required since glibc 2.35 */
-#endif
- SCMP_SYS(rt_sigaction),
- SCMP_SYS(rt_sigprocmask),
- SCMP_SYS(rt_sigreturn),
- SCMP_SYS(sched_getattr),
- SCMP_SYS(sched_setattr),
- SCMP_SYS(sendmsg),
- SCMP_SYS(setresgid),
- SCMP_SYS(setresuid),
-#ifdef __NR_setresgid32
- SCMP_SYS(setresgid32),
-#endif
-#ifdef __NR_setresuid32
- SCMP_SYS(setresuid32),
-#endif
- SCMP_SYS(set_robust_list),
- SCMP_SYS(setxattr),
- SCMP_SYS(sigreturn),
- SCMP_SYS(symlinkat),
- SCMP_SYS(syncfs),
- SCMP_SYS(time), /* Rarely needed, except on static builds */
- SCMP_SYS(tgkill),
- SCMP_SYS(unlinkat),
- SCMP_SYS(unshare),
- SCMP_SYS(utimensat),
- SCMP_SYS(write),
- SCMP_SYS(writev),
- SCMP_SYS(umask),
-};
-
-/* Syscalls used when --syslog is enabled */
-static const int syscall_allowlist_syslog[] = {
- SCMP_SYS(send),
- SCMP_SYS(sendto),
-};
-
-static void add_allowlist(scmp_filter_ctx ctx, const int syscalls[], size_t len)
-{
- size_t i;
-
- for (i = 0; i < len; i++) {
- if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscalls[i], 0) != 0) {
- fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d failed\n",
- syscalls[i]);
- exit(1);
- }
- }
-}
-
-void setup_seccomp(bool enable_syslog)
-{
- scmp_filter_ctx ctx;
-
-#ifdef SCMP_ACT_KILL_PROCESS
- ctx = seccomp_init(SCMP_ACT_KILL_PROCESS);
- /* Handle a newer libseccomp but an older kernel */
- if (!ctx && errno == EOPNOTSUPP) {
- ctx = seccomp_init(SCMP_ACT_TRAP);
- }
-#else
- ctx = seccomp_init(SCMP_ACT_TRAP);
-#endif
- if (!ctx) {
- fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n");
- exit(1);
- }
-
- add_allowlist(ctx, syscall_allowlist, G_N_ELEMENTS(syscall_allowlist));
- if (enable_syslog) {
- add_allowlist(ctx, syscall_allowlist_syslog,
- G_N_ELEMENTS(syscall_allowlist_syslog));
- }
-
- /* libvhost-user calls this for post-copy migration, we don't need it */
- if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS),
- SCMP_SYS(userfaultfd), 0) != 0) {
- fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n");
- exit(1);
- }
-
- if (seccomp_load(ctx) < 0) {
- fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n");
- exit(1);
- }
-
- seccomp_release(ctx);
-}
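setup_seccomp() above installs an allowlist with a kill-by-default action. The sketch below exercises the same libseccomp calls, seccomp_init(), seccomp_rule_add(), seccomp_load() and seccomp_release(), but deliberately flips the policy to allow-by-default and blocks only getpid(2), so it can run to completion without enumerating every syscall the C runtime needs. It is an illustration of the API, not a stand-in for the filter above; build with -lseccomp.

#include <errno.h>
#include <seccomp.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    /* Allow everything by default; a real sandbox inverts this. */
    scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);

    if (!ctx) {
        fprintf(stderr, "seccomp_init failed\n");
        return 1;
    }
    /* Make getpid(2) fail with EPERM instead of killing the process. */
    if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(getpid), 0) != 0 ||
        seccomp_load(ctx) < 0) {
        fprintf(stderr, "failed to install filter\n");
        seccomp_release(ctx);
        return 1;
    }
    seccomp_release(ctx);

    errno = 0;
    printf("getpid() -> %ld (errno=%d)\n", (long)getpid(), errno);
    return 0;
}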
diff --git a/tools/virtiofsd/passthrough_seccomp.h b/tools/virtiofsd/passthrough_seccomp.h
deleted file mode 100644
index 12674fc..0000000
--- a/tools/virtiofsd/passthrough_seccomp.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Seccomp sandboxing for virtiofsd
- *
- * Copyright (C) 2019 Red Hat, Inc.
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-#ifndef VIRTIOFSD_PASSTHROUGH_SECCOMP_H
-#define VIRTIOFSD_PASSTHROUGH_SECCOMP_H
-
-void setup_seccomp(bool enable_syslog);
-
-#endif /* VIRTIOFSD_PASSTHROUGH_SECCOMP_H */
diff --git a/ui/console.c b/ui/console.c
index ab43561..98b701f 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -28,6 +28,7 @@
#include "qapi/error.h"
#include "qapi/qapi-commands-ui.h"
#include "qemu/coroutine.h"
+#include "qemu/error-report.h"
#include "qemu/fifo8.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
diff --git a/ui/dbus-clipboard.c b/ui/dbus-clipboard.c
index 5843d26..df9a754 100644
--- a/ui/dbus-clipboard.c
+++ b/ui/dbus-clipboard.c
@@ -23,6 +23,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/dbus.h"
+#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qom/object_interfaces.h"
#include "sysemu/sysemu.h"
diff --git a/ui/dbus-console.c b/ui/dbus-console.c
index 898a4ac..0bfaa22 100644
--- a/ui/dbus-console.c
+++ b/ui/dbus-console.c
@@ -22,6 +22,7 @@
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
+#include "qemu/error-report.h"
#include "qapi/error.h"
#include "ui/input.h"
#include "ui/kbd-state.h"
diff --git a/ui/dbus-listener.c b/ui/dbus-listener.c
index f9fc8ed..57d4e40 100644
--- a/ui/dbus-listener.c
+++ b/ui/dbus-listener.c
@@ -22,6 +22,7 @@
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
+#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "dbus.h"
#include <gio/gunixfdlist.h>
diff --git a/ui/dbus.c b/ui/dbus.c
index 32d88dc..f2dcba0 100644
--- a/ui/dbus.c
+++ b/ui/dbus.c
@@ -23,6 +23,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/cutils.h"
+#include "qemu/error-report.h"
#include "qemu/dbus.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
diff --git a/ui/egl-headless.c b/ui/egl-headless.c
index 7a30fd97..ae07e91 100644
--- a/ui/egl-headless.c
+++ b/ui/egl-headless.c
@@ -1,4 +1,5 @@
#include "qemu/osdep.h"
+#include "qemu/error-report.h"
#include "qemu/module.h"
#include "sysemu/sysemu.h"
#include "ui/console.h"
diff --git a/ui/gtk.c b/ui/gtk.c
index 7f752d8..fd82e9b 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -36,6 +36,7 @@
#include "qapi/qapi-commands-machine.h"
#include "qapi/qapi-commands-misc.h"
#include "qemu/cutils.h"
+#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "ui/console.h"
diff --git a/ui/spice-app.c b/ui/spice-app.c
index 7e71e18..ad7f055 100644
--- a/ui/spice-app.c
+++ b/ui/spice-app.c
@@ -29,6 +29,7 @@
#include "ui/console.h"
#include "ui/spice-display.h"
#include "qemu/config-file.h"
+#include "qemu/error-report.h"
#include "qemu/option.h"
#include "qemu/cutils.h"
#include "qemu/module.h"
diff --git a/ui/spice-core.c b/ui/spice-core.c
index 72f8f16..76f7c2b 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -413,9 +413,6 @@
.type = QEMU_OPT_BOOL,
#endif
},{
- .name = "password",
- .type = QEMU_OPT_STRING,
- },{
.name = "password-secret",
.type = QEMU_OPT_STRING,
},{
@@ -666,20 +663,8 @@
}
passwordSecret = qemu_opt_get(opts, "password-secret");
if (passwordSecret) {
- if (qemu_opt_get(opts, "password")) {
- error_report("'password' option is mutually exclusive with "
- "'password-secret'");
- exit(1);
- }
password = qcrypto_secret_lookup_as_utf8(passwordSecret,
&error_fatal);
- } else {
- str = qemu_opt_get(opts, "password");
- if (str) {
- warn_report("'password' option is deprecated and insecure, "
- "use 'password-secret' instead");
- password = g_strdup(str);
- }
}
if (tls_port) {
diff --git a/ui/spice-display.c b/ui/spice-display.c
index 0616a69..16802f9 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -17,6 +17,7 @@
#include "qemu/osdep.h"
#include "ui/qemu-spice.h"
+#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
diff --git a/ui/udmabuf.c b/ui/udmabuf.c
index cbf4357..6a0a11a 100644
--- a/ui/udmabuf.c
+++ b/ui/udmabuf.c
@@ -7,6 +7,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "ui/console.h"
+#include "qemu/error-report.h"
#include <sys/ioctl.h>
diff --git a/ui/vdagent.c b/ui/vdagent.c
index 1f51a78..8a65149 100644
--- a/ui/vdagent.c
+++ b/ui/vdagent.c
@@ -2,6 +2,7 @@
#include "qapi/error.h"
#include "chardev/char.h"
#include "qemu/buffer.h"
+#include "qemu/error-report.h"
#include "qemu/option.h"
#include "qemu/units.h"
#include "hw/qdev-core.h"
diff --git a/util/cacheflush.c b/util/cacheflush.c
index 2c2c73e..06c2333 100644
--- a/util/cacheflush.c
+++ b/util/cacheflush.c
@@ -121,8 +121,12 @@
static bool have_coherent_icache;
#endif
-#if defined(__aarch64__) && !defined(CONFIG_DARWIN)
-/* Apple does not expose CTR_EL0, so we must use system interfaces. */
+#if defined(__aarch64__) && !defined(CONFIG_DARWIN) && !defined(CONFIG_WIN32)
+/*
+ * Apple does not expose CTR_EL0, so we must use system interfaces.
+ * Windows does not either; in that case we use the generic implementation
+ * of flush_idcache_range.
+ */
static uint64_t save_ctr_el0;
static void arch_cache_info(int *isize, int *dsize)
{
@@ -225,7 +229,11 @@
/* Caches are coherent and do not require flushing; symbol inline. */
-#elif defined(__aarch64__)
+#elif defined(__aarch64__) && !defined(CONFIG_WIN32)
+/*
+ * For Windows, we use the generic implementation of flush_idcache_range,
+ * which calls FlushInstructionCache through __builtin___clear_cache.
+ */
#ifdef CONFIG_DARWIN
/* Apple does not expose CTR_EL0, so we must use system interfaces. */
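The hunks above make Windows aarch64 hosts take the generic flush_idcache_range() path, which reduces to __builtin___clear_cache(). The POSIX-only sketch below shows the call pattern a JIT needs after writing instructions into an executable buffer; the placeholder byte is never executed, so the sketch stays architecture-neutral, and the mmap/munmap calls are just a convenient way to get an executable mapping on a POSIX host.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
    unsigned char code[] = { 0xc3 };   /* placeholder byte, never executed */
    size_t len = sizeof(code);
    void *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (buf == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    memcpy(buf, code, len);
    /* Synchronize the I-cache with the D-cache for the written range */
    __builtin___clear_cache((char *)buf, (char *)buf + len);
    printf("wrote %zu byte(s) and flushed the icache range\n", len);
    munmap(buf, 4096);
    return 0;
}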
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 297db35..6d6e1b5 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -331,7 +331,7 @@
assert(next_zero > start);
*pnum = next_zero - start;
- return false;
+ return true;
}
bool hbitmap_empty(const HBitmap *hb)
diff --git a/util/qemu-config.c b/util/qemu-config.c
index d63f274..42076ef 100644
--- a/util/qemu-config.c
+++ b/util/qemu-config.c
@@ -2,7 +2,6 @@
#include "block/qdict.h" /* for qdict_extract_subqdict() */
#include "qapi/error.h"
#include "qapi/qapi-commands-misc.h"
-#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qlist.h"
#include "qemu/error-report.h"
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index bae938c..93d2505 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -18,6 +18,10 @@
#include "qemu/tsan.h"
#include "qemu/bitmap.h"
+#ifdef CONFIG_PTHREAD_SET_NAME_NP
+#include <pthread_np.h>
+#endif
+
static bool name_threads;
void qemu_thread_naming(bool enable)
@@ -25,7 +29,8 @@
name_threads = enable;
#if !defined CONFIG_PTHREAD_SETNAME_NP_W_TID && \
- !defined CONFIG_PTHREAD_SETNAME_NP_WO_TID
+ !defined CONFIG_PTHREAD_SETNAME_NP_WO_TID && \
+ !defined CONFIG_PTHREAD_SET_NAME_NP
/* This is a debugging option, not fatal */
if (enable) {
fprintf(stderr, "qemu: thread naming not supported on this host\n");
@@ -223,7 +228,7 @@
error_exit(err, __func__);
}
-static bool
+static bool TSA_NO_TSA
qemu_cond_timedwait_ts(QemuCond *cond, QemuMutex *mutex, struct timespec *ts,
const char *file, const int line)
{
@@ -480,6 +485,8 @@
pthread_setname_np(pthread_self(), qemu_thread_args->name);
# elif defined(CONFIG_PTHREAD_SETNAME_NP_WO_TID)
pthread_setname_np(qemu_thread_args->name);
+# elif defined(CONFIG_PTHREAD_SET_NAME_NP)
+ pthread_set_name_np(pthread_self(), qemu_thread_args->name);
# endif
}
QEMU_TSAN_ANNOTATE_THREAD_NAME(qemu_thread_args->name);
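The hunk above adds a third spelling of the thread-naming call, the FreeBSD-style pthread_set_name_np() from <pthread_np.h>, next to the two pthread_setname_np() variants already probed for. Below is a minimal Linux-only sketch of the glibc variant; the plain __linux__ guard stands in for QEMU's CONFIG_PTHREAD_* configure results, names are limited to 15 characters plus the terminating NUL, and the program builds with -pthread.

#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>

static void *worker(void *arg)
{
    char name[16] = "";

    (void)arg;
#if defined(__linux__)
    /* glibc/musl spelling: (thread, name), name <= 15 chars + NUL */
    pthread_setname_np(pthread_self(), "demo-worker");
    pthread_getname_np(pthread_self(), name, sizeof(name));
#endif
    printf("thread name: '%s'\n", name);
    return NULL;
}

int main(void)
{
    pthread_t t;

    if (pthread_create(&t, NULL, worker, NULL) != 0) {
        return 1;
    }
    pthread_join(t, NULL);
    return 0;
}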
diff --git a/util/trace-events b/util/trace-events
index c8f53d7..16f78d8 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -93,6 +93,7 @@
qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p"
#userfaultfd.c
+uffd_detect_open_mode(int mode) "%d"
uffd_query_features_nosys(int err) "errno: %i"
uffd_query_features_api_failed(int err) "errno: %i"
uffd_create_fd_nosys(int err) "errno: %i"
diff --git a/util/userfaultfd.c b/util/userfaultfd.c
index 4953b31..fdff486 100644
--- a/util/userfaultfd.c
+++ b/util/userfaultfd.c
@@ -18,10 +18,42 @@
#include <poll.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
+#include <fcntl.h>
+
+typedef enum {
+ UFFD_UNINITIALIZED = 0,
+ UFFD_USE_DEV_PATH,
+ UFFD_USE_SYSCALL,
+} uffd_open_mode;
int uffd_open(int flags)
{
#if defined(__NR_userfaultfd)
+ static uffd_open_mode open_mode;
+ static int uffd_dev;
+
+    /* Detect how to create the uffd descriptor the first time through */
+ if (open_mode == UFFD_UNINITIALIZED) {
+ /*
+ * Make /dev/userfaultfd the default approach because it has better
+         * permission controls, while still allowing kernel faults without
+         * any privilege requirement (e.g. CAP_SYS_PTRACE).
+ */
+ uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
+ if (uffd_dev >= 0) {
+ open_mode = UFFD_USE_DEV_PATH;
+ } else {
+ /* Fallback to the system call */
+ open_mode = UFFD_USE_SYSCALL;
+ }
+ trace_uffd_detect_open_mode(open_mode);
+ }
+
+ if (open_mode == UFFD_USE_DEV_PATH) {
+ assert(uffd_dev >= 0);
+ return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags);
+ }
+
return syscall(__NR_userfaultfd, flags);
#else
return -EINVAL;
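A standalone sketch of the probe order implemented above: prefer the /dev/userfaultfd device node, fall back to the userfaultfd(2) syscall, then complete the UFFDIO_API handshake. USERFAULTFD_IOC_NEW needs uapi headers new enough to define it (6.1+), hence the #ifdef; the syscall fallback may still need privilege, or vm.unprivileged_userfaultfd=1, to trap kernel-mode faults, which is the motivation given in the comment above. The demo_uffd_open() name is made up, and unlike the cached static descriptor above, this sketch reopens the device on each call.

#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static int demo_uffd_open(void)
{
#ifdef USERFAULTFD_IOC_NEW
    int dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);

    if (dev >= 0) {
        int fd = ioctl(dev, USERFAULTFD_IOC_NEW, O_CLOEXEC);

        close(dev);
        if (fd >= 0) {
            return fd;
        }
    }
#endif
    /* Fall back to the system call */
    return syscall(__NR_userfaultfd, O_CLOEXEC);
}

int main(void)
{
    struct uffdio_api api = { .api = UFFD_API, .features = 0 };
    int fd = demo_uffd_open();

    if (fd < 0) {
        perror("userfaultfd");
        return 1;
    }
    if (ioctl(fd, UFFDIO_API, &api) < 0) {
        perror("UFFDIO_API");
        return 1;
    }
    printf("userfaultfd ready, features 0x%llx\n",
           (unsigned long long)api.features);
    close(fd);
    return 0;
}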
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index 145eb17..40f36ea 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -8,6 +8,7 @@
* later. See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
+#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/vhost-user-server.h"
#include "block/aio-wait.h"