Merge remote-tracking branch 'remotes/thuth-gitlab/tags/pull-request-2021-03-09' into staging

* Add some missing gitlab-CI job dependencies
* Re-enable "make check SPEED=slow"
* Improve the gitlab-pipeline-status script
* Clean up inclusing of qtest.h headers
* Improve libqos/qgraph documentation
* Fix downloading problem in the acceptance tests
* Remove deprecated target tilegx
* Add new bsd-user maintainers

# gpg: Signature made Tue 09 Mar 2021 10:27:29 GMT
# gpg:                using RSA key 27B88847EEE0250118F3EAB92ED9D774FE702DB5
# gpg:                issuer "thuth@redhat.com"
# gpg: Good signature from "Thomas Huth <th.huth@gmx.de>" [full]
# gpg:                 aka "Thomas Huth <thuth@redhat.com>" [full]
# gpg:                 aka "Thomas Huth <huth@tuxfamily.org>" [full]
# gpg:                 aka "Thomas Huth <th.huth@posteo.de>" [unknown]
# Primary key fingerprint: 27B8 8847 EEE0 2501 18F3  EAB9 2ED9 D774 FE70 2DB5

* remotes/thuth-gitlab/tags/pull-request-2021-03-09:
  bsd-user: Add new maintainers
  Remove deprecated target tilegx
  Acceptance Tests: restore filtering of tests by target arch
  Acceptance Tests: restore downloading of VM images
  docs/devel/qgraph: improve qgraph documentation
  libqos/qgraph: format qgraph comments for sphinx documentation
  scripts/ci/gitlab-pipeline-status: give more info when pipeline not found
  scripts/ci/gitlab-pipeline-status: give more information on failures
  scripts/ci/gitlab-pipeline-status: split utlity function for HTTP GET
  meson: Re-enable the possibility to run "make check SPEED=slow"
  docker: OpenSBI build job depends on OpenSBI container
  docker: EDK2 build job depends on EDK2 container
  docker: Alpine build job depends on Alpine container
  qtest: delete superfluous inclusions of qtest.h

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/MAINTAINERS b/MAINTAINERS
index 06ea6e0..e04ae21 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -747,10 +747,17 @@
 F: include/hw/misc/iotkit-sysctl.h
 F: hw/misc/iotkit-sysinfo.c
 F: include/hw/misc/iotkit-sysinfo.h
+F: hw/misc/armsse-cpu-pwrctrl.c
+F: include/hw/misc/armsse-cpu-pwrctrl.h
 F: hw/misc/armsse-cpuid.c
 F: include/hw/misc/armsse-cpuid.h
 F: hw/misc/armsse-mhu.c
 F: include/hw/misc/armsse-mhu.h
+F: hw/timer/sse-counter.c
+F: include/hw/timer/sse-counter.h
+F: hw/timer/sse-timer.c
+F: include/hw/timer/sse-timer.h
+F: tests/qtest/sse-timer-test.c
 F: docs/system/arm/mps2.rst
 
 Musca
@@ -3136,10 +3143,13 @@
 parallels
 M: Stefan Hajnoczi <stefanha@redhat.com>
 M: Denis V. Lunev <den@openvz.org>
+M: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
 L: qemu-block@nongnu.org
 S: Supported
 F: block/parallels.c
+F: block/parallels-ext.c
 F: docs/interop/parallels.txt
+T: git https://src.openvz.org/scm/~vsementsov/qemu.git parallels
 
 qed
 M: Stefan Hajnoczi <stefanha@redhat.com>
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 4db74fc..f62f12e 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -244,11 +244,11 @@
 
 void cpu_exec_step_atomic(CPUState *cpu)
 {
+    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     target_ulong cs_base, pc;
     uint32_t flags;
-    uint32_t cflags = 1;
-    uint32_t cf_mask = cflags & CF_HASH_MASK;
+    uint32_t cflags = (curr_cflags(cpu) & ~CF_PARALLEL) | 1;
     int tb_exit;
 
     if (sigsetjmp(cpu->jmp_env, 0) == 0) {
@@ -257,15 +257,15 @@
         g_assert(!cpu->running);
         cpu->running = true;
 
-        tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
+        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+        tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
+
         if (tb == NULL) {
             mmap_lock();
             tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
             mmap_unlock();
         }
 
-        /* Since we got here, we know that parallel_cpus must be true.  */
-        parallel_cpus = false;
         cpu_exec_enter(cpu);
         /* execute the generated code */
         trace_exec_tb(tb, pc);
@@ -293,7 +293,6 @@
      * the execution.
      */
     g_assert(cpu_in_exclusive_context(cpu));
-    parallel_cpus = true;
     cpu->running = false;
     end_exclusive();
 }
@@ -304,7 +303,7 @@
     CPUArchState *env;
     tb_page_addr_t phys_page1;
     uint32_t flags;
-    uint32_t cf_mask;
+    uint32_t cflags;
     uint32_t trace_vcpu_dstate;
 };
 
@@ -318,7 +317,7 @@
         tb->cs_base == desc->cs_base &&
         tb->flags == desc->flags &&
         tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
-        (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == desc->cf_mask) {
+        tb_cflags(tb) == desc->cflags) {
         /* check next page if needed */
         if (tb->page_addr[1] == -1) {
             return true;
@@ -338,7 +337,7 @@
 
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                    target_ulong cs_base, uint32_t flags,
-                                   uint32_t cf_mask)
+                                   uint32_t cflags)
 {
     tb_page_addr_t phys_pc;
     struct tb_desc desc;
@@ -347,7 +346,7 @@
     desc.env = (CPUArchState *)cpu->env_ptr;
     desc.cs_base = cs_base;
     desc.flags = flags;
-    desc.cf_mask = cf_mask;
+    desc.cflags = cflags;
     desc.trace_vcpu_dstate = *cpu->trace_dstate;
     desc.pc = pc;
     phys_pc = get_page_addr_code(desc.env, pc);
@@ -355,7 +354,7 @@
         return NULL;
     }
     desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
-    h = tb_hash_func(phys_pc, pc, flags, cf_mask, *cpu->trace_dstate);
+    h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
     return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
 }
 
@@ -415,16 +414,19 @@
 
 static inline TranslationBlock *tb_find(CPUState *cpu,
                                         TranslationBlock *last_tb,
-                                        int tb_exit, uint32_t cf_mask)
+                                        int tb_exit, uint32_t cflags)
 {
+    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     target_ulong cs_base, pc;
     uint32_t flags;
 
-    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+
+    tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
     if (tb == NULL) {
         mmap_lock();
-        tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask);
+        tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
         mmap_unlock();
         /* We add the TB in the virtual pc hash table for the fast lookup */
         qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
@@ -490,7 +492,7 @@
         if (replay_has_exception()
             && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) {
             /* Execute just one insn to trigger exception pending in the log */
-            cpu->cflags_next_tb = (curr_cflags() & ~CF_USE_ICOUNT) | 1;
+            cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT) | 1;
         }
 #endif
         return false;
@@ -787,7 +789,7 @@
                have CF_INVALID set, -1 is a convenient invalid value that
                does not require tcg headers for cpu_common_reset.  */
             if (cflags == -1) {
-                cflags = curr_cflags();
+                cflags = curr_cflags(cpu);
             } else {
                 cpu->cflags_next_tb = -1;
             }
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index 42973fb..847d207 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -114,8 +114,7 @@
     char thread_name[VCPU_THREAD_NAME_SIZE];
 
     g_assert(tcg_enabled());
-
-    parallel_cpus = (current_machine->smp.max_cpus > 1);
+    tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1);
 
     cpu->thread = g_malloc0(sizeof(QemuThread));
     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 4a66055..018b54c 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -269,7 +269,7 @@
     static QemuThread *single_tcg_cpu_thread;
 
     g_assert(tcg_enabled());
-    parallel_cpus = false;
+    tcg_cpu_init_cflags(cpu, false);
 
     if (!single_tcg_cpu_thread) {
         cpu->thread = g_malloc0(sizeof(QemuThread));
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 6144d9d..6cdcaa2 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -41,6 +41,14 @@
 
 /* common functionality among all TCG variants */
 
+void tcg_cpu_init_cflags(CPUState *cpu, bool parallel)
+{
+    uint32_t cflags = cpu->cluster_index << CF_CLUSTER_SHIFT;
+    cflags |= parallel ? CF_PARALLEL : 0;
+    cflags |= icount_enabled() ? CF_USE_ICOUNT : 0;
+    cpu->tcg_cflags = cflags;
+}
+
 void tcg_cpus_destroy(CPUState *cpu)
 {
     cpu_thread_signal_destroyed(cpu);
diff --git a/accel/tcg/tcg-accel-ops.h b/accel/tcg/tcg-accel-ops.h
index 4813000..6a5fcef 100644
--- a/accel/tcg/tcg-accel-ops.h
+++ b/accel/tcg/tcg-accel-ops.h
@@ -17,5 +17,6 @@
 void tcg_cpus_destroy(CPUState *cpu);
 int tcg_cpus_exec(CPUState *cpu);
 void tcg_handle_interrupt(CPUState *cpu, int mask);
+void tcg_cpu_init_cflags(CPUState *cpu, bool parallel);
 
 #endif /* TCG_CPUS_H */
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index d736f4f..49f5de3 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -27,10 +27,10 @@
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
-#include "exec/tb-lookup.h"
 #include "disas/disas.h"
 #include "exec/log.h"
 #include "tcg/tcg.h"
+#include "exec/tb-lookup.h"
 
 /* 32-bit helpers */
 
@@ -152,7 +152,9 @@
     target_ulong cs_base, pc;
     uint32_t flags;
 
-    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, curr_cflags());
+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+
+    tb = tb_lookup(cpu, pc, cs_base, flags, curr_cflags(cpu));
     if (tb == NULL) {
         return tcg_code_gen_epilogue;
     }
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index bbd919a..f32df8b 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -224,7 +224,6 @@
 TCGContext tcg_init_ctx;
 __thread TCGContext *tcg_ctx;
 TBContext tb_ctx;
-bool parallel_cpus;
 
 static void page_table_config_init(void)
 {
@@ -1311,7 +1310,7 @@
     return a->pc == b->pc &&
         a->cs_base == b->cs_base &&
         a->flags == b->flags &&
-        (tb_cflags(a) & CF_HASH_MASK) == (tb_cflags(b) & CF_HASH_MASK) &&
+        (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
         a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
         a->page_addr[0] == b->page_addr[0] &&
         a->page_addr[1] == b->page_addr[1];
@@ -1616,6 +1615,7 @@
     PageDesc *p;
     uint32_t h;
     tb_page_addr_t phys_pc;
+    uint32_t orig_cflags = tb_cflags(tb);
 
     assert_memory_lock();
 
@@ -1626,7 +1626,7 @@
 
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK,
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
                      tb->trace_vcpu_dstate);
     if (!qht_remove(&tb_ctx.htable, tb, h)) {
         return;
@@ -1793,6 +1793,7 @@
     uint32_t h;
 
     assert_memory_lock();
+    tcg_debug_assert(!(tb->cflags & CF_INVALID));
 
     /*
      * Add the TB to the page list, acquiring first the pages's locks.
@@ -1811,7 +1812,7 @@
     }
 
     /* add in the hash table */
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
                      tb->trace_vcpu_dstate);
     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
 
@@ -1865,9 +1866,6 @@
         cflags = (cflags & ~CF_COUNT_MASK) | 1;
     }
 
-    cflags &= ~CF_CLUSTER_MASK;
-    cflags |= cpu->cluster_index << CF_CLUSTER_SHIFT;
-
     max_insns = cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
@@ -2194,7 +2192,7 @@
     if (current_tb_modified) {
         page_collection_unlock(pages);
         /* Force execution of one insn next time.  */
-        cpu->cflags_next_tb = 1 | curr_cflags();
+        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
         mmap_unlock();
         cpu_loop_exit_noexc(cpu);
     }
@@ -2362,7 +2360,7 @@
 #ifdef TARGET_HAS_PRECISE_SMC
     if (current_tb_modified) {
         /* Force execution of one insn next time.  */
-        cpu->cflags_next_tb = 1 | curr_cflags();
+        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
         return true;
     }
 #endif
@@ -2438,7 +2436,7 @@
      * operations only (which execute after completion) so we don't
      * double instrument the instruction.
      */
-    cpu->cflags_next_tb = curr_cflags() | CF_MEMI_ONLY | CF_LAST_IO | n;
+    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
 
     qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
                            "cpu_io_recompile: rewound execution of TB to "
diff --git a/block.c b/block.c
index a1f3cec..2daff6d 100644
--- a/block.c
+++ b/block.c
@@ -1440,7 +1440,7 @@
          * Check for empty string or invalid characters, but not if it is
          * generated (generated names use characters not available to the user)
          */
-        error_setg(errp, "Invalid node name");
+        error_setg(errp, "Invalid node-name: '%s'", node_name);
         return;
     }
 
@@ -1453,7 +1453,7 @@
 
     /* takes care of avoiding duplicates node names */
     if (bdrv_find_node(node_name)) {
-        error_setg(errp, "Duplicate node name");
+        error_setg(errp, "Duplicate nodes with node-name='%s'", node_name);
         goto out;
     }
 
@@ -5432,7 +5432,7 @@
         }
     }
 
-    error_setg(errp, "Cannot find device=%s nor node_name=%s",
+    error_setg(errp, "Cannot find device=\'%s\' nor node-name=\'%s\'",
                      device ? device : "",
                      node_name ? node_name : "");
     return NULL;
@@ -6752,7 +6752,7 @@
     AioContext *aio_context;
 
     if (!to_replace_bs) {
-        error_setg(errp, "Node name '%s' not found", node_name);
+        error_setg(errp, "Failed to find node with node-name='%s'", node_name);
         return NULL;
     }
 
diff --git a/block/backup-top.c b/block/backup-top.c
index d1253e1..589e8b6 100644
--- a/block/backup-top.c
+++ b/block/backup-top.c
@@ -45,6 +45,12 @@
         BlockDriverState *bs, uint64_t offset, uint64_t bytes,
         QEMUIOVector *qiov, int flags)
 {
+    BDRVBackupTopState *s = bs->opaque;
+
+    if (!s->active) {
+        return -EIO;
+    }
+
     return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
 }
 
@@ -54,6 +60,10 @@
     BDRVBackupTopState *s = bs->opaque;
     uint64_t off, end;
 
+    if (!s->active) {
+        return -EIO;
+    }
+
     if (flags & BDRV_REQ_WRITE_UNCHANGED) {
         return 0;
     }
diff --git a/block/backup.c b/block/backup.c
index 94e6dcd..6cf2f97 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -103,6 +103,7 @@
 static void backup_clean(Job *job)
 {
     BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
+    block_job_remove_all_bdrv(&s->common);
     bdrv_backup_top_drop(s->backup_top);
 }
 
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 9b9cd71..a0eaa28 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -726,6 +726,19 @@
     return hbitmap_serialization_align(bitmap->bitmap);
 }
 
+/* Return the disk size covered by a chunk of serialized bitmap data. */
+uint64_t bdrv_dirty_bitmap_serialization_coverage(int serialized_chunk_size,
+                                                  const BdrvDirtyBitmap *bitmap)
+{
+    uint64_t granularity = bdrv_dirty_bitmap_granularity(bitmap);
+    uint64_t limit = granularity * (serialized_chunk_size << 3);
+
+    assert(QEMU_IS_ALIGNED(limit,
+                           bdrv_dirty_bitmap_serialization_align(bitmap)));
+    return limit;
+}
+
+
 void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap,
                                       uint8_t *buf, uint64_t offset,
                                       uint64_t bytes)
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
index ab2c4d4..cb5d896 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -20,8 +20,17 @@
 #include "sysemu/block-backend.h"
 #include "util/block-helpers.h"
 
+/*
+ * Sector units are 512 bytes regardless of the
+ * virtio_blk_config->blk_size value.
+ */
+#define VIRTIO_BLK_SECTOR_BITS 9
+#define VIRTIO_BLK_SECTOR_SIZE (1ull << VIRTIO_BLK_SECTOR_BITS)
+
 enum {
     VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1,
+    VHOST_USER_BLK_MAX_DISCARD_SECTORS = 32768,
+    VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS = 32768,
 };
 struct virtio_blk_inhdr {
     unsigned char status;
@@ -58,30 +67,102 @@
     free(req);
 }
 
+static bool vu_blk_sect_range_ok(VuBlkExport *vexp, uint64_t sector,
+                                 size_t size)
+{
+    uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
+    uint64_t total_sectors;
+
+    if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+        return false;
+    }
+    if ((sector << VIRTIO_BLK_SECTOR_BITS) % vexp->blk_size) {
+        return false;
+    }
+    blk_get_geometry(vexp->export.blk, &total_sectors);
+    if (sector > total_sectors || nb_sectors > total_sectors - sector) {
+        return false;
+    }
+    return true;
+}
+
 static int coroutine_fn
-vu_blk_discard_write_zeroes(BlockBackend *blk, struct iovec *iov,
+vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov,
                             uint32_t iovcnt, uint32_t type)
 {
+    BlockBackend *blk = vexp->export.blk;
     struct virtio_blk_discard_write_zeroes desc;
-    ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
+    ssize_t size;
+    uint64_t sector;
+    uint32_t num_sectors;
+    uint32_t max_sectors;
+    uint32_t flags;
+    int bytes;
+
+    /* Only one desc is currently supported */
+    if (unlikely(iov_size(iov, iovcnt) > sizeof(desc))) {
+        return VIRTIO_BLK_S_UNSUPP;
+    }
+
+    size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
     if (unlikely(size != sizeof(desc))) {
-        error_report("Invalid size %zd, expect %zu", size, sizeof(desc));
-        return -EINVAL;
+        error_report("Invalid size %zd, expected %zu", size, sizeof(desc));
+        return VIRTIO_BLK_S_IOERR;
     }
 
-    uint64_t range[2] = { le64_to_cpu(desc.sector) << 9,
-                          le32_to_cpu(desc.num_sectors) << 9 };
-    if (type == VIRTIO_BLK_T_DISCARD) {
-        if (blk_co_pdiscard(blk, range[0], range[1]) == 0) {
-            return 0;
+    sector = le64_to_cpu(desc.sector);
+    num_sectors = le32_to_cpu(desc.num_sectors);
+    flags = le32_to_cpu(desc.flags);
+    max_sectors = (type == VIRTIO_BLK_T_WRITE_ZEROES) ?
+                  VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS :
+                  VHOST_USER_BLK_MAX_DISCARD_SECTORS;
+
+    /* This check ensures that 'bytes' fits in an int */
+    if (unlikely(num_sectors > max_sectors)) {
+        return VIRTIO_BLK_S_IOERR;
+    }
+
+    bytes = num_sectors << VIRTIO_BLK_SECTOR_BITS;
+
+    if (unlikely(!vu_blk_sect_range_ok(vexp, sector, bytes))) {
+        return VIRTIO_BLK_S_IOERR;
+    }
+
+    /*
+     * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard
+     * and write zeroes commands if any unknown flag is set.
+     */
+    if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
+        return VIRTIO_BLK_S_UNSUPP;
+    }
+
+    if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
+        int blk_flags = 0;
+
+        if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
+            blk_flags |= BDRV_REQ_MAY_UNMAP;
         }
-    } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
-        if (blk_co_pwrite_zeroes(blk, range[0], range[1], 0) == 0) {
-            return 0;
+
+        if (blk_co_pwrite_zeroes(blk, sector << VIRTIO_BLK_SECTOR_BITS,
+                                 bytes, blk_flags) == 0) {
+            return VIRTIO_BLK_S_OK;
+        }
+    } else if (type == VIRTIO_BLK_T_DISCARD) {
+        /*
+         * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for
+         * discard commands if the unmap flag is set.
+         */
+        if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
+            return VIRTIO_BLK_S_UNSUPP;
+        }
+
+        if (blk_co_pdiscard(blk, sector << VIRTIO_BLK_SECTOR_BITS,
+                            bytes) == 0) {
+            return VIRTIO_BLK_S_OK;
         }
     }
 
-    return -EINVAL;
+    return VIRTIO_BLK_S_IOERR;
 }
 
 static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
@@ -128,6 +209,8 @@
     switch (type & ~VIRTIO_BLK_T_BARRIER) {
     case VIRTIO_BLK_T_IN:
     case VIRTIO_BLK_T_OUT: {
+        QEMUIOVector qiov;
+        int64_t offset;
         ssize_t ret = 0;
         bool is_write = type & VIRTIO_BLK_T_OUT;
         req->sector_num = le64_to_cpu(req->out.sector);
@@ -137,13 +220,24 @@
             break;
         }
 
-        int64_t offset = req->sector_num * vexp->blk_size;
-        QEMUIOVector qiov;
         if (is_write) {
             qemu_iovec_init_external(&qiov, out_iov, out_num);
-            ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0);
         } else {
             qemu_iovec_init_external(&qiov, in_iov, in_num);
+        }
+
+        if (unlikely(!vu_blk_sect_range_ok(vexp,
+                                           req->sector_num,
+                                           qiov.size))) {
+            req->in->status = VIRTIO_BLK_S_IOERR;
+            break;
+        }
+
+        offset = req->sector_num << VIRTIO_BLK_SECTOR_BITS;
+
+        if (is_write) {
+            ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0);
+        } else {
             ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
         }
         if (ret >= 0) {
@@ -170,19 +264,13 @@
     }
     case VIRTIO_BLK_T_DISCARD:
     case VIRTIO_BLK_T_WRITE_ZEROES: {
-        int rc;
-
         if (!vexp->writable) {
             req->in->status = VIRTIO_BLK_S_IOERR;
             break;
         }
 
-        rc = vu_blk_discard_write_zeroes(blk, &elem->out_sg[1], out_num, type);
-        if (rc == 0) {
-            req->in->status = VIRTIO_BLK_S_OK;
-        } else {
-            req->in->status = VIRTIO_BLK_S_IOERR;
-        }
+        req->in->status = vu_blk_discard_write_zeroes(vexp, out_iov, out_num,
+                                                      type);
         break;
     }
     default:
@@ -347,17 +435,21 @@
                          uint32_t blk_size,
                          uint16_t num_queues)
 {
-    config->capacity = cpu_to_le64(bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+    config->capacity =
+        cpu_to_le64(bdrv_getlength(bs) >> VIRTIO_BLK_SECTOR_BITS);
     config->blk_size = cpu_to_le32(blk_size);
     config->size_max = cpu_to_le32(0);
     config->seg_max = cpu_to_le32(128 - 2);
     config->min_io_size = cpu_to_le16(1);
     config->opt_io_size = cpu_to_le32(1);
     config->num_queues = cpu_to_le16(num_queues);
-    config->max_discard_sectors = cpu_to_le32(32768);
+    config->max_discard_sectors =
+        cpu_to_le32(VHOST_USER_BLK_MAX_DISCARD_SECTORS);
     config->max_discard_seg = cpu_to_le32(1);
-    config->discard_sector_alignment = cpu_to_le32(config->blk_size >> 9);
-    config->max_write_zeroes_sectors = cpu_to_le32(32768);
+    config->discard_sector_alignment =
+        cpu_to_le32(blk_size >> VIRTIO_BLK_SECTOR_BITS);
+    config->max_write_zeroes_sectors
+        = cpu_to_le32(VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS);
     config->max_write_zeroes_seg = cpu_to_le32(1);
 }
 
@@ -383,7 +475,7 @@
     if (vu_opts->has_logical_block_size) {
         logical_block_size = vu_opts->logical_block_size;
     } else {
-        logical_block_size = BDRV_SECTOR_SIZE;
+        logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
     }
     check_block_size(exp->id, "logical-block-size", logical_block_size,
                      &local_err);
diff --git a/block/meson.build b/block/meson.build
index eeaefe5..d21990e 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -57,7 +57,8 @@
   'qed-table.c',
   'qed.c',
 ))
-block_ss.add(when: [libxml2, 'CONFIG_PARALLELS'], if_true: files('parallels.c'))
+block_ss.add(when: [libxml2, 'CONFIG_PARALLELS'],
+             if_true: files('parallels.c', 'parallels-ext.c'))
 block_ss.add(when: 'CONFIG_WIN32', if_true: files('file-win32.c', 'win32-aio.c'))
 block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit])
 block_ss.add(when: libiscsi, if_true: files('iscsi-opts.c'))
diff --git a/block/parallels-ext.c b/block/parallels-ext.c
new file mode 100644
index 0000000..e0dd097
--- /dev/null
+++ b/block/parallels-ext.c
@@ -0,0 +1,300 @@
+/*
+ * Support of Parallels Format Extension. It's a part of Parallels format
+ * driver.
+ *
+ * Copyright (c) 2021 Virtuozzo International GmbH
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "parallels.h"
+#include "crypto/hash.h"
+#include "qemu/uuid.h"
+
+#define PARALLELS_FORMAT_EXTENSION_MAGIC 0xAB234CEF23DCEA87ULL
+
+#define PARALLELS_END_OF_FEATURES_MAGIC 0x0ULL
+#define PARALLELS_DIRTY_BITMAP_FEATURE_MAGIC 0x20385FAE252CB34AULL
+
+typedef struct ParallelsFormatExtensionHeader {
+    uint64_t magic; /* PARALLELS_FORMAT_EXTENSION_MAGIC */
+    uint8_t check_sum[16];
+} QEMU_PACKED ParallelsFormatExtensionHeader;
+
+typedef struct ParallelsFeatureHeader {
+    uint64_t magic;
+    uint64_t flags;
+    uint32_t data_size;
+    uint32_t _unused;
+} QEMU_PACKED ParallelsFeatureHeader;
+
+typedef struct ParallelsDirtyBitmapFeature {
+    uint64_t size;
+    uint8_t id[16];
+    uint32_t granularity;
+    uint32_t l1_size;
+    /* L1 table follows */
+} QEMU_PACKED ParallelsDirtyBitmapFeature;
+
+/* Given L1 table read bitmap data from the image and populate @bitmap */
+static int parallels_load_bitmap_data(BlockDriverState *bs,
+                                      const uint64_t *l1_table,
+                                      uint32_t l1_size,
+                                      BdrvDirtyBitmap *bitmap,
+                                      Error **errp)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int ret = 0;
+    uint64_t offset, limit;
+    uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
+    uint8_t *buf = NULL;
+    uint64_t i, tab_size =
+        DIV_ROUND_UP(bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size),
+                     s->cluster_size);
+
+    if (tab_size != l1_size) {
+        error_setg(errp, "Bitmap table size %" PRIu32 " does not correspond "
+                   "to bitmap size and cluster size. Expected %" PRIu64,
+                   l1_size, tab_size);
+        return -EINVAL;
+    }
+
+    buf = qemu_blockalign(bs, s->cluster_size);
+    limit = bdrv_dirty_bitmap_serialization_coverage(s->cluster_size, bitmap);
+    for (i = 0, offset = 0; i < tab_size; ++i, offset += limit) {
+        uint64_t count = MIN(bm_size - offset, limit);
+        uint64_t entry = l1_table[i];
+
+        if (entry == 0) {
+            /* No need to deserialize zeros because @bitmap is cleared. */
+            continue;
+        }
+
+        if (entry == 1) {
+            bdrv_dirty_bitmap_deserialize_ones(bitmap, offset, count, false);
+        } else {
+            ret = bdrv_pread(bs->file, entry << BDRV_SECTOR_BITS, buf,
+                             s->cluster_size);
+            if (ret < 0) {
+                error_setg_errno(errp, -ret,
+                                 "Failed to read bitmap data cluster");
+                goto finish;
+            }
+            bdrv_dirty_bitmap_deserialize_part(bitmap, buf, offset, count,
+                                               false);
+        }
+    }
+    ret = 0;
+
+    bdrv_dirty_bitmap_deserialize_finish(bitmap);
+
+finish:
+    qemu_vfree(buf);
+
+    return ret;
+}
+
+/*
+ * @data buffer (of @data_size size) is the Dirty bitmaps feature which
+ * consists of ParallelsDirtyBitmapFeature followed by L1 table.
+ */
+static BdrvDirtyBitmap *parallels_load_bitmap(BlockDriverState *bs,
+                                              uint8_t *data,
+                                              size_t data_size,
+                                              Error **errp)
+{
+    int ret;
+    ParallelsDirtyBitmapFeature bf;
+    g_autofree uint64_t *l1_table = NULL;
+    BdrvDirtyBitmap *bitmap;
+    QemuUUID uuid;
+    char uuidstr[UUID_FMT_LEN + 1];
+    int i;
+
+    if (data_size < sizeof(bf)) {
+        error_setg(errp, "Too small Bitmap Feature area in Parallels Format "
+                   "Extension: %zu bytes, expected at least %zu bytes",
+                   data_size, sizeof(bf));
+        return NULL;
+    }
+    memcpy(&bf, data, sizeof(bf));
+    bf.size = le64_to_cpu(bf.size);
+    bf.granularity = le32_to_cpu(bf.granularity) << BDRV_SECTOR_BITS;
+    bf.l1_size = le32_to_cpu(bf.l1_size);
+    data += sizeof(bf);
+    data_size -= sizeof(bf);
+
+    if (bf.size != bs->total_sectors) {
+        error_setg(errp, "Bitmap size (in sectors) %" PRId64 " differs from "
+                   "disk size in sectors %" PRId64, bf.size, bs->total_sectors);
+        return NULL;
+    }
+
+    if (bf.l1_size * sizeof(uint64_t) > data_size) {
+        error_setg(errp, "Bitmaps feature corrupted: l1 table exceeds "
+                   "extension data_size");
+        return NULL;
+    }
+
+    memcpy(&uuid, bf.id, sizeof(uuid));
+    qemu_uuid_unparse(&uuid, uuidstr);
+    bitmap = bdrv_create_dirty_bitmap(bs, bf.granularity, uuidstr, errp);
+    if (!bitmap) {
+        return NULL;
+    }
+
+    l1_table = g_new(uint64_t, bf.l1_size);
+    for (i = 0; i < bf.l1_size; i++, data += sizeof(uint64_t)) {
+        l1_table[i] = ldq_le_p(data);
+    }
+
+    ret = parallels_load_bitmap_data(bs, l1_table, bf.l1_size, bitmap, errp);
+    if (ret < 0) {
+        bdrv_release_dirty_bitmap(bitmap);
+        return NULL;
+    }
+
+    /* We support format extension only for RO parallels images. */
+    assert(!(bs->open_flags & BDRV_O_RDWR));
+    bdrv_dirty_bitmap_set_readonly(bitmap, true);
+
+    return bitmap;
+}
+
+static int parallels_parse_format_extension(BlockDriverState *bs,
+                                            uint8_t *ext_cluster, Error **errp)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int ret;
+    int remaining = s->cluster_size;
+    uint8_t *pos = ext_cluster;
+    ParallelsFormatExtensionHeader eh;
+    g_autofree uint8_t *hash = NULL;
+    size_t hash_len = 0;
+    GSList *bitmaps = NULL, *el;
+
+    memcpy(&eh, pos, sizeof(eh));
+    eh.magic = le64_to_cpu(eh.magic);
+    pos += sizeof(eh);
+    remaining -= sizeof(eh);
+
+    if (eh.magic != PARALLELS_FORMAT_EXTENSION_MAGIC) {
+        error_setg(errp, "Wrong parallels Format Extension magic: 0x%" PRIx64
+                   ", expected: 0x%llx", eh.magic,
+                   PARALLELS_FORMAT_EXTENSION_MAGIC);
+        goto fail;
+    }
+
+    ret = qcrypto_hash_bytes(QCRYPTO_HASH_ALG_MD5, (char *)pos, remaining,
+                             &hash, &hash_len, errp);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    if (hash_len != sizeof(eh.check_sum) ||
+        memcmp(hash, eh.check_sum, sizeof(eh.check_sum)) != 0) {
+        error_setg(errp, "Wrong checksum in Format Extension header. Format "
+                   "extension is corrupted.");
+        goto fail;
+    }
+
+    while (true) {
+        ParallelsFeatureHeader fh;
+        BdrvDirtyBitmap *bitmap;
+
+        if (remaining < sizeof(fh)) {
+            error_setg(errp, "Can not read feature header, as remaining bytes "
+                       "(%d) in Format Extension is less than Feature header "
+                       "size (%zu)", remaining, sizeof(fh));
+            goto fail;
+        }
+
+        memcpy(&fh, pos, sizeof(fh));
+        pos += sizeof(fh);
+        remaining -= sizeof(fh);
+
+        fh.magic = le64_to_cpu(fh.magic);
+        fh.flags = le64_to_cpu(fh.flags);
+        fh.data_size = le32_to_cpu(fh.data_size);
+
+        if (fh.flags) {
+            error_setg(errp, "Flags for extension feature are unsupported");
+            goto fail;
+        }
+
+        if (fh.data_size > remaining) {
+            error_setg(errp, "Feature data_size exceedes Format Extension "
+                       "cluster");
+            goto fail;
+        }
+
+        switch (fh.magic) {
+        case PARALLELS_END_OF_FEATURES_MAGIC:
+            return 0;
+
+        case PARALLELS_DIRTY_BITMAP_FEATURE_MAGIC:
+            bitmap = parallels_load_bitmap(bs, pos, fh.data_size, errp);
+            if (!bitmap) {
+                goto fail;
+            }
+            bitmaps = g_slist_append(bitmaps, bitmap);
+            break;
+
+        default:
+            error_setg(errp, "Unknown feature: 0x%" PRIu64, fh.magic);
+            goto fail;
+        }
+
+        pos = ext_cluster + QEMU_ALIGN_UP(pos + fh.data_size - ext_cluster, 8);
+    }
+
+fail:
+    for (el = bitmaps; el; el = el->next) {
+        bdrv_release_dirty_bitmap(el->data);
+    }
+    g_slist_free(bitmaps);
+
+    return -EINVAL;
+}
+
+int parallels_read_format_extension(BlockDriverState *bs,
+                                    int64_t ext_off, Error **errp)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int ret;
+    uint8_t *ext_cluster = qemu_blockalign(bs, s->cluster_size);
+
+    assert(ext_off > 0);
+
+    ret = bdrv_pread(bs->file, ext_off, ext_cluster, s->cluster_size);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to read Format Extension cluster");
+        goto out;
+    }
+
+    ret = parallels_parse_format_extension(bs, ext_cluster, errp);
+
+out:
+    qemu_vfree(ext_cluster);
+
+    return ret;
+}
diff --git a/block/parallels.c b/block/parallels.c
index 3c22dfd..6ebad2a 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -29,6 +29,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "block/block_int.h"
 #include "block/qdict.h"
@@ -421,7 +422,6 @@
     int ret;
     uint32_t i;
     bool flush_bat = false;
-    int cluster_size = s->tracks << BDRV_SECTOR_BITS;
 
     size = bdrv_getlength(bs->file->bs);
     if (size < 0) {
@@ -472,7 +472,7 @@
             high_off = off;
         }
 
-        if (prev_off != 0 && (prev_off + cluster_size) != off) {
+        if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
             res->bfi.fragmented_clusters++;
         }
         prev_off = off;
@@ -487,10 +487,10 @@
         }
     }
 
-    res->image_end_offset = high_off + cluster_size;
+    res->image_end_offset = high_off + s->cluster_size;
     if (size > res->image_end_offset) {
         int64_t count;
-        count = DIV_ROUND_UP(size - res->image_end_offset, cluster_size);
+        count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
         fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
                 fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
                 size - res->image_end_offset);
@@ -771,6 +771,7 @@
         ret = -EFBIG;
         goto fail;
     }
+    s->cluster_size = s->tracks << BDRV_SECTOR_BITS;
 
     s->bat_size = le32_to_cpu(ph.bat_entries);
     if (s->bat_size > INT_MAX / sizeof(uint32_t)) {
@@ -843,6 +844,23 @@
         goto fail_options;
     }
 
+    if (ph.ext_off) {
+        if (flags & BDRV_O_RDWR) {
+            /*
+             * It's unsafe to open image RW if there is an extension (as we
+             * don't support it). But parallels driver in QEMU historically
+             * ignores the extension, so print warning and don't care.
+             */
+            warn_report("Format Extension ignored in RW mode");
+        } else {
+            ret = parallels_read_format_extension(
+                    bs, le64_to_cpu(ph.ext_off) << BDRV_SECTOR_BITS, errp);
+            if (ret < 0) {
+                goto fail;
+            }
+        }
+    }
+
     if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_INACTIVE)) {
         s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC);
         ret = parallels_update_header(bs);
diff --git a/block/parallels.h b/block/parallels.h
index 5aa101c..f22f43f 100644
--- a/block/parallels.h
+++ b/block/parallels.h
@@ -48,7 +48,8 @@
     uint64_t nb_sectors;
     uint32_t inuse;
     uint32_t data_off;
-    char padding[12];
+    uint32_t flags;
+    uint64_t ext_off;
 } QEMU_PACKED ParallelsHeader;
 
 typedef enum ParallelsPreallocMode {
@@ -79,9 +80,13 @@
     ParallelsPreallocMode prealloc_mode;
 
     unsigned int tracks;
+    unsigned int cluster_size;
 
     unsigned int off_multiplier;
     Error *migration_blocker;
 } BDRVParallelsState;
 
+int parallels_read_format_extension(BlockDriverState *bs,
+                                    int64_t ext_off, Error **errp);
+
 #endif
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 5eef82f..42d81c4 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -278,18 +278,6 @@
     return 0;
 }
 
-/* Return the disk size covered by a single qcow2 cluster of bitmap data. */
-static uint64_t bytes_covered_by_bitmap_cluster(const BDRVQcow2State *s,
-                                                const BdrvDirtyBitmap *bitmap)
-{
-    uint64_t granularity = bdrv_dirty_bitmap_granularity(bitmap);
-    uint64_t limit = granularity * (s->cluster_size << 3);
-
-    assert(QEMU_IS_ALIGNED(limit,
-                           bdrv_dirty_bitmap_serialization_align(bitmap)));
-    return limit;
-}
-
 /* load_bitmap_data
  * @bitmap_table entries must satisfy specification constraints.
  * @bitmap must be cleared */
@@ -312,7 +300,7 @@
     }
 
     buf = g_malloc(s->cluster_size);
-    limit = bytes_covered_by_bitmap_cluster(s, bitmap);
+    limit = bdrv_dirty_bitmap_serialization_coverage(s->cluster_size, bitmap);
     for (i = 0, offset = 0; i < tab_size; ++i, offset += limit) {
         uint64_t count = MIN(bm_size - offset, limit);
         uint64_t entry = bitmap_table[i];
@@ -1303,7 +1291,7 @@
     }
 
     buf = g_malloc(s->cluster_size);
-    limit = bytes_covered_by_bitmap_cluster(s, bitmap);
+    limit = bdrv_dirty_bitmap_serialization_coverage(s->cluster_size, bitmap);
     assert(DIV_ROUND_UP(bm_size, limit) == tb_size);
 
     offset = 0;
diff --git a/blockdev.c b/blockdev.c
index 7463dd5..68f0228 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1514,13 +1514,13 @@
             s->has_snapshot_node_name ? s->snapshot_node_name : NULL;
 
         if (node_name && !snapshot_node_name) {
-            error_setg(errp, "New overlay node name missing");
+            error_setg(errp, "New overlay node-name missing");
             goto out;
         }
 
         if (snapshot_node_name &&
             bdrv_lookup_bs(snapshot_node_name, snapshot_node_name, NULL)) {
-            error_setg(errp, "New overlay node name already in use");
+            error_setg(errp, "New overlay node-name already in use");
             goto out;
         }
 
@@ -3597,13 +3597,14 @@
 
     /* Check for the selected node name */
     if (!options->has_node_name) {
-        error_setg(errp, "Node name not specified");
+        error_setg(errp, "node-name not specified");
         goto fail;
     }
 
     bs = bdrv_find_node(options->node_name);
     if (!bs) {
-        error_setg(errp, "Cannot find node named '%s'", options->node_name);
+        error_setg(errp, "Failed to find node with node-name='%s'",
+                   options->node_name);
         goto fail;
     }
 
@@ -3634,7 +3635,7 @@
 
     bs = bdrv_find_node(node_name);
     if (!bs) {
-        error_setg(errp, "Cannot find node %s", node_name);
+        error_setg(errp, "Failed to find node with node-name='%s'", node_name);
         return;
     }
     if (bdrv_has_blk(bs)) {
@@ -3757,7 +3758,7 @@
 
     bs = bdrv_find_node(node_name);
     if (!bs) {
-        error_setg(errp, "Cannot find node %s", node_name);
+        error_setg(errp, "Failed to find node with node-name='%s'", node_name);
         return;
     }
 
diff --git a/blockjob.c b/blockjob.c
index f2feff0..ef96801 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -318,8 +318,12 @@
     info->status    = job->job.status;
     info->auto_finalize = job->job.auto_finalize;
     info->auto_dismiss  = job->job.auto_dismiss;
-    info->has_error = job->job.ret != 0;
-    info->error     = job->job.ret ? g_strdup(strerror(-job->job.ret)) : NULL;
+    if (job->job.ret) {
+        info->has_error = true;
+        info->error = job->job.err ?
+                        g_strdup(error_get_pretty(job->job.err)) :
+                        g_strdup(strerror(-job->job.ret));
+    }
     return info;
 }
 
@@ -356,7 +360,7 @@
     }
 
     if (job->job.ret < 0) {
-        msg = strerror(-job->job.ret);
+        msg = error_get_pretty(job->job.err);
     }
 
     qapi_event_send_block_job_completed(job_type(&job->job),
diff --git a/docs/devel/clocks.rst b/docs/devel/clocks.rst
index c54bbb8..956bd14 100644
--- a/docs/devel/clocks.rst
+++ b/docs/devel/clocks.rst
@@ -80,11 +80,12 @@
 instance.
 
 To add an input clock to a device, the function ``qdev_init_clock_in()``
-must be used.  It takes the name, a callback and an opaque parameter
-for the callback (this will be explained in a following section).
+must be used.  It takes the name, a callback, an opaque parameter
+for the callback and a mask of events when the callback should be
+called (this will be explained in a following section).
 Output is simpler; only the name is required. Typically::
 
-    qdev_init_clock_in(DEVICE(dev), "clk_in", clk_in_callback, dev);
+    qdev_init_clock_in(DEVICE(dev), "clk_in", clk_in_callback, dev, ClockUpdate);
     qdev_init_clock_out(DEVICE(dev), "clk_out");
 
 Both functions return the created Clock pointer, which should be saved in the
@@ -113,7 +114,7 @@
      * callback for the input clock (see "Callback on input clock
      * change" section below for more information).
      */
-    static void clk_in_callback(void *opaque);
+    static void clk_in_callback(void *opaque, ClockEvent event);
 
     /*
      * static array describing clocks:
@@ -124,7 +125,7 @@
      *   the clk_out field of a MyDeviceState structure.
      */
     static const ClockPortInitArray mydev_clocks = {
-        QDEV_CLOCK_IN(MyDeviceState, clk_in, clk_in_callback),
+        QDEV_CLOCK_IN(MyDeviceState, clk_in, clk_in_callback, ClockUpdate),
         QDEV_CLOCK_OUT(MyDeviceState, clk_out),
         QDEV_CLOCK_END
     };
@@ -153,6 +154,47 @@
 connecting the clocks together and devices will fetch the right value during
 the first reset.
 
+Clock callbacks
+---------------
+
+You can give a clock a callback function in several ways:
+
+ * by passing it as an argument to ``qdev_init_clock_in()``
+ * as an argument to the ``QDEV_CLOCK_IN()`` macro initializing an
+   array to be passed to ``qdev_init_clocks()``
+ * by directly calling the ``clock_set_callback()`` function
+
+The callback function must be of this type:
+
+.. code-block:: c
+
+   typedef void ClockCallback(void *opaque, ClockEvent event);
+
+The ``opaque`` argument is the pointer passed to ``qdev_init_clock_in()``
+or ``clock_set_callback()``; for ``qdev_init_clocks()`` it is the
+``dev`` device pointer.
+
+The ``event`` argument specifies why the callback has been called.
+When you register the callback you specify a mask of ClockEvent values
+that you are interested in. The callback will only be called for those
+events.
+
+The events currently supported are:
+
+ * ``ClockPreUpdate`` : called when the input clock's period is about to
+   update. This is useful if the device needs to do some action for
+   which it needs to know the old value of the clock period. During
+   this callback, Clock API functions like ``clock_get()`` or
+   ``clock_ticks_to_ns()`` will use the old period.
+ * ``ClockUpdate`` : called after the input clock's period has changed.
+   During this callback, Clock API functions like ``clock_ticks_to_ns()``
+   will use the new period.
+
+Note that a clock only has one callback: it is not possible to register
+different functions for different events. You must register a single
+callback which listens for all of the events you are interested in,
+and use the ``event`` argument to identify which event has happened.
+
 Retrieving clocks from a device
 -------------------------------
 
@@ -231,7 +273,7 @@
 .. code-block:: c
 
     clk = qdev_init_clock_in(DEVICE(dev), "clk-in", clk_in_callback,
-                             dev);
+                             dev, ClockUpdate);
     /* set initial value to 10ns / 100MHz */
     clock_set_ns(clk, 10);
 
@@ -267,11 +309,12 @@
 so be cautious about using it in calculations.
 
 It is also possible to register a callback on clock frequency changes.
-Here is an example:
+Here is an example, which assumes that ``clock_callback`` has been
+specified as the callback for the ``ClockUpdate`` event:
 
 .. code-block:: c
 
-    void clock_callback(void *opaque) {
+    void clock_callback(void *opaque, ClockEvent event) {
         MyDeviceState *s = (MyDeviceState *) opaque;
         /*
          * 'opaque' is the argument passed to qdev_init_clock_in();
@@ -317,6 +360,18 @@
 ``timer_mod_ns()`` then you should be careful to avoid overflow
 in those calculations, of course.)
 
+Obtaining tick counts
+---------------------
+
+For calculations where you need to know the number of ticks in
+a given duration, use ``clock_ns_to_ticks()``. This function handles
+possible non-whole-number-of-nanoseconds periods and avoids
+potential rounding errors. It will return '0' if the clock is stopped
+(i.e. it has period zero). If the inputs imply a tick count that
+overflows a 64-bit value (a very long duration for a clock with a
+very short period) the output value is truncated, so effectively
+the 64-bit output wraps around.
+
 Changing a clock period
 -----------------------
 
diff --git a/docs/interop/parallels.txt b/docs/interop/parallels.txt
index f15bf35..bb3fadf 100644
--- a/docs/interop/parallels.txt
+++ b/docs/interop/parallels.txt
@@ -208,21 +208,25 @@
   28 - 31:    l1_size
               The number of entries in the L1 table of the bitmap.
 
-  variable:   l1_table (8 * l1_size bytes)
-              L1 offset table (in bytes)
+  variable:   L1 offset table (l1_table), size: 8 * l1_size bytes
 
-A dirty bitmap is stored using a one-level structure for the mapping to host
-clusters - an L1 table.
+The dirty bitmap described by this feature extension is stored in a set of
+clusters inside the Parallels image file. The offsets of these clusters are
+saved in the L1 offset table specified by the feature extension. Each L1 table
+entry is a 64 bit integer as described below:
 
-Given an offset in bytes into the bitmap data, the offset in bytes into the
-image file can be obtained as follows:
+Given an offset in bytes into the bitmap data, corresponding L1 entry is
 
-    offset = l1_table[offset / cluster_size] + (offset % cluster_size)
+    l1_table[offset / cluster_size]
 
-If an L1 table entry is 0, the corresponding cluster of the bitmap is assumed
-to be zero.
+If an L1 table entry is 0, all bits in the corresponding cluster of the bitmap
+are assumed to be 0.
 
-If an L1 table entry is 1, the corresponding cluster of the bitmap is assumed
-to have all bits set.
+If an L1 table entry is 1, all bits in the corresponding cluster of the bitmap
+are assumed to be 1.
 
-If an L1 table entry is not 0 or 1, it allocates a cluster from the data area.
+If an L1 table entry is not 0 or 1, it contains the corresponding cluster
+offset (in 512b sectors). Given an offset in bytes into the bitmap data the
+offset in bytes into the image file can be obtained as follows:
+
+    offset = l1_table[offset / cluster_size] * 512 + (offset % cluster_size)
diff --git a/docs/system/arm/mps2.rst b/docs/system/arm/mps2.rst
index 601ccea..f83b151 100644
--- a/docs/system/arm/mps2.rst
+++ b/docs/system/arm/mps2.rst
@@ -1,5 +1,5 @@
-Arm MPS2 and MPS3 boards (``mps2-an385``, ``mps2-an386``, ``mps2-an500``, ``mps2-an505``, ``mps2-an511``, ``mps2-an521``, ``mps3-an524``)
-=========================================================================================================================================
+Arm MPS2 and MPS3 boards (``mps2-an385``, ``mps2-an386``, ``mps2-an500``, ``mps2-an505``, ``mps2-an511``, ``mps2-an521``, ``mps3-an524``, ``mps3-an547``)
+=========================================================================================================================================================
 
 These board models all use Arm M-profile CPUs.
 
@@ -27,6 +27,8 @@
   Dual Cortex-M33 as documented in Arm Application Note AN521
 ``mps3-an524``
   Dual Cortex-M33 on an MPS3, as documented in Arm Application Note AN524
+``mps3-an547``
+  Cortex-M55 on an MPS3, as documented in Arm Application Note AN547
 
 Differences between QEMU and real hardware:
 
diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst
index c05b3d3..086493e 100644
--- a/docs/tools/qemu-storage-daemon.rst
+++ b/docs/tools/qemu-storage-daemon.rst
@@ -69,7 +69,7 @@
   a description of character device properties. A common character device
   definition configures a UNIX domain socket::
 
-  --chardev socket,id=char1,path=/tmp/qmp.sock,server=on,wait=off
+  --chardev socket,id=char1,path=/var/run/qsd-qmp.sock,server=on,wait=off
 
 .. option:: --export [type=]nbd,id=<id>,node-name=<node-name>[,name=<export-name>][,writable=on|off][,bitmap=<name>]
   --export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=unix,addr.path=<socket-path>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>]
@@ -80,8 +80,9 @@
   requests for modifying data (the default is off).
 
   The ``nbd`` export type requires ``--nbd-server`` (see below). ``name`` is
-  the NBD export name. ``bitmap`` is the name of a dirty bitmap reachable from
-  the block node, so the NBD client can use NBD_OPT_SET_META_CONTEXT with the
+  the NBD export name (if not specified, it defaults to the given
+  ``node-name``). ``bitmap`` is the name of a dirty bitmap reachable from the
+  block node, so the NBD client can use NBD_OPT_SET_META_CONTEXT with the
   metadata context name "qemu:dirty-bitmap:BITMAP" to inspect the bitmap.
 
   The ``vhost-user-blk`` export type takes a vhost-user socket address on which
@@ -101,14 +102,17 @@
 
 .. option:: --nbd-server addr.type=inet,addr.host=<host>,addr.port=<port>[,tls-creds=<id>][,tls-authz=<id>][,max-connections=<n>]
   --nbd-server addr.type=unix,addr.path=<path>[,tls-creds=<id>][,tls-authz=<id>][,max-connections=<n>]
+  --nbd-server addr.type=fd,addr.str=<fd>[,tls-creds=<id>][,tls-authz=<id>][,max-connections=<n>]
 
   is a server for NBD exports. Both TCP and UNIX domain sockets are supported.
-  TLS encryption can be configured using ``--object`` tls-creds-* and authz-*
-  secrets (see below).
+  A listen socket can be provided via file descriptor passing (see Examples
+  below). TLS encryption can be configured using ``--object`` tls-creds-* and
+  authz-* secrets (see below).
 
-  To configure an NBD server on UNIX domain socket path ``/tmp/nbd.sock``::
+  To configure an NBD server on UNIX domain socket path
+  ``/var/run/qsd-nbd.sock``::
 
-  --nbd-server addr.type=unix,addr.path=/tmp/nbd.sock
+  --nbd-server addr.type=unix,addr.path=/var/run/qsd-nbd.sock
 
 .. option:: --object help
   --object <type>,help
@@ -118,6 +122,20 @@
   List object properties with ``<type>,help``. See the :manpage:`qemu(1)`
   manual page for a description of the object properties.
 
+.. option:: --pidfile PATH
+
+  is the path to a file where the daemon writes its pid. This allows scripts to
+  stop the daemon by sending a signal::
+
+    $ kill -SIGTERM $(<path/to/qsd.pid)
+
+  A file lock is applied to the file so only one instance of the daemon can run
+  with a given pid file path. The daemon unlinks its pid file when terminating.
+
+  The pid file is written after chardevs, exports, and NBD servers have been
+  created but before accepting connections. The daemon has started successfully
+  when the pid file is written and clients may begin connecting.
+
 Examples
 --------
 Launch the daemon with QMP monitor socket ``qmp.sock`` so clients can execute
@@ -127,6 +145,42 @@
       --chardev socket,path=qmp.sock,server=on,wait=off,id=char1 \
       --monitor chardev=char1
 
+Launch the daemon from Python with a QMP monitor socket using file descriptor
+passing so there is no need to busy wait for the QMP monitor to become
+available::
+
+  #!/usr/bin/env python3
+  import subprocess
+  import socket
+
+  sock_path = '/var/run/qmp.sock'
+
+  with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as listen_sock:
+      listen_sock.bind(sock_path)
+      listen_sock.listen()
+
+      fd = listen_sock.fileno()
+
+      subprocess.Popen(
+          ['qemu-storage-daemon',
+           '--chardev', f'socket,fd={fd},server=on,id=char1',
+           '--monitor', 'chardev=char1'],
+          pass_fds=[fd],
+      )
+
+  # listen_sock was automatically closed when leaving the 'with' statement
+  # body. If the daemon process terminated early then the following connect()
+  # will fail with "Connection refused" because no process has the listen
+  # socket open anymore. Launch errors can be detected this way.
+
+  qmp_sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+  qmp_sock.connect(sock_path)
+  ...QMP interaction...
+
+The same socket spawning approach also works with the ``--nbd-server
+addr.type=fd,addr.str=<fd>`` and ``--export
+type=vhost-user-blk,addr.type=fd,addr.str=<fd>`` options.
+
 Export raw image file ``disk.img`` over NBD UNIX domain socket ``nbd.sock``::
 
   $ qemu-storage-daemon \
diff --git a/hw/adc/npcm7xx_adc.c b/hw/adc/npcm7xx_adc.c
index 870a6d5..0f0a9f6 100644
--- a/hw/adc/npcm7xx_adc.c
+++ b/hw/adc/npcm7xx_adc.c
@@ -238,7 +238,7 @@
     memory_region_init_io(&s->iomem, obj, &npcm7xx_adc_ops, s,
                           TYPE_NPCM7XX_ADC, 4 * KiB);
     sysbus_init_mmio(sbd, &s->iomem);
-    s->clock = qdev_init_clock_in(DEVICE(s), "clock", NULL, NULL);
+    s->clock = qdev_init_clock_in(DEVICE(s), "clock", NULL, NULL, 0);
 
     for (i = 0; i < NPCM7XX_ADC_NUM_INPUTS; ++i) {
         object_property_add_uint32_ptr(obj, "adci[*]",
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 4e6f4ff..8c37cf0 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -353,6 +353,7 @@
     select SSI_M25P80
     select XILINX_AXI
     select XILINX_SPIPS
+    select XLNX_CSU_DMA
     select XLNX_ZYNQMP
     select XLNX_ZDMA
 
@@ -505,6 +506,7 @@
 config ARMSSE
     bool
     select ARM_V7M
+    select ARMSSE_CPU_PWRCTRL
     select ARMSSE_CPUID
     select ARMSSE_MHU
     select CMSDK_APB_TIMER
@@ -520,9 +522,5 @@
     select TZ_MSC
     select TZ_PPC
     select UNIMP
-
-config ARMSSE_CPUID
-    bool
-
-config ARMSSE_MHU
-    bool
+    select SSE_COUNTER
+    select SSE_TIMER
diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c
index 26e1a8c..e5aeb9e 100644
--- a/hw/arm/armsse.c
+++ b/hw/arm/armsse.c
@@ -19,29 +19,58 @@
 #include "migration/vmstate.h"
 #include "hw/registerfields.h"
 #include "hw/arm/armsse.h"
+#include "hw/arm/armsse-version.h"
 #include "hw/arm/boot.h"
 #include "hw/irq.h"
 #include "hw/qdev-clock.h"
 
-/* Format of the System Information block SYS_CONFIG register */
-typedef enum SysConfigFormat {
-    IoTKitFormat,
-    SSE200Format,
-} SysConfigFormat;
+/*
+ * The SSE-300 puts some devices in different places to the
+ * SSE-200 (and original IoTKit). We use an array of these structs
+ * to define how each variant lays out these devices. (Parts of the
+ * SoC that are the same for all variants aren't handled via these
+ * data structures.)
+ */
+
+#define NO_IRQ -1
+#define NO_PPC -1
+/*
+ * Special values for ARMSSEDeviceInfo::irq to indicate that this
+ * device uses one of the inputs to the OR gate that feeds into the
+ * CPU NMI input.
+ */
+#define NMI_0 10000
+#define NMI_1 10001
+
+typedef struct ARMSSEDeviceInfo {
+    const char *name; /* name to use for the QOM object; NULL terminates list */
+    const char *type; /* QOM type name */
+    unsigned int index; /* Which of the N devices of this type is this ? */
+    hwaddr addr;
+    hwaddr size; /* only needed for TYPE_UNIMPLEMENTED_DEVICE */
+    int ppc; /* Index of APB PPC this device is wired up to, or NO_PPC */
+    int ppc_port; /* Port number of this device on the PPC */
+    int irq; /* NO_IRQ, or 0..NUM_SSE_IRQS-1, or NMI_0 or NMI_1 */
+    bool slowclk; /* true if device uses the slow 32KHz clock */
+} ARMSSEDeviceInfo;
 
 struct ARMSSEInfo {
     const char *name;
+    uint32_t sse_version;
     int sram_banks;
     int num_cpus;
     uint32_t sys_version;
+    uint32_t iidr;
     uint32_t cpuwait_rst;
-    SysConfigFormat sys_config_format;
     bool has_mhus;
-    bool has_ppus;
     bool has_cachectrl;
     bool has_cpusecctrl;
     bool has_cpuid;
+    bool has_cpu_pwrctrl;
+    bool has_sse_counter;
     Property *props;
+    const ARMSSEDeviceInfo *devinfo;
+    const bool *irq_is_common;
 };
 
 static Property iotkit_properties[] = {
@@ -68,70 +97,370 @@
     DEFINE_PROP_END_OF_LIST()
 };
 
-static const ARMSSEInfo armsse_variants[] = {
+static const ARMSSEDeviceInfo iotkit_devices[] = {
     {
-        .name = TYPE_IOTKIT,
-        .sram_banks = 1,
-        .num_cpus = 1,
-        .sys_version = 0x41743,
-        .cpuwait_rst = 0,
-        .sys_config_format = IoTKitFormat,
-        .has_mhus = false,
-        .has_ppus = false,
-        .has_cachectrl = false,
-        .has_cpusecctrl = false,
-        .has_cpuid = false,
-        .props = iotkit_properties,
+        .name = "timer0",
+        .type = TYPE_CMSDK_APB_TIMER,
+        .index = 0,
+        .addr = 0x40000000,
+        .ppc = 0,
+        .ppc_port = 0,
+        .irq = 3,
     },
     {
-        .name = TYPE_SSE200,
-        .sram_banks = 4,
-        .num_cpus = 2,
-        .sys_version = 0x22041743,
-        .cpuwait_rst = 2,
-        .sys_config_format = SSE200Format,
-        .has_mhus = true,
-        .has_ppus = true,
-        .has_cachectrl = true,
-        .has_cpusecctrl = true,
-        .has_cpuid = true,
-        .props = armsse_properties,
+        .name = "timer1",
+        .type = TYPE_CMSDK_APB_TIMER,
+        .index = 1,
+        .addr = 0x40001000,
+        .ppc = 0,
+        .ppc_port = 1,
+        .irq = 4,
     },
+    {
+        .name = "s32ktimer",
+        .type = TYPE_CMSDK_APB_TIMER,
+        .index = 2,
+        .addr = 0x4002f000,
+        .ppc = 1,
+        .ppc_port = 0,
+        .irq = 2,
+        .slowclk = true,
+    },
+    {
+        .name = "dualtimer",
+        .type = TYPE_CMSDK_APB_DUALTIMER,
+        .index = 0,
+        .addr = 0x40002000,
+        .ppc = 0,
+        .ppc_port = 2,
+        .irq = 5,
+    },
+    {
+        .name = "s32kwatchdog",
+        .type = TYPE_CMSDK_APB_WATCHDOG,
+        .index = 0,
+        .addr = 0x5002e000,
+        .ppc = NO_PPC,
+        .irq = NMI_0,
+        .slowclk = true,
+    },
+    {
+        .name = "nswatchdog",
+        .type = TYPE_CMSDK_APB_WATCHDOG,
+        .index = 1,
+        .addr = 0x40081000,
+        .ppc = NO_PPC,
+        .irq = 1,
+    },
+    {
+        .name = "swatchdog",
+        .type = TYPE_CMSDK_APB_WATCHDOG,
+        .index = 2,
+        .addr = 0x50081000,
+        .ppc = NO_PPC,
+        .irq = NMI_1,
+    },
+    {
+        .name = "armsse-sysinfo",
+        .type = TYPE_IOTKIT_SYSINFO,
+        .index = 0,
+        .addr = 0x40020000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "armsse-sysctl",
+        .type = TYPE_IOTKIT_SYSCTL,
+        .index = 0,
+        .addr = 0x50021000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = NULL,
+    }
 };
 
-static uint32_t armsse_sys_config_value(ARMSSE *s, const ARMSSEInfo *info)
-{
-    /* Return the SYS_CONFIG value for this SSE */
-    uint32_t sys_config;
-
-    switch (info->sys_config_format) {
-    case IoTKitFormat:
-        sys_config = 0;
-        sys_config = deposit32(sys_config, 0, 4, info->sram_banks);
-        sys_config = deposit32(sys_config, 4, 4, s->sram_addr_width - 12);
-        break;
-    case SSE200Format:
-        sys_config = 0;
-        sys_config = deposit32(sys_config, 0, 4, info->sram_banks);
-        sys_config = deposit32(sys_config, 4, 5, s->sram_addr_width);
-        sys_config = deposit32(sys_config, 24, 4, 2);
-        if (info->num_cpus > 1) {
-            sys_config = deposit32(sys_config, 10, 1, 1);
-            sys_config = deposit32(sys_config, 20, 4, info->sram_banks - 1);
-            sys_config = deposit32(sys_config, 28, 4, 2);
-        }
-        break;
-    default:
-        g_assert_not_reached();
+static const ARMSSEDeviceInfo sse200_devices[] = {
+    {
+        .name = "timer0",
+        .type = TYPE_CMSDK_APB_TIMER,
+        .index = 0,
+        .addr = 0x40000000,
+        .ppc = 0,
+        .ppc_port = 0,
+        .irq = 3,
+    },
+    {
+        .name = "timer1",
+        .type = TYPE_CMSDK_APB_TIMER,
+        .index = 1,
+        .addr = 0x40001000,
+        .ppc = 0,
+        .ppc_port = 1,
+        .irq = 4,
+    },
+    {
+        .name = "s32ktimer",
+        .type = TYPE_CMSDK_APB_TIMER,
+        .index = 2,
+        .addr = 0x4002f000,
+        .ppc = 1,
+        .ppc_port = 0,
+        .irq = 2,
+        .slowclk = true,
+    },
+    {
+        .name = "dualtimer",
+        .type = TYPE_CMSDK_APB_DUALTIMER,
+        .index = 0,
+        .addr = 0x40002000,
+        .ppc = 0,
+        .ppc_port = 2,
+        .irq = 5,
+    },
+    {
+        .name = "s32kwatchdog",
+        .type = TYPE_CMSDK_APB_WATCHDOG,
+        .index = 0,
+        .addr = 0x5002e000,
+        .ppc = NO_PPC,
+        .irq = NMI_0,
+        .slowclk = true,
+    },
+    {
+        .name = "nswatchdog",
+        .type = TYPE_CMSDK_APB_WATCHDOG,
+        .index = 1,
+        .addr = 0x40081000,
+        .ppc = NO_PPC,
+        .irq = 1,
+    },
+    {
+        .name = "swatchdog",
+        .type = TYPE_CMSDK_APB_WATCHDOG,
+        .index = 2,
+        .addr = 0x50081000,
+        .ppc = NO_PPC,
+        .irq = NMI_1,
+    },
+    {
+        .name = "armsse-sysinfo",
+        .type = TYPE_IOTKIT_SYSINFO,
+        .index = 0,
+        .addr = 0x40020000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "armsse-sysctl",
+        .type = TYPE_IOTKIT_SYSCTL,
+        .index = 0,
+        .addr = 0x50021000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "CPU0CORE_PPU",
+        .type = TYPE_UNIMPLEMENTED_DEVICE,
+        .index = 0,
+        .addr = 0x50023000,
+        .size = 0x1000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "CPU1CORE_PPU",
+        .type = TYPE_UNIMPLEMENTED_DEVICE,
+        .index = 1,
+        .addr = 0x50025000,
+        .size = 0x1000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "DBG_PPU",
+        .type = TYPE_UNIMPLEMENTED_DEVICE,
+        .index = 2,
+        .addr = 0x50029000,
+        .size = 0x1000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "RAM0_PPU",
+        .type = TYPE_UNIMPLEMENTED_DEVICE,
+        .index = 3,
+        .addr = 0x5002a000,
+        .size = 0x1000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "RAM1_PPU",
+        .type = TYPE_UNIMPLEMENTED_DEVICE,
+        .index = 4,
+        .addr = 0x5002b000,
+        .size = 0x1000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "RAM2_PPU",
+        .type = TYPE_UNIMPLEMENTED_DEVICE,
+        .index = 5,
+        .addr = 0x5002c000,
+        .size = 0x1000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "RAM3_PPU",
+        .type = TYPE_UNIMPLEMENTED_DEVICE,
+        .index = 6,
+        .addr = 0x5002d000,
+        .size = 0x1000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "SYS_PPU",
+        .type = TYPE_UNIMPLEMENTED_DEVICE,
+        .index = 7,
+        .addr = 0x50022000,
+        .size = 0x1000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = NULL,
     }
-    return sys_config;
-}
+};
 
-/* Clock frequency in HZ of the 32KHz "slow clock" */
-#define S32KCLK (32 * 1000)
+static const ARMSSEDeviceInfo sse300_devices[] = {
+    {
+        .name = "timer0",
+        .type = TYPE_SSE_TIMER,
+        .index = 0,
+        .addr = 0x48000000,
+        .ppc = 0,
+        .ppc_port = 0,
+        .irq = 3,
+    },
+    {
+        .name = "timer1",
+        .type = TYPE_SSE_TIMER,
+        .index = 1,
+        .addr = 0x48001000,
+        .ppc = 0,
+        .ppc_port = 1,
+        .irq = 4,
+    },
+    {
+        .name = "timer2",
+        .type = TYPE_SSE_TIMER,
+        .index = 2,
+        .addr = 0x48002000,
+        .ppc = 0,
+        .ppc_port = 2,
+        .irq = 5,
+    },
+    {
+        .name = "timer3",
+        .type = TYPE_SSE_TIMER,
+        .index = 3,
+        .addr = 0x48003000,
+        .ppc = 0,
+        .ppc_port = 5,
+        .irq = 27,
+    },
+    {
+        .name = "s32ktimer",
+        .type = TYPE_CMSDK_APB_TIMER,
+        .index = 0,
+        .addr = 0x4802f000,
+        .ppc = 1,
+        .ppc_port = 0,
+        .irq = 2,
+        .slowclk = true,
+    },
+    {
+        .name = "s32kwatchdog",
+        .type = TYPE_CMSDK_APB_WATCHDOG,
+        .index = 0,
+        .addr = 0x4802e000,
+        .ppc = NO_PPC,
+        .irq = NMI_0,
+        .slowclk = true,
+    },
+    {
+        .name = "watchdog",
+        .type = TYPE_UNIMPLEMENTED_DEVICE,
+        .index = 0,
+        .addr = 0x48040000,
+        .size = 0x2000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "armsse-sysinfo",
+        .type = TYPE_IOTKIT_SYSINFO,
+        .index = 0,
+        .addr = 0x48020000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "armsse-sysctl",
+        .type = TYPE_IOTKIT_SYSCTL,
+        .index = 0,
+        .addr = 0x58021000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "SYS_PPU",
+        .type = TYPE_UNIMPLEMENTED_DEVICE,
+        .index = 1,
+        .addr = 0x58022000,
+        .size = 0x1000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "CPU0CORE_PPU",
+        .type = TYPE_UNIMPLEMENTED_DEVICE,
+        .index = 2,
+        .addr = 0x50023000,
+        .size = 0x1000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "MGMT_PPU",
+        .type = TYPE_UNIMPLEMENTED_DEVICE,
+        .index = 3,
+        .addr = 0x50028000,
+        .size = 0x1000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = "DEBUG_PPU",
+        .type = TYPE_UNIMPLEMENTED_DEVICE,
+        .index = 4,
+        .addr = 0x50029000,
+        .size = 0x1000,
+        .ppc = NO_PPC,
+        .irq = NO_IRQ,
+    },
+    {
+        .name = NULL,
+    }
+};
 
 /* Is internal IRQ n shared between CPUs in a multi-core SSE ? */
-static bool irq_is_common[32] = {
+static const bool sse200_irq_is_common[32] = {
     [0 ... 5] = true,
     /* 6, 7: per-CPU MHU interrupts */
     [8 ... 12] = true,
@@ -145,6 +474,112 @@
     /* 30, 31: reserved */
 };
 
+static const bool sse300_irq_is_common[32] = {
+    [0 ... 5] = true,
+    /* 6, 7: per-CPU MHU interrupts */
+    [8 ... 12] = true,
+    /* 13: reserved */
+    [14 ... 16] = true,
+    /* 17-25: reserved */
+    [26 ... 27] = true,
+    /* 28, 29: per-CPU CTI interrupts */
+    /* 30, 31: reserved */
+};
+
+static const ARMSSEInfo armsse_variants[] = {
+    {
+        .name = TYPE_IOTKIT,
+        .sse_version = ARMSSE_IOTKIT,
+        .sram_banks = 1,
+        .num_cpus = 1,
+        .sys_version = 0x41743,
+        .iidr = 0,
+        .cpuwait_rst = 0,
+        .has_mhus = false,
+        .has_cachectrl = false,
+        .has_cpusecctrl = false,
+        .has_cpuid = false,
+        .has_cpu_pwrctrl = false,
+        .has_sse_counter = false,
+        .props = iotkit_properties,
+        .devinfo = iotkit_devices,
+        .irq_is_common = sse200_irq_is_common,
+    },
+    {
+        .name = TYPE_SSE200,
+        .sse_version = ARMSSE_SSE200,
+        .sram_banks = 4,
+        .num_cpus = 2,
+        .sys_version = 0x22041743,
+        .iidr = 0,
+        .cpuwait_rst = 2,
+        .has_mhus = true,
+        .has_cachectrl = true,
+        .has_cpusecctrl = true,
+        .has_cpuid = true,
+        .has_cpu_pwrctrl = false,
+        .has_sse_counter = false,
+        .props = armsse_properties,
+        .devinfo = sse200_devices,
+        .irq_is_common = sse200_irq_is_common,
+    },
+    {
+        .name = TYPE_SSE300,
+        .sse_version = ARMSSE_SSE300,
+        .sram_banks = 2,
+        .num_cpus = 1,
+        .sys_version = 0x7e00043b,
+        .iidr = 0x74a0043b,
+        .cpuwait_rst = 0,
+        .has_mhus = false,
+        .has_cachectrl = false,
+        .has_cpusecctrl = true,
+        .has_cpuid = true,
+        .has_cpu_pwrctrl = true,
+        .has_sse_counter = true,
+        .props = armsse_properties,
+        .devinfo = sse300_devices,
+        .irq_is_common = sse300_irq_is_common,
+    },
+};
+
+static uint32_t armsse_sys_config_value(ARMSSE *s, const ARMSSEInfo *info)
+{
+    /* Return the SYS_CONFIG value for this SSE */
+    uint32_t sys_config;
+
+    switch (info->sse_version) {
+    case ARMSSE_IOTKIT:
+        sys_config = 0;
+        sys_config = deposit32(sys_config, 0, 4, info->sram_banks);
+        sys_config = deposit32(sys_config, 4, 4, s->sram_addr_width - 12);
+        break;
+    case ARMSSE_SSE200:
+        sys_config = 0;
+        sys_config = deposit32(sys_config, 0, 4, info->sram_banks);
+        sys_config = deposit32(sys_config, 4, 5, s->sram_addr_width);
+        sys_config = deposit32(sys_config, 24, 4, 2);
+        if (info->num_cpus > 1) {
+            sys_config = deposit32(sys_config, 10, 1, 1);
+            sys_config = deposit32(sys_config, 20, 4, info->sram_banks - 1);
+            sys_config = deposit32(sys_config, 28, 4, 2);
+        }
+        break;
+    case ARMSSE_SSE300:
+        sys_config = 0;
+        sys_config = deposit32(sys_config, 0, 4, info->sram_banks);
+        sys_config = deposit32(sys_config, 4, 5, s->sram_addr_width);
+        sys_config = deposit32(sys_config, 16, 3, 3); /* CPU0 = Cortex-M55 */
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return sys_config;
+}
+
+/* Clock frequency in HZ of the 32KHz "slow clock" */
+#define S32KCLK (32 * 1000)
+
 /*
  * Create an alias region in @container of @size bytes starting at @base
  * which mirrors the memory starting at @orig.
@@ -230,9 +665,10 @@
     qdev_connect_gpio_out(dev_splitter, 2, s->sec_resp_cfg_in);
 }
 
-static void armsse_mainclk_update(void *opaque)
+static void armsse_mainclk_update(void *opaque, ClockEvent event)
 {
     ARMSSE *s = ARM_SSE(opaque);
+
     /*
      * Set system_clock_scale from our Clock input; this is what
      * controls the tick rate of the CPU SysTick timer.
@@ -245,14 +681,15 @@
     ARMSSE *s = ARM_SSE(obj);
     ARMSSEClass *asc = ARM_SSE_GET_CLASS(obj);
     const ARMSSEInfo *info = asc->info;
+    const ARMSSEDeviceInfo *devinfo;
     int i;
 
     assert(info->sram_banks <= MAX_SRAM_BANKS);
     assert(info->num_cpus <= SSE_MAX_CPUS);
 
     s->mainclk = qdev_init_clock_in(DEVICE(s), "MAINCLK",
-                                    armsse_mainclk_update, s);
-    s->s32kclk = qdev_init_clock_in(DEVICE(s), "S32KCLK", NULL, NULL);
+                                    armsse_mainclk_update, s, ClockUpdate);
+    s->s32kclk = qdev_init_clock_in(DEVICE(s), "S32KCLK", NULL, NULL, 0);
 
     memory_region_init(&s->container, obj, "armsse-container", UINT64_MAX);
 
@@ -285,9 +722,52 @@
         }
     }
 
+    for (devinfo = info->devinfo; devinfo->name; devinfo++) {
+        assert(devinfo->ppc == NO_PPC || devinfo->ppc < ARRAY_SIZE(s->apb_ppc));
+        if (!strcmp(devinfo->type, TYPE_CMSDK_APB_TIMER)) {
+            assert(devinfo->index < ARRAY_SIZE(s->timer));
+            object_initialize_child(obj, devinfo->name,
+                                    &s->timer[devinfo->index],
+                                    TYPE_CMSDK_APB_TIMER);
+        } else if (!strcmp(devinfo->type, TYPE_CMSDK_APB_DUALTIMER)) {
+            assert(devinfo->index == 0);
+            object_initialize_child(obj, devinfo->name, &s->dualtimer,
+                                    TYPE_CMSDK_APB_DUALTIMER);
+        } else if (!strcmp(devinfo->type, TYPE_SSE_TIMER)) {
+            assert(devinfo->index < ARRAY_SIZE(s->sse_timer));
+            object_initialize_child(obj, devinfo->name,
+                                    &s->sse_timer[devinfo->index],
+                                    TYPE_SSE_TIMER);
+        } else if (!strcmp(devinfo->type, TYPE_CMSDK_APB_WATCHDOG)) {
+            assert(devinfo->index < ARRAY_SIZE(s->cmsdk_watchdog));
+            object_initialize_child(obj, devinfo->name,
+                                    &s->cmsdk_watchdog[devinfo->index],
+                                    TYPE_CMSDK_APB_WATCHDOG);
+        } else if (!strcmp(devinfo->type, TYPE_IOTKIT_SYSINFO)) {
+            assert(devinfo->index == 0);
+            object_initialize_child(obj, devinfo->name, &s->sysinfo,
+                                    TYPE_IOTKIT_SYSINFO);
+        } else if (!strcmp(devinfo->type, TYPE_IOTKIT_SYSCTL)) {
+            assert(devinfo->index == 0);
+            object_initialize_child(obj, devinfo->name, &s->sysctl,
+                                    TYPE_IOTKIT_SYSCTL);
+        } else if (!strcmp(devinfo->type, TYPE_UNIMPLEMENTED_DEVICE)) {
+            assert(devinfo->index < ARRAY_SIZE(s->unimp));
+            object_initialize_child(obj, devinfo->name,
+                                    &s->unimp[devinfo->index],
+                                    TYPE_UNIMPLEMENTED_DEVICE);
+        } else {
+            g_assert_not_reached();
+        }
+    }
+
     object_initialize_child(obj, "secctl", &s->secctl, TYPE_IOTKIT_SECCTL);
-    object_initialize_child(obj, "apb-ppc0", &s->apb_ppc0, TYPE_TZ_PPC);
-    object_initialize_child(obj, "apb-ppc1", &s->apb_ppc1, TYPE_TZ_PPC);
+
+    for (i = 0; i < ARRAY_SIZE(s->apb_ppc); i++) {
+        g_autofree char *name = g_strdup_printf("apb-ppc%d", i);
+        object_initialize_child(obj, name, &s->apb_ppc[i], TYPE_TZ_PPC);
+    }
+
     for (i = 0; i < info->sram_banks; i++) {
         char *name = g_strdup_printf("mpc%d", i);
         object_initialize_child(obj, name, &s->mpc[i], TYPE_TZ_MPC);
@@ -303,46 +783,11 @@
         object_initialize_child(obj, name, splitter, TYPE_SPLIT_IRQ);
         g_free(name);
     }
-    object_initialize_child(obj, "timer0", &s->timer0, TYPE_CMSDK_APB_TIMER);
-    object_initialize_child(obj, "timer1", &s->timer1, TYPE_CMSDK_APB_TIMER);
-    object_initialize_child(obj, "s32ktimer", &s->s32ktimer,
-                            TYPE_CMSDK_APB_TIMER);
-    object_initialize_child(obj, "dualtimer", &s->dualtimer,
-                            TYPE_CMSDK_APB_DUALTIMER);
-    object_initialize_child(obj, "s32kwatchdog", &s->s32kwatchdog,
-                            TYPE_CMSDK_APB_WATCHDOG);
-    object_initialize_child(obj, "nswatchdog", &s->nswatchdog,
-                            TYPE_CMSDK_APB_WATCHDOG);
-    object_initialize_child(obj, "swatchdog", &s->swatchdog,
-                            TYPE_CMSDK_APB_WATCHDOG);
-    object_initialize_child(obj, "armsse-sysctl", &s->sysctl,
-                            TYPE_IOTKIT_SYSCTL);
-    object_initialize_child(obj, "armsse-sysinfo", &s->sysinfo,
-                            TYPE_IOTKIT_SYSINFO);
+
     if (info->has_mhus) {
         object_initialize_child(obj, "mhu0", &s->mhu[0], TYPE_ARMSSE_MHU);
         object_initialize_child(obj, "mhu1", &s->mhu[1], TYPE_ARMSSE_MHU);
     }
-    if (info->has_ppus) {
-        for (i = 0; i < info->num_cpus; i++) {
-            char *name = g_strdup_printf("CPU%dCORE_PPU", i);
-            int ppuidx = CPU0CORE_PPU + i;
-
-            object_initialize_child(obj, name, &s->ppu[ppuidx],
-                                    TYPE_UNIMPLEMENTED_DEVICE);
-            g_free(name);
-        }
-        object_initialize_child(obj, "DBG_PPU", &s->ppu[DBG_PPU],
-                                TYPE_UNIMPLEMENTED_DEVICE);
-        for (i = 0; i < info->sram_banks; i++) {
-            char *name = g_strdup_printf("RAM%d_PPU", i);
-            int ppuidx = RAM0_PPU + i;
-
-            object_initialize_child(obj, name, &s->ppu[ppuidx],
-                                    TYPE_UNIMPLEMENTED_DEVICE);
-            g_free(name);
-        }
-    }
     if (info->has_cachectrl) {
         for (i = 0; i < info->num_cpus; i++) {
             char *name = g_strdup_printf("cachectrl%d", i);
@@ -370,6 +815,20 @@
             g_free(name);
         }
     }
+    if (info->has_cpu_pwrctrl) {
+        for (i = 0; i < info->num_cpus; i++) {
+            char *name = g_strdup_printf("cpu_pwrctrl%d", i);
+
+            object_initialize_child(obj, name, &s->cpu_pwrctrl[i],
+                                    TYPE_ARMSSE_CPU_PWRCTRL);
+            g_free(name);
+        }
+    }
+    if (info->has_sse_counter) {
+        object_initialize_child(obj, "sse-counter", &s->sse_counter,
+                                TYPE_SSE_COUNTER);
+    }
+
     object_initialize_child(obj, "nmi-orgate", &s->nmi_orgate, TYPE_OR_IRQ);
     object_initialize_child(obj, "ppc-irq-orgate", &s->ppc_irq_orgate,
                             TYPE_OR_IRQ);
@@ -384,7 +843,7 @@
     }
     if (info->num_cpus > 1) {
         for (i = 0; i < ARRAY_SIZE(s->cpu_irq_splitter); i++) {
-            if (irq_is_common[i]) {
+            if (info->irq_is_common[i]) {
                 char *name = g_strdup_printf("cpu-irq-splitter%d", i);
                 SplitIRQ *splitter = &s->cpu_irq_splitter[i];
 
@@ -417,7 +876,7 @@
     ARMSSEClass *asc = ARM_SSE_GET_CLASS(s);
     const ARMSSEInfo *info = asc->info;
 
-    assert(irq_is_common[irqno]);
+    assert(info->irq_is_common[irqno]);
 
     if (info->num_cpus == 1) {
         /* Only one CPU -- just connect directly to it */
@@ -428,22 +887,12 @@
     }
 }
 
-static void map_ppu(ARMSSE *s, int ppuidx, const char *name, hwaddr addr)
-{
-    /* Map a PPU unimplemented device stub */
-    DeviceState *dev = DEVICE(&s->ppu[ppuidx]);
-
-    qdev_prop_set_string(dev, "name", name);
-    qdev_prop_set_uint64(dev, "size", 0x1000);
-    sysbus_realize(SYS_BUS_DEVICE(dev), &error_fatal);
-    sysbus_mmio_map(SYS_BUS_DEVICE(&s->ppu[ppuidx]), 0, addr);
-}
-
 static void armsse_realize(DeviceState *dev, Error **errp)
 {
     ARMSSE *s = ARM_SSE(dev);
     ARMSSEClass *asc = ARM_SSE_GET_CLASS(dev);
     const ARMSSEInfo *info = asc->info;
+    const ARMSSEDeviceInfo *devinfo;
     int i;
     MemoryRegion *mr;
     Error *err = NULL;
@@ -522,7 +971,7 @@
         int j;
         char *gpioname;
 
-        qdev_prop_set_uint32(cpudev, "num-irq", s->exp_numirq + 32);
+        qdev_prop_set_uint32(cpudev, "num-irq", s->exp_numirq + NUM_SSE_IRQS);
         /*
          * In real hardware the initial Secure VTOR is set from the INITSVTOR*
          * registers in the IoT Kit System Control Register block. In QEMU
@@ -593,7 +1042,7 @@
         /* Connect EXP_IRQ/EXP_CPUn_IRQ GPIOs to the NVIC's lines 32 and up */
         s->exp_irqs[i] = g_new(qemu_irq, s->exp_numirq);
         for (j = 0; j < s->exp_numirq; j++) {
-            s->exp_irqs[i][j] = qdev_get_gpio_in(cpudev, j + 32);
+            s->exp_irqs[i][j] = qdev_get_gpio_in(cpudev, j + NUM_SSE_IRQS);
         }
         if (i == 0) {
             gpioname = g_strdup("EXP_IRQ");
@@ -609,7 +1058,7 @@
     /* Wire up the splitters that connect common IRQs to all CPUs */
     if (info->num_cpus > 1) {
         for (i = 0; i < ARRAY_SIZE(s->cpu_irq_splitter); i++) {
-            if (irq_is_common[i]) {
+            if (info->irq_is_common[i]) {
                 Object *splitter = OBJECT(&s->cpu_irq_splitter[i]);
                 DeviceState *devs = DEVICE(splitter);
                 int cpunum;
@@ -649,6 +1098,8 @@
     }
 
     /* Security controller */
+    object_property_set_int(OBJECT(&s->secctl), "sse-version",
+                            info->sse_version, &error_abort);
     if (!sysbus_realize(SYS_BUS_DEVICE(&s->secctl), errp)) {
         return;
     }
@@ -715,6 +1166,36 @@
     qdev_connect_gpio_out(DEVICE(&s->mpc_irq_orgate), 0,
                           armsse_get_common_irq_in(s, 9));
 
+    /* This OR gate wires together outputs from the secure watchdogs to NMI */
+    if (!object_property_set_int(OBJECT(&s->nmi_orgate), "num-lines", 2,
+                                 errp)) {
+        return;
+    }
+    if (!qdev_realize(DEVICE(&s->nmi_orgate), NULL, errp)) {
+        return;
+    }
+    qdev_connect_gpio_out(DEVICE(&s->nmi_orgate), 0,
+                          qdev_get_gpio_in_named(DEVICE(&s->armv7m), "NMI", 0));
+
+    /* The SSE-300 has a System Counter / System Timestamp Generator */
+    if (info->has_sse_counter) {
+        SysBusDevice *sbd = SYS_BUS_DEVICE(&s->sse_counter);
+
+        qdev_connect_clock_in(DEVICE(sbd), "CLK", s->mainclk);
+        if (!sysbus_realize(sbd, errp)) {
+            return;
+        }
+        /*
+         * The control frame is only in the Secure region;
+         * the status frame is in the NS region (and visible in the
+         * S region via the alias mapping).
+         */
+        memory_region_add_subregion(&s->container, 0x58100000,
+                                    sysbus_mmio_get_region(sbd, 0));
+        memory_region_add_subregion(&s->container, 0x48101000,
+                                    sysbus_mmio_get_region(sbd, 1));
+    }
+
     /* Devices behind APB PPC0:
      *   0x40000000: timer0
      *   0x40001000: timer1
@@ -725,35 +1206,127 @@
      * it to the appropriate PPC port; then we can realize the PPC and
      * map its upstream ends to the right place in the container.
      */
-    qdev_connect_clock_in(DEVICE(&s->timer0), "pclk", s->mainclk);
-    if (!sysbus_realize(SYS_BUS_DEVICE(&s->timer0), errp)) {
-        return;
-    }
-    sysbus_connect_irq(SYS_BUS_DEVICE(&s->timer0), 0,
-                       armsse_get_common_irq_in(s, 3));
-    mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->timer0), 0);
-    object_property_set_link(OBJECT(&s->apb_ppc0), "port[0]", OBJECT(mr),
-                             &error_abort);
+    for (devinfo = info->devinfo; devinfo->name; devinfo++) {
+        SysBusDevice *sbd;
+        qemu_irq irq;
 
-    qdev_connect_clock_in(DEVICE(&s->timer1), "pclk", s->mainclk);
-    if (!sysbus_realize(SYS_BUS_DEVICE(&s->timer1), errp)) {
-        return;
-    }
-    sysbus_connect_irq(SYS_BUS_DEVICE(&s->timer1), 0,
-                       armsse_get_common_irq_in(s, 4));
-    mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->timer1), 0);
-    object_property_set_link(OBJECT(&s->apb_ppc0), "port[1]", OBJECT(mr),
-                             &error_abort);
+        if (!strcmp(devinfo->type, TYPE_CMSDK_APB_TIMER)) {
+            sbd = SYS_BUS_DEVICE(&s->timer[devinfo->index]);
 
-    qdev_connect_clock_in(DEVICE(&s->dualtimer), "TIMCLK", s->mainclk);
-    if (!sysbus_realize(SYS_BUS_DEVICE(&s->dualtimer), errp)) {
-        return;
+            qdev_connect_clock_in(DEVICE(sbd), "pclk",
+                                  devinfo->slowclk ? s->s32kclk : s->mainclk);
+            if (!sysbus_realize(sbd, errp)) {
+                return;
+            }
+            mr = sysbus_mmio_get_region(sbd, 0);
+        } else if (!strcmp(devinfo->type, TYPE_CMSDK_APB_DUALTIMER)) {
+            sbd = SYS_BUS_DEVICE(&s->dualtimer);
+
+            qdev_connect_clock_in(DEVICE(sbd), "TIMCLK", s->mainclk);
+            if (!sysbus_realize(sbd, errp)) {
+                return;
+            }
+            mr = sysbus_mmio_get_region(sbd, 0);
+        } else if (!strcmp(devinfo->type, TYPE_SSE_TIMER)) {
+            sbd = SYS_BUS_DEVICE(&s->sse_timer[devinfo->index]);
+
+            assert(info->has_sse_counter);
+            object_property_set_link(OBJECT(sbd), "counter",
+                                     OBJECT(&s->sse_counter), &error_abort);
+            if (!sysbus_realize(sbd, errp)) {
+                return;
+            }
+            mr = sysbus_mmio_get_region(sbd, 0);
+        } else if (!strcmp(devinfo->type, TYPE_CMSDK_APB_WATCHDOG)) {
+            sbd = SYS_BUS_DEVICE(&s->cmsdk_watchdog[devinfo->index]);
+
+            qdev_connect_clock_in(DEVICE(sbd), "WDOGCLK",
+                                  devinfo->slowclk ? s->s32kclk : s->mainclk);
+            if (!sysbus_realize(sbd, errp)) {
+                return;
+            }
+            mr = sysbus_mmio_get_region(sbd, 0);
+        } else if (!strcmp(devinfo->type, TYPE_IOTKIT_SYSINFO)) {
+            sbd = SYS_BUS_DEVICE(&s->sysinfo);
+
+            object_property_set_int(OBJECT(&s->sysinfo), "SYS_VERSION",
+                                    info->sys_version, &error_abort);
+            object_property_set_int(OBJECT(&s->sysinfo), "SYS_CONFIG",
+                                    armsse_sys_config_value(s, info),
+                                    &error_abort);
+            object_property_set_int(OBJECT(&s->sysinfo), "sse-version",
+                                    info->sse_version, &error_abort);
+            object_property_set_int(OBJECT(&s->sysinfo), "IIDR",
+                                    info->iidr, &error_abort);
+            if (!sysbus_realize(sbd, errp)) {
+                return;
+            }
+            mr = sysbus_mmio_get_region(sbd, 0);
+        } else if (!strcmp(devinfo->type, TYPE_IOTKIT_SYSCTL)) {
+            /* System control registers */
+            sbd = SYS_BUS_DEVICE(&s->sysctl);
+
+            object_property_set_int(OBJECT(&s->sysctl), "sse-version",
+                                    info->sse_version, &error_abort);
+            object_property_set_int(OBJECT(&s->sysctl), "CPUWAIT_RST",
+                                    info->cpuwait_rst, &error_abort);
+            object_property_set_int(OBJECT(&s->sysctl), "INITSVTOR0_RST",
+                                    s->init_svtor, &error_abort);
+            object_property_set_int(OBJECT(&s->sysctl), "INITSVTOR1_RST",
+                                    s->init_svtor, &error_abort);
+            if (!sysbus_realize(sbd, errp)) {
+                return;
+            }
+            mr = sysbus_mmio_get_region(sbd, 0);
+        } else if (!strcmp(devinfo->type, TYPE_UNIMPLEMENTED_DEVICE)) {
+            sbd = SYS_BUS_DEVICE(&s->unimp[devinfo->index]);
+
+            qdev_prop_set_string(DEVICE(sbd), "name", devinfo->name);
+            qdev_prop_set_uint64(DEVICE(sbd), "size", devinfo->size);
+            if (!sysbus_realize(sbd, errp)) {
+                return;
+            }
+            mr = sysbus_mmio_get_region(sbd, 0);
+        } else {
+            g_assert_not_reached();
+        }
+
+        switch (devinfo->irq) {
+        case NO_IRQ:
+            irq = NULL;
+            break;
+        case 0 ... NUM_SSE_IRQS - 1:
+            irq = armsse_get_common_irq_in(s, devinfo->irq);
+            break;
+        case NMI_0:
+        case NMI_1:
+            irq = qdev_get_gpio_in(DEVICE(&s->nmi_orgate),
+                                   devinfo->irq - NMI_0);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+
+        if (irq) {
+            sysbus_connect_irq(sbd, 0, irq);
+        }
+
+        /*
+         * Devices connected to a PPC are connected to the port here;
+         * we will map the upstream end of that port to the right address
+         * in the container later after the PPC has been realized.
+         * Devices not connected to a PPC can be mapped immediately.
+         */
+        if (devinfo->ppc != NO_PPC) {
+            TZPPC *ppc = &s->apb_ppc[devinfo->ppc];
+            g_autofree char *portname = g_strdup_printf("port[%d]",
+                                                        devinfo->ppc_port);
+            object_property_set_link(OBJECT(ppc), portname, OBJECT(mr),
+                                     &error_abort);
+        } else {
+            memory_region_add_subregion(&s->container, devinfo->addr, mr);
+        }
     }
-    sysbus_connect_irq(SYS_BUS_DEVICE(&s->dualtimer), 0,
-                       armsse_get_common_irq_in(s, 5));
-    mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->dualtimer), 0);
-    object_property_set_link(OBJECT(&s->apb_ppc0), "port[2]", OBJECT(mr),
-                             &error_abort);
 
     if (info->has_mhus) {
         /*
@@ -775,7 +1348,7 @@
             }
             port = g_strdup_printf("port[%d]", i + 3);
             mr = sysbus_mmio_get_region(mhu_sbd, 0);
-            object_property_set_link(OBJECT(&s->apb_ppc0), port, OBJECT(mr),
+            object_property_set_link(OBJECT(&s->apb_ppc[0]), port, OBJECT(mr),
                                      &error_abort);
             g_free(port);
 
@@ -795,19 +1368,13 @@
         }
     }
 
-    if (!sysbus_realize(SYS_BUS_DEVICE(&s->apb_ppc0), errp)) {
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->apb_ppc[0]), errp)) {
         return;
     }
 
-    sbd_apb_ppc0 = SYS_BUS_DEVICE(&s->apb_ppc0);
-    dev_apb_ppc0 = DEVICE(&s->apb_ppc0);
+    sbd_apb_ppc0 = SYS_BUS_DEVICE(&s->apb_ppc[0]);
+    dev_apb_ppc0 = DEVICE(&s->apb_ppc[0]);
 
-    mr = sysbus_mmio_get_region(sbd_apb_ppc0, 0);
-    memory_region_add_subregion(&s->container, 0x40000000, mr);
-    mr = sysbus_mmio_get_region(sbd_apb_ppc0, 1);
-    memory_region_add_subregion(&s->container, 0x40001000, mr);
-    mr = sysbus_mmio_get_region(sbd_apb_ppc0, 2);
-    memory_region_add_subregion(&s->container, 0x40002000, mr);
     if (info->has_mhus) {
         mr = sysbus_mmio_get_region(sbd_apb_ppc0, 3);
         memory_region_add_subregion(&s->container, 0x40003000, mr);
@@ -852,6 +1419,8 @@
      *  0x50010000: L1 icache control registers
      *  0x50011000: CPUSECCTRL (CPU local security control registers)
      *  0x4001f000 and 0x5001f000: CPU_IDENTITY register block
+     * The SSE-300 has an extra:
+     *  0x40012000 and 0x50012000: CPU_PWRCTRL register block
      */
     if (info->has_cachectrl) {
         for (i = 0; i < info->num_cpus; i++) {
@@ -898,28 +1467,24 @@
             memory_region_add_subregion(&s->cpu_container[i], 0x4001F000, mr);
         }
     }
+    if (info->has_cpu_pwrctrl) {
+        for (i = 0; i < info->num_cpus; i++) {
+            MemoryRegion *mr;
 
-    /* 0x40020000 .. 0x4002ffff : ARMSSE system control peripheral region */
-    /* Devices behind APB PPC1:
-     *   0x4002f000: S32K timer
-     */
-    qdev_connect_clock_in(DEVICE(&s->s32ktimer), "pclk", s->s32kclk);
-    if (!sysbus_realize(SYS_BUS_DEVICE(&s->s32ktimer), errp)) {
+            if (!sysbus_realize(SYS_BUS_DEVICE(&s->cpu_pwrctrl[i]), errp)) {
+                return;
+            }
+
+            mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->cpu_pwrctrl[i]), 0);
+            memory_region_add_subregion(&s->cpu_container[i], 0x40012000, mr);
+        }
+    }
+
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->apb_ppc[1]), errp)) {
         return;
     }
-    sysbus_connect_irq(SYS_BUS_DEVICE(&s->s32ktimer), 0,
-                       armsse_get_common_irq_in(s, 2));
-    mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->s32ktimer), 0);
-    object_property_set_link(OBJECT(&s->apb_ppc1), "port[0]", OBJECT(mr),
-                             &error_abort);
 
-    if (!sysbus_realize(SYS_BUS_DEVICE(&s->apb_ppc1), errp)) {
-        return;
-    }
-    mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->apb_ppc1), 0);
-    memory_region_add_subregion(&s->container, 0x4002f000, mr);
-
-    dev_apb_ppc1 = DEVICE(&s->apb_ppc1);
+    dev_apb_ppc1 = DEVICE(&s->apb_ppc[1]);
     qdev_connect_gpio_out_named(dev_secctl, "apb_ppc1_nonsec", 0,
                                 qdev_get_gpio_in_named(dev_apb_ppc1,
                                                        "cfg_nonsec", 0));
@@ -936,92 +1501,23 @@
                           qdev_get_gpio_in_named(dev_apb_ppc1,
                                                  "cfg_sec_resp", 0));
 
-    if (!object_property_set_int(OBJECT(&s->sysinfo), "SYS_VERSION",
-                                 info->sys_version, errp)) {
-        return;
-    }
-    if (!object_property_set_int(OBJECT(&s->sysinfo), "SYS_CONFIG",
-                                 armsse_sys_config_value(s, info), errp)) {
-        return;
-    }
-    if (!sysbus_realize(SYS_BUS_DEVICE(&s->sysinfo), errp)) {
-        return;
-    }
-    /* System information registers */
-    sysbus_mmio_map(SYS_BUS_DEVICE(&s->sysinfo), 0, 0x40020000);
-    /* System control registers */
-    object_property_set_int(OBJECT(&s->sysctl), "SYS_VERSION",
-                            info->sys_version, &error_abort);
-    object_property_set_int(OBJECT(&s->sysctl), "CPUWAIT_RST",
-                            info->cpuwait_rst, &error_abort);
-    object_property_set_int(OBJECT(&s->sysctl), "INITSVTOR0_RST",
-                            s->init_svtor, &error_abort);
-    object_property_set_int(OBJECT(&s->sysctl), "INITSVTOR1_RST",
-                            s->init_svtor, &error_abort);
-    if (!sysbus_realize(SYS_BUS_DEVICE(&s->sysctl), errp)) {
-        return;
-    }
-    sysbus_mmio_map(SYS_BUS_DEVICE(&s->sysctl), 0, 0x50021000);
+    /*
+     * Now both PPCs are realized we can map the upstream ends of
+     * ports which correspond to entries in the devinfo array.
+     * The ports which are connected to non-devinfo devices have
+     * already been mapped.
+     */
+    for (devinfo = info->devinfo; devinfo->name; devinfo++) {
+        SysBusDevice *ppc_sbd;
 
-    if (info->has_ppus) {
-        /* CPUnCORE_PPU for each CPU */
-        for (i = 0; i < info->num_cpus; i++) {
-            char *name = g_strdup_printf("CPU%dCORE_PPU", i);
-
-            map_ppu(s, CPU0CORE_PPU + i, name, 0x50023000 + i * 0x2000);
-            /*
-             * We don't support CPU debug so don't create the
-             * CPU0DEBUG_PPU at 0x50024000 and 0x50026000.
-             */
-            g_free(name);
+        if (devinfo->ppc == NO_PPC) {
+            continue;
         }
-        map_ppu(s, DBG_PPU, "DBG_PPU", 0x50029000);
-
-        for (i = 0; i < info->sram_banks; i++) {
-            char *name = g_strdup_printf("RAM%d_PPU", i);
-
-            map_ppu(s, RAM0_PPU + i, name, 0x5002a000 + i * 0x1000);
-            g_free(name);
-        }
+        ppc_sbd = SYS_BUS_DEVICE(&s->apb_ppc[devinfo->ppc]);
+        mr = sysbus_mmio_get_region(ppc_sbd, devinfo->ppc_port);
+        memory_region_add_subregion(&s->container, devinfo->addr, mr);
     }
 
-    /* This OR gate wires together outputs from the secure watchdogs to NMI */
-    if (!object_property_set_int(OBJECT(&s->nmi_orgate), "num-lines", 2,
-                                 errp)) {
-        return;
-    }
-    if (!qdev_realize(DEVICE(&s->nmi_orgate), NULL, errp)) {
-        return;
-    }
-    qdev_connect_gpio_out(DEVICE(&s->nmi_orgate), 0,
-                          qdev_get_gpio_in_named(DEVICE(&s->armv7m), "NMI", 0));
-
-    qdev_connect_clock_in(DEVICE(&s->s32kwatchdog), "WDOGCLK", s->s32kclk);
-    if (!sysbus_realize(SYS_BUS_DEVICE(&s->s32kwatchdog), errp)) {
-        return;
-    }
-    sysbus_connect_irq(SYS_BUS_DEVICE(&s->s32kwatchdog), 0,
-                       qdev_get_gpio_in(DEVICE(&s->nmi_orgate), 0));
-    sysbus_mmio_map(SYS_BUS_DEVICE(&s->s32kwatchdog), 0, 0x5002e000);
-
-    /* 0x40080000 .. 0x4008ffff : ARMSSE second Base peripheral region */
-
-    qdev_connect_clock_in(DEVICE(&s->nswatchdog), "WDOGCLK", s->mainclk);
-    if (!sysbus_realize(SYS_BUS_DEVICE(&s->nswatchdog), errp)) {
-        return;
-    }
-    sysbus_connect_irq(SYS_BUS_DEVICE(&s->nswatchdog), 0,
-                       armsse_get_common_irq_in(s, 1));
-    sysbus_mmio_map(SYS_BUS_DEVICE(&s->nswatchdog), 0, 0x40081000);
-
-    qdev_connect_clock_in(DEVICE(&s->swatchdog), "WDOGCLK", s->mainclk);
-    if (!sysbus_realize(SYS_BUS_DEVICE(&s->swatchdog), errp)) {
-        return;
-    }
-    sysbus_connect_irq(SYS_BUS_DEVICE(&s->swatchdog), 0,
-                       qdev_get_gpio_in(DEVICE(&s->nmi_orgate), 1));
-    sysbus_mmio_map(SYS_BUS_DEVICE(&s->swatchdog), 0, 0x50081000);
-
     for (i = 0; i < ARRAY_SIZE(s->ppc_irq_splitter); i++) {
         Object *splitter = OBJECT(&s->ppc_irq_splitter[i]);
 
@@ -1052,7 +1548,7 @@
         DeviceState *devs = DEVICE(&s->ppc_irq_splitter[i]);
         char *gpioname = g_strdup_printf("apb_ppc%d_irq_status",
                                          i - NUM_EXTERNAL_PPCS);
-        TZPPC *ppc = (i == NUM_EXTERNAL_PPCS) ? &s->apb_ppc0 : &s->apb_ppc1;
+        TZPPC *ppc = &s->apb_ppc[i - NUM_EXTERNAL_PPCS];
 
         qdev_connect_gpio_out(devs, 0,
                               qdev_get_gpio_in_named(dev_secctl, gpioname, 0));
@@ -1120,7 +1616,7 @@
     sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->container);
 
     /* Set initial system_clock_scale from MAINCLK */
-    armsse_mainclk_update(s);
+    armsse_mainclk_update(s, ClockUpdate);
 }
 
 static void armsse_idau_check(IDAUInterface *ii, uint32_t address,
diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c
index 72da8cb..3fbe3d2 100644
--- a/hw/arm/mps2-tz.c
+++ b/hw/arm/mps2-tz.c
@@ -17,6 +17,7 @@
  *  "mps2-an505" -- Cortex-M33 as documented in ARM Application Note AN505
  *  "mps2-an521" -- Dual Cortex-M33 as documented in Application Note AN521
  *  "mps2-an524" -- Dual Cortex-M33 as documented in Application Note AN524
+ *  "mps2-an547" -- Single Cortex-M55 as documented in Application Note AN547
  *
  * Links to the TRM for the board itself and to the various Application
  * Notes which document the FPGA images can be found here:
@@ -30,6 +31,8 @@
  * https://developer.arm.com/documentation/dai0521/latest/
  * Application Note AN524:
  * https://developer.arm.com/documentation/dai0524/latest/
+ * Application Note AN547:
+ * https://developer.arm.com/-/media/Arm%20Developer%20Community/PDF/DAI0547B_SSE300_PLUS_U55_FPGA_for_mps3.pdf
  *
  * The AN505 defers to the Cortex-M33 processor ARMv8M IoT Kit FVP User Guide
  * (ARM ECM0601256) for the details of some of the device layout:
@@ -37,6 +40,8 @@
  * Similarly, the AN521 and AN524 use the SSE-200, and the SSE-200 TRM defines
  * most of the device layout:
  *  https://developer.arm.com/documentation/101104/latest/
+ * and the AN547 uses the SSE-300, whose layout is in the SSE-300 TRM:
+ *  https://developer.arm.com/documentation/101773/latest/
  */
 
 #include "qemu/osdep.h"
@@ -68,13 +73,14 @@
 #include "hw/qdev-clock.h"
 #include "qom/object.h"
 
-#define MPS2TZ_NUMIRQ_MAX 95
-#define MPS2TZ_RAM_MAX 4
+#define MPS2TZ_NUMIRQ_MAX 96
+#define MPS2TZ_RAM_MAX 5
 
 typedef enum MPS2TZFPGAType {
     FPGA_AN505,
     FPGA_AN521,
     FPGA_AN524,
+    FPGA_AN547,
 } MPS2TZFPGAType;
 
 /*
@@ -106,11 +112,15 @@
     MPS2TZFPGAType fpga_type;
     uint32_t scc_id;
     uint32_t sysclk_frq; /* Main SYSCLK frequency in Hz */
+    uint32_t apb_periph_frq; /* APB peripheral frequency in Hz */
     uint32_t len_oscclk;
     const uint32_t *oscclk;
     uint32_t fpgaio_num_leds; /* Number of LEDs in FPGAIO LED0 register */
     bool fpgaio_has_switches; /* Does FPGAIO have SWITCH register? */
+    bool fpgaio_has_dbgctrl; /* Does FPGAIO have DBGCTRL register? */
     int numirq; /* Number of external interrupts */
+    int uart_overflow_irq; /* number of the combined UART overflow IRQ */
+    uint32_t init_svtor; /* init-svtor setting for SSE */
     const RAMInfo *raminfo;
     const char *armsse_type;
 };
@@ -149,6 +159,7 @@
 #define TYPE_MPS2TZ_AN505_MACHINE MACHINE_TYPE_NAME("mps2-an505")
 #define TYPE_MPS2TZ_AN521_MACHINE MACHINE_TYPE_NAME("mps2-an521")
 #define TYPE_MPS3TZ_AN524_MACHINE MACHINE_TYPE_NAME("mps3-an524")
+#define TYPE_MPS3TZ_AN547_MACHINE MACHINE_TYPE_NAME("mps3-an547")
 
 OBJECT_DECLARE_TYPE(MPS2TZMachineState, MPS2TZMachineClass, MPS2TZ_MACHINE)
 
@@ -248,6 +259,49 @@
     },
 };
 
+static const RAMInfo an547_raminfo[] = { {
+        .name = "itcm",
+        .base = 0x00000000,
+        .size = 512 * KiB,
+        .mpc = -1,
+        .mrindex = 0,
+    }, {
+        .name = "sram",
+        .base = 0x01000000,
+        .size = 2 * MiB,
+        .mpc = 0,
+        .mrindex = 1,
+    }, {
+        .name = "dtcm",
+        .base = 0x20000000,
+        .size = 4 * 128 * KiB,
+        .mpc = -1,
+        .mrindex = 2,
+    }, {
+        .name = "sram 2",
+        .base = 0x21000000,
+        .size = 4 * MiB,
+        .mpc = -1,
+        .mrindex = 3,
+    }, {
+        /* We don't model QSPI flash yet; for now expose it as simple ROM */
+        .name = "QSPI",
+        .base = 0x28000000,
+        .size = 8 * MiB,
+        .mpc = 1,
+        .mrindex = 4,
+        .flags = IS_ROM,
+    }, {
+        .name = "DDR",
+        .base = 0x60000000,
+        .size = MPS3_DDR_SIZE,
+        .mpc = 2,
+        .mrindex = -1,
+    }, {
+        .name = NULL,
+    },
+};
+
 static const RAMInfo *find_raminfo_for_mpc(MPS2TZMachineState *mms, int mpc)
 {
     MPS2TZMachineClass *mmc = MPS2TZ_MACHINE_GET_CLASS(mms);
@@ -377,7 +431,7 @@
 
     object_initialize_child(OBJECT(mms), name, uart, TYPE_CMSDK_APB_UART);
     qdev_prop_set_chr(DEVICE(uart), "chardev", serial_hd(i));
-    qdev_prop_set_uint32(DEVICE(uart), "pclk-frq", mmc->sysclk_frq);
+    qdev_prop_set_uint32(DEVICE(uart), "pclk-frq", mmc->apb_periph_frq);
     sysbus_realize(SYS_BUS_DEVICE(uart), &error_fatal);
     s = SYS_BUS_DEVICE(uart);
     sysbus_connect_irq(s, 0, get_sse_irq_in(mms, irqs[0]));
@@ -421,6 +475,7 @@
     object_initialize_child(OBJECT(mms), "fpgaio", fpgaio, TYPE_MPS2_FPGAIO);
     qdev_prop_set_uint32(DEVICE(fpgaio), "num-leds", mmc->fpgaio_num_leds);
     qdev_prop_set_bit(DEVICE(fpgaio), "has-switches", mmc->fpgaio_has_switches);
+    qdev_prop_set_bit(DEVICE(fpgaio), "has-dbgctrl", mmc->fpgaio_has_dbgctrl);
     sysbus_realize(SYS_BUS_DEVICE(fpgaio), &error_fatal);
     return sysbus_mmio_get_region(SYS_BUS_DEVICE(fpgaio), 0);
 }
@@ -696,6 +751,7 @@
     object_property_set_link(OBJECT(&mms->iotkit), "memory",
                              OBJECT(system_memory), &error_abort);
     qdev_prop_set_uint32(iotkitdev, "EXP_NUMIRQ", mmc->numirq);
+    qdev_prop_set_uint32(iotkitdev, "init-svtor", mmc->init_svtor);
     qdev_connect_clock_in(iotkitdev, "MAINCLK", mms->sysclk);
     qdev_connect_clock_in(iotkitdev, "S32KCLK", mms->s32kclk);
     sysbus_realize(SYS_BUS_DEVICE(&mms->iotkit), &error_fatal);
@@ -770,7 +826,7 @@
                             &error_fatal);
     qdev_realize(DEVICE(&mms->uart_irq_orgate), NULL, &error_fatal);
     qdev_connect_gpio_out(DEVICE(&mms->uart_irq_orgate), 0,
-                          get_sse_irq_in(mms, 47));
+                          get_sse_irq_in(mms, mmc->uart_overflow_irq));
 
     /* Most of the devices in the FPGA are behind Peripheral Protection
      * Controllers. The required order for initializing things is:
@@ -887,6 +943,55 @@
         },
     };
 
+    const PPCInfo an547_ppcs[] = { {
+            .name = "apb_ppcexp0",
+            .ports = {
+                { "ssram-mpc", make_mpc, &mms->mpc[0], 0x57000000, 0x1000 },
+                { "qspi-mpc", make_mpc, &mms->mpc[1], 0x57001000, 0x1000 },
+                { "ddr-mpc", make_mpc, &mms->mpc[2], 0x57002000, 0x1000 },
+            },
+        }, {
+            .name = "apb_ppcexp1",
+            .ports = {
+                { "i2c0", make_i2c, &mms->i2c[0], 0x49200000, 0x1000 },
+                { "i2c1", make_i2c, &mms->i2c[1], 0x49201000, 0x1000 },
+                { "spi0", make_spi, &mms->spi[0], 0x49202000, 0x1000, { 53 } },
+                { "spi1", make_spi, &mms->spi[1], 0x49203000, 0x1000, { 54 } },
+                { "spi2", make_spi, &mms->spi[2], 0x49204000, 0x1000, { 55 } },
+                { "i2c2", make_i2c, &mms->i2c[2], 0x49205000, 0x1000 },
+                { "i2c3", make_i2c, &mms->i2c[3], 0x49206000, 0x1000 },
+                { /* port 7 reserved */ },
+                { "i2c4", make_i2c, &mms->i2c[4], 0x49208000, 0x1000 },
+            },
+        }, {
+            .name = "apb_ppcexp2",
+            .ports = {
+                { "scc", make_scc, &mms->scc, 0x49300000, 0x1000 },
+                { "i2s-audio", make_unimp_dev, &mms->i2s_audio, 0x49301000, 0x1000 },
+                { "fpgaio", make_fpgaio, &mms->fpgaio, 0x49302000, 0x1000 },
+                { "uart0", make_uart, &mms->uart[0], 0x49303000, 0x1000, { 33, 34, 43 } },
+                { "uart1", make_uart, &mms->uart[1], 0x49304000, 0x1000, { 35, 36, 44 } },
+                { "uart2", make_uart, &mms->uart[2], 0x49305000, 0x1000, { 37, 38, 45 } },
+                { "uart3", make_uart, &mms->uart[3], 0x49306000, 0x1000, { 39, 40, 46 } },
+                { "uart4", make_uart, &mms->uart[4], 0x49307000, 0x1000, { 41, 42, 47 } },
+                { "uart5", make_uart, &mms->uart[5], 0x49308000, 0x1000, { 125, 126, 127 } },
+
+                { /* port 9 reserved */ },
+                { "clcd", make_unimp_dev, &mms->cldc, 0x4930a000, 0x1000 },
+                { "rtc", make_rtc, &mms->rtc, 0x4930b000, 0x1000 },
+            },
+        }, {
+            .name = "ahb_ppcexp0",
+            .ports = {
+                { "gpio0", make_unimp_dev, &mms->gpio[0], 0x41100000, 0x1000 },
+                { "gpio1", make_unimp_dev, &mms->gpio[1], 0x41101000, 0x1000 },
+                { "gpio2", make_unimp_dev, &mms->gpio[2], 0x41102000, 0x1000 },
+                { "gpio3", make_unimp_dev, &mms->gpio[3], 0x41103000, 0x1000 },
+                { "eth-usb", make_eth_usb, NULL, 0x41400000, 0x200000, { 49 } },
+            },
+        },
+    };
+
     switch (mmc->fpga_type) {
     case FPGA_AN505:
     case FPGA_AN521:
@@ -897,6 +1002,10 @@
         ppcs = an524_ppcs;
         num_ppcs = ARRAY_SIZE(an524_ppcs);
         break;
+    case FPGA_AN547:
+        ppcs = an547_ppcs;
+        num_ppcs = ARRAY_SIZE(an547_ppcs);
+        break;
     default:
         g_assert_not_reached();
     }
@@ -975,6 +1084,11 @@
 
     create_unimplemented_device("FPGA NS PC", 0x48007000, 0x1000);
 
+    if (mmc->fpga_type == FPGA_AN547) {
+        create_unimplemented_device("U55 timing adapter 0", 0x48102000, 0x1000);
+        create_unimplemented_device("U55 timing adapter 1", 0x48103000, 0x1000);
+    }
+
     create_non_mpc_ram(mms);
 
     armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename,
@@ -1041,11 +1155,15 @@
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-m33");
     mmc->scc_id = 0x41045050;
     mmc->sysclk_frq = 20 * 1000 * 1000; /* 20MHz */
+    mmc->apb_periph_frq = mmc->sysclk_frq;
     mmc->oscclk = an505_oscclk;
     mmc->len_oscclk = ARRAY_SIZE(an505_oscclk);
     mmc->fpgaio_num_leds = 2;
     mmc->fpgaio_has_switches = false;
+    mmc->fpgaio_has_dbgctrl = false;
     mmc->numirq = 92;
+    mmc->uart_overflow_irq = 47;
+    mmc->init_svtor = 0x10000000;
     mmc->raminfo = an505_raminfo;
     mmc->armsse_type = TYPE_IOTKIT;
     mps2tz_set_default_ram_info(mmc);
@@ -1064,11 +1182,15 @@
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-m33");
     mmc->scc_id = 0x41045210;
     mmc->sysclk_frq = 20 * 1000 * 1000; /* 20MHz */
+    mmc->apb_periph_frq = mmc->sysclk_frq;
     mmc->oscclk = an505_oscclk; /* AN521 is the same as AN505 here */
     mmc->len_oscclk = ARRAY_SIZE(an505_oscclk);
     mmc->fpgaio_num_leds = 2;
     mmc->fpgaio_has_switches = false;
+    mmc->fpgaio_has_dbgctrl = false;
     mmc->numirq = 92;
+    mmc->uart_overflow_irq = 47;
+    mmc->init_svtor = 0x10000000;
     mmc->raminfo = an505_raminfo; /* AN521 is the same as AN505 here */
     mmc->armsse_type = TYPE_SSE200;
     mps2tz_set_default_ram_info(mmc);
@@ -1087,16 +1209,47 @@
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-m33");
     mmc->scc_id = 0x41045240;
     mmc->sysclk_frq = 32 * 1000 * 1000; /* 32MHz */
+    mmc->apb_periph_frq = mmc->sysclk_frq;
     mmc->oscclk = an524_oscclk;
     mmc->len_oscclk = ARRAY_SIZE(an524_oscclk);
     mmc->fpgaio_num_leds = 10;
     mmc->fpgaio_has_switches = true;
+    mmc->fpgaio_has_dbgctrl = false;
     mmc->numirq = 95;
+    mmc->uart_overflow_irq = 47;
+    mmc->init_svtor = 0x10000000;
     mmc->raminfo = an524_raminfo;
     mmc->armsse_type = TYPE_SSE200;
     mps2tz_set_default_ram_info(mmc);
 }
 
+static void mps3tz_an547_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+    MPS2TZMachineClass *mmc = MPS2TZ_MACHINE_CLASS(oc);
+
+    mc->desc = "ARM MPS3 with AN547 FPGA image for Cortex-M55";
+    mc->default_cpus = 1;
+    mc->min_cpus = mc->default_cpus;
+    mc->max_cpus = mc->default_cpus;
+    mmc->fpga_type = FPGA_AN547;
+    mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-m55");
+    mmc->scc_id = 0x41055470;
+    mmc->sysclk_frq = 32 * 1000 * 1000; /* 32MHz */
+    mmc->apb_periph_frq = 25 * 1000 * 1000; /* 25MHz */
+    mmc->oscclk = an524_oscclk; /* same as AN524 */
+    mmc->len_oscclk = ARRAY_SIZE(an524_oscclk);
+    mmc->fpgaio_num_leds = 10;
+    mmc->fpgaio_has_switches = true;
+    mmc->fpgaio_has_dbgctrl = true;
+    mmc->numirq = 96;
+    mmc->uart_overflow_irq = 48;
+    mmc->init_svtor = 0x00000000;
+    mmc->raminfo = an547_raminfo;
+    mmc->armsse_type = TYPE_SSE300;
+    mps2tz_set_default_ram_info(mmc);
+}
+
 static const TypeInfo mps2tz_info = {
     .name = TYPE_MPS2TZ_MACHINE,
     .parent = TYPE_MACHINE,
@@ -1128,12 +1281,19 @@
     .class_init = mps3tz_an524_class_init,
 };
 
+static const TypeInfo mps3tz_an547_info = {
+    .name = TYPE_MPS3TZ_AN547_MACHINE,
+    .parent = TYPE_MPS2TZ_MACHINE,
+    .class_init = mps3tz_an547_class_init,
+};
+
 static void mps2tz_machine_init(void)
 {
     type_register_static(&mps2tz_info);
     type_register_static(&mps2tz_an505_info);
     type_register_static(&mps2tz_an521_info);
     type_register_static(&mps3tz_an524_info);
+    type_register_static(&mps3tz_an547_info);
 }
 
 type_init(mps2tz_machine_init);
diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index 46030c1..7f01284 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -50,6 +50,7 @@
 #define QSPI_ADDR           0xff0f0000
 #define LQSPI_ADDR          0xc0000000
 #define QSPI_IRQ            15
+#define QSPI_DMA_ADDR       0xff0f0800
 
 #define DP_ADDR             0xfd4a0000
 #define DP_IRQ              113
@@ -284,6 +285,8 @@
     for (i = 0; i < XLNX_ZYNQMP_NUM_ADMA_CH; i++) {
         object_initialize_child(obj, "adma[*]", &s->adma[i], TYPE_XLNX_ZDMA);
     }
+
+    object_initialize_child(obj, "qspi-dma", &s->qspi_dma, TYPE_XLNX_CSU_DMA);
 }
 
 static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
@@ -301,11 +304,13 @@
 
     ram_size = memory_region_size(s->ddr_ram);
 
-    /* Create the DDR Memory Regions. User friendly checks should happen at
+    /*
+     * Create the DDR Memory Regions. User friendly checks should happen at
      * the board level
      */
     if (ram_size > XLNX_ZYNQMP_MAX_LOW_RAM_SIZE) {
-        /* The RAM size is above the maximum available for the low DDR.
+        /*
+         * The RAM size is above the maximum available for the low DDR.
          * Create the high DDR memory region as well.
          */
         assert(ram_size <= XLNX_ZYNQMP_MAX_RAM_SIZE);
@@ -521,7 +526,8 @@
         SysBusDevice *sbd = SYS_BUS_DEVICE(&s->sdhci[i]);
         Object *sdhci = OBJECT(&s->sdhci[i]);
 
-        /* Compatible with:
+        /*
+         * Compatible with:
          * - SD Host Controller Specification Version 3.00
          * - SDIO Specification Version 3.0
          * - eMMC Specification Version 4.51
@@ -635,6 +641,15 @@
         sysbus_connect_irq(SYS_BUS_DEVICE(&s->adma[i]), 0,
                            gic_spi[adma_ch_intr[i]]);
     }
+
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->qspi_dma), errp)) {
+        return;
+    }
+
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->qspi_dma), 0, QSPI_DMA_ADDR);
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->qspi_dma), 0, gic_spi[QSPI_IRQ]);
+    object_property_set_link(OBJECT(&s->qspi), "stream-connected-dma",
+                             OBJECT(&s->qspi_dma), errp);
 }
 
 static Property xlnx_zynqmp_props[] = {
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index da4fbf9..b870a50 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -54,6 +54,9 @@
 {
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
 
+    /* Our num_queues overrides the device backend */
+    virtio_stw_p(vdev, &s->blkcfg.num_queues, s->num_queues);
+
     memcpy(config, &s->blkcfg, sizeof(struct virtio_blk_config));
 }
 
@@ -491,10 +494,6 @@
         goto reconnect;
     }
 
-    if (s->blkcfg.num_queues != s->num_queues) {
-        s->blkcfg.num_queues = s->num_queues;
-    }
-
     return;
 
 virtio_err:
diff --git a/hw/char/cadence_uart.c b/hw/char/cadence_uart.c
index c603e140..ceb677b 100644
--- a/hw/char/cadence_uart.c
+++ b/hw/char/cadence_uart.c
@@ -519,7 +519,7 @@
                              uart_event, NULL, s, NULL, true);
 }
 
-static void cadence_uart_refclk_update(void *opaque)
+static void cadence_uart_refclk_update(void *opaque, ClockEvent event)
 {
     CadenceUARTState *s = opaque;
 
@@ -537,7 +537,7 @@
     sysbus_init_irq(sbd, &s->irq);
 
     s->refclk = qdev_init_clock_in(DEVICE(obj), "refclk",
-            cadence_uart_refclk_update, s);
+                                   cadence_uart_refclk_update, s, ClockUpdate);
     /* initialize the frequency in case the clock remains unconnected */
     clock_set_hz(s->refclk, UART_DEFAULT_REF_CLK);
 
diff --git a/hw/char/ibex_uart.c b/hw/char/ibex_uart.c
index 89f1182..edcaa30 100644
--- a/hw/char/ibex_uart.c
+++ b/hw/char/ibex_uart.c
@@ -396,7 +396,7 @@
     }
 }
 
-static void ibex_uart_clk_update(void *opaque)
+static void ibex_uart_clk_update(void *opaque, ClockEvent event)
 {
     IbexUartState *s = opaque;
 
@@ -466,7 +466,7 @@
     IbexUartState *s = IBEX_UART(obj);
 
     s->f_clk = qdev_init_clock_in(DEVICE(obj), "f_clock",
-                                  ibex_uart_clk_update, s);
+                                  ibex_uart_clk_update, s, ClockUpdate);
     clock_set_hz(s->f_clk, IBEX_UART_CLOCK);
 
     sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->tx_watermark);
diff --git a/hw/char/pl011.c b/hw/char/pl011.c
index ea4a4e5..c5621a1 100644
--- a/hw/char/pl011.c
+++ b/hw/char/pl011.c
@@ -309,7 +309,7 @@
         pl011_put_fifo(opaque, 0x400);
 }
 
-static void pl011_clock_update(void *opaque)
+static void pl011_clock_update(void *opaque, ClockEvent event)
 {
     PL011State *s = PL011(opaque);
 
@@ -378,7 +378,8 @@
         sysbus_init_irq(sbd, &s->irq[i]);
     }
 
-    s->clk = qdev_init_clock_in(DEVICE(obj), "clk", pl011_clock_update, s);
+    s->clk = qdev_init_clock_in(DEVICE(obj), "clk", pl011_clock_update, s,
+                                ClockUpdate);
 
     s->read_trigger = 1;
     s->ifl = 0x12;
diff --git a/hw/core/clock.c b/hw/core/clock.c
index 76b5f46..fc5a996 100644
--- a/hw/core/clock.c
+++ b/hw/core/clock.c
@@ -39,15 +39,17 @@
     return clk;
 }
 
-void clock_set_callback(Clock *clk, ClockCallback *cb, void *opaque)
+void clock_set_callback(Clock *clk, ClockCallback *cb, void *opaque,
+                        unsigned int events)
 {
     clk->callback = cb;
     clk->callback_opaque = opaque;
+    clk->callback_events = events;
 }
 
 void clock_clear_callback(Clock *clk)
 {
-    clock_set_callback(clk, NULL, NULL);
+    clock_set_callback(clk, NULL, NULL, 0);
 }
 
 bool clock_set(Clock *clk, uint64_t period)
@@ -62,18 +64,32 @@
     return true;
 }
 
+static void clock_call_callback(Clock *clk, ClockEvent event)
+{
+    /*
+     * Call the Clock's callback for this event, if it has one and
+     * is interested in this event.
+     */
+    if (clk->callback && (clk->callback_events & event)) {
+        clk->callback(clk->callback_opaque, event);
+    }
+}
+
 static void clock_propagate_period(Clock *clk, bool call_callbacks)
 {
     Clock *child;
 
     QLIST_FOREACH(child, &clk->children, sibling) {
         if (child->period != clk->period) {
+            if (call_callbacks) {
+                clock_call_callback(child, ClockPreUpdate);
+            }
             child->period = clk->period;
             trace_clock_update(CLOCK_PATH(child), CLOCK_PATH(clk),
                                CLOCK_PERIOD_TO_HZ(clk->period),
                                call_callbacks);
-            if (call_callbacks && child->callback) {
-                child->callback(child->callback_opaque);
+            if (call_callbacks) {
+                clock_call_callback(child, ClockUpdate);
             }
             clock_propagate_period(child, call_callbacks);
         }
diff --git a/hw/core/qdev-clock.c b/hw/core/qdev-clock.c
index eb05f2a..117f4c6 100644
--- a/hw/core/qdev-clock.c
+++ b/hw/core/qdev-clock.c
@@ -111,7 +111,8 @@
 }
 
 Clock *qdev_init_clock_in(DeviceState *dev, const char *name,
-                            ClockCallback *callback, void *opaque)
+                          ClockCallback *callback, void *opaque,
+                          unsigned int events)
 {
     NamedClockList *ncl;
 
@@ -120,7 +121,7 @@
     ncl = qdev_init_clocklist(dev, name, false, NULL);
 
     if (callback) {
-        clock_set_callback(ncl->clock, callback, opaque);
+        clock_set_callback(ncl->clock, callback, opaque, events);
     }
     return ncl->clock;
 }
@@ -137,7 +138,8 @@
         if (elem->is_output) {
             *clkp = qdev_init_clock_out(dev, elem->name);
         } else {
-            *clkp = qdev_init_clock_in(dev, elem->name, elem->callback, dev);
+            *clkp = qdev_init_clock_in(dev, elem->name, elem->callback, dev,
+                                       elem->callback_events);
         }
     }
 }
diff --git a/hw/dma/Kconfig b/hw/dma/Kconfig
index 5d6be1a..98fbb1b 100644
--- a/hw/dma/Kconfig
+++ b/hw/dma/Kconfig
@@ -26,3 +26,7 @@
 
 config SIFIVE_PDMA
     bool
+
+config XLNX_CSU_DMA
+    bool
+    select REGISTER
diff --git a/hw/dma/meson.build b/hw/dma/meson.build
index 47b4a7c..5c78a4e 100644
--- a/hw/dma/meson.build
+++ b/hw/dma/meson.build
@@ -14,3 +14,4 @@
 softmmu_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx_dma.c'))
 softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_dma.c'))
 softmmu_ss.add(when: 'CONFIG_SIFIVE_PDMA', if_true: files('sifive_pdma.c'))
+softmmu_ss.add(when: 'CONFIG_XLNX_CSU_DMA', if_true: files('xlnx_csu_dma.c'))
diff --git a/hw/dma/sparc32_dma.c b/hw/dma/sparc32_dma.c
index b643b41..03bc500 100644
--- a/hw/dma/sparc32_dma.c
+++ b/hw/dma/sparc32_dma.c
@@ -295,13 +295,13 @@
     memory_region_init_io(&s->iomem, OBJECT(s), &dma_mem_ops, s,
                           "espdma-mmio", DMA_SIZE);
 
-    object_initialize_child(obj, "esp", &es->esp, TYPE_ESP);
+    object_initialize_child(obj, "esp", &es->esp, TYPE_SYSBUS_ESP);
 }
 
 static void sparc32_espdma_device_realize(DeviceState *dev, Error **errp)
 {
     ESPDMADeviceState *es = SPARC32_ESPDMA_DEVICE(dev);
-    SysBusESPState *sysbus = ESP(&es->esp);
+    SysBusESPState *sysbus = SYSBUS_ESP(&es->esp);
     ESPState *esp = &sysbus->esp;
 
     esp->dma_memory_read = espdma_memory_read;
diff --git a/hw/dma/xlnx_csu_dma.c b/hw/dma/xlnx_csu_dma.c
new file mode 100644
index 0000000..98324da
--- /dev/null
+++ b/hw/dma/xlnx_csu_dma.c
@@ -0,0 +1,745 @@
+/*
+ * Xilinx Platform CSU Stream DMA emulation
+ *
+ * This implementation is based on
+ * https://github.com/Xilinx/qemu/blob/master/hw/dma/csu_stream_dma.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "hw/hw.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+#include "hw/sysbus.h"
+#include "migration/vmstate.h"
+#include "sysemu/dma.h"
+#include "hw/ptimer.h"
+#include "hw/stream.h"
+#include "hw/register.h"
+#include "hw/dma/xlnx_csu_dma.h"
+
+/*
+ * Ref: UG1087 (v1.7) February 8, 2019
+ * https://www.xilinx.com/html_docs/registers/ug1087/ug1087-zynq-ultrascale-registers.html
+ * CSUDMA Module section
+ */
+REG32(ADDR, 0x0)
+    FIELD(ADDR, ADDR, 2, 30) /* wo */
+REG32(SIZE, 0x4)
+    FIELD(SIZE, SIZE, 2, 27) /* wo */
+    FIELD(SIZE, LAST_WORD, 0, 1) /* rw, only exists in SRC */
+REG32(STATUS, 0x8)
+    FIELD(STATUS, DONE_CNT, 13, 3) /* wtc */
+    FIELD(STATUS, FIFO_LEVEL, 5, 8) /* ro */
+    FIELD(STATUS, OUTSTANDING, 1, 4) /* ro */
+    FIELD(STATUS, BUSY, 0, 1) /* ro */
+REG32(CTRL, 0xc)
+    FIELD(CTRL, FIFOTHRESH, 25, 7) /* rw, only exists in DST, reset 0x40 */
+    FIELD(CTRL, APB_ERR_RESP, 24, 1) /* rw */
+    FIELD(CTRL, ENDIANNESS, 23, 1) /* rw */
+    FIELD(CTRL, AXI_BRST_TYPE, 22, 1) /* rw */
+    FIELD(CTRL, TIMEOUT_VAL, 10, 12) /* rw, reset: 0xFFE */
+    FIELD(CTRL, FIFO_THRESH, 2, 8) /* rw, reset: 0x80 */
+    FIELD(CTRL, PAUSE_STRM, 1, 1) /* rw */
+    FIELD(CTRL, PAUSE_MEM, 0, 1) /* rw */
+REG32(CRC, 0x10)
+REG32(INT_STATUS, 0x14)
+    FIELD(INT_STATUS, FIFO_OVERFLOW, 7, 1) /* wtc */
+    FIELD(INT_STATUS, INVALID_APB, 6, 1) /* wtc */
+    FIELD(INT_STATUS, THRESH_HIT, 5, 1) /* wtc */
+    FIELD(INT_STATUS, TIMEOUT_MEM, 4, 1) /* wtc */
+    FIELD(INT_STATUS, TIMEOUT_STRM, 3, 1) /* wtc */
+    FIELD(INT_STATUS, AXI_BRESP_ERR, 2, 1) /* wtc, SRC: AXI_RDERR */
+    FIELD(INT_STATUS, DONE, 1, 1) /* wtc */
+    FIELD(INT_STATUS, MEM_DONE, 0, 1) /* wtc */
+REG32(INT_ENABLE, 0x18)
+    FIELD(INT_ENABLE, FIFO_OVERFLOW, 7, 1) /* wtc */
+    FIELD(INT_ENABLE, INVALID_APB, 6, 1) /* wtc */
+    FIELD(INT_ENABLE, THRESH_HIT, 5, 1) /* wtc */
+    FIELD(INT_ENABLE, TIMEOUT_MEM, 4, 1) /* wtc */
+    FIELD(INT_ENABLE, TIMEOUT_STRM, 3, 1) /* wtc */
+    FIELD(INT_ENABLE, AXI_BRESP_ERR, 2, 1) /* wtc, SRC: AXI_RDERR */
+    FIELD(INT_ENABLE, DONE, 1, 1) /* wtc */
+    FIELD(INT_ENABLE, MEM_DONE, 0, 1) /* wtc */
+REG32(INT_DISABLE, 0x1c)
+    FIELD(INT_DISABLE, FIFO_OVERFLOW, 7, 1) /* wtc */
+    FIELD(INT_DISABLE, INVALID_APB, 6, 1) /* wtc */
+    FIELD(INT_DISABLE, THRESH_HIT, 5, 1) /* wtc */
+    FIELD(INT_DISABLE, TIMEOUT_MEM, 4, 1) /* wtc */
+    FIELD(INT_DISABLE, TIMEOUT_STRM, 3, 1) /* wtc */
+    FIELD(INT_DISABLE, AXI_BRESP_ERR, 2, 1) /* wtc, SRC: AXI_RDERR */
+    FIELD(INT_DISABLE, DONE, 1, 1) /* wtc */
+    FIELD(INT_DISABLE, MEM_DONE, 0, 1) /* wtc */
+REG32(INT_MASK, 0x20)
+    FIELD(INT_MASK, FIFO_OVERFLOW, 7, 1) /* ro, reset: 0x1 */
+    FIELD(INT_MASK, INVALID_APB, 6, 1) /* ro, reset: 0x1 */
+    FIELD(INT_MASK, THRESH_HIT, 5, 1) /* ro, reset: 0x1 */
+    FIELD(INT_MASK, TIMEOUT_MEM, 4, 1) /* ro, reset: 0x1 */
+    FIELD(INT_MASK, TIMEOUT_STRM, 3, 1) /* ro, reset: 0x1 */
+    FIELD(INT_MASK, AXI_BRESP_ERR, 2, 1) /* ro, reset: 0x1, SRC: AXI_RDERR */
+    FIELD(INT_MASK, DONE, 1, 1) /* ro, reset: 0x1 */
+    FIELD(INT_MASK, MEM_DONE, 0, 1) /* ro, reset: 0x1 */
+REG32(CTRL2, 0x24)
+    FIELD(CTRL2, ARCACHE, 24, 3) /* rw */
+    FIELD(CTRL2, ROUTE_BIT, 23, 1) /* rw */
+    FIELD(CTRL2, TIMEOUT_EN, 22, 1) /* rw */
+    FIELD(CTRL2, TIMEOUT_PRE, 4, 12) /* rw, reset: 0xFFF */
+    FIELD(CTRL2, MAX_OUTS_CMDS, 0, 4) /* rw, reset: 0x8 */
+REG32(ADDR_MSB, 0x28)
+    FIELD(ADDR_MSB, ADDR_MSB, 0, 17) /* wo */
+
+#define R_CTRL_TIMEOUT_VAL_RESET    (0xFFE)
+#define R_CTRL_FIFO_THRESH_RESET    (0x80)
+#define R_CTRL_FIFOTHRESH_RESET     (0x40)
+
+#define R_CTRL2_TIMEOUT_PRE_RESET   (0xFFF)
+#define R_CTRL2_MAX_OUTS_CMDS_RESET (0x8)
+
+#define XLNX_CSU_DMA_ERR_DEBUG      (0)
+#define XLNX_CSU_DMA_INT_R_MASK     (0xff)
+
+/* UG1807: Set the prescaler value for the timeout in clk (~2.5ns) cycles */
+#define XLNX_CSU_DMA_TIMER_FREQ     (400 * 1000 * 1000)
+
+static bool xlnx_csu_dma_is_paused(XlnxCSUDMA *s)
+{
+    bool paused;
+
+    paused = !!(s->regs[R_CTRL] & R_CTRL_PAUSE_STRM_MASK);
+    paused |= !!(s->regs[R_CTRL] & R_CTRL_PAUSE_MEM_MASK);
+
+    return paused;
+}
+
+static bool xlnx_csu_dma_get_eop(XlnxCSUDMA *s)
+{
+    return s->r_size_last_word;
+}
+
+static bool xlnx_csu_dma_burst_is_fixed(XlnxCSUDMA *s)
+{
+    return !!(s->regs[R_CTRL] & R_CTRL_AXI_BRST_TYPE_MASK);
+}
+
+static bool xlnx_csu_dma_timeout_enabled(XlnxCSUDMA *s)
+{
+    return !!(s->regs[R_CTRL2] & R_CTRL2_TIMEOUT_EN_MASK);
+}
+
+static void xlnx_csu_dma_update_done_cnt(XlnxCSUDMA *s, int a)
+{
+    int cnt;
+
+    /* Increase DONE_CNT */
+    cnt = ARRAY_FIELD_EX32(s->regs, STATUS, DONE_CNT) + a;
+    ARRAY_FIELD_DP32(s->regs, STATUS, DONE_CNT, cnt);
+}
+
+static void xlnx_csu_dma_data_process(XlnxCSUDMA *s, uint8_t *buf, uint32_t len)
+{
+    uint32_t bswap;
+    uint32_t i;
+
+    bswap = s->regs[R_CTRL] & R_CTRL_ENDIANNESS_MASK;
+    if (s->is_dst && !bswap) {
+        /* Fast when ENDIANNESS cleared */
+        return;
+    }
+
+    for (i = 0; i < len; i += 4) {
+        uint8_t *b = &buf[i];
+        union {
+            uint8_t u8[4];
+            uint32_t u32;
+        } v = {
+            .u8 = { b[0], b[1], b[2], b[3] }
+        };
+
+        if (!s->is_dst) {
+            s->regs[R_CRC] += v.u32;
+        }
+        if (bswap) {
+            /*
+             * No point using bswap, we need to writeback
+             * into a potentially unaligned pointer.
+             */
+            b[0] = v.u8[3];
+            b[1] = v.u8[2];
+            b[2] = v.u8[1];
+            b[3] = v.u8[0];
+        }
+    }
+}
+
+static void xlnx_csu_dma_update_irq(XlnxCSUDMA *s)
+{
+    qemu_set_irq(s->irq, !!(s->regs[R_INT_STATUS] & ~s->regs[R_INT_MASK]));
+}
+
+/* len is in bytes */
+static uint32_t xlnx_csu_dma_read(XlnxCSUDMA *s, uint8_t *buf, uint32_t len)
+{
+    hwaddr addr = (hwaddr)s->regs[R_ADDR_MSB] << 32 | s->regs[R_ADDR];
+    MemTxResult result = MEMTX_OK;
+
+    if (xlnx_csu_dma_burst_is_fixed(s)) {
+        uint32_t i;
+
+        for (i = 0; i < len && (result == MEMTX_OK); i += s->width) {
+            uint32_t mlen = MIN(len - i, s->width);
+
+            result = address_space_rw(s->dma_as, addr, s->attr,
+                                      buf + i, mlen, false);
+        }
+    } else {
+        result = address_space_rw(s->dma_as, addr, s->attr, buf, len, false);
+    }
+
+    if (result == MEMTX_OK) {
+        xlnx_csu_dma_data_process(s, buf, len);
+    } else {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address " TARGET_FMT_plx
+                      " for mem read", __func__, addr);
+        s->regs[R_INT_STATUS] |= R_INT_STATUS_AXI_BRESP_ERR_MASK;
+        xlnx_csu_dma_update_irq(s);
+    }
+    return len;
+}
+
+/* len is in bytes */
+static uint32_t xlnx_csu_dma_write(XlnxCSUDMA *s, uint8_t *buf, uint32_t len)
+{
+    hwaddr addr = (hwaddr)s->regs[R_ADDR_MSB] << 32 | s->regs[R_ADDR];
+    MemTxResult result = MEMTX_OK;
+
+    xlnx_csu_dma_data_process(s, buf, len);
+    if (xlnx_csu_dma_burst_is_fixed(s)) {
+        uint32_t i;
+
+        for (i = 0; i < len && (result == MEMTX_OK); i += s->width) {
+            uint32_t mlen = MIN(len - i, s->width);
+
+            result = address_space_rw(s->dma_as, addr, s->attr,
+                                      buf, mlen, true);
+            buf += mlen;
+        }
+    } else {
+        result = address_space_rw(s->dma_as, addr, s->attr, buf, len, true);
+    }
+
+    if (result != MEMTX_OK) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address " TARGET_FMT_plx
+                      " for mem write", __func__, addr);
+        s->regs[R_INT_STATUS] |= R_INT_STATUS_AXI_BRESP_ERR_MASK;
+        xlnx_csu_dma_update_irq(s);
+    }
+    return len;
+}
+
+static void xlnx_csu_dma_done(XlnxCSUDMA *s)
+{
+    s->regs[R_STATUS] &= ~R_STATUS_BUSY_MASK;
+    s->regs[R_INT_STATUS] |= R_INT_STATUS_DONE_MASK;
+
+    if (!s->is_dst) {
+        s->regs[R_INT_STATUS] |= R_INT_STATUS_MEM_DONE_MASK;
+    }
+
+    xlnx_csu_dma_update_done_cnt(s, 1);
+}
+
+static uint32_t xlnx_csu_dma_advance(XlnxCSUDMA *s, uint32_t len)
+{
+    uint32_t size = s->regs[R_SIZE];
+    hwaddr dst = (hwaddr)s->regs[R_ADDR_MSB] << 32 | s->regs[R_ADDR];
+
+    assert(len <= size);
+
+    size -= len;
+    s->regs[R_SIZE] = size;
+
+    if (!xlnx_csu_dma_burst_is_fixed(s)) {
+        dst += len;
+        s->regs[R_ADDR] = (uint32_t) dst;
+        s->regs[R_ADDR_MSB] = dst >> 32;
+    }
+
+    if (size == 0) {
+        xlnx_csu_dma_done(s);
+    }
+
+    return size;
+}
+
+static void xlnx_csu_dma_src_notify(void *opaque)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(opaque);
+    unsigned char buf[4 * 1024];
+    size_t rlen = 0;
+
+    ptimer_transaction_begin(s->src_timer);
+    /* Stop the backpreassure timer */
+    ptimer_stop(s->src_timer);
+
+    while (s->regs[R_SIZE] && !xlnx_csu_dma_is_paused(s) &&
+           stream_can_push(s->tx_dev, xlnx_csu_dma_src_notify, s)) {
+        uint32_t plen = MIN(s->regs[R_SIZE], sizeof buf);
+        bool eop = false;
+
+        /* Did we fit it all? */
+        if (s->regs[R_SIZE] == plen && xlnx_csu_dma_get_eop(s)) {
+            eop = true;
+        }
+
+        /* DMA transfer */
+        xlnx_csu_dma_read(s, buf, plen);
+        rlen = stream_push(s->tx_dev, buf, plen, eop);
+        xlnx_csu_dma_advance(s, rlen);
+    }
+
+    if (xlnx_csu_dma_timeout_enabled(s) && s->regs[R_SIZE] &&
+        !stream_can_push(s->tx_dev, xlnx_csu_dma_src_notify, s)) {
+        uint32_t timeout = ARRAY_FIELD_EX32(s->regs, CTRL, TIMEOUT_VAL);
+        uint32_t div = ARRAY_FIELD_EX32(s->regs, CTRL2, TIMEOUT_PRE) + 1;
+        uint32_t freq = XLNX_CSU_DMA_TIMER_FREQ;
+
+        freq /= div;
+        ptimer_set_freq(s->src_timer, freq);
+        ptimer_set_count(s->src_timer, timeout);
+        ptimer_run(s->src_timer, 1);
+    }
+
+    ptimer_transaction_commit(s->src_timer);
+    xlnx_csu_dma_update_irq(s);
+}
+
+static uint64_t addr_pre_write(RegisterInfo *reg, uint64_t val)
+{
+    /* Address is word aligned */
+    return val & R_ADDR_ADDR_MASK;
+}
+
+static uint64_t size_pre_write(RegisterInfo *reg, uint64_t val)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(reg->opaque);
+
+    if (s->regs[R_SIZE] != 0) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Starting DMA while already running.\n", __func__);
+    }
+
+    if (!s->is_dst) {
+        s->r_size_last_word = !!(val & R_SIZE_LAST_WORD_MASK);
+    }
+
+    /* Size is word aligned */
+    return val & R_SIZE_SIZE_MASK;
+}
+
+static uint64_t size_post_read(RegisterInfo *reg, uint64_t val)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(reg->opaque);
+
+    return val | s->r_size_last_word;
+}
+
+static void size_post_write(RegisterInfo *reg, uint64_t val)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(reg->opaque);
+
+    s->regs[R_STATUS] |= R_STATUS_BUSY_MASK;
+
+    /*
+     * Note that if SIZE is programmed to 0, and the DMA is started,
+     * the interrupts DONE and MEM_DONE will be asserted.
+     */
+    if (s->regs[R_SIZE] == 0) {
+        xlnx_csu_dma_done(s);
+        xlnx_csu_dma_update_irq(s);
+        return;
+    }
+
+    /* Set SIZE is considered the last step in transfer configuration */
+    if (!s->is_dst) {
+        xlnx_csu_dma_src_notify(s);
+    } else {
+        if (s->notify) {
+            s->notify(s->notify_opaque);
+        }
+    }
+}
+
+static uint64_t status_pre_write(RegisterInfo *reg, uint64_t val)
+{
+    return val & (R_STATUS_DONE_CNT_MASK | R_STATUS_BUSY_MASK);
+}
+
+static void ctrl_post_write(RegisterInfo *reg, uint64_t val)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(reg->opaque);
+
+    if (!s->is_dst) {
+        if (!xlnx_csu_dma_is_paused(s)) {
+            xlnx_csu_dma_src_notify(s);
+        }
+    } else {
+        if (!xlnx_csu_dma_is_paused(s) && s->notify) {
+            s->notify(s->notify_opaque);
+        }
+    }
+}
+
+static uint64_t int_status_pre_write(RegisterInfo *reg, uint64_t val)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(reg->opaque);
+
+    /* DMA counter decrements when flag 'DONE' is cleared */
+    if ((val & s->regs[R_INT_STATUS] & R_INT_STATUS_DONE_MASK)) {
+        xlnx_csu_dma_update_done_cnt(s, -1);
+    }
+
+    return s->regs[R_INT_STATUS] & ~val;
+}
+
+static void int_status_post_write(RegisterInfo *reg, uint64_t val)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(reg->opaque);
+
+    xlnx_csu_dma_update_irq(s);
+}
+
+static uint64_t int_enable_pre_write(RegisterInfo *reg, uint64_t val)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(reg->opaque);
+    uint32_t v32 = val;
+
+    /*
+     * R_INT_ENABLE doesn't have its own state.
+     * It is used to indirectly modify R_INT_MASK.
+     *
+     * 1: Enable this interrupt field (the mask bit will be cleared to 0)
+     * 0: No effect
+     */
+    s->regs[R_INT_MASK] &= ~v32;
+    return 0;
+}
+
+static void int_enable_post_write(RegisterInfo *reg, uint64_t val)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(reg->opaque);
+
+    xlnx_csu_dma_update_irq(s);
+}
+
+static uint64_t int_disable_pre_write(RegisterInfo *reg, uint64_t val)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(reg->opaque);
+    uint32_t v32 = val;
+
+    /*
+     * R_INT_DISABLE doesn't have its own state.
+     * It is used to indirectly modify R_INT_MASK.
+     *
+     * 1: Disable this interrupt field (the mask bit will be set to 1)
+     * 0: No effect
+     */
+    s->regs[R_INT_MASK] |= v32;
+    return 0;
+}
+
+static void int_disable_post_write(RegisterInfo *reg, uint64_t val)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(reg->opaque);
+
+    xlnx_csu_dma_update_irq(s);
+}
+
+static uint64_t addr_msb_pre_write(RegisterInfo *reg, uint64_t val)
+{
+    return val & R_ADDR_MSB_ADDR_MSB_MASK;
+}
+
+static const RegisterAccessInfo *xlnx_csu_dma_regs_info[] = {
+#define DMACH_REGINFO(NAME, snd)                                              \
+    (const RegisterAccessInfo []) {                                           \
+        {                                                                     \
+            .name = #NAME "_ADDR",                                            \
+            .addr = A_ADDR,                                                   \
+            .pre_write = addr_pre_write                                       \
+        }, {                                                                  \
+            .name = #NAME "_SIZE",                                            \
+            .addr = A_SIZE,                                                   \
+            .pre_write = size_pre_write,                                      \
+            .post_write = size_post_write,                                    \
+            .post_read = size_post_read                                       \
+        }, {                                                                  \
+            .name = #NAME "_STATUS",                                          \
+            .addr = A_STATUS,                                                 \
+            .pre_write = status_pre_write,                                    \
+            .w1c = R_STATUS_DONE_CNT_MASK,                                    \
+            .ro = (R_STATUS_BUSY_MASK                                         \
+                   | R_STATUS_FIFO_LEVEL_MASK                                 \
+                   | R_STATUS_OUTSTANDING_MASK)                               \
+        }, {                                                                  \
+            .name = #NAME "_CTRL",                                            \
+            .addr = A_CTRL,                                                   \
+            .post_write = ctrl_post_write,                                    \
+            .reset = ((R_CTRL_TIMEOUT_VAL_RESET << R_CTRL_TIMEOUT_VAL_SHIFT)  \
+                      | (R_CTRL_FIFO_THRESH_RESET << R_CTRL_FIFO_THRESH_SHIFT)\
+                      | (snd ? 0 : R_CTRL_FIFOTHRESH_RESET                    \
+                         << R_CTRL_FIFOTHRESH_SHIFT))                         \
+        }, {                                                                  \
+            .name = #NAME "_CRC",                                             \
+            .addr = A_CRC,                                                    \
+        }, {                                                                  \
+            .name =  #NAME "_INT_STATUS",                                     \
+            .addr = A_INT_STATUS,                                             \
+            .pre_write = int_status_pre_write,                                \
+            .post_write = int_status_post_write                               \
+        }, {                                                                  \
+            .name = #NAME "_INT_ENABLE",                                      \
+            .addr = A_INT_ENABLE,                                             \
+            .pre_write = int_enable_pre_write,                                \
+            .post_write = int_enable_post_write                               \
+        }, {                                                                  \
+            .name = #NAME "_INT_DISABLE",                                     \
+            .addr = A_INT_DISABLE,                                            \
+            .pre_write = int_disable_pre_write,                               \
+            .post_write = int_disable_post_write                              \
+        }, {                                                                  \
+            .name = #NAME "_INT_MASK",                                        \
+            .addr = A_INT_MASK,                                               \
+            .ro = ~0,                                                         \
+            .reset = XLNX_CSU_DMA_INT_R_MASK                                  \
+        }, {                                                                  \
+            .name = #NAME "_CTRL2",                                           \
+            .addr = A_CTRL2,                                                  \
+            .reset = ((R_CTRL2_TIMEOUT_PRE_RESET                              \
+                       << R_CTRL2_TIMEOUT_PRE_SHIFT)                          \
+                      | (R_CTRL2_MAX_OUTS_CMDS_RESET                          \
+                         << R_CTRL2_MAX_OUTS_CMDS_SHIFT))                     \
+        }, {                                                                  \
+            .name = #NAME "_ADDR_MSB",                                        \
+            .addr = A_ADDR_MSB,                                               \
+            .pre_write = addr_msb_pre_write                                   \
+        }                                                                     \
+    }
+
+    DMACH_REGINFO(DMA_SRC, true),
+    DMACH_REGINFO(DMA_DST, false)
+};
+
+static const MemoryRegionOps xlnx_csu_dma_ops = {
+    .read = register_read_memory,
+    .write = register_write_memory,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    }
+};
+
+static void xlnx_csu_dma_src_timeout_hit(void *opaque)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(opaque);
+
+    /* Ignore if the timeout is masked */
+    if (!xlnx_csu_dma_timeout_enabled(s)) {
+        return;
+    }
+
+    s->regs[R_INT_STATUS] |= R_INT_STATUS_TIMEOUT_STRM_MASK;
+    xlnx_csu_dma_update_irq(s);
+}
+
+static size_t xlnx_csu_dma_stream_push(StreamSink *obj, uint8_t *buf,
+                                       size_t len, bool eop)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(obj);
+    uint32_t size = s->regs[R_SIZE];
+    uint32_t mlen = MIN(size, len) & (~3); /* Size is word aligned */
+
+    /* Be called when it's DST */
+    assert(s->is_dst);
+
+    if (size == 0 || len <= 0) {
+        return 0;
+    }
+
+    if (len && (xlnx_csu_dma_is_paused(s) || mlen == 0)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "csu-dma: DST channel dropping %zd b of data.\n", len);
+        s->regs[R_INT_STATUS] |= R_INT_STATUS_FIFO_OVERFLOW_MASK;
+        return len;
+    }
+
+    if (xlnx_csu_dma_write(s, buf, mlen) != mlen) {
+        return 0;
+    }
+
+    xlnx_csu_dma_advance(s, mlen);
+    xlnx_csu_dma_update_irq(s);
+
+    return mlen;
+}
+
+static bool xlnx_csu_dma_stream_can_push(StreamSink *obj,
+                                         StreamCanPushNotifyFn notify,
+                                         void *notify_opaque)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(obj);
+
+    if (s->regs[R_SIZE] != 0) {
+        return true;
+    } else {
+        s->notify = notify;
+        s->notify_opaque = notify_opaque;
+        return false;
+    }
+}
+
+static void xlnx_csu_dma_reset(DeviceState *dev)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(dev);
+    unsigned int i;
+
+    for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) {
+        register_reset(&s->regs_info[i]);
+    }
+}
+
+static void xlnx_csu_dma_realize(DeviceState *dev, Error **errp)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(dev);
+    RegisterInfoArray *reg_array;
+
+    reg_array =
+        register_init_block32(dev, xlnx_csu_dma_regs_info[!!s->is_dst],
+                              XLNX_CSU_DMA_R_MAX,
+                              s->regs_info, s->regs,
+                              &xlnx_csu_dma_ops,
+                              XLNX_CSU_DMA_ERR_DEBUG,
+                              XLNX_CSU_DMA_R_MAX * 4);
+    memory_region_add_subregion(&s->iomem,
+                                0x0,
+                                &reg_array->mem);
+
+    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem);
+    sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq);
+
+    if (!s->is_dst && !s->tx_dev) {
+        error_setg(errp, "zynqmp.csu-dma: Stream not connected");
+        return;
+    }
+
+    s->src_timer = ptimer_init(xlnx_csu_dma_src_timeout_hit,
+                               s, PTIMER_POLICY_DEFAULT);
+
+    if (s->dma_mr) {
+        s->dma_as = g_malloc0(sizeof(AddressSpace));
+        address_space_init(s->dma_as, s->dma_mr, NULL);
+    } else {
+        s->dma_as = &address_space_memory;
+    }
+
+    s->attr = MEMTXATTRS_UNSPECIFIED;
+
+    s->r_size_last_word = 0;
+}
+
+static const VMStateDescription vmstate_xlnx_csu_dma = {
+    .name = TYPE_XLNX_CSU_DMA,
+    .version_id = 0,
+    .minimum_version_id = 0,
+    .minimum_version_id_old = 0,
+    .fields = (VMStateField[]) {
+        VMSTATE_PTIMER(src_timer, XlnxCSUDMA),
+        VMSTATE_UINT16(width, XlnxCSUDMA),
+        VMSTATE_BOOL(is_dst, XlnxCSUDMA),
+        VMSTATE_BOOL(r_size_last_word, XlnxCSUDMA),
+        VMSTATE_UINT32_ARRAY(regs, XlnxCSUDMA, XLNX_CSU_DMA_R_MAX),
+        VMSTATE_END_OF_LIST(),
+    }
+};
+
+static Property xlnx_csu_dma_properties[] = {
+    /*
+     * Ref PG021, Stream Data Width:
+     * Data width in bits of the AXI S2MM AXI4-Stream Data bus.
+     * This value must be equal or less than the Memory Map Data Width.
+     * Valid values are 8, 16, 32, 64, 128, 512 and 1024.
+     * "dma-width" is the byte value of the "Stream Data Width".
+     */
+    DEFINE_PROP_UINT16("dma-width", XlnxCSUDMA, width, 4),
+    /*
+     * The CSU DMA is a two-channel, simple DMA, allowing separate control of
+     * the SRC (read) channel and DST (write) channel. "is-dst" is used to mark
+     * which channel the device is connected to.
+     */
+    DEFINE_PROP_BOOL("is-dst", XlnxCSUDMA, is_dst, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xlnx_csu_dma_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    StreamSinkClass *ssc = STREAM_SINK_CLASS(klass);
+
+    dc->reset = xlnx_csu_dma_reset;
+    dc->realize = xlnx_csu_dma_realize;
+    dc->vmsd = &vmstate_xlnx_csu_dma;
+    device_class_set_props(dc, xlnx_csu_dma_properties);
+
+    ssc->push = xlnx_csu_dma_stream_push;
+    ssc->can_push = xlnx_csu_dma_stream_can_push;
+}
+
+static void xlnx_csu_dma_init(Object *obj)
+{
+    XlnxCSUDMA *s = XLNX_CSU_DMA(obj);
+
+    memory_region_init(&s->iomem, obj, TYPE_XLNX_CSU_DMA,
+                       XLNX_CSU_DMA_R_MAX * 4);
+
+    object_property_add_link(obj, "stream-connected-dma", TYPE_STREAM_SINK,
+                             (Object **)&s->tx_dev,
+                             qdev_prop_allow_set_link_before_realize,
+                             OBJ_PROP_LINK_STRONG);
+    object_property_add_link(obj, "dma", TYPE_MEMORY_REGION,
+                             (Object **)&s->dma_mr,
+                             qdev_prop_allow_set_link_before_realize,
+                             OBJ_PROP_LINK_STRONG);
+}
+
+static const TypeInfo xlnx_csu_dma_info = {
+    .name          = TYPE_XLNX_CSU_DMA,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(XlnxCSUDMA),
+    .class_init    = xlnx_csu_dma_class_init,
+    .instance_init = xlnx_csu_dma_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_STREAM_SINK },
+        { }
+    }
+};
+
+static void xlnx_csu_dma_register_types(void)
+{
+    type_register_static(&xlnx_csu_dma_info);
+}
+
+type_init(xlnx_csu_dma_register_types)
diff --git a/hw/m68k/q800.c b/hw/m68k/q800.c
index d4eca46..4d2e866 100644
--- a/hw/m68k/q800.c
+++ b/hw/m68k/q800.c
@@ -350,8 +350,8 @@
 
     /* SCSI */
 
-    dev = qdev_new(TYPE_ESP);
-    sysbus_esp = ESP(dev);
+    dev = qdev_new(TYPE_SYSBUS_ESP);
+    sysbus_esp = SYSBUS_ESP(dev);
     esp = &sysbus_esp->esp;
     esp->dma_memory_read = NULL;
     esp->dma_memory_write = NULL;
diff --git a/hw/mips/cps.c b/hw/mips/cps.c
index 7a0d289..2b43670 100644
--- a/hw/mips/cps.c
+++ b/hw/mips/cps.c
@@ -39,7 +39,7 @@
     SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
     MIPSCPSState *s = MIPS_CPS(obj);
 
-    s->clock = qdev_init_clock_in(DEVICE(obj), "clk-in", NULL, NULL);
+    s->clock = qdev_init_clock_in(DEVICE(obj), "clk-in", NULL, NULL, 0);
     /*
      * Cover entire address space as there do not seem to be any
      * constraints for the base address of CPC and GIC.
diff --git a/hw/mips/jazz.c b/hw/mips/jazz.c
index 83c8086..1a0888a 100644
--- a/hw/mips/jazz.c
+++ b/hw/mips/jazz.c
@@ -328,8 +328,8 @@
     }
 
     /* SCSI adapter */
-    dev = qdev_new(TYPE_ESP);
-    sysbus_esp = ESP(dev);
+    dev = qdev_new(TYPE_SYSBUS_ESP);
+    sysbus_esp = SYSBUS_ESP(dev);
     esp = &sysbus_esp->esp;
     esp->dma_memory_read = rc4030_dma_read;
     esp->dma_memory_write = rc4030_dma_write;
diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig
index 19c216f..5426b9b 100644
--- a/hw/misc/Kconfig
+++ b/hw/misc/Kconfig
@@ -2,6 +2,15 @@
     bool
     depends on ISA_BUS
 
+config ARMSSE_CPUID
+    bool
+
+config ARMSSE_MHU
+    bool
+
+config ARMSSE_CPU_PWRCTRL
+    bool
+
 config MAX111X
     bool
 
diff --git a/hw/misc/armsse-cpu-pwrctrl.c b/hw/misc/armsse-cpu-pwrctrl.c
new file mode 100644
index 0000000..42fc388
--- /dev/null
+++ b/hw/misc/armsse-cpu-pwrctrl.c
@@ -0,0 +1,149 @@
+/*
+ * Arm SSE CPU PWRCTRL register block
+ *
+ * Copyright (c) 2021 Linaro Limited
+ * Written by Peter Maydell
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+/*
+ * This is a model of the "CPU<N>_PWRCTRL block" which is part of the
+ * Arm Corstone SSE-300 Example Subsystem and documented in
+ * https://developer.arm.com/documentation/101773/0000
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "trace.h"
+#include "qapi/error.h"
+#include "migration/vmstate.h"
+#include "hw/sysbus.h"
+#include "hw/registerfields.h"
+#include "hw/misc/armsse-cpu-pwrctrl.h"
+
+REG32(CPUPWRCFG, 0x0)
+REG32(PID4, 0xfd0)
+REG32(PID5, 0xfd4)
+REG32(PID6, 0xfd8)
+REG32(PID7, 0xfdc)
+REG32(PID0, 0xfe0)
+REG32(PID1, 0xfe4)
+REG32(PID2, 0xfe8)
+REG32(PID3, 0xfec)
+REG32(CID0, 0xff0)
+REG32(CID1, 0xff4)
+REG32(CID2, 0xff8)
+REG32(CID3, 0xffc)
+
+/* PID/CID values */
+static const int cpu_pwrctrl_id[] = {
+    0x04, 0x00, 0x00, 0x00, /* PID4..PID7 */
+    0x5a, 0xb8, 0x0b, 0x00, /* PID0..PID3 */
+    0x0d, 0xf0, 0x05, 0xb1, /* CID0..CID3 */
+};
+
+static uint64_t pwrctrl_read(void *opaque, hwaddr offset, unsigned size)
+{
+    ARMSSECPUPwrCtrl *s = ARMSSE_CPU_PWRCTRL(opaque);
+    uint64_t r;
+
+    switch (offset) {
+    case A_CPUPWRCFG:
+        r = s->cpupwrcfg;
+        break;
+    case A_PID4 ... A_CID3:
+        r = cpu_pwrctrl_id[(offset - A_PID4) / 4];
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SSE CPU_PWRCTRL read: bad offset %x\n", (int)offset);
+        r = 0;
+        break;
+    }
+    trace_armsse_cpu_pwrctrl_read(offset, r, size);
+    return r;
+}
+
+static void pwrctrl_write(void *opaque, hwaddr offset,
+                          uint64_t value, unsigned size)
+{
+    ARMSSECPUPwrCtrl *s = ARMSSE_CPU_PWRCTRL(opaque);
+
+    trace_armsse_cpu_pwrctrl_write(offset, value, size);
+
+    switch (offset) {
+    case A_CPUPWRCFG:
+        qemu_log_mask(LOG_UNIMP,
+                      "SSE CPU_PWRCTRL: CPUPWRCFG unimplemented\n");
+        s->cpupwrcfg = value;
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SSE CPU_PWRCTRL write: bad offset 0x%x\n", (int)offset);
+        break;
+    }
+}
+
+static const MemoryRegionOps pwrctrl_ops = {
+    .read = pwrctrl_read,
+    .write = pwrctrl_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl.min_access_size = 4,
+    .impl.max_access_size = 4,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+};
+
+static void pwrctrl_reset(DeviceState *dev)
+{
+    ARMSSECPUPwrCtrl *s = ARMSSE_CPU_PWRCTRL(dev);
+
+    s->cpupwrcfg = 0;
+}
+
+static const VMStateDescription pwrctrl_vmstate = {
+    .name = "armsse-cpu-pwrctrl",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(cpupwrcfg, ARMSSECPUPwrCtrl),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static void pwrctrl_init(Object *obj)
+{
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+    ARMSSECPUPwrCtrl *s = ARMSSE_CPU_PWRCTRL(obj);
+
+    memory_region_init_io(&s->iomem, obj, &pwrctrl_ops,
+                          s, "armsse-cpu-pwrctrl", 0x1000);
+    sysbus_init_mmio(sbd, &s->iomem);
+}
+
+static void pwrctrl_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->reset = pwrctrl_reset;
+    dc->vmsd = &pwrctrl_vmstate;
+}
+
+static const TypeInfo pwrctrl_info = {
+    .name = TYPE_ARMSSE_CPU_PWRCTRL,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(ARMSSECPUPwrCtrl),
+    .instance_init = pwrctrl_init,
+    .class_init = pwrctrl_class_init,
+};
+
+static void pwrctrl_register_types(void)
+{
+    type_register_static(&pwrctrl_info);
+}
+
+type_init(pwrctrl_register_types);
diff --git a/hw/misc/bcm2835_cprman.c b/hw/misc/bcm2835_cprman.c
index 7e415a0..75e6c57 100644
--- a/hw/misc/bcm2835_cprman.c
+++ b/hw/misc/bcm2835_cprman.c
@@ -107,7 +107,7 @@
     clock_update_hz(pll->out, freq);
 }
 
-static void pll_xosc_update(void *opaque)
+static void pll_xosc_update(void *opaque, ClockEvent event)
 {
     pll_update(CPRMAN_PLL(opaque));
 }
@@ -116,7 +116,8 @@
 {
     CprmanPllState *s = CPRMAN_PLL(obj);
 
-    s->xosc_in = qdev_init_clock_in(DEVICE(s), "xosc-in", pll_xosc_update, s);
+    s->xosc_in = qdev_init_clock_in(DEVICE(s), "xosc-in", pll_xosc_update,
+                                    s, ClockUpdate);
     s->out = qdev_init_clock_out(DEVICE(s), "out");
 }
 
@@ -209,7 +210,7 @@
     }
 }
 
-static void pll_channel_pll_in_update(void *opaque)
+static void pll_channel_pll_in_update(void *opaque, ClockEvent event)
 {
     pll_channel_update(CPRMAN_PLL_CHANNEL(opaque));
 }
@@ -219,7 +220,8 @@
     CprmanPllChannelState *s = CPRMAN_PLL_CHANNEL(obj);
 
     s->pll_in = qdev_init_clock_in(DEVICE(s), "pll-in",
-                                   pll_channel_pll_in_update, s);
+                                   pll_channel_pll_in_update, s,
+                                   ClockUpdate);
     s->out = qdev_init_clock_out(DEVICE(s), "out");
 }
 
@@ -303,7 +305,7 @@
     clock_update_hz(mux->out, freq);
 }
 
-static void clock_mux_src_update(void *opaque)
+static void clock_mux_src_update(void *opaque, ClockEvent event)
 {
     CprmanClockMuxState **backref = opaque;
     CprmanClockMuxState *s = *backref;
@@ -335,7 +337,8 @@
         s->backref[i] = s;
         s->srcs[i] = qdev_init_clock_in(DEVICE(s), name,
                                         clock_mux_src_update,
-                                        &s->backref[i]);
+                                        &s->backref[i],
+                                        ClockUpdate);
         g_free(name);
     }
 
@@ -380,7 +383,7 @@
     clock_update(s->out, clock_get(src));
 }
 
-static void dsi0hsck_mux_in_update(void *opaque)
+static void dsi0hsck_mux_in_update(void *opaque, ClockEvent event)
 {
     dsi0hsck_mux_update(CPRMAN_DSI0HSCK_MUX(opaque));
 }
@@ -390,8 +393,10 @@
     CprmanDsi0HsckMuxState *s = CPRMAN_DSI0HSCK_MUX(obj);
     DeviceState *dev = DEVICE(obj);
 
-    s->plla_in = qdev_init_clock_in(dev, "plla-in", dsi0hsck_mux_in_update, s);
-    s->plld_in = qdev_init_clock_in(dev, "plld-in", dsi0hsck_mux_in_update, s);
+    s->plla_in = qdev_init_clock_in(dev, "plla-in", dsi0hsck_mux_in_update,
+                                    s, ClockUpdate);
+    s->plld_in = qdev_init_clock_in(dev, "plld-in", dsi0hsck_mux_in_update,
+                                    s, ClockUpdate);
     s->out = qdev_init_clock_out(DEVICE(s), "out");
 }
 
diff --git a/hw/misc/iotkit-secctl.c b/hw/misc/iotkit-secctl.c
index 9fdb820..7b41cfa 100644
--- a/hw/misc/iotkit-secctl.c
+++ b/hw/misc/iotkit-secctl.c
@@ -19,6 +19,8 @@
 #include "hw/registerfields.h"
 #include "hw/irq.h"
 #include "hw/misc/iotkit-secctl.h"
+#include "hw/arm/armsse-version.h"
+#include "hw/qdev-properties.h"
 
 /* Registers in the secure privilege control block */
 REG32(SECRESPCFG, 0x10)
@@ -95,6 +97,19 @@
     0x0d, 0xf0, 0x05, 0xb1,
 };
 
+static const uint8_t iotkit_secctl_s_sse300_idregs[] = {
+    0x04, 0x00, 0x00, 0x00,
+    0x52, 0xb8, 0x2b, 0x00,
+    0x0d, 0xf0, 0x05, 0xb1,
+};
+
+static const uint8_t iotkit_secctl_ns_sse300_idregs[] = {
+    0x04, 0x00, 0x00, 0x00,
+    0x53, 0xb8, 0x2b, 0x00,
+    0x0d, 0xf0, 0x05, 0xb1,
+};
+
+
 /* The register sets for the various PPCs (AHB internal, APB internal,
  * AHB expansion, APB expansion) are all set up so that they are
  * in 16-aligned blocks so offsets 0xN0, 0xN4, 0xN8, 0xNC are PPCs
@@ -213,7 +228,14 @@
     case A_CID1:
     case A_CID2:
     case A_CID3:
-        r = iotkit_secctl_s_idregs[(offset - A_PID4) / 4];
+        switch (s->sse_version) {
+        case ARMSSE_SSE300:
+            r = iotkit_secctl_s_sse300_idregs[(offset - A_PID4) / 4];
+            break;
+        default:
+            r = iotkit_secctl_s_idregs[(offset - A_PID4) / 4];
+            break;
+        }
         break;
     case A_SECPPCINTCLR:
     case A_SECMSCINTCLR:
@@ -473,7 +495,14 @@
     case A_CID1:
     case A_CID2:
     case A_CID3:
-        r = iotkit_secctl_ns_idregs[(offset - A_PID4) / 4];
+        switch (s->sse_version) {
+        case ARMSSE_SSE300:
+            r = iotkit_secctl_ns_sse300_idregs[(offset - A_PID4) / 4];
+            break;
+        default:
+            r = iotkit_secctl_ns_idregs[(offset - A_PID4) / 4];
+            break;
+        }
         break;
     default:
         qemu_log_mask(LOG_GUEST_ERROR,
@@ -710,6 +739,16 @@
     sysbus_init_mmio(sbd, &s->ns_regs);
 }
 
+static void iotkit_secctl_realize(DeviceState *dev, Error **errp)
+{
+    IoTKitSecCtl *s = IOTKIT_SECCTL(dev);
+
+    if (!armsse_version_valid(s->sse_version)) {
+        error_setg(errp, "invalid sse-version value %d", s->sse_version);
+        return;
+    }
+}
+
 static const VMStateDescription iotkit_secctl_ppc_vmstate = {
     .name = "iotkit-secctl-ppc",
     .version_id = 1,
@@ -775,12 +814,19 @@
     },
 };
 
+static Property iotkit_secctl_props[] = {
+    DEFINE_PROP_UINT32("sse-version", IoTKitSecCtl, sse_version, 0),
+    DEFINE_PROP_END_OF_LIST()
+};
+
 static void iotkit_secctl_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->vmsd = &iotkit_secctl_vmstate;
     dc->reset = iotkit_secctl_reset;
+    dc->realize = iotkit_secctl_realize;
+    device_class_set_props(dc, iotkit_secctl_props);
 }
 
 static const TypeInfo iotkit_secctl_info = {
diff --git a/hw/misc/iotkit-sysctl.c b/hw/misc/iotkit-sysctl.c
index 222511c..9ee8fe8 100644
--- a/hw/misc/iotkit-sysctl.c
+++ b/hw/misc/iotkit-sysctl.c
@@ -28,6 +28,7 @@
 #include "hw/registerfields.h"
 #include "hw/misc/iotkit-sysctl.h"
 #include "hw/qdev-properties.h"
+#include "hw/arm/armsse-version.h"
 #include "target/arm/arm-powerctl.h"
 #include "target/arm/cpu.h"
 
@@ -44,16 +45,22 @@
     FIELD(SWRESET, SWRESETREQ, 9, 1)
 REG32(GRETREG, 0x10c)
 REG32(INITSVTOR0, 0x110)
+    FIELD(INITSVTOR0, LOCK, 0, 1)
+    FIELD(INITSVTOR0, VTOR, 7, 25)
 REG32(INITSVTOR1, 0x114)
 REG32(CPUWAIT, 0x118)
 REG32(NMI_ENABLE, 0x11c) /* BUSWAIT in IoTKit */
 REG32(WICCTRL, 0x120)
 REG32(EWCTRL, 0x124)
+REG32(PWRCTRL, 0x1fc)
+    FIELD(PWRCTRL, PPU_ACCESS_UNLOCK, 0, 1)
+    FIELD(PWRCTRL, PPU_ACCESS_FILTER, 1, 1)
 REG32(PDCM_PD_SYS_SENSE, 0x200)
+REG32(PDCM_PD_CPU0_SENSE, 0x204)
 REG32(PDCM_PD_SRAM0_SENSE, 0x20c)
 REG32(PDCM_PD_SRAM1_SENSE, 0x210)
-REG32(PDCM_PD_SRAM2_SENSE, 0x214)
-REG32(PDCM_PD_SRAM3_SENSE, 0x218)
+REG32(PDCM_PD_SRAM2_SENSE, 0x214) /* PDCM_PD_VMR0_SENSE on SSE300 */
+REG32(PDCM_PD_SRAM3_SENSE, 0x218) /* PDCM_PD_VMR1_SENSE on SSE300 */
 REG32(PID4, 0xfd0)
 REG32(PID5, 0xfd4)
 REG32(PID6, 0xfd8)
@@ -68,12 +75,19 @@
 REG32(CID3, 0xffc)
 
 /* PID/CID values */
-static const int sysctl_id[] = {
+static const int iotkit_sysctl_id[] = {
     0x04, 0x00, 0x00, 0x00, /* PID4..PID7 */
     0x54, 0xb8, 0x0b, 0x00, /* PID0..PID3 */
     0x0d, 0xf0, 0x05, 0xb1, /* CID0..CID3 */
 };
 
+/* Also used by the SSE300 */
+static const int sse200_sysctl_id[] = {
+    0x04, 0x00, 0x00, 0x00, /* PID4..PID7 */
+    0x54, 0xb8, 0x1b, 0x00, /* PID0..PID3 */
+    0x0d, 0xf0, 0x05, 0xb1, /* CID0..CID3 */
+};
+
 /*
  * Set the initial secure vector table offset address for the core.
  * This will take effect when the CPU next resets.
@@ -100,28 +114,52 @@
         r = s->secure_debug;
         break;
     case A_SCSECCTRL:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+        case ARMSSE_SSE300:
+            r = s->scsecctrl;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        r = s->scsecctrl;
         break;
     case A_FCLK_DIV:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+        case ARMSSE_SSE300:
+            r = s->fclk_div;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        r = s->fclk_div;
         break;
     case A_SYSCLK_DIV:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+        case ARMSSE_SSE300:
+            r = s->sysclk_div;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        r = s->sysclk_div;
         break;
     case A_CLOCK_FORCE:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+        case ARMSSE_SSE300:
+            r = s->clock_force;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        r = s->clock_force;
         break;
     case A_RESET_SYNDROME:
         r = s->reset_syndrome;
@@ -136,63 +174,178 @@
         r = s->initsvtor0;
         break;
     case A_INITSVTOR1:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+            r = s->initsvtor1;
+            break;
+        case ARMSSE_SSE300:
+            goto bad_offset;
+        default:
+            g_assert_not_reached();
         }
-        r = s->initsvtor1;
         break;
     case A_CPUWAIT:
-        r = s->cpuwait;
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
+        case ARMSSE_SSE200:
+            r = s->cpuwait;
+            break;
+        case ARMSSE_SSE300:
+            /* In SSE300 this is reserved (for INITSVTOR2) */
+            goto bad_offset;
+        default:
+            g_assert_not_reached();
+        }
         break;
     case A_NMI_ENABLE:
-        /* In IoTKit this is named BUSWAIT but is marked reserved, R/O, zero */
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
+            /* In IoTKit this is named BUSWAIT but marked reserved, R/O, zero */
             r = 0;
             break;
+        case ARMSSE_SSE200:
+            r = s->nmi_enable;
+            break;
+        case ARMSSE_SSE300:
+            /* In SSE300 this is reserved (for INITSVTOR3) */
+            goto bad_offset;
+        default:
+            g_assert_not_reached();
         }
-        r = s->nmi_enable;
         break;
     case A_WICCTRL:
-        r = s->wicctrl;
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
+        case ARMSSE_SSE200:
+            r = s->wicctrl;
+            break;
+        case ARMSSE_SSE300:
+            /* In SSE300 this offset is CPUWAIT */
+            r = s->cpuwait;
+            break;
+        default:
+            g_assert_not_reached();
+        }
         break;
     case A_EWCTRL:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+            r = s->ewctrl;
+            break;
+        case ARMSSE_SSE300:
+            /* In SSE300 this offset is is NMI_ENABLE */
+            r = s->nmi_enable;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        r = s->ewctrl;
+        break;
+    case A_PWRCTRL:
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
+        case ARMSSE_SSE200:
+            goto bad_offset;
+        case ARMSSE_SSE300:
+            r = s->pwrctrl;
+            break;
+        default:
+            g_assert_not_reached();
+        }
         break;
     case A_PDCM_PD_SYS_SENSE:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+        case ARMSSE_SSE300:
+            r = s->pdcm_pd_sys_sense;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        r = s->pdcm_pd_sys_sense;
+        break;
+    case A_PDCM_PD_CPU0_SENSE:
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
+        case ARMSSE_SSE200:
+            goto bad_offset;
+        case ARMSSE_SSE300:
+            r = s->pdcm_pd_cpu0_sense;
+            break;
+        default:
+            g_assert_not_reached();
+        }
         break;
     case A_PDCM_PD_SRAM0_SENSE:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+            r = s->pdcm_pd_sram0_sense;
+            break;
+        case ARMSSE_SSE300:
+            goto bad_offset;
+        default:
+            g_assert_not_reached();
         }
-        r = s->pdcm_pd_sram0_sense;
         break;
     case A_PDCM_PD_SRAM1_SENSE:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+            r = s->pdcm_pd_sram1_sense;
+            break;
+        case ARMSSE_SSE300:
+            goto bad_offset;
+        default:
+            g_assert_not_reached();
         }
-        r = s->pdcm_pd_sram1_sense;
         break;
     case A_PDCM_PD_SRAM2_SENSE:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+            r = s->pdcm_pd_sram2_sense;
+            break;
+        case ARMSSE_SSE300:
+            r = s->pdcm_pd_vmr0_sense;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        r = s->pdcm_pd_sram2_sense;
         break;
     case A_PDCM_PD_SRAM3_SENSE:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+            r = s->pdcm_pd_sram3_sense;
+            break;
+        case ARMSSE_SSE300:
+            r = s->pdcm_pd_vmr1_sense;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        r = s->pdcm_pd_sram3_sense;
         break;
     case A_PID4 ... A_CID3:
-        r = sysctl_id[(offset - A_PID4) / 4];
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
+            r = iotkit_sysctl_id[(offset - A_PID4) / 4];
+            break;
+        case ARMSSE_SSE200:
+        case ARMSSE_SSE300:
+            r = sse200_sysctl_id[(offset - A_PID4) / 4];
+            break;
+        default:
+            g_assert_not_reached();
+        }
         break;
     case A_SECDBGSET:
     case A_SECDBGCLR:
@@ -213,6 +366,21 @@
     return r;
 }
 
+static void cpuwait_write(IoTKitSysCtl *s, uint32_t value)
+{
+    int num_cpus = (s->sse_version == ARMSSE_SSE300) ? 1 : 2;
+    int i;
+
+    for (i = 0; i < num_cpus; i++) {
+        uint32_t mask = 1 << i;
+        if ((s->cpuwait & mask) && !(value & mask)) {
+            /* Powering up CPU 0 */
+            arm_set_cpu_on_and_reset(i);
+        }
+    }
+    s->cpuwait = value;
+}
+
 static void iotkit_sysctl_write(void *opaque, hwaddr offset,
                                  uint64_t value, unsigned size)
 {
@@ -249,23 +417,53 @@
         s->gretreg = value;
         break;
     case A_INITSVTOR0:
-        s->initsvtor0 = value;
-        set_init_vtor(0, s->initsvtor0);
+        switch (s->sse_version) {
+        case ARMSSE_SSE300:
+            /* SSE300 has a LOCK bit which prevents further writes when set */
+            if (s->initsvtor0 & R_INITSVTOR0_LOCK_MASK) {
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "IoTKit INITSVTOR0 write when register locked\n");
+                break;
+            }
+            s->initsvtor0 = value;
+            set_init_vtor(0, s->initsvtor0 & R_INITSVTOR0_VTOR_MASK);
+            break;
+        case ARMSSE_IOTKIT:
+        case ARMSSE_SSE200:
+            s->initsvtor0 = value;
+            set_init_vtor(0, s->initsvtor0);
+            break;
+        default:
+            g_assert_not_reached();
+        }
         break;
     case A_CPUWAIT:
-        if ((s->cpuwait & 1) && !(value & 1)) {
-            /* Powering up CPU 0 */
-            arm_set_cpu_on_and_reset(0);
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
+        case ARMSSE_SSE200:
+            cpuwait_write(s, value);
+            break;
+        case ARMSSE_SSE300:
+            /* In SSE300 this is reserved (for INITSVTOR2) */
+            goto bad_offset;
+        default:
+            g_assert_not_reached();
         }
-        if ((s->cpuwait & 2) && !(value & 2)) {
-            /* Powering up CPU 1 */
-            arm_set_cpu_on_and_reset(1);
-        }
-        s->cpuwait = value;
         break;
     case A_WICCTRL:
-        qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl WICCTRL unimplemented\n");
-        s->wicctrl = value;
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
+        case ARMSSE_SSE200:
+            qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl WICCTRL unimplemented\n");
+            s->wicctrl = value;
+            break;
+        case ARMSSE_SSE300:
+            /* In SSE300 this offset is CPUWAIT */
+            cpuwait_write(s, value);
+            break;
+        default:
+            g_assert_not_reached();
+        }
         break;
     case A_SECDBGSET:
         /* write-1-to-set */
@@ -283,94 +481,214 @@
         }
         break;
     case A_SCSECCTRL:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+        case ARMSSE_SSE300:
+            qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl SCSECCTRL unimplemented\n");
+            s->scsecctrl = value;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl SCSECCTRL unimplemented\n");
-        s->scsecctrl = value;
         break;
     case A_FCLK_DIV:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+        case ARMSSE_SSE300:
+            qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl FCLK_DIV unimplemented\n");
+            s->fclk_div = value;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl FCLK_DIV unimplemented\n");
-        s->fclk_div = value;
         break;
     case A_SYSCLK_DIV:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+        case ARMSSE_SSE300:
+            qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl SYSCLK_DIV unimplemented\n");
+            s->sysclk_div = value;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl SYSCLK_DIV unimplemented\n");
-        s->sysclk_div = value;
         break;
     case A_CLOCK_FORCE:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+        case ARMSSE_SSE300:
+            qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl CLOCK_FORCE unimplemented\n");
+            s->clock_force = value;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl CLOCK_FORCE unimplemented\n");
-        s->clock_force = value;
         break;
     case A_INITSVTOR1:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+            s->initsvtor1 = value;
+            set_init_vtor(1, s->initsvtor1);
+            break;
+        case ARMSSE_SSE300:
+            goto bad_offset;
+        default:
+            g_assert_not_reached();
         }
-        s->initsvtor1 = value;
-        set_init_vtor(1, s->initsvtor1);
         break;
     case A_EWCTRL:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+            qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl EWCTRL unimplemented\n");
+            s->ewctrl = value;
+            break;
+        case ARMSSE_SSE300:
+            /* In SSE300 this offset is is NMI_ENABLE */
+            qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl NMI_ENABLE unimplemented\n");
+            s->nmi_enable = value;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl EWCTRL unimplemented\n");
-        s->ewctrl = value;
+        break;
+    case A_PWRCTRL:
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
+        case ARMSSE_SSE200:
+            goto bad_offset;
+        case ARMSSE_SSE300:
+            if (!(s->pwrctrl & R_PWRCTRL_PPU_ACCESS_UNLOCK_MASK)) {
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "IoTKit PWRCTRL write when register locked\n");
+                break;
+            }
+            s->pwrctrl = value;
+            break;
+        default:
+            g_assert_not_reached();
+        }
         break;
     case A_PDCM_PD_SYS_SENSE:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+        case ARMSSE_SSE300:
+            qemu_log_mask(LOG_UNIMP,
+                          "IoTKit SysCtl PDCM_PD_SYS_SENSE unimplemented\n");
+            s->pdcm_pd_sys_sense = value;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        qemu_log_mask(LOG_UNIMP,
-                      "IoTKit SysCtl PDCM_PD_SYS_SENSE unimplemented\n");
-        s->pdcm_pd_sys_sense = value;
+        break;
+    case A_PDCM_PD_CPU0_SENSE:
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
+        case ARMSSE_SSE200:
+            goto bad_offset;
+        case ARMSSE_SSE300:
+            qemu_log_mask(LOG_UNIMP,
+                          "IoTKit SysCtl PDCM_PD_CPU0_SENSE unimplemented\n");
+            s->pdcm_pd_cpu0_sense = value;
+            break;
+        default:
+            g_assert_not_reached();
+        }
         break;
     case A_PDCM_PD_SRAM0_SENSE:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+            qemu_log_mask(LOG_UNIMP,
+                          "IoTKit SysCtl PDCM_PD_SRAM0_SENSE unimplemented\n");
+            s->pdcm_pd_sram0_sense = value;
+            break;
+        case ARMSSE_SSE300:
+            goto bad_offset;
+        default:
+            g_assert_not_reached();
         }
-        qemu_log_mask(LOG_UNIMP,
-                      "IoTKit SysCtl PDCM_PD_SRAM0_SENSE unimplemented\n");
-        s->pdcm_pd_sram0_sense = value;
         break;
     case A_PDCM_PD_SRAM1_SENSE:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+            qemu_log_mask(LOG_UNIMP,
+                          "IoTKit SysCtl PDCM_PD_SRAM1_SENSE unimplemented\n");
+            s->pdcm_pd_sram1_sense = value;
+            break;
+        case ARMSSE_SSE300:
+            goto bad_offset;
+        default:
+            g_assert_not_reached();
         }
-        qemu_log_mask(LOG_UNIMP,
-                      "IoTKit SysCtl PDCM_PD_SRAM1_SENSE unimplemented\n");
-        s->pdcm_pd_sram1_sense = value;
         break;
     case A_PDCM_PD_SRAM2_SENSE:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+            qemu_log_mask(LOG_UNIMP,
+                          "IoTKit SysCtl PDCM_PD_SRAM2_SENSE unimplemented\n");
+            s->pdcm_pd_sram2_sense = value;
+            break;
+        case ARMSSE_SSE300:
+            qemu_log_mask(LOG_UNIMP,
+                          "IoTKit SysCtl PDCM_PD_VMR0_SENSE unimplemented\n");
+            s->pdcm_pd_vmr0_sense = value;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        qemu_log_mask(LOG_UNIMP,
-                      "IoTKit SysCtl PDCM_PD_SRAM2_SENSE unimplemented\n");
-        s->pdcm_pd_sram2_sense = value;
         break;
     case A_PDCM_PD_SRAM3_SENSE:
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto bad_offset;
+        case ARMSSE_SSE200:
+            qemu_log_mask(LOG_UNIMP,
+                          "IoTKit SysCtl PDCM_PD_SRAM3_SENSE unimplemented\n");
+            s->pdcm_pd_sram3_sense = value;
+            break;
+        case ARMSSE_SSE300:
+            qemu_log_mask(LOG_UNIMP,
+                          "IoTKit SysCtl PDCM_PD_VMR1_SENSE unimplemented\n");
+            s->pdcm_pd_vmr1_sense = value;
+            break;
+        default:
+            g_assert_not_reached();
         }
-        qemu_log_mask(LOG_UNIMP,
-                      "IoTKit SysCtl PDCM_PD_SRAM3_SENSE unimplemented\n");
-        s->pdcm_pd_sram3_sense = value;
         break;
     case A_NMI_ENABLE:
         /* In IoTKit this is BUSWAIT: reserved, R/O, zero */
-        if (!s->is_sse200) {
+        switch (s->sse_version) {
+        case ARMSSE_IOTKIT:
             goto ro_offset;
+        case ARMSSE_SSE200:
+            qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl NMI_ENABLE unimplemented\n");
+            s->nmi_enable = value;
+            break;
+        case ARMSSE_SSE300:
+            /* In SSE300 this is reserved (for INITSVTOR3) */
+            goto bad_offset;
+        default:
+            g_assert_not_reached();
         }
-        qemu_log_mask(LOG_UNIMP, "IoTKit SysCtl NMI_ENABLE unimplemented\n");
-        s->nmi_enable = value;
         break;
     case A_SECDBGSTAT:
     case A_PID4 ... A_CID3:
@@ -417,11 +735,15 @@
     s->clock_force = 0;
     s->nmi_enable = 0;
     s->ewctrl = 0;
+    s->pwrctrl = 0x3;
     s->pdcm_pd_sys_sense = 0x7f;
     s->pdcm_pd_sram0_sense = 0;
     s->pdcm_pd_sram1_sense = 0;
     s->pdcm_pd_sram2_sense = 0;
     s->pdcm_pd_sram3_sense = 0;
+    s->pdcm_pd_cpu0_sense = 0;
+    s->pdcm_pd_vmr0_sense = 0;
+    s->pdcm_pd_vmr1_sense = 0;
 }
 
 static void iotkit_sysctl_init(Object *obj)
@@ -438,17 +760,38 @@
 {
     IoTKitSysCtl *s = IOTKIT_SYSCTL(dev);
 
-    /* The top 4 bits of the SYS_VERSION register tell us if we're an SSE-200 */
-    if (extract32(s->sys_version, 28, 4) == 2) {
-        s->is_sse200 = true;
+    if (!armsse_version_valid(s->sse_version)) {
+        error_setg(errp, "invalid sse-version value %d", s->sse_version);
+        return;
     }
 }
 
+static bool sse300_needed(void *opaque)
+{
+    IoTKitSysCtl *s = IOTKIT_SYSCTL(opaque);
+
+    return s->sse_version == ARMSSE_SSE300;
+}
+
+static const VMStateDescription iotkit_sysctl_sse300_vmstate = {
+    .name = "iotkit-sysctl/sse-300",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = sse300_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(pwrctrl, IoTKitSysCtl),
+        VMSTATE_UINT32(pdcm_pd_cpu0_sense, IoTKitSysCtl),
+        VMSTATE_UINT32(pdcm_pd_vmr0_sense, IoTKitSysCtl),
+        VMSTATE_UINT32(pdcm_pd_vmr1_sense, IoTKitSysCtl),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static bool sse200_needed(void *opaque)
 {
     IoTKitSysCtl *s = IOTKIT_SYSCTL(opaque);
 
-    return s->is_sse200;
+    return s->sse_version != ARMSSE_IOTKIT;
 }
 
 static const VMStateDescription iotkit_sysctl_sse200_vmstate = {
@@ -488,12 +831,13 @@
     },
     .subsections = (const VMStateDescription*[]) {
         &iotkit_sysctl_sse200_vmstate,
+        &iotkit_sysctl_sse300_vmstate,
         NULL
     }
 };
 
 static Property iotkit_sysctl_props[] = {
-    DEFINE_PROP_UINT32("SYS_VERSION", IoTKitSysCtl, sys_version, 0),
+    DEFINE_PROP_UINT32("sse-version", IoTKitSysCtl, sse_version, 0),
     DEFINE_PROP_UINT32("CPUWAIT_RST", IoTKitSysCtl, cpuwait_rst, 0),
     DEFINE_PROP_UINT32("INITSVTOR0_RST", IoTKitSysCtl, initsvtor0_rst,
                        0x10000000),
diff --git a/hw/misc/iotkit-sysinfo.c b/hw/misc/iotkit-sysinfo.c
index 52e7005..aaa9305 100644
--- a/hw/misc/iotkit-sysinfo.c
+++ b/hw/misc/iotkit-sysinfo.c
@@ -26,9 +26,12 @@
 #include "hw/registerfields.h"
 #include "hw/misc/iotkit-sysinfo.h"
 #include "hw/qdev-properties.h"
+#include "hw/arm/armsse-version.h"
 
 REG32(SYS_VERSION, 0x0)
 REG32(SYS_CONFIG, 0x4)
+REG32(SYS_CONFIG1, 0x8)
+REG32(IIDR, 0xfc8)
 REG32(PID4, 0xfd0)
 REG32(PID5, 0xfd4)
 REG32(PID6, 0xfd8)
@@ -49,6 +52,12 @@
     0x0d, 0xf0, 0x05, 0xb1, /* CID0..CID3 */
 };
 
+static const int sysinfo_sse300_id[] = {
+    0x04, 0x00, 0x00, 0x00, /* PID4..PID7 */
+    0x58, 0xb8, 0x1b, 0x00, /* PID0..PID3 */
+    0x0d, 0xf0, 0x05, 0xb1, /* CID0..CID3 */
+};
+
 static uint64_t iotkit_sysinfo_read(void *opaque, hwaddr offset,
                                     unsigned size)
 {
@@ -63,10 +72,36 @@
     case A_SYS_CONFIG:
         r = s->sys_config;
         break;
+    case A_SYS_CONFIG1:
+        switch (s->sse_version) {
+        case ARMSSE_SSE300:
+            return 0;
+            break;
+        default:
+            goto bad_read;
+        }
+        break;
+    case A_IIDR:
+        switch (s->sse_version) {
+        case ARMSSE_SSE300:
+            return s->iidr;
+            break;
+        default:
+            goto bad_read;
+        }
+        break;
     case A_PID4 ... A_CID3:
-        r = sysinfo_id[(offset - A_PID4) / 4];
+        switch (s->sse_version) {
+        case ARMSSE_SSE300:
+            r = sysinfo_sse300_id[(offset - A_PID4) / 4];
+            break;
+        default:
+            r = sysinfo_id[(offset - A_PID4) / 4];
+            break;
+        }
         break;
     default:
+    bad_read:
         qemu_log_mask(LOG_GUEST_ERROR,
                       "IoTKit SysInfo read: bad offset %x\n", (int)offset);
         r = 0;
@@ -99,6 +134,8 @@
 static Property iotkit_sysinfo_props[] = {
     DEFINE_PROP_UINT32("SYS_VERSION", IoTKitSysInfo, sys_version, 0),
     DEFINE_PROP_UINT32("SYS_CONFIG", IoTKitSysInfo, sys_config, 0),
+    DEFINE_PROP_UINT32("sse-version", IoTKitSysInfo, sse_version, 0),
+    DEFINE_PROP_UINT32("IIDR", IoTKitSysInfo, iidr, 0),
     DEFINE_PROP_END_OF_LIST()
 };
 
@@ -112,6 +149,16 @@
     sysbus_init_mmio(sbd, &s->iomem);
 }
 
+static void iotkit_sysinfo_realize(DeviceState *dev, Error **errp)
+{
+    IoTKitSysInfo *s = IOTKIT_SYSINFO(dev);
+
+    if (!armsse_version_valid(s->sse_version)) {
+        error_setg(errp, "invalid sse-version value %d", s->sse_version);
+        return;
+    }
+}
+
 static void iotkit_sysinfo_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -120,7 +167,7 @@
      * This device has no guest-modifiable state and so it
      * does not need a reset function or VMState.
      */
-
+    dc->realize = iotkit_sysinfo_realize;
     device_class_set_props(dc, iotkit_sysinfo_props);
 }
 
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index 6292839..e30a555 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -96,6 +96,7 @@
 softmmu_ss.add(when: 'CONFIG_TZ_PPC', if_true: files('tz-ppc.c'))
 softmmu_ss.add(when: 'CONFIG_IOTKIT_SECCTL', if_true: files('iotkit-secctl.c'))
 softmmu_ss.add(when: 'CONFIG_IOTKIT_SYSINFO', if_true: files('iotkit-sysinfo.c'))
+softmmu_ss.add(when: 'CONFIG_ARMSSE_CPU_PWRCTRL', if_true: files('armsse-cpu-pwrctrl.c'))
 softmmu_ss.add(when: 'CONFIG_ARMSSE_CPUID', if_true: files('armsse-cpuid.c'))
 softmmu_ss.add(when: 'CONFIG_ARMSSE_MHU', if_true: files('armsse-mhu.c'))
 
diff --git a/hw/misc/mps2-fpgaio.c b/hw/misc/mps2-fpgaio.c
index f3db88d..07b8cbd 100644
--- a/hw/misc/mps2-fpgaio.c
+++ b/hw/misc/mps2-fpgaio.c
@@ -29,6 +29,7 @@
 #include "qemu/timer.h"
 
 REG32(LED0, 0)
+REG32(DBGCTRL, 4)
 REG32(BUTTON, 8)
 REG32(CLK1HZ, 0x10)
 REG32(CLK100HZ, 0x14)
@@ -129,6 +130,12 @@
     case A_LED0:
         r = s->led0;
         break;
+    case A_DBGCTRL:
+        if (!s->has_dbgctrl) {
+            goto bad_offset;
+        }
+        r = s->dbgctrl;
+        break;
     case A_BUTTON:
         /* User-pressable board buttons. We don't model that, so just return
          * zeroes.
@@ -195,6 +202,14 @@
             }
         }
         break;
+    case A_DBGCTRL:
+        if (!s->has_dbgctrl) {
+            goto bad_offset;
+        }
+        qemu_log_mask(LOG_UNIMP,
+                      "MPS2 FPGAIO: DBGCTRL unimplemented\n");
+        s->dbgctrl = value;
+        break;
     case A_PRESCALE:
         resync_counter(s);
         s->prescale = value;
@@ -225,6 +240,7 @@
         s->pscntr = value;
         break;
     default:
+    bad_offset:
         qemu_log_mask(LOG_GUEST_ERROR,
                       "MPS2 FPGAIO write: bad offset 0x%x\n", (int) offset);
         break;
@@ -285,41 +301,22 @@
     }
 }
 
-static bool mps2_fpgaio_counters_needed(void *opaque)
-{
-    /* Currently vmstate.c insists all subsections have a 'needed' function */
-    return true;
-}
-
-static const VMStateDescription mps2_fpgaio_counters_vmstate = {
-    .name = "mps2-fpgaio/counters",
-    .version_id = 2,
-    .minimum_version_id = 2,
-    .needed = mps2_fpgaio_counters_needed,
+static const VMStateDescription mps2_fpgaio_vmstate = {
+    .name = "mps2-fpgaio",
+    .version_id = 3,
+    .minimum_version_id = 3,
     .fields = (VMStateField[]) {
+        VMSTATE_UINT32(led0, MPS2FPGAIO),
+        VMSTATE_UINT32(prescale, MPS2FPGAIO),
+        VMSTATE_UINT32(misc, MPS2FPGAIO),
+        VMSTATE_UINT32(dbgctrl, MPS2FPGAIO),
         VMSTATE_INT64(clk1hz_tick_offset, MPS2FPGAIO),
         VMSTATE_INT64(clk100hz_tick_offset, MPS2FPGAIO),
         VMSTATE_UINT32(counter, MPS2FPGAIO),
         VMSTATE_UINT32(pscntr, MPS2FPGAIO),
         VMSTATE_INT64(pscntr_sync_ticks, MPS2FPGAIO),
         VMSTATE_END_OF_LIST()
-    }
-};
-
-static const VMStateDescription mps2_fpgaio_vmstate = {
-    .name = "mps2-fpgaio",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .fields = (VMStateField[]) {
-        VMSTATE_UINT32(led0, MPS2FPGAIO),
-        VMSTATE_UINT32(prescale, MPS2FPGAIO),
-        VMSTATE_UINT32(misc, MPS2FPGAIO),
-        VMSTATE_END_OF_LIST()
     },
-    .subsections = (const VMStateDescription*[]) {
-        &mps2_fpgaio_counters_vmstate,
-        NULL
-    }
 };
 
 static Property mps2_fpgaio_properties[] = {
@@ -328,6 +325,7 @@
     /* Number of LEDs controlled by LED0 register */
     DEFINE_PROP_UINT32("num-leds", MPS2FPGAIO, num_leds, 2),
     DEFINE_PROP_BOOL("has-switches", MPS2FPGAIO, has_switches, false),
+    DEFINE_PROP_BOOL("has-dbgctrl", MPS2FPGAIO, has_dbgctrl, false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/misc/mps2-scc.c b/hw/misc/mps2-scc.c
index 140a4b9..c56aca8 100644
--- a/hw/misc/mps2-scc.c
+++ b/hw/misc/mps2-scc.c
@@ -110,14 +110,14 @@
         r = s->cfg1;
         break;
     case A_CFG2:
-        if (scc_partno(s) != 0x524) {
+        if (scc_partno(s) != 0x524 && scc_partno(s) != 0x547) {
             /* CFG2 reserved on other boards */
             goto bad_offset;
         }
         r = s->cfg2;
         break;
     case A_CFG3:
-        if (scc_partno(s) == 0x524) {
+        if (scc_partno(s) == 0x524 && scc_partno(s) == 0x547) {
             /* CFG3 reserved on AN524 */
             goto bad_offset;
         }
@@ -130,7 +130,7 @@
         r = s->cfg4;
         break;
     case A_CFG5:
-        if (scc_partno(s) != 0x524) {
+        if (scc_partno(s) != 0x524 && scc_partno(s) != 0x547) {
             /* CFG5 reserved on other boards */
             goto bad_offset;
         }
@@ -185,7 +185,10 @@
 
     switch (offset) {
     case A_CFG0:
-        /* TODO on some boards bit 0 controls RAM remapping */
+        /*
+         * TODO on some boards bit 0 controls RAM remapping;
+         * on others bit 1 is CPU_WAIT.
+         */
         s->cfg0 = value;
         break;
     case A_CFG1:
@@ -195,7 +198,7 @@
         }
         break;
     case A_CFG2:
-        if (scc_partno(s) != 0x524) {
+        if (scc_partno(s) != 0x524 && scc_partno(s) != 0x547) {
             /* CFG2 reserved on other boards */
             goto bad_offset;
         }
@@ -203,7 +206,7 @@
         s->cfg2 = value;
         break;
     case A_CFG5:
-        if (scc_partno(s) != 0x524) {
+        if (scc_partno(s) != 0x524 && scc_partno(s) != 0x547) {
             /* CFG5 reserved on other boards */
             goto bad_offset;
         }
diff --git a/hw/misc/npcm7xx_clk.c b/hw/misc/npcm7xx_clk.c
index 0bcae9c..a1ee67d 100644
--- a/hw/misc/npcm7xx_clk.c
+++ b/hw/misc/npcm7xx_clk.c
@@ -586,15 +586,26 @@
     },
 };
 
+static void npcm7xx_clk_update_pll_cb(void *opaque, ClockEvent event)
+{
+    npcm7xx_clk_update_pll(opaque);
+}
+
 static void npcm7xx_clk_pll_init(Object *obj)
 {
     NPCM7xxClockPLLState *pll = NPCM7XX_CLOCK_PLL(obj);
 
     pll->clock_in = qdev_init_clock_in(DEVICE(pll), "clock-in",
-            npcm7xx_clk_update_pll, pll);
+                                       npcm7xx_clk_update_pll_cb, pll,
+                                       ClockUpdate);
     pll->clock_out = qdev_init_clock_out(DEVICE(pll), "clock-out");
 }
 
+static void npcm7xx_clk_update_sel_cb(void *opaque, ClockEvent event)
+{
+    npcm7xx_clk_update_sel(opaque);
+}
+
 static void npcm7xx_clk_sel_init(Object *obj)
 {
     int i;
@@ -603,16 +614,23 @@
     for (i = 0; i < NPCM7XX_CLK_SEL_MAX_INPUT; ++i) {
         sel->clock_in[i] = qdev_init_clock_in(DEVICE(sel),
                 g_strdup_printf("clock-in[%d]", i),
-                npcm7xx_clk_update_sel, sel);
+                npcm7xx_clk_update_sel_cb, sel, ClockUpdate);
     }
     sel->clock_out = qdev_init_clock_out(DEVICE(sel), "clock-out");
 }
+
+static void npcm7xx_clk_update_divider_cb(void *opaque, ClockEvent event)
+{
+    npcm7xx_clk_update_divider(opaque);
+}
+
 static void npcm7xx_clk_divider_init(Object *obj)
 {
     NPCM7xxClockDividerState *div = NPCM7XX_CLOCK_DIVIDER(obj);
 
     div->clock_in = qdev_init_clock_in(DEVICE(div), "clock-in",
-            npcm7xx_clk_update_divider, div);
+                                       npcm7xx_clk_update_divider_cb,
+                                       div, ClockUpdate);
     div->clock_out = qdev_init_clock_out(DEVICE(div), "clock-out");
 }
 
@@ -875,7 +893,7 @@
 {
     int i;
 
-    s->clkref = qdev_init_clock_in(DEVICE(s), "clkref", NULL, NULL);
+    s->clkref = qdev_init_clock_in(DEVICE(s), "clkref", NULL, NULL, 0);
 
     /* First pass: init all converter modules */
     QEMU_BUILD_BUG_ON(ARRAY_SIZE(pll_init_info_list) != NPCM7XX_CLOCK_NR_PLLS);
diff --git a/hw/misc/npcm7xx_pwm.c b/hw/misc/npcm7xx_pwm.c
index dabcb6c..ce192bb 100644
--- a/hw/misc/npcm7xx_pwm.c
+++ b/hw/misc/npcm7xx_pwm.c
@@ -493,7 +493,7 @@
     memory_region_init_io(&s->iomem, obj, &npcm7xx_pwm_ops, s,
                           TYPE_NPCM7XX_PWM, 4 * KiB);
     sysbus_init_mmio(sbd, &s->iomem);
-    s->clock = qdev_init_clock_in(DEVICE(s), "clock", NULL, NULL);
+    s->clock = qdev_init_clock_in(DEVICE(s), "clock", NULL, NULL, 0);
 
     for (i = 0; i < NPCM7XX_PWM_PER_MODULE; ++i) {
         object_property_add_uint32_ptr(obj, "freq[*]",
diff --git a/hw/misc/trace-events b/hw/misc/trace-events
index d626b9d..4b15db8 100644
--- a/hw/misc/trace-events
+++ b/hw/misc/trace-events
@@ -186,6 +186,10 @@
 iotkit_sysctl_write(uint64_t offset, uint64_t data, unsigned size) "IoTKit SysCtl write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
 iotkit_sysctl_reset(void) "IoTKit SysCtl: reset"
 
+# armsse-cpu-pwrctrl.c
+armsse_cpu_pwrctrl_read(uint64_t offset, uint64_t data, unsigned size) "SSE-300 CPU_PWRCTRL read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
+armsse_cpu_pwrctrl_write(uint64_t offset, uint64_t data, unsigned size) "SSE-300 CPU_PWRCTRL write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
+
 # armsse-cpuid.c
 armsse_cpuid_read(uint64_t offset, uint64_t data, unsigned size) "SSE-200 CPU_IDENTITY read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
 armsse_cpuid_write(uint64_t offset, uint64_t data, unsigned size) "SSE-200 CPU_IDENTITY write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
diff --git a/hw/misc/zynq_slcr.c b/hw/misc/zynq_slcr.c
index 66504a9..c66d7db 100644
--- a/hw/misc/zynq_slcr.c
+++ b/hw/misc/zynq_slcr.c
@@ -307,9 +307,10 @@
     clock_propagate(s->uart1_ref_clk);
 }
 
-static void zynq_slcr_ps_clk_callback(void *opaque)
+static void zynq_slcr_ps_clk_callback(void *opaque, ClockEvent event)
 {
     ZynqSLCRState *s = (ZynqSLCRState *) opaque;
+
     zynq_slcr_compute_clocks(s);
     zynq_slcr_propagate_clocks(s);
 }
@@ -576,7 +577,7 @@
 };
 
 static const ClockPortInitArray zynq_slcr_clocks = {
-    QDEV_CLOCK_IN(ZynqSLCRState, ps_clk, zynq_slcr_ps_clk_callback),
+    QDEV_CLOCK_IN(ZynqSLCRState, ps_clk, zynq_slcr_ps_clk_callback, ClockUpdate),
     QDEV_CLOCK_OUT(ZynqSLCRState, uart0_ref_clk),
     QDEV_CLOCK_OUT(ZynqSLCRState, uart1_ref_clk),
     QDEV_CLOCK_END
diff --git a/hw/scsi/esp-pci.c b/hw/scsi/esp-pci.c
index 4d7c2ca..c3d3dab 100644
--- a/hw/scsi/esp-pci.c
+++ b/hw/scsi/esp-pci.c
@@ -79,8 +79,10 @@
 
 static void esp_pci_handle_idle(PCIESPState *pci, uint32_t val)
 {
+    ESPState *s = ESP(&pci->esp);
+
     trace_esp_pci_dma_idle(val);
-    esp_dma_enable(&pci->esp, 0, 0);
+    esp_dma_enable(s, 0, 0);
 }
 
 static void esp_pci_handle_blast(PCIESPState *pci, uint32_t val)
@@ -91,14 +93,18 @@
 
 static void esp_pci_handle_abort(PCIESPState *pci, uint32_t val)
 {
+    ESPState *s = ESP(&pci->esp);
+
     trace_esp_pci_dma_abort(val);
-    if (pci->esp.current_req) {
-        scsi_req_cancel(pci->esp.current_req);
+    if (s->current_req) {
+        scsi_req_cancel(s->current_req);
     }
 }
 
 static void esp_pci_handle_start(PCIESPState *pci, uint32_t val)
 {
+    ESPState *s = ESP(&pci->esp);
+
     trace_esp_pci_dma_start(val);
 
     pci->dma_regs[DMA_WBC] = pci->dma_regs[DMA_STC];
@@ -109,7 +115,7 @@
                                | DMA_STAT_DONE | DMA_STAT_ABORT
                                | DMA_STAT_ERROR | DMA_STAT_PWDN);
 
-    esp_dma_enable(&pci->esp, 0, 1);
+    esp_dma_enable(s, 0, 1);
 }
 
 static void esp_pci_dma_write(PCIESPState *pci, uint32_t saddr, uint32_t val)
@@ -155,11 +161,12 @@
 
 static uint32_t esp_pci_dma_read(PCIESPState *pci, uint32_t saddr)
 {
+    ESPState *s = ESP(&pci->esp);
     uint32_t val;
 
     val = pci->dma_regs[saddr];
     if (saddr == DMA_STAT) {
-        if (pci->esp.rregs[ESP_RSTAT] & STAT_INT) {
+        if (s->rregs[ESP_RSTAT] & STAT_INT) {
             val |= DMA_STAT_SCSIINT;
         }
         if (!(pci->sbac & SBAC_STATUS)) {
@@ -176,6 +183,7 @@
                              uint64_t val, unsigned int size)
 {
     PCIESPState *pci = opaque;
+    ESPState *s = ESP(&pci->esp);
 
     if (size < 4 || addr & 3) {
         /* need to upgrade request: we only support 4-bytes accesses */
@@ -183,7 +191,7 @@
         int shift;
 
         if (addr < 0x40) {
-            current = pci->esp.wregs[addr >> 2];
+            current = s->wregs[addr >> 2];
         } else if (addr < 0x60) {
             current = pci->dma_regs[(addr - 0x40) >> 2];
         } else if (addr < 0x74) {
@@ -203,7 +211,7 @@
 
     if (addr < 0x40) {
         /* SCSI core reg */
-        esp_reg_write(&pci->esp, addr >> 2, val);
+        esp_reg_write(s, addr >> 2, val);
     } else if (addr < 0x60) {
         /* PCI DMA CCB */
         esp_pci_dma_write(pci, (addr - 0x40) >> 2, val);
@@ -220,11 +228,12 @@
                                 unsigned int size)
 {
     PCIESPState *pci = opaque;
+    ESPState *s = ESP(&pci->esp);
     uint32_t ret;
 
     if (addr < 0x40) {
         /* SCSI core reg */
-        ret = esp_reg_read(&pci->esp, addr >> 2);
+        ret = esp_reg_read(s, addr >> 2);
     } else if (addr < 0x60) {
         /* PCI DMA CCB */
         ret = esp_pci_dma_read(pci, (addr - 0x40) >> 2);
@@ -306,7 +315,9 @@
 static void esp_pci_hard_reset(DeviceState *dev)
 {
     PCIESPState *pci = PCI_ESP(dev);
-    esp_hard_reset(&pci->esp);
+    ESPState *s = ESP(&pci->esp);
+
+    esp_hard_reset(s);
     pci->dma_regs[DMA_CMD] &= ~(DMA_CMD_DIR | DMA_CMD_INTE_D | DMA_CMD_INTE_P
                               | DMA_CMD_MDL | DMA_CMD_DIAG | DMA_CMD_MASK);
     pci->dma_regs[DMA_WBC] &= ~0xffff;
@@ -319,11 +330,12 @@
 
 static const VMStateDescription vmstate_esp_pci_scsi = {
     .name = "pciespscsi",
-    .version_id = 1,
+    .version_id = 2,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
         VMSTATE_PCI_DEVICE(parent_obj, PCIESPState),
         VMSTATE_BUFFER_UNSAFE(dma_regs, PCIESPState, 0, 8 * sizeof(uint32_t)),
+        VMSTATE_UINT8_V(esp.mig_version_id, PCIESPState, 2),
         VMSTATE_STRUCT(esp, PCIESPState, 0, vmstate_esp, ESPState),
         VMSTATE_END_OF_LIST()
     }
@@ -353,9 +365,13 @@
 {
     PCIESPState *pci = PCI_ESP(dev);
     DeviceState *d = DEVICE(dev);
-    ESPState *s = &pci->esp;
+    ESPState *s = ESP(&pci->esp);
     uint8_t *pci_conf;
 
+    if (!qdev_realize(DEVICE(s), NULL, errp)) {
+        return;
+    }
+
     pci_conf = dev->config;
 
     /* Interrupt pin A */
@@ -374,11 +390,19 @@
     scsi_bus_new(&s->bus, sizeof(s->bus), d, &esp_pci_scsi_info, NULL);
 }
 
-static void esp_pci_scsi_uninit(PCIDevice *d)
+static void esp_pci_scsi_exit(PCIDevice *d)
 {
     PCIESPState *pci = PCI_ESP(d);
+    ESPState *s = ESP(&pci->esp);
 
-    qemu_free_irq(pci->esp.irq);
+    qemu_free_irq(s->irq);
+}
+
+static void esp_pci_init(Object *obj)
+{
+    PCIESPState *pci = PCI_ESP(obj);
+
+    object_initialize_child(obj, "esp", &pci->esp, TYPE_ESP);
 }
 
 static void esp_pci_class_init(ObjectClass *klass, void *data)
@@ -387,7 +411,7 @@
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
     k->realize = esp_pci_scsi_realize;
-    k->exit = esp_pci_scsi_uninit;
+    k->exit = esp_pci_scsi_exit;
     k->vendor_id = PCI_VENDOR_ID_AMD;
     k->device_id = PCI_DEVICE_ID_AMD_SCSI;
     k->revision = 0x10;
@@ -401,6 +425,7 @@
 static const TypeInfo esp_pci_info = {
     .name = TYPE_AM53C974_DEVICE,
     .parent = TYPE_PCI_DEVICE,
+    .instance_init = esp_pci_init,
     .instance_size = sizeof(PCIESPState),
     .class_init = esp_pci_class_init,
     .interfaces = (InterfaceInfo[]) {
diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c
index 93d9c9c..507ab36 100644
--- a/hw/scsi/esp.c
+++ b/hw/scsi/esp.c
@@ -63,11 +63,13 @@
 static void esp_raise_drq(ESPState *s)
 {
     qemu_irq_raise(s->irq_data);
+    trace_esp_raise_drq();
 }
 
 static void esp_lower_drq(ESPState *s)
 {
     qemu_irq_lower(s->irq_data);
+    trace_esp_lower_drq();
 }
 
 void esp_dma_enable(ESPState *s, int irq, int level)
@@ -96,39 +98,112 @@
     }
 }
 
-static void set_pdma(ESPState *s, enum pdma_origin_id origin,
-                     uint32_t index, uint32_t len)
+static void esp_fifo_push(ESPState *s, uint8_t val)
 {
-    s->pdma_origin = origin;
-    s->pdma_start = index;
-    s->pdma_cur = index;
-    s->pdma_len = len;
-}
-
-static uint8_t *get_pdma_buf(ESPState *s)
-{
-    switch (s->pdma_origin) {
-    case PDMA:
-        return s->pdma_buf;
-    case TI:
-        return s->ti_buf;
-    case CMD:
-        return s->cmdbuf;
-    case ASYNC:
-        return s->async_buf;
+    if (fifo8_num_used(&s->fifo) == ESP_FIFO_SZ) {
+        trace_esp_error_fifo_overrun();
+        return;
     }
-    return NULL;
+
+    fifo8_push(&s->fifo, val);
 }
 
-static int get_cmd_cb(ESPState *s)
+static uint8_t esp_fifo_pop(ESPState *s)
+{
+    if (fifo8_is_empty(&s->fifo)) {
+        return 0;
+    }
+
+    return fifo8_pop(&s->fifo);
+}
+
+static void esp_cmdfifo_push(ESPState *s, uint8_t val)
+{
+    if (fifo8_num_used(&s->cmdfifo) == ESP_CMDFIFO_SZ) {
+        trace_esp_error_fifo_overrun();
+        return;
+    }
+
+    fifo8_push(&s->cmdfifo, val);
+}
+
+static uint8_t esp_cmdfifo_pop(ESPState *s)
+{
+    if (fifo8_is_empty(&s->cmdfifo)) {
+        return 0;
+    }
+
+    return fifo8_pop(&s->cmdfifo);
+}
+
+static uint32_t esp_get_tc(ESPState *s)
+{
+    uint32_t dmalen;
+
+    dmalen = s->rregs[ESP_TCLO];
+    dmalen |= s->rregs[ESP_TCMID] << 8;
+    dmalen |= s->rregs[ESP_TCHI] << 16;
+
+    return dmalen;
+}
+
+static void esp_set_tc(ESPState *s, uint32_t dmalen)
+{
+    s->rregs[ESP_TCLO] = dmalen;
+    s->rregs[ESP_TCMID] = dmalen >> 8;
+    s->rregs[ESP_TCHI] = dmalen >> 16;
+}
+
+static uint32_t esp_get_stc(ESPState *s)
+{
+    uint32_t dmalen;
+
+    dmalen = s->wregs[ESP_TCLO];
+    dmalen |= s->wregs[ESP_TCMID] << 8;
+    dmalen |= s->wregs[ESP_TCHI] << 16;
+
+    return dmalen;
+}
+
+static uint8_t esp_pdma_read(ESPState *s)
+{
+    uint8_t val;
+
+    if (s->do_cmd) {
+        val = esp_cmdfifo_pop(s);
+    } else {
+        val = esp_fifo_pop(s);
+    }
+
+    return val;
+}
+
+static void esp_pdma_write(ESPState *s, uint8_t val)
+{
+    uint32_t dmalen = esp_get_tc(s);
+
+    if (dmalen == 0) {
+        return;
+    }
+
+    if (s->do_cmd) {
+        esp_cmdfifo_push(s, val);
+    } else {
+        esp_fifo_push(s, val);
+    }
+
+    dmalen--;
+    esp_set_tc(s, dmalen);
+}
+
+static int esp_select(ESPState *s)
 {
     int target;
 
     target = s->wregs[ESP_WBUSID] & BUSID_DID;
 
     s->ti_size = 0;
-    s->ti_rptr = 0;
-    s->ti_wptr = 0;
+    fifo8_reset(&s->fifo);
 
     if (s->current_req) {
         /* Started a new command before the old one finished.  Cancel it.  */
@@ -140,148 +215,195 @@
     if (!s->current_dev) {
         /* No such drive */
         s->rregs[ESP_RSTAT] = 0;
-        s->rregs[ESP_RINTR] = INTR_DC;
+        s->rregs[ESP_RINTR] |= INTR_DC;
         s->rregs[ESP_RSEQ] = SEQ_0;
         esp_raise_irq(s);
         return -1;
     }
+
+    /*
+     * Note that we deliberately don't raise the IRQ here: this will be done
+     * either in do_busid_cmd() for DATA OUT transfers or by the deferred
+     * IRQ mechanism in esp_transfer_data() for DATA IN transfers
+     */
+    s->rregs[ESP_RINTR] |= INTR_FC;
+    s->rregs[ESP_RSEQ] = SEQ_CD;
     return 0;
 }
 
-static uint32_t get_cmd(ESPState *s, uint8_t *buf, uint8_t buflen)
+static uint32_t get_cmd(ESPState *s, uint32_t maxlen)
 {
-    uint32_t dmalen;
+    uint8_t buf[ESP_CMDFIFO_SZ];
+    uint32_t dmalen, n;
     int target;
 
     target = s->wregs[ESP_WBUSID] & BUSID_DID;
     if (s->dma) {
-        dmalen = s->rregs[ESP_TCLO];
-        dmalen |= s->rregs[ESP_TCMID] << 8;
-        dmalen |= s->rregs[ESP_TCHI] << 16;
-        if (dmalen > buflen) {
+        dmalen = MIN(esp_get_tc(s), maxlen);
+        if (dmalen == 0) {
             return 0;
         }
         if (s->dma_memory_read) {
             s->dma_memory_read(s->dma_opaque, buf, dmalen);
+            fifo8_push_all(&s->cmdfifo, buf, dmalen);
         } else {
-            memcpy(s->pdma_buf, buf, dmalen);
-            set_pdma(s, PDMA, 0, dmalen);
+            if (esp_select(s) < 0) {
+                fifo8_reset(&s->cmdfifo);
+                return -1;
+            }
             esp_raise_drq(s);
+            fifo8_reset(&s->cmdfifo);
             return 0;
         }
     } else {
-        dmalen = s->ti_size;
-        if (dmalen > TI_BUFSZ) {
+        dmalen = MIN(fifo8_num_used(&s->fifo), maxlen);
+        if (dmalen == 0) {
             return 0;
         }
-        memcpy(buf, s->ti_buf, dmalen);
-        buf[0] = buf[2] >> 5;
+        memcpy(buf, fifo8_pop_buf(&s->fifo, dmalen, &n), dmalen);
+        if (dmalen >= 3) {
+            buf[0] = buf[2] >> 5;
+        }
+        fifo8_push_all(&s->cmdfifo, buf, dmalen);
     }
     trace_esp_get_cmd(dmalen, target);
 
-    if (get_cmd_cb(s) < 0) {
-        return 0;
+    if (esp_select(s) < 0) {
+        fifo8_reset(&s->cmdfifo);
+        return -1;
     }
     return dmalen;
 }
 
-static void do_busid_cmd(ESPState *s, uint8_t *buf, uint8_t busid)
+static void do_busid_cmd(ESPState *s, uint8_t busid)
 {
+    uint32_t n, cmdlen;
     int32_t datalen;
     int lun;
     SCSIDevice *current_lun;
+    uint8_t *buf;
 
     trace_esp_do_busid_cmd(busid);
     lun = busid & 7;
+    cmdlen = fifo8_num_used(&s->cmdfifo);
+    buf = (uint8_t *)fifo8_pop_buf(&s->cmdfifo, cmdlen, &n);
+
     current_lun = scsi_device_find(&s->bus, 0, s->current_dev->id, lun);
     s->current_req = scsi_req_new(current_lun, 0, lun, buf, s);
     datalen = scsi_req_enqueue(s->current_req);
     s->ti_size = datalen;
+    fifo8_reset(&s->cmdfifo);
     if (datalen != 0) {
         s->rregs[ESP_RSTAT] = STAT_TC;
-        s->dma_left = 0;
-        s->dma_counter = 0;
+        s->rregs[ESP_RSEQ] = SEQ_CD;
+        s->ti_cmd = 0;
+        esp_set_tc(s, 0);
         if (datalen > 0) {
+            /*
+             * Switch to DATA IN phase but wait until initial data xfer is
+             * complete before raising the command completion interrupt
+             */
+            s->data_in_ready = false;
             s->rregs[ESP_RSTAT] |= STAT_DI;
         } else {
             s->rregs[ESP_RSTAT] |= STAT_DO;
+            s->rregs[ESP_RINTR] |= INTR_BS | INTR_FC;
+            esp_raise_irq(s);
+            esp_lower_drq(s);
         }
         scsi_req_continue(s->current_req);
+        return;
     }
-    s->rregs[ESP_RINTR] = INTR_BS | INTR_FC;
-    s->rregs[ESP_RSEQ] = SEQ_CD;
-    esp_raise_irq(s);
 }
 
-static void do_cmd(ESPState *s, uint8_t *buf)
+static void do_cmd(ESPState *s)
 {
-    uint8_t busid = buf[0];
+    uint8_t busid = fifo8_pop(&s->cmdfifo);
+    uint32_t n;
 
-    do_busid_cmd(s, &buf[1], busid);
+    s->cmdfifo_cdb_offset--;
+
+    /* Ignore extended messages for now */
+    if (s->cmdfifo_cdb_offset) {
+        fifo8_pop_buf(&s->cmdfifo, s->cmdfifo_cdb_offset, &n);
+        s->cmdfifo_cdb_offset = 0;
+    }
+
+    do_busid_cmd(s, busid);
 }
 
 static void satn_pdma_cb(ESPState *s)
 {
-    if (get_cmd_cb(s) < 0) {
-        return;
-    }
-    if (s->pdma_cur != s->pdma_start) {
-        do_cmd(s, get_pdma_buf(s) + s->pdma_start);
+    s->do_cmd = 0;
+    if (!fifo8_is_empty(&s->cmdfifo)) {
+        s->cmdfifo_cdb_offset = 1;
+        do_cmd(s);
     }
 }
 
 static void handle_satn(ESPState *s)
 {
-    uint8_t buf[32];
-    int len;
+    int32_t cmdlen;
 
     if (s->dma && !s->dma_enabled) {
         s->dma_cb = handle_satn;
         return;
     }
     s->pdma_cb = satn_pdma_cb;
-    len = get_cmd(s, buf, sizeof(buf));
-    if (len)
-        do_cmd(s, buf);
+    cmdlen = get_cmd(s, ESP_CMDFIFO_SZ);
+    if (cmdlen > 0) {
+        s->cmdfifo_cdb_offset = 1;
+        do_cmd(s);
+    } else if (cmdlen == 0) {
+        s->do_cmd = 1;
+        /* Target present, but no cmd yet - switch to command phase */
+        s->rregs[ESP_RSEQ] = SEQ_CD;
+        s->rregs[ESP_RSTAT] = STAT_CD;
+    }
 }
 
 static void s_without_satn_pdma_cb(ESPState *s)
 {
-    if (get_cmd_cb(s) < 0) {
-        return;
-    }
-    if (s->pdma_cur != s->pdma_start) {
-        do_busid_cmd(s, get_pdma_buf(s) + s->pdma_start, 0);
+    uint32_t len;
+
+    s->do_cmd = 0;
+    len = fifo8_num_used(&s->cmdfifo);
+    if (len) {
+        s->cmdfifo_cdb_offset = 0;
+        do_busid_cmd(s, 0);
     }
 }
 
 static void handle_s_without_atn(ESPState *s)
 {
-    uint8_t buf[32];
-    int len;
+    int32_t cmdlen;
 
     if (s->dma && !s->dma_enabled) {
         s->dma_cb = handle_s_without_atn;
         return;
     }
     s->pdma_cb = s_without_satn_pdma_cb;
-    len = get_cmd(s, buf, sizeof(buf));
-    if (len) {
-        do_busid_cmd(s, buf, 0);
+    cmdlen = get_cmd(s, ESP_CMDFIFO_SZ);
+    if (cmdlen > 0) {
+        s->cmdfifo_cdb_offset = 0;
+        do_busid_cmd(s, 0);
+    } else if (cmdlen == 0) {
+        s->do_cmd = 1;
+        /* Target present, but no cmd yet - switch to command phase */
+        s->rregs[ESP_RSEQ] = SEQ_CD;
+        s->rregs[ESP_RSTAT] = STAT_CD;
     }
 }
 
 static void satn_stop_pdma_cb(ESPState *s)
 {
-    if (get_cmd_cb(s) < 0) {
-        return;
-    }
-    s->cmdlen = s->pdma_cur - s->pdma_start;
-    if (s->cmdlen) {
-        trace_esp_handle_satn_stop(s->cmdlen);
+    s->do_cmd = 0;
+    if (!fifo8_is_empty(&s->cmdfifo)) {
+        trace_esp_handle_satn_stop(fifo8_num_used(&s->cmdfifo));
         s->do_cmd = 1;
+        s->cmdfifo_cdb_offset = 1;
         s->rregs[ESP_RSTAT] = STAT_TC | STAT_CD;
-        s->rregs[ESP_RINTR] = INTR_BS | INTR_FC;
+        s->rregs[ESP_RINTR] |= INTR_BS | INTR_FC;
         s->rregs[ESP_RSEQ] = SEQ_CD;
         esp_raise_irq(s);
     }
@@ -289,51 +411,62 @@
 
 static void handle_satn_stop(ESPState *s)
 {
+    int32_t cmdlen;
+
     if (s->dma && !s->dma_enabled) {
         s->dma_cb = handle_satn_stop;
         return;
     }
     s->pdma_cb = satn_stop_pdma_cb;
-    s->cmdlen = get_cmd(s, s->cmdbuf, sizeof(s->cmdbuf));
-    if (s->cmdlen) {
-        trace_esp_handle_satn_stop(s->cmdlen);
+    cmdlen = get_cmd(s, 1);
+    if (cmdlen > 0) {
+        trace_esp_handle_satn_stop(fifo8_num_used(&s->cmdfifo));
         s->do_cmd = 1;
-        s->rregs[ESP_RSTAT] = STAT_TC | STAT_CD;
-        s->rregs[ESP_RINTR] = INTR_BS | INTR_FC;
-        s->rregs[ESP_RSEQ] = SEQ_CD;
+        s->cmdfifo_cdb_offset = 1;
+        s->rregs[ESP_RSTAT] = STAT_MO;
+        s->rregs[ESP_RINTR] |= INTR_BS | INTR_FC;
+        s->rregs[ESP_RSEQ] = SEQ_MO;
         esp_raise_irq(s);
+    } else if (cmdlen == 0) {
+        s->do_cmd = 1;
+        /* Target present, switch to message out phase */
+        s->rregs[ESP_RSEQ] = SEQ_MO;
+        s->rregs[ESP_RSTAT] = STAT_MO;
     }
 }
 
 static void write_response_pdma_cb(ESPState *s)
 {
     s->rregs[ESP_RSTAT] = STAT_TC | STAT_ST;
-    s->rregs[ESP_RINTR] = INTR_BS | INTR_FC;
+    s->rregs[ESP_RINTR] |= INTR_BS | INTR_FC;
     s->rregs[ESP_RSEQ] = SEQ_CD;
     esp_raise_irq(s);
 }
 
 static void write_response(ESPState *s)
 {
+    uint32_t n;
+
     trace_esp_write_response(s->status);
-    s->ti_buf[0] = s->status;
-    s->ti_buf[1] = 0;
+
+    fifo8_reset(&s->fifo);
+    esp_fifo_push(s, s->status);
+    esp_fifo_push(s, 0);
+
     if (s->dma) {
         if (s->dma_memory_write) {
-            s->dma_memory_write(s->dma_opaque, s->ti_buf, 2);
+            s->dma_memory_write(s->dma_opaque,
+                                (uint8_t *)fifo8_pop_buf(&s->fifo, 2, &n), 2);
             s->rregs[ESP_RSTAT] = STAT_TC | STAT_ST;
-            s->rregs[ESP_RINTR] = INTR_BS | INTR_FC;
+            s->rregs[ESP_RINTR] |= INTR_BS | INTR_FC;
             s->rregs[ESP_RSEQ] = SEQ_CD;
         } else {
-            set_pdma(s, TI, 0, 2);
             s->pdma_cb = write_response_pdma_cb;
             esp_raise_drq(s);
             return;
         }
     } else {
         s->ti_size = 2;
-        s->ti_rptr = 0;
-        s->ti_wptr = 2;
         s->rregs[ESP_RFLAGS] = 2;
     }
     esp_raise_irq(s);
@@ -342,27 +475,187 @@
 static void esp_dma_done(ESPState *s)
 {
     s->rregs[ESP_RSTAT] |= STAT_TC;
-    s->rregs[ESP_RINTR] = INTR_BS;
+    s->rregs[ESP_RINTR] |= INTR_BS;
     s->rregs[ESP_RSEQ] = 0;
     s->rregs[ESP_RFLAGS] = 0;
-    s->rregs[ESP_TCLO] = 0;
-    s->rregs[ESP_TCMID] = 0;
-    s->rregs[ESP_TCHI] = 0;
+    esp_set_tc(s, 0);
     esp_raise_irq(s);
 }
 
 static void do_dma_pdma_cb(ESPState *s)
 {
-    int to_device = (s->ti_size < 0);
-    int len = s->pdma_cur - s->pdma_start;
+    int to_device = ((s->rregs[ESP_RSTAT] & 7) == STAT_DO);
+    int len;
+    uint32_t n;
+
     if (s->do_cmd) {
         s->ti_size = 0;
-        s->cmdlen = 0;
         s->do_cmd = 0;
-        do_cmd(s, s->cmdbuf);
+        do_cmd(s);
+        esp_lower_drq(s);
         return;
     }
-    s->dma_left -= len;
+
+    if (to_device) {
+        /* Copy FIFO data to device */
+        len = MIN(s->async_len, ESP_FIFO_SZ);
+        len = MIN(len, fifo8_num_used(&s->fifo));
+        memcpy(s->async_buf, fifo8_pop_buf(&s->fifo, len, &n), len);
+        s->async_buf += n;
+        s->async_len -= n;
+        s->ti_size += n;
+
+        if (n < len) {
+            /* Unaligned accesses can cause FIFO wraparound */
+            len = len - n;
+            memcpy(s->async_buf, fifo8_pop_buf(&s->fifo, len, &n), len);
+            s->async_buf += n;
+            s->async_len -= n;
+            s->ti_size += n;
+        }
+
+        if (s->async_len == 0) {
+            scsi_req_continue(s->current_req);
+            return;
+        }
+
+        if (esp_get_tc(s) == 0) {
+            esp_lower_drq(s);
+            esp_dma_done(s);
+        }
+
+        return;
+    } else {
+        if (s->async_len == 0) {
+            if (s->current_req) {
+                /* Defer until the scsi layer has completed */
+                scsi_req_continue(s->current_req);
+                s->data_in_ready = false;
+            }
+            return;
+        }
+
+        if (esp_get_tc(s) != 0) {
+            /* Copy device data to FIFO */
+            len = MIN(s->async_len, esp_get_tc(s));
+            len = MIN(len, fifo8_num_free(&s->fifo));
+            fifo8_push_all(&s->fifo, s->async_buf, len);
+            s->async_buf += len;
+            s->async_len -= len;
+            s->ti_size -= len;
+            esp_set_tc(s, esp_get_tc(s) - len);
+
+            if (esp_get_tc(s) == 0) {
+                /* Indicate transfer to FIFO is complete */
+                 s->rregs[ESP_RSTAT] |= STAT_TC;
+            }
+            return;
+        }
+
+        /* Partially filled a scsi buffer. Complete immediately.  */
+        esp_lower_drq(s);
+        esp_dma_done(s);
+    }
+}
+
+static void esp_do_dma(ESPState *s)
+{
+    uint32_t len, cmdlen;
+    int to_device = ((s->rregs[ESP_RSTAT] & 7) == STAT_DO);
+    uint8_t buf[ESP_CMDFIFO_SZ];
+
+    len = esp_get_tc(s);
+    if (s->do_cmd) {
+        /*
+         * handle_ti_cmd() case: esp_do_dma() is called only from
+         * handle_ti_cmd() with do_cmd != NULL (see the assert())
+         */
+        cmdlen = fifo8_num_used(&s->cmdfifo);
+        trace_esp_do_dma(cmdlen, len);
+        if (s->dma_memory_read) {
+            s->dma_memory_read(s->dma_opaque, buf, len);
+            fifo8_push_all(&s->cmdfifo, buf, len);
+        } else {
+            s->pdma_cb = do_dma_pdma_cb;
+            esp_raise_drq(s);
+            return;
+        }
+        trace_esp_handle_ti_cmd(cmdlen);
+        s->ti_size = 0;
+        if ((s->rregs[ESP_RSTAT] & 7) == STAT_CD) {
+            /* No command received */
+            if (s->cmdfifo_cdb_offset == fifo8_num_used(&s->cmdfifo)) {
+                return;
+            }
+
+            /* Command has been received */
+            s->do_cmd = 0;
+            do_cmd(s);
+        } else {
+            /*
+             * Extra message out bytes received: update cmdfifo_cdb_offset
+             * and then switch to commmand phase
+             */
+            s->cmdfifo_cdb_offset = fifo8_num_used(&s->cmdfifo);
+            s->rregs[ESP_RSTAT] = STAT_TC | STAT_CD;
+            s->rregs[ESP_RSEQ] = SEQ_CD;
+            s->rregs[ESP_RINTR] |= INTR_BS;
+            esp_raise_irq(s);
+        }
+        return;
+    }
+    if (s->async_len == 0) {
+        /* Defer until data is available.  */
+        return;
+    }
+    if (len > s->async_len) {
+        len = s->async_len;
+    }
+    if (to_device) {
+        if (s->dma_memory_read) {
+            s->dma_memory_read(s->dma_opaque, s->async_buf, len);
+        } else {
+            s->pdma_cb = do_dma_pdma_cb;
+            esp_raise_drq(s);
+            return;
+        }
+    } else {
+        if (s->dma_memory_write) {
+            s->dma_memory_write(s->dma_opaque, s->async_buf, len);
+        } else {
+            /* Adjust TC for any leftover data in the FIFO */
+            if (!fifo8_is_empty(&s->fifo)) {
+                esp_set_tc(s, esp_get_tc(s) - fifo8_num_used(&s->fifo));
+            }
+
+            /* Copy device data to FIFO */
+            len = MIN(len, fifo8_num_free(&s->fifo));
+            fifo8_push_all(&s->fifo, s->async_buf, len);
+            s->async_buf += len;
+            s->async_len -= len;
+            s->ti_size -= len;
+
+            /*
+             * MacOS toolbox uses a TI length of 16 bytes for all commands, so
+             * commands shorter than this must be padded accordingly
+             */
+            if (len < esp_get_tc(s) && esp_get_tc(s) <= ESP_FIFO_SZ) {
+                while (fifo8_num_used(&s->fifo) < ESP_FIFO_SZ) {
+                    esp_fifo_push(s, 0);
+                    len++;
+                }
+            }
+
+            esp_set_tc(s, esp_get_tc(s) - len);
+            s->pdma_cb = do_dma_pdma_cb;
+            esp_raise_drq(s);
+
+            /* Indicate transfer to FIFO is complete */
+            s->rregs[ESP_RSTAT] |= STAT_TC;
+            return;
+        }
+    }
+    esp_set_tc(s, esp_get_tc(s) - len);
     s->async_buf += len;
     s->async_len -= len;
     if (to_device) {
@@ -377,107 +670,98 @@
          * complete the DMA operation immediately.  Otherwise defer
          * until the scsi layer has completed.
          */
-        if (to_device || s->dma_left != 0 || s->ti_size == 0) {
+        if (to_device || esp_get_tc(s) != 0 || s->ti_size == 0) {
             return;
         }
     }
 
     /* Partially filled a scsi buffer. Complete immediately.  */
     esp_dma_done(s);
+    esp_lower_drq(s);
 }
 
-static void esp_do_dma(ESPState *s)
+static void esp_do_nodma(ESPState *s)
 {
-    uint32_t len;
-    int to_device;
+    int to_device = ((s->rregs[ESP_RSTAT] & 7) == STAT_DO);
+    uint32_t cmdlen, n;
+    int len;
 
-    len = s->dma_left;
     if (s->do_cmd) {
-        /*
-         * handle_ti_cmd() case: esp_do_dma() is called only from
-         * handle_ti_cmd() with do_cmd != NULL (see the assert())
-         */
-        trace_esp_do_dma(s->cmdlen, len);
-        assert (s->cmdlen <= sizeof(s->cmdbuf) &&
-                len <= sizeof(s->cmdbuf) - s->cmdlen);
-        if (s->dma_memory_read) {
-            s->dma_memory_read(s->dma_opaque, &s->cmdbuf[s->cmdlen], len);
-        } else {
-            set_pdma(s, CMD, s->cmdlen, len);
-            s->pdma_cb = do_dma_pdma_cb;
-            esp_raise_drq(s);
-            return;
-        }
-        trace_esp_handle_ti_cmd(s->cmdlen);
+        cmdlen = fifo8_num_used(&s->cmdfifo);
+        trace_esp_handle_ti_cmd(cmdlen);
         s->ti_size = 0;
-        s->cmdlen = 0;
-        s->do_cmd = 0;
-        do_cmd(s, s->cmdbuf);
+        if ((s->rregs[ESP_RSTAT] & 7) == STAT_CD) {
+            /* No command received */
+            if (s->cmdfifo_cdb_offset == fifo8_num_used(&s->cmdfifo)) {
+                return;
+            }
+
+            /* Command has been received */
+            s->do_cmd = 0;
+            do_cmd(s);
+        } else {
+            /*
+             * Extra message out bytes received: update cmdfifo_cdb_offset
+             * and then switch to commmand phase
+             */
+            s->cmdfifo_cdb_offset = fifo8_num_used(&s->cmdfifo);
+            s->rregs[ESP_RSTAT] = STAT_TC | STAT_CD;
+            s->rregs[ESP_RSEQ] = SEQ_CD;
+            s->rregs[ESP_RINTR] |= INTR_BS;
+            esp_raise_irq(s);
+        }
         return;
     }
+
     if (s->async_len == 0) {
         /* Defer until data is available.  */
         return;
     }
-    if (len > s->async_len) {
-        len = s->async_len;
-    }
-    to_device = (s->ti_size < 0);
+
     if (to_device) {
-        if (s->dma_memory_read) {
-            s->dma_memory_read(s->dma_opaque, s->async_buf, len);
-        } else {
-            set_pdma(s, ASYNC, 0, len);
-            s->pdma_cb = do_dma_pdma_cb;
-            esp_raise_drq(s);
-            return;
-        }
-    } else {
-        if (s->dma_memory_write) {
-            s->dma_memory_write(s->dma_opaque, s->async_buf, len);
-        } else {
-            set_pdma(s, ASYNC, 0, len);
-            s->pdma_cb = do_dma_pdma_cb;
-            esp_raise_drq(s);
-            return;
-        }
-    }
-    s->dma_left -= len;
-    s->async_buf += len;
-    s->async_len -= len;
-    if (to_device)
+        len = MIN(fifo8_num_used(&s->fifo), ESP_FIFO_SZ);
+        memcpy(s->async_buf, fifo8_pop_buf(&s->fifo, len, &n), len);
+        s->async_buf += len;
+        s->async_len -= len;
         s->ti_size += len;
-    else
+    } else {
+        len = MIN(s->ti_size, s->async_len);
+        len = MIN(len, fifo8_num_free(&s->fifo));
+        fifo8_push_all(&s->fifo, s->async_buf, len);
+        s->async_buf += len;
+        s->async_len -= len;
         s->ti_size -= len;
+    }
+
     if (s->async_len == 0) {
         scsi_req_continue(s->current_req);
-        /* If there is still data to be read from the device then
-           complete the DMA operation immediately.  Otherwise defer
-           until the scsi layer has completed.  */
-        if (to_device || s->dma_left != 0 || s->ti_size == 0) {
+
+        if (to_device || s->ti_size == 0) {
             return;
         }
     }
 
-    /* Partially filled a scsi buffer. Complete immediately.  */
-    esp_dma_done(s);
+    s->rregs[ESP_RINTR] |= INTR_BS;
+    esp_raise_irq(s);
 }
 
-static void esp_report_command_complete(ESPState *s, uint32_t status)
+void esp_command_complete(SCSIRequest *req, size_t resid)
 {
+    ESPState *s = req->hba_private;
+
     trace_esp_command_complete();
     if (s->ti_size != 0) {
         trace_esp_command_complete_unexpected();
     }
     s->ti_size = 0;
-    s->dma_left = 0;
     s->async_len = 0;
-    if (status) {
+    if (req->status) {
         trace_esp_command_complete_fail();
     }
-    s->status = status;
+    s->status = req->status;
     s->rregs[ESP_RSTAT] = STAT_ST;
     esp_dma_done(s);
+    esp_lower_drq(s);
     if (s->current_req) {
         scsi_req_unref(s->current_req);
         s->current_req = NULL;
@@ -485,73 +769,83 @@
     }
 }
 
-void esp_command_complete(SCSIRequest *req, size_t resid)
-{
-    ESPState *s = req->hba_private;
-
-    if (s->rregs[ESP_RSTAT] & STAT_INT) {
-        /* Defer handling command complete until the previous
-         * interrupt has been handled.
-         */
-        trace_esp_command_complete_deferred();
-        s->deferred_status = req->status;
-        s->deferred_complete = true;
-        return;
-    }
-    esp_report_command_complete(s, req->status);
-}
-
 void esp_transfer_data(SCSIRequest *req, uint32_t len)
 {
     ESPState *s = req->hba_private;
+    int to_device = ((s->rregs[ESP_RSTAT] & 7) == STAT_DO);
+    uint32_t dmalen = esp_get_tc(s);
 
     assert(!s->do_cmd);
-    trace_esp_transfer_data(s->dma_left, s->ti_size);
+    trace_esp_transfer_data(dmalen, s->ti_size);
     s->async_len = len;
     s->async_buf = scsi_req_get_buf(req);
-    if (s->dma_left) {
-        esp_do_dma(s);
-    } else if (s->dma_counter != 0 && s->ti_size <= 0) {
-        /* If this was the last part of a DMA transfer then the
-           completion interrupt is deferred to here.  */
-        esp_dma_done(s);
+
+    if (!to_device && !s->data_in_ready) {
+        /*
+         * Initial incoming data xfer is complete so raise command
+         * completion interrupt
+         */
+        s->data_in_ready = true;
+        s->rregs[ESP_RSTAT] |= STAT_TC;
+        s->rregs[ESP_RINTR] |= INTR_BS;
+        esp_raise_irq(s);
+
+        /*
+         * If data is ready to transfer and the TI command has already
+         * been executed, start DMA immediately. Otherwise DMA will start
+         * when host sends the TI command
+         */
+        if (s->ti_size && (s->rregs[ESP_CMD] == (CMD_TI | CMD_DMA))) {
+            esp_do_dma(s);
+        }
+        return;
+    }
+
+    if (s->ti_cmd == 0) {
+        /*
+         * Always perform the initial transfer upon reception of the next TI
+         * command to ensure the DMA/non-DMA status of the command is correct.
+         * It is not possible to use s->dma directly in the section below as
+         * some OSs send non-DMA NOP commands after a DMA transfer. Hence if the
+         * async data transfer is delayed then s->dma is set incorrectly.
+         */
+        return;
+    }
+
+    if (s->ti_cmd & CMD_DMA) {
+        if (dmalen) {
+            esp_do_dma(s);
+        } else if (s->ti_size <= 0) {
+            /*
+             * If this was the last part of a DMA transfer then the
+             * completion interrupt is deferred to here.
+             */
+            esp_dma_done(s);
+            esp_lower_drq(s);
+        }
+    } else {
+        esp_do_nodma(s);
     }
 }
 
 static void handle_ti(ESPState *s)
 {
-    uint32_t dmalen, minlen;
+    uint32_t dmalen;
 
     if (s->dma && !s->dma_enabled) {
         s->dma_cb = handle_ti;
         return;
     }
 
-    dmalen = s->rregs[ESP_TCLO];
-    dmalen |= s->rregs[ESP_TCMID] << 8;
-    dmalen |= s->rregs[ESP_TCHI] << 16;
-    if (dmalen==0) {
-      dmalen=0x10000;
-    }
-    s->dma_counter = dmalen;
-
-    if (s->do_cmd)
-        minlen = (dmalen < ESP_CMDBUF_SZ) ? dmalen : ESP_CMDBUF_SZ;
-    else if (s->ti_size < 0)
-        minlen = (dmalen < -s->ti_size) ? dmalen : -s->ti_size;
-    else
-        minlen = (dmalen < s->ti_size) ? dmalen : s->ti_size;
-    trace_esp_handle_ti(minlen);
+    s->ti_cmd = s->rregs[ESP_CMD];
     if (s->dma) {
-        s->dma_left = minlen;
+        dmalen = esp_get_tc(s);
+        trace_esp_handle_ti(dmalen);
         s->rregs[ESP_RSTAT] &= ~STAT_TC;
         esp_do_dma(s);
-    } else if (s->do_cmd) {
-        trace_esp_handle_ti_cmd(s->cmdlen);
-        s->ti_size = 0;
-        s->cmdlen = 0;
-        s->do_cmd = 0;
-        do_cmd(s, s->cmdbuf);
+    } else {
+        trace_esp_handle_ti(s->ti_size);
+        esp_do_nodma(s);
     }
 }
 
@@ -561,8 +855,8 @@
     memset(s->wregs, 0, ESP_REGS);
     s->tchi_written = 0;
     s->ti_size = 0;
-    s->ti_rptr = 0;
-    s->ti_wptr = 0;
+    fifo8_reset(&s->fifo);
+    fifo8_reset(&s->cmdfifo);
     s->dma = 0;
     s->do_cmd = 0;
     s->dma_cb = NULL;
@@ -586,46 +880,50 @@
 
 uint64_t esp_reg_read(ESPState *s, uint32_t saddr)
 {
-    uint32_t old_val;
+    uint32_t val;
 
-    trace_esp_mem_readb(saddr, s->rregs[saddr]);
     switch (saddr) {
     case ESP_FIFO:
-        if ((s->rregs[ESP_RSTAT] & STAT_PIO_MASK) == 0) {
+        if (s->dma_memory_read && s->dma_memory_write &&
+                (s->rregs[ESP_RSTAT] & STAT_PIO_MASK) == 0) {
             /* Data out.  */
             qemu_log_mask(LOG_UNIMP, "esp: PIO data read not implemented\n");
             s->rregs[ESP_FIFO] = 0;
-        } else if (s->ti_rptr < s->ti_wptr) {
-            s->ti_size--;
-            s->rregs[ESP_FIFO] = s->ti_buf[s->ti_rptr++];
+        } else {
+            s->rregs[ESP_FIFO] = esp_fifo_pop(s);
         }
-        if (s->ti_rptr == s->ti_wptr) {
-            s->ti_rptr = 0;
-            s->ti_wptr = 0;
-        }
+        val = s->rregs[ESP_FIFO];
         break;
     case ESP_RINTR:
-        /* Clear sequence step, interrupt register and all status bits
-           except TC */
-        old_val = s->rregs[ESP_RINTR];
+        /*
+         * Clear sequence step, interrupt register and all status bits
+         * except TC
+         */
+        val = s->rregs[ESP_RINTR];
         s->rregs[ESP_RINTR] = 0;
         s->rregs[ESP_RSTAT] &= ~STAT_TC;
-        s->rregs[ESP_RSEQ] = SEQ_CD;
+        s->rregs[ESP_RSEQ] = SEQ_0;
         esp_lower_irq(s);
-        if (s->deferred_complete) {
-            esp_report_command_complete(s, s->deferred_status);
-            s->deferred_complete = false;
-        }
-        return old_val;
+        break;
     case ESP_TCHI:
         /* Return the unique id if the value has never been written */
         if (!s->tchi_written) {
-            return s->chip_id;
+            val = s->chip_id;
+        } else {
+            val = s->rregs[saddr];
         }
+        break;
+     case ESP_RFLAGS:
+        /* Bottom 5 bits indicate number of bytes in FIFO */
+        val = fifo8_num_used(&s->fifo);
+        break;
     default:
+        val = s->rregs[saddr];
         break;
     }
-    return s->rregs[saddr];
+
+    trace_esp_mem_readb(saddr, val);
+    return val;
 }
 
 void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val)
@@ -641,16 +939,15 @@
         break;
     case ESP_FIFO:
         if (s->do_cmd) {
-            if (s->cmdlen < ESP_CMDBUF_SZ) {
-                s->cmdbuf[s->cmdlen++] = val & 0xff;
-            } else {
-                trace_esp_error_fifo_overrun();
-            }
-        } else if (s->ti_wptr == TI_BUFSZ - 1) {
-            trace_esp_error_fifo_overrun();
+            esp_cmdfifo_push(s, val);
         } else {
-            s->ti_size++;
-            s->ti_buf[s->ti_wptr++] = val & 0xff;
+            esp_fifo_push(s, val);
+        }
+
+        /* Non-DMA transfers raise an interrupt after every byte */
+        if (s->rregs[ESP_CMD] == CMD_TI) {
+            s->rregs[ESP_RINTR] |= INTR_FC | INTR_BS;
+            esp_raise_irq(s);
         }
         break;
     case ESP_CMD:
@@ -658,22 +955,21 @@
         if (val & CMD_DMA) {
             s->dma = 1;
             /* Reload DMA counter.  */
-            s->rregs[ESP_TCLO] = s->wregs[ESP_TCLO];
-            s->rregs[ESP_TCMID] = s->wregs[ESP_TCMID];
-            s->rregs[ESP_TCHI] = s->wregs[ESP_TCHI];
+            if (esp_get_stc(s) == 0) {
+                esp_set_tc(s, 0x10000);
+            } else {
+                esp_set_tc(s, esp_get_stc(s));
+            }
         } else {
             s->dma = 0;
         }
-        switch(val & CMD_CMD) {
+        switch (val & CMD_CMD) {
         case CMD_NOP:
             trace_esp_mem_writeb_cmd_nop(val);
             break;
         case CMD_FLUSH:
             trace_esp_mem_writeb_cmd_flush(val);
-            //s->ti_size = 0;
-            s->rregs[ESP_RINTR] = INTR_FC;
-            s->rregs[ESP_RSEQ] = 0;
-            s->rregs[ESP_RFLAGS] = 0;
+            fifo8_reset(&s->fifo);
             break;
         case CMD_RESET:
             trace_esp_mem_writeb_cmd_reset(val);
@@ -681,23 +977,24 @@
             break;
         case CMD_BUSRESET:
             trace_esp_mem_writeb_cmd_bus_reset(val);
-            s->rregs[ESP_RINTR] = INTR_RST;
             if (!(s->wregs[ESP_CFG1] & CFG1_RESREPT)) {
+                s->rregs[ESP_RINTR] |= INTR_RST;
                 esp_raise_irq(s);
             }
             break;
         case CMD_TI:
+            trace_esp_mem_writeb_cmd_ti(val);
             handle_ti(s);
             break;
         case CMD_ICCS:
             trace_esp_mem_writeb_cmd_iccs(val);
             write_response(s);
-            s->rregs[ESP_RINTR] = INTR_FC;
+            s->rregs[ESP_RINTR] |= INTR_FC;
             s->rregs[ESP_RSTAT] |= STAT_MI;
             break;
         case CMD_MSGACC:
             trace_esp_mem_writeb_cmd_msgacc(val);
-            s->rregs[ESP_RINTR] = INTR_DC;
+            s->rregs[ESP_RINTR] |= INTR_DC;
             s->rregs[ESP_RSEQ] = 0;
             s->rregs[ESP_RFLAGS] = 0;
             esp_raise_irq(s);
@@ -705,7 +1002,7 @@
         case CMD_PAD:
             trace_esp_mem_writeb_cmd_pad(val);
             s->rregs[ESP_RSTAT] = STAT_TC;
-            s->rregs[ESP_RINTR] = INTR_FC;
+            s->rregs[ESP_RINTR] |= INTR_FC;
             s->rregs[ESP_RSEQ] = 0;
             break;
         case CMD_SATN:
@@ -763,74 +1060,112 @@
     return (size == 1) || (is_write && size == 4);
 }
 
-static bool esp_pdma_needed(void *opaque)
+static bool esp_is_before_version_5(void *opaque, int version_id)
 {
-    ESPState *s = opaque;
-    return s->dma_memory_read == NULL && s->dma_memory_write == NULL &&
-           s->dma_enabled;
+    ESPState *s = ESP(opaque);
+
+    version_id = MIN(version_id, s->mig_version_id);
+    return version_id < 5;
 }
 
-static const VMStateDescription vmstate_esp_pdma = {
-    .name = "esp/pdma",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .needed = esp_pdma_needed,
-    .fields = (VMStateField[]) {
-        VMSTATE_BUFFER(pdma_buf, ESPState),
-        VMSTATE_INT32(pdma_origin, ESPState),
-        VMSTATE_UINT32(pdma_len, ESPState),
-        VMSTATE_UINT32(pdma_start, ESPState),
-        VMSTATE_UINT32(pdma_cur, ESPState),
-        VMSTATE_END_OF_LIST()
+static bool esp_is_version_5(void *opaque, int version_id)
+{
+    ESPState *s = ESP(opaque);
+
+    version_id = MIN(version_id, s->mig_version_id);
+    return version_id == 5;
+}
+
+static int esp_pre_save(void *opaque)
+{
+    ESPState *s = ESP(opaque);
+
+    s->mig_version_id = vmstate_esp.version_id;
+    return 0;
+}
+
+static int esp_post_load(void *opaque, int version_id)
+{
+    ESPState *s = ESP(opaque);
+    int len, i;
+
+    version_id = MIN(version_id, s->mig_version_id);
+
+    if (version_id < 5) {
+        esp_set_tc(s, s->mig_dma_left);
+
+        /* Migrate ti_buf to fifo */
+        len = s->mig_ti_wptr - s->mig_ti_rptr;
+        for (i = 0; i < len; i++) {
+            fifo8_push(&s->fifo, s->mig_ti_buf[i]);
+        }
+
+        /* Migrate cmdbuf to cmdfifo */
+        for (i = 0; i < s->mig_cmdlen; i++) {
+            fifo8_push(&s->cmdfifo, s->mig_cmdbuf[i]);
+        }
     }
-};
+
+    s->mig_version_id = vmstate_esp.version_id;
+    return 0;
+}
 
 const VMStateDescription vmstate_esp = {
-    .name ="esp",
-    .version_id = 4,
+    .name = "esp",
+    .version_id = 5,
     .minimum_version_id = 3,
+    .pre_save = esp_pre_save,
+    .post_load = esp_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_BUFFER(rregs, ESPState),
         VMSTATE_BUFFER(wregs, ESPState),
         VMSTATE_INT32(ti_size, ESPState),
-        VMSTATE_UINT32(ti_rptr, ESPState),
-        VMSTATE_UINT32(ti_wptr, ESPState),
-        VMSTATE_BUFFER(ti_buf, ESPState),
+        VMSTATE_UINT32_TEST(mig_ti_rptr, ESPState, esp_is_before_version_5),
+        VMSTATE_UINT32_TEST(mig_ti_wptr, ESPState, esp_is_before_version_5),
+        VMSTATE_BUFFER_TEST(mig_ti_buf, ESPState, esp_is_before_version_5),
         VMSTATE_UINT32(status, ESPState),
-        VMSTATE_UINT32(deferred_status, ESPState),
-        VMSTATE_BOOL(deferred_complete, ESPState),
+        VMSTATE_UINT32_TEST(mig_deferred_status, ESPState,
+                            esp_is_before_version_5),
+        VMSTATE_BOOL_TEST(mig_deferred_complete, ESPState,
+                          esp_is_before_version_5),
         VMSTATE_UINT32(dma, ESPState),
-        VMSTATE_PARTIAL_BUFFER(cmdbuf, ESPState, 16),
-        VMSTATE_BUFFER_START_MIDDLE_V(cmdbuf, ESPState, 16, 4),
-        VMSTATE_UINT32(cmdlen, ESPState),
+        VMSTATE_STATIC_BUFFER(mig_cmdbuf, ESPState, 0,
+                              esp_is_before_version_5, 0, 16),
+        VMSTATE_STATIC_BUFFER(mig_cmdbuf, ESPState, 4,
+                              esp_is_before_version_5, 16,
+                              sizeof(typeof_field(ESPState, mig_cmdbuf))),
+        VMSTATE_UINT32_TEST(mig_cmdlen, ESPState, esp_is_before_version_5),
         VMSTATE_UINT32(do_cmd, ESPState),
-        VMSTATE_UINT32(dma_left, ESPState),
+        VMSTATE_UINT32_TEST(mig_dma_left, ESPState, esp_is_before_version_5),
+        VMSTATE_BOOL_TEST(data_in_ready, ESPState, esp_is_version_5),
+        VMSTATE_UINT8_TEST(cmdfifo_cdb_offset, ESPState, esp_is_version_5),
+        VMSTATE_FIFO8_TEST(fifo, ESPState, esp_is_version_5),
+        VMSTATE_FIFO8_TEST(cmdfifo, ESPState, esp_is_version_5),
+        VMSTATE_UINT8_TEST(ti_cmd, ESPState, esp_is_version_5),
         VMSTATE_END_OF_LIST()
     },
-    .subsections = (const VMStateDescription * []) {
-        &vmstate_esp_pdma,
-        NULL
-    }
 };
 
 static void sysbus_esp_mem_write(void *opaque, hwaddr addr,
                                  uint64_t val, unsigned int size)
 {
     SysBusESPState *sysbus = opaque;
+    ESPState *s = ESP(&sysbus->esp);
     uint32_t saddr;
 
     saddr = addr >> sysbus->it_shift;
-    esp_reg_write(&sysbus->esp, saddr, val);
+    esp_reg_write(s, saddr, val);
 }
 
 static uint64_t sysbus_esp_mem_read(void *opaque, hwaddr addr,
                                     unsigned int size)
 {
     SysBusESPState *sysbus = opaque;
+    ESPState *s = ESP(&sysbus->esp);
     uint32_t saddr;
 
     saddr = addr >> sysbus->it_shift;
-    return esp_reg_read(&sysbus->esp, saddr);
+    return esp_reg_read(s, saddr);
 }
 
 static const MemoryRegionOps sysbus_esp_mem_ops = {
@@ -844,36 +1179,23 @@
                                   uint64_t val, unsigned int size)
 {
     SysBusESPState *sysbus = opaque;
-    ESPState *s = &sysbus->esp;
+    ESPState *s = ESP(&sysbus->esp);
     uint32_t dmalen;
-    uint8_t *buf = get_pdma_buf(s);
 
-    dmalen = s->rregs[ESP_TCLO];
-    dmalen |= s->rregs[ESP_TCMID] << 8;
-    dmalen |= s->rregs[ESP_TCHI] << 16;
-    if (dmalen == 0 || s->pdma_len == 0) {
-        return;
-    }
+    trace_esp_pdma_write(size);
+
     switch (size) {
     case 1:
-        buf[s->pdma_cur++] = val;
-        s->pdma_len--;
-        dmalen--;
+        esp_pdma_write(s, val);
         break;
     case 2:
-        buf[s->pdma_cur++] = val >> 8;
-        buf[s->pdma_cur++] = val;
-        s->pdma_len -= 2;
-        dmalen -= 2;
+        esp_pdma_write(s, val >> 8);
+        esp_pdma_write(s, val);
         break;
     }
-    s->rregs[ESP_TCLO] = dmalen & 0xff;
-    s->rregs[ESP_TCMID] = dmalen >> 8;
-    s->rregs[ESP_TCHI] = dmalen >> 16;
-    if (s->pdma_len == 0 && s->pdma_cb) {
-        esp_lower_drq(s);
+    dmalen = esp_get_tc(s);
+    if (dmalen == 0 || fifo8_num_free(&s->fifo) < 2) {
         s->pdma_cb(s);
-        s->pdma_cb = NULL;
     }
 }
 
@@ -881,29 +1203,22 @@
                                      unsigned int size)
 {
     SysBusESPState *sysbus = opaque;
-    ESPState *s = &sysbus->esp;
-    uint8_t *buf = get_pdma_buf(s);
+    ESPState *s = ESP(&sysbus->esp);
     uint64_t val = 0;
 
-    if (s->pdma_len == 0) {
-        return 0;
-    }
+    trace_esp_pdma_read(size);
+
     switch (size) {
     case 1:
-        val = buf[s->pdma_cur++];
-        s->pdma_len--;
+        val = esp_pdma_read(s);
         break;
     case 2:
-        val = buf[s->pdma_cur++];
-        val = (val << 8) | buf[s->pdma_cur++];
-        s->pdma_len -= 2;
+        val = esp_pdma_read(s);
+        val = (val << 8) | esp_pdma_read(s);
         break;
     }
-
-    if (s->pdma_len == 0 && s->pdma_cb) {
-        esp_lower_drq(s);
+    if (fifo8_num_used(&s->fifo) < 2) {
         s->pdma_cb(s);
-        s->pdma_cb = NULL;
     }
     return val;
 }
@@ -913,7 +1228,9 @@
     .write = sysbus_esp_pdma_write,
     .endianness = DEVICE_NATIVE_ENDIAN,
     .valid.min_access_size = 1,
-    .valid.max_access_size = 2,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 2,
 };
 
 static const struct SCSIBusInfo esp_scsi_info = {
@@ -928,8 +1245,8 @@
 
 static void sysbus_esp_gpio_demux(void *opaque, int irq, int level)
 {
-    SysBusESPState *sysbus = ESP(opaque);
-    ESPState *s = &sysbus->esp;
+    SysBusESPState *sysbus = SYSBUS_ESP(opaque);
+    ESPState *s = ESP(&sysbus->esp);
 
     switch (irq) {
     case 0:
@@ -944,8 +1261,12 @@
 static void sysbus_esp_realize(DeviceState *dev, Error **errp)
 {
     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
-    SysBusESPState *sysbus = ESP(dev);
-    ESPState *s = &sysbus->esp;
+    SysBusESPState *sysbus = SYSBUS_ESP(dev);
+    ESPState *s = ESP(&sysbus->esp);
+
+    if (!qdev_realize(DEVICE(s), NULL, errp)) {
+        return;
+    }
 
     sysbus_init_irq(sbd, &s->irq);
     sysbus_init_irq(sbd, &s->irq_data);
@@ -956,7 +1277,7 @@
                           sysbus, "esp-regs", ESP_REGS << sysbus->it_shift);
     sysbus_init_mmio(sbd, &sysbus->iomem);
     memory_region_init_io(&sysbus->pdma, OBJECT(sysbus), &sysbus_esp_pdma_ops,
-                          sysbus, "esp-pdma", 2);
+                          sysbus, "esp-pdma", 4);
     sysbus_init_mmio(sbd, &sysbus->pdma);
 
     qdev_init_gpio_in(dev, sysbus_esp_gpio_demux, 2);
@@ -966,15 +1287,25 @@
 
 static void sysbus_esp_hard_reset(DeviceState *dev)
 {
-    SysBusESPState *sysbus = ESP(dev);
-    esp_hard_reset(&sysbus->esp);
+    SysBusESPState *sysbus = SYSBUS_ESP(dev);
+    ESPState *s = ESP(&sysbus->esp);
+
+    esp_hard_reset(s);
+}
+
+static void sysbus_esp_init(Object *obj)
+{
+    SysBusESPState *sysbus = SYSBUS_ESP(obj);
+
+    object_initialize_child(obj, "esp", &sysbus->esp, TYPE_ESP);
 }
 
 static const VMStateDescription vmstate_sysbus_esp_scsi = {
     .name = "sysbusespscsi",
-    .version_id = 1,
+    .version_id = 2,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
+        VMSTATE_UINT8_V(esp.mig_version_id, SysBusESPState, 2),
         VMSTATE_STRUCT(esp, SysBusESPState, 0, vmstate_esp, ESPState),
         VMSTATE_END_OF_LIST()
     }
@@ -991,15 +1322,51 @@
 }
 
 static const TypeInfo sysbus_esp_info = {
-    .name          = TYPE_ESP,
+    .name          = TYPE_SYSBUS_ESP,
     .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_init = sysbus_esp_init,
     .instance_size = sizeof(SysBusESPState),
     .class_init    = sysbus_esp_class_init,
 };
 
+static void esp_finalize(Object *obj)
+{
+    ESPState *s = ESP(obj);
+
+    fifo8_destroy(&s->fifo);
+    fifo8_destroy(&s->cmdfifo);
+}
+
+static void esp_init(Object *obj)
+{
+    ESPState *s = ESP(obj);
+
+    fifo8_create(&s->fifo, ESP_FIFO_SZ);
+    fifo8_create(&s->cmdfifo, ESP_CMDFIFO_SZ);
+}
+
+static void esp_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    /* internal device for sysbusesp/pciespscsi, not user-creatable */
+    dc->user_creatable = false;
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+}
+
+static const TypeInfo esp_info = {
+    .name = TYPE_ESP,
+    .parent = TYPE_DEVICE,
+    .instance_init = esp_init,
+    .instance_finalize = esp_finalize,
+    .instance_size = sizeof(ESPState),
+    .class_init = esp_class_init,
+};
+
 static void esp_register_types(void)
 {
     type_register_static(&sysbus_esp_info);
+    type_register_static(&esp_info);
 }
 
 type_init(esp_register_types)
diff --git a/hw/scsi/trace-events b/hw/scsi/trace-events
index 9788661..1c331fb 100644
--- a/hw/scsi/trace-events
+++ b/hw/scsi/trace-events
@@ -159,8 +159,12 @@
 esp_error_invalid_write(uint32_t val, uint32_t addr) "invalid write of 0x%02x at [0x%x]"
 esp_raise_irq(void) "Raise IRQ"
 esp_lower_irq(void) "Lower IRQ"
+esp_raise_drq(void) "Raise DREQ"
+esp_lower_drq(void) "Lower DREQ"
 esp_dma_enable(void) "Raise enable"
 esp_dma_disable(void) "Lower enable"
+esp_pdma_read(int size) "pDMA read %u bytes"
+esp_pdma_write(int size) "pDMA write %u bytes"
 esp_get_cmd(uint32_t dmalen, int target) "len %d target %d"
 esp_do_busid_cmd(uint8_t busid) "busid 0x%x"
 esp_handle_satn_stop(uint32_t cmdlen) "cmdlen %d"
@@ -189,6 +193,7 @@
 esp_mem_writeb_cmd_selatns(uint32_t val) "Select with ATN & stop (0x%2.2x)"
 esp_mem_writeb_cmd_ensel(uint32_t val) "Enable selection (0x%2.2x)"
 esp_mem_writeb_cmd_dissel(uint32_t val) "Disable selection (0x%2.2x)"
+esp_mem_writeb_cmd_ti(uint32_t val) "Transfer Information (0x%2.2x)"
 
 # esp-pci.c
 esp_pci_error_invalid_dma_direction(void) "invalid DMA transfer direction"
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index 38ca1e3..312e2af 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -334,7 +334,7 @@
                                    OBJECT(dma), "espdma"));
     sysbus_connect_irq(SYS_BUS_DEVICE(espdma), 0, espdma_irq);
 
-    esp = ESP(object_resolve_path_component(OBJECT(espdma), "esp"));
+    esp = SYSBUS_ESP(object_resolve_path_component(OBJECT(espdma), "esp"));
 
     ledma = SPARC32_LEDMA_DEVICE(object_resolve_path_component(
                                  OBJECT(dma), "ledma"));
diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c
index a897034..1e9dba2 100644
--- a/hw/ssi/xilinx_spips.c
+++ b/hw/ssi/xilinx_spips.c
@@ -176,7 +176,8 @@
     FIELD(GQSPI_FIFO_CTRL, GENERIC_FIFO_RESET, 0, 1)
 #define R_GQSPI_GFIFO_THRESH    (0x150 / 4)
 #define R_GQSPI_DATA_STS (0x15c / 4)
-/* We use the snapshot register to hold the core state for the currently
+/*
+ * We use the snapshot register to hold the core state for the currently
  * or most recently executed command. So the generic fifo format is defined
  * for the snapshot register
  */
@@ -194,13 +195,6 @@
 #define R_GQSPI_MOD_ID        (0x1fc / 4)
 #define R_GQSPI_MOD_ID_RESET  (0x10a0000)
 
-#define R_QSPIDMA_DST_CTRL         (0x80c / 4)
-#define R_QSPIDMA_DST_CTRL_RESET   (0x803ffa00)
-#define R_QSPIDMA_DST_I_MASK       (0x820 / 4)
-#define R_QSPIDMA_DST_I_MASK_RESET (0xfe)
-#define R_QSPIDMA_DST_CTRL2        (0x824 / 4)
-#define R_QSPIDMA_DST_CTRL2_RESET  (0x081bfff8)
-
 /* size of TXRX FIFOs */
 #define RXFF_A          (128)
 #define TXFF_A          (128)
@@ -416,15 +410,13 @@
     s->regs[R_GQSPI_GPIO] = 1;
     s->regs[R_GQSPI_LPBK_DLY_ADJ] = R_GQSPI_LPBK_DLY_ADJ_RESET;
     s->regs[R_GQSPI_MOD_ID] = R_GQSPI_MOD_ID_RESET;
-    s->regs[R_QSPIDMA_DST_CTRL] = R_QSPIDMA_DST_CTRL_RESET;
-    s->regs[R_QSPIDMA_DST_I_MASK] = R_QSPIDMA_DST_I_MASK_RESET;
-    s->regs[R_QSPIDMA_DST_CTRL2] = R_QSPIDMA_DST_CTRL2_RESET;
     s->man_start_com_g = false;
     s->gqspi_irqline = 0;
     xlnx_zynqmp_qspips_update_ixr(s);
 }
 
-/* N way (num) in place bit striper. Lay out row wise bits (MSB to LSB)
+/*
+ * N way (num) in place bit striper. Lay out row wise bits (MSB to LSB)
  * column wise (from element 0 to N-1). num is the length of x, and dir
  * reverses the direction of the transform. Best illustrated by example:
  * Each digit in the below array is a single bit (num == 3):
@@ -637,8 +629,10 @@
                 tx_rx[i] = tx;
             }
         } else {
-            /* Extract a dummy byte and generate dummy cycles according to the
-             * link state */
+            /*
+             * Extract a dummy byte and generate dummy cycles according to the
+             * link state
+             */
             tx = fifo8_pop(&s->tx_fifo);
             dummy_cycles = 8 / s->link_state;
         }
@@ -721,8 +715,9 @@
             }
             break;
         case (SNOOP_ADDR):
-            /* Address has been transmitted, transmit dummy cycles now if
-             * needed */
+            /*
+             * Address has been transmitted, transmit dummy cycles now if needed
+             */
             if (s->cmd_dummies < 0) {
                 s->snoop_state = SNOOP_NONE;
             } else {
@@ -876,7 +871,7 @@
 }
 
 static uint64_t xilinx_spips_read(void *opaque, hwaddr addr,
-                                                        unsigned size)
+                                  unsigned size)
 {
     XilinxSPIPS *s = opaque;
     uint32_t mask = ~0;
@@ -970,7 +965,7 @@
 }
 
 static void xilinx_spips_write(void *opaque, hwaddr addr,
-                                        uint64_t value, unsigned size)
+                               uint64_t value, unsigned size)
 {
     int mask = ~0;
     XilinxSPIPS *s = opaque;
@@ -1072,7 +1067,7 @@
 }
 
 static void xlnx_zynqmp_qspips_write(void *opaque, hwaddr addr,
-                                        uint64_t value, unsigned size)
+                                     uint64_t value, unsigned size)
 {
     XlnxZynqMPQSPIPS *s = XLNX_ZYNQMP_QSPIPS(opaque);
     uint32_t reg = addr / 4;
diff --git a/hw/timer/Kconfig b/hw/timer/Kconfig
index 18936ef..bac2511 100644
--- a/hw/timer/Kconfig
+++ b/hw/timer/Kconfig
@@ -46,5 +46,11 @@
 config RENESAS_CMT
     bool
 
+config SSE_COUNTER
+    bool
+
+config SSE_TIMER
+    bool
+
 config AVR_TIMER16
     bool
diff --git a/hw/timer/cmsdk-apb-dualtimer.c b/hw/timer/cmsdk-apb-dualtimer.c
index ef49f58..d4a509c 100644
--- a/hw/timer/cmsdk-apb-dualtimer.c
+++ b/hw/timer/cmsdk-apb-dualtimer.c
@@ -449,7 +449,7 @@
     s->timeritop = 0;
 }
 
-static void cmsdk_apb_dualtimer_clk_update(void *opaque)
+static void cmsdk_apb_dualtimer_clk_update(void *opaque, ClockEvent event)
 {
     CMSDKAPBDualTimer *s = CMSDK_APB_DUALTIMER(opaque);
     int i;
@@ -478,7 +478,8 @@
         sysbus_init_irq(sbd, &s->timermod[i].timerint);
     }
     s->timclk = qdev_init_clock_in(DEVICE(s), "TIMCLK",
-                                   cmsdk_apb_dualtimer_clk_update, s);
+                                   cmsdk_apb_dualtimer_clk_update, s,
+                                   ClockUpdate);
 }
 
 static void cmsdk_apb_dualtimer_realize(DeviceState *dev, Error **errp)
diff --git a/hw/timer/cmsdk-apb-timer.c b/hw/timer/cmsdk-apb-timer.c
index ee51ce3..68aa1a7 100644
--- a/hw/timer/cmsdk-apb-timer.c
+++ b/hw/timer/cmsdk-apb-timer.c
@@ -204,7 +204,7 @@
     ptimer_transaction_commit(s->timer);
 }
 
-static void cmsdk_apb_timer_clk_update(void *opaque)
+static void cmsdk_apb_timer_clk_update(void *opaque, ClockEvent event)
 {
     CMSDKAPBTimer *s = CMSDK_APB_TIMER(opaque);
 
@@ -223,7 +223,7 @@
     sysbus_init_mmio(sbd, &s->iomem);
     sysbus_init_irq(sbd, &s->timerint);
     s->pclk = qdev_init_clock_in(DEVICE(s), "pclk",
-                                 cmsdk_apb_timer_clk_update, s);
+                                 cmsdk_apb_timer_clk_update, s, ClockUpdate);
 }
 
 static void cmsdk_apb_timer_realize(DeviceState *dev, Error **errp)
diff --git a/hw/timer/meson.build b/hw/timer/meson.build
index 26c2701..a429792 100644
--- a/hw/timer/meson.build
+++ b/hw/timer/meson.build
@@ -32,6 +32,8 @@
 softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_systmr.c'))
 softmmu_ss.add(when: 'CONFIG_SH_TIMER', if_true: files('sh_timer.c'))
 softmmu_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_timer.c'))
+softmmu_ss.add(when: 'CONFIG_SSE_COUNTER', if_true: files('sse-counter.c'))
+softmmu_ss.add(when: 'CONFIG_SSE_TIMER', if_true: files('sse-timer.c'))
 softmmu_ss.add(when: 'CONFIG_STM32F2XX_TIMER', if_true: files('stm32f2xx_timer.c'))
 softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_timer.c'))
 
diff --git a/hw/timer/npcm7xx_timer.c b/hw/timer/npcm7xx_timer.c
index 36e2c07..32f5e02 100644
--- a/hw/timer/npcm7xx_timer.c
+++ b/hw/timer/npcm7xx_timer.c
@@ -138,8 +138,8 @@
 /* Convert a time interval in nanoseconds to a timer cycle count. */
 static uint32_t npcm7xx_timer_ns_to_count(NPCM7xxTimer *t, int64_t ns)
 {
-    return ns / clock_ticks_to_ns(t->ctrl->clock,
-                                  npcm7xx_tcsr_prescaler(t->tcsr));
+    return clock_ns_to_ticks(t->ctrl->clock, ns) /
+        npcm7xx_tcsr_prescaler(t->tcsr);
 }
 
 static uint32_t npcm7xx_watchdog_timer_prescaler(const NPCM7xxWatchdogTimer *t)
@@ -627,7 +627,7 @@
     sysbus_init_mmio(sbd, &s->iomem);
     qdev_init_gpio_out_named(dev, &w->reset_signal,
             NPCM7XX_WATCHDOG_RESET_GPIO_OUT, 1);
-    s->clock = qdev_init_clock_in(dev, "clock", NULL, NULL);
+    s->clock = qdev_init_clock_in(dev, "clock", NULL, NULL, 0);
 }
 
 static const VMStateDescription vmstate_npcm7xx_base_timer = {
diff --git a/hw/timer/renesas_tmr.c b/hw/timer/renesas_tmr.c
index e03a815..eed3991 100644
--- a/hw/timer/renesas_tmr.c
+++ b/hw/timer/renesas_tmr.c
@@ -46,8 +46,10 @@
   FIELD(TCCR, CSS,   3, 2)
   FIELD(TCCR, TMRIS, 7, 1)
 
-#define INTERNAL  0x01
-#define CASCADING 0x03
+#define CSS_EXTERNAL  0x00
+#define CSS_INTERNAL  0x01
+#define CSS_INVALID   0x02
+#define CSS_CASCADING 0x03
 #define CCLR_A    0x01
 #define CCLR_B    0x02
 
@@ -72,7 +74,7 @@
         /* event not happened */
         return ;
     }
-    if (FIELD_EX8(tmr->tccr[0], TCCR, CSS) == CASCADING) {
+    if (FIELD_EX8(tmr->tccr[0], TCCR, CSS) == CSS_CASCADING) {
         /* cascading mode */
         if (ch == 1) {
             tmr->next[ch] = none;
@@ -130,23 +132,32 @@
     if (delta > 0) {
         tmr->tick = now;
 
-        if (FIELD_EX8(tmr->tccr[1], TCCR, CSS) == INTERNAL) {
+        switch (FIELD_EX8(tmr->tccr[1], TCCR, CSS)) {
+        case CSS_INTERNAL:
             /* timer1 count update */
             elapsed = elapsed_time(tmr, 1, delta);
             if (elapsed >= 0x100) {
                 ovf = elapsed >> 8;
             }
             tcnt[1] = tmr->tcnt[1] + (elapsed & 0xff);
+            break;
+        case CSS_INVALID: /* guest error to have set this */
+        case CSS_EXTERNAL: /* QEMU doesn't implement these */
+        case CSS_CASCADING:
+            tcnt[1] = tmr->tcnt[1];
+            break;
         }
         switch (FIELD_EX8(tmr->tccr[0], TCCR, CSS)) {
-        case INTERNAL:
+        case CSS_INTERNAL:
             elapsed = elapsed_time(tmr, 0, delta);
             tcnt[0] = tmr->tcnt[0] + elapsed;
             break;
-        case CASCADING:
-            if (ovf > 0) {
-                tcnt[0] = tmr->tcnt[0] + ovf;
-            }
+        case CSS_CASCADING:
+            tcnt[0] = tmr->tcnt[0] + ovf;
+            break;
+        case CSS_INVALID: /* guest error to have set this */
+        case CSS_EXTERNAL: /* QEMU doesn't implement this */
+            tcnt[0] = tmr->tcnt[0];
             break;
         }
     } else {
@@ -330,7 +341,7 @@
                 qemu_irq_pulse(tmr->cmia[ch]);
             }
             if (sz == 8 && ch == 0 &&
-                FIELD_EX8(tmr->tccr[1], TCCR, CSS) == CASCADING) {
+                FIELD_EX8(tmr->tccr[1], TCCR, CSS) == CSS_CASCADING) {
                 tmr->tcnt[1]++;
                 timer_events(tmr, 1);
             }
@@ -362,7 +373,7 @@
     uint16_t tcnt;
 
     tmr->tcnt[ch] = read_tcnt(tmr, 1, ch);
-    if (FIELD_EX8(tmr->tccr[0], TCCR, CSS) != CASCADING) {
+    if (FIELD_EX8(tmr->tccr[0], TCCR, CSS) != CSS_CASCADING) {
         tmr->tcnt[ch] = issue_event(tmr, ch, 8,
                                     tmr->tcnt[ch],
                                     tmr->tcora[ch],
diff --git a/hw/timer/sse-counter.c b/hw/timer/sse-counter.c
new file mode 100644
index 0000000..0384051
--- /dev/null
+++ b/hw/timer/sse-counter.c
@@ -0,0 +1,474 @@
+/*
+ * Arm SSE Subsystem System Counter
+ *
+ * Copyright (c) 2020 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+/*
+ * This is a model of the "System counter" which is documented in
+ * the Arm SSE-123 Example Subsystem Technical Reference Manual:
+ * https://developer.arm.com/documentation/101370/latest/
+ *
+ * The system counter is a non-stop 64-bit up-counter. It provides
+ * this count value to other devices like the SSE system timer,
+ * which are driven by this system timestamp rather than directly
+ * from a clock. Internally to the counter the count is actually
+ * 88-bit precision (64.24 fixed point), with a programmable scale factor.
+ *
+ * The hardware has the optional feature that it supports dynamic
+ * clock switching, where two clock inputs are connected, and which
+ * one is used is selected via a CLKSEL input signal. Since the
+ * users of this device in QEMU don't use this feature, we only model
+ * the HWCLKSW=0 configuration.
+ */
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/timer.h"
+#include "qapi/error.h"
+#include "trace.h"
+#include "hw/timer/sse-counter.h"
+#include "hw/sysbus.h"
+#include "hw/irq.h"
+#include "hw/registerfields.h"
+#include "hw/clock.h"
+#include "hw/qdev-clock.h"
+#include "migration/vmstate.h"
+
+/* Registers in the control frame */
+REG32(CNTCR, 0x0)
+    FIELD(CNTCR, EN, 0, 1)
+    FIELD(CNTCR, HDBG, 1, 1)
+    FIELD(CNTCR, SCEN, 2, 1)
+    FIELD(CNTCR, INTRMASK, 3, 1)
+    FIELD(CNTCR, PSLVERRDIS, 4, 1)
+    FIELD(CNTCR, INTRCLR, 5, 1)
+/*
+ * Although CNTCR defines interrupt-related bits, the counter doesn't
+ * appear to actually have an interrupt output. So INTRCLR is
+ * effectively a RAZ/WI bit, as are the reserved bits [31:6].
+ */
+#define CNTCR_VALID_MASK (R_CNTCR_EN_MASK | R_CNTCR_HDBG_MASK | \
+                          R_CNTCR_SCEN_MASK | R_CNTCR_INTRMASK_MASK | \
+                          R_CNTCR_PSLVERRDIS_MASK)
+REG32(CNTSR, 0x4)
+REG32(CNTCV_LO, 0x8)
+REG32(CNTCV_HI, 0xc)
+REG32(CNTSCR, 0x10) /* Aliased with CNTSCR0 */
+REG32(CNTID, 0x1c)
+    FIELD(CNTID, CNTSC, 0, 4)
+    FIELD(CNTID, CNTCS, 16, 1)
+    FIELD(CNTID, CNTSELCLK, 17, 2)
+    FIELD(CNTID, CNTSCR_OVR, 19, 1)
+REG32(CNTSCR0, 0xd0)
+REG32(CNTSCR1, 0xd4)
+
+/* Registers in the status frame */
+REG32(STATUS_CNTCV_LO, 0x0)
+REG32(STATUS_CNTCV_HI, 0x4)
+
+/* Standard ID registers, present in both frames */
+REG32(PID4, 0xFD0)
+REG32(PID5, 0xFD4)
+REG32(PID6, 0xFD8)
+REG32(PID7, 0xFDC)
+REG32(PID0, 0xFE0)
+REG32(PID1, 0xFE4)
+REG32(PID2, 0xFE8)
+REG32(PID3, 0xFEC)
+REG32(CID0, 0xFF0)
+REG32(CID1, 0xFF4)
+REG32(CID2, 0xFF8)
+REG32(CID3, 0xFFC)
+
+/* PID/CID values */
+static const int control_id[] = {
+    0x04, 0x00, 0x00, 0x00, /* PID4..PID7 */
+    0xba, 0xb0, 0x0b, 0x00, /* PID0..PID3 */
+    0x0d, 0xf0, 0x05, 0xb1, /* CID0..CID3 */
+};
+
+static const int status_id[] = {
+    0x04, 0x00, 0x00, 0x00, /* PID4..PID7 */
+    0xbb, 0xb0, 0x0b, 0x00, /* PID0..PID3 */
+    0x0d, 0xf0, 0x05, 0xb1, /* CID0..CID3 */
+};
+
+static void sse_counter_notify_users(SSECounter *s)
+{
+    /*
+     * Notify users of the count timestamp that they may
+     * need to recalculate.
+     */
+    notifier_list_notify(&s->notifier_list, NULL);
+}
+
+static bool sse_counter_enabled(SSECounter *s)
+{
+    return (s->cntcr & R_CNTCR_EN_MASK) != 0;
+}
+
+uint64_t sse_counter_tick_to_time(SSECounter *s, uint64_t tick)
+{
+    if (!sse_counter_enabled(s)) {
+        return UINT64_MAX;
+    }
+
+    tick -= s->ticks_then;
+
+    if (s->cntcr & R_CNTCR_SCEN_MASK) {
+        /* Adjust the tick count to account for the scale factor */
+        tick = muldiv64(tick, 0x01000000, s->cntscr0);
+    }
+
+    return s->ns_then + clock_ticks_to_ns(s->clk, tick);
+}
+
+void sse_counter_register_consumer(SSECounter *s, Notifier *notifier)
+{
+    /*
+     * For the moment we assume that both we and the devices
+     * which consume us last for the life of the simulation,
+     * and so there is no mechanism for removing a notifier.
+     */
+    notifier_list_add(&s->notifier_list, notifier);
+}
+
+uint64_t sse_counter_for_timestamp(SSECounter *s, uint64_t now)
+{
+    /* Return the CNTCV value for a particular timestamp (clock ns value). */
+    uint64_t ticks;
+
+    if (!sse_counter_enabled(s)) {
+        /* Counter is disabled and does not increment */
+        return s->ticks_then;
+    }
+
+    ticks = clock_ns_to_ticks(s->clk, now - s->ns_then);
+    if (s->cntcr & R_CNTCR_SCEN_MASK) {
+        /*
+         * Scaling is enabled. The CNTSCR value is the amount added to
+         * the underlying 88-bit counter for every tick of the
+         * underlying clock; CNTCV is the top 64 bits of that full
+         * 88-bit value. Multiplying the tick count by CNTSCR tells us
+         * how much the full 88-bit counter has moved on; we then
+         * divide that by 0x01000000 to find out how much the 64-bit
+         * visible portion has advanced. muldiv64() gives us the
+         * necessary at-least-88-bit precision for the intermediate
+         * result.
+         */
+        ticks = muldiv64(ticks, s->cntscr0, 0x01000000);
+    }
+    return s->ticks_then + ticks;
+}
+
+static uint64_t sse_cntcv(SSECounter *s)
+{
+    /* Return the CNTCV value for the current time */
+    return sse_counter_for_timestamp(s, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
+}
+
+static void sse_write_cntcv(SSECounter *s, uint32_t value, unsigned startbit)
+{
+    /*
+     * Write one 32-bit half of the counter value; startbit is the
+     * bit position of this half in the 64-bit word, either 0 or 32.
+     */
+    uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    uint64_t cntcv = sse_counter_for_timestamp(s, now);
+
+    cntcv = deposit64(cntcv, startbit, 32, value);
+    s->ticks_then = cntcv;
+    s->ns_then = now;
+    sse_counter_notify_users(s);
+}
+
+static uint64_t sse_counter_control_read(void *opaque, hwaddr offset,
+                                         unsigned size)
+{
+    SSECounter *s = SSE_COUNTER(opaque);
+    uint64_t r;
+
+    switch (offset) {
+    case A_CNTCR:
+        r = s->cntcr;
+        break;
+    case A_CNTSR:
+        /*
+         * The only bit here is DBGH, indicating that the counter has been
+         * halted via the Halt-on-Debug signal. We don't implement halting
+         * debug, so the whole register always reads as zero.
+         */
+        r = 0;
+        break;
+    case A_CNTCV_LO:
+        r = extract64(sse_cntcv(s), 0, 32);
+        break;
+    case A_CNTCV_HI:
+        r = extract64(sse_cntcv(s), 32, 32);
+        break;
+    case A_CNTID:
+        /*
+         * For our implementation:
+         *  - CNTSCR can only be written when CNTCR.EN == 0
+         *  - HWCLKSW=0, so selected clock is always CLK0
+         *  - counter scaling is implemented
+         */
+        r = (1 << R_CNTID_CNTSELCLK_SHIFT) | (1 << R_CNTID_CNTSC_SHIFT);
+        break;
+    case A_CNTSCR:
+    case A_CNTSCR0:
+        r = s->cntscr0;
+        break;
+    case A_CNTSCR1:
+        /* If HWCLKSW == 0, CNTSCR1 is RAZ/WI */
+        r = 0;
+        break;
+    case A_PID4 ... A_CID3:
+        r = control_id[(offset - A_PID4) / 4];
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SSE System Counter control frame read: bad offset 0x%x",
+                      (unsigned)offset);
+        r = 0;
+        break;
+    }
+
+    trace_sse_counter_control_read(offset, r, size);
+    return r;
+}
+
+static void sse_counter_control_write(void *opaque, hwaddr offset,
+                                      uint64_t value, unsigned size)
+{
+    SSECounter *s = SSE_COUNTER(opaque);
+
+    trace_sse_counter_control_write(offset, value, size);
+
+    switch (offset) {
+    case A_CNTCR:
+        /*
+         * Although CNTCR defines interrupt-related bits, the counter doesn't
+         * appear to actually have an interrupt output. So INTRCLR is
+         * effectively a RAZ/WI bit, as are the reserved bits [31:6].
+         * The documentation does not explicitly say so, but we assume
+         * that changing the scale factor while the counter is enabled
+         * by toggling CNTCR.SCEN has the same behaviour (making the counter
+         * value UNKNOWN) as changing it by writing to CNTSCR, and so we
+         * don't need to try to recalculate for that case.
+         */
+        value &= CNTCR_VALID_MASK;
+        if ((value ^ s->cntcr) & R_CNTCR_EN_MASK) {
+            /*
+             * Whether the counter is being enabled or disabled, the
+             * required action is the same: sync the (ns_then, ticks_then)
+             * tuple.
+             */
+            uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+            s->ticks_then = sse_counter_for_timestamp(s, now);
+            s->ns_then = now;
+            sse_counter_notify_users(s);
+        }
+        s->cntcr = value;
+        break;
+    case A_CNTCV_LO:
+        sse_write_cntcv(s, value, 0);
+        break;
+    case A_CNTCV_HI:
+        sse_write_cntcv(s, value, 32);
+        break;
+    case A_CNTSCR:
+    case A_CNTSCR0:
+        /*
+         * If the scale registers are changed when the counter is enabled,
+         * the count value becomes UNKNOWN. So we don't try to recalculate
+         * anything here but only do it on a write to CNTCR.EN.
+         */
+        s->cntscr0 = value;
+        break;
+    case A_CNTSCR1:
+        /* If HWCLKSW == 0, CNTSCR1 is RAZ/WI */
+        break;
+    case A_CNTSR:
+    case A_CNTID:
+    case A_PID4 ... A_CID3:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SSE System Counter control frame: write to RO offset 0x%x\n",
+                      (unsigned)offset);
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SSE System Counter control frame: write to bad offset 0x%x\n",
+                      (unsigned)offset);
+        break;
+    }
+}
+
+static uint64_t sse_counter_status_read(void *opaque, hwaddr offset,
+                                        unsigned size)
+{
+    SSECounter *s = SSE_COUNTER(opaque);
+    uint64_t r;
+
+    switch (offset) {
+    case A_STATUS_CNTCV_LO:
+        r = extract64(sse_cntcv(s), 0, 32);
+        break;
+    case A_STATUS_CNTCV_HI:
+        r = extract64(sse_cntcv(s), 32, 32);
+        break;
+    case A_PID4 ... A_CID3:
+        r = status_id[(offset - A_PID4) / 4];
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SSE System Counter status frame read: bad offset 0x%x",
+                      (unsigned)offset);
+        r = 0;
+        break;
+    }
+
+    trace_sse_counter_status_read(offset, r, size);
+    return r;
+}
+
+static void sse_counter_status_write(void *opaque, hwaddr offset,
+                                     uint64_t value, unsigned size)
+{
+    trace_sse_counter_status_write(offset, value, size);
+
+    switch (offset) {
+    case A_STATUS_CNTCV_LO:
+    case A_STATUS_CNTCV_HI:
+    case A_PID4 ... A_CID3:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SSE System Counter status frame: write to RO offset 0x%x\n",
+                      (unsigned)offset);
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SSE System Counter status frame: write to bad offset 0x%x\n",
+                      (unsigned)offset);
+        break;
+    }
+}
+
+static const MemoryRegionOps sse_counter_control_ops = {
+    .read = sse_counter_control_read,
+    .write = sse_counter_control_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+};
+
+static const MemoryRegionOps sse_counter_status_ops = {
+    .read = sse_counter_status_read,
+    .write = sse_counter_status_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+};
+
+static void sse_counter_reset(DeviceState *dev)
+{
+    SSECounter *s = SSE_COUNTER(dev);
+
+    trace_sse_counter_reset();
+
+    s->cntcr = 0;
+    s->cntscr0 = 0x01000000;
+    s->ns_then = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    s->ticks_then = 0;
+}
+
+static void sse_clk_callback(void *opaque, ClockEvent event)
+{
+    SSECounter *s = SSE_COUNTER(opaque);
+    uint64_t now;
+
+    switch (event) {
+    case ClockPreUpdate:
+        /*
+         * Before the clock period updates, set (ticks_then, ns_then)
+         * to the current time and tick count (as calculated with
+         * the old clock period).
+         */
+        if (sse_counter_enabled(s)) {
+            now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+            s->ticks_then = sse_counter_for_timestamp(s, now);
+            s->ns_then = now;
+        }
+        break;
+    case ClockUpdate:
+        sse_counter_notify_users(s);
+        break;
+    default:
+        break;
+    }
+}
+
+static void sse_counter_init(Object *obj)
+{
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+    SSECounter *s = SSE_COUNTER(obj);
+
+    notifier_list_init(&s->notifier_list);
+
+    s->clk = qdev_init_clock_in(DEVICE(obj), "CLK", sse_clk_callback, s,
+                                ClockPreUpdate | ClockUpdate);
+    memory_region_init_io(&s->control_mr, obj, &sse_counter_control_ops,
+                          s, "sse-counter-control", 0x1000);
+    memory_region_init_io(&s->status_mr, obj, &sse_counter_status_ops,
+                          s, "sse-counter-status", 0x1000);
+    sysbus_init_mmio(sbd, &s->control_mr);
+    sysbus_init_mmio(sbd, &s->status_mr);
+}
+
+static void sse_counter_realize(DeviceState *dev, Error **errp)
+{
+    SSECounter *s = SSE_COUNTER(dev);
+
+    if (!clock_has_source(s->clk)) {
+        error_setg(errp, "SSE system counter: CLK must be connected");
+        return;
+    }
+}
+
+static const VMStateDescription sse_counter_vmstate = {
+    .name = "sse-counter",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_CLOCK(clk, SSECounter),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void sse_counter_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = sse_counter_realize;
+    dc->vmsd = &sse_counter_vmstate;
+    dc->reset = sse_counter_reset;
+}
+
+static const TypeInfo sse_counter_info = {
+    .name = TYPE_SSE_COUNTER,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(SSECounter),
+    .instance_init = sse_counter_init,
+    .class_init = sse_counter_class_init,
+};
+
+static void sse_counter_register_types(void)
+{
+    type_register_static(&sse_counter_info);
+}
+
+type_init(sse_counter_register_types);
diff --git a/hw/timer/sse-timer.c b/hw/timer/sse-timer.c
new file mode 100644
index 0000000..8dbe6ac
--- /dev/null
+++ b/hw/timer/sse-timer.c
@@ -0,0 +1,470 @@
+/*
+ * Arm SSE Subsystem System Timer
+ *
+ * Copyright (c) 2020 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+/*
+ * This is a model of the "System timer" which is documented in
+ * the Arm SSE-123 Example Subsystem Technical Reference Manual:
+ * https://developer.arm.com/documentation/101370/latest/
+ *
+ * The timer is based around a simple 64-bit incrementing counter
+ * (readable from CNTPCT_HI/LO). The timer fires when
+ *  Counter - CompareValue >= 0.
+ * The CompareValue is guest-writable, via CNTP_CVAL_HI/LO.
+ * CNTP_TVAL is an alternative view of the CompareValue defined by
+ *  TimerValue = CompareValue[31:0] - Counter[31:0]
+ * which can be both read and written.
+ * This part is similar to the generic timer in an Arm A-class CPU.
+ *
+ * The timer also has a separate auto-increment timer. When this
+ * timer is enabled, then the AutoIncrValue is set to:
+ *  AutoIncrValue = Reload + Counter
+ * and this timer fires when
+ *  Counter - AutoIncrValue >= 0
+ * at which point, an interrupt is generated and the new AutoIncrValue
+ * is calculated.
+ * When the auto-increment timer is enabled, interrupt generation
+ * via the compare/timervalue registers is disabled.
+ */
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/timer.h"
+#include "qapi/error.h"
+#include "trace.h"
+#include "hw/timer/sse-timer.h"
+#include "hw/timer/sse-counter.h"
+#include "hw/sysbus.h"
+#include "hw/irq.h"
+#include "hw/registerfields.h"
+#include "hw/clock.h"
+#include "hw/qdev-clock.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+
+REG32(CNTPCT_LO, 0x0)
+REG32(CNTPCT_HI, 0x4)
+REG32(CNTFRQ, 0x10)
+REG32(CNTP_CVAL_LO, 0x20)
+REG32(CNTP_CVAL_HI, 0x24)
+REG32(CNTP_TVAL, 0x28)
+REG32(CNTP_CTL, 0x2c)
+    FIELD(CNTP_CTL, ENABLE, 0, 1)
+    FIELD(CNTP_CTL, IMASK, 1, 1)
+    FIELD(CNTP_CTL, ISTATUS, 2, 1)
+REG32(CNTP_AIVAL_LO, 0x40)
+REG32(CNTP_AIVAL_HI, 0x44)
+REG32(CNTP_AIVAL_RELOAD, 0x48)
+REG32(CNTP_AIVAL_CTL, 0x4c)
+    FIELD(CNTP_AIVAL_CTL, EN, 0, 1)
+    FIELD(CNTP_AIVAL_CTL, CLR, 1, 1)
+REG32(CNTP_CFG, 0x50)
+    FIELD(CNTP_CFG, AIVAL, 0, 4)
+#define R_CNTP_CFG_AIVAL_IMPLEMENTED 1
+REG32(PID4, 0xFD0)
+REG32(PID5, 0xFD4)
+REG32(PID6, 0xFD8)
+REG32(PID7, 0xFDC)
+REG32(PID0, 0xFE0)
+REG32(PID1, 0xFE4)
+REG32(PID2, 0xFE8)
+REG32(PID3, 0xFEC)
+REG32(CID0, 0xFF0)
+REG32(CID1, 0xFF4)
+REG32(CID2, 0xFF8)
+REG32(CID3, 0xFFC)
+
+/* PID/CID values */
+static const int timer_id[] = {
+    0x04, 0x00, 0x00, 0x00, /* PID4..PID7 */
+    0xb7, 0xb0, 0x0b, 0x00, /* PID0..PID3 */
+    0x0d, 0xf0, 0x05, 0xb1, /* CID0..CID3 */
+};
+
+static bool sse_is_autoinc(SSETimer *s)
+{
+    return (s->cntp_aival_ctl & R_CNTP_AIVAL_CTL_EN_MASK) != 0;
+}
+
+static bool sse_enabled(SSETimer *s)
+{
+    return (s->cntp_ctl & R_CNTP_CTL_ENABLE_MASK) != 0;
+}
+
+static uint64_t sse_cntpct(SSETimer *s)
+{
+    /* Return the CNTPCT value for the current time */
+    return sse_counter_for_timestamp(s->counter,
+                                     qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
+}
+
+static bool sse_timer_status(SSETimer *s)
+{
+    /*
+     * Return true if timer condition is met. This is used for both
+     * the CNTP_CTL.ISTATUS bit and for whether (unless masked) we
+     * assert our IRQ.
+     * The documentation is unclear about the behaviour of ISTATUS when
+     * in autoincrement mode; we assume that it follows CNTP_AIVAL_CTL.CLR
+     * (ie whether the autoincrement timer is asserting the interrupt).
+     */
+    if (!sse_enabled(s)) {
+        return false;
+    }
+
+    if (sse_is_autoinc(s)) {
+        return s->cntp_aival_ctl & R_CNTP_AIVAL_CTL_CLR_MASK;
+    } else {
+        return sse_cntpct(s) >= s->cntp_cval;
+    }
+}
+
+static void sse_update_irq(SSETimer *s)
+{
+    bool irqstate = (!(s->cntp_ctl & R_CNTP_CTL_IMASK_MASK) &&
+                     sse_timer_status(s));
+
+    qemu_set_irq(s->irq, irqstate);
+}
+
+static void sse_set_timer(SSETimer *s, uint64_t nexttick)
+{
+    /* Set the timer to expire at nexttick */
+    uint64_t expiry = sse_counter_tick_to_time(s->counter, nexttick);
+
+    if (expiry <= INT64_MAX) {
+        timer_mod_ns(&s->timer, expiry);
+    } else {
+        /*
+         * nexttick is so far in the future that it would overflow the
+         * signed 64-bit range of a QEMUTimer. Since timer_mod_ns()
+         * expiry times are absolute, not relative, we are never going
+         * to be able to set the timer to this value, so we must just
+         * assume that guest execution can never run so long that it
+         * reaches the theoretical point when the timer fires.
+         * This is also the code path for "counter is not running",
+         * which is signalled by expiry == UINT64_MAX.
+         */
+        timer_del(&s->timer);
+    }
+}
+
+static void sse_recalc_timer(SSETimer *s)
+{
+    /* Recalculate the normal timer */
+    uint64_t count, nexttick;
+
+    if (sse_is_autoinc(s)) {
+        return;
+    }
+
+    if (!sse_enabled(s)) {
+        timer_del(&s->timer);
+        return;
+    }
+
+    count = sse_cntpct(s);
+
+    if (count >= s->cntp_cval) {
+        /*
+         * Timer condition already met. In theory we have a transition when
+         * the count rolls back over to 0, but that is so far in the future
+         * that it is not representable as a timer_mod() expiry, so in
+         * fact sse_set_timer() will always just delete the timer.
+         */
+        nexttick = UINT64_MAX;
+    } else {
+        /* Next transition is when count hits cval */
+        nexttick = s->cntp_cval;
+    }
+    sse_set_timer(s, nexttick);
+    sse_update_irq(s);
+}
+
+static void sse_autoinc(SSETimer *s)
+{
+    /* Auto-increment the AIVAL, and set the timer accordingly */
+    s->cntp_aival = sse_cntpct(s) + s->cntp_aival_reload;
+    sse_set_timer(s, s->cntp_aival);
+}
+
+static void sse_timer_cb(void *opaque)
+{
+    SSETimer *s = SSE_TIMER(opaque);
+
+    if (sse_is_autoinc(s)) {
+        uint64_t count = sse_cntpct(s);
+
+        if (count >= s->cntp_aival) {
+            /* Timer condition met, set CLR and do another autoinc */
+            s->cntp_aival_ctl |= R_CNTP_AIVAL_CTL_CLR_MASK;
+            s->cntp_aival = count + s->cntp_aival_reload;
+        }
+        sse_set_timer(s, s->cntp_aival);
+        sse_update_irq(s);
+    } else {
+        sse_recalc_timer(s);
+    }
+}
+
+static uint64_t sse_timer_read(void *opaque, hwaddr offset, unsigned size)
+{
+    SSETimer *s = SSE_TIMER(opaque);
+    uint64_t r;
+
+    switch (offset) {
+    case A_CNTPCT_LO:
+        r = extract64(sse_cntpct(s), 0, 32);
+        break;
+    case A_CNTPCT_HI:
+        r = extract64(sse_cntpct(s), 32, 32);
+        break;
+    case A_CNTFRQ:
+        r = s->cntfrq;
+        break;
+    case A_CNTP_CVAL_LO:
+        r = extract64(s->cntp_cval, 0, 32);
+        break;
+    case A_CNTP_CVAL_HI:
+        r = extract64(s->cntp_cval, 32, 32);
+        break;
+    case A_CNTP_TVAL:
+        r = extract64(s->cntp_cval - sse_cntpct(s), 0, 32);
+        break;
+    case A_CNTP_CTL:
+        r = s->cntp_ctl;
+        if (sse_timer_status(s)) {
+            r |= R_CNTP_CTL_ISTATUS_MASK;
+        }
+        break;
+    case A_CNTP_AIVAL_LO:
+        r = extract64(s->cntp_aival, 0, 32);
+        break;
+    case A_CNTP_AIVAL_HI:
+        r = extract64(s->cntp_aival, 32, 32);
+        break;
+    case A_CNTP_AIVAL_RELOAD:
+        r = s->cntp_aival_reload;
+        break;
+    case A_CNTP_AIVAL_CTL:
+        /*
+         * All the bits of AIVAL_CTL are documented as WO, but this is probably
+         * a documentation error. We implement them as readable.
+         */
+        r = s->cntp_aival_ctl;
+        break;
+    case A_CNTP_CFG:
+        r = R_CNTP_CFG_AIVAL_IMPLEMENTED << R_CNTP_CFG_AIVAL_SHIFT;
+        break;
+    case A_PID4 ... A_CID3:
+        r = timer_id[(offset - A_PID4) / 4];
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SSE System Timer read: bad offset 0x%x",
+                      (unsigned) offset);
+        r = 0;
+        break;
+    }
+
+    trace_sse_timer_read(offset, r, size);
+    return r;
+}
+
+static void sse_timer_write(void *opaque, hwaddr offset, uint64_t value,
+                            unsigned size)
+{
+    SSETimer *s = SSE_TIMER(opaque);
+
+    trace_sse_timer_write(offset, value, size);
+
+    switch (offset) {
+    case A_CNTFRQ:
+        s->cntfrq = value;
+        break;
+    case A_CNTP_CVAL_LO:
+        s->cntp_cval = deposit64(s->cntp_cval, 0, 32, value);
+        sse_recalc_timer(s);
+        break;
+    case A_CNTP_CVAL_HI:
+        s->cntp_cval = deposit64(s->cntp_cval, 32, 32, value);
+        sse_recalc_timer(s);
+        break;
+    case A_CNTP_TVAL:
+        s->cntp_cval = sse_cntpct(s) + sextract64(value, 0, 32);
+        sse_recalc_timer(s);
+        break;
+    case A_CNTP_CTL:
+    {
+        uint32_t old_ctl = s->cntp_ctl;
+        value &= R_CNTP_CTL_ENABLE_MASK | R_CNTP_CTL_IMASK_MASK;
+        s->cntp_ctl = value;
+        if ((old_ctl ^ s->cntp_ctl) & R_CNTP_CTL_ENABLE_MASK) {
+            if (sse_enabled(s)) {
+                if (sse_is_autoinc(s)) {
+                    sse_autoinc(s);
+                } else {
+                    sse_recalc_timer(s);
+                }
+            }
+        }
+        sse_update_irq(s);
+        break;
+    }
+    case A_CNTP_AIVAL_RELOAD:
+        s->cntp_aival_reload = value;
+        break;
+    case A_CNTP_AIVAL_CTL:
+    {
+        uint32_t old_ctl = s->cntp_aival_ctl;
+
+        /* EN bit is writeable; CLR bit is write-0-to-clear, write-1-ignored */
+        s->cntp_aival_ctl &= ~R_CNTP_AIVAL_CTL_EN_MASK;
+        s->cntp_aival_ctl |= value & R_CNTP_AIVAL_CTL_EN_MASK;
+        if (!(value & R_CNTP_AIVAL_CTL_CLR_MASK)) {
+            s->cntp_aival_ctl &= ~R_CNTP_AIVAL_CTL_CLR_MASK;
+        }
+        if ((old_ctl ^ s->cntp_aival_ctl) & R_CNTP_AIVAL_CTL_EN_MASK) {
+            /* Auto-increment toggled on/off */
+            if (sse_enabled(s)) {
+                if (sse_is_autoinc(s)) {
+                    sse_autoinc(s);
+                } else {
+                    sse_recalc_timer(s);
+                }
+            }
+        }
+        sse_update_irq(s);
+        break;
+    }
+    case A_CNTPCT_LO:
+    case A_CNTPCT_HI:
+    case A_CNTP_CFG:
+    case A_CNTP_AIVAL_LO:
+    case A_CNTP_AIVAL_HI:
+    case A_PID4 ... A_CID3:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SSE System Timer write: write to RO offset 0x%x\n",
+                      (unsigned)offset);
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SSE System Timer write: bad offset 0x%x\n",
+                      (unsigned)offset);
+        break;
+    }
+}
+
+static const MemoryRegionOps sse_timer_ops = {
+    .read = sse_timer_read,
+    .write = sse_timer_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+};
+
+static void sse_timer_reset(DeviceState *dev)
+{
+    SSETimer *s = SSE_TIMER(dev);
+
+    trace_sse_timer_reset();
+
+    timer_del(&s->timer);
+    s->cntfrq = 0;
+    s->cntp_ctl = 0;
+    s->cntp_cval = 0;
+    s->cntp_aival = 0;
+    s->cntp_aival_ctl = 0;
+    s->cntp_aival_reload = 0;
+}
+
+static void sse_timer_counter_callback(Notifier *notifier, void *data)
+{
+    SSETimer *s = container_of(notifier, SSETimer, counter_notifier);
+
+    /* System counter told us we need to recalculate */
+    if (sse_enabled(s)) {
+        if (sse_is_autoinc(s)) {
+            sse_set_timer(s, s->cntp_aival);
+        } else {
+            sse_recalc_timer(s);
+        }
+    }
+}
+
+static void sse_timer_init(Object *obj)
+{
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+    SSETimer *s = SSE_TIMER(obj);
+
+    memory_region_init_io(&s->iomem, obj, &sse_timer_ops,
+                          s, "sse-timer", 0x1000);
+    sysbus_init_mmio(sbd, &s->iomem);
+    sysbus_init_irq(sbd, &s->irq);
+}
+
+static void sse_timer_realize(DeviceState *dev, Error **errp)
+{
+    SSETimer *s = SSE_TIMER(dev);
+
+    if (!s->counter) {
+        error_setg(errp, "counter property was not set");
+    }
+
+    s->counter_notifier.notify = sse_timer_counter_callback;
+    sse_counter_register_consumer(s->counter, &s->counter_notifier);
+
+    timer_init_ns(&s->timer, QEMU_CLOCK_VIRTUAL, sse_timer_cb, s);
+}
+
+static const VMStateDescription sse_timer_vmstate = {
+    .name = "sse-timer",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_TIMER(timer, SSETimer),
+        VMSTATE_UINT32(cntfrq, SSETimer),
+        VMSTATE_UINT32(cntp_ctl, SSETimer),
+        VMSTATE_UINT64(cntp_cval, SSETimer),
+        VMSTATE_UINT64(cntp_aival, SSETimer),
+        VMSTATE_UINT32(cntp_aival_ctl, SSETimer),
+        VMSTATE_UINT32(cntp_aival_reload, SSETimer),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static Property sse_timer_properties[] = {
+    DEFINE_PROP_LINK("counter", SSETimer, counter, TYPE_SSE_COUNTER, SSECounter *),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void sse_timer_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = sse_timer_realize;
+    dc->vmsd = &sse_timer_vmstate;
+    dc->reset = sse_timer_reset;
+    device_class_set_props(dc, sse_timer_properties);
+}
+
+static const TypeInfo sse_timer_info = {
+    .name = TYPE_SSE_TIMER,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(SSETimer),
+    .instance_init = sse_timer_init,
+    .class_init = sse_timer_class_init,
+};
+
+static void sse_timer_register_types(void)
+{
+    type_register_static(&sse_timer_info);
+}
+
+type_init(sse_timer_register_types);
diff --git a/hw/timer/trace-events b/hw/timer/trace-events
index 7a4326d..f8b9db2 100644
--- a/hw/timer/trace-events
+++ b/hw/timer/trace-events
@@ -93,3 +93,15 @@
 avr_timer16_interrupt_overflow(const char *reason) "overflow: %s"
 avr_timer16_next_alarm(uint64_t delay_ns) "next alarm: %" PRIu64 " ns from now"
 avr_timer16_clksrc_update(uint64_t freq_hz, uint64_t period_ns, uint64_t delay_s) "timer frequency: %" PRIu64 " Hz, period: %" PRIu64 " ns (%" PRId64 " us)"
+
+# sse_counter.c
+sse_counter_control_read(uint64_t offset, uint64_t data, unsigned size) "SSE system counter control frame read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
+sse_counter_control_write(uint64_t offset, uint64_t data, unsigned size) "SSE system counter control framen write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
+sse_counter_status_read(uint64_t offset, uint64_t data, unsigned size) "SSE system counter status frame read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
+sse_counter_status_write(uint64_t offset, uint64_t data, unsigned size) "SSE system counter status frame write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
+sse_counter_reset(void) "SSE system counter: reset"
+
+# sse_timer.c
+sse_timer_read(uint64_t offset, uint64_t data, unsigned size) "SSE system timer read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
+sse_timer_write(uint64_t offset, uint64_t data, unsigned size) "SSE system timer write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
+sse_timer_reset(void) "SSE system timer: reset"
diff --git a/hw/watchdog/cmsdk-apb-watchdog.c b/hw/watchdog/cmsdk-apb-watchdog.c
index 302f171..5a2cd46 100644
--- a/hw/watchdog/cmsdk-apb-watchdog.c
+++ b/hw/watchdog/cmsdk-apb-watchdog.c
@@ -310,7 +310,7 @@
     ptimer_transaction_commit(s->timer);
 }
 
-static void cmsdk_apb_watchdog_clk_update(void *opaque)
+static void cmsdk_apb_watchdog_clk_update(void *opaque, ClockEvent event)
 {
     CMSDKAPBWatchdog *s = CMSDK_APB_WATCHDOG(opaque);
 
@@ -329,7 +329,8 @@
     sysbus_init_mmio(sbd, &s->iomem);
     sysbus_init_irq(sbd, &s->wdogint);
     s->wdogclk = qdev_init_clock_in(DEVICE(s), "WDOGCLK",
-                                    cmsdk_apb_watchdog_clk_update, s);
+                                    cmsdk_apb_watchdog_clk_update, s,
+                                    ClockUpdate);
 
     s->is_luminary = false;
     s->id = cmsdk_apb_watchdog_id;
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 36e8da4..f581cf9 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -57,6 +57,8 @@
 uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap,
                                               uint64_t offset, uint64_t bytes);
 uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap);
+uint64_t bdrv_dirty_bitmap_serialization_coverage(int serialized_chunk_size,
+        const BdrvDirtyBitmap *bitmap);
 void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap,
                                       uint8_t *buf, uint64_t offset,
                                       uint64_t bytes);
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index b7b3c0e..6b036ca 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -448,9 +448,6 @@
     target_ulong pc;   /* simulated PC corresponding to this block (EIP + CS base) */
     target_ulong cs_base; /* CS base for this block */
     uint32_t flags; /* flags defining in which context the code was generated */
-    uint16_t size;      /* size of target code for this block (1 <=
-                           size <= TARGET_PAGE_SIZE) */
-    uint16_t icount;
     uint32_t cflags;    /* compile flags */
 #define CF_COUNT_MASK  0x00007fff
 #define CF_LAST_IO     0x00008000 /* Last insn may be an IO access.  */
@@ -460,12 +457,18 @@
 #define CF_PARALLEL    0x00080000 /* Generate code for a parallel context */
 #define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
 #define CF_CLUSTER_SHIFT 24
-/* cflags' mask for hashing/comparison, basically ignore CF_INVALID */
-#define CF_HASH_MASK   (~CF_INVALID)
 
     /* Per-vCPU dynamic tracing state used to generate this TB */
     uint32_t trace_vcpu_dstate;
 
+    /*
+     * Above fields used for comparing
+     */
+
+    /* size of target code for this block (1 <= size <= TARGET_PAGE_SIZE) */
+    uint16_t size;
+    uint16_t icount;
+
     struct tb_tc tc;
 
     /* first and second physical page containing code. The lower bit
@@ -510,8 +513,6 @@
     uintptr_t jmp_dest[2];
 };
 
-extern bool parallel_cpus;
-
 /* Hide the qatomic_read to make code a little easier on the eyes */
 static inline uint32_t tb_cflags(const TranslationBlock *tb)
 {
@@ -519,10 +520,9 @@
 }
 
 /* current cflags for hashing/comparison */
-static inline uint32_t curr_cflags(void)
+static inline uint32_t curr_cflags(CPUState *cpu)
 {
-    return (parallel_cpus ? CF_PARALLEL : 0)
-         | (icount_enabled() ? CF_USE_ICOUNT : 0);
+    return cpu->tcg_cflags;
 }
 
 /* TranslationBlock invalidate API */
@@ -536,7 +536,7 @@
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                    target_ulong cs_base, uint32_t flags,
-                                   uint32_t cf_mask);
+                                   uint32_t cflags);
 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
 
 /* GETPC is the true target of the return instruction that we'll execute.  */
diff --git a/include/exec/poison.h b/include/exec/poison.h
index 7b1bb12..4cd3f8a 100644
--- a/include/exec/poison.h
+++ b/include/exec/poison.h
@@ -10,6 +10,7 @@
 #pragma GCC poison TARGET_ALPHA
 #pragma GCC poison TARGET_ARM
 #pragma GCC poison TARGET_CRIS
+#pragma GCC poison TARGET_HEXAGON
 #pragma GCC poison TARGET_HPPA
 #pragma GCC poison TARGET_LM32
 #pragma GCC poison TARGET_M68K
@@ -72,6 +73,7 @@
 #pragma GCC poison CONFIG_CRIS_DIS
 #pragma GCC poison CONFIG_HPPA_DIS
 #pragma GCC poison CONFIG_I386_DIS
+#pragma GCC poison CONFIG_HEXAGON_DIS
 #pragma GCC poison CONFIG_LM32_DIS
 #pragma GCC poison CONFIG_M68K_DIS
 #pragma GCC poison CONFIG_MICROBLAZE_DIS
diff --git a/include/exec/tb-lookup.h b/include/exec/tb-lookup.h
index 9cf475b..29d61ce 100644
--- a/include/exec/tb-lookup.h
+++ b/include/exec/tb-lookup.h
@@ -17,30 +17,28 @@
 #include "exec/tb-hash.h"
 
 /* Might cause an exception, so have a longjmp destination ready */
-static inline TranslationBlock *
-tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base,
-                     uint32_t *flags, uint32_t cf_mask)
+static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
+                                          target_ulong cs_base,
+                                          uint32_t flags, uint32_t cflags)
 {
-    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     uint32_t hash;
 
-    cpu_get_tb_cpu_state(env, pc, cs_base, flags);
-    hash = tb_jmp_cache_hash_func(*pc);
+    /* we should never be trying to look up an INVALID tb */
+    tcg_debug_assert(!(cflags & CF_INVALID));
+
+    hash = tb_jmp_cache_hash_func(pc);
     tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
 
-    cf_mask &= ~CF_CLUSTER_MASK;
-    cf_mask |= cpu->cluster_index << CF_CLUSTER_SHIFT;
-
     if (likely(tb &&
-               tb->pc == *pc &&
-               tb->cs_base == *cs_base &&
-               tb->flags == *flags &&
+               tb->pc == pc &&
+               tb->cs_base == cs_base &&
+               tb->flags == flags &&
                tb->trace_vcpu_dstate == *cpu->trace_dstate &&
-               (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) {
+               tb_cflags(tb) == cflags)) {
         return tb;
     }
-    tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags, cf_mask);
+    tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
     if (tb == NULL) {
         return NULL;
     }
diff --git a/include/hw/arm/armsse-version.h b/include/hw/arm/armsse-version.h
new file mode 100644
index 0000000..60780fa
--- /dev/null
+++ b/include/hw/arm/armsse-version.h
@@ -0,0 +1,42 @@
+/*
+ * ARM SSE (Subsystems for Embedded): IoTKit, SSE-200
+ *
+ * Copyright (c) 2020 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+#ifndef ARMSSE_VERSION_H
+#define ARMSSE_VERSION_H
+
+
+/*
+ * Define an enumeration of the possible values of the sse-version
+ * property implemented by various sub-devices of the SSE, and
+ * a validation function that checks that a valid value has been passed.
+ * These are arbitrary QEMU-internal values (nobody should be creating
+ * the sub-devices of the SSE except for the SSE object itself), but
+ * we pick obvious numbers for the benefit of people debugging with gdb.
+ */
+enum {
+    ARMSSE_IOTKIT = 0,
+    ARMSSE_SSE200 = 200,
+    ARMSSE_SSE300 = 300,
+};
+
+static inline bool armsse_version_valid(uint32_t sse_version)
+{
+    switch (sse_version) {
+    case ARMSSE_IOTKIT:
+    case ARMSSE_SSE200:
+    case ARMSSE_SSE300:
+        return true;
+    default:
+        return false;
+    }
+}
+
+#endif
diff --git a/include/hw/arm/armsse.h b/include/hw/arm/armsse.h
index 09284ca..36592be 100644
--- a/include/hw/arm/armsse.h
+++ b/include/hw/arm/armsse.h
@@ -97,11 +97,14 @@
 #include "hw/misc/tz-mpc.h"
 #include "hw/timer/cmsdk-apb-timer.h"
 #include "hw/timer/cmsdk-apb-dualtimer.h"
+#include "hw/timer/sse-counter.h"
+#include "hw/timer/sse-timer.h"
 #include "hw/watchdog/cmsdk-apb-watchdog.h"
 #include "hw/misc/iotkit-sysctl.h"
 #include "hw/misc/iotkit-sysinfo.h"
 #include "hw/misc/armsse-cpuid.h"
 #include "hw/misc/armsse-mhu.h"
+#include "hw/misc/armsse-cpu-pwrctrl.h"
 #include "hw/misc/unimp.h"
 #include "hw/or-irq.h"
 #include "hw/clock.h"
@@ -120,12 +123,14 @@
  */
 #define TYPE_IOTKIT "iotkit"
 #define TYPE_SSE200 "sse-200"
+#define TYPE_SSE300 "sse-300"
 
 /* We have an IRQ splitter and an OR gate input for each external PPC
  * and the 2 internal PPCs
  */
+#define NUM_INTERNAL_PPCS 2
 #define NUM_EXTERNAL_PPCS (IOTS_NUM_AHB_EXP_PPC + IOTS_NUM_APB_EXP_PPC)
-#define NUM_PPCS (NUM_EXTERNAL_PPCS + 2)
+#define NUM_PPCS (NUM_EXTERNAL_PPCS + NUM_INTERNAL_PPCS)
 
 #define MAX_SRAM_BANKS 4
 #if MAX_SRAM_BANKS > IOTS_NUM_MPC
@@ -134,15 +139,10 @@
 
 #define SSE_MAX_CPUS 2
 
-/* These define what each PPU in the ppu[] index is for */
-#define CPU0CORE_PPU 0
-#define CPU1CORE_PPU 1
-#define DBG_PPU 2
-#define RAM0_PPU 3
-#define RAM1_PPU 4
-#define RAM2_PPU 5
-#define RAM3_PPU 6
-#define NUM_PPUS 7
+#define NUM_PPUS 8
+
+/* Number of CPU IRQs used by the SSE itself */
+#define NUM_SSE_IRQS 32
 
 struct ARMSSE {
     /*< private >*/
@@ -152,12 +152,9 @@
     ARMv7MState armv7m[SSE_MAX_CPUS];
     CPUClusterState cluster[SSE_MAX_CPUS];
     IoTKitSecCtl secctl;
-    TZPPC apb_ppc0;
-    TZPPC apb_ppc1;
+    TZPPC apb_ppc[NUM_INTERNAL_PPCS];
     TZMPC mpc[IOTS_NUM_MPC];
-    CMSDKAPBTimer timer0;
-    CMSDKAPBTimer timer1;
-    CMSDKAPBTimer s32ktimer;
+    CMSDKAPBTimer timer[3];
     qemu_or_irq ppc_irq_orgate;
     SplitIRQ sec_resp_splitter;
     SplitIRQ ppc_irq_splitter[NUM_PPCS];
@@ -165,24 +162,27 @@
     qemu_or_irq mpc_irq_orgate;
     qemu_or_irq nmi_orgate;
 
-    SplitIRQ cpu_irq_splitter[32];
+    SplitIRQ cpu_irq_splitter[NUM_SSE_IRQS];
 
     CMSDKAPBDualTimer dualtimer;
 
-    CMSDKAPBWatchdog s32kwatchdog;
-    CMSDKAPBWatchdog nswatchdog;
-    CMSDKAPBWatchdog swatchdog;
+    CMSDKAPBWatchdog cmsdk_watchdog[3];
+
+    SSECounter sse_counter;
+    SSETimer sse_timer[4];
 
     IoTKitSysCtl sysctl;
     IoTKitSysCtl sysinfo;
 
     ARMSSEMHU mhu[2];
-    UnimplementedDeviceState ppu[NUM_PPUS];
+    UnimplementedDeviceState unimp[NUM_PPUS];
     UnimplementedDeviceState cachectrl[SSE_MAX_CPUS];
     UnimplementedDeviceState cpusecctrl[SSE_MAX_CPUS];
 
     ARMSSECPUID cpuid[SSE_MAX_CPUS];
 
+    ARMSSECPUPwrCtrl cpu_pwrctrl[SSE_MAX_CPUS];
+
     /*
      * 'container' holds all devices seen by all CPUs.
      * 'cpu_container[i]' is the view that CPU i has: this has the
diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
index 0678b41..1676a84 100644
--- a/include/hw/arm/xlnx-zynqmp.h
+++ b/include/hw/arm/xlnx-zynqmp.h
@@ -35,6 +35,7 @@
 #include "target/arm/cpu.h"
 #include "qom/object.h"
 #include "net/can_emu.h"
+#include "hw/dma/xlnx_csu_dma.h"
 
 #define TYPE_XLNX_ZYNQMP "xlnx,zynqmp"
 OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPState, XLNX_ZYNQMP)
@@ -60,7 +61,8 @@
 
 #define XLNX_ZYNQMP_GIC_REGIONS 6
 
-/* ZynqMP maps the ARM GIC regions (GICC, GICD ...) at consecutive 64k offsets
+/*
+ * ZynqMP maps the ARM GIC regions (GICC, GICD ...) at consecutive 64k offsets
  * and under-decodes the 64k region. This mirrors the 4k regions to every 4k
  * aligned address in the 64k region. To implement each GIC region needs a
  * number of memory region aliases.
@@ -107,6 +109,7 @@
     XlnxZynqMPRTC rtc;
     XlnxZDMA gdma[XLNX_ZYNQMP_NUM_GDMA_CH];
     XlnxZDMA adma[XLNX_ZYNQMP_NUM_ADMA_CH];
+    XlnxCSUDMA qspi_dma;
 
     char *boot_cpu;
     ARMCPU *boot_cpu_ptr;
diff --git a/include/hw/clock.h b/include/hw/clock.h
index e5f45e2..a7187ea 100644
--- a/include/hw/clock.h
+++ b/include/hw/clock.h
@@ -22,7 +22,18 @@
 #define TYPE_CLOCK "clock"
 OBJECT_DECLARE_SIMPLE_TYPE(Clock, CLOCK)
 
-typedef void ClockCallback(void *opaque);
+/*
+ * Argument to ClockCallback functions indicating why the callback
+ * has been called. A mask of these values logically ORed together
+ * is used to specify which events are interesting when the callback
+ * is registered, so these values must all be different bit values.
+ */
+typedef enum ClockEvent {
+    ClockUpdate = 1, /* Clock period has just updated */
+    ClockPreUpdate = 2, /* Clock period is about to update */
+} ClockEvent;
+
+typedef void ClockCallback(void *opaque, ClockEvent event);
 
 /*
  * clock store a value representing the clock's period in 2^-32ns unit.
@@ -50,6 +61,7 @@
  * @canonical_path: clock path string cache (used for trace purpose)
  * @callback: called when clock changes
  * @callback_opaque: argument for @callback
+ * @callback_events: mask of events when callback should be called
  * @source: source (or parent in clock tree) of the clock
  * @children: list of clocks connected to this one (it is their source)
  * @sibling: structure used to form a clock list
@@ -67,6 +79,7 @@
     char *canonical_path;
     ClockCallback *callback;
     void *callback_opaque;
+    unsigned int callback_events;
 
     /* Clocks are organized in a clock tree */
     Clock *source;
@@ -114,10 +127,15 @@
  * @clk: the clock to register the callback into
  * @cb: the callback function
  * @opaque: the argument to the callback
+ * @events: the events the callback should be called for
+ *          (logical OR of ClockEvent enum values)
  *
  * Register a callback called on every clock update.
+ * Note that a clock has only one callback: you cannot register
+ * different callback functions for different events.
  */
-void clock_set_callback(Clock *clk, ClockCallback *cb, void *opaque);
+void clock_set_callback(Clock *clk, ClockCallback *cb,
+                        void *opaque, unsigned int events);
 
 /**
  * clock_clear_callback:
@@ -269,6 +287,47 @@
 }
 
 /**
+ * clock_ns_to_ticks:
+ * @clk: the clock to query
+ * @ns: duration in nanoseconds
+ *
+ * Returns the number of ticks this clock would make in the given
+ * number of nanoseconds. Because a clock can have a period which
+ * is not a whole number of nanoseconds, it is important to use this
+ * function rather than attempting to obtain a "period in nanoseconds"
+ * value and then dividing the duration by that value.
+ *
+ * If the clock is stopped (ie it has period zero), returns 0.
+ *
+ * For some inputs the result could overflow a 64-bit value (because
+ * the clock's period is short and the duration is long). In these
+ * cases we truncate the result to a 64-bit value. This is on the
+ * assumption that generally the result is going to be used to report
+ * a 32-bit or 64-bit guest register value, so wrapping either cannot
+ * happen or is the desired behaviour.
+ */
+static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
+{
+    /*
+     * ticks = duration_in_ns / period_in_ns
+     *       = ns / (period / 2^32)
+     *       = (ns * 2^32) / period
+     * The hi, lo inputs to divu128() are (ns << 32) as a 128 bit value.
+     */
+    uint64_t lo = ns << 32;
+    uint64_t hi = ns >> 32;
+    if (clk->period == 0) {
+        return 0;
+    }
+    /*
+     * Ignore divu128() return value as we've caught div-by-zero and don't
+     * need different behaviour for overflow.
+     */
+    divu128(&lo, &hi, clk->period);
+    return lo;
+}
+
+/**
  * clock_is_enabled:
  * @clk: a clock
  *
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index c005d3d..c68bc3b 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -282,6 +282,7 @@
  *   to a cluster this will be UNASSIGNED_CLUSTER_INDEX; otherwise it will
  *   be the same as the cluster-id property of the CPU object's TYPE_CPU_CLUSTER
  *   QOM parent.
+ * @tcg_cflags: Pre-computed cflags for this cpu.
  * @nr_cores: Number of cores within this CPU package.
  * @nr_threads: Number of threads within this CPU.
  * @running: #true if CPU is currently running (lockless).
@@ -412,6 +413,7 @@
     /* TODO Move common fields from CPUArchState here. */
     int cpu_index;
     int cluster_index;
+    uint32_t tcg_cflags;
     uint32_t halted;
     uint32_t can_do_io;
     int32_t exception_index;
diff --git a/include/hw/dma/xlnx_csu_dma.h b/include/hw/dma/xlnx_csu_dma.h
new file mode 100644
index 0000000..204d94c
--- /dev/null
+++ b/include/hw/dma/xlnx_csu_dma.h
@@ -0,0 +1,52 @@
+/*
+ * Xilinx Platform CSU Stream DMA emulation
+ *
+ * This implementation is based on
+ * https://github.com/Xilinx/qemu/blob/master/hw/dma/csu_stream_dma.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef XLNX_CSU_DMA_H
+#define XLNX_CSU_DMA_H
+
+#define TYPE_XLNX_CSU_DMA "xlnx.csu_dma"
+
+#define XLNX_CSU_DMA_R_MAX (0x2c / 4)
+
+typedef struct XlnxCSUDMA {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+    MemTxAttrs attr;
+    MemoryRegion *dma_mr;
+    AddressSpace *dma_as;
+    qemu_irq irq;
+    StreamSink *tx_dev; /* Used as generic StreamSink */
+    ptimer_state *src_timer;
+
+    uint16_t width;
+    bool is_dst;
+    bool r_size_last_word;
+
+    StreamCanPushNotifyFn notify;
+    void *notify_opaque;
+
+    uint32_t regs[XLNX_CSU_DMA_R_MAX];
+    RegisterInfo regs_info[XLNX_CSU_DMA_R_MAX];
+} XlnxCSUDMA;
+
+#define XLNX_CSU_DMA(obj) \
+    OBJECT_CHECK(XlnxCSUDMA, (obj), TYPE_XLNX_CSU_DMA)
+
+#endif
diff --git a/include/hw/misc/armsse-cpu-pwrctrl.h b/include/hw/misc/armsse-cpu-pwrctrl.h
new file mode 100644
index 0000000..51d45ed
--- /dev/null
+++ b/include/hw/misc/armsse-cpu-pwrctrl.h
@@ -0,0 +1,40 @@
+/*
+ * ARM SSE CPU PWRCTRL register block
+ *
+ * Copyright (c) 2021 Linaro Limited
+ * Written by Peter Maydell
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+/*
+ * This is a model of the "CPU<N>_PWRCTRL block" which is part of the
+ * Arm Corstone SSE-300 Example Subsystem and documented in
+ * https://developer.arm.com/documentation/101773/0000
+ *
+ * QEMU interface:
+ *  + sysbus MMIO region 0: the register bank
+ */
+
+#ifndef HW_MISC_ARMSSE_CPU_PWRCTRL_H
+#define HW_MISC_ARMSSE_CPU_PWRCTRL_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_ARMSSE_CPU_PWRCTRL "armsse-cpu-pwrctrl"
+OBJECT_DECLARE_SIMPLE_TYPE(ARMSSECPUPwrCtrl, ARMSSE_CPU_PWRCTRL)
+
+struct ARMSSECPUPwrCtrl {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+
+    uint32_t cpupwrcfg;
+};
+
+#endif
diff --git a/include/hw/misc/iotkit-secctl.h b/include/hw/misc/iotkit-secctl.h
index 227d44a..79a3628 100644
--- a/include/hw/misc/iotkit-secctl.h
+++ b/include/hw/misc/iotkit-secctl.h
@@ -120,6 +120,8 @@
     IoTKitSecCtlPPC apb[IOTS_NUM_APB_PPC];
     IoTKitSecCtlPPC apbexp[IOTS_NUM_APB_EXP_PPC];
     IoTKitSecCtlPPC ahbexp[IOTS_NUM_APB_EXP_PPC];
+
+    uint32_t sse_version;
 };
 
 #endif
diff --git a/include/hw/misc/iotkit-sysctl.h b/include/hw/misc/iotkit-sysctl.h
index 2bc3911..481e27f 100644
--- a/include/hw/misc/iotkit-sysctl.h
+++ b/include/hw/misc/iotkit-sysctl.h
@@ -17,9 +17,8 @@
  * "system control register" blocks.
  *
  * QEMU interface:
- *  + QOM property "SYS_VERSION": value of the SYS_VERSION register of the
- *    system information block of the SSE
- *    (used to identify whether to provide SSE-200-only registers)
+ *  + QOM property "sse-version": indicates which SSE version this is part of
+ *    (used to identify whether to provide SSE-200-only registers, etc)
  *  + sysbus MMIO region 0: the system information register bank
  *  + sysbus MMIO region 1: the system control register bank
  */
@@ -54,19 +53,21 @@
     uint32_t initsvtor1;
     uint32_t nmi_enable;
     uint32_t ewctrl;
+    uint32_t pwrctrl;
     uint32_t pdcm_pd_sys_sense;
     uint32_t pdcm_pd_sram0_sense;
     uint32_t pdcm_pd_sram1_sense;
     uint32_t pdcm_pd_sram2_sense;
     uint32_t pdcm_pd_sram3_sense;
+    uint32_t pdcm_pd_cpu0_sense;
+    uint32_t pdcm_pd_vmr0_sense;
+    uint32_t pdcm_pd_vmr1_sense;
 
     /* Properties */
-    uint32_t sys_version;
+    uint32_t sse_version;
     uint32_t cpuwait_rst;
     uint32_t initsvtor0_rst;
     uint32_t initsvtor1_rst;
-
-    bool is_sse200;
 };
 
 #endif
diff --git a/include/hw/misc/iotkit-sysinfo.h b/include/hw/misc/iotkit-sysinfo.h
index 055771d..91c23f9 100644
--- a/include/hw/misc/iotkit-sysinfo.h
+++ b/include/hw/misc/iotkit-sysinfo.h
@@ -38,6 +38,8 @@
     /* Properties */
     uint32_t sys_version;
     uint32_t sys_config;
+    uint32_t sse_version;
+    uint32_t iidr;
 };
 
 #endif
diff --git a/include/hw/misc/mps2-fpgaio.h b/include/hw/misc/mps2-fpgaio.h
index e04fd59..7b8bd60 100644
--- a/include/hw/misc/mps2-fpgaio.h
+++ b/include/hw/misc/mps2-fpgaio.h
@@ -39,10 +39,12 @@
     LEDState *led[MPS2FPGAIO_MAX_LEDS];
     uint32_t num_leds;
     bool has_switches;
+    bool has_dbgctrl;
 
     uint32_t led0;
     uint32_t prescale;
     uint32_t misc;
+    uint32_t dbgctrl;
 
     /* QEMU_CLOCK_VIRTUAL time at which counter and pscntr were last synced */
     int64_t pscntr_sync_ticks;
diff --git a/include/hw/qdev-clock.h b/include/hw/qdev-clock.h
index 64ca4d2..ffa0f7b 100644
--- a/include/hw/qdev-clock.h
+++ b/include/hw/qdev-clock.h
@@ -22,6 +22,8 @@
  * @name: the name of the clock (can't be NULL).
  * @callback: optional callback to be called on update or NULL.
  * @opaque: argument for the callback
+ * @events: the events the callback should be called for
+ *          (logical OR of ClockEvent enum values)
  * @returns: a pointer to the newly added clock
  *
  * Add an input clock to device @dev as a clock named @name.
@@ -29,7 +31,8 @@
  * The callback will be called with @opaque as opaque parameter.
  */
 Clock *qdev_init_clock_in(DeviceState *dev, const char *name,
-                          ClockCallback *callback, void *opaque);
+                          ClockCallback *callback, void *opaque,
+                          unsigned int events);
 
 /**
  * qdev_init_clock_out:
@@ -105,6 +108,7 @@
  * @output: indicates whether the clock is input or output
  * @callback: for inputs, optional callback to be called on clock's update
  * with device as opaque
+ * @callback_events: mask of ClockEvent values for when callback is called
  * @offset: optional offset to store the ClockIn or ClockOut pointer in device
  * state structure (0 means unused)
  */
@@ -112,6 +116,7 @@
     const char *name;
     bool is_output;
     ClockCallback *callback;
+    unsigned int callback_events;
     size_t offset;
 };
 
@@ -119,10 +124,11 @@
     (offsetof(devstate, field) + \
      type_check(Clock *, typeof_field(devstate, field)))
 
-#define QDEV_CLOCK(out_not_in, devstate, field, cb) { \
+#define QDEV_CLOCK(out_not_in, devstate, field, cb, cbevents) {  \
     .name = (stringify(field)), \
     .is_output = out_not_in, \
     .callback = cb, \
+    .callback_events = cbevents, \
     .offset = clock_offset_value(devstate, field), \
 }
 
@@ -133,14 +139,15 @@
  * @field: a field in @_devstate (must be Clock*)
  * @callback: (for input only) callback (or NULL) to be called with the device
  * state as argument
+ * @cbevents: (for input only) ClockEvent mask for when callback is called
  *
  * The name of the clock will be derived from @field
  */
-#define QDEV_CLOCK_IN(devstate, field, callback) \
-    QDEV_CLOCK(false, devstate, field, callback)
+#define QDEV_CLOCK_IN(devstate, field, callback, cbevents)       \
+    QDEV_CLOCK(false, devstate, field, callback, cbevents)
 
 #define QDEV_CLOCK_OUT(devstate, field) \
-    QDEV_CLOCK(true, devstate, field, NULL)
+    QDEV_CLOCK(true, devstate, field, NULL, 0)
 
 #define QDEV_CLOCK_END { .name = NULL }
 
diff --git a/include/hw/scsi/esp.h b/include/hw/scsi/esp.h
index d8a6263..9508849 100644
--- a/include/hw/scsi/esp.h
+++ b/include/hw/scsi/esp.h
@@ -3,6 +3,7 @@
 
 #include "hw/scsi/scsi.h"
 #include "hw/sysbus.h"
+#include "qemu/fifo8.h"
 #include "qom/object.h"
 
 /* esp.c */
@@ -10,19 +11,17 @@
 typedef void (*ESPDMAMemoryReadWriteFunc)(void *opaque, uint8_t *buf, int len);
 
 #define ESP_REGS 16
-#define TI_BUFSZ 16
-#define ESP_CMDBUF_SZ 32
+#define ESP_FIFO_SZ 16
+#define ESP_CMDFIFO_SZ 32
 
 typedef struct ESPState ESPState;
 
-enum pdma_origin_id {
-    PDMA,
-    TI,
-    CMD,
-    ASYNC,
-};
+#define TYPE_ESP "esp"
+OBJECT_DECLARE_SIMPLE_TYPE(ESPState, ESP)
 
 struct ESPState {
+    DeviceState parent_obj;
+
     uint8_t rregs[ESP_REGS];
     uint8_t wregs[ESP_REGS];
     qemu_irq irq;
@@ -30,24 +29,18 @@
     uint8_t chip_id;
     bool tchi_written;
     int32_t ti_size;
-    uint32_t ti_rptr, ti_wptr;
     uint32_t status;
-    uint32_t deferred_status;
-    bool deferred_complete;
     uint32_t dma;
-    uint8_t ti_buf[TI_BUFSZ];
+    Fifo8 fifo;
     SCSIBus bus;
     SCSIDevice *current_dev;
     SCSIRequest *current_req;
-    uint8_t cmdbuf[ESP_CMDBUF_SZ];
-    uint32_t cmdlen;
+    Fifo8 cmdfifo;
+    uint8_t cmdfifo_cdb_offset;
     uint32_t do_cmd;
 
-    /* The amount of data left in the current DMA transfer.  */
-    uint32_t dma_left;
-    /* The size of the current DMA transfer.  Zero if no transfer is in
-       progress.  */
-    uint32_t dma_counter;
+    bool data_in_ready;
+    uint8_t ti_cmd;
     int dma_enabled;
 
     uint32_t async_len;
@@ -57,16 +50,22 @@
     ESPDMAMemoryReadWriteFunc dma_memory_write;
     void *dma_opaque;
     void (*dma_cb)(ESPState *s);
-    uint8_t pdma_buf[32];
-    int pdma_origin;
-    uint32_t pdma_len;
-    uint32_t pdma_start;
-    uint32_t pdma_cur;
     void (*pdma_cb)(ESPState *s);
+
+    uint8_t mig_version_id;
+
+    /* Legacy fields for vmstate_esp version < 5 */
+    uint32_t mig_dma_left;
+    uint32_t mig_deferred_status;
+    bool mig_deferred_complete;
+    uint32_t mig_ti_rptr, mig_ti_wptr;
+    uint8_t mig_ti_buf[ESP_FIFO_SZ];
+    uint8_t mig_cmdbuf[ESP_CMDFIFO_SZ];
+    uint32_t mig_cmdlen;
 };
 
-#define TYPE_ESP "esp"
-OBJECT_DECLARE_SIMPLE_TYPE(SysBusESPState, ESP)
+#define TYPE_SYSBUS_ESP "sysbus-esp"
+OBJECT_DECLARE_SIMPLE_TYPE(SysBusESPState, SYSBUS_ESP)
 
 struct SysBusESPState {
     /*< private >*/
@@ -142,6 +141,7 @@
 #define INTR_RST 0x80
 
 #define SEQ_0 0x0
+#define SEQ_MO 0x1
 #define SEQ_CD 0x4
 
 #define CFG1_RESREPT 0x40
diff --git a/include/hw/ssi/xilinx_spips.h b/include/hw/ssi/xilinx_spips.h
index 3eae734..06bfd18 100644
--- a/include/hw/ssi/xilinx_spips.h
+++ b/include/hw/ssi/xilinx_spips.h
@@ -34,7 +34,7 @@
 typedef struct XilinxSPIPS XilinxSPIPS;
 
 #define XLNX_SPIPS_R_MAX        (0x100 / 4)
-#define XLNX_ZYNQMP_SPIPS_R_MAX (0x830 / 4)
+#define XLNX_ZYNQMP_SPIPS_R_MAX (0x200 / 4)
 
 /* Bite off 4k chunks at a time */
 #define LQSPI_CACHE_SIZE 1024
diff --git a/include/hw/timer/sse-counter.h b/include/hw/timer/sse-counter.h
new file mode 100644
index 0000000..b433e58
--- /dev/null
+++ b/include/hw/timer/sse-counter.h
@@ -0,0 +1,105 @@
+/*
+ * Arm SSE Subsystem System Counter
+ *
+ * Copyright (c) 2020 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+/*
+ * This is a model of the "System counter" which is documented in
+ * the Arm SSE-123 Example Subsystem Technical Reference Manual:
+ * https://developer.arm.com/documentation/101370/latest/
+ *
+ * QEMU interface:
+ *  + Clock input "CLK": clock
+ *  + sysbus MMIO region 0: the control register frame
+ *  + sysbus MMIO region 1: the status register frame
+ *
+ * Consumers of the system counter's timestamp, such as the SSE
+ * System Timer device, can also use the APIs sse_counter_for_timestamp(),
+ * sse_counter_tick_to_time() and sse_counter_register_consumer() to
+ * interact with an instance of the System Counter. Generally the
+ * consumer device should have a QOM link property which the board
+ * code can set to the appropriate instance of the system counter.
+ */
+
+#ifndef SSE_COUNTER_H
+#define SSE_COUNTER_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+#include "qemu/notify.h"
+
+#define TYPE_SSE_COUNTER "sse-counter"
+OBJECT_DECLARE_SIMPLE_TYPE(SSECounter, SSE_COUNTER)
+
+struct SSECounter {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion control_mr;
+    MemoryRegion status_mr;
+    Clock *clk;
+    NotifierList notifier_list;
+
+    uint32_t cntcr;
+    uint32_t cntscr0;
+
+    /*
+     * These are used for handling clock frequency changes: they are a
+     * tuple of (QEMU_CLOCK_VIRTUAL timestamp, CNTCV at that time),
+     * taken when the clock frequency changes. sse_cntcv() needs them
+     * to calculate the current CNTCV.
+     */
+    uint64_t ns_then;
+    uint64_t ticks_then;
+};
+
+/*
+ * These functions are the interface by which a consumer of
+ * the system timestamp (such as the SSE system timer device)
+ * can communicate with the SSECounter.
+ */
+
+/**
+ * sse_counter_for_timestamp:
+ * @counter: SSECounter
+ * @ns: timestamp of QEMU_CLOCK_VIRTUAL in nanoseconds
+ *
+ * Returns the value of the timestamp counter at the specified
+ * point in time (assuming that no changes to scale factor, enable, etc
+ * happen in the meantime).
+ */
+uint64_t sse_counter_for_timestamp(SSECounter *counter, uint64_t ns);
+
+/**
+ * sse_counter_tick_to_time:
+ * @counter: SSECounter
+ * @tick: tick value
+ *
+ * Returns the time (a QEMU_CLOCK_VIRTUAL timestamp in nanoseconds)
+ * when the timestamp counter will reach the specified tick count.
+ * If the counter is not currently running, returns UINT64_MAX.
+ */
+uint64_t sse_counter_tick_to_time(SSECounter *counter, uint64_t tick);
+
+/**
+ * sse_counter_register_consumer:
+ * @counter: SSECounter
+ * @notifier: Notifier which is notified on counter changes
+ *
+ * Registers @notifier with the SSECounter. When the counter's
+ * configuration changes in a way that might invalidate information
+ * previously returned via sse_counter_for_timestamp() or
+ * sse_counter_tick_to_time(), the notifier will be called.
+ * Devices which consume the timestamp counter can use this as
+ * a cue to recalculate timer events.
+ */
+void sse_counter_register_consumer(SSECounter *counter, Notifier *notifier);
+
+#endif
diff --git a/include/hw/timer/sse-timer.h b/include/hw/timer/sse-timer.h
new file mode 100644
index 0000000..b4ee8e7
--- /dev/null
+++ b/include/hw/timer/sse-timer.h
@@ -0,0 +1,53 @@
+/*
+ * Arm SSE Subsystem System Timer
+ *
+ * Copyright (c) 2020 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+/*
+ * This is a model of the "System timer" which is documented in
+ * the Arm SSE-123 Example Subsystem Technical Reference Manual:
+ * https://developer.arm.com/documentation/101370/latest/
+ *
+ * QEMU interface:
+ *  + QOM property "counter": link property to be set to the
+ *    TYPE_SSE_COUNTER timestamp counter device this timer runs off
+ *  + sysbus MMIO region 0: the register bank
+ *  + sysbus IRQ 0: timer interrupt
+ */
+
+#ifndef SSE_TIMER_H
+#define SSE_TIMER_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+#include "hw/timer/sse-counter.h"
+
+#define TYPE_SSE_TIMER "sse-timer"
+OBJECT_DECLARE_SIMPLE_TYPE(SSETimer, SSE_TIMER)
+
+struct SSETimer {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    qemu_irq irq;
+    SSECounter *counter;
+    QEMUTimer timer;
+    Notifier counter_notifier;
+
+    uint32_t cntfrq;
+    uint32_t cntp_ctl;
+    uint64_t cntp_cval;
+    uint64_t cntp_aival;
+    uint32_t cntp_aival_ctl;
+    uint32_t cntp_aival_reload;
+};
+
+#endif
diff --git a/linux-user/main.c b/linux-user/main.c
index 81f48ff..4f4746d 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -205,6 +205,7 @@
     /* Reset non arch specific state */
     cpu_reset(new_cpu);
 
+    new_cpu->tcg_cflags = cpu->tcg_cflags;
     memcpy(new_env, env, sizeof(CPUArchState));
 
     /* Clone all break/watchpoints.
diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
index cc89a48..29c1ee3 100644
--- a/linux-user/sh4/signal.c
+++ b/linux-user/sh4/signal.c
@@ -82,9 +82,11 @@
     return (sp - frame_size) & -8ul;
 }
 
-/* Notice when we're in the middle of a gUSA region and reset.
-   Note that this will only occur for !parallel_cpus, as we will
-   translate such sequences differently in a parallel context.  */
+/*
+ * Notice when we're in the middle of a gUSA region and reset.
+ * Note that this will only occur when #CF_PARALLEL is unset, as we
+ * will translate such sequences differently in a parallel context.
+ */
 static void unwind_gusa(CPUSH4State *regs)
 {
     /* If the stack pointer is sufficiently negative, and we haven't
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 389ec09..9522f60 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -6481,6 +6481,16 @@
         /* Grab a mutex so that thread setup appears atomic.  */
         pthread_mutex_lock(&clone_lock);
 
+        /*
+         * If this is our first additional thread, we need to ensure we
+         * generate code for parallel execution and flush old translations.
+         * Do this now so that the copy gets CF_PARALLEL too.
+         */
+        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
+            cpu->tcg_cflags |= CF_PARALLEL;
+            tb_flush(cpu);
+        }
+
         /* we create a new CPU instance. */
         new_env = cpu_copy(env);
         /* Init regs that differ from the parent.  */
@@ -6521,14 +6531,6 @@
         sigprocmask(SIG_BLOCK, &sigmask, &info.sigmask);
         cpu->random_seed = qemu_guest_random_seed_thread_part1();
 
-        /* If this is our first additional thread, we need to ensure we
-         * generate code for parallel execution and flush old translations.
-         */
-        if (!parallel_cpus) {
-            parallel_cpus = true;
-            tb_flush(cpu);
-        }
-
         ret = pthread_create(&info.thread, &attr, clone_func, &info);
         /* TODO: Free new CPU state if thread creation failed.  */
 
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 19e0aa9..7e8b0fa 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -937,7 +937,7 @@
                     cpu_loop_exit_restore(cpu, ra);
                 } else {
                     /* Force execution of one insn next time.  */
-                    cpu->cflags_next_tb = 1 | curr_cflags();
+                    cpu->cflags_next_tb = 1 | curr_cflags(cpu);
                     mmap_unlock();
                     if (ra) {
                         cpu_restore_state(cpu, ra, true);
diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c
index 9021a46..23756fc 100644
--- a/storage-daemon/qemu-storage-daemon.c
+++ b/storage-daemon/qemu-storage-daemon.c
@@ -59,6 +59,7 @@
 #include "sysemu/runstate.h"
 #include "trace/control.h"
 
+static const char *pid_file;
 static volatile bool exit_requested = false;
 
 void qemu_system_killed(int signal, pid_t pid)
@@ -115,6 +116,8 @@
 "                         See the qemu(1) man page for documentation of the\n"
 "                         objects that can be added.\n"
 "\n"
+"  --pidfile <path>       write process ID to a file after startup\n"
+"\n"
 QEMU_HELP_BOTTOM "\n",
     error_get_progname());
 }
@@ -126,6 +129,7 @@
     OPTION_MONITOR,
     OPTION_NBD_SERVER,
     OPTION_OBJECT,
+    OPTION_PIDFILE,
 };
 
 extern QemuOptsList qemu_chardev_opts;
@@ -152,6 +156,20 @@
                          qmp_marshal_qmp_capabilities, QCO_ALLOW_PRECONFIG);
 }
 
+static int getopt_set_loc(int argc, char **argv, const char *optstring,
+                          const struct option *longopts)
+{
+    int c, save_index;
+
+    optarg = NULL;
+    save_index = optind;
+    c = getopt_long(argc, argv, optstring, longopts, NULL);
+    if (optarg) {
+        loc_set_cmdline(argv, save_index, MAX(1, optind - save_index));
+    }
+    return c;
+}
+
 static void process_options(int argc, char *argv[])
 {
     int c;
@@ -164,6 +182,7 @@
         {"monitor", required_argument, NULL, OPTION_MONITOR},
         {"nbd-server", required_argument, NULL, OPTION_NBD_SERVER},
         {"object", required_argument, NULL, OPTION_OBJECT},
+        {"pidfile", required_argument, NULL, OPTION_PIDFILE},
         {"trace", required_argument, NULL, 'T'},
         {"version", no_argument, NULL, 'V'},
         {0, 0, 0, 0}
@@ -174,7 +193,7 @@
      * they are given on the command lines. This means that things must be
      * defined first before they can be referenced in another option.
      */
-    while ((c = getopt_long(argc, argv, "hT:V", long_options, NULL)) != -1) {
+    while ((c = getopt_set_loc(argc, argv, "-hT:V", long_options)) != -1) {
         switch (c) {
         case '?':
             exit(EXIT_FAILURE);
@@ -275,14 +294,38 @@
                 qobject_unref(args);
                 break;
             }
+        case OPTION_PIDFILE:
+            pid_file = optarg;
+            break;
+        case 1:
+            error_report("Unexpected argument");
+            exit(EXIT_FAILURE);
         default:
             g_assert_not_reached();
         }
     }
-    if (optind != argc) {
-        error_report("Unexpected argument: %s", argv[optind]);
+    loc_set_none();
+}
+
+static void pid_file_cleanup(void)
+{
+    unlink(pid_file);
+}
+
+static void pid_file_init(void)
+{
+    Error *err = NULL;
+
+    if (!pid_file) {
+        return;
+    }
+
+    if (!qemu_write_pidfile(pid_file, &err)) {
+        error_reportf_err(err, "cannot create PID file: ");
         exit(EXIT_FAILURE);
     }
+
+    atexit(pid_file_cleanup);
 }
 
 int main(int argc, char *argv[])
@@ -312,6 +355,13 @@
     qemu_init_main_loop(&error_fatal);
     process_options(argc, argv);
 
+    /*
+     * Write the pid file after creating chardevs, exports, and NBD servers but
+     * before accepting connections. This ordering is documented. Do not change
+     * it.
+     */
+    pid_file_init();
+
     while (!exit_requested) {
         main_loop_wait(false);
     }
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 6facb66..ae04884 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1922,331 +1922,6 @@
     return oc;
 }
 
-/* CPU models. These are not needed for the AArch64 linux-user build. */
-#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
-
-static const ARMCPRegInfo cortexa8_cp_reginfo[] = {
-    { .name = "L2LOCKDOWN", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 0,
-      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
-    { .name = "L2AUXCR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 2,
-      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
-    REGINFO_SENTINEL
-};
-
-static void cortex_a8_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cpu->dtb_compatible = "arm,cortex-a8";
-    set_feature(&cpu->env, ARM_FEATURE_V7);
-    set_feature(&cpu->env, ARM_FEATURE_NEON);
-    set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
-    set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
-    set_feature(&cpu->env, ARM_FEATURE_EL3);
-    cpu->midr = 0x410fc080;
-    cpu->reset_fpsid = 0x410330c0;
-    cpu->isar.mvfr0 = 0x11110222;
-    cpu->isar.mvfr1 = 0x00011111;
-    cpu->ctr = 0x82048004;
-    cpu->reset_sctlr = 0x00c50078;
-    cpu->isar.id_pfr0 = 0x1031;
-    cpu->isar.id_pfr1 = 0x11;
-    cpu->isar.id_dfr0 = 0x400;
-    cpu->id_afr0 = 0;
-    cpu->isar.id_mmfr0 = 0x31100003;
-    cpu->isar.id_mmfr1 = 0x20000000;
-    cpu->isar.id_mmfr2 = 0x01202000;
-    cpu->isar.id_mmfr3 = 0x11;
-    cpu->isar.id_isar0 = 0x00101111;
-    cpu->isar.id_isar1 = 0x12112111;
-    cpu->isar.id_isar2 = 0x21232031;
-    cpu->isar.id_isar3 = 0x11112131;
-    cpu->isar.id_isar4 = 0x00111142;
-    cpu->isar.dbgdidr = 0x15141000;
-    cpu->clidr = (1 << 27) | (2 << 24) | 3;
-    cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */
-    cpu->ccsidr[1] = 0x2007e01a; /* 16k L1 icache. */
-    cpu->ccsidr[2] = 0xf0000000; /* No L2 icache. */
-    cpu->reset_auxcr = 2;
-    define_arm_cp_regs(cpu, cortexa8_cp_reginfo);
-}
-
-static const ARMCPRegInfo cortexa9_cp_reginfo[] = {
-    /*
-     * power_control should be set to maximum latency. Again,
-     * default to 0 and set by private hook
-     */
-    { .name = "A9_PWRCTL", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL1_RW, .resetvalue = 0,
-      .fieldoffset = offsetof(CPUARMState, cp15.c15_power_control) },
-    { .name = "A9_DIAG", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 1,
-      .access = PL1_RW, .resetvalue = 0,
-      .fieldoffset = offsetof(CPUARMState, cp15.c15_diagnostic) },
-    { .name = "A9_PWRDIAG", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 2,
-      .access = PL1_RW, .resetvalue = 0,
-      .fieldoffset = offsetof(CPUARMState, cp15.c15_power_diagnostic) },
-    { .name = "NEONBUSY", .cp = 15, .crn = 15, .crm = 1, .opc1 = 0, .opc2 = 0,
-      .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
-    /* TLB lockdown control */
-    { .name = "TLB_LOCKR", .cp = 15, .crn = 15, .crm = 4, .opc1 = 5, .opc2 = 2,
-      .access = PL1_W, .resetvalue = 0, .type = ARM_CP_NOP },
-    { .name = "TLB_LOCKW", .cp = 15, .crn = 15, .crm = 4, .opc1 = 5, .opc2 = 4,
-      .access = PL1_W, .resetvalue = 0, .type = ARM_CP_NOP },
-    { .name = "TLB_VA", .cp = 15, .crn = 15, .crm = 5, .opc1 = 5, .opc2 = 2,
-      .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
-    { .name = "TLB_PA", .cp = 15, .crn = 15, .crm = 6, .opc1 = 5, .opc2 = 2,
-      .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
-    { .name = "TLB_ATTR", .cp = 15, .crn = 15, .crm = 7, .opc1 = 5, .opc2 = 2,
-      .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
-    REGINFO_SENTINEL
-};
-
-static void cortex_a9_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cpu->dtb_compatible = "arm,cortex-a9";
-    set_feature(&cpu->env, ARM_FEATURE_V7);
-    set_feature(&cpu->env, ARM_FEATURE_NEON);
-    set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
-    set_feature(&cpu->env, ARM_FEATURE_EL3);
-    /*
-     * Note that A9 supports the MP extensions even for
-     * A9UP and single-core A9MP (which are both different
-     * and valid configurations; we don't model A9UP).
-     */
-    set_feature(&cpu->env, ARM_FEATURE_V7MP);
-    set_feature(&cpu->env, ARM_FEATURE_CBAR);
-    cpu->midr = 0x410fc090;
-    cpu->reset_fpsid = 0x41033090;
-    cpu->isar.mvfr0 = 0x11110222;
-    cpu->isar.mvfr1 = 0x01111111;
-    cpu->ctr = 0x80038003;
-    cpu->reset_sctlr = 0x00c50078;
-    cpu->isar.id_pfr0 = 0x1031;
-    cpu->isar.id_pfr1 = 0x11;
-    cpu->isar.id_dfr0 = 0x000;
-    cpu->id_afr0 = 0;
-    cpu->isar.id_mmfr0 = 0x00100103;
-    cpu->isar.id_mmfr1 = 0x20000000;
-    cpu->isar.id_mmfr2 = 0x01230000;
-    cpu->isar.id_mmfr3 = 0x00002111;
-    cpu->isar.id_isar0 = 0x00101111;
-    cpu->isar.id_isar1 = 0x13112111;
-    cpu->isar.id_isar2 = 0x21232041;
-    cpu->isar.id_isar3 = 0x11112131;
-    cpu->isar.id_isar4 = 0x00111142;
-    cpu->isar.dbgdidr = 0x35141000;
-    cpu->clidr = (1 << 27) | (1 << 24) | 3;
-    cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */
-    cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */
-    define_arm_cp_regs(cpu, cortexa9_cp_reginfo);
-}
-
-#ifndef CONFIG_USER_ONLY
-static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
-{
-    MachineState *ms = MACHINE(qdev_get_machine());
-
-    /*
-     * Linux wants the number of processors from here.
-     * Might as well set the interrupt-controller bit too.
-     */
-    return ((ms->smp.cpus - 1) << 24) | (1 << 23);
-}
-#endif
-
-static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
-#ifndef CONFIG_USER_ONLY
-    { .name = "L2CTLR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 2,
-      .access = PL1_RW, .resetvalue = 0, .readfn = a15_l2ctlr_read,
-      .writefn = arm_cp_write_ignore, },
-#endif
-    { .name = "L2ECTLR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 3,
-      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
-    REGINFO_SENTINEL
-};
-
-static void cortex_a7_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cpu->dtb_compatible = "arm,cortex-a7";
-    set_feature(&cpu->env, ARM_FEATURE_V7VE);
-    set_feature(&cpu->env, ARM_FEATURE_NEON);
-    set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
-    set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
-    set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
-    set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
-    set_feature(&cpu->env, ARM_FEATURE_EL2);
-    set_feature(&cpu->env, ARM_FEATURE_EL3);
-    set_feature(&cpu->env, ARM_FEATURE_PMU);
-    cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A7;
-    cpu->midr = 0x410fc075;
-    cpu->reset_fpsid = 0x41023075;
-    cpu->isar.mvfr0 = 0x10110222;
-    cpu->isar.mvfr1 = 0x11111111;
-    cpu->ctr = 0x84448003;
-    cpu->reset_sctlr = 0x00c50078;
-    cpu->isar.id_pfr0 = 0x00001131;
-    cpu->isar.id_pfr1 = 0x00011011;
-    cpu->isar.id_dfr0 = 0x02010555;
-    cpu->id_afr0 = 0x00000000;
-    cpu->isar.id_mmfr0 = 0x10101105;
-    cpu->isar.id_mmfr1 = 0x40000000;
-    cpu->isar.id_mmfr2 = 0x01240000;
-    cpu->isar.id_mmfr3 = 0x02102211;
-    /*
-     * a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but
-     * table 4-41 gives 0x02101110, which includes the arm div insns.
-     */
-    cpu->isar.id_isar0 = 0x02101110;
-    cpu->isar.id_isar1 = 0x13112111;
-    cpu->isar.id_isar2 = 0x21232041;
-    cpu->isar.id_isar3 = 0x11112131;
-    cpu->isar.id_isar4 = 0x10011142;
-    cpu->isar.dbgdidr = 0x3515f005;
-    cpu->clidr = 0x0a200023;
-    cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */
-    cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */
-    cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */
-    define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */
-}
-
-static void cortex_a15_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cpu->dtb_compatible = "arm,cortex-a15";
-    set_feature(&cpu->env, ARM_FEATURE_V7VE);
-    set_feature(&cpu->env, ARM_FEATURE_NEON);
-    set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
-    set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
-    set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
-    set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
-    set_feature(&cpu->env, ARM_FEATURE_EL2);
-    set_feature(&cpu->env, ARM_FEATURE_EL3);
-    set_feature(&cpu->env, ARM_FEATURE_PMU);
-    cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A15;
-    cpu->midr = 0x412fc0f1;
-    cpu->reset_fpsid = 0x410430f0;
-    cpu->isar.mvfr0 = 0x10110222;
-    cpu->isar.mvfr1 = 0x11111111;
-    cpu->ctr = 0x8444c004;
-    cpu->reset_sctlr = 0x00c50078;
-    cpu->isar.id_pfr0 = 0x00001131;
-    cpu->isar.id_pfr1 = 0x00011011;
-    cpu->isar.id_dfr0 = 0x02010555;
-    cpu->id_afr0 = 0x00000000;
-    cpu->isar.id_mmfr0 = 0x10201105;
-    cpu->isar.id_mmfr1 = 0x20000000;
-    cpu->isar.id_mmfr2 = 0x01240000;
-    cpu->isar.id_mmfr3 = 0x02102211;
-    cpu->isar.id_isar0 = 0x02101110;
-    cpu->isar.id_isar1 = 0x13112111;
-    cpu->isar.id_isar2 = 0x21232041;
-    cpu->isar.id_isar3 = 0x11112131;
-    cpu->isar.id_isar4 = 0x10011142;
-    cpu->isar.dbgdidr = 0x3515f021;
-    cpu->clidr = 0x0a200023;
-    cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */
-    cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */
-    cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */
-    define_arm_cp_regs(cpu, cortexa15_cp_reginfo);
-}
-
-#ifndef TARGET_AARCH64
-/*
- * -cpu max: a CPU with as many features enabled as our emulation supports.
- * The version of '-cpu max' for qemu-system-aarch64 is defined in cpu64.c;
- * this only needs to handle 32 bits, and need not care about KVM.
- */
-static void arm_max_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-    cortex_a15_initfn(obj);
-
-    /* old-style VFP short-vector support */
-    cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1);
-
-#ifdef CONFIG_USER_ONLY
-    /*
-     * We don't set these in system emulation mode for the moment,
-     * since we don't correctly set (all of) the ID registers to
-     * advertise them.
-     */
-    set_feature(&cpu->env, ARM_FEATURE_V8);
-    {
-        uint32_t t;
-
-        t = cpu->isar.id_isar5;
-        t = FIELD_DP32(t, ID_ISAR5, AES, 2);
-        t = FIELD_DP32(t, ID_ISAR5, SHA1, 1);
-        t = FIELD_DP32(t, ID_ISAR5, SHA2, 1);
-        t = FIELD_DP32(t, ID_ISAR5, CRC32, 1);
-        t = FIELD_DP32(t, ID_ISAR5, RDM, 1);
-        t = FIELD_DP32(t, ID_ISAR5, VCMA, 1);
-        cpu->isar.id_isar5 = t;
-
-        t = cpu->isar.id_isar6;
-        t = FIELD_DP32(t, ID_ISAR6, JSCVT, 1);
-        t = FIELD_DP32(t, ID_ISAR6, DP, 1);
-        t = FIELD_DP32(t, ID_ISAR6, FHM, 1);
-        t = FIELD_DP32(t, ID_ISAR6, SB, 1);
-        t = FIELD_DP32(t, ID_ISAR6, SPECRES, 1);
-        cpu->isar.id_isar6 = t;
-
-        t = cpu->isar.mvfr1;
-        t = FIELD_DP32(t, MVFR1, FPHP, 3);     /* v8.2-FP16 */
-        t = FIELD_DP32(t, MVFR1, SIMDHP, 2);   /* v8.2-FP16 */
-        cpu->isar.mvfr1 = t;
-
-        t = cpu->isar.mvfr2;
-        t = FIELD_DP32(t, MVFR2, SIMDMISC, 3); /* SIMD MaxNum */
-        t = FIELD_DP32(t, MVFR2, FPMISC, 4);   /* FP MaxNum */
-        cpu->isar.mvfr2 = t;
-
-        t = cpu->isar.id_mmfr3;
-        t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* ATS1E1 */
-        cpu->isar.id_mmfr3 = t;
-
-        t = cpu->isar.id_mmfr4;
-        t = FIELD_DP32(t, ID_MMFR4, HPDS, 1); /* AA32HPD */
-        t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */
-        t = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* TTCNP */
-        t = FIELD_DP32(t, ID_MMFR4, XNX, 1); /* TTS2UXN */
-        cpu->isar.id_mmfr4 = t;
-
-        t = cpu->isar.id_pfr0;
-        t = FIELD_DP32(t, ID_PFR0, DIT, 1);
-        cpu->isar.id_pfr0 = t;
-
-        t = cpu->isar.id_pfr2;
-        t = FIELD_DP32(t, ID_PFR2, SSBS, 1);
-        cpu->isar.id_pfr2 = t;
-    }
-#endif
-}
-#endif
-
-#endif /* !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) */
-
-static const ARMCPUInfo arm_cpus[] = {
-#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
-    { .name = "cortex-a7",   .initfn = cortex_a7_initfn },
-    { .name = "cortex-a8",   .initfn = cortex_a8_initfn },
-    { .name = "cortex-a9",   .initfn = cortex_a9_initfn },
-    { .name = "cortex-a15",  .initfn = cortex_a15_initfn },
-#ifndef TARGET_AARCH64
-    { .name = "max",         .initfn = arm_max_initfn },
-#endif
-#ifdef CONFIG_USER_ONLY
-    { .name = "any",         .initfn = arm_max_initfn },
-#endif
-#endif
-};
-
 static Property arm_cpu_properties[] = {
     DEFINE_PROP_UINT32("psci-conduit", ARMCPU, psci_conduit, 0),
     DEFINE_PROP_UINT64("midr", ARMCPU, midr, 0),
@@ -2390,21 +2065,11 @@
 
 static void arm_cpu_register_types(void)
 {
-    const size_t cpu_count = ARRAY_SIZE(arm_cpus);
-
     type_register_static(&arm_cpu_type_info);
 
 #ifdef CONFIG_KVM
     type_register_static(&host_arm_cpu_type_info);
 #endif
-
-    if (cpu_count) {
-        size_t i;
-
-        for (i = 0; i < cpu_count; ++i) {
-            arm_cpu_register(&arm_cpus[i]);
-        }
-    }
 }
 
 type_init(arm_cpu_register_types)
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index fb07a33..046e476 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -15,6 +15,9 @@
 #endif /* CONFIG_TCG */
 #include "internals.h"
 #include "target/arm/idau.h"
+#if !defined(CONFIG_USER_ONLY)
+#include "hw/boards.h"
+#endif
 
 /* CPU models. These are not needed for the AArch64 linux-user build. */
 #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
@@ -255,6 +258,236 @@
     cpu->reset_auxcr = 1;
 }
 
+static const ARMCPRegInfo cortexa8_cp_reginfo[] = {
+    { .name = "L2LOCKDOWN", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 0,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "L2AUXCR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 2,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    REGINFO_SENTINEL
+};
+
+static void cortex_a8_initfn(Object *obj)
+{
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,cortex-a8";
+    set_feature(&cpu->env, ARM_FEATURE_V7);
+    set_feature(&cpu->env, ARM_FEATURE_NEON);
+    set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
+    set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+    set_feature(&cpu->env, ARM_FEATURE_EL3);
+    cpu->midr = 0x410fc080;
+    cpu->reset_fpsid = 0x410330c0;
+    cpu->isar.mvfr0 = 0x11110222;
+    cpu->isar.mvfr1 = 0x00011111;
+    cpu->ctr = 0x82048004;
+    cpu->reset_sctlr = 0x00c50078;
+    cpu->isar.id_pfr0 = 0x1031;
+    cpu->isar.id_pfr1 = 0x11;
+    cpu->isar.id_dfr0 = 0x400;
+    cpu->id_afr0 = 0;
+    cpu->isar.id_mmfr0 = 0x31100003;
+    cpu->isar.id_mmfr1 = 0x20000000;
+    cpu->isar.id_mmfr2 = 0x01202000;
+    cpu->isar.id_mmfr3 = 0x11;
+    cpu->isar.id_isar0 = 0x00101111;
+    cpu->isar.id_isar1 = 0x12112111;
+    cpu->isar.id_isar2 = 0x21232031;
+    cpu->isar.id_isar3 = 0x11112131;
+    cpu->isar.id_isar4 = 0x00111142;
+    cpu->isar.dbgdidr = 0x15141000;
+    cpu->clidr = (1 << 27) | (2 << 24) | 3;
+    cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */
+    cpu->ccsidr[1] = 0x2007e01a; /* 16k L1 icache. */
+    cpu->ccsidr[2] = 0xf0000000; /* No L2 icache. */
+    cpu->reset_auxcr = 2;
+    define_arm_cp_regs(cpu, cortexa8_cp_reginfo);
+}
+
+static const ARMCPRegInfo cortexa9_cp_reginfo[] = {
+    /*
+     * power_control should be set to maximum latency. Again,
+     * default to 0 and set by private hook
+     */
+    { .name = "A9_PWRCTL", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 0,
+      .access = PL1_RW, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.c15_power_control) },
+    { .name = "A9_DIAG", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 1,
+      .access = PL1_RW, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.c15_diagnostic) },
+    { .name = "A9_PWRDIAG", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 2,
+      .access = PL1_RW, .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.c15_power_diagnostic) },
+    { .name = "NEONBUSY", .cp = 15, .crn = 15, .crm = 1, .opc1 = 0, .opc2 = 0,
+      .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
+    /* TLB lockdown control */
+    { .name = "TLB_LOCKR", .cp = 15, .crn = 15, .crm = 4, .opc1 = 5, .opc2 = 2,
+      .access = PL1_W, .resetvalue = 0, .type = ARM_CP_NOP },
+    { .name = "TLB_LOCKW", .cp = 15, .crn = 15, .crm = 4, .opc1 = 5, .opc2 = 4,
+      .access = PL1_W, .resetvalue = 0, .type = ARM_CP_NOP },
+    { .name = "TLB_VA", .cp = 15, .crn = 15, .crm = 5, .opc1 = 5, .opc2 = 2,
+      .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
+    { .name = "TLB_PA", .cp = 15, .crn = 15, .crm = 6, .opc1 = 5, .opc2 = 2,
+      .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
+    { .name = "TLB_ATTR", .cp = 15, .crn = 15, .crm = 7, .opc1 = 5, .opc2 = 2,
+      .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
+    REGINFO_SENTINEL
+};
+
+static void cortex_a9_initfn(Object *obj)
+{
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,cortex-a9";
+    set_feature(&cpu->env, ARM_FEATURE_V7);
+    set_feature(&cpu->env, ARM_FEATURE_NEON);
+    set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
+    set_feature(&cpu->env, ARM_FEATURE_EL3);
+    /*
+     * Note that A9 supports the MP extensions even for
+     * A9UP and single-core A9MP (which are both different
+     * and valid configurations; we don't model A9UP).
+     */
+    set_feature(&cpu->env, ARM_FEATURE_V7MP);
+    set_feature(&cpu->env, ARM_FEATURE_CBAR);
+    cpu->midr = 0x410fc090;
+    cpu->reset_fpsid = 0x41033090;
+    cpu->isar.mvfr0 = 0x11110222;
+    cpu->isar.mvfr1 = 0x01111111;
+    cpu->ctr = 0x80038003;
+    cpu->reset_sctlr = 0x00c50078;
+    cpu->isar.id_pfr0 = 0x1031;
+    cpu->isar.id_pfr1 = 0x11;
+    cpu->isar.id_dfr0 = 0x000;
+    cpu->id_afr0 = 0;
+    cpu->isar.id_mmfr0 = 0x00100103;
+    cpu->isar.id_mmfr1 = 0x20000000;
+    cpu->isar.id_mmfr2 = 0x01230000;
+    cpu->isar.id_mmfr3 = 0x00002111;
+    cpu->isar.id_isar0 = 0x00101111;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232041;
+    cpu->isar.id_isar3 = 0x11112131;
+    cpu->isar.id_isar4 = 0x00111142;
+    cpu->isar.dbgdidr = 0x35141000;
+    cpu->clidr = (1 << 27) | (1 << 24) | 3;
+    cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */
+    cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */
+    define_arm_cp_regs(cpu, cortexa9_cp_reginfo);
+}
+
+#ifndef CONFIG_USER_ONLY
+static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+
+    /*
+     * Linux wants the number of processors from here.
+     * Might as well set the interrupt-controller bit too.
+     */
+    return ((ms->smp.cpus - 1) << 24) | (1 << 23);
+}
+#endif
+
+static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
+#ifndef CONFIG_USER_ONLY
+    { .name = "L2CTLR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 2,
+      .access = PL1_RW, .resetvalue = 0, .readfn = a15_l2ctlr_read,
+      .writefn = arm_cp_write_ignore, },
+#endif
+    { .name = "L2ECTLR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 3,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    REGINFO_SENTINEL
+};
+
+static void cortex_a7_initfn(Object *obj)
+{
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,cortex-a7";
+    set_feature(&cpu->env, ARM_FEATURE_V7VE);
+    set_feature(&cpu->env, ARM_FEATURE_NEON);
+    set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
+    set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+    set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+    set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+    set_feature(&cpu->env, ARM_FEATURE_EL2);
+    set_feature(&cpu->env, ARM_FEATURE_EL3);
+    set_feature(&cpu->env, ARM_FEATURE_PMU);
+    cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A7;
+    cpu->midr = 0x410fc075;
+    cpu->reset_fpsid = 0x41023075;
+    cpu->isar.mvfr0 = 0x10110222;
+    cpu->isar.mvfr1 = 0x11111111;
+    cpu->ctr = 0x84448003;
+    cpu->reset_sctlr = 0x00c50078;
+    cpu->isar.id_pfr0 = 0x00001131;
+    cpu->isar.id_pfr1 = 0x00011011;
+    cpu->isar.id_dfr0 = 0x02010555;
+    cpu->id_afr0 = 0x00000000;
+    cpu->isar.id_mmfr0 = 0x10101105;
+    cpu->isar.id_mmfr1 = 0x40000000;
+    cpu->isar.id_mmfr2 = 0x01240000;
+    cpu->isar.id_mmfr3 = 0x02102211;
+    /*
+     * a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but
+     * table 4-41 gives 0x02101110, which includes the arm div insns.
+     */
+    cpu->isar.id_isar0 = 0x02101110;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232041;
+    cpu->isar.id_isar3 = 0x11112131;
+    cpu->isar.id_isar4 = 0x10011142;
+    cpu->isar.dbgdidr = 0x3515f005;
+    cpu->clidr = 0x0a200023;
+    cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */
+    cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */
+    cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */
+    define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */
+}
+
+static void cortex_a15_initfn(Object *obj)
+{
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,cortex-a15";
+    set_feature(&cpu->env, ARM_FEATURE_V7VE);
+    set_feature(&cpu->env, ARM_FEATURE_NEON);
+    set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
+    set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+    set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+    set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+    set_feature(&cpu->env, ARM_FEATURE_EL2);
+    set_feature(&cpu->env, ARM_FEATURE_EL3);
+    set_feature(&cpu->env, ARM_FEATURE_PMU);
+    cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A15;
+    cpu->midr = 0x412fc0f1;
+    cpu->reset_fpsid = 0x410430f0;
+    cpu->isar.mvfr0 = 0x10110222;
+    cpu->isar.mvfr1 = 0x11111111;
+    cpu->ctr = 0x8444c004;
+    cpu->reset_sctlr = 0x00c50078;
+    cpu->isar.id_pfr0 = 0x00001131;
+    cpu->isar.id_pfr1 = 0x00011011;
+    cpu->isar.id_dfr0 = 0x02010555;
+    cpu->id_afr0 = 0x00000000;
+    cpu->isar.id_mmfr0 = 0x10201105;
+    cpu->isar.id_mmfr1 = 0x20000000;
+    cpu->isar.id_mmfr2 = 0x01240000;
+    cpu->isar.id_mmfr3 = 0x02102211;
+    cpu->isar.id_isar0 = 0x02101110;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232041;
+    cpu->isar.id_isar3 = 0x11112131;
+    cpu->isar.id_isar4 = 0x10011142;
+    cpu->isar.dbgdidr = 0x3515f021;
+    cpu->clidr = 0x0a200023;
+    cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */
+    cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */
+    cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */
+    define_arm_cp_regs(cpu, cortexa15_cp_reginfo);
+}
+
 static void cortex_m0_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
@@ -695,6 +928,81 @@
     cc->gdb_core_xml_file = "arm-m-profile.xml";
 }
 
+#ifndef TARGET_AARCH64
+/*
+ * -cpu max: a CPU with as many features enabled as our emulation supports.
+ * The version of '-cpu max' for qemu-system-aarch64 is defined in cpu64.c;
+ * this only needs to handle 32 bits, and need not care about KVM.
+ */
+static void arm_max_initfn(Object *obj)
+{
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    cortex_a15_initfn(obj);
+
+    /* old-style VFP short-vector support */
+    cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1);
+
+#ifdef CONFIG_USER_ONLY
+    /*
+     * We don't set these in system emulation mode for the moment,
+     * since we don't correctly set (all of) the ID registers to
+     * advertise them.
+     */
+    set_feature(&cpu->env, ARM_FEATURE_V8);
+    {
+        uint32_t t;
+
+        t = cpu->isar.id_isar5;
+        t = FIELD_DP32(t, ID_ISAR5, AES, 2);
+        t = FIELD_DP32(t, ID_ISAR5, SHA1, 1);
+        t = FIELD_DP32(t, ID_ISAR5, SHA2, 1);
+        t = FIELD_DP32(t, ID_ISAR5, CRC32, 1);
+        t = FIELD_DP32(t, ID_ISAR5, RDM, 1);
+        t = FIELD_DP32(t, ID_ISAR5, VCMA, 1);
+        cpu->isar.id_isar5 = t;
+
+        t = cpu->isar.id_isar6;
+        t = FIELD_DP32(t, ID_ISAR6, JSCVT, 1);
+        t = FIELD_DP32(t, ID_ISAR6, DP, 1);
+        t = FIELD_DP32(t, ID_ISAR6, FHM, 1);
+        t = FIELD_DP32(t, ID_ISAR6, SB, 1);
+        t = FIELD_DP32(t, ID_ISAR6, SPECRES, 1);
+        cpu->isar.id_isar6 = t;
+
+        t = cpu->isar.mvfr1;
+        t = FIELD_DP32(t, MVFR1, FPHP, 3);     /* v8.2-FP16 */
+        t = FIELD_DP32(t, MVFR1, SIMDHP, 2);   /* v8.2-FP16 */
+        cpu->isar.mvfr1 = t;
+
+        t = cpu->isar.mvfr2;
+        t = FIELD_DP32(t, MVFR2, SIMDMISC, 3); /* SIMD MaxNum */
+        t = FIELD_DP32(t, MVFR2, FPMISC, 4);   /* FP MaxNum */
+        cpu->isar.mvfr2 = t;
+
+        t = cpu->isar.id_mmfr3;
+        t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* ATS1E1 */
+        cpu->isar.id_mmfr3 = t;
+
+        t = cpu->isar.id_mmfr4;
+        t = FIELD_DP32(t, ID_MMFR4, HPDS, 1); /* AA32HPD */
+        t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */
+        t = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* TTCNP */
+        t = FIELD_DP32(t, ID_MMFR4, XNX, 1); /* TTS2UXN */
+        cpu->isar.id_mmfr4 = t;
+
+        t = cpu->isar.id_pfr0;
+        t = FIELD_DP32(t, ID_PFR0, DIT, 1);
+        cpu->isar.id_pfr0 = t;
+
+        t = cpu->isar.id_pfr2;
+        t = FIELD_DP32(t, ID_PFR2, SSBS, 1);
+        cpu->isar.id_pfr2 = t;
+    }
+#endif /* CONFIG_USER_ONLY */
+}
+#endif /* !TARGET_AARCH64 */
+
 static const ARMCPUInfo arm_tcg_cpus[] = {
     { .name = "arm926",      .initfn = arm926_initfn },
     { .name = "arm946",      .initfn = arm946_initfn },
@@ -708,6 +1016,10 @@
     { .name = "arm1136",     .initfn = arm1136_initfn },
     { .name = "arm1176",     .initfn = arm1176_initfn },
     { .name = "arm11mpcore", .initfn = arm11mpcore_initfn },
+    { .name = "cortex-a7",   .initfn = cortex_a7_initfn },
+    { .name = "cortex-a8",   .initfn = cortex_a8_initfn },
+    { .name = "cortex-a9",   .initfn = cortex_a9_initfn },
+    { .name = "cortex-a15",  .initfn = cortex_a15_initfn },
     { .name = "cortex-m0",   .initfn = cortex_m0_initfn,
                              .class_init = arm_v7m_class_init },
     { .name = "cortex-m3",   .initfn = cortex_m3_initfn,
@@ -738,6 +1050,12 @@
     { .name = "pxa270-b1",   .initfn = pxa270b1_initfn },
     { .name = "pxa270-c0",   .initfn = pxa270c0_initfn },
     { .name = "pxa270-c5",   .initfn = pxa270c5_initfn },
+#ifndef TARGET_AARCH64
+    { .name = "max",         .initfn = arm_max_initfn },
+#endif
+#ifdef CONFIG_USER_ONLY
+    { .name = "any",         .initfn = arm_max_initfn },
+#endif
 };
 
 static const TypeInfo idau_interface_type_info = {
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index 78c4efb..cfcb817 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -459,7 +459,7 @@
                    : (fCAST##REGSTYPE##s(SRC) >> (SHAMT)))
 #define fASHIFTR(SRC, SHAMT, REGSTYPE) (fCAST##REGSTYPE##s(SRC) >> (SHAMT))
 #define fLSHIFTR(SRC, SHAMT, REGSTYPE) \
-    (((SHAMT) >= 64) ? 0 : (fCAST##REGSTYPE##u(SRC) >> (SHAMT)))
+    (((SHAMT) >= (sizeof(SRC) * 8)) ? 0 : (fCAST##REGSTYPE##u(SRC) >> (SHAMT)))
 #define fROTL(SRC, SHAMT, REGSTYPE) \
     (((SHAMT) == 0) ? (SRC) : ((fCAST##REGSTYPE##u(SRC) << (SHAMT)) | \
                               ((fCAST##REGSTYPE##u(SRC) >> \
@@ -469,7 +469,7 @@
                               ((fCAST##REGSTYPE##u(SRC) << \
                                  ((sizeof(SRC) * 8) - (SHAMT))))))
 #define fASHIFTL(SRC, SHAMT, REGSTYPE) \
-    (((SHAMT) >= 64) ? 0 : (fCAST##REGSTYPE##s(SRC) << (SHAMT)))
+    (((SHAMT) >= (sizeof(SRC) * 8)) ? 0 : (fCAST##REGSTYPE##s(SRC) << (SHAMT)))
 
 #ifdef QEMU_GENERATE
 #define fLOAD(NUM, SIZE, SIGN, EA, DST) MEM_LOAD##SIZE##SIGN(DST, EA)
diff --git a/target/hexagon/opcodes.c b/target/hexagon/opcodes.c
index 4eef5fc..35d790c 100644
--- a/target/hexagon/opcodes.c
+++ b/target/hexagon/opcodes.c
@@ -82,6 +82,7 @@
     while ((attr = va_arg(ap, int)) != 0) {
         set_bit(attr, opcode_attribs[tag]);
     }
+    va_end(ap);
 }
 
 const OpcodeEncoding opcode_encodings[] = {
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index bf70c77..f6ef09c 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -653,7 +653,7 @@
     MIPSCPUClass *mcc = MIPS_CPU_GET_CLASS(obj);
 
     cpu_set_cpustate_pointers(cpu);
-    cpu->clock = qdev_init_clock_in(DEVICE(obj), "clk-in", NULL, cpu);
+    cpu->clock = qdev_init_clock_in(DEVICE(obj), "clk-in", NULL, cpu, 0);
     env->cpu_model = mcc->cpu_def;
 }
 
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 1376cdc..fcaa5af 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -519,6 +519,39 @@
     I3606_BIC       = 0x2f001400,
     I3606_ORR       = 0x0f001400,
 
+    /* AdvSIMD scalar shift by immediate */
+    I3609_SSHR      = 0x5f000400,
+    I3609_SSRA      = 0x5f001400,
+    I3609_SHL       = 0x5f005400,
+    I3609_USHR      = 0x7f000400,
+    I3609_USRA      = 0x7f001400,
+    I3609_SLI       = 0x7f005400,
+
+    /* AdvSIMD scalar three same */
+    I3611_SQADD     = 0x5e200c00,
+    I3611_SQSUB     = 0x5e202c00,
+    I3611_CMGT      = 0x5e203400,
+    I3611_CMGE      = 0x5e203c00,
+    I3611_SSHL      = 0x5e204400,
+    I3611_ADD       = 0x5e208400,
+    I3611_CMTST     = 0x5e208c00,
+    I3611_UQADD     = 0x7e200c00,
+    I3611_UQSUB     = 0x7e202c00,
+    I3611_CMHI      = 0x7e203400,
+    I3611_CMHS      = 0x7e203c00,
+    I3611_USHL      = 0x7e204400,
+    I3611_SUB       = 0x7e208400,
+    I3611_CMEQ      = 0x7e208c00,
+
+    /* AdvSIMD scalar two-reg misc */
+    I3612_CMGT0     = 0x5e208800,
+    I3612_CMEQ0     = 0x5e209800,
+    I3612_CMLT0     = 0x5e20a800,
+    I3612_ABS       = 0x5e20b800,
+    I3612_CMGE0     = 0x7e208800,
+    I3612_CMLE0     = 0x7e209800,
+    I3612_NEG       = 0x7e20b800,
+
     /* AdvSIMD shift by immediate */
     I3614_SSHR      = 0x0f000400,
     I3614_SSRA      = 0x0f001400,
@@ -561,7 +594,7 @@
     I3617_CMEQ0     = 0x0e209800,
     I3617_CMLT0     = 0x0e20a800,
     I3617_CMGE0     = 0x2e208800,
-    I3617_CMLE0     = 0x2e20a800,
+    I3617_CMLE0     = 0x2e209800,
     I3617_NOT       = 0x2e205800,
     I3617_ABS       = 0x0e20b800,
     I3617_NEG       = 0x2e20b800,
@@ -735,6 +768,25 @@
               | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
 }
 
+static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
+                              TCGReg rd, TCGReg rn, unsigned immhb)
+{
+    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
+}
+
+static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
+                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
+{
+    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
+              | (rn & 0x1f) << 5 | (rd & 0x1f));
+}
+
+static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
+                              unsigned size, TCGReg rd, TCGReg rn)
+{
+    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
+}
+
 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                               TCGReg rd, TCGReg rn, unsigned immhb)
 {
@@ -1410,10 +1462,10 @@
     }
 }
 
-static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
-                                   TCGReg rh, TCGReg al, TCGReg ah,
-                                   tcg_target_long bl, tcg_target_long bh,
-                                   bool const_bl, bool const_bh, bool sub)
+static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
+                            TCGReg rh, TCGReg al, TCGReg ah,
+                            tcg_target_long bl, tcg_target_long bh,
+                            bool const_bl, bool const_bh, bool sub)
 {
     TCGReg orig_rl = rl;
     AArch64Insn insn;
@@ -1423,11 +1475,13 @@
     }
 
     if (const_bl) {
-        insn = I3401_ADDSI;
-        if ((bl < 0) ^ sub) {
-            insn = I3401_SUBSI;
+        if (bl < 0) {
             bl = -bl;
+            insn = sub ? I3401_ADDSI : I3401_SUBSI;
+        } else {
+            insn = sub ? I3401_SUBSI : I3401_ADDSI;
         }
+
         if (unlikely(al == TCG_REG_XZR)) {
             /* ??? We want to allow al to be zero for the benefit of
                negation via subtraction.  However, that leaves open the
@@ -2234,23 +2288,38 @@
                            unsigned vecl, unsigned vece,
                            const TCGArg *args, const int *const_args)
 {
-    static const AArch64Insn cmp_insn[16] = {
+    static const AArch64Insn cmp_vec_insn[16] = {
         [TCG_COND_EQ] = I3616_CMEQ,
         [TCG_COND_GT] = I3616_CMGT,
         [TCG_COND_GE] = I3616_CMGE,
         [TCG_COND_GTU] = I3616_CMHI,
         [TCG_COND_GEU] = I3616_CMHS,
     };
-    static const AArch64Insn cmp0_insn[16] = {
+    static const AArch64Insn cmp_scalar_insn[16] = {
+        [TCG_COND_EQ] = I3611_CMEQ,
+        [TCG_COND_GT] = I3611_CMGT,
+        [TCG_COND_GE] = I3611_CMGE,
+        [TCG_COND_GTU] = I3611_CMHI,
+        [TCG_COND_GEU] = I3611_CMHS,
+    };
+    static const AArch64Insn cmp0_vec_insn[16] = {
         [TCG_COND_EQ] = I3617_CMEQ0,
         [TCG_COND_GT] = I3617_CMGT0,
         [TCG_COND_GE] = I3617_CMGE0,
         [TCG_COND_LT] = I3617_CMLT0,
         [TCG_COND_LE] = I3617_CMLE0,
     };
+    static const AArch64Insn cmp0_scalar_insn[16] = {
+        [TCG_COND_EQ] = I3612_CMEQ0,
+        [TCG_COND_GT] = I3612_CMGT0,
+        [TCG_COND_GE] = I3612_CMGE0,
+        [TCG_COND_LT] = I3612_CMLT0,
+        [TCG_COND_LE] = I3612_CMLE0,
+    };
 
     TCGType type = vecl + TCG_TYPE_V64;
     unsigned is_q = vecl;
+    bool is_scalar = !is_q && vece == MO_64;
     TCGArg a0, a1, a2, a3;
     int cmode, imm8;
 
@@ -2269,19 +2338,35 @@
         tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
         break;
     case INDEX_op_add_vec:
-        tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_sub_vec:
-        tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_mul_vec:
         tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
         break;
     case INDEX_op_neg_vec:
-        tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
+        if (is_scalar) {
+            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
+        } else {
+            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
+        }
         break;
     case INDEX_op_abs_vec:
-        tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
+        if (is_scalar) {
+            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
+        } else {
+            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
+        }
         break;
     case INDEX_op_and_vec:
         if (const_args[2]) {
@@ -2335,16 +2420,32 @@
         tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
         break;
     case INDEX_op_ssadd_vec:
-        tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_sssub_vec:
-        tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_usadd_vec:
-        tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_ussub_vec:
-        tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_smax_vec:
         tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
@@ -2362,22 +2463,46 @@
         tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
         break;
     case INDEX_op_shli_vec:
-        tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
+        if (is_scalar) {
+            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
+        } else {
+            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
+        }
         break;
     case INDEX_op_shri_vec:
-        tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
+        } else {
+            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
+        }
         break;
     case INDEX_op_sari_vec:
-        tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
+        } else {
+            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
+        }
         break;
     case INDEX_op_aa64_sli_vec:
-        tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
+        if (is_scalar) {
+            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
+        } else {
+            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
+        }
         break;
     case INDEX_op_shlv_vec:
-        tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_aa64_sshl_vec:
-        tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
+        if (is_scalar) {
+            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
+        } else {
+            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
+        }
         break;
     case INDEX_op_cmp_vec:
         {
@@ -2386,30 +2511,58 @@
 
             if (cond == TCG_COND_NE) {
                 if (const_args[2]) {
-                    tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
+                    if (is_scalar) {
+                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
+                    } else {
+                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
+                    }
                 } else {
-                    tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
+                    if (is_scalar) {
+                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
+                    } else {
+                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
+                    }
                     tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
                 }
             } else {
                 if (const_args[2]) {
-                    insn = cmp0_insn[cond];
-                    if (insn) {
-                        tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
-                        break;
+                    if (is_scalar) {
+                        insn = cmp0_scalar_insn[cond];
+                        if (insn) {
+                            tcg_out_insn_3612(s, insn, vece, a0, a1);
+                            break;
+                        }
+                    } else {
+                        insn = cmp0_vec_insn[cond];
+                        if (insn) {
+                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
+                            break;
+                        }
                     }
                     tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
                     a2 = TCG_VEC_TMP;
                 }
-                insn = cmp_insn[cond];
-                if (insn == 0) {
-                    TCGArg t;
-                    t = a1, a1 = a2, a2 = t;
-                    cond = tcg_swap_cond(cond);
-                    insn = cmp_insn[cond];
-                    tcg_debug_assert(insn != 0);
+                if (is_scalar) {
+                    insn = cmp_scalar_insn[cond];
+                    if (insn == 0) {
+                        TCGArg t;
+                        t = a1, a1 = a2, a2 = t;
+                        cond = tcg_swap_cond(cond);
+                        insn = cmp_scalar_insn[cond];
+                        tcg_debug_assert(insn != 0);
+                    }
+                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
+                } else {
+                    insn = cmp_vec_insn[cond];
+                    if (insn == 0) {
+                        TCGArg t;
+                        t = a1, a1 = a2, a2 = t;
+                        cond = tcg_swap_cond(cond);
+                        insn = cmp_vec_insn[cond];
+                        tcg_debug_assert(insn != 0);
+                    }
+                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
                 }
-                tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
             }
         }
         break;
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 63a12b1..2991112 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -346,6 +346,12 @@
     s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
 }
 
+/* Signal overflow, starting over with fewer guest insns. */
+static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
+{
+    siglongjmp(s->jmp_trans, -2);
+}
+
 #define C_PFX1(P, A)                    P##A
 #define C_PFX2(P, A, B)                 P##A##_##B
 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
@@ -507,11 +513,21 @@
     }
 }
 
-static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
+static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
 {
-    void *p = tcg_splitwx_to_rw(cp);
     size_t region_idx;
 
+    /*
+     * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
+     * a signal handler over which the caller has no control.
+     */
+    if (!in_code_gen_buffer(p)) {
+        p -= tcg_splitwx_diff;
+        if (!in_code_gen_buffer(p)) {
+            return NULL;
+        }
+    }
+
     if (p < region.start_aligned) {
         region_idx = 0;
     } else {
@@ -530,6 +546,7 @@
 {
     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
 
+    g_assert(rt != NULL);
     qemu_mutex_lock(&rt->lock);
     g_tree_insert(rt->tree, &tb->tc, tb);
     qemu_mutex_unlock(&rt->lock);
@@ -539,6 +556,7 @@
 {
     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
 
+    g_assert(rt != NULL);
     qemu_mutex_lock(&rt->lock);
     g_tree_remove(rt->tree, &tb->tc);
     qemu_mutex_unlock(&rt->lock);
@@ -555,6 +573,10 @@
     TranslationBlock *tb;
     struct tb_tc s = { .ptr = (void *)tc_ptr };
 
+    if (rt == NULL) {
+        return NULL;
+    }
+
     qemu_mutex_lock(&rt->lock);
     tb = g_tree_lookup(rt->tree, &s);
     qemu_mutex_unlock(&rt->lock);
@@ -1310,8 +1332,7 @@
     int n = s->nb_temps++;
 
     if (n >= TCG_MAX_TEMPS) {
-        /* Signal overflow, starting over with fewer guest insns. */
-        siglongjmp(s->jmp_trans, -2);
+        tcg_raise_tb_overflow(s);
     }
     return memset(&s->temps[n], 0, sizeof(TCGTemp));
 }
diff --git a/tcg/tci.c b/tcg/tci.c
index fb3c97a..3ccd30c 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -57,49 +57,6 @@
     return regs[index];
 }
 
-#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
-static int8_t tci_read_reg8s(const tcg_target_ulong *regs, TCGReg index)
-{
-    return (int8_t)tci_read_reg(regs, index);
-}
-#endif
-
-#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64
-static int16_t tci_read_reg16s(const tcg_target_ulong *regs, TCGReg index)
-{
-    return (int16_t)tci_read_reg(regs, index);
-}
-#endif
-
-#if TCG_TARGET_REG_BITS == 64
-static int32_t tci_read_reg32s(const tcg_target_ulong *regs, TCGReg index)
-{
-    return (int32_t)tci_read_reg(regs, index);
-}
-#endif
-
-static uint8_t tci_read_reg8(const tcg_target_ulong *regs, TCGReg index)
-{
-    return (uint8_t)tci_read_reg(regs, index);
-}
-
-static uint16_t tci_read_reg16(const tcg_target_ulong *regs, TCGReg index)
-{
-    return (uint16_t)tci_read_reg(regs, index);
-}
-
-static uint32_t tci_read_reg32(const tcg_target_ulong *regs, TCGReg index)
-{
-    return (uint32_t)tci_read_reg(regs, index);
-}
-
-#if TCG_TARGET_REG_BITS == 64
-static uint64_t tci_read_reg64(const tcg_target_ulong *regs, TCGReg index)
-{
-    return tci_read_reg(regs, index);
-}
-#endif
-
 static void
 tci_write_reg(tcg_target_ulong *regs, TCGReg index, tcg_target_ulong value)
 {
@@ -169,78 +126,20 @@
     return value;
 }
 
-/* Read indexed register (8 bit) from bytecode. */
-static uint8_t tci_read_r8(const tcg_target_ulong *regs, const uint8_t **tb_ptr)
-{
-    uint8_t value = tci_read_reg8(regs, **tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-
-#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
-/* Read indexed register (8 bit signed) from bytecode. */
-static int8_t tci_read_r8s(const tcg_target_ulong *regs, const uint8_t **tb_ptr)
-{
-    int8_t value = tci_read_reg8s(regs, **tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-#endif
-
-/* Read indexed register (16 bit) from bytecode. */
-static uint16_t tci_read_r16(const tcg_target_ulong *regs,
-                             const uint8_t **tb_ptr)
-{
-    uint16_t value = tci_read_reg16(regs, **tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-
-#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64
-/* Read indexed register (16 bit signed) from bytecode. */
-static int16_t tci_read_r16s(const tcg_target_ulong *regs,
-                             const uint8_t **tb_ptr)
-{
-    int16_t value = tci_read_reg16s(regs, **tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-#endif
-
-/* Read indexed register (32 bit) from bytecode. */
-static uint32_t tci_read_r32(const tcg_target_ulong *regs,
-                             const uint8_t **tb_ptr)
-{
-    uint32_t value = tci_read_reg32(regs, **tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-
 #if TCG_TARGET_REG_BITS == 32
 /* Read two indexed registers (2 * 32 bit) from bytecode. */
 static uint64_t tci_read_r64(const tcg_target_ulong *regs,
                              const uint8_t **tb_ptr)
 {
-    uint32_t low = tci_read_r32(regs, tb_ptr);
-    return tci_uint64(tci_read_r32(regs, tb_ptr), low);
+    uint32_t low = tci_read_r(regs, tb_ptr);
+    return tci_uint64(tci_read_r(regs, tb_ptr), low);
 }
 #elif TCG_TARGET_REG_BITS == 64
-/* Read indexed register (32 bit signed) from bytecode. */
-static int32_t tci_read_r32s(const tcg_target_ulong *regs,
-                             const uint8_t **tb_ptr)
-{
-    int32_t value = tci_read_reg32s(regs, **tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-
 /* Read indexed register (64 bit) from bytecode. */
 static uint64_t tci_read_r64(const tcg_target_ulong *regs,
                              const uint8_t **tb_ptr)
 {
-    uint64_t value = tci_read_reg64(regs, **tb_ptr);
-    *tb_ptr += 1;
-    return value;
+    return tci_read_r(regs, tb_ptr);
 }
 #endif
 
@@ -346,51 +245,34 @@
     return result;
 }
 
-#ifdef CONFIG_SOFTMMU
-# define qemu_ld_ub \
-    helper_ret_ldub_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
-# define qemu_ld_leuw \
-    helper_le_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
-# define qemu_ld_leul \
-    helper_le_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
-# define qemu_ld_leq \
-    helper_le_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
-# define qemu_ld_beuw \
-    helper_be_lduw_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
-# define qemu_ld_beul \
-    helper_be_ldul_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
-# define qemu_ld_beq \
-    helper_be_ldq_mmu(env, taddr, oi, (uintptr_t)tb_ptr)
-# define qemu_st_b(X) \
-    helper_ret_stb_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
-# define qemu_st_lew(X) \
-    helper_le_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
-# define qemu_st_lel(X) \
-    helper_le_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
-# define qemu_st_leq(X) \
-    helper_le_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
-# define qemu_st_bew(X) \
-    helper_be_stw_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
-# define qemu_st_bel(X) \
-    helper_be_stl_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
-# define qemu_st_beq(X) \
-    helper_be_stq_mmu(env, taddr, X, oi, (uintptr_t)tb_ptr)
-#else
-# define qemu_ld_ub      ldub_p(g2h(taddr))
-# define qemu_ld_leuw    lduw_le_p(g2h(taddr))
-# define qemu_ld_leul    (uint32_t)ldl_le_p(g2h(taddr))
-# define qemu_ld_leq     ldq_le_p(g2h(taddr))
-# define qemu_ld_beuw    lduw_be_p(g2h(taddr))
-# define qemu_ld_beul    (uint32_t)ldl_be_p(g2h(taddr))
-# define qemu_ld_beq     ldq_be_p(g2h(taddr))
-# define qemu_st_b(X)    stb_p(g2h(taddr), X)
-# define qemu_st_lew(X)  stw_le_p(g2h(taddr), X)
-# define qemu_st_lel(X)  stl_le_p(g2h(taddr), X)
-# define qemu_st_leq(X)  stq_le_p(g2h(taddr), X)
-# define qemu_st_bew(X)  stw_be_p(g2h(taddr), X)
-# define qemu_st_bel(X)  stl_be_p(g2h(taddr), X)
-# define qemu_st_beq(X)  stq_be_p(g2h(taddr), X)
-#endif
+#define qemu_ld_ub \
+    cpu_ldub_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_ld_leuw \
+    cpu_lduw_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_ld_leul \
+    cpu_ldl_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_ld_leq \
+    cpu_ldq_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_ld_beuw \
+    cpu_lduw_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_ld_beul \
+    cpu_ldl_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_ld_beq \
+    cpu_ldq_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_st_b(X) \
+    cpu_stb_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_st_lew(X) \
+    cpu_stw_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_st_lel(X) \
+    cpu_stl_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_st_leq(X) \
+    cpu_stq_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_st_bew(X) \
+    cpu_stw_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_st_bel(X) \
+    cpu_stl_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
+#define qemu_st_beq(X) \
+    cpu_stq_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr)
 
 #if TCG_TARGET_REG_BITS == 64
 # define CASE_32_64(x) \
@@ -483,8 +365,8 @@
             continue;
         case INDEX_op_setcond_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             condition = *tb_ptr++;
             tci_write_reg(regs, t0, tci_compare32(t1, t2, condition));
             break;
@@ -499,15 +381,15 @@
 #elif TCG_TARGET_REG_BITS == 64
         case INDEX_op_setcond_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             condition = *tb_ptr++;
             tci_write_reg(regs, t0, tci_compare64(t1, t2, condition));
             break;
 #endif
-        case INDEX_op_mov_i32:
+        CASE_32_64(mov)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, t1);
             break;
         case INDEX_op_tci_movi_i32:
@@ -550,127 +432,130 @@
             tci_write_reg(regs, t0, *(uint32_t *)(t1 + t2));
             break;
         CASE_32_64(st8)
-            t0 = tci_read_r8(regs, &tb_ptr);
+            t0 = tci_read_r(regs, &tb_ptr);
             t1 = tci_read_r(regs, &tb_ptr);
             t2 = tci_read_s32(&tb_ptr);
             *(uint8_t *)(t1 + t2) = t0;
             break;
         CASE_32_64(st16)
-            t0 = tci_read_r16(regs, &tb_ptr);
+            t0 = tci_read_r(regs, &tb_ptr);
             t1 = tci_read_r(regs, &tb_ptr);
             t2 = tci_read_s32(&tb_ptr);
             *(uint16_t *)(t1 + t2) = t0;
             break;
         case INDEX_op_st_i32:
         CASE_64(st32)
-            t0 = tci_read_r32(regs, &tb_ptr);
+            t0 = tci_read_r(regs, &tb_ptr);
             t1 = tci_read_r(regs, &tb_ptr);
             t2 = tci_read_s32(&tb_ptr);
             *(uint32_t *)(t1 + t2) = t0;
             break;
 
-            /* Arithmetic operations (32 bit). */
+            /* Arithmetic operations (mixed 32/64 bit). */
 
-        case INDEX_op_add_i32:
+        CASE_32_64(add)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, t1 + t2);
             break;
-        case INDEX_op_sub_i32:
+        CASE_32_64(sub)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, t1 - t2);
             break;
-        case INDEX_op_mul_i32:
+        CASE_32_64(mul)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, t1 * t2);
             break;
+        CASE_32_64(and)
+            t0 = *tb_ptr++;
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, t1 & t2);
+            break;
+        CASE_32_64(or)
+            t0 = *tb_ptr++;
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, t1 | t2);
+            break;
+        CASE_32_64(xor)
+            t0 = *tb_ptr++;
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, t1 ^ t2);
+            break;
+
+            /* Arithmetic operations (32 bit). */
+
         case INDEX_op_div_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, (int32_t)t1 / (int32_t)t2);
             break;
         case INDEX_op_divu_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 / t2);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (uint32_t)t1 / (uint32_t)t2);
             break;
         case INDEX_op_rem_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, (int32_t)t1 % (int32_t)t2);
             break;
         case INDEX_op_remu_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 % t2);
-            break;
-        case INDEX_op_and_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 & t2);
-            break;
-        case INDEX_op_or_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 | t2);
-            break;
-        case INDEX_op_xor_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 ^ t2);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (uint32_t)t1 % (uint32_t)t2);
             break;
 
             /* Shift/rotate operations (32 bit). */
 
         case INDEX_op_shl_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 << (t2 & 31));
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (uint32_t)t1 << (t2 & 31));
             break;
         case INDEX_op_shr_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 >> (t2 & 31));
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (uint32_t)t1 >> (t2 & 31));
             break;
         case INDEX_op_sar_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, ((int32_t)t1 >> (t2 & 31)));
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (int32_t)t1 >> (t2 & 31));
             break;
 #if TCG_TARGET_HAS_rot_i32
         case INDEX_op_rotl_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, rol32(t1, t2 & 31));
             break;
         case INDEX_op_rotr_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, ror32(t1, t2 & 31));
             break;
 #endif
 #if TCG_TARGET_HAS_deposit_i32
         case INDEX_op_deposit_i32:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            t2 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tmp16 = *tb_ptr++;
             tmp8 = *tb_ptr++;
             tmp32 = (((1 << tmp8) - 1) << tmp16);
@@ -678,8 +563,8 @@
             break;
 #endif
         case INDEX_op_brcond_i32:
-            t0 = tci_read_r32(regs, &tb_ptr);
-            t1 = tci_read_r32(regs, &tb_ptr);
+            t0 = tci_read_r(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             condition = *tb_ptr++;
             label = tci_read_label(&tb_ptr);
             if (tci_compare32(t0, t1, condition)) {
@@ -717,73 +602,68 @@
         case INDEX_op_mulu2_i32:
             t0 = *tb_ptr++;
             t1 = *tb_ptr++;
-            t2 = tci_read_r32(regs, &tb_ptr);
-            tmp64 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg64(regs, t1, t0, t2 * tmp64);
+            t2 = tci_read_r(regs, &tb_ptr);
+            tmp64 = (uint32_t)tci_read_r(regs, &tb_ptr);
+            tci_write_reg64(regs, t1, t0, (uint32_t)t2 * tmp64);
             break;
 #endif /* TCG_TARGET_REG_BITS == 32 */
-#if TCG_TARGET_HAS_ext8s_i32
-        case INDEX_op_ext8s_i32:
+#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
+        CASE_32_64(ext8s)
             t0 = *tb_ptr++;
-            t1 = tci_read_r8s(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
+            t1 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (int8_t)t1);
             break;
 #endif
-#if TCG_TARGET_HAS_ext16s_i32
-        case INDEX_op_ext16s_i32:
+#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64
+        CASE_32_64(ext16s)
             t0 = *tb_ptr++;
-            t1 = tci_read_r16s(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
+            t1 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (int16_t)t1);
             break;
 #endif
-#if TCG_TARGET_HAS_ext8u_i32
-        case INDEX_op_ext8u_i32:
+#if TCG_TARGET_HAS_ext8u_i32 || TCG_TARGET_HAS_ext8u_i64
+        CASE_32_64(ext8u)
             t0 = *tb_ptr++;
-            t1 = tci_read_r8(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
+            t1 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (uint8_t)t1);
             break;
 #endif
-#if TCG_TARGET_HAS_ext16u_i32
-        case INDEX_op_ext16u_i32:
+#if TCG_TARGET_HAS_ext16u_i32 || TCG_TARGET_HAS_ext16u_i64
+        CASE_32_64(ext16u)
             t0 = *tb_ptr++;
-            t1 = tci_read_r16(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
+            t1 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (uint16_t)t1);
             break;
 #endif
-#if TCG_TARGET_HAS_bswap16_i32
-        case INDEX_op_bswap16_i32:
+#if TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64
+        CASE_32_64(bswap16)
             t0 = *tb_ptr++;
-            t1 = tci_read_r16(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, bswap16(t1));
             break;
 #endif
-#if TCG_TARGET_HAS_bswap32_i32
-        case INDEX_op_bswap32_i32:
+#if TCG_TARGET_HAS_bswap32_i32 || TCG_TARGET_HAS_bswap32_i64
+        CASE_32_64(bswap32)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, bswap32(t1));
             break;
 #endif
-#if TCG_TARGET_HAS_not_i32
-        case INDEX_op_not_i32:
+#if TCG_TARGET_HAS_not_i32 || TCG_TARGET_HAS_not_i64
+        CASE_32_64(not)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, ~t1);
             break;
 #endif
-#if TCG_TARGET_HAS_neg_i32
-        case INDEX_op_neg_i32:
+#if TCG_TARGET_HAS_neg_i32 || TCG_TARGET_HAS_neg_i64
+        CASE_32_64(neg)
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, -t1);
             break;
 #endif
 #if TCG_TARGET_REG_BITS == 64
-        case INDEX_op_mov_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
-            break;
         case INDEX_op_tci_movi_i64:
             t0 = *tb_ptr++;
             t1 = tci_read_i64(&tb_ptr);
@@ -805,7 +685,7 @@
             tci_write_reg(regs, t0, *(uint64_t *)(t1 + t2));
             break;
         case INDEX_op_st_i64:
-            t0 = tci_read_r64(regs, &tb_ptr);
+            t0 = tci_read_r(regs, &tb_ptr);
             t1 = tci_read_r(regs, &tb_ptr);
             t2 = tci_read_s32(&tb_ptr);
             *(uint64_t *)(t1 + t2) = t0;
@@ -813,106 +693,70 @@
 
             /* Arithmetic operations (64 bit). */
 
-        case INDEX_op_add_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 + t2);
-            break;
-        case INDEX_op_sub_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 - t2);
-            break;
-        case INDEX_op_mul_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 * t2);
-            break;
         case INDEX_op_div_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, (int64_t)t1 / (int64_t)t2);
             break;
         case INDEX_op_divu_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, (uint64_t)t1 / (uint64_t)t2);
             break;
         case INDEX_op_rem_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, (int64_t)t1 % (int64_t)t2);
             break;
         case INDEX_op_remu_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, (uint64_t)t1 % (uint64_t)t2);
             break;
-        case INDEX_op_and_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 & t2);
-            break;
-        case INDEX_op_or_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 | t2);
-            break;
-        case INDEX_op_xor_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1 ^ t2);
-            break;
 
             /* Shift/rotate operations (64 bit). */
 
         case INDEX_op_shl_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, t1 << (t2 & 63));
             break;
         case INDEX_op_shr_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, t1 >> (t2 & 63));
             break;
         case INDEX_op_sar_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, ((int64_t)t1 >> (t2 & 63)));
             break;
 #if TCG_TARGET_HAS_rot_i64
         case INDEX_op_rotl_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, rol64(t1, t2 & 63));
             break;
         case INDEX_op_rotr_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, ror64(t1, t2 & 63));
             break;
 #endif
 #if TCG_TARGET_HAS_deposit_i64
         case INDEX_op_deposit_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            t2 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
+            t2 = tci_read_r(regs, &tb_ptr);
             tmp16 = *tb_ptr++;
             tmp8 = *tb_ptr++;
             tmp64 = (((1ULL << tmp8) - 1) << tmp16);
@@ -920,8 +764,8 @@
             break;
 #endif
         case INDEX_op_brcond_i64:
-            t0 = tci_read_r64(regs, &tb_ptr);
-            t1 = tci_read_r64(regs, &tb_ptr);
+            t0 = tci_read_r(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             condition = *tb_ptr++;
             label = tci_read_label(&tb_ptr);
             if (tci_compare64(t0, t1, condition)) {
@@ -930,85 +774,29 @@
                 continue;
             }
             break;
-#if TCG_TARGET_HAS_ext8u_i64
-        case INDEX_op_ext8u_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r8(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_ext8s_i64
-        case INDEX_op_ext8s_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r8s(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_ext16s_i64
-        case INDEX_op_ext16s_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r16s(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_ext16u_i64
-        case INDEX_op_ext16u_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r16(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
-            break;
-#endif
 #if TCG_TARGET_HAS_ext32s_i64
         case INDEX_op_ext32s_i64:
 #endif
         case INDEX_op_ext_i32_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32s(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
+            t1 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (int32_t)t1);
             break;
 #if TCG_TARGET_HAS_ext32u_i64
         case INDEX_op_ext32u_i64:
 #endif
         case INDEX_op_extu_i32_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, t1);
+            t1 = tci_read_r(regs, &tb_ptr);
+            tci_write_reg(regs, t0, (uint32_t)t1);
             break;
-#if TCG_TARGET_HAS_bswap16_i64
-        case INDEX_op_bswap16_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r16(regs, &tb_ptr);
-            tci_write_reg(regs, t0, bswap16(t1));
-            break;
-#endif
-#if TCG_TARGET_HAS_bswap32_i64
-        case INDEX_op_bswap32_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r32(regs, &tb_ptr);
-            tci_write_reg(regs, t0, bswap32(t1));
-            break;
-#endif
 #if TCG_TARGET_HAS_bswap64_i64
         case INDEX_op_bswap64_i64:
             t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
+            t1 = tci_read_r(regs, &tb_ptr);
             tci_write_reg(regs, t0, bswap64(t1));
             break;
 #endif
-#if TCG_TARGET_HAS_not_i64
-        case INDEX_op_not_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, ~t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_neg_i64
-        case INDEX_op_neg_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(regs, &tb_ptr);
-            tci_write_reg(regs, t0, -t1);
-            break;
-#endif
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
             /* QEMU specific operations. */
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index feac465..c79f9c3 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -380,6 +380,18 @@
     old_code_ptr[1] = s->code_ptr - old_code_ptr;
 }
 
+#if TCG_TARGET_REG_BITS == 64
+# define CASE_32_64(x) \
+        case glue(glue(INDEX_op_, x), _i64): \
+        case glue(glue(INDEX_op_, x), _i32):
+# define CASE_64(x) \
+        case glue(glue(INDEX_op_, x), _i64):
+#else
+# define CASE_32_64(x) \
+        case glue(glue(INDEX_op_, x), _i32):
+# define CASE_64(x)
+#endif
+
 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                        const int *const_args)
 {
@@ -391,6 +403,7 @@
     case INDEX_op_exit_tb:
         tcg_out64(s, args[0]);
         break;
+
     case INDEX_op_goto_tb:
         if (s->tb_jmp_insn_offset) {
             /* Direct jump method. */
@@ -404,15 +417,18 @@
         }
         set_jmp_reset_offset(s, args[0]);
         break;
+
     case INDEX_op_br:
         tci_out_label(s, arg_label(args[0]));
         break;
-    case INDEX_op_setcond_i32:
+
+    CASE_32_64(setcond)
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
         tcg_out_r(s, args[2]);
         tcg_out8(s, args[3]);   /* condition */
         break;
+
 #if TCG_TARGET_REG_BITS == 32
     case INDEX_op_setcond2_i32:
         /* setcond2_i32 cond, t0, t1_low, t1_high, t2_low, t2_high */
@@ -423,60 +439,54 @@
         tcg_out_r(s, args[4]);
         tcg_out8(s, args[5]);   /* condition */
         break;
-#elif TCG_TARGET_REG_BITS == 64
-    case INDEX_op_setcond_i64:
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_r(s, args[2]);
-        tcg_out8(s, args[3]);   /* condition */
-        break;
 #endif
-    case INDEX_op_ld8u_i32:
-    case INDEX_op_ld8s_i32:
-    case INDEX_op_ld16u_i32:
-    case INDEX_op_ld16s_i32:
+
+    CASE_32_64(ld8u)
+    CASE_32_64(ld8s)
+    CASE_32_64(ld16u)
+    CASE_32_64(ld16s)
     case INDEX_op_ld_i32:
-    case INDEX_op_st8_i32:
-    case INDEX_op_st16_i32:
+    CASE_64(ld32u)
+    CASE_64(ld32s)
+    CASE_64(ld)
+    CASE_32_64(st8)
+    CASE_32_64(st16)
     case INDEX_op_st_i32:
-    case INDEX_op_ld8u_i64:
-    case INDEX_op_ld8s_i64:
-    case INDEX_op_ld16u_i64:
-    case INDEX_op_ld16s_i64:
-    case INDEX_op_ld32u_i64:
-    case INDEX_op_ld32s_i64:
-    case INDEX_op_ld_i64:
-    case INDEX_op_st8_i64:
-    case INDEX_op_st16_i64:
-    case INDEX_op_st32_i64:
-    case INDEX_op_st_i64:
+    CASE_64(st32)
+    CASE_64(st)
         stack_bounds_check(args[1], args[2]);
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
         tcg_debug_assert(args[2] == (int32_t)args[2]);
         tcg_out32(s, args[2]);
         break;
-    case INDEX_op_add_i32:
-    case INDEX_op_sub_i32:
-    case INDEX_op_mul_i32:
-    case INDEX_op_and_i32:
-    case INDEX_op_andc_i32:     /* Optional (TCG_TARGET_HAS_andc_i32). */
-    case INDEX_op_eqv_i32:      /* Optional (TCG_TARGET_HAS_eqv_i32). */
-    case INDEX_op_nand_i32:     /* Optional (TCG_TARGET_HAS_nand_i32). */
-    case INDEX_op_nor_i32:      /* Optional (TCG_TARGET_HAS_nor_i32). */
-    case INDEX_op_or_i32:
-    case INDEX_op_orc_i32:      /* Optional (TCG_TARGET_HAS_orc_i32). */
-    case INDEX_op_xor_i32:
-    case INDEX_op_shl_i32:
-    case INDEX_op_shr_i32:
-    case INDEX_op_sar_i32:
-    case INDEX_op_rotl_i32:     /* Optional (TCG_TARGET_HAS_rot_i32). */
-    case INDEX_op_rotr_i32:     /* Optional (TCG_TARGET_HAS_rot_i32). */
+
+    CASE_32_64(add)
+    CASE_32_64(sub)
+    CASE_32_64(mul)
+    CASE_32_64(and)
+    CASE_32_64(or)
+    CASE_32_64(xor)
+    CASE_32_64(andc)     /* Optional (TCG_TARGET_HAS_andc_*). */
+    CASE_32_64(orc)      /* Optional (TCG_TARGET_HAS_orc_*). */
+    CASE_32_64(eqv)      /* Optional (TCG_TARGET_HAS_eqv_*). */
+    CASE_32_64(nand)     /* Optional (TCG_TARGET_HAS_nand_*). */
+    CASE_32_64(nor)      /* Optional (TCG_TARGET_HAS_nor_*). */
+    CASE_32_64(shl)
+    CASE_32_64(shr)
+    CASE_32_64(sar)
+    CASE_32_64(rotl)     /* Optional (TCG_TARGET_HAS_rot_*). */
+    CASE_32_64(rotr)     /* Optional (TCG_TARGET_HAS_rot_*). */
+    CASE_32_64(div)      /* Optional (TCG_TARGET_HAS_div_*). */
+    CASE_32_64(divu)     /* Optional (TCG_TARGET_HAS_div_*). */
+    CASE_32_64(rem)      /* Optional (TCG_TARGET_HAS_div_*). */
+    CASE_32_64(remu)     /* Optional (TCG_TARGET_HAS_div_*). */
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
         tcg_out_r(s, args[2]);
         break;
-    case INDEX_op_deposit_i32:  /* Optional (TCG_TARGET_HAS_deposit_i32). */
+
+    CASE_32_64(deposit)  /* Optional (TCG_TARGET_HAS_deposit_*). */
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
         tcg_out_r(s, args[2]);
@@ -486,79 +496,30 @@
         tcg_out8(s, args[4]);
         break;
 
-#if TCG_TARGET_REG_BITS == 64
-    case INDEX_op_add_i64:
-    case INDEX_op_sub_i64:
-    case INDEX_op_mul_i64:
-    case INDEX_op_and_i64:
-    case INDEX_op_andc_i64:     /* Optional (TCG_TARGET_HAS_andc_i64). */
-    case INDEX_op_eqv_i64:      /* Optional (TCG_TARGET_HAS_eqv_i64). */
-    case INDEX_op_nand_i64:     /* Optional (TCG_TARGET_HAS_nand_i64). */
-    case INDEX_op_nor_i64:      /* Optional (TCG_TARGET_HAS_nor_i64). */
-    case INDEX_op_or_i64:
-    case INDEX_op_orc_i64:      /* Optional (TCG_TARGET_HAS_orc_i64). */
-    case INDEX_op_xor_i64:
-    case INDEX_op_shl_i64:
-    case INDEX_op_shr_i64:
-    case INDEX_op_sar_i64:
-    case INDEX_op_rotl_i64:     /* Optional (TCG_TARGET_HAS_rot_i64). */
-    case INDEX_op_rotr_i64:     /* Optional (TCG_TARGET_HAS_rot_i64). */
-    case INDEX_op_div_i64:      /* Optional (TCG_TARGET_HAS_div_i64). */
-    case INDEX_op_divu_i64:     /* Optional (TCG_TARGET_HAS_div_i64). */
-    case INDEX_op_rem_i64:      /* Optional (TCG_TARGET_HAS_div_i64). */
-    case INDEX_op_remu_i64:     /* Optional (TCG_TARGET_HAS_div_i64). */
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_r(s, args[2]);
-        break;
-    case INDEX_op_deposit_i64:  /* Optional (TCG_TARGET_HAS_deposit_i64). */
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_r(s, args[2]);
-        tcg_debug_assert(args[3] <= UINT8_MAX);
-        tcg_out8(s, args[3]);
-        tcg_debug_assert(args[4] <= UINT8_MAX);
-        tcg_out8(s, args[4]);
-        break;
-    case INDEX_op_brcond_i64:
+    CASE_32_64(brcond)
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
         tcg_out8(s, args[2]);           /* condition */
         tci_out_label(s, arg_label(args[3]));
         break;
-    case INDEX_op_bswap16_i64:  /* Optional (TCG_TARGET_HAS_bswap16_i64). */
-    case INDEX_op_bswap32_i64:  /* Optional (TCG_TARGET_HAS_bswap32_i64). */
-    case INDEX_op_bswap64_i64:  /* Optional (TCG_TARGET_HAS_bswap64_i64). */
-    case INDEX_op_not_i64:      /* Optional (TCG_TARGET_HAS_not_i64). */
-    case INDEX_op_neg_i64:      /* Optional (TCG_TARGET_HAS_neg_i64). */
-    case INDEX_op_ext8s_i64:    /* Optional (TCG_TARGET_HAS_ext8s_i64). */
-    case INDEX_op_ext8u_i64:    /* Optional (TCG_TARGET_HAS_ext8u_i64). */
-    case INDEX_op_ext16s_i64:   /* Optional (TCG_TARGET_HAS_ext16s_i64). */
-    case INDEX_op_ext16u_i64:   /* Optional (TCG_TARGET_HAS_ext16u_i64). */
-    case INDEX_op_ext32s_i64:   /* Optional (TCG_TARGET_HAS_ext32s_i64). */
-    case INDEX_op_ext32u_i64:   /* Optional (TCG_TARGET_HAS_ext32u_i64). */
-    case INDEX_op_ext_i32_i64:
-    case INDEX_op_extu_i32_i64:
-#endif /* TCG_TARGET_REG_BITS == 64 */
-    case INDEX_op_neg_i32:      /* Optional (TCG_TARGET_HAS_neg_i32). */
-    case INDEX_op_not_i32:      /* Optional (TCG_TARGET_HAS_not_i32). */
-    case INDEX_op_ext8s_i32:    /* Optional (TCG_TARGET_HAS_ext8s_i32). */
-    case INDEX_op_ext16s_i32:   /* Optional (TCG_TARGET_HAS_ext16s_i32). */
-    case INDEX_op_ext8u_i32:    /* Optional (TCG_TARGET_HAS_ext8u_i32). */
-    case INDEX_op_ext16u_i32:   /* Optional (TCG_TARGET_HAS_ext16u_i32). */
-    case INDEX_op_bswap16_i32:  /* Optional (TCG_TARGET_HAS_bswap16_i32). */
-    case INDEX_op_bswap32_i32:  /* Optional (TCG_TARGET_HAS_bswap32_i32). */
+
+    CASE_32_64(neg)      /* Optional (TCG_TARGET_HAS_neg_*). */
+    CASE_32_64(not)      /* Optional (TCG_TARGET_HAS_not_*). */
+    CASE_32_64(ext8s)    /* Optional (TCG_TARGET_HAS_ext8s_*). */
+    CASE_32_64(ext8u)    /* Optional (TCG_TARGET_HAS_ext8u_*). */
+    CASE_32_64(ext16s)   /* Optional (TCG_TARGET_HAS_ext16s_*). */
+    CASE_32_64(ext16u)   /* Optional (TCG_TARGET_HAS_ext16u_*). */
+    CASE_64(ext32s)      /* Optional (TCG_TARGET_HAS_ext32s_i64). */
+    CASE_64(ext32u)      /* Optional (TCG_TARGET_HAS_ext32u_i64). */
+    CASE_64(ext_i32)
+    CASE_64(extu_i32)
+    CASE_32_64(bswap16)  /* Optional (TCG_TARGET_HAS_bswap16_*). */
+    CASE_32_64(bswap32)  /* Optional (TCG_TARGET_HAS_bswap32_*). */
+    CASE_64(bswap64)     /* Optional (TCG_TARGET_HAS_bswap64_i64). */
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
         break;
-    case INDEX_op_div_i32:      /* Optional (TCG_TARGET_HAS_div_i32). */
-    case INDEX_op_divu_i32:     /* Optional (TCG_TARGET_HAS_div_i32). */
-    case INDEX_op_rem_i32:      /* Optional (TCG_TARGET_HAS_div_i32). */
-    case INDEX_op_remu_i32:     /* Optional (TCG_TARGET_HAS_div_i32). */
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_r(s, args[2]);
-        break;
+
 #if TCG_TARGET_REG_BITS == 32
     case INDEX_op_add2_i32:
     case INDEX_op_sub2_i32:
@@ -584,31 +545,8 @@
         tcg_out_r(s, args[3]);
         break;
 #endif
-    case INDEX_op_brcond_i32:
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out8(s, args[2]);           /* condition */
-        tci_out_label(s, arg_label(args[3]));
-        break;
+
     case INDEX_op_qemu_ld_i32:
-        tcg_out_r(s, *args++);
-        tcg_out_r(s, *args++);
-        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
-            tcg_out_r(s, *args++);
-        }
-        tcg_out_i(s, *args++);
-        break;
-    case INDEX_op_qemu_ld_i64:
-        tcg_out_r(s, *args++);
-        if (TCG_TARGET_REG_BITS == 32) {
-            tcg_out_r(s, *args++);
-        }
-        tcg_out_r(s, *args++);
-        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
-            tcg_out_r(s, *args++);
-        }
-        tcg_out_i(s, *args++);
-        break;
     case INDEX_op_qemu_st_i32:
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
@@ -617,6 +555,8 @@
         }
         tcg_out_i(s, *args++);
         break;
+
+    case INDEX_op_qemu_ld_i64:
     case INDEX_op_qemu_st_i64:
         tcg_out_r(s, *args++);
         if (TCG_TARGET_REG_BITS == 32) {
@@ -628,8 +568,10 @@
         }
         tcg_out_i(s, *args++);
         break;
+
     case INDEX_op_mb:
         break;
+
     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
     case INDEX_op_mov_i64:
     case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 12aa9ed..5fb65b4 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -153,7 +153,7 @@
     def test_device_not_found(self):
         result = self.vm.qmp('block-stream', device='nonexistent')
         self.assert_qmp(result, 'error/desc',
-            'Cannot find device=nonexistent nor node_name=nonexistent')
+            'Cannot find device=\'nonexistent\' nor node-name=\'nonexistent\'')
 
     def test_job_id_missing(self):
         result = self.vm.qmp('block-stream', device='mid')
@@ -507,7 +507,7 @@
         # Error: the base node does not exist
         result = self.vm.qmp('block-stream', device='node4', base_node='none', job_id='stream')
         self.assert_qmp(result, 'error/desc',
-            'Cannot find device= nor node_name=none')
+            'Cannot find device=\'\' nor node-name=\'none\'')
 
         # Error: the base node is not a backing file of the top node
         result = self.vm.qmp('block-stream', device='node4', base_node='node6', job_id='stream')
diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
index 7ebc9ed..336ff7c 100755
--- a/tests/qemu-iotests/040
+++ b/tests/qemu-iotests/040
@@ -175,13 +175,13 @@
         self.assert_no_active_block_jobs()
         result = self.vm.qmp('block-commit', device='drive0', top_node='badfile', base_node='base')
         self.assert_qmp(result, 'error/class', 'GenericError')
-        self.assert_qmp(result, 'error/desc', "Cannot find device= nor node_name=badfile")
+        self.assert_qmp(result, 'error/desc', "Cannot find device='' nor node-name='badfile'")
 
     def test_base_node_invalid(self):
         self.assert_no_active_block_jobs()
         result = self.vm.qmp('block-commit', device='drive0', top_node='mid', base_node='badfile')
         self.assert_qmp(result, 'error/class', 'GenericError')
-        self.assert_qmp(result, 'error/desc', "Cannot find device= nor node_name=badfile")
+        self.assert_qmp(result, 'error/desc', "Cannot find device='' nor node-name='badfile'")
 
     def test_top_path_and_node(self):
         self.assert_no_active_block_jobs()
diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out
index f707471..f570610 100644
--- a/tests/qemu-iotests/051.pc.out
+++ b/tests/qemu-iotests/051.pc.out
@@ -61,13 +61,13 @@
 (qemu) quit
 
 Testing: -drive file=TEST_DIR/t.qcow2,node-name=123foo
-QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=123foo: Invalid node name
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=123foo: Invalid node-name: '123foo'
 
 Testing: -drive file=TEST_DIR/t.qcow2,node-name=_foo
-QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=_foo: Invalid node name
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=_foo: Invalid node-name: '_foo'
 
 Testing: -drive file=TEST_DIR/t.qcow2,node-name=foo#12
-QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=foo#12: Invalid node name
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=foo#12: Invalid node-name: 'foo#12'
 
 
 === Device without drive ===
diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
index 1974262..615c083 100644
--- a/tests/qemu-iotests/081.out
+++ b/tests/qemu-iotests/081.out
@@ -140,7 +140,7 @@
 QMP_VERSION
 {"return": {}}
 {"error": {"class": "GenericError", "desc": "blkverify=on can only be set if there are exactly two files and vote-threshold is 2"}}
-{"error": {"class": "GenericError", "desc": "Cannot find device=drive0-quorum nor node_name=drive0-quorum"}}
+{"error": {"class": "GenericError", "desc": "Cannot find device='drive0-quorum' nor node-name='drive0-quorum'"}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 
diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out
index 32a193f..1d4c565 100644
--- a/tests/qemu-iotests/085.out
+++ b/tests/qemu-iotests/085.out
@@ -24,7 +24,7 @@
 { 'execute': 'blockdev-snapshot-sync',
                          'arguments': { 'snapshot-file':'TEST_DIR/1-snapshot-v0.IMGFMT',
                                      'format': 'IMGFMT' } }
-{"error": {"class": "GenericError", "desc": "Cannot find device= nor node_name="}}
+{"error": {"class": "GenericError", "desc": "Cannot find device='' nor node-name=''"}}
 
 === Invalid command - missing snapshot-file ===
 
@@ -222,10 +222,10 @@
 { 'execute': 'blockdev-snapshot',
                       'arguments': { 'node': 'virtio0',
                                      'overlay':'snap_14' } }
-{"error": {"class": "GenericError", "desc": "Cannot find device=snap_14 nor node_name=snap_14"}}
+{"error": {"class": "GenericError", "desc": "Cannot find device='snap_14' nor node-name='snap_14'"}}
 { 'execute': 'blockdev-snapshot',
                      'arguments': { 'node':'nodevice',
                                     'overlay':'snap_13' }
                    }
-{"error": {"class": "GenericError", "desc": "Cannot find device=nodevice nor node_name=nodevice"}}
+{"error": {"class": "GenericError", "desc": "Cannot find device='nodevice' nor node-name='nodevice'"}}
 *** done
diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087
index edd43f1..d8e0e38 100755
--- a/tests/qemu-iotests/087
+++ b/tests/qemu-iotests/087
@@ -143,9 +143,7 @@
   "arguments": {
       "qom-type": "secret",
       "id": "sec0",
-      "props": {
-          "data": "123456"
-      }
+      "data": "123456"
   }
 }
 { "execute": "blockdev-add",
@@ -176,9 +174,7 @@
   "arguments": {
       "qom-type": "secret",
       "id": "sec0",
-      "props": {
-          "data": "123456"
-      }
+      "data": "123456"
   }
 }
 { "execute": "blockdev-add",
diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out
index b61ba63..e1c23a6 100644
--- a/tests/qemu-iotests/087.out
+++ b/tests/qemu-iotests/087.out
@@ -17,7 +17,7 @@
 QMP_VERSION
 {"return": {}}
 {"error": {"class": "GenericError", "desc": "node-name=disk is conflicting with a device id"}}
-{"error": {"class": "GenericError", "desc": "Duplicate node name"}}
+{"error": {"class": "GenericError", "desc": "Duplicate nodes with node-name='test-node'"}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 
diff --git a/tests/qemu-iotests/184 b/tests/qemu-iotests/184
index 513d167..e4cbcd8 100755
--- a/tests/qemu-iotests/184
+++ b/tests/qemu-iotests/184
@@ -67,10 +67,8 @@
   "arguments": {
     "qom-type": "throttle-group",
     "id": "group0",
-    "props": {
-      "limits" : {
-        "iops-total": 1000
-      }
+    "limits" : {
+      "iops-total": 1000
     }
   }
 }
@@ -96,10 +94,8 @@
   "arguments": {
     "qom-type": "throttle-group",
     "id": "group0",
-    "props" : {
-      "limits": {
-          "iops-total": 1000
-      }
+    "limits": {
+        "iops-total": 1000
     }
   }
 }
@@ -136,10 +132,8 @@
   "arguments": {
     "qom-type": "throttle-group",
     "id": "group0",
-    "props" : {
-      "limits": {
-          "iops-total": 1000
-      }
+    "limits": {
+        "iops-total": 1000
     }
   }
 }
diff --git a/tests/qemu-iotests/206.out b/tests/qemu-iotests/206.out
index 5dd589d..b68c443 100644
--- a/tests/qemu-iotests/206.out
+++ b/tests/qemu-iotests/206.out
@@ -155,7 +155,7 @@
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "qcow2", "file": "this doesn't exist", "size": 33554432}}}
 {"return": {}}
-Job failed: Cannot find device=this doesn't exist nor node_name=this doesn't exist
+Job failed: Cannot find device='this doesn't exist' nor node-name='this doesn't exist'
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
 
diff --git a/tests/qemu-iotests/210.out b/tests/qemu-iotests/210.out
index 2e9fc59..55c0844 100644
--- a/tests/qemu-iotests/210.out
+++ b/tests/qemu-iotests/210.out
@@ -108,7 +108,7 @@
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "luks", "file": "this doesn't exist", "size": 67108864}}}
 {"return": {}}
-Job failed: Cannot find device=this doesn't exist nor node_name=this doesn't exist
+Job failed: Cannot find device='this doesn't exist' nor node-name='this doesn't exist'
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
 
diff --git a/tests/qemu-iotests/211.out b/tests/qemu-iotests/211.out
index b83384d..3bc092a 100644
--- a/tests/qemu-iotests/211.out
+++ b/tests/qemu-iotests/211.out
@@ -62,7 +62,7 @@
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "vdi", "file": "this doesn't exist", "size": 33554432}}}
 {"return": {}}
-Job failed: Cannot find device=this doesn't exist nor node_name=this doesn't exist
+Job failed: Cannot find device='this doesn't exist' nor node-name='this doesn't exist'
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
 
diff --git a/tests/qemu-iotests/212.out b/tests/qemu-iotests/212.out
index 1538d67..8102033 100644
--- a/tests/qemu-iotests/212.out
+++ b/tests/qemu-iotests/212.out
@@ -52,7 +52,7 @@
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "parallels", "file": "this doesn't exist", "size": 33554432}}}
 {"return": {}}
-Job failed: Cannot find device=this doesn't exist nor node_name=this doesn't exist
+Job failed: Cannot find device='this doesn't exist' nor node-name='this doesn't exist'
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
 
diff --git a/tests/qemu-iotests/213.out b/tests/qemu-iotests/213.out
index be4ae85..3cdce4d 100644
--- a/tests/qemu-iotests/213.out
+++ b/tests/qemu-iotests/213.out
@@ -55,7 +55,7 @@
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "vhdx", "file": "this doesn't exist", "size": 33554432}}}
 {"return": {}}
-Job failed: Cannot find device=this doesn't exist nor node_name=this doesn't exist
+Job failed: Cannot find device='this doesn't exist' nor node-name='this doesn't exist'
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
 
diff --git a/tests/qemu-iotests/218 b/tests/qemu-iotests/218
index ae7c4fb..325d824 100755
--- a/tests/qemu-iotests/218
+++ b/tests/qemu-iotests/218
@@ -152,7 +152,7 @@
     vm.launch()
 
     ret = vm.qmp('object-add', qom_type='throttle-group', id='tg',
-                 props={'x-bps-read': 4096})
+                 limits={'bps-read': 4096})
     assert ret['return'] == {}
 
     ret = vm.qmp('blockdev-add',
diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out
index bbc8528..083b62d 100644
--- a/tests/qemu-iotests/223.out
+++ b/tests/qemu-iotests/223.out
@@ -53,7 +53,7 @@
 {"return": {}}
 {"execute":"nbd-server-add",
   "arguments":{"device":"nosuch"}}
-{"error": {"class": "GenericError", "desc": "Cannot find device=nosuch nor node_name=nosuch"}}
+{"error": {"class": "GenericError", "desc": "Cannot find device='nosuch' nor node-name='nosuch'"}}
 {"execute":"nbd-server-add",
   "arguments":{"device":"n"}}
 {"error": {"class": "GenericError", "desc": "Block export id 'n' is already in use"}}
@@ -154,7 +154,7 @@
 {"return": {}}
 {"execute":"nbd-server-add",
   "arguments":{"device":"nosuch"}}
-{"error": {"class": "GenericError", "desc": "Cannot find device=nosuch nor node_name=nosuch"}}
+{"error": {"class": "GenericError", "desc": "Cannot find device='nosuch' nor node-name='nosuch'"}}
 {"execute":"nbd-server-add",
   "arguments":{"device":"n"}}
 {"error": {"class": "GenericError", "desc": "Block export id 'n' is already in use"}}
diff --git a/tests/qemu-iotests/235 b/tests/qemu-iotests/235
index 20d16db..8aed45f 100755
--- a/tests/qemu-iotests/235
+++ b/tests/qemu-iotests/235
@@ -57,7 +57,7 @@
 vm.launch()
 
 log(vm.qmp('object-add', qom_type='throttle-group', id='tg0',
-           props={ 'x-bps-total': size }))
+           limits={'bps-total': size}))
 
 log(vm.qmp('blockdev-add',
            **{ 'node-name': 'target',
diff --git a/tests/qemu-iotests/237.out b/tests/qemu-iotests/237.out
index a8c800b..aa94986 100644
--- a/tests/qemu-iotests/237.out
+++ b/tests/qemu-iotests/237.out
@@ -85,7 +85,7 @@
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "vmdk", "file": "this doesn't exist", "size": 33554432}}}
 {"return": {}}
-Job failed: Cannot find device=this doesn't exist nor node_name=this doesn't exist
+Job failed: Cannot find device='this doesn't exist' nor node-name='this doesn't exist'
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
 
diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245
index cfdeb90..11104b9 100755
--- a/tests/qemu-iotests/245
+++ b/tests/qemu-iotests/245
@@ -140,8 +140,8 @@
         self.reopen(opts, {'file': 'hd0-file'})
 
         # We cannot change any of these
-        self.reopen(opts, {'node-name': 'not-found'}, "Cannot find node named 'not-found'")
-        self.reopen(opts, {'node-name': ''}, "Cannot find node named ''")
+        self.reopen(opts, {'node-name': 'not-found'}, "Failed to find node with node-name='not-found'")
+        self.reopen(opts, {'node-name': ''}, "Failed to find node with node-name=''")
         self.reopen(opts, {'node-name': None}, "Invalid parameter type for 'node-name', expected: string")
         self.reopen(opts, {'driver': 'raw'}, "Cannot change the option 'driver'")
         self.reopen(opts, {'driver': ''}, "Invalid parameter ''")
@@ -158,7 +158,7 @@
 
         # node-name is optional in BlockdevOptions, but x-blockdev-reopen needs it
         del opts['node-name']
-        self.reopen(opts, {}, "Node name not specified")
+        self.reopen(opts, {}, "node-name not specified")
 
         # Check that nothing has changed
         self.check_node_graph(original_graph)
@@ -187,8 +187,8 @@
         self.reopen(opts, {'backing': backing_node_name})
 
         # We can't use a non-existing or empty (non-NULL) node as the backing image
-        self.reopen(opts, {'backing': 'not-found'}, "Cannot find device= nor node_name=not-found")
-        self.reopen(opts, {'backing': ''}, "Cannot find device= nor node_name=")
+        self.reopen(opts, {'backing': 'not-found'}, "Cannot find device=\'\' nor node-name=\'not-found\'")
+        self.reopen(opts, {'backing': ''}, "Cannot find device=\'\' nor node-name=\'\'")
 
         # We can reopen the image just fine if we specify the backing options
         opts['backing'] = {'driver': iotests.imgfmt,
@@ -644,12 +644,12 @@
         ###### throttle ######
         ######################
         opts = { 'qom-type': 'throttle-group', 'id': 'group0',
-                 'props': { 'limits': { 'iops-total': 1000 } } }
+                 'limits': { 'iops-total': 1000 } }
         result = self.vm.qmp('object-add', conv_keys = False, **opts)
         self.assert_qmp(result, 'return', {})
 
         opts = { 'qom-type': 'throttle-group', 'id': 'group1',
-                 'props': { 'limits': { 'iops-total': 2000 } } }
+                 'limits': { 'iops-total': 2000 } }
         result = self.vm.qmp('object-add', conv_keys = False, **opts)
         self.assert_qmp(result, 'return', {})
 
diff --git a/tests/qemu-iotests/249.out b/tests/qemu-iotests/249.out
index 92ec81d..d2bf9be 100644
--- a/tests/qemu-iotests/249.out
+++ b/tests/qemu-iotests/249.out
@@ -18,7 +18,7 @@
                      'filter-node-name': '1234'}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
-{"error": {"class": "GenericError", "desc": "Invalid node name"}}
+{"error": {"class": "GenericError", "desc": "Invalid node-name: '1234'"}}
 
 === Send a write command to a drive opened in read-only mode (2)
 
diff --git a/tests/qemu-iotests/258 b/tests/qemu-iotests/258
index 9a2d33a..a661820 100755
--- a/tests/qemu-iotests/258
+++ b/tests/qemu-iotests/258
@@ -103,9 +103,9 @@
         vm.qmp_log('object-add',
                    qom_type='throttle-group',
                    id='tg',
-                   props={
-                       'x-iops-write': 1,
-                       'x-iops-write-max': 1
+                   limits={
+                       'iops-write': 1,
+                       'iops-write-max': 1
                    })
 
         vm.qmp_log('blockdev-add',
diff --git a/tests/qemu-iotests/258.out b/tests/qemu-iotests/258.out
index ce6e9ba..c3a003d 100644
--- a/tests/qemu-iotests/258.out
+++ b/tests/qemu-iotests/258.out
@@ -2,7 +2,7 @@
 
 === Commit and stream finish concurrently (letting stream write) ===
 
-{"execute": "object-add", "arguments": {"id": "tg", "props": {"x-iops-write": 1, "x-iops-write-max": 1}, "qom-type": "throttle-group"}}
+{"execute": "object-add", "arguments": {"id": "tg", "limits": {"iops-write": 1, "iops-write-max": 1}, "qom-type": "throttle-group"}}
 {"return": {}}
 {"execute": "blockdev-add", "arguments": {"backing": {"backing": {"backing": {"backing": {"driver": "raw", "file": {"driver": "file", "filename": "TEST_DIR/PID-node0.img"}, "node-name": "node0"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node1.img"}, "node-name": "node1"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node2.img"}, "node-name": "node2"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node3.img"}, "node-name": "node3"}, "driver": "IMGFMT", "file": {"driver": "throttle", "file": {"driver": "file", "filename": "TEST_DIR/PID-node4.img"}, "throttle-group": "tg"}, "node-name": "node4"}}
 {"return": {}}
@@ -18,7 +18,7 @@
 
 === Commit and stream finish concurrently (letting commit write) ===
 
-{"execute": "object-add", "arguments": {"id": "tg", "props": {"x-iops-write": 1, "x-iops-write-max": 1}, "qom-type": "throttle-group"}}
+{"execute": "object-add", "arguments": {"id": "tg", "limits": {"iops-write": 1, "iops-write-max": 1}, "qom-type": "throttle-group"}}
 {"return": {}}
 {"execute": "blockdev-add", "arguments": {"backing": {"backing": {"backing": {"backing": {"driver": "raw", "file": {"driver": "throttle", "file": {"driver": "file", "filename": "TEST_DIR/PID-node0.img"}, "throttle-group": "tg"}, "node-name": "node0"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node1.img"}, "node-name": "node1"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node2.img"}, "node-name": "node2"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node3.img"}, "node-name": "node3"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-node4.img"}, "node-name": "node4"}}
 {"return": {}}
diff --git a/tests/qemu-iotests/283 b/tests/qemu-iotests/283
index 79643e3..010c22f 100755
--- a/tests/qemu-iotests/283
+++ b/tests/qemu-iotests/283
@@ -97,3 +97,56 @@
 vm.qmp_log('blockdev-backup', sync='full', device='source', target='target')
 
 vm.shutdown()
+
+
+print('\n=== backup-top should be gone after job-finalize ===\n')
+
+# Check that the backup-top node is gone after job-finalize.
+#
+# During finalization, the node becomes inactive and can no longer
+# function.  If it is still present, new parents might be attached, and
+# there would be no meaningful way to handle their I/O requests.
+
+vm = iotests.VM()
+vm.launch()
+
+vm.qmp_log('blockdev-add', **{
+    'node-name': 'source',
+    'driver': 'null-co',
+})
+
+vm.qmp_log('blockdev-add', **{
+    'node-name': 'target',
+    'driver': 'null-co',
+})
+
+vm.qmp_log('blockdev-backup',
+           job_id='backup',
+           device='source',
+           target='target',
+           sync='full',
+           filter_node_name='backup-filter',
+           auto_finalize=False,
+           auto_dismiss=False)
+
+vm.event_wait('BLOCK_JOB_PENDING', 5.0)
+
+# The backup-top filter should still be present prior to finalization
+assert vm.node_info('backup-filter') is not None
+
+vm.qmp_log('job-finalize', id='backup')
+vm.event_wait('BLOCK_JOB_COMPLETED', 5.0)
+
+# The filter should be gone now.  Check that by trying to access it
+# with qemu-io (which will most likely crash qemu if it is still
+# there.).
+vm.qmp_log('human-monitor-command',
+           command_line='qemu-io backup-filter "write 0 1M"')
+
+# (Also, do an explicit check.)
+assert vm.node_info('backup-filter') is None
+
+vm.qmp_log('job-dismiss', id='backup')
+vm.event_wait('JOB_STATUS_CHANGE', 5.0, {'data': {'status': 'null'}})
+
+vm.shutdown()
diff --git a/tests/qemu-iotests/283.out b/tests/qemu-iotests/283.out
index d8cff22..37c3505 100644
--- a/tests/qemu-iotests/283.out
+++ b/tests/qemu-iotests/283.out
@@ -6,3 +6,18 @@
 {"return": {}}
 {"execute": "blockdev-backup", "arguments": {"device": "source", "sync": "full", "target": "target"}}
 {"error": {"class": "GenericError", "desc": "Cannot set permissions for backup-top filter: Conflicts with use by other as 'image', which uses 'write' on base"}}
+
+=== backup-top should be gone after job-finalize ===
+
+{"execute": "blockdev-add", "arguments": {"driver": "null-co", "node-name": "source"}}
+{"return": {}}
+{"execute": "blockdev-add", "arguments": {"driver": "null-co", "node-name": "target"}}
+{"return": {}}
+{"execute": "blockdev-backup", "arguments": {"auto-dismiss": false, "auto-finalize": false, "device": "source", "filter-node-name": "backup-filter", "job-id": "backup", "sync": "full", "target": "target"}}
+{"return": {}}
+{"execute": "job-finalize", "arguments": {"id": "backup"}}
+{"return": {}}
+{"execute": "human-monitor-command", "arguments": {"command-line": "qemu-io backup-filter \"write 0 1M\""}}
+{"return": "Error: Cannot find device='' nor node-name='backup-filter'\r\n"}
+{"execute": "job-dismiss", "arguments": {"id": "backup"}}
+{"return": {}}
diff --git a/tests/qemu-iotests/295 b/tests/qemu-iotests/295
index 01a6c0b..270ad39 100755
--- a/tests/qemu-iotests/295
+++ b/tests/qemu-iotests/295
@@ -43,7 +43,7 @@
 
     def to_qmp_object(self):
         return { "qom_type" : "secret", "id": self.id(),
-                 "props": { "data": self.secret() } }
+                 "data": self.secret() }
 
 ################################################################################
 class EncryptionSetupTestCase(iotests.QMPTestCase):
diff --git a/tests/qemu-iotests/296 b/tests/qemu-iotests/296
index 0bc3c6c..7c65e98 100755
--- a/tests/qemu-iotests/296
+++ b/tests/qemu-iotests/296
@@ -43,7 +43,7 @@
 
     def to_qmp_object(self):
         return { "qom_type" : "secret", "id": self.id(),
-                 "props": { "data": self.secret() } }
+                 "data": self.secret() }
 
 ################################################################################
 
diff --git a/tests/qemu-iotests/300 b/tests/qemu-iotests/300
index 63036f6..b475a92 100755
--- a/tests/qemu-iotests/300
+++ b/tests/qemu-iotests/300
@@ -22,7 +22,7 @@
 import os
 import random
 import re
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional
 
 import iotests
 
@@ -30,7 +30,7 @@
 # pylint: disable=wrong-import-order
 import qemu
 
-BlockBitmapMapping = List[Dict[str, Union[str, List[Dict[str, str]]]]]
+BlockBitmapMapping = List[Dict[str, object]]
 
 mig_sock = os.path.join(iotests.sock_dir, 'mig_sock')
 
@@ -189,8 +189,8 @@
         # Check for error message on the destination
         if self.src_node_name != self.dst_node_name:
             self.verify_dest_error(f"Cannot find "
-                                   f"device={self.src_node_name} nor "
-                                   f"node_name={self.src_node_name}")
+                                   f"device='{self.src_node_name}' nor "
+                                   f"node-name='{self.src_node_name}'")
         else:
             self.verify_dest_error(None)
 
@@ -602,7 +602,8 @@
 
 class TestAliasTransformMigration(TestDirtyBitmapMigration):
     """
-    Tests the 'transform' option which modifies bitmap persistence on migration.
+    Tests the 'transform' option which modifies bitmap persistence on
+    migration.
     """
 
     src_node_name = 'node-a'
@@ -674,7 +675,8 @@
         bitmaps = self.vm_b.query_bitmaps()
 
         for node in bitmaps:
-            bitmaps[node] = sorted(((bmap['name'], bmap['persistent']) for bmap in bitmaps[node]))
+            bitmaps[node] = sorted(((bmap['name'], bmap['persistent'])
+                                    for bmap in bitmaps[node]))
 
         self.assertEqual(bitmaps,
                          {'node-a': [('bmap-a', True), ('bmap-b', False)],
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 4e75830..90d0b62 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -17,6 +17,7 @@
 #
 
 import atexit
+import bz2
 from collections import OrderedDict
 import faulthandler
 import io
@@ -24,6 +25,7 @@
 import logging
 import os
 import re
+import shutil
 import signal
 import struct
 import subprocess
@@ -96,6 +98,14 @@
                              os.environ.get('IMGKEYSECRET', '')
 luks_default_key_secret_opt = 'key-secret=keysec0'
 
+sample_img_dir = os.environ['SAMPLE_IMG_DIR']
+
+
+def unarchive_sample_image(sample, fname):
+    sample_fname = os.path.join(sample_img_dir, sample + '.bz2')
+    with bz2.open(sample_fname) as f_in, open(fname, 'wb') as f_out:
+        shutil.copyfileobj(f_in, f_out)
+
 
 def qemu_tool_pipe_and_status(tool: str, args: Sequence[str],
                               connect_stderr: bool = True) -> Tuple[str, int]:
diff --git a/tests/qemu-iotests/sample_images/parallels-with-bitmap.bz2 b/tests/qemu-iotests/sample_images/parallels-with-bitmap.bz2
new file mode 100644
index 0000000..54892fd
--- /dev/null
+++ b/tests/qemu-iotests/sample_images/parallels-with-bitmap.bz2
Binary files differ
diff --git a/tests/qemu-iotests/sample_images/parallels-with-bitmap.sh b/tests/qemu-iotests/sample_images/parallels-with-bitmap.sh
new file mode 100755
index 0000000..30615aa
--- /dev/null
+++ b/tests/qemu-iotests/sample_images/parallels-with-bitmap.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# Test parallels load bitmap
+#
+# Copyright (c) 2021 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+CT=parallels-with-bitmap-ct
+DIR=$PWD/parallels-with-bitmap-dir
+IMG=$DIR/root.hds
+XML=$DIR/DiskDescriptor.xml
+TARGET=parallels-with-bitmap.bz2
+
+rm -rf $DIR
+
+prlctl create $CT --vmtype ct
+prlctl set $CT --device-add hdd --image $DIR --recreate --size 2G
+
+# cleanup the image
+qemu-img create -f parallels $IMG 64G
+
+# create bitmap
+prlctl backup $CT
+
+prlctl set $CT --device-del hdd1
+prlctl destroy $CT
+
+dev=$(ploop mount $XML | sed -n 's/^Adding delta dev=\(\/dev\/ploop[0-9]\+\).*/\1/p')
+dd if=/dev/zero of=$dev bs=64K seek=5 count=2 oflag=direct
+dd if=/dev/zero of=$dev bs=64K seek=30 count=1 oflag=direct
+dd if=/dev/zero of=$dev bs=64K seek=10 count=3 oflag=direct
+ploop umount $XML  # bitmap name will be in the output
+
+bzip2 -z $IMG
+
+mv $IMG.bz2 $TARGET
+
+rm -rf $DIR
diff --git a/tests/qemu-iotests/tests/parallels-read-bitmap b/tests/qemu-iotests/tests/parallels-read-bitmap
new file mode 100755
index 0000000..af6b9c5
--- /dev/null
+++ b/tests/qemu-iotests/tests/parallels-read-bitmap
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+#
+# Test parallels load bitmap
+#
+# Copyright (c) 2021 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import json
+import iotests
+from iotests import qemu_nbd_popen, qemu_img_pipe, log, file_path
+
+iotests.script_initialize(supported_fmts=['parallels'])
+
+nbd_sock = file_path('nbd-sock', base_dir=iotests.sock_dir)
+disk = iotests.file_path('disk')
+bitmap = 'e4f2eed0-37fe-4539-b50b-85d2e7fd235f'
+nbd_opts = f'driver=nbd,server.type=unix,server.path={nbd_sock}' \
+        f',x-dirty-bitmap=qemu:dirty-bitmap:{bitmap}'
+
+
+iotests.unarchive_sample_image('parallels-with-bitmap', disk)
+
+
+with qemu_nbd_popen('--read-only', f'--socket={nbd_sock}',
+                    f'--bitmap={bitmap}', '-f', iotests.imgfmt, disk):
+    out = qemu_img_pipe('map', '--output=json', '--image-opts', nbd_opts)
+    chunks = json.loads(out)
+    cluster = 64 * 1024
+
+    log('dirty clusters (cluster size is 64K):')
+    for c in chunks:
+        assert c['start'] % cluster == 0
+        assert c['length'] % cluster == 0
+        if c['data']:
+            continue
+
+        a = c['start'] // cluster
+        b = (c['start'] + c['length']) // cluster
+        if b - a > 1:
+            log(f'{a}-{b-1}')
+        else:
+            log(a)
diff --git a/tests/qemu-iotests/tests/parallels-read-bitmap.out b/tests/qemu-iotests/tests/parallels-read-bitmap.out
new file mode 100644
index 0000000..e8f6bc9
--- /dev/null
+++ b/tests/qemu-iotests/tests/parallels-read-bitmap.out
@@ -0,0 +1,6 @@
+Start NBD server
+dirty clusters (cluster size is 64K):
+5-6
+10-12
+30
+Kill NBD server
diff --git a/tests/qtest/libqos/libqtest.h b/tests/qtest/libqos/libqtest.h
index 724f65a..a68dcd7 100644
--- a/tests/qtest/libqos/libqtest.h
+++ b/tests/qtest/libqos/libqtest.h
@@ -75,6 +75,17 @@
 QTestState *qtest_init_with_serial(const char *extra_args, int *sock_fd);
 
 /**
+ * qtest_kill_qemu:
+ * @s: #QTestState instance to operate on.
+ *
+ * Kill the QEMU process and wait for it to terminate. It is safe to call this
+ * function multiple times. Normally qtest_quit() is used instead because it
+ * also frees QTestState. Use qtest_kill_qemu() when you just want to kill QEMU
+ * and qtest_quit() will be called later.
+ */
+void qtest_kill_qemu(QTestState *s);
+
+/**
  * qtest_quit:
  * @s: #QTestState instance to operate on.
  *
@@ -133,6 +144,14 @@
     GCC_FMT_ATTR(2, 3);
 
 /**
+ * qtest_socket_server:
+ * @socket_path: the UNIX domain socket path
+ *
+ * Create and return a listen socket file descriptor, or abort on failure.
+ */
+int qtest_socket_server(const char *socket_path);
+
+/**
  * qtest_vqmp_fds:
  * @s: #QTestState instance to operate on.
  * @fds: array of file descriptors
@@ -630,9 +649,27 @@
         g_free(path); \
     } while (0)
 
+/**
+ * qtest_add_abrt_handler:
+ * @fn: Handler function
+ * @data: Argument that is passed to the handler
+ *
+ * Add a handler function that is invoked on SIGABRT. This can be used to
+ * terminate processes and perform other cleanup. The handler can be removed
+ * with qtest_remove_abrt_handler().
+ */
 void qtest_add_abrt_handler(GHookFunc fn, const void *data);
 
 /**
+ * qtest_remove_abrt_handler:
+ * @data: Argument previously passed to qtest_add_abrt_handler()
+ *
+ * Remove an abrt handler that was previously added with
+ * qtest_add_abrt_handler().
+ */
+void qtest_remove_abrt_handler(void *data);
+
+/**
  * qtest_qmp_assert_success:
  * @qts: QTestState instance to operate on
  * @fmt: QMP message to send to qemu, formatted like
diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c
index fd043b0..71e359e 100644
--- a/tests/qtest/libqtest.c
+++ b/tests/qtest/libqtest.c
@@ -81,24 +81,8 @@
 
 static int init_socket(const char *socket_path)
 {
-    struct sockaddr_un addr;
-    int sock;
-    int ret;
-
-    sock = socket(PF_UNIX, SOCK_STREAM, 0);
-    g_assert_cmpint(sock, !=, -1);
-
-    addr.sun_family = AF_UNIX;
-    snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", socket_path);
+    int sock = qtest_socket_server(socket_path);
     qemu_set_cloexec(sock);
-
-    do {
-        ret = bind(sock, (struct sockaddr *)&addr, sizeof(addr));
-    } while (ret == -1 && errno == EINTR);
-    g_assert_cmpint(ret, !=, -1);
-    ret = listen(sock, 1);
-    g_assert_cmpint(ret, !=, -1);
-
     return sock;
 }
 
@@ -149,7 +133,7 @@
     s->expected_status = status;
 }
 
-static void kill_qemu(QTestState *s)
+void qtest_kill_qemu(QTestState *s)
 {
     pid_t pid = s->qemu_pid;
     int wstatus;
@@ -159,6 +143,7 @@
         kill(pid, SIGTERM);
         TFR(pid = waitpid(s->qemu_pid, &s->wstatus, 0));
         assert(pid == s->qemu_pid);
+        s->qemu_pid = -1;
     }
 
     /*
@@ -185,7 +170,7 @@
 
 static void kill_qemu_hook_func(void *s)
 {
-    kill_qemu(s);
+    qtest_kill_qemu(s);
 }
 
 static void sigabrt_handler(int signo)
@@ -211,15 +196,30 @@
     sigaction(SIGABRT, &sigact_old, NULL);
 }
 
+static bool hook_list_is_empty(GHookList *hook_list)
+{
+    GHook *hook = g_hook_first_valid(hook_list, TRUE);
+
+    if (!hook) {
+        return false;
+    }
+
+    g_hook_unref(hook_list, hook);
+    return true;
+}
+
 void qtest_add_abrt_handler(GHookFunc fn, const void *data)
 {
     GHook *hook;
 
-    /* Only install SIGABRT handler once */
     if (!abrt_hooks.is_setup) {
         g_hook_list_init(&abrt_hooks, sizeof(GHook));
     }
-    setup_sigabrt_handler();
+
+    /* Only install SIGABRT handler once */
+    if (hook_list_is_empty(&abrt_hooks)) {
+        setup_sigabrt_handler();
+    }
 
     hook = g_hook_alloc(&abrt_hooks);
     hook->func = fn;
@@ -228,6 +228,17 @@
     g_hook_prepend(&abrt_hooks, hook);
 }
 
+void qtest_remove_abrt_handler(void *data)
+{
+    GHook *hook = g_hook_find_data(&abrt_hooks, TRUE, data);
+    g_hook_destroy_link(&abrt_hooks, hook);
+
+    /* Uninstall SIGABRT handler on last instance */
+    if (hook_list_is_empty(&abrt_hooks)) {
+        cleanup_sigabrt_handler();
+    }
+}
+
 static const char *qtest_qemu_binary(void)
 {
     const char *qemu_bin;
@@ -384,12 +395,9 @@
 
 void qtest_quit(QTestState *s)
 {
-    g_hook_destroy_link(&abrt_hooks, g_hook_find_data(&abrt_hooks, TRUE, s));
+    qtest_remove_abrt_handler(s);
 
-    /* Uninstall SIGABRT handler on last instance */
-    cleanup_sigabrt_handler();
-
-    kill_qemu(s);
+    qtest_kill_qemu(s);
     close(s->fd);
     close(s->qmp_fd);
     g_string_free(s->rx, true);
@@ -638,6 +646,28 @@
     return qmp_fd_receive(s->qmp_fd);
 }
 
+int qtest_socket_server(const char *socket_path)
+{
+    struct sockaddr_un addr;
+    int sock;
+    int ret;
+
+    sock = socket(PF_UNIX, SOCK_STREAM, 0);
+    g_assert_cmpint(sock, !=, -1);
+
+    addr.sun_family = AF_UNIX;
+    snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", socket_path);
+
+    do {
+        ret = bind(sock, (struct sockaddr *)&addr, sizeof(addr));
+    } while (ret == -1 && errno == EINTR);
+    g_assert_cmpint(ret, !=, -1);
+    ret = listen(sock, 1);
+    g_assert_cmpint(ret, !=, -1);
+
+    return sock;
+}
+
 /**
  * Allow users to send a message without waiting for the reply,
  * in the case that they choose to discard all replies up until
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 58efc46..2688e1b 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -157,6 +157,7 @@
    'npcm7xx_watchdog_timer-test'] + \
    (slirp.found() ? ['npcm7xx_emc-test'] : [])
 qtests_arm = \
+  (config_all_devices.has_key('CONFIG_MPS2') ? ['sse-timer-test'] : []) + \
   (config_all_devices.has_key('CONFIG_CMSDK_APB_DUALTIMER') ? ['cmsdk-apb-dualtimer-test'] : []) + \
   (config_all_devices.has_key('CONFIG_CMSDK_APB_TIMER') ? ['cmsdk-apb-timer-test'] : []) + \
   (config_all_devices.has_key('CONFIG_CMSDK_APB_WATCHDOG') ? ['cmsdk-apb-watchdog-test'] : []) + \
diff --git a/tests/qtest/sse-timer-test.c b/tests/qtest/sse-timer-test.c
new file mode 100644
index 0000000..a65d754
--- /dev/null
+++ b/tests/qtest/sse-timer-test.c
@@ -0,0 +1,240 @@
+/*
+ * QTest testcase for the SSE timer device
+ *
+ * Copyright (c) 2021 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest-single.h"
+
+/*
+ * SSE-123/SSE-300 timer in the mps3-an547 board, where it is driven
+ * at 32MHz, so 31.25ns per tick.
+ */
+#define TIMER_BASE 0x48000000
+
+/* PERIPHNSPPC0 register in the SSE-300 Secure Access Configuration block */
+#define PERIPHNSPPC0 (0x50080000 + 0x70)
+
+/* Base of the System Counter control frame */
+#define COUNTER_BASE 0x58100000
+
+/* SSE counter register offsets in the control frame */
+#define CNTCR 0
+#define CNTSR 0x4
+#define CNTCV_LO 0x8
+#define CNTCV_HI 0xc
+#define CNTSCR 0x10
+
+/* SSE timer register offsets */
+#define CNTPCT_LO 0
+#define CNTPCT_HI 4
+#define CNTFRQ 0x10
+#define CNTP_CVAL_LO 0x20
+#define CNTP_CVAL_HI 0x24
+#define CNTP_TVAL 0x28
+#define CNTP_CTL 0x2c
+#define CNTP_AIVAL_LO 0x40
+#define CNTP_AIVAL_HI 0x44
+#define CNTP_AIVAL_RELOAD 0x48
+#define CNTP_AIVAL_CTL 0x4c
+
+/* 4 ticks in nanoseconds (so we can work in integers) */
+#define FOUR_TICKS 125
+
+static void clock_step_ticks(uint64_t ticks)
+{
+    /*
+     * Advance the qtest clock by however many nanoseconds we
+     * need to move the timer forward the specified number of ticks.
+     * ticks must be a multiple of 4, so we get a whole number of ns.
+     */
+    assert(!(ticks & 3));
+    clock_step(FOUR_TICKS * (ticks >> 2));
+}
+
+static void reset_counter_and_timer(void)
+{
+    /*
+     * Reset the system counter and the timer between tests. This
+     * isn't a full reset, but it's sufficient for what the tests check.
+     */
+    writel(COUNTER_BASE + CNTCR, 0);
+    writel(TIMER_BASE + CNTP_CTL, 0);
+    writel(TIMER_BASE + CNTP_AIVAL_CTL, 0);
+    writel(COUNTER_BASE + CNTCV_LO, 0);
+    writel(COUNTER_BASE + CNTCV_HI, 0);
+}
+
+static void test_counter(void)
+{
+    /* Basic counter functionality test */
+
+    reset_counter_and_timer();
+    /* The counter should start disabled: check that it doesn't move */
+    clock_step_ticks(100);
+    g_assert_cmpuint(readl(COUNTER_BASE + CNTCV_LO), ==, 0);
+    g_assert_cmpuint(readl(COUNTER_BASE + CNTCV_HI), ==, 0);
+    /* Now enable it and check that it does count */
+    writel(COUNTER_BASE + CNTCR, 1);
+    clock_step_ticks(100);
+    g_assert_cmpuint(readl(COUNTER_BASE + CNTCV_LO), ==, 100);
+    g_assert_cmpuint(readl(COUNTER_BASE + CNTCV_HI), ==, 0);
+    /* Check the counter scaling functionality */
+    writel(COUNTER_BASE + CNTCR, 0);
+    writel(COUNTER_BASE + CNTSCR, 0x00100000); /* 1/16th normal speed */
+    writel(COUNTER_BASE + CNTCR, 5); /* EN, SCEN */
+    clock_step_ticks(160);
+    g_assert_cmpuint(readl(COUNTER_BASE + CNTCV_LO), ==, 110);
+    g_assert_cmpuint(readl(COUNTER_BASE + CNTCV_HI), ==, 0);
+}
+
+static void test_timer(void)
+{
+    /* Basic timer functionality test */
+
+    reset_counter_and_timer();
+    /*
+     * The timer is behind a Peripheral Protection Controller, and
+     * qtest accesses are always non-secure (no memory attributes),
+     * so we must program the PPC to accept NS transactions.  TIMER0
+     * is on port 0 of PPC0, controlled by bit 0 of this register.
+     */
+    writel(PERIPHNSPPC0, 1);
+    /* We must enable the System Counter or the timer won't run. */
+    writel(COUNTER_BASE + CNTCR, 1);
+
+    /* Timer starts disabled and with a counter of 0 */
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_CTL), ==, 0);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTPCT_LO), ==, 0);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTPCT_HI), ==, 0);
+
+    /* Turn it on */
+    writel(TIMER_BASE + CNTP_CTL, 1);
+
+    /* Is the timer ticking? */
+    clock_step_ticks(100);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTPCT_LO), ==, 100);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTPCT_HI), ==, 0);
+
+    /* Set the CompareValue to 4000 ticks */
+    writel(TIMER_BASE + CNTP_CVAL_LO, 4000);
+    writel(TIMER_BASE + CNTP_CVAL_HI, 0);
+
+    /* Check TVAL view of the counter */
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_TVAL), ==, 3900);
+
+    /* Advance to the CompareValue mark and check ISTATUS is set */
+    clock_step_ticks(3900);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_TVAL), ==, 0);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_CTL), ==, 5);
+
+    /* Now exercise the auto-reload part of the timer */
+    writel(TIMER_BASE + CNTP_AIVAL_RELOAD, 200);
+    writel(TIMER_BASE + CNTP_AIVAL_CTL, 1);
+
+    /* Check AIVAL was reloaded and that ISTATUS is now clear */
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_AIVAL_LO), ==, 4200);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_AIVAL_HI), ==, 0);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_CTL), ==, 1);
+
+    /*
+     * Check that when we advance forward to the reload time the interrupt
+     * fires and the value reloads
+     */
+    clock_step_ticks(100);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_CTL), ==, 1);
+    clock_step_ticks(100);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_CTL), ==, 5);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_AIVAL_LO), ==, 4400);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_AIVAL_HI), ==, 0);
+
+    clock_step_ticks(100);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_CTL), ==, 5);
+    /* Check that writing 0 to CLR clears the interrupt */
+    writel(TIMER_BASE + CNTP_AIVAL_CTL, 1);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_CTL), ==, 1);
+    /* Check that when we move forward to the reload time it fires again */
+    clock_step_ticks(100);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_CTL), ==, 5);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_AIVAL_LO), ==, 4600);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_AIVAL_HI), ==, 0);
+
+    /*
+     * Step the clock far enough that we overflow the low half of the
+     * CNTPCT and AIVAL registers, and check that their high halves
+     * give the right values. We do the forward movement in
+     * non-autoinc mode because otherwise it takes forever as the
+     * timer has to emulate all the 'reload at t + N, t + 2N, etc'
+     * steps.
+     */
+    writel(TIMER_BASE + CNTP_AIVAL_CTL, 0);
+    clock_step_ticks(0x42ULL << 32);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTPCT_LO), ==, 4400);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTPCT_HI), ==, 0x42);
+
+    /* Turn on the autoinc again to check AIVAL_HI */
+    writel(TIMER_BASE + CNTP_AIVAL_CTL, 1);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_AIVAL_LO), ==, 4600);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_AIVAL_HI), ==, 0x42);
+}
+
+static void test_timer_scale_change(void)
+{
+    /*
+     * Test that the timer responds correctly to counter
+     * scaling changes while it has an active timer.
+     */
+    reset_counter_and_timer();
+    /* Give ourselves access to the timer, and enable the counter and timer */
+    writel(PERIPHNSPPC0, 1);
+    writel(COUNTER_BASE + CNTCR, 1);
+    writel(TIMER_BASE + CNTP_CTL, 1);
+    /* Set the CompareValue to 4000 ticks */
+    writel(TIMER_BASE + CNTP_CVAL_LO, 4000);
+    writel(TIMER_BASE + CNTP_CVAL_HI, 0);
+    /* Advance halfway and check ISTATUS is not set */
+    clock_step_ticks(2000);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_CTL), ==, 1);
+    /* Reprogram the counter to run at 1/16th speed */
+    writel(COUNTER_BASE + CNTCR, 0);
+    writel(COUNTER_BASE + CNTSCR, 0x00100000); /* 1/16th normal speed */
+    writel(COUNTER_BASE + CNTCR, 5); /* EN, SCEN */
+    /* Advance to where the timer would have fired and check it has not */
+    clock_step_ticks(2000);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_CTL), ==, 1);
+    /* Advance to where the timer must fire at the new clock rate */
+    clock_step_ticks(29996);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_CTL), ==, 1);
+    clock_step_ticks(4);
+    g_assert_cmpuint(readl(TIMER_BASE + CNTP_CTL), ==, 5);
+}
+
+int main(int argc, char **argv)
+{
+    int r;
+
+    g_test_init(&argc, &argv, NULL);
+
+    qtest_start("-machine mps3-an547");
+
+    qtest_add_func("/sse-timer/counter", test_counter);
+    qtest_add_func("/sse-timer/timer", test_timer);
+    qtest_add_func("/sse-timer/timer-scale-change", test_timer_scale_change);
+
+    r = g_test_run();
+
+    qtest_end();
+
+    return r;
+}