Merge remote-tracking branch 'remotes/xtensa/tags/20181030-xtensa' into staging

target/xtensa: support for bFLT binaries

- add support for bFLT binaries for target/xtensa
- fix per-architecture target_flat.h customization
- fix initial stack pointer for bFLT

# gpg: Signature made Tue 30 Oct 2018 18:27:42 GMT
# gpg:                using RSA key 51F9CC91F83FA044
# gpg: Good signature from "Max Filippov <filippov@cadence.com>"
# gpg:                 aka "Max Filippov <max.filippov@cogentembedded.com>"
# gpg:                 aka "Max Filippov <jcmvbkbc@gmail.com>"
# Primary key fingerprint: 2B67 854B 98E5 327D CDEB  17D8 51F9 CC91 F83F A044

* remotes/xtensa/tags/20181030-xtensa:
  linux-user/flatload: fix initial stack pointer alignment
  linux-user: xtensa: enable bFLT support

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/.gitignore b/.gitignore
index 5668d02..64efdfd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -107,6 +107,7 @@
 /qemu-doc.html
 /qemu-doc.info
 /qemu-doc.txt
+/qemu-edid
 /qemu-img
 /qemu-nbd
 /qemu-options.def
diff --git a/.gitmodules b/.gitmodules
index d108478..a48d2a7 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -43,3 +43,9 @@
 [submodule "roms/u-boot-sam460ex"]
 	path = roms/u-boot-sam460ex
 	url = git://git.qemu.org/u-boot-sam460ex.git
+[submodule "tests/fp/berkeley-testfloat-3"]
+	path = tests/fp/berkeley-testfloat-3
+	url = git://github.com/cota/berkeley-testfloat-3
+[submodule "tests/fp/berkeley-softfloat-3"]
+	path = tests/fp/berkeley-softfloat-3
+	url = git://github.com/cota/berkeley-softfloat-3
diff --git a/.mailmap b/.mailmap
index 2c2b9b1..ed8faa5 100644
--- a/.mailmap
+++ b/.mailmap
@@ -12,6 +12,7 @@
 James Hogan <jhogan@kernel.org> <james.hogan@imgtec.com>
 Jocelyn Mayer <l_indien@magic.fr> j_mayer <j_mayer@c046a42c-6fe2-441c-8c8c-71466251a162>
 Paul Brook <paul@codesourcery.com> pbrook <pbrook@c046a42c-6fe2-441c-8c8c-71466251a162>
+Yongbok Kim <yongbok.kim@mips.com> <yongbok.kim@imgtec.com>
 Aleksandar Markovic <amarkovic@wavecomp.com> <aleksandar.markovic@mips.com>
 Aleksandar Markovic <amarkovic@wavecomp.com> <aleksandar.markovic@imgtec.com>
 Paul Burton <pburton@wavecomp.com> <paul.burton@mips.com>
@@ -33,5 +34,6 @@
 
 
 # Also list preferred name forms where people have changed their
-# git author config
+# git author config, or have utf8/latin1 encoding issues.
 Daniel P. Berrangé <berrange@redhat.com>
+Reimar Döffinger <Reimar.Doeffinger@gmx.de>
diff --git a/MAINTAINERS b/MAINTAINERS
index 15503f4..d794bd7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -88,6 +88,7 @@
 F: hw/intc/s390_flic.c
 F: hw/intc/s390_flic_kvm.c
 F: hw/s390x/
+F: hw/vfio/ap.c
 F: hw/vfio/ccw.c
 F: hw/watchdog/wdt_diag288.c
 F: include/hw/s390x/
@@ -95,6 +96,7 @@
 F: pc-bios/s390-ccw/
 F: pc-bios/s390-ccw.img
 F: target/s390x/
+F: docs/vfio-ap.txt
 K: ^Subject:.*(?i)s390x?
 T: git git://github.com/cohuck/qemu.git s390-next
 L: qemu-s390x@nongnu.org
@@ -200,6 +202,8 @@
 F: include/hw/timer/mips_gictimer.h
 F: tests/tcg/mips/
 F: disas/mips.c
+F: disas/nanomips.h
+F: disas/nanomips.cpp
 
 Moxie
 M: Anthony Green <green@moxielogic.com>
@@ -1207,6 +1211,20 @@
 T: git git://github.com/cohuck/qemu.git s390-next
 L: qemu-s390x@nongnu.org
 
+vfio-ap
+M: Christian Borntraeger <borntraeger@de.ibm.com>
+M: Tony Krowiak <akrowiak@linux.ibm.com>
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Pierre Morel <pmorel@linux.ibm.com>
+S: Supported
+F: hw/s390x/ap-device.c
+F: hw/s390x/ap-bridge.c
+F: include/hw/s390x/ap-device.h
+F: include/hw/s390x/ap-bridge.h
+F: hw/vfio/ap.c
+F: docs/vfio-ap.txt
+L: qemu-s390x@nongnu.org
+
 vhost
 M: Michael S. Tsirkin <mst@redhat.com>
 S: Supported
@@ -1903,6 +1921,11 @@
 F: include/qemu/iova-tree.h
 F: util/iova-tree.c
 
+elf2dmp
+M: Viktor Prutyanov <viktor.prutyanov@phystech.edu>
+S: Maintained
+F: contrib/elf2dmp/
+
 Usermode Emulation
 ------------------
 Overall
@@ -2002,7 +2025,6 @@
 T: git git://github.com/codyprime/qemu-kvm-jtc.git block
 
 Sheepdog
-M: Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp>
 M: Liu Yuan <namei.unix@gmail.com>
 M: Jeff Cody <jcody@redhat.com>
 L: qemu-block@nongnu.org
diff --git a/Makefile b/Makefile
index 3730092..f294718 100644
--- a/Makefile
+++ b/Makefile
@@ -415,6 +415,7 @@
                 chardev-obj-y \
                 util-obj-y \
                 qga-obj-y \
+                elf2dmp-obj-y \
                 ivshmem-client-obj-y \
                 ivshmem-server-obj-y \
                 libvhost-user-obj-y \
@@ -710,6 +711,10 @@
 qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI)
 endif
 
+elf2dmp: LIBS = $(CURL_LIBS)
+elf2dmp: $(elf2dmp-obj-y)
+	$(call LINK, $^)
+
 ifdef CONFIG_IVSHMEM
 ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) $(COMMON_LDADDS)
 	$(call LINK, $^)
@@ -797,6 +802,7 @@
 ifdef INSTALL_BLOBS
 BLOBS=bios.bin bios-256k.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
 vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin vgabios-virtio.bin \
+vgabios-ramfb.bin vgabios-bochs-display.bin \
 ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \
 pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \
 pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \
diff --git a/Makefile.objs b/Makefile.objs
index ce9c792..1e1ff38 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -186,6 +186,7 @@
 
 ######################################################################
 # contrib
+elf2dmp-obj-y = contrib/elf2dmp/
 ivshmem-client-obj-$(CONFIG_IVSHMEM) = contrib/ivshmem-client/
 ivshmem-server-obj-$(CONFIG_IVSHMEM) = contrib/ivshmem-server/
 libvhost-user-obj-y = contrib/libvhost-user/
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index de12f78..4880a05 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -79,6 +79,7 @@
     int fd;
     int vmfd;
     int coalesced_mmio;
+    int coalesced_pio;
     struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
     bool coalesced_flush_in_progress;
     int vcpu_events;
@@ -560,6 +561,45 @@
     }
 }
 
+static void kvm_coalesce_pio_add(MemoryListener *listener,
+                                MemoryRegionSection *section,
+                                hwaddr start, hwaddr size)
+{
+    KVMState *s = kvm_state;
+
+    if (s->coalesced_pio) { /* skipped when coalesced PIO is unavailable */
+        struct kvm_coalesced_mmio_zone zone;
+
+        zone.addr = start;
+        zone.size = size;
+        zone.pio = 1; /* flag the zone as PIO */
+
+        (void)kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
+    }
+}
+
+static void kvm_coalesce_pio_del(MemoryListener *listener,
+                                MemoryRegionSection *section,
+                                hwaddr start, hwaddr size)
+{
+    KVMState *s = kvm_state;
+
+    if (s->coalesced_pio) { /* skipped when coalesced PIO is unavailable */
+        struct kvm_coalesced_mmio_zone zone;
+
+        zone.addr = start;
+        zone.size = size;
+        zone.pio = 1; /* same flag as used when the zone was registered */
+
+        (void)kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
+    }
+}
+
+static MemoryListener kvm_coalesced_pio_listener = {
+    .coalesced_io_add = kvm_coalesce_pio_add,
+    .coalesced_io_del = kvm_coalesce_pio_del,
+};
+
 int kvm_check_extension(KVMState *s, unsigned int extension)
 {
     int ret;
@@ -1616,6 +1656,8 @@
     }
 
     s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
+    s->coalesced_pio = s->coalesced_mmio &&
+                       kvm_check_extension(s, KVM_CAP_COALESCED_PIO);
 
 #ifdef KVM_CAP_VCPU_EVENTS
     s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
@@ -1686,13 +1728,15 @@
         s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
         s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
     }
-    s->memory_listener.listener.coalesced_mmio_add = kvm_coalesce_mmio_region;
-    s->memory_listener.listener.coalesced_mmio_del = kvm_uncoalesce_mmio_region;
+    s->memory_listener.listener.coalesced_io_add = kvm_coalesce_mmio_region;
+    s->memory_listener.listener.coalesced_io_del = kvm_uncoalesce_mmio_region;
 
     kvm_memory_listener_register(s, &s->memory_listener,
                                  &address_space_memory, 0);
     memory_listener_register(&kvm_io_listener,
                              &address_space_io);
+    memory_listener_register(&kvm_coalesced_pio_listener,
+                             &address_space_io);
 
     s->many_ioeventfds = kvm_check_many_ioeventfds();
 
@@ -1778,7 +1822,13 @@
 
             ent = &ring->coalesced_mmio[ring->first];
 
-            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
+            if (ent->pio == 1) {
+                address_space_rw(&address_space_io, ent->phys_addr,
+                                 MEMTXATTRS_UNSPECIFIED, ent->data,
+                                 ent->len, true);
+            } else {
+                cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
+            }
             smp_wmb();
             ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
         }
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
index d751bcb..efde12f 100644
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@@ -100,19 +100,24 @@
     DATA_TYPE ret;
 
     ATOMIC_TRACE_RMW;
+#if DATA_SIZE == 16
+    ret = atomic16_cmpxchg(haddr, cmpv, newv);
+#else
     ret = atomic_cmpxchg__nocheck(haddr, cmpv, newv);
+#endif
     ATOMIC_MMU_CLEANUP;
     return ret;
 }
 
 #if DATA_SIZE >= 16
+#if HAVE_ATOMIC128
 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
 {
     ATOMIC_MMU_DECLS;
     DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
 
     ATOMIC_TRACE_LD;
-    __atomic_load(haddr, &val, __ATOMIC_RELAXED);
+    val = atomic16_read(haddr);
     ATOMIC_MMU_CLEANUP;
     return val;
 }
@@ -124,9 +129,10 @@
     DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
 
     ATOMIC_TRACE_ST;
-    __atomic_store(haddr, &val, __ATOMIC_RELAXED);
+    atomic16_set(haddr, val);
     ATOMIC_MMU_CLEANUP;
 }
+#endif
 #else
 ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
                            ABI_TYPE val EXTRA_ARGS)
@@ -228,19 +234,24 @@
     DATA_TYPE ret;
 
     ATOMIC_TRACE_RMW;
+#if DATA_SIZE == 16
+    ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv));
+#else
     ret = atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv));
+#endif
     ATOMIC_MMU_CLEANUP;
     return BSWAP(ret);
 }
 
 #if DATA_SIZE >= 16
+#if HAVE_ATOMIC128
 ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
 {
     ATOMIC_MMU_DECLS;
     DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
 
     ATOMIC_TRACE_LD;
-    __atomic_load(haddr, &val, __ATOMIC_RELAXED);
+    val = atomic16_read(haddr);
     ATOMIC_MMU_CLEANUP;
     return BSWAP(val);
 }
@@ -253,9 +264,10 @@
 
     ATOMIC_TRACE_ST;
     val = BSWAP(val);
-    __atomic_store(haddr, &val, __ATOMIC_RELAXED);
+    atomic16_set(haddr, val);
     ATOMIC_MMU_CLEANUP;
 }
+#endif
 #else
 ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
                            ABI_TYPE val EXTRA_ARGS)
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 6bcb6d9..870027d 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -416,7 +416,7 @@
     }
 #endif
     /* See if we can patch the calling TB. */
-    if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
+    if (last_tb) {
         tb_add_jump(last_tb, tb_exit, tb);
     }
     return tb;
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index f4702ce..af57aca 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -32,6 +32,7 @@
 #include "exec/log.h"
 #include "exec/helper-proto.h"
 #include "qemu/atomic.h"
+#include "qemu/atomic128.h"
 
 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
 /* #define DEBUG_TLB */
@@ -58,9 +59,9 @@
     } \
 } while (0)
 
-#define assert_cpu_is_self(this_cpu) do {                         \
+#define assert_cpu_is_self(cpu) do {                              \
         if (DEBUG_TLB_GATE) {                                     \
-            g_assert(!cpu->created || qemu_cpu_is_self(cpu));     \
+            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
         }                                                         \
     } while (0)
 
@@ -73,6 +74,13 @@
 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
 
+void tlb_init(CPUState *cpu)
+{
+    CPUArchState *env = cpu->env_ptr;
+
+    qemu_spin_init(&env->tlb_lock);
+}
+
 /* flush_all_helper: run fn across all cpus
  *
  * If the wait flag is set then the src cpu's helper will be queued as
@@ -125,8 +133,17 @@
     atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1);
     tlb_debug("(count: %zu)\n", tlb_flush_count());
 
+    /*
+     * tlb_table/tlb_v_table updates from any thread must hold tlb_lock.
+     * However, updates from the owner thread (as is the case here; see the
+     * above assert_cpu_is_self) do not need atomic_set because all reads
+     * that do not hold the lock are performed by the same owner thread.
+     */
+    qemu_spin_lock(&env->tlb_lock);
     memset(env->tlb_table, -1, sizeof(env->tlb_table));
     memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
+    qemu_spin_unlock(&env->tlb_lock);
+
     cpu_tb_jmp_cache_clear(cpu);
 
     env->vtlb_index = 0;
@@ -178,6 +195,7 @@
 
     tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
 
+    qemu_spin_lock(&env->tlb_lock);
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 
         if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
@@ -187,6 +205,7 @@
             memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
         }
     }
+    qemu_spin_unlock(&env->tlb_lock);
 
     cpu_tb_jmp_cache_clear(cpu);
 
@@ -239,23 +258,28 @@
                                         target_ulong page)
 {
     return tlb_hit_page(tlb_entry->addr_read, page) ||
-           tlb_hit_page(tlb_entry->addr_write, page) ||
+           tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
            tlb_hit_page(tlb_entry->addr_code, page);
 }
 
-static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong page)
+/* Called with tlb_lock held */
+static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
+                                          target_ulong page)
 {
     if (tlb_hit_page_anyprot(tlb_entry, page)) {
         memset(tlb_entry, -1, sizeof(*tlb_entry));
     }
 }
 
-static inline void tlb_flush_vtlb_page(CPUArchState *env, int mmu_idx,
-                                       target_ulong page)
+/* Called with tlb_lock held */
+static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
+                                              target_ulong page)
 {
     int k;
+
+    assert_cpu_is_self(ENV_GET_CPU(env));
     for (k = 0; k < CPU_VTLB_SIZE; k++) {
-        tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], page);
+        tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page);
     }
 }
 
@@ -263,7 +287,6 @@
 {
     CPUArchState *env = cpu->env_ptr;
     target_ulong addr = (target_ulong) data.target_ptr;
-    int i;
     int mmu_idx;
 
     assert_cpu_is_self(cpu);
@@ -281,11 +304,12 @@
     }
 
     addr &= TARGET_PAGE_MASK;
-    i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    qemu_spin_lock(&env->tlb_lock);
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
-        tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
-        tlb_flush_vtlb_page(env, mmu_idx, addr);
+        tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr);
+        tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
     }
+    qemu_spin_unlock(&env->tlb_lock);
 
     tb_flush_jmp_cache(cpu, addr);
 }
@@ -314,20 +338,21 @@
     target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
     target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
     unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
-    int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     int mmu_idx;
 
     assert_cpu_is_self(cpu);
 
-    tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
-              page, addr, mmu_idx_bitmap);
+    tlb_debug("flush page addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
+              addr, mmu_idx_bitmap);
 
+    qemu_spin_lock(&env->tlb_lock);
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
         if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
-            tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr);
-            tlb_flush_vtlb_page(env, mmu_idx, addr);
+            tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr);
+            tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
         }
     }
+    qemu_spin_unlock(&env->tlb_lock);
 
     tb_flush_jmp_cache(cpu, addr);
 }
@@ -450,72 +475,44 @@
  * most usual is detecting writes to code regions which may invalidate
  * generated code.
  *
- * Because we want other vCPUs to respond to changes straight away we
- * update the te->addr_write field atomically. If the TLB entry has
- * been changed by the vCPU in the mean time we skip the update.
+ * Other vCPUs might be reading their TLBs during guest execution, so we update
+ * te->addr_write with atomic_set. We don't need to worry about this for
+ * oversized guests as MTTCG is disabled for them.
  *
- * As this function uses atomic accesses we also need to ensure
- * updates to tlb_entries follow the same access rules. We don't need
- * to worry about this for oversized guests as MTTCG is disabled for
- * them.
+ * Called with tlb_lock held.
  */
-
-static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
-                           uintptr_t length)
+static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
+                                         uintptr_t start, uintptr_t length)
 {
-#if TCG_OVERSIZED_GUEST
     uintptr_t addr = tlb_entry->addr_write;
 
     if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
         addr &= TARGET_PAGE_MASK;
         addr += tlb_entry->addend;
         if ((addr - start) < length) {
+#if TCG_OVERSIZED_GUEST
             tlb_entry->addr_write |= TLB_NOTDIRTY;
-        }
-    }
 #else
-    /* paired with atomic_mb_set in tlb_set_page_with_attrs */
-    uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write);
-    uintptr_t addr = orig_addr;
-
-    if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
-        addr &= TARGET_PAGE_MASK;
-        addr += atomic_read(&tlb_entry->addend);
-        if ((addr - start) < length) {
-            uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY;
-            atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr);
+            atomic_set(&tlb_entry->addr_write,
+                       tlb_entry->addr_write | TLB_NOTDIRTY);
+#endif
         }
     }
-#endif
 }
 
-/* For atomic correctness when running MTTCG we need to use the right
- * primitives when copying entries */
-static inline void copy_tlb_helper(CPUTLBEntry *d, CPUTLBEntry *s,
-                                   bool atomic_set)
+/*
+ * Called with tlb_lock held.
+ * Called only from the vCPU context, i.e. the TLB's owner thread.
+ */
+static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
 {
-#if TCG_OVERSIZED_GUEST
     *d = *s;
-#else
-    if (atomic_set) {
-        d->addr_read = s->addr_read;
-        d->addr_code = s->addr_code;
-        atomic_set(&d->addend, atomic_read(&s->addend));
-        /* Pairs with flag setting in tlb_reset_dirty_range */
-        atomic_mb_set(&d->addr_write, atomic_read(&s->addr_write));
-    } else {
-        d->addr_read = s->addr_read;
-        d->addr_write = atomic_read(&s->addr_write);
-        d->addr_code = s->addr_code;
-        d->addend = atomic_read(&s->addend);
-    }
-#endif
 }
 
 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
- * the target vCPU). As such care needs to be taken that we don't
- * dangerously race with another vCPU update. The only thing actually
- * updated is the target TLB entry ->addr_write flags.
+ * the target vCPU).
+ * We must take tlb_lock to avoid racing with another vCPU update. The only
+ * thing actually updated is the target TLB entry ->addr_write flags.
  */
 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
 {
@@ -524,22 +521,26 @@
     int mmu_idx;
 
     env = cpu->env_ptr;
+    qemu_spin_lock(&env->tlb_lock);
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
         unsigned int i;
 
         for (i = 0; i < CPU_TLB_SIZE; i++) {
-            tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
-                                  start1, length);
+            tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1,
+                                         length);
         }
 
         for (i = 0; i < CPU_VTLB_SIZE; i++) {
-            tlb_reset_dirty_range(&env->tlb_v_table[mmu_idx][i],
-                                  start1, length);
+            tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1,
+                                         length);
         }
     }
+    qemu_spin_unlock(&env->tlb_lock);
 }
 
-static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
+/* Called with tlb_lock held */
+static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
+                                         target_ulong vaddr)
 {
     if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
         tlb_entry->addr_write = vaddr;
@@ -551,23 +552,23 @@
 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
 {
     CPUArchState *env = cpu->env_ptr;
-    int i;
     int mmu_idx;
 
     assert_cpu_is_self(cpu);
 
     vaddr &= TARGET_PAGE_MASK;
-    i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    qemu_spin_lock(&env->tlb_lock);
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
-        tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
+        tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
     }
 
     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
         int k;
         for (k = 0; k < CPU_VTLB_SIZE; k++) {
-            tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr);
+            tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr);
         }
     }
+    qemu_spin_unlock(&env->tlb_lock);
 }
 
 /* Our TLB does not support large pages, so remember the area covered by
@@ -654,15 +655,24 @@
         addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
     }
 
-    /* Make sure there's no cached translation for the new page.  */
-    tlb_flush_vtlb_page(env, mmu_idx, vaddr_page);
-
     code_address = address;
     iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
                                             paddr_page, xlat, prot, &address);
 
-    index = (vaddr_page >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    te = &env->tlb_table[mmu_idx][index];
+    index = tlb_index(env, mmu_idx, vaddr_page);
+    te = tlb_entry(env, mmu_idx, vaddr_page);
+
+    /*
+     * Hold the TLB lock for the rest of the function. We could acquire/release
+     * the lock several times in the function, but it is faster to amortize the
+     * acquisition cost by acquiring it just once. Note that this leads to
+     * a longer critical section, but this is not a concern since the TLB lock
+     * is unlikely to be contended.
+     */
+    qemu_spin_lock(&env->tlb_lock);
+
+    /* Make sure there's no cached translation for the new page.  */
+    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
 
     /*
      * Only evict the old entry to the victim tlb if it's for a
@@ -673,7 +683,7 @@
         CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx];
 
         /* Evict the old entry into the victim tlb.  */
-        copy_tlb_helper(tv, te, true);
+        copy_tlb_helper_locked(tv, te);
         env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
     }
 
@@ -725,9 +735,8 @@
         }
     }
 
-    /* Pairs with flag setting in tlb_reset_dirty_range */
-    copy_tlb_helper(te, &tn, true);
-    /* atomic_mb_set(&te->addr_write, write_address); */
+    copy_tlb_helper_locked(te, &tn);
+    qemu_spin_unlock(&env->tlb_lock);
 }
 
 /* Add a new TLB entry, but without specifying the memory
@@ -773,16 +782,16 @@
          * repeat the MMU check here. This tlb_fill() call might
          * longjump out if this access should cause a guest exception.
          */
-        int index;
+        CPUTLBEntry *entry;
         target_ulong tlb_addr;
 
         tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
 
-        index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-        tlb_addr = env->tlb_table[mmu_idx][index].addr_read;
+        entry = tlb_entry(env, mmu_idx, addr);
+        tlb_addr = entry->addr_read;
         if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
             /* RAM access */
-            uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend;
+            uintptr_t haddr = addr + entry->addend;
 
             return ldn_p((void *)haddr, size);
         }
@@ -840,16 +849,16 @@
          * repeat the MMU check here. This tlb_fill() call might
          * longjump out if this access should cause a guest exception.
          */
-        int index;
+        CPUTLBEntry *entry;
         target_ulong tlb_addr;
 
         tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
 
-        index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-        tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+        entry = tlb_entry(env, mmu_idx, addr);
+        tlb_addr = tlb_addr_write(entry);
         if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
             /* RAM access */
-            uintptr_t haddr = addr + env->tlb_table[mmu_idx][index].addend;
+            uintptr_t haddr = addr + entry->addend;
 
             stn_p((void *)haddr, size, val);
             return;
@@ -891,17 +900,28 @@
                            size_t elt_ofs, target_ulong page)
 {
     size_t vidx;
+
+    assert_cpu_is_self(ENV_GET_CPU(env));
     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
         CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
-        target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
+        target_ulong cmp;
+
+        /* elt_ofs might correspond to .addr_write, so use atomic_read */
+#if TCG_OVERSIZED_GUEST
+        cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
+#else
+        cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
+#endif
 
         if (cmp == page) {
             /* Found entry in victim tlb, swap tlb and iotlb.  */
             CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
 
-            copy_tlb_helper(&tmptlb, tlb, false);
-            copy_tlb_helper(tlb, vtlb, true);
-            copy_tlb_helper(vtlb, &tmptlb, true);
+            qemu_spin_lock(&env->tlb_lock);
+            copy_tlb_helper_locked(&tmptlb, tlb);
+            copy_tlb_helper_locked(tlb, vtlb);
+            copy_tlb_helper_locked(vtlb, &tmptlb);
+            qemu_spin_unlock(&env->tlb_lock);
 
             CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
             CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
@@ -924,20 +944,19 @@
  */
 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
 {
-    int mmu_idx, index;
+    uintptr_t mmu_idx = cpu_mmu_index(env, true);
+    uintptr_t index = tlb_index(env, mmu_idx, addr);
+    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
     void *p;
 
-    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    mmu_idx = cpu_mmu_index(env, true);
-    if (unlikely(!tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr))) {
+    if (unlikely(!tlb_hit(entry->addr_code, addr))) {
         if (!VICTIM_TLB_HIT(addr_code, addr)) {
             tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
         }
-        assert(tlb_hit(env->tlb_table[mmu_idx][index].addr_code, addr));
+        assert(tlb_hit(entry->addr_code, addr));
     }
 
-    if (unlikely(env->tlb_table[mmu_idx][index].addr_code &
-                 (TLB_RECHECK | TLB_MMIO))) {
+    if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) {
         /*
          * Return -1 if we can't translate and execute from an entire
          * page of RAM here, which will cause us to execute by loading
@@ -949,7 +968,7 @@
         return -1;
     }
 
-    p = (void *)((uintptr_t)addr + env->tlb_table[mmu_idx][index].addend);
+    p = (void *)((uintptr_t)addr + entry->addend);
     return qemu_ram_addr_from_host_nofail(p);
 }
 
@@ -962,10 +981,10 @@
 void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
                  uintptr_t retaddr)
 {
-    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+    uintptr_t index = tlb_index(env, mmu_idx, addr);
+    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
 
-    if (!tlb_hit(tlb_addr, addr)) {
+    if (!tlb_hit(tlb_addr_write(entry), addr)) {
         /* TLB entry is for a different page */
         if (!VICTIM_TLB_HIT(addr_write, addr)) {
             tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
@@ -981,9 +1000,9 @@
                                NotDirtyInfo *ndi)
 {
     size_t mmu_idx = get_mmuidx(oi);
-    size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    CPUTLBEntry *tlbe = &env->tlb_table[mmu_idx][index];
-    target_ulong tlb_addr = tlbe->addr_write;
+    uintptr_t index = tlb_index(env, mmu_idx, addr);
+    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
+    target_ulong tlb_addr = tlb_addr_write(tlbe);
     TCGMemOp mop = get_memop(oi);
     int a_bits = get_alignment_bits(mop);
     int s_bits = mop & MO_SIZE;
@@ -1014,7 +1033,7 @@
             tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE,
                      mmu_idx, retaddr);
         }
-        tlb_addr = tlbe->addr_write & ~TLB_INVALID_MASK;
+        tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
     }
 
     /* Notice an IO access or a needs-MMU-lookup access */
@@ -1101,7 +1120,7 @@
 #include "atomic_template.h"
 #endif
 
-#ifdef CONFIG_ATOMIC128
+#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
 #define DATA_SIZE 16
 #include "atomic_template.h"
 #endif
diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h
index f060a69..b0adea0 100644
--- a/accel/tcg/softmmu_template.h
+++ b/accel/tcg/softmmu_template.h
@@ -111,9 +111,10 @@
 WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
                             TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    unsigned mmu_idx = get_mmuidx(oi);
-    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+    uintptr_t mmu_idx = get_mmuidx(oi);
+    uintptr_t index = tlb_index(env, mmu_idx, addr);
+    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
+    target_ulong tlb_addr = entry->ADDR_READ;
     unsigned a_bits = get_alignment_bits(get_memop(oi));
     uintptr_t haddr;
     DATA_TYPE res;
@@ -129,7 +130,7 @@
             tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE,
                      mmu_idx, retaddr);
         }
-        tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+        tlb_addr = entry->ADDR_READ;
     }
 
     /* Handle an IO access.  */
@@ -166,7 +167,7 @@
         return res;
     }
 
-    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    haddr = addr + entry->addend;
 #if DATA_SIZE == 1
     res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr);
 #else
@@ -179,9 +180,10 @@
 WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
                             TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    unsigned mmu_idx = get_mmuidx(oi);
-    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+    uintptr_t mmu_idx = get_mmuidx(oi);
+    uintptr_t index = tlb_index(env, mmu_idx, addr);
+    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
+    target_ulong tlb_addr = entry->ADDR_READ;
     unsigned a_bits = get_alignment_bits(get_memop(oi));
     uintptr_t haddr;
     DATA_TYPE res;
@@ -197,7 +199,7 @@
             tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE,
                      mmu_idx, retaddr);
         }
-        tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+        tlb_addr = entry->ADDR_READ;
     }
 
     /* Handle an IO access.  */
@@ -234,7 +236,7 @@
         return res;
     }
 
-    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    haddr = addr + entry->addend;
     res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr);
     return res;
 }
@@ -275,9 +277,10 @@
 void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    unsigned mmu_idx = get_mmuidx(oi);
-    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+    uintptr_t mmu_idx = get_mmuidx(oi);
+    uintptr_t index = tlb_index(env, mmu_idx, addr);
+    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
+    target_ulong tlb_addr = tlb_addr_write(entry);
     unsigned a_bits = get_alignment_bits(get_memop(oi));
     uintptr_t haddr;
 
@@ -292,7 +295,7 @@
             tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
                      mmu_idx, retaddr);
         }
-        tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK;
+        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
     }
 
     /* Handle an IO access.  */
@@ -313,16 +316,16 @@
     if (DATA_SIZE > 1
         && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
                      >= TARGET_PAGE_SIZE)) {
-        int i, index2;
-        target_ulong page2, tlb_addr2;
+        int i;
+        target_ulong page2;
+        CPUTLBEntry *entry2;
     do_unaligned_access:
         /* Ensure the second page is in the TLB.  Note that the first page
            is already guaranteed to be filled, and that the second page
            cannot evict the first.  */
         page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
-        index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-        tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write;
-        if (!tlb_hit_page(tlb_addr2, page2)
+        entry2 = tlb_entry(env, mmu_idx, page2);
+        if (!tlb_hit_page(tlb_addr_write(entry2), page2)
             && !VICTIM_TLB_HIT(addr_write, page2)) {
             tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
                      mmu_idx, retaddr);
@@ -340,7 +343,7 @@
         return;
     }
 
-    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    haddr = addr + entry->addend;
 #if DATA_SIZE == 1
     glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
 #else
@@ -352,9 +355,10 @@
 void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    unsigned mmu_idx = get_mmuidx(oi);
-    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+    uintptr_t mmu_idx = get_mmuidx(oi);
+    uintptr_t index = tlb_index(env, mmu_idx, addr);
+    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
+    target_ulong tlb_addr = tlb_addr_write(entry);
     unsigned a_bits = get_alignment_bits(get_memop(oi));
     uintptr_t haddr;
 
@@ -369,7 +373,7 @@
             tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
                      mmu_idx, retaddr);
         }
-        tlb_addr = env->tlb_table[mmu_idx][index].addr_write & ~TLB_INVALID_MASK;
+        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
     }
 
     /* Handle an IO access.  */
@@ -390,16 +394,16 @@
     if (DATA_SIZE > 1
         && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
                      >= TARGET_PAGE_SIZE)) {
-        int i, index2;
-        target_ulong page2, tlb_addr2;
+        int i;
+        target_ulong page2;
+        CPUTLBEntry *entry2;
     do_unaligned_access:
         /* Ensure the second page is in the TLB.  Note that the first page
            is already guaranteed to be filled, and that the second page
            cannot evict the first.  */
         page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
-        index2 = (page2 >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-        tlb_addr2 = env->tlb_table[mmu_idx][index2].addr_write;
-        if (!tlb_hit_page(tlb_addr2, page2)
+        entry2 = tlb_entry(env, mmu_idx, page2);
+        if (!tlb_hit_page(tlb_addr_write(entry2), page2)
             && !VICTIM_TLB_HIT(addr_write, page2)) {
             tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
                      mmu_idx, retaddr);
@@ -417,7 +421,7 @@
         return;
     }
 
-    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    haddr = addr + entry->addend;
     glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
 }
 #endif /* DATA_SIZE > 1 */
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index 56dbb56..3d25bdc 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -51,7 +51,7 @@
     if (!qemu_cpu_is_self(cpu)) {
         qemu_cpu_kick(cpu);
     } else {
-        cpu->icount_decr.u16.high = -1;
+        atomic_set(&cpu->icount_decr.u16.high, -1);
         if (use_icount &&
             !cpu->can_do_io
             && (mask & ~old_mask) != 0) {
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 9ffbbc2..356dcd0 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -2009,15 +2009,6 @@
 {
     PageDesc *p;
 
-#if 0
-    if (1) {
-        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
-                  cpu_single_env->mem_io_vaddr, len,
-                  cpu_single_env->eip,
-                  cpu_single_env->eip +
-                  (intptr_t)cpu_single_env->segs[R_CS].base);
-    }
-#endif
     assert_memory_lock();
 
     p = page_find(start >> TARGET_PAGE_BITS);
@@ -2350,7 +2341,7 @@
 {
     g_assert(qemu_mutex_iothread_locked());
     cpu->interrupt_request |= mask;
-    cpu->icount_decr.u16.high = -1;
+    atomic_set(&cpu->icount_decr.u16.high, -1);
 }
 
 /*
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 0f9dca9..afd0a49 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -34,6 +34,8 @@
 void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
                      CPUState *cpu, TranslationBlock *tb)
 {
+    int bp_insn = 0;
+
     /* Initialize DisasContext */
     db->tb = tb;
     db->pc_first = tb->pc;
@@ -71,11 +73,13 @@
         tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
 
         /* Pass breakpoint hits to target for further processing */
-        if (unlikely(!QTAILQ_EMPTY(&cpu->breakpoints))) {
+        if (!db->singlestep_enabled
+            && unlikely(!QTAILQ_EMPTY(&cpu->breakpoints))) {
             CPUBreakpoint *bp;
             QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
                 if (bp->pc == db->pc_next) {
                     if (ops->breakpoint_check(db, cpu, bp)) {
+                        bp_insn = 1;
                         break;
                     }
                 }
@@ -118,7 +122,7 @@
 
     /* Emit code to exit the TB, as indicated by db->is_jmp.  */
     ops->tb_stop(db, cpu);
-    gen_tb_end(db->tb, db->num_insns);
+    gen_tb_end(db->tb, db->num_insns - bp_insn);
 
     /* The disas_log hook may use these values rather than recompute.  */
     db->tb->size = db->pc_next - db->pc_first;
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 26a3ffb..cd75829 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -25,6 +25,7 @@
 #include "exec/cpu_ldst.h"
 #include "translate-all.h"
 #include "exec/helper-proto.h"
+#include "qemu/atomic128.h"
 
 #undef EAX
 #undef ECX
@@ -615,7 +616,7 @@
 /* The following is only callable from other helpers, and matches up
    with the softmmu version.  */
 
-#ifdef CONFIG_ATOMIC128
+#if HAVE_ATOMIC128 || HAVE_CMPXCHG128
 
 #undef EXTRA_ARGS
 #undef ATOMIC_NAME
@@ -628,4 +629,4 @@
 
 #define DATA_SIZE 16
 #include "atomic_template.h"
-#endif /* CONFIG_ATOMIC128 */
+#endif
diff --git a/backends/Makefile.objs b/backends/Makefile.objs
index ad7c032..717fcbd 100644
--- a/backends/Makefile.objs
+++ b/backends/Makefile.objs
@@ -4,7 +4,7 @@
 common-obj-$(CONFIG_TPM) += tpm.o
 
 common-obj-y += hostmem.o hostmem-ram.o
-common-obj-$(CONFIG_LINUX) += hostmem-file.o
+common-obj-$(CONFIG_POSIX) += hostmem-file.o
 
 common-obj-y += cryptodev.o
 common-obj-y += cryptodev-builtin.o
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index 2476dcb..639c8d4 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -51,7 +51,7 @@
         error_setg(errp, "mem-path property not set");
         return;
     }
-#ifndef CONFIG_LINUX
+#ifndef CONFIG_POSIX
     error_setg(errp, "-mem-path not supported on this host");
 #else
     if (!host_memory_backend_mr_inited(backend)) {
@@ -145,20 +145,26 @@
     HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
 
     if (host_memory_backend_mr_inited(backend)) {
+        char *path = object_get_canonical_path_component(o);
+
         error_setg(errp, "cannot change property 'pmem' of %s '%s'",
                    object_get_typename(o),
-                   object_get_canonical_path_component(o));
+                   path);
+        g_free(path);
         return;
     }
 
 #ifndef CONFIG_LIBPMEM
     if (value) {
         Error *local_err = NULL;
+        char *path = object_get_canonical_path_component(o);
+
         error_setg(&local_err,
                    "Lack of libpmem support while setting the 'pmem=on'"
                    " of %s '%s'. We can't ensure data persistence.",
                    object_get_typename(o),
-                   object_get_canonical_path_component(o));
+                   path);
+        g_free(path);
         error_propagate(errp, local_err);
         return;
     }
diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
index 1e20fe0..b6836b2 100644
--- a/backends/hostmem-memfd.c
+++ b/backends/hostmem-memfd.c
@@ -140,18 +140,31 @@
 
     bc->alloc = memfd_backend_memory_alloc;
 
-    object_class_property_add_bool(oc, "hugetlb",
-                                   memfd_backend_get_hugetlb,
-                                   memfd_backend_set_hugetlb,
-                                   &error_abort);
-    object_class_property_add(oc, "hugetlbsize", "int",
-                              memfd_backend_get_hugetlbsize,
-                              memfd_backend_set_hugetlbsize,
-                              NULL, NULL, &error_abort);
-    object_class_property_add_bool(oc, "seal",
-                                   memfd_backend_get_seal,
-                                   memfd_backend_set_seal,
-                                   &error_abort);
+    if (qemu_memfd_check(MFD_HUGETLB)) {
+        object_class_property_add_bool(oc, "hugetlb",
+                                       memfd_backend_get_hugetlb,
+                                       memfd_backend_set_hugetlb,
+                                       &error_abort);
+        object_class_property_set_description(oc, "hugetlb",
+                                              "Use huge pages",
+                                              &error_abort);
+        object_class_property_add(oc, "hugetlbsize", "int",
+                                  memfd_backend_get_hugetlbsize,
+                                  memfd_backend_set_hugetlbsize,
+                                  NULL, NULL, &error_abort);
+        object_class_property_set_description(oc, "hugetlbsize",
+                                              "Huge pages size (ex: 2M, 1G)",
+                                              &error_abort);
+    }
+    if (qemu_memfd_check(MFD_ALLOW_SEALING)) {
+        object_class_property_add_bool(oc, "seal",
+                                       memfd_backend_get_seal,
+                                       memfd_backend_set_seal,
+                                       &error_abort);
+        object_class_property_set_description(oc, "seal",
+                                              "Seal growing & shrinking",
+                                              &error_abort);
+    }
 }
 
 static const TypeInfo memfd_backend_info = {
@@ -164,7 +177,9 @@
 
 static void register_types(void)
 {
-    type_register_static(&memfd_backend_info);
+    if (qemu_memfd_check(0)) {
+        type_register_static(&memfd_backend_info);
+    }
 }
 
 type_init(register_types);
diff --git a/backends/hostmem.c b/backends/hostmem.c
index 4908946..1a89342 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -397,27 +397,41 @@
     object_class_property_add_bool(oc, "merge",
         host_memory_backend_get_merge,
         host_memory_backend_set_merge, &error_abort);
+    object_class_property_set_description(oc, "merge",
+        "Mark memory as mergeable", &error_abort);
     object_class_property_add_bool(oc, "dump",
         host_memory_backend_get_dump,
         host_memory_backend_set_dump, &error_abort);
+    object_class_property_set_description(oc, "dump",
+        "Set to 'off' to exclude from core dump", &error_abort);
     object_class_property_add_bool(oc, "prealloc",
         host_memory_backend_get_prealloc,
         host_memory_backend_set_prealloc, &error_abort);
+    object_class_property_set_description(oc, "prealloc",
+        "Preallocate memory", &error_abort);
     object_class_property_add(oc, "size", "int",
         host_memory_backend_get_size,
         host_memory_backend_set_size,
         NULL, NULL, &error_abort);
+    object_class_property_set_description(oc, "size",
+        "Size of the memory region (ex: 500M)", &error_abort);
     object_class_property_add(oc, "host-nodes", "int",
         host_memory_backend_get_host_nodes,
         host_memory_backend_set_host_nodes,
         NULL, NULL, &error_abort);
+    object_class_property_set_description(oc, "host-nodes",
+        "Binds memory to the list of NUMA host nodes", &error_abort);
     object_class_property_add_enum(oc, "policy", "HostMemPolicy",
         &HostMemPolicy_lookup,
         host_memory_backend_get_policy,
         host_memory_backend_set_policy, &error_abort);
+    object_class_property_set_description(oc, "policy",
+        "Set the NUMA policy", &error_abort);
     object_class_property_add_bool(oc, "share",
         host_memory_backend_get_share, host_memory_backend_set_share,
         &error_abort);
+    object_class_property_set_description(oc, "share",
+        "Mark the memory as private to QEMU or shared", &error_abort);
 }
 
 static const TypeInfo host_memory_backend_info = {
diff --git a/block.c b/block.c
index 7710b39..95d8635 100644
--- a/block.c
+++ b/block.c
@@ -4403,6 +4403,7 @@
     uint64_t perm, shared_perm;
     Error *local_err = NULL;
     int ret;
+    BdrvDirtyBitmap *bm;
 
     if (!bs->drv)  {
         return;
@@ -4452,6 +4453,12 @@
         }
     }
 
+    for (bm = bdrv_dirty_bitmap_next(bs, NULL); bm;
+         bm = bdrv_dirty_bitmap_next(bs, bm))
+    {
+        bdrv_dirty_bitmap_set_migration(bm, false);
+    }
+
     ret = refresh_total_sectors(bs, bs->total_sectors);
     if (ret < 0) {
         bs->open_flags |= BDRV_O_INACTIVE;
@@ -4566,10 +4573,6 @@
         }
     }
 
-    /* At this point persistent bitmaps should be already stored by the format
-     * driver */
-    bdrv_release_persistent_dirty_bitmaps(bs);
-
     return 0;
 }
 
@@ -4697,9 +4700,9 @@
     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
         blocker = QLIST_FIRST(&bs->op_blockers[op]);
-        error_propagate(errp, error_copy(blocker->reason));
-        error_prepend(errp, "Node '%s' is busy: ",
-                      bdrv_get_device_or_node_name(bs));
+        error_propagate_prepend(errp, error_copy(blocker->reason),
+                                "Node '%s' is busy: ",
+                                bdrv_get_device_or_node_name(bs));
         return true;
     }
     return false;
@@ -4803,9 +4806,6 @@
     if (options) {
         qemu_opts_do_parse(opts, options, NULL, &local_err);
         if (local_err) {
-            error_report_err(local_err);
-            local_err = NULL;
-            error_setg(errp, "Invalid options for file format '%s'", fmt);
             goto out;
         }
     }
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index c9b8a6f..89fd1d7 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -55,6 +55,10 @@
                                    and this bitmap must remain unchanged while
                                    this flag is set. */
     bool persistent;            /* bitmap must be saved to owner disk image */
+    bool migration;             /* Bitmap is selected for migration, it should
+                                   not be stored on the next inactivation
+                                   (persistent flag doesn't matter until next
+                                   invalidation).*/
     QLIST_ENTRY(BdrvDirtyBitmap) list;
 };
 
@@ -176,6 +180,12 @@
     return bitmap->successor;
 }
 
+/* Both conditions disallow user-modification via QMP. */
+bool bdrv_dirty_bitmap_user_locked(BdrvDirtyBitmap *bitmap) {
+    return bdrv_dirty_bitmap_frozen(bitmap) ||
+           bdrv_dirty_bitmap_qmp_locked(bitmap);
+}
+
 void bdrv_dirty_bitmap_set_qmp_locked(BdrvDirtyBitmap *bitmap, bool qmp_locked)
 {
     qemu_mutex_lock(bitmap->mutex);
@@ -314,7 +324,7 @@
         return NULL;
     }
 
-    if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
+    if (!hbitmap_merge(parent->bitmap, successor->bitmap, parent->bitmap)) {
         error_setg(errp, "Merging of parent and successor bitmap failed");
         return NULL;
     }
@@ -384,26 +394,6 @@
 }
 
 /**
- * Release all persistent dirty bitmaps attached to a BDS (for use in
- * bdrv_inactivate_recurse()).
- * There must not be any frozen bitmaps attached.
- * This function does not remove persistent bitmaps from the storage.
- * Called with BQL taken.
- */
-void bdrv_release_persistent_dirty_bitmaps(BlockDriverState *bs)
-{
-    BdrvDirtyBitmap *bm, *next;
-
-    bdrv_dirty_bitmaps_lock(bs);
-    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
-        if (bdrv_dirty_bitmap_get_persistance(bm)) {
-            bdrv_release_dirty_bitmap_locked(bm);
-        }
-    }
-    bdrv_dirty_bitmaps_unlock(bs);
-}
-
-/**
  * Remove persistent dirty bitmap from the storage if it exists.
  * Absence of bitmap is not an error, because we have the following scenario:
  * BdrvDirtyBitmap can have .persistent = true but not yet saved and have no
@@ -619,7 +609,6 @@
 
 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
 {
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
     assert(!bdrv_dirty_bitmap_readonly(bitmap));
     bdrv_dirty_bitmap_lock(bitmap);
     if (!out) {
@@ -633,12 +622,12 @@
     bdrv_dirty_bitmap_unlock(bitmap);
 }
 
-void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
+void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup)
 {
     HBitmap *tmp = bitmap->bitmap;
     assert(bdrv_dirty_bitmap_enabled(bitmap));
     assert(!bdrv_dirty_bitmap_readonly(bitmap));
-    bitmap->bitmap = in;
+    bitmap->bitmap = backup;
     hbitmap_free(tmp);
 }
 
@@ -756,16 +745,24 @@
     qemu_mutex_unlock(bitmap->mutex);
 }
 
+/* Called with BQL taken. */
+void bdrv_dirty_bitmap_set_migration(BdrvDirtyBitmap *bitmap, bool migration)
+{
+    qemu_mutex_lock(bitmap->mutex);
+    bitmap->migration = migration;
+    qemu_mutex_unlock(bitmap->mutex);
+}
+
 bool bdrv_dirty_bitmap_get_persistance(BdrvDirtyBitmap *bitmap)
 {
-    return bitmap->persistent;
+    return bitmap->persistent && !bitmap->migration;
 }
 
 bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs)
 {
     BdrvDirtyBitmap *bm;
     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
-        if (bm->persistent && !bm->readonly) {
+        if (bm->persistent && !bm->readonly && !bm->migration) {
             return true;
         }
     }
@@ -791,19 +788,41 @@
 }
 
 void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
-                             Error **errp)
+                             HBitmap **backup, Error **errp)
 {
+    bool ret;
+
     /* only bitmaps from one bds are supported */
     assert(dest->mutex == src->mutex);
 
     qemu_mutex_lock(dest->mutex);
 
-    assert(bdrv_dirty_bitmap_enabled(dest));
-    assert(!bdrv_dirty_bitmap_readonly(dest));
-
-    if (!hbitmap_merge(dest->bitmap, src->bitmap)) {
-        error_setg(errp, "Bitmaps are incompatible and can't be merged");
+    if (bdrv_dirty_bitmap_user_locked(dest)) {
+        error_setg(errp, "Bitmap '%s' is currently in use by another"
+        " operation and cannot be modified", dest->name);
+        goto out;
     }
 
+    if (bdrv_dirty_bitmap_readonly(dest)) {
+        error_setg(errp, "Bitmap '%s' is readonly and cannot be modified",
+                   dest->name);
+        goto out;
+    }
+
+    if (!hbitmap_can_merge(dest->bitmap, src->bitmap)) {
+        error_setg(errp, "Bitmaps are incompatible and can't be merged");
+        goto out;
+    }
+
+    if (backup) {
+        *backup = dest->bitmap;
+        dest->bitmap = hbitmap_alloc(dest->size, hbitmap_granularity(*backup));
+        ret = hbitmap_merge(*backup, src->bitmap, dest->bitmap);
+    } else {
+        ret = hbitmap_merge(dest->bitmap, src->bitmap, dest->bitmap);
+    }
+    assert(ret);
+
+out:
     qemu_mutex_unlock(dest->mutex);
 }
diff --git a/block/iscsi.c b/block/iscsi.c
index bb69faf..73998c2 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1844,7 +1844,7 @@
     iscsi_set_timeout(iscsi, timeout);
 #else
     if (timeout) {
-        error_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
+        warn_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
     }
 #endif
 
diff --git a/block/nvme.c b/block/nvme.c
index 781d77d..2929403 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -489,10 +489,8 @@
     BDRVNVMeState *s = container_of(n, BDRVNVMeState, irq_notifier);
 
     trace_nvme_handle_event(s);
-    aio_context_acquire(s->aio_context);
     event_notifier_test_and_clear(n);
     nvme_poll_queues(s);
-    aio_context_release(s->aio_context);
 }
 
 static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index ba978ad..b5f1b35 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -1418,6 +1418,22 @@
         g_free(tb);
     }
 
+    QSIMPLEQ_FOREACH(bm, bm_list, entry) {
+        /* For safety, we remove the bitmap after storing it.
+         * We may be here in two cases:
+         * 1. bdrv_close. It's ok to drop the bitmap.
+         * 2. inactivation. It means migration without 'dirty-bitmaps'
+         *    capability, so bitmaps are not marked with the
+         *    BdrvDirtyBitmap.migration flag. It's not bad to drop them too,
+         *    and reload them on invalidation.
+         */
+        if (bm->dirty_bitmap == NULL) {
+            continue;
+        }
+
+        bdrv_release_dirty_bitmap(bs, bm->dirty_bitmap);
+    }
+
     bitmap_list_free(bm_list);
     return;
 
diff --git a/block/qcow2.c b/block/qcow2.c
index 7277fed..30689b7 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1153,7 +1153,6 @@
     uint64_t ext_end;
     uint64_t l1_vm_state_index;
     bool update_header = false;
-    bool header_updated = false;
 
     ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
     if (ret < 0) {
@@ -1492,23 +1491,70 @@
         s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
     }
 
-    if (s->dirty_bitmaps_loaded) {
-        /* It's some kind of reopen. There are no known cases where we need to
-         * reload bitmaps in such a situation, so it's safer to skip them.
-         *
-         * Moreover, if we have some readonly bitmaps and we are reopening for
-         * rw we should reopen bitmaps correspondingly.
-         */
-        if (bdrv_has_readonly_bitmaps(bs) &&
-            !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE))
-        {
-            qcow2_reopen_bitmaps_rw_hint(bs, &header_updated, &local_err);
-        }
-    } else {
-        header_updated = qcow2_load_dirty_bitmaps(bs, &local_err);
-        s->dirty_bitmaps_loaded = true;
+    /* == Handle persistent dirty bitmaps ==
+     *
+     * We want to load dirty bitmaps in three cases:
+     *
+     * 1. Normal open of the disk in active mode, not related to invalidation
+     *    after migration.
+     *
+     * 2. Invalidation of the target vm after pre-copy phase of migration, if
+     *    bitmaps are _not_ migrating through migration channel, i.e.
+     *    'dirty-bitmaps' capability is disabled.
+     *
+     * 3. Invalidation of source vm after failed or canceled migration.
+     *    This is a very interesting case. There are two possible types of
+     *    bitmaps:
+     *
+     *    A. Stored on inactivation and removed. They should be loaded from the
+     *       image.
+     *
+     *    B. Not stored: not-persistent bitmaps and bitmaps, migrated through
+     *       the migration channel (with dirty-bitmaps capability).
+     *
+     *    On the other hand, there are two possible sub-cases:
+     *
+     *    3.1 disk was changed by somebody else while we were inactive. In
+     *        this case all in-RAM dirty bitmaps (both persistent and not) are
+     *        definitely invalid. And we don't have any method to determine
+     *        this.
+     *
+     *        The simple and safe thing is to just drop all bitmaps of type B on
+     *        inactivation. But then we lose the bitmaps in the valid 3.2 case.
+     *
+     *        On the other hand, resuming source vm, if disk was already changed
+     *        is a bad thing anyway: not only bitmaps, the whole vm state is
+     *        out of sync with disk.
+     *
+     *        This means, that user or management tool, who for some reason
+     *        decided to resume source vm, after disk was already changed by
+     *        target vm, should at least drop all dirty bitmaps by hand.
+     *
+     *        So, we can ignore this case for now, but TODO: "generation"
+     *        extension for qcow2, to determine that the image was changed after
+     *        last inactivation. If it was changed, we will drop (or at least
+     *        mark as 'invalid') all the bitmaps of type B, both persistent
+     *        and not.
+     *
+     *    3.2 disk was _not_ changed while we were inactive. Bitmaps may be
+     *        saved to disk ('dirty-bitmaps' capability disabled), or not saved
+     *        ('dirty-bitmaps' capability enabled), but we don't need to care:
+     *        let's load bitmaps as always. Stored bitmaps will be loaded, and
+     *        those not stored have flag IN_USE=1 in the image and will be
+     *        skipped on loading.
+     *
+     * One remaining possible case, in which we don't want to load bitmaps:
+     *
+     * 4. Open disk in inactive mode in target vm (bitmaps are migrating or
+     *    will be loaded on invalidation, no need to try loading them before)
+     */
+
+    if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) {
+        /* It's case 1, 2 or 3.2. Or 3.1, which is a management-layer bug. */
+        bool header_updated = qcow2_load_dirty_bitmaps(bs, &local_err);
+
+        update_header = update_header && !header_updated;
     }
-    update_header = update_header && !header_updated;
     if (local_err != NULL) {
         error_propagate(errp, local_err);
         ret = -EINVAL;
@@ -2123,9 +2169,9 @@
     qcow2_store_persistent_dirty_bitmaps(bs, &local_err);
     if (local_err != NULL) {
         result = -EINVAL;
-        error_report_err(local_err);
-        error_report("Persistent bitmaps are lost for node '%s'",
-                     bdrv_get_device_or_node_name(bs));
+        error_reportf_err(local_err, "Lost persistent bitmaps during "
+                          "inactivation of node '%s': ",
+                          bdrv_get_device_or_node_name(bs));
     }
 
     ret = qcow2_cache_flush(bs, s->l2_table_cache);
@@ -2208,8 +2254,8 @@
     qemu_co_mutex_unlock(&s->lock);
     qobject_unref(options);
     if (local_err) {
-        error_propagate(errp, local_err);
-        error_prepend(errp, "Could not reopen qcow2 layer: ");
+        error_propagate_prepend(errp, local_err,
+                                "Could not reopen qcow2 layer: ");
         bs->drv = NULL;
         return;
     } else if (ret < 0) {
diff --git a/block/qcow2.h b/block/qcow2.h
index ba43031..29c98d8 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -300,7 +300,6 @@
     uint32_t nb_bitmaps;
     uint64_t bitmap_directory_size;
     uint64_t bitmap_directory_offset;
-    bool dirty_bitmaps_loaded;
 
     int flags;
     int qcow_version;
diff --git a/block/qed.c b/block/qed.c
index 689ea9d..9377c0b 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -1606,8 +1606,8 @@
     ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, &local_err);
     qemu_co_mutex_unlock(&s->table_lock);
     if (local_err) {
-        error_propagate(errp, local_err);
-        error_prepend(errp, "Could not reopen qed layer: ");
+        error_propagate_prepend(errp, local_err,
+                                "Could not reopen qed layer: ");
         return;
     } else if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not reopen qed layer");
diff --git a/block/rbd.c b/block/rbd.c
index 014c68d..e5bf5a1 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -750,8 +750,8 @@
         /* Take care whenever deciding to actually deprecate; once this ability
          * is removed, we will not be able to open any images with legacy-styled
          * backing image strings. */
-        error_report("RBD options encoded in the filename as keyvalue pairs "
-                     "is deprecated");
+        warn_report("RBD options encoded in the filename as keyvalue pairs "
+                    "is deprecated");
     }
 
     /* Remove the processed options from the QDict (the visitor processes
diff --git a/block/sheepdog.c b/block/sheepdog.c
index b229a66..0125df9 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -572,7 +572,7 @@
     if (s->addr->type == SOCKET_ADDRESS_TYPE_INET && fd >= 0) {
         int ret = socket_set_nodelay(fd);
         if (ret < 0) {
-            error_report("%s", strerror(errno));
+            warn_report("can't set TCP_NODELAY: %s", strerror(errno));
         }
     }
 
diff --git a/block/vpc.c b/block/vpc.c
index bf294ab..984187c 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -284,9 +284,11 @@
 
     checksum = be32_to_cpu(footer->checksum);
     footer->checksum = 0;
-    if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
-        fprintf(stderr, "block-vpc: The header checksum of '%s' is "
-            "incorrect.\n", bs->filename);
+    if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum) {
+        error_setg(errp, "Incorrect header checksum");
+        ret = -EINVAL;
+        goto fail;
+    }
 
     /* Write 'checksum' back to footer, or else will leave it with zero. */
     footer->checksum = cpu_to_be32(checksum);
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 1ef1104..1d170c8 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -36,8 +36,7 @@
                        gpointer opaque)
 {
     qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
-    nbd_client_new(NULL, cioc,
-                   nbd_server->tlscreds, NULL,
+    nbd_client_new(cioc, nbd_server->tlscreds, NULL,
                    nbd_blockdev_client_closed);
 }
 
diff --git a/blockdev.c b/blockdev.c
index a8755bd..c30495d 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -759,7 +759,8 @@
     },
 };
 
-DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
+DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type,
+                     Error **errp)
 {
     const char *value;
     BlockBackend *blk;
@@ -808,7 +809,7 @@
         qemu_opt_rename(all_opts, opt_renames[i].from, opt_renames[i].to,
                         &local_err);
         if (local_err) {
-            error_report_err(local_err);
+            error_propagate(errp, local_err);
             return NULL;
         }
     }
@@ -819,7 +820,7 @@
         bool writethrough;
 
         if (bdrv_parse_cache_mode(value, &flags, &writethrough) != 0) {
-            error_report("invalid cache option");
+            error_setg(errp, "invalid cache option");
             return NULL;
         }
 
@@ -847,7 +848,7 @@
                                    &error_abort);
     qemu_opts_absorb_qdict(legacy_opts, bs_opts, &local_err);
     if (local_err) {
-        error_report_err(local_err);
+        error_propagate(errp, local_err);
         goto fail;
     }
 
@@ -860,7 +861,7 @@
             media = MEDIA_CDROM;
             read_only = true;
         } else {
-            error_report("'%s' invalid media", value);
+            error_setg(errp, "'%s' invalid media", value);
             goto fail;
         }
     }
@@ -885,7 +886,7 @@
              type++) {
         }
         if (type == IF_COUNT) {
-            error_report("unsupported bus type '%s'", value);
+            error_setg(errp, "unsupported bus type '%s'", value);
             goto fail;
         }
     } else {
@@ -902,7 +903,7 @@
 
     if (index != -1) {
         if (bus_id != 0 || unit_id != -1) {
-            error_report("index cannot be used with bus and unit");
+            error_setg(errp, "index cannot be used with bus and unit");
             goto fail;
         }
         bus_id = drive_index_to_bus_id(type, index);
@@ -921,13 +922,13 @@
     }
 
     if (max_devs && unit_id >= max_devs) {
-        error_report("unit %d too big (max is %d)", unit_id, max_devs - 1);
+        error_setg(errp, "unit %d too big (max is %d)", unit_id, max_devs - 1);
         goto fail;
     }
 
     if (drive_get(type, bus_id, unit_id) != NULL) {
-        error_report("drive with bus=%d, unit=%d (index=%d) exists",
-                     bus_id, unit_id, index);
+        error_setg(errp, "drive with bus=%d, unit=%d (index=%d) exists",
+                   bus_id, unit_id, index);
         goto fail;
     }
 
@@ -970,7 +971,7 @@
     if (werror != NULL) {
         if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO &&
             type != IF_NONE) {
-            error_report("werror is not supported by this bus type");
+            error_setg(errp, "werror is not supported by this bus type");
             goto fail;
         }
         qdict_put_str(bs_opts, "werror", werror);
@@ -980,7 +981,7 @@
     if (rerror != NULL) {
         if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI &&
             type != IF_NONE) {
-            error_report("rerror is not supported by this bus type");
+            error_setg(errp, "rerror is not supported by this bus type");
             goto fail;
         }
         qdict_put_str(bs_opts, "rerror", rerror);
@@ -991,7 +992,7 @@
     bs_opts = NULL;
     if (!blk) {
         if (local_err) {
-            error_report_err(local_err);
+            error_propagate(errp, local_err);
         }
         goto fail;
     } else {
@@ -2009,14 +2010,8 @@
         return;
     }
 
-    if (bdrv_dirty_bitmap_frozen(state->bitmap)) {
-        error_setg(errp, "Cannot modify a frozen bitmap");
-        return;
-    } else if (bdrv_dirty_bitmap_qmp_locked(state->bitmap)) {
-        error_setg(errp, "Cannot modify a locked bitmap");
-        return;
-    } else if (!bdrv_dirty_bitmap_enabled(state->bitmap)) {
-        error_setg(errp, "Cannot clear a disabled bitmap");
+    if (bdrv_dirty_bitmap_user_locked(state->bitmap)) {
+        error_setg(errp, "Cannot modify a bitmap in use by another operation");
         return;
     } else if (bdrv_dirty_bitmap_readonly(state->bitmap)) {
         error_setg(errp, "Cannot clear a readonly bitmap");
@@ -2026,17 +2021,17 @@
     bdrv_clear_dirty_bitmap(state->bitmap, &state->backup);
 }
 
-static void block_dirty_bitmap_clear_abort(BlkActionState *common)
+static void block_dirty_bitmap_restore(BlkActionState *common)
 {
     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
                                              common, common);
 
     if (state->backup) {
-        bdrv_undo_clear_dirty_bitmap(state->bitmap, state->backup);
+        bdrv_restore_dirty_bitmap(state->bitmap, state->backup);
     }
 }
 
-static void block_dirty_bitmap_clear_commit(BlkActionState *common)
+static void block_dirty_bitmap_free_backup(BlkActionState *common)
 {
     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
                                              common, common);
@@ -2064,6 +2059,13 @@
         return;
     }
 
+    if (bdrv_dirty_bitmap_user_locked(state->bitmap)) {
+        error_setg(errp,
+                   "Bitmap '%s' is currently in use by another operation"
+                   " and cannot be enabled", action->name);
+        return;
+    }
+
     state->was_enabled = bdrv_dirty_bitmap_enabled(state->bitmap);
     bdrv_enable_dirty_bitmap(state->bitmap);
 }
@@ -2098,6 +2100,13 @@
         return;
     }
 
+    if (bdrv_dirty_bitmap_user_locked(state->bitmap)) {
+        error_setg(errp,
+                   "Bitmap '%s' is currently in use by another operation"
+                   " and cannot be disabled", action->name);
+        return;
+    }
+
     state->was_enabled = bdrv_dirty_bitmap_enabled(state->bitmap);
     bdrv_disable_dirty_bitmap(state->bitmap);
 }
@@ -2112,6 +2121,35 @@
     }
 }
 
+/* Transaction .prepare for bitmap merge; a missing source is an error. */
+static void block_dirty_bitmap_merge_prepare(BlkActionState *common,
+                                             Error **errp)
+{
+    BlockDirtyBitmapMerge *action;
+    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
+                                             common, common);
+    BdrvDirtyBitmap *merge_source;
+
+    if (action_check_completion_mode(common, errp) < 0) {
+        return;
+    }
+
+    action = common->action->u.x_block_dirty_bitmap_merge.data;
+    state->bitmap = block_dirty_bitmap_lookup(action->node, action->dst_name,
+                                              &state->bs, errp);
+    if (!state->bitmap) {
+        return;
+    }
+
+    merge_source = bdrv_find_dirty_bitmap(state->bs, action->src_name);
+    if (!merge_source) {
+        error_setg(errp, "Dirty bitmap '%s' not found", action->src_name);
+        return;
+    }
+
+    bdrv_merge_dirty_bitmap(state->bitmap, merge_source, &state->backup, errp);
+}
+
 static void abort_prepare(BlkActionState *common, Error **errp)
 {
     error_setg(errp, "Transaction aborted using Abort action");
@@ -2170,8 +2208,8 @@
     [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = {
         .instance_size = sizeof(BlockDirtyBitmapState),
         .prepare = block_dirty_bitmap_clear_prepare,
-        .commit = block_dirty_bitmap_clear_commit,
-        .abort = block_dirty_bitmap_clear_abort,
+        .commit = block_dirty_bitmap_free_backup,
+        .abort = block_dirty_bitmap_restore,
     },
     [TRANSACTION_ACTION_KIND_X_BLOCK_DIRTY_BITMAP_ENABLE] = {
         .instance_size = sizeof(BlockDirtyBitmapState),
@@ -2183,6 +2221,12 @@
         .prepare = block_dirty_bitmap_disable_prepare,
         .abort = block_dirty_bitmap_disable_abort,
     },
+    [TRANSACTION_ACTION_KIND_X_BLOCK_DIRTY_BITMAP_MERGE] = {
+        .instance_size = sizeof(BlockDirtyBitmapState),
+        .prepare = block_dirty_bitmap_merge_prepare,
+        .commit = block_dirty_bitmap_free_backup,
+        .abort = block_dirty_bitmap_restore,
+    },
     /* Where are transactions for MIRROR, COMMIT and STREAM?
      * Although these blockjobs use transaction callbacks like the backup job,
      * these jobs do not necessarily adhere to transaction semantics.
@@ -2847,15 +2891,10 @@
         return;
     }
 
-    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+    if (bdrv_dirty_bitmap_user_locked(bitmap)) {
         error_setg(errp,
-                   "Bitmap '%s' is currently frozen and cannot be removed",
-                   name);
-        return;
-    } else if (bdrv_dirty_bitmap_qmp_locked(bitmap)) {
-        error_setg(errp,
-                   "Bitmap '%s' is currently locked and cannot be removed",
-                   name);
+                   "Bitmap '%s' is currently in use by another operation and"
+                   " cannot be removed", name);
         return;
     }
 
@@ -2885,20 +2924,10 @@
         return;
     }
 
-    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+    if (bdrv_dirty_bitmap_user_locked(bitmap)) {
         error_setg(errp,
-                   "Bitmap '%s' is currently frozen and cannot be modified",
-                   name);
-        return;
-    } else if (bdrv_dirty_bitmap_qmp_locked(bitmap)) {
-        error_setg(errp,
-                   "Bitmap '%s' is currently locked and cannot be modified",
-                   name);
-        return;
-    } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
-        error_setg(errp,
-                   "Bitmap '%s' is currently disabled and cannot be cleared",
-                   name);
+                   "Bitmap '%s' is currently in use by another operation"
+                   " and cannot be cleared", name);
         return;
     } else if (bdrv_dirty_bitmap_readonly(bitmap)) {
         error_setg(errp, "Bitmap '%s' is readonly and cannot be cleared", name);
@@ -2919,10 +2948,10 @@
         return;
     }
 
-    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+    if (bdrv_dirty_bitmap_user_locked(bitmap)) {
         error_setg(errp,
-                   "Bitmap '%s' is currently frozen and cannot be enabled",
-                   name);
+                   "Bitmap '%s' is currently in use by another operation"
+                   " and cannot be enabled", name);
         return;
     }
 
@@ -2940,10 +2969,10 @@
         return;
     }
 
-    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+    if (bdrv_dirty_bitmap_user_locked(bitmap)) {
         error_setg(errp,
-                   "Bitmap '%s' is currently frozen and cannot be disabled",
-                   name);
+                   "Bitmap '%s' is currently in use by another operation"
+                   " and cannot be disabled", name);
         return;
     }
 
@@ -2961,23 +2990,13 @@
         return;
     }
 
-    if (bdrv_dirty_bitmap_frozen(dst)) {
-        error_setg(errp, "Bitmap '%s' is frozen and cannot be modified",
-                   dst_name);
-        return;
-    } else if (bdrv_dirty_bitmap_readonly(dst)) {
-        error_setg(errp, "Bitmap '%s' is readonly and cannot be modified",
-                   dst_name);
-        return;
-    }
-
     src = bdrv_find_dirty_bitmap(bs, src_name);
     if (!src) {
         error_setg(errp, "Dirty bitmap '%s' not found", src_name);
         return;
     }
 
-    bdrv_merge_dirty_bitmap(dst, src, errp);
+    bdrv_merge_dirty_bitmap(dst, src, NULL, errp);
 }
 
 BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node,
@@ -3494,10 +3513,10 @@
             bdrv_unref(target_bs);
             goto out;
         }
-        if (bdrv_dirty_bitmap_qmp_locked(bmap)) {
+        if (bdrv_dirty_bitmap_user_locked(bmap)) {
             error_setg(errp,
-                       "Bitmap '%s' is currently locked and cannot be used for "
-                       "backup", backup->bitmap);
+                       "Bitmap '%s' is currently in use by another operation"
+                       " and cannot be used for backup", backup->bitmap);
             goto out;
         }
     }
@@ -3544,6 +3563,7 @@
     BlockDriverState *bs;
     BlockDriverState *target_bs;
     Error *local_err = NULL;
+    BdrvDirtyBitmap *bmap = NULL;
     AioContext *aio_context;
     BlockJob *job = NULL;
     int job_flags = JOB_DEFAULT;
@@ -3594,6 +3614,21 @@
             goto out;
         }
     }
+
+    if (backup->has_bitmap) {
+        bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
+        if (!bmap) {
+            error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap);
+            goto out;
+        }
+        if (bdrv_dirty_bitmap_user_locked(bmap)) {
+            error_setg(errp,
+                       "Bitmap '%s' is currently in use by another operation"
+                       " and cannot be used for backup", backup->bitmap);
+            goto out;
+        }
+    }
+
     if (!backup->auto_finalize) {
         job_flags |= JOB_MANUAL_FINALIZE;
     }
@@ -3601,7 +3636,7 @@
         job_flags |= JOB_MANUAL_DISMISS;
     }
     job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
-                            backup->sync, NULL, backup->compress,
+                            backup->sync, bmap, backup->compress,
                             backup->on_source_error, backup->on_target_error,
                             job_flags, NULL, NULL, txn, &local_err);
     if (local_err != NULL) {
diff --git a/chardev/char-fe.c b/chardev/char-fe.c
index b1f228e..a8931f7 100644
--- a/chardev/char-fe.c
+++ b/chardev/char-fe.c
@@ -56,7 +56,7 @@
 int qemu_chr_fe_read_all(CharBackend *be, uint8_t *buf, int len)
 {
     Chardev *s = be->chr;
-    int offset = 0, counter = 10;
+    int offset = 0;
     int res;
 
     if (!s || !CHARDEV_GET_CLASS(s)->chr_sync_read) {
@@ -88,10 +88,6 @@
         }
 
         offset += res;
-
-        if (!counter--) {
-            break;
-        }
     }
 
     if (qemu_chr_replay(s) && replay_mode == REPLAY_MODE_RECORD) {
@@ -239,7 +235,12 @@
             d->backends[b->tag] = NULL;
         }
         if (del) {
-            object_unparent(OBJECT(b->chr));
+            Object *obj = OBJECT(b->chr);
+            if (obj->parent) {
+                object_unparent(obj);
+            } else {
+                object_unref(obj);
+            }
         }
         b->chr = NULL;
     }
diff --git a/chardev/char-io.c b/chardev/char-io.c
index f810524..8ced184 100644
--- a/chardev/char-io.c
+++ b/chardev/char-io.c
@@ -33,7 +33,6 @@
     IOCanReadHandler *fd_can_read;
     GSourceFunc fd_read;
     void *opaque;
-    GMainContext *context;
 } IOWatchPoll;
 
 static IOWatchPoll *io_watch_poll_from_source(GSource *source)
@@ -55,47 +54,24 @@
         iwp->src = qio_channel_create_watch(
             iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
         g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL);
-        g_source_attach(iwp->src, iwp->context);
-    } else {
-        g_source_destroy(iwp->src);
+        g_source_add_child_source(source, iwp->src);
         g_source_unref(iwp->src);
+    } else {
+        g_source_remove_child_source(source, iwp->src);
         iwp->src = NULL;
     }
     return FALSE;
 }
 
-static gboolean io_watch_poll_check(GSource *source)
-{
-    return FALSE;
-}
-
 static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
                                        gpointer user_data)
 {
-    abort();
-}
-
-static void io_watch_poll_finalize(GSource *source)
-{
-    /* Due to a glib bug, removing the last reference to a source
-     * inside a finalize callback causes recursive locking (and a
-     * deadlock).  This is not a problem inside other callbacks,
-     * including dispatch callbacks, so we call io_remove_watch_poll
-     * to remove this source.  At this point, iwp->src must
-     * be NULL, or we would leak it.
-     *
-     * This would be solved much more elegantly by child sources,
-     * but we support older glib versions that do not have them.
-     */
-    IOWatchPoll *iwp = io_watch_poll_from_source(source);
-    assert(iwp->src == NULL);
+    return G_SOURCE_CONTINUE;
 }
 
 static GSourceFuncs io_watch_poll_funcs = {
     .prepare = io_watch_poll_prepare,
-    .check = io_watch_poll_check,
     .dispatch = io_watch_poll_dispatch,
-    .finalize = io_watch_poll_finalize,
 };
 
 GSource *io_add_watch_poll(Chardev *chr,
@@ -115,7 +91,6 @@
     iwp->ioc = ioc;
     iwp->fd_read = (GSourceFunc) fd_read;
     iwp->src = NULL;
-    iwp->context = context;
 
     name = g_strdup_printf("chardev-iowatch-%s", chr->label);
     g_source_set_name((GSource *)iwp, name);
@@ -126,23 +101,10 @@
     return (GSource *)iwp;
 }
 
-static void io_remove_watch_poll(GSource *source)
-{
-    IOWatchPoll *iwp;
-
-    iwp = io_watch_poll_from_source(source);
-    if (iwp->src) {
-        g_source_destroy(iwp->src);
-        g_source_unref(iwp->src);
-        iwp->src = NULL;
-    }
-    g_source_destroy(&iwp->parent);
-}
-
 void remove_fd_in_watch(Chardev *chr)
 {
     if (chr->gsource) {
-        io_remove_watch_poll(chr->gsource);
+        g_source_destroy(chr->gsource);
         chr->gsource = NULL;
     }
 }
diff --git a/chardev/char-pty.c b/chardev/char-pty.c
index 68fd4e2..f681d63 100644
--- a/chardev/char-pty.c
+++ b/chardev/char-pty.c
@@ -31,10 +31,6 @@
 
 #include "chardev/char-io.h"
 
-#if defined(__linux__) || defined(__sun__) || defined(__FreeBSD__)      \
-    || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \
-    || defined(__GLIBC__)
-
 typedef struct {
     Chardev parent;
     QIOChannel *ioc;
@@ -263,7 +259,7 @@
     qemu_set_nonblock(master_fd);
 
     chr->filename = g_strdup_printf("pty:%s", pty_name);
-    error_report("char device redirected to %s (label %s)",
+    error_printf("char device redirected to %s (label %s)\n",
                  pty_name, chr->label);
 
     s = PTY_CHARDEV(chr);
@@ -299,5 +295,3 @@
 }
 
 type_init(register_types);
-
-#endif
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index efbad6e..a75b46d 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -32,7 +32,6 @@
 #include "qapi/error.h"
 #include "qapi/clone-visitor.h"
 #include "qapi/qapi-visit-sockets.h"
-#include "sysemu/sysemu.h"
 
 #include "chardev/char-io.h"
 
@@ -354,6 +353,15 @@
     return qio_channel_create_watch(s->ioc, cond);
 }
 
+/* Destroy and release the HUP watch, if one is installed. */
+static void remove_hup_source(SocketChardev *s)
+{
+    if (s->hup_source) {
+        g_source_destroy(s->hup_source);
+        g_clear_pointer(&s->hup_source, g_source_unref);
+    }
+}
+
 static void tcp_chr_free_connection(Chardev *chr)
 {
     SocketChardev *s = SOCKET_CHARDEV(chr);
@@ -368,11 +376,7 @@
         s->read_msgfds_num = 0;
     }
 
-    if (s->hup_source != NULL) {
-        g_source_destroy(s->hup_source);
-        g_source_unref(s->hup_source);
-        s->hup_source = NULL;
-    }
+    remove_hup_source(s);
 
     tcp_set_msgfds(chr, NULL, 0);
     remove_fd_in_watch(chr);
@@ -419,8 +423,12 @@
     Chardev *chr = CHARDEV(s);
 
     g_free(chr->filename);
-    chr->filename = SocketAddress_to_str("disconnected:", s->addr,
-                                         s->is_listen, s->is_telnet);
+    if (s->addr) {
+        chr->filename = SocketAddress_to_str("disconnected:", s->addr,
+                                             s->is_listen, s->is_telnet);
+    } else {
+        chr->filename = g_strdup("disconnected:socket");
+    }
 }
 
 /* NB may be called even if tcp_chr_connect has not been
@@ -541,6 +549,27 @@
     }
 }
 
+/* (Re)install the read and HUP watches; no-op unless s is connected. */
+static void update_ioc_handlers(SocketChardev *s)
+{
+    Chardev *chr = CHARDEV(s);
+
+    if (!s->connected) {
+        return;
+    }
+
+    /* Drop the old read watch before installing a fresh one. */
+    remove_fd_in_watch(chr);
+    chr->gsource = io_add_watch_poll(chr, s->ioc, tcp_chr_read_poll,
+                                     tcp_chr_read, chr, chr->gcontext);
+
+    /* Same for the hang-up watch: remove, recreate, attach. */
+    remove_hup_source(s);
+    s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP);
+    g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup, chr, NULL);
+    g_source_attach(s->hup_source, chr->gcontext);
+}
+
 static void tcp_chr_connect(void *opaque)
 {
     Chardev *chr = CHARDEV(opaque);
@@ -553,16 +582,7 @@
         s->is_listen, s->is_telnet);
 
     s->connected = 1;
-    chr->gsource = io_add_watch_poll(chr, s->ioc,
-                                       tcp_chr_read_poll,
-                                       tcp_chr_read,
-                                       chr, chr->gcontext);
-
-    s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP);
-    g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup,
-                          chr, NULL);
-    g_source_attach(s->hup_source, chr->gcontext);
-
+    update_ioc_handlers(s);
     qemu_chr_be_event(chr, CHR_EVENT_OPENED);
 }
 
@@ -593,17 +613,7 @@
         tcp_chr_telnet_init(CHARDEV(s));
     }
 
-    if (!s->connected) {
-        return;
-    }
-
-    remove_fd_in_watch(chr);
-    if (s->ioc) {
-        chr->gsource = io_add_watch_poll(chr, s->ioc,
-                                           tcp_chr_read_poll,
-                                           tcp_chr_read, chr,
-                                           chr->gcontext);
-    }
+    update_ioc_handlers(s);
 }
 
 static gboolean tcp_chr_telnet_init_io(QIOChannel *ioc,
@@ -724,11 +734,6 @@
     Error *err = NULL;
     gchar *name;
 
-    if (!machine_init_done) {
-        /* This will be postponed to machine_done notifier */
-        return;
-    }
-
     if (s->is_listen) {
         tioc = qio_channel_tls_new_server(
             s->ioc, s->tls_creds,
@@ -1011,8 +1016,9 @@
         s->reconnect_time = reconnect;
     }
 
-    /* If reconnect_time is set, will do that in chr_machine_done. */
-    if (!s->reconnect_time) {
+    if (s->reconnect_time) {
+        tcp_chr_connect_async(chr);
+    } else {
         if (s->is_listen) {
             char *name;
             s->listener = qio_net_listener_new();
@@ -1161,21 +1167,6 @@
     return s->connected;
 }
 
-static int tcp_chr_machine_done_hook(Chardev *chr)
-{
-    SocketChardev *s = SOCKET_CHARDEV(chr);
-
-    if (s->reconnect_time) {
-        tcp_chr_connect_async(chr);
-    }
-
-    if (s->ioc && s->tls_creds) {
-        tcp_chr_tls_init(chr);
-    }
-
-    return 0;
-}
-
 static void char_socket_class_init(ObjectClass *oc, void *data)
 {
     ChardevClass *cc = CHARDEV_CLASS(oc);
@@ -1191,7 +1182,6 @@
     cc->chr_add_client = tcp_chr_add_client;
     cc->chr_add_watch = tcp_chr_add_watch;
     cc->chr_update_read_handler = tcp_chr_update_read_handler;
-    cc->chr_machine_done = tcp_chr_machine_done_hook;
 
     object_class_property_add(oc, "addr", "SocketAddress",
                               char_socket_get_addr, NULL,
diff --git a/chardev/char.c b/chardev/char.c
index 76d866e..7f07a1b 100644
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -329,7 +329,8 @@
     return 0;
 }
 
-QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename)
+QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename,
+                                bool permit_mux_mon)
 {
     char host[65], port[33], width[8], height[8];
     int pos;
@@ -344,6 +345,10 @@
     }
 
     if (strstart(filename, "mon:", &p)) {
+        if (!permit_mux_mon) {
+            error_report("mon: isn't supported in this context");
+            return NULL;
+        }
         filename = p;
         qemu_opt_set(opts, "mux", "on", &error_abort);
         if (strcmp(filename, "stdio") == 0) {
@@ -629,7 +634,7 @@
 
         chardev_name_foreach(help_string_append, str);
 
-        error_report("Available chardev backend types: %s", str->str);
+        error_printf("Available chardev backend types: %s\n", str->str);
         g_string_free(str, true);
         return NULL;
     }
@@ -683,7 +688,8 @@
     return chr;
 }
 
-Chardev *qemu_chr_new_noreplay(const char *label, const char *filename)
+Chardev *qemu_chr_new_noreplay(const char *label, const char *filename,
+                               bool permit_mux_mon)
 {
     const char *p;
     Chardev *chr;
@@ -694,25 +700,32 @@
         return qemu_chr_find(p);
     }
 
-    opts = qemu_chr_parse_compat(label, filename);
+    opts = qemu_chr_parse_compat(label, filename, permit_mux_mon);
     if (!opts)
         return NULL;
 
     chr = qemu_chr_new_from_opts(opts, &err);
-    if (err) {
+    if (!chr) {
         error_report_err(err);
+        goto out;
     }
-    if (chr && qemu_opt_get_bool(opts, "mux", 0)) {
+
+    if (qemu_opt_get_bool(opts, "mux", 0)) {
+        assert(permit_mux_mon);
         monitor_init(chr, MONITOR_USE_READLINE);
     }
+
+out:
     qemu_opts_del(opts);
     return chr;
 }
 
-Chardev *qemu_chr_new(const char *label, const char *filename)
+static Chardev *qemu_chr_new_permit_mux_mon(const char *label,
+                                          const char *filename,
+                                          bool permit_mux_mon)
 {
     Chardev *chr;
-    chr = qemu_chr_new_noreplay(label, filename);
+    chr = qemu_chr_new_noreplay(label, filename, permit_mux_mon);
     if (chr) {
         if (replay_mode != REPLAY_MODE_NONE) {
             qemu_chr_set_feature(chr, QEMU_CHAR_FEATURE_REPLAY);
@@ -726,6 +739,16 @@
     return chr;
 }
 
+Chardev *qemu_chr_new(const char *label, const char *filename)
+{
+    return qemu_chr_new_permit_mux_mon(label, filename, false);
+}
+
+Chardev *qemu_chr_new_mux_mon(const char *label, const char *filename)
+{
+    return qemu_chr_new_permit_mux_mon(label, filename, true);
+}
+
 static int qmp_query_chardev_foreach(Object *obj, void *data)
 {
     Chardev *chr = CHARDEV(obj);
diff --git a/configure b/configure
index 2c6b850..743e12f 100755
--- a/configure
+++ b/configure
@@ -296,6 +296,8 @@
 then
     git_update=yes
     git_submodules="ui/keycodemapdb"
+    git_submodules="$git_submodules tests/fp/berkeley-testfloat-3"
+    git_submodules="$git_submodules tests/fp/berkeley-softfloat-3"
 else
     git_update=no
     git_submodules=""
@@ -426,7 +428,7 @@
 opengl=""
 opengl_dmabuf="no"
 cpuid_h="no"
-avx2_opt="no"
+avx2_opt=""
 zlib="yes"
 capstone=""
 lzo=""
@@ -452,16 +454,12 @@
 glusterfs_fallocate="no"
 glusterfs_zerofill="no"
 gtk=""
-gtkabi=""
 gtk_gl="no"
 tls_priority="NORMAL"
 gnutls=""
-gnutls_rnd=""
 nettle=""
-nettle_kdf="no"
 gcrypt=""
 gcrypt_hmac="no"
-gcrypt_kdf="no"
 vte=""
 virglrenderer=""
 tpm="yes"
@@ -1331,6 +1329,10 @@
   ;;
   --disable-glusterfs) glusterfs="no"
   ;;
+  --disable-avx2) avx2_opt="no"
+  ;;
+  --enable-avx2) avx2_opt="yes"
+  ;;
   --enable-glusterfs) glusterfs="yes"
   ;;
   --disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane)
@@ -1368,8 +1370,6 @@
   ;;
   --disable-pvrdma) pvrdma="no"
   ;;
-  --with-gtkabi=*) gtkabi="$optarg"
-  ;;
   --disable-vte) vte="no"
   ;;
   --enable-vte) vte="yes"
@@ -1657,7 +1657,6 @@
   sdl             SDL UI
   --with-sdlabi     select preferred SDL ABI 1.2 or 2.0
   gtk             gtk UI
-  --with-gtkabi     select preferred GTK ABI 2.0 or 3.0
   vte             vte support for the gtk UI
   curses          curses UI
   vnc             VNC UI support
@@ -1708,6 +1707,7 @@
   libxml2         for Parallels image format
   tcmalloc        tcmalloc support
   jemalloc        jemalloc support
+  avx2            AVX2 optimization support
   replication     replication support
   vhost-vsock     virtio sockets device support
   opengl          opengl support
@@ -2151,23 +2151,6 @@
   fi
 fi
 
-#########################################
-# zlib check
-
-if test "$zlib" != "no" ; then
-    cat > $TMPC << EOF
-#include <zlib.h>
-int main(void) { zlibVersion(); return 0; }
-EOF
-    if compile_prog "" "-lz" ; then
-        :
-    else
-        error_exit "zlib check failed" \
-            "Make sure to have the zlib libs and headers installed."
-    fi
-fi
-LIBS="$LIBS -lz"
-
 ##########################################
 # lzo check
 
@@ -2644,24 +2627,9 @@
 # GTK probe
 
 if test "$gtk" != "no"; then
-    if test "$gtkabi" = ""; then
-        # The GTK ABI was not specified explicitly, so try whether 3.0 is available.
-        # Use 2.0 as a fallback if that is available.
-        if $pkg_config --exists "gtk+-3.0 >= 3.0.0"; then
-            gtkabi=3.0
-        elif $pkg_config --exists "gtk+-2.0 >= 2.18.0"; then
-            gtkabi=2.0
-        else
-            gtkabi=3.0
-        fi
-    fi
-    gtkpackage="gtk+-$gtkabi"
-    gtkx11package="gtk+-x11-$gtkabi"
-    if test "$gtkabi" = "3.0" ; then
-      gtkversion="3.0.0"
-    else
-      gtkversion="2.18.0"
-    fi
+    gtkpackage="gtk+-3.0"
+    gtkx11package="gtk+-x11-3.0"
+    gtkversion="3.14.0"
     if $pkg_config --exists "$gtkpackage >= $gtkversion"; then
         gtk_cflags=$($pkg_config --cflags $gtkpackage)
         gtk_libs=$($pkg_config --libs $gtkpackage)
@@ -2683,79 +2651,28 @@
 ##########################################
 # GNUTLS probe
 
-gnutls_works() {
-    # Unfortunately some distros have bad pkg-config information for gnutls
-    # such that it claims to exist but you get a compiler error if you try
-    # to use the options returned by --libs. Specifically, Ubuntu for --static
-    # builds doesn't work:
-    # https://bugs.launchpad.net/ubuntu/+source/gnutls26/+bug/1478035
-    #
-    # So sanity check the cflags/libs before assuming gnutls can be used.
-    if ! $pkg_config --exists "gnutls"; then
-        return 1
-    fi
-
-    write_c_skeleton
-    compile_prog "$($pkg_config --cflags gnutls)" "$($pkg_config --libs gnutls)"
-}
-
-gnutls_gcrypt=no
-gnutls_nettle=no
 if test "$gnutls" != "no"; then
-    if gnutls_works; then
+    if $pkg_config --exists "gnutls >= 3.1.18"; then
         gnutls_cflags=$($pkg_config --cflags gnutls)
         gnutls_libs=$($pkg_config --libs gnutls)
         libs_softmmu="$gnutls_libs $libs_softmmu"
         libs_tools="$gnutls_libs $libs_tools"
 	QEMU_CFLAGS="$QEMU_CFLAGS $gnutls_cflags"
         gnutls="yes"
-
-	# gnutls_rnd requires >= 2.11.0
-	if $pkg_config --exists "gnutls >= 2.11.0"; then
-	    gnutls_rnd="yes"
-	else
-	    gnutls_rnd="no"
-	fi
-
-	if $pkg_config --exists 'gnutls >= 3.0'; then
-	    gnutls_gcrypt=no
-	    gnutls_nettle=yes
-	elif $pkg_config --exists 'gnutls >= 2.12'; then
-	    case $($pkg_config --libs --static gnutls) in
-		*gcrypt*)
-		    gnutls_gcrypt=yes
-		    gnutls_nettle=no
-		    ;;
-		*nettle*)
-		    gnutls_gcrypt=no
-		    gnutls_nettle=yes
-		    ;;
-		*)
-		    gnutls_gcrypt=yes
-		    gnutls_nettle=no
-		    ;;
-	    esac
-	else
-	    gnutls_gcrypt=yes
-	    gnutls_nettle=no
-	fi
     elif test "$gnutls" = "yes"; then
-	feature_not_found "gnutls" "Install gnutls devel"
+	feature_not_found "gnutls" "Install gnutls devel >= 3.1.18"
     else
         gnutls="no"
-        gnutls_rnd="no"
     fi
-else
-    gnutls_rnd="no"
 fi
 
 
 # If user didn't give a --disable/enable-gcrypt flag,
 # then mark as disabled if user requested nettle
-# explicitly, or if gnutls links to nettle
+# explicitly
 if test -z "$gcrypt"
 then
-    if test "$nettle" = "yes" || test "$gnutls_nettle" = "yes"
+    if test "$nettle" = "yes"
     then
         gcrypt="no"
     fi
@@ -2763,16 +2680,16 @@
 
 # If user didn't give a --disable/enable-nettle flag,
 # then mark as disabled if user requested gcrypt
-# explicitly, or if gnutls links to gcrypt
+# explicitly
 if test -z "$nettle"
 then
-    if test "$gcrypt" = "yes" || test "$gnutls_gcrypt" = "yes"
+    if test "$gcrypt" = "yes"
     then
         nettle="no"
     fi
 fi
 
-has_libgcrypt_config() {
+has_libgcrypt() {
     if ! has "libgcrypt-config"
     then
 	return 1
@@ -2787,11 +2704,42 @@
 	fi
     fi
 
+    maj=`libgcrypt-config --version | awk -F . '{print $1}'`
+    min=`libgcrypt-config --version | awk -F . '{print $2}'`
+
+    if test $maj != 1 || test $min -lt 5
+    then
+       return 1
+    fi
+
     return 0
 }
 
+
+if test "$nettle" != "no"; then
+    if $pkg_config --exists "nettle >= 2.7.1"; then
+        nettle_cflags=$($pkg_config --cflags nettle)
+        nettle_libs=$($pkg_config --libs nettle)
+        nettle_version=$($pkg_config --modversion nettle)
+        libs_softmmu="$nettle_libs $libs_softmmu"
+        libs_tools="$nettle_libs $libs_tools"
+        QEMU_CFLAGS="$QEMU_CFLAGS $nettle_cflags"
+        nettle="yes"
+
+        if test -z "$gcrypt"; then
+           gcrypt="no"
+        fi
+    else
+        if test "$nettle" = "yes"; then
+            feature_not_found "nettle" "Install nettle devel >= 2.7.1"
+        else
+            nettle="no"
+        fi
+    fi
+fi
+
 if test "$gcrypt" != "no"; then
-    if has_libgcrypt_config; then
+    if has_libgcrypt; then
         gcrypt_cflags=$(libgcrypt-config --cflags)
         gcrypt_libs=$(libgcrypt-config --libs)
         # Debian has remove -lgpg-error from libgcrypt-config
@@ -2805,22 +2753,6 @@
         libs_tools="$gcrypt_libs $libs_tools"
         QEMU_CFLAGS="$QEMU_CFLAGS $gcrypt_cflags"
         gcrypt="yes"
-        if test -z "$nettle"; then
-           nettle="no"
-        fi
-
-        cat > $TMPC << EOF
-#include <gcrypt.h>
-int main(void) {
-  gcry_kdf_derive(NULL, 0, GCRY_KDF_PBKDF2,
-                  GCRY_MD_SHA256,
-                  NULL, 0, 0, 0, NULL);
- return 0;
-}
-EOF
-        if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then
-            gcrypt_kdf=yes
-        fi
 
         cat > $TMPC << EOF
 #include <gcrypt.h>
@@ -2836,7 +2768,7 @@
         fi
     else
         if test "$gcrypt" = "yes"; then
-            feature_not_found "gcrypt" "Install gcrypt devel"
+            feature_not_found "gcrypt" "Install gcrypt devel >= 1.5.0"
         else
             gcrypt="no"
         fi
@@ -2844,36 +2776,6 @@
 fi
 
 
-if test "$nettle" != "no"; then
-    if $pkg_config --exists "nettle"; then
-        nettle_cflags=$($pkg_config --cflags nettle)
-        nettle_libs=$($pkg_config --libs nettle)
-        nettle_version=$($pkg_config --modversion nettle)
-        libs_softmmu="$nettle_libs $libs_softmmu"
-        libs_tools="$nettle_libs $libs_tools"
-        QEMU_CFLAGS="$QEMU_CFLAGS $nettle_cflags"
-        nettle="yes"
-
-        cat > $TMPC << EOF
-#include <stddef.h>
-#include <nettle/pbkdf2.h>
-int main(void) {
-     pbkdf2_hmac_sha256(8, NULL, 1000, 8, NULL, 8, NULL);
-     return 0;
-}
-EOF
-        if compile_prog "$nettle_cflags" "$nettle_libs" ; then
-            nettle_kdf=yes
-        fi
-    else
-        if test "$nettle" = "yes"; then
-            feature_not_found "nettle" "Install nettle devel"
-        else
-            nettle="no"
-        fi
-    fi
-fi
-
 if test "$gcrypt" = "yes" && test "$nettle" = "yes"
 then
     error_exit "Only one of gcrypt & nettle can be enabled"
@@ -2905,16 +2807,11 @@
 # VTE probe
 
 if test "$vte" != "no"; then
-    if test "$gtkabi" = "3.0"; then
-      vteminversion="0.32.0"
-      if $pkg_config --exists "vte-2.91"; then
-        vtepackage="vte-2.91"
-      else
-        vtepackage="vte-2.90"
-      fi
+    vteminversion="0.32.0"
+    if $pkg_config --exists "vte-2.91"; then
+      vtepackage="vte-2.91"
     else
-      vtepackage="vte"
-      vteminversion="0.24.0"
+      vtepackage="vte-2.90"
     fi
     if $pkg_config --exists "$vtepackage >= $vteminversion"; then
         vte_cflags=$($pkg_config --cflags $vtepackage)
@@ -2922,11 +2819,7 @@
         vteversion=$($pkg_config --modversion $vtepackage)
         vte="yes"
     elif test "$vte" = "yes"; then
-        if test "$gtkabi" = "3.0"; then
-            feature_not_found "vte" "Install libvte-2.90/2.91 devel"
-        else
-            feature_not_found "vte" "Install libvte devel"
-        fi
+        feature_not_found "vte" "Install libvte-2.90/2.91 devel"
     else
         vte="no"
     fi
@@ -3557,12 +3450,6 @@
 	       "build target"
 fi
 
-# g_test_trap_subprocess added in 2.38. Used by some tests.
-glib_subprocess=yes
-if ! $pkg_config --atleast-version=2.38 glib-2.0; then
-    glib_subprocess=no
-fi
-
 # Silence clang 3.5.0 warnings about glib attribute __alloc_size__ usage
 cat > $TMPC << EOF
 #include <glib.h>
@@ -3575,6 +3462,29 @@
     fi
 fi
 
+#########################################
+# zlib check
+
+if test "$zlib" != "no" ; then
+    if $pkg_config --exists zlib; then
+        zlib_cflags=$($pkg_config --cflags zlib)
+        zlib_libs=$($pkg_config --libs zlib)
+        QEMU_CFLAGS="$zlib_cflags $QEMU_CFLAGS"
+        LIBS="$zlib_libs $LIBS"
+    else
+        cat > $TMPC << EOF
+#include <zlib.h>
+int main(void) { zlibVersion(); return 0; }
+EOF
+        if compile_prog "" "-lz" ; then
+            LIBS="$LIBS -lz"
+        else
+            error_exit "zlib check failed" \
+                "Make sure to have the zlib libs and headers installed."
+        fi
+    fi
+fi
+
 ##########################################
 # SHA command probe for modules
 if test "$modules" = yes; then
@@ -4234,7 +4144,14 @@
   memfd=yes
 fi
 
-
+# check for usbfs: only enabled for linux-user builds, and only when the
+# host headers actually provide linux/usbdevice_fs.h
+have_usbfs=no
+if test "$linux_user" = "yes"; then
+  if check_include linux/usbdevice_fs.h; then
+    have_usbfs=yes
+  fi
+fi
 
 # check for fallocate
 fallocate=no
@@ -5126,7 +5043,7 @@
 # There is no point enabling this if cpuid.h is not usable,
 # since we won't be able to select the new routines.
 
-if test $cpuid_h = yes; then
+if test "$cpuid_h" = "yes" -a "$avx2_opt" != "no"; then
   cat > $TMPC << EOF
 #pragma GCC push_options
 #pragma GCC target("avx2")
@@ -5140,6 +5057,8 @@
 EOF
   if compile_object "" ; then
     avx2_opt="yes"
+  else
+    avx2_opt="no"
   fi
 fi
 
@@ -5186,6 +5105,21 @@
   fi
 fi
 
+cmpxchg128=no
+if test "$int128" = yes -a "$atomic128" = no; then
+  cat > $TMPC << EOF
+int main(void)
+{
+  unsigned __int128 x = 0, y = 0;
+  __sync_val_compare_and_swap_16(&x, y, x);
+  return 0;
+}
+EOF
+  if compile_prog "" "" ; then
+    cmpxchg128=yes
+  fi
+fi
+
 #########################################
 # See if 64-bit atomic operations are supported.
 # Note that without __atomic builtins, we can only
@@ -5721,6 +5655,9 @@
   if [ "$ivshmem" = "yes" ]; then
     tools="ivshmem-client\$(EXESUF) ivshmem-server\$(EXESUF) $tools"
   fi
+  if [ "$posix" = "yes" ] && [ "$curl" = "yes" ]; then
+    tools="elf2dmp $tools"
+  fi
 fi
 if test "$softmmu" = yes ; then
   if test "$linux" = yes; then
@@ -5990,11 +5927,8 @@
 echo "VTE support       $vte $(echo_version $vte $vteversion)"
 echo "TLS priority      $tls_priority"
 echo "GNUTLS support    $gnutls"
-echo "GNUTLS rnd        $gnutls_rnd"
 echo "libgcrypt         $gcrypt"
-echo "libgcrypt kdf     $gcrypt_kdf"
 echo "nettle            $nettle $(echo_version $nettle $nettle_version)"
-echo "nettle kdf        $nettle_kdf"
 echo "libtasn1          $tasn1"
 echo "curses support    $curses"
 echo "virgl support     $virglrenderer $(echo_version $virglrenderer $virgl_version)"
@@ -6104,12 +6038,6 @@
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
 fi
 
-if test "$gtkabi" = "2.0"; then
-    echo
-    echo "WARNING: Use of GTK 2.0 is deprecated and will be removed in"
-    echo "WARNING: future releases. Please switch to using GTK 3.0"
-fi
-
 if test "$sdlabi" = "1.2"; then
     echo
     echo "WARNING: Use of SDL 1.2 is deprecated and will be removed in"
@@ -6345,6 +6273,9 @@
 if test "$memfd" = "yes" ; then
   echo "CONFIG_MEMFD=y" >> $config_host_mak
 fi
+if test "$have_usbfs" = "yes" ; then
+  echo "CONFIG_USBFS=y" >> $config_host_mak
+fi
 if test "$fallocate" = "yes" ; then
   echo "CONFIG_FALLOCATE=y" >> $config_host_mak
 fi
@@ -6426,7 +6357,6 @@
 fi
 if test "$gtk" = "yes" ; then
   echo "CONFIG_GTK=m" >> $config_host_mak
-  echo "CONFIG_GTKABI=$gtkabi" >> $config_host_mak
   echo "GTK_CFLAGS=$gtk_cflags" >> $config_host_mak
   echo "GTK_LIBS=$gtk_libs" >> $config_host_mak
   if test "$gtk_gl" = "yes" ; then
@@ -6437,24 +6367,15 @@
 if test "$gnutls" = "yes" ; then
   echo "CONFIG_GNUTLS=y" >> $config_host_mak
 fi
-if test "$gnutls_rnd" = "yes" ; then
-  echo "CONFIG_GNUTLS_RND=y" >> $config_host_mak
-fi
 if test "$gcrypt" = "yes" ; then
   echo "CONFIG_GCRYPT=y" >> $config_host_mak
   if test "$gcrypt_hmac" = "yes" ; then
     echo "CONFIG_GCRYPT_HMAC=y" >> $config_host_mak
   fi
-  if test "$gcrypt_kdf" = "yes" ; then
-    echo "CONFIG_GCRYPT_KDF=y" >> $config_host_mak
-  fi
 fi
 if test "$nettle" = "yes" ; then
   echo "CONFIG_NETTLE=y" >> $config_host_mak
   echo "CONFIG_NETTLE_VERSION_MAJOR=${nettle_version%%.*}" >> $config_host_mak
-  if test "$nettle_kdf" = "yes" ; then
-    echo "CONFIG_NETTLE_KDF=y" >> $config_host_mak
-  fi
 fi
 if test "$tasn1" = "yes" ; then
   echo "CONFIG_TASN1=y" >> $config_host_mak
@@ -6699,6 +6620,10 @@
   echo "CONFIG_ATOMIC128=y" >> $config_host_mak
 fi
 
+if test "$cmpxchg128" = "yes" ; then
+  echo "CONFIG_CMPXCHG128=y" >> $config_host_mak
+fi
+
 if test "$atomic64" = "yes" ; then
   echo "CONFIG_ATOMIC64=y" >> $config_host_mak
 fi
@@ -7024,12 +6949,14 @@
 
 case "$target_name" in
   i386)
+    mttcg="yes"
     gdb_xml_files="i386-32bit.xml i386-32bit-core.xml i386-32bit-sse.xml"
     target_compiler=$cross_cc_i386
     target_compiler_cflags=$cross_cc_ccflags_i386
   ;;
   x86_64)
     TARGET_BASE_ARCH=i386
+    mttcg="yes"
     gdb_xml_files="i386-64bit.xml i386-64bit-core.xml i386-64bit-sse.xml"
     target_compiler=$cross_cc_x86_64
   ;;
@@ -7344,6 +7271,9 @@
   ;;
   mips*)
     disas_config "MIPS"
+    if test -n "${cxx}"; then
+      disas_config "NANOMIPS"
+    fi
   ;;
   moxie*)
     disas_config "MOXIE"
@@ -7445,12 +7375,14 @@
 
 # build tree in object directory in case the source is not in the current directory
 DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests tests/vm"
+DIRS="$DIRS tests/fp"
 DIRS="$DIRS docs docs/interop fsdev scsi"
 DIRS="$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw"
 DIRS="$DIRS roms/seabios roms/vgabios"
 FILES="Makefile tests/tcg/Makefile qdict-test-data.txt"
 FILES="$FILES tests/tcg/cris/Makefile tests/tcg/cris/.gdbinit"
 FILES="$FILES tests/tcg/lm32/Makefile tests/tcg/xtensa/Makefile po/Makefile"
+FILES="$FILES tests/fp/Makefile"
 FILES="$FILES pc-bios/optionrom/Makefile pc-bios/keymaps"
 FILES="$FILES pc-bios/spapr-rtas/Makefile"
 FILES="$FILES pc-bios/s390-ccw/Makefile"
@@ -7528,6 +7460,46 @@
 # Compiler output produced by configure, useful for debugging
 # configure, is in config.log if it exists.
 EOD
+
+preserve_env() {
+    envname=$1
+    # Indirect lookup: fetch the value of the variable whose name is in $1
+    eval envval=\$$envname
+    # Non-empty values are written as assignment + export, empty ones as unset
+    if test -n "$envval"
+    then
+	echo "$envname='$envval'" >> config.status
+	echo "export $envname" >> config.status
+    else
+	echo "unset $envname" >> config.status
+    fi
+}
+
+# Preserve various env variables that influence what
+# features/build target configure will detect
+preserve_env AR
+preserve_env AS
+preserve_env CC
+preserve_env CPP
+preserve_env CXX
+preserve_env INSTALL
+preserve_env LD
+preserve_env LD_LIBRARY_PATH
+preserve_env LIBTOOL
+preserve_env MAKE
+preserve_env NM
+preserve_env OBJCOPY
+preserve_env PATH
+preserve_env PKG_CONFIG
+preserve_env PKG_CONFIG_LIBDIR
+preserve_env PKG_CONFIG_PATH
+preserve_env PYTHON
+preserve_env SDL_CONFIG
+preserve_env SDL2_CONFIG
+preserve_env SMBD
+preserve_env STRIP
+preserve_env WINDRES
+
 printf "exec" >>config.status
 printf " '%s'" "$0" "$@" >>config.status
 echo ' "$@"' >>config.status
diff --git a/contrib/elf2dmp/Makefile.objs b/contrib/elf2dmp/Makefile.objs
new file mode 100644
index 0000000..e3140f5
--- /dev/null
+++ b/contrib/elf2dmp/Makefile.objs
@@ -0,0 +1 @@
+elf2dmp-obj-y = main.o addrspace.o download.o pdb.o qemu_elf.o
diff --git a/contrib/elf2dmp/addrspace.c b/contrib/elf2dmp/addrspace.c
new file mode 100644
index 0000000..8a76069
--- /dev/null
+++ b/contrib/elf2dmp/addrspace.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "addrspace.h"
+
+/* Find the block whose half-open range [paddr, paddr + size) holds pa. */
+static struct pa_block *pa_space_find_block(struct pa_space *ps, uint64_t pa)
+{
+    size_t i;
+    for (i = 0; i < ps->block_nr; i++) {
+        if (ps->block[i].paddr <= pa &&
+                pa - ps->block[i].paddr < ps->block[i].size) {
+            return ps->block + i;
+        }
+    }
+    return NULL;
+}
+
+static uint8_t *pa_space_resolve(struct pa_space *ps, uint64_t pa)
+{
+    struct pa_block *block = pa_space_find_block(ps, pa);
+
+    if (!block) {
+        return NULL;
+    }
+
+    return block->addr + (pa - block->paddr);
+}
+
+int pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf)
+{
+    Elf64_Half phdr_nr = elf_getphdrnum(qemu_elf->map);
+    Elf64_Phdr *phdr = elf64_getphdr(qemu_elf->map);
+    size_t block_i = 0;
+    size_t i;
+
+    ps->block_nr = 0;
+
+    for (i = 0; i < phdr_nr; i++) {
+        if (phdr[i].p_type == PT_LOAD) {
+            ps->block_nr++;
+        }
+    }
+
+    ps->block = malloc(sizeof(*ps->block) * ps->block_nr);
+    if (!ps->block) {
+        return 1;
+    }
+
+    for (i = 0; i < phdr_nr; i++) {
+        if (phdr[i].p_type == PT_LOAD) {
+            ps->block[block_i] = (struct pa_block) {
+                .addr = (uint8_t *)qemu_elf->map + phdr[i].p_offset,
+                .paddr = phdr[i].p_paddr,
+                .size = phdr[i].p_filesz,
+            };
+            block_i++;
+        }
+    }
+
+    return 0;
+}
+
+void pa_space_destroy(struct pa_space *ps)
+{
+    ps->block_nr = 0;
+    free(ps->block);
+}
+
+void va_space_set_dtb(struct va_space *vs, uint64_t dtb)
+{
+    vs->dtb = dtb & 0x00ffffffffff000;
+}
+
+void va_space_create(struct va_space *vs, struct pa_space *ps, uint64_t dtb)
+{
+    vs->ps = ps;
+    va_space_set_dtb(vs, dtb);
+}
+
+static uint64_t get_pml4e(struct va_space *vs, uint64_t va)
+{
+    uint64_t pa = (vs->dtb & 0xffffffffff000) | ((va & 0xff8000000000) >> 36);
+    uint64_t *entry = (uint64_t *)pa_space_resolve(vs->ps, pa);
+    return entry ? *entry : 0; /* missing table page => not-present entry */
+}
+
+static uint64_t get_pdpi(struct va_space *vs, uint64_t va, uint64_t pml4e)
+{
+    uint64_t pdpte_paddr = (pml4e & 0xffffffffff000) |
+        ((va & 0x7FC0000000) >> 27);
+    uint64_t *entry = (uint64_t *)pa_space_resolve(vs->ps, pdpte_paddr);
+    return entry ? *entry : 0; /* missing table page => not-present entry */
+}
+
+static uint64_t pde_index(uint64_t va)
+{
+    return (va >> 21) & 0x1FF;
+}
+
+static uint64_t pdba_base(uint64_t pdpe)
+{
+    return pdpe & 0xFFFFFFFFFF000;
+}
+
+static uint64_t get_pgd(struct va_space *vs, uint64_t va, uint64_t pdpe)
+{
+    uint64_t pgd_entry = pdba_base(pdpe) + pde_index(va) * 8;
+    uint64_t *entry = (uint64_t *)pa_space_resolve(vs->ps, pgd_entry);
+    return entry ? *entry : 0; /* missing table page => not-present entry */
+}
+
+static uint64_t pte_index(uint64_t va)
+{
+    return (va >> 12) & 0x1FF;
+}
+
+static uint64_t ptba_base(uint64_t pde)
+{
+    return pde & 0xFFFFFFFFFF000;
+}
+
+static uint64_t get_pte(struct va_space *vs, uint64_t va, uint64_t pgd)
+{
+    uint64_t pgd_val = ptba_base(pgd) + pte_index(va) * 8;
+    uint64_t *entry = (uint64_t *)pa_space_resolve(vs->ps, pgd_val);
+    return entry ? *entry : 0; /* missing table page => not-present entry */
+}
+
+static uint64_t get_paddr(uint64_t va, uint64_t pte)
+{
+    return (pte & 0xFFFFFFFFFF000) | (va & 0xFFF);
+}
+
+static bool is_present(uint64_t entry)
+{
+    return entry & 0x1;
+}
+
+static bool page_size_flag(uint64_t entry)
+{
+    return entry & (1 << 7);
+}
+
+static uint64_t get_1GB_paddr(uint64_t va, uint64_t pdpte)
+{
+    return (pdpte & 0xfffffc0000000) | (va & 0x3fffffff);
+}
+
+static uint64_t get_2MB_paddr(uint64_t va, uint64_t pgd_entry)
+{
+    return (pgd_entry & 0xfffffffe00000) | (va & 0x00000001fffff);
+}
+
+static uint64_t va_space_va2pa(struct va_space *vs, uint64_t va)
+{
+    uint64_t pml4e, pdpe, pgd, pte;
+
+    pml4e = get_pml4e(vs, va);
+    if (!is_present(pml4e)) {
+        return INVALID_PA;
+    }
+
+    pdpe = get_pdpi(vs, va, pml4e);
+    if (!is_present(pdpe)) {
+        return INVALID_PA;
+    }
+
+    if (page_size_flag(pdpe)) {
+        return get_1GB_paddr(va, pdpe);
+    }
+
+    pgd = get_pgd(vs, va, pdpe);
+    if (!is_present(pgd)) {
+        return INVALID_PA;
+    }
+
+    if (page_size_flag(pgd)) {
+        return get_2MB_paddr(va, pgd);
+    }
+
+    pte = get_pte(vs, va, pgd);
+    if (!is_present(pte)) {
+        return INVALID_PA;
+    }
+
+    return get_paddr(va, pte);
+}
+
+void *va_space_resolve(struct va_space *vs, uint64_t va)
+{
+    uint64_t pa = va_space_va2pa(vs, va);
+
+    if (pa == INVALID_PA) {
+        return NULL;
+    }
+
+    return pa_space_resolve(vs->ps, pa);
+}
+
+int va_space_rw(struct va_space *vs, uint64_t addr,
+        void *buf, size_t size, int is_write)
+{
+    while (size) {
+        uint64_t page = addr & PFN_MASK;
+        size_t s = (page + PAGE_SIZE) - addr;
+        void *ptr;
+
+        s = (s > size) ? size : s;
+
+        ptr = va_space_resolve(vs, addr);
+        if (!ptr) {
+            return 1;
+        }
+
+        if (is_write) {
+            memcpy(ptr, buf, s);
+        } else {
+            memcpy(buf, ptr, s);
+        }
+
+        size -= s;
+        buf = (uint8_t *)buf + s;
+        addr += s;
+    }
+
+    return 0;
+}
diff --git a/contrib/elf2dmp/addrspace.h b/contrib/elf2dmp/addrspace.h
new file mode 100644
index 0000000..d87f6a1
--- /dev/null
+++ b/contrib/elf2dmp/addrspace.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#ifndef ADDRSPACE_H
+#define ADDRSPACE_H
+
+#include "qemu_elf.h"
+
+#define PAGE_BITS 12
+#define PAGE_SIZE (1ULL << PAGE_BITS)
+#define PFN_MASK (~(PAGE_SIZE - 1))
+
+#define INVALID_PA  UINT64_MAX
+
+struct pa_block {
+    uint8_t *addr;
+    uint64_t paddr;
+    uint64_t size;
+};
+
+struct pa_space {
+    size_t block_nr;
+    struct pa_block *block;
+};
+
+struct va_space {
+    uint64_t dtb;
+    struct pa_space *ps;
+};
+
+int pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf);
+void pa_space_destroy(struct pa_space *ps);
+
+void va_space_create(struct va_space *vs, struct pa_space *ps, uint64_t dtb);
+void va_space_set_dtb(struct va_space *vs, uint64_t dtb);
+void *va_space_resolve(struct va_space *vs, uint64_t va);
+int va_space_rw(struct va_space *vs, uint64_t addr,
+        void *buf, size_t size, int is_write);
+
+#endif /* ADDRSPACE_H */
diff --git a/contrib/elf2dmp/download.c b/contrib/elf2dmp/download.c
new file mode 100644
index 0000000..d09e607
--- /dev/null
+++ b/contrib/elf2dmp/download.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <curl/curl.h>
+#include "download.h"
+
+/* Fetch url into the file name; returns 0 on success, non-zero on failure. */
+int download_url(const char *name, const char *url)
+{
+    int err = 0;
+    FILE *file;
+    CURL *curl = curl_easy_init();
+
+    if (!curl) {
+        return 1;
+    }
+
+    file = fopen(name, "wb");
+    if (!file) {
+        err = 1;
+        goto out_curl;
+    }
+
+    curl_easy_setopt(curl, CURLOPT_URL, url);
+    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL);
+    curl_easy_setopt(curl, CURLOPT_WRITEDATA, file);
+    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); /* must be long */
+    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); /* must be long */
+
+    if (curl_easy_perform(curl) != CURLE_OK) {
+        err = 1;
+        fclose(file);
+        unlink(name);
+        goto out_curl;
+    }
+
+    err = fclose(file);
+
+out_curl:
+    curl_easy_cleanup(curl);
+    return err;
+}
diff --git a/contrib/elf2dmp/download.h b/contrib/elf2dmp/download.h
new file mode 100644
index 0000000..5c27492
--- /dev/null
+++ b/contrib/elf2dmp/download.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#ifndef DOWNLOAD_H
+#define DOWNLOAD_H
+
+int download_url(const char *name, const char *url);
+
+#endif /* DOWNLOAD_H */
diff --git a/contrib/elf2dmp/err.h b/contrib/elf2dmp/err.h
new file mode 100644
index 0000000..5456bd5
--- /dev/null
+++ b/contrib/elf2dmp/err.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#ifndef ERR_H
+#define ERR_H
+
+#define eprintf(...) fprintf(stderr, __VA_ARGS__)
+
+#endif /* ERR_H */
diff --git a/contrib/elf2dmp/kdbg.h b/contrib/elf2dmp/kdbg.h
new file mode 100644
index 0000000..851b57c
--- /dev/null
+++ b/contrib/elf2dmp/kdbg.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#ifndef KDBG_H
+#define KDBG_H
+
+typedef struct DBGKD_GET_VERSION64 {
+    uint16_t  MajorVersion;
+    uint16_t  MinorVersion;
+    uint8_t   ProtocolVersion;
+    uint8_t   KdSecondaryVersion;
+    uint16_t  Flags;
+    uint16_t  MachineType;
+    uint8_t   MaxPacketType;
+    uint8_t   MaxStateChange;
+    uint8_t   MaxManipulate;
+    uint8_t   Simulation;
+    uint16_t  Unused[1];
+    uint64_t KernBase;
+    uint64_t PsLoadedModuleList;
+    uint64_t DebuggerDataList;
+} DBGKD_GET_VERSION64;
+
+typedef struct DBGKD_DEBUG_DATA_HEADER64 {
+    struct LIST_ENTRY64 {
+       struct LIST_ENTRY64 *Flink;
+       struct LIST_ENTRY64 *Blink;
+    } List;
+    uint32_t           OwnerTag;
+    uint32_t           Size;
+} DBGKD_DEBUG_DATA_HEADER64;
+
+typedef struct KDDEBUGGER_DATA64 {
+    DBGKD_DEBUG_DATA_HEADER64 Header;
+
+    uint64_t KernBase;
+    uint64_t BreakpointWithStatus;
+    uint64_t SavedContext;
+    uint16_t ThCallbackStack;
+    uint16_t NextCallback;
+    uint16_t FramePointer;
+    uint16_t PaeEnabled:1;
+    uint64_t KiCallUserMode;
+    uint64_t KeUserCallbackDispatcher;
+    uint64_t PsLoadedModuleList;
+    uint64_t PsActiveProcessHead;
+    uint64_t PspCidTable;
+    uint64_t ExpSystemResourcesList;
+    uint64_t ExpPagedPoolDescriptor;
+    uint64_t ExpNumberOfPagedPools;
+    uint64_t KeTimeIncrement;
+    uint64_t KeBugCheckCallbackListHead;
+    uint64_t KiBugcheckData;
+    uint64_t IopErrorLogListHead;
+    uint64_t ObpRootDirectoryObject;
+    uint64_t ObpTypeObjectType;
+    uint64_t MmSystemCacheStart;
+    uint64_t MmSystemCacheEnd;
+    uint64_t MmSystemCacheWs;
+    uint64_t MmPfnDatabase;
+    uint64_t MmSystemPtesStart;
+    uint64_t MmSystemPtesEnd;
+    uint64_t MmSubsectionBase;
+    uint64_t MmNumberOfPagingFiles;
+    uint64_t MmLowestPhysicalPage;
+    uint64_t MmHighestPhysicalPage;
+    uint64_t MmNumberOfPhysicalPages;
+    uint64_t MmMaximumNonPagedPoolInBytes;
+    uint64_t MmNonPagedSystemStart;
+    uint64_t MmNonPagedPoolStart;
+    uint64_t MmNonPagedPoolEnd;
+    uint64_t MmPagedPoolStart;
+    uint64_t MmPagedPoolEnd;
+    uint64_t MmPagedPoolInformation;
+    uint64_t MmPageSize;
+    uint64_t MmSizeOfPagedPoolInBytes;
+    uint64_t MmTotalCommitLimit;
+    uint64_t MmTotalCommittedPages;
+    uint64_t MmSharedCommit;
+    uint64_t MmDriverCommit;
+    uint64_t MmProcessCommit;
+    uint64_t MmPagedPoolCommit;
+    uint64_t MmExtendedCommit;
+    uint64_t MmZeroedPageListHead;
+    uint64_t MmFreePageListHead;
+    uint64_t MmStandbyPageListHead;
+    uint64_t MmModifiedPageListHead;
+    uint64_t MmModifiedNoWritePageListHead;
+    uint64_t MmAvailablePages;
+    uint64_t MmResidentAvailablePages;
+    uint64_t PoolTrackTable;
+    uint64_t NonPagedPoolDescriptor;
+    uint64_t MmHighestUserAddress;
+    uint64_t MmSystemRangeStart;
+    uint64_t MmUserProbeAddress;
+    uint64_t KdPrintCircularBuffer;
+    uint64_t KdPrintCircularBufferEnd;
+    uint64_t KdPrintWritePointer;
+    uint64_t KdPrintRolloverCount;
+    uint64_t MmLoadedUserImageList;
+
+    /* NT 5.1 Addition */
+
+    uint64_t NtBuildLab;
+    uint64_t KiNormalSystemCall;
+
+    /* NT 5.0 hotfix addition */
+
+    uint64_t KiProcessorBlock;
+    uint64_t MmUnloadedDrivers;
+    uint64_t MmLastUnloadedDriver;
+    uint64_t MmTriageActionTaken;
+    uint64_t MmSpecialPoolTag;
+    uint64_t KernelVerifier;
+    uint64_t MmVerifierData;
+    uint64_t MmAllocatedNonPagedPool;
+    uint64_t MmPeakCommitment;
+    uint64_t MmTotalCommitLimitMaximum;
+    uint64_t CmNtCSDVersion;
+
+    /* NT 5.1 Addition */
+
+    uint64_t MmPhysicalMemoryBlock;
+    uint64_t MmSessionBase;
+    uint64_t MmSessionSize;
+    uint64_t MmSystemParentTablePage;
+
+    /* Server 2003 addition */
+
+    uint64_t MmVirtualTranslationBase;
+    uint16_t OffsetKThreadNextProcessor;
+    uint16_t OffsetKThreadTeb;
+    uint16_t OffsetKThreadKernelStack;
+    uint16_t OffsetKThreadInitialStack;
+    uint16_t OffsetKThreadApcProcess;
+    uint16_t OffsetKThreadState;
+    uint16_t OffsetKThreadBStore;
+    uint16_t OffsetKThreadBStoreLimit;
+    uint16_t SizeEProcess;
+    uint16_t OffsetEprocessPeb;
+    uint16_t OffsetEprocessParentCID;
+    uint16_t OffsetEprocessDirectoryTableBase;
+    uint16_t SizePrcb;
+    uint16_t OffsetPrcbDpcRoutine;
+    uint16_t OffsetPrcbCurrentThread;
+    uint16_t OffsetPrcbMhz;
+    uint16_t OffsetPrcbCpuType;
+    uint16_t OffsetPrcbVendorString;
+    uint16_t OffsetPrcbProcStateContext;
+    uint16_t OffsetPrcbNumber;
+    uint16_t SizeEThread;
+    uint64_t KdPrintCircularBufferPtr;
+    uint64_t KdPrintBufferSize;
+    uint64_t KeLoaderBlock;
+    uint16_t SizePcr;
+    uint16_t OffsetPcrSelfPcr;
+    uint16_t OffsetPcrCurrentPrcb;
+    uint16_t OffsetPcrContainedPrcb;
+    uint16_t OffsetPcrInitialBStore;
+    uint16_t OffsetPcrBStoreLimit;
+    uint16_t OffsetPcrInitialStack;
+    uint16_t OffsetPcrStackLimit;
+    uint16_t OffsetPrcbPcrPage;
+    uint16_t OffsetPrcbProcStateSpecialReg;
+    uint16_t GdtR0Code;
+    uint16_t GdtR0Data;
+    uint16_t GdtR0Pcr;
+    uint16_t GdtR3Code;
+    uint16_t GdtR3Data;
+    uint16_t GdtR3Teb;
+    uint16_t GdtLdt;
+    uint16_t GdtTss;
+    uint16_t Gdt64R3CmCode;
+    uint16_t Gdt64R3CmTeb;
+    uint64_t IopNumTriageDumpDataBlocks;
+    uint64_t IopTriageDumpDataBlocks;
+
+    /* Longhorn addition */
+
+    uint64_t VfCrashDataBlock;
+    uint64_t MmBadPagesDetected;
+    uint64_t MmZeroedPageSingleBitErrorsDetected;
+
+    /* Windows 7 addition */
+
+    uint64_t EtwpDebuggerData;
+    uint16_t OffsetPrcbContext;
+} KDDEBUGGER_DATA64;
+
+#endif /* KDBG_H */
diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c
new file mode 100644
index 0000000..9b93dab
--- /dev/null
+++ b/contrib/elf2dmp/main.c
@@ -0,0 +1,589 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "err.h"
+#include "addrspace.h"
+#include "pe.h"
+#include "pdb.h"
+#include "kdbg.h"
+#include "download.h"
+#include "qemu/win_dump_defs.h"
+
+#define SYM_URL_BASE    "https://msdl.microsoft.com/download/symbols/"
+#define PDB_NAME    "ntkrnlmp.pdb"
+
+#define INITIAL_MXCSR   0x1f80
+
+/*
+ * One 16-byte gate descriptor as it is read from the guest IDT (hence
+ * packed).  The handler address is split over offset1, offset2 and offset3;
+ * idt_desc_addr() puts the three pieces back together.
+ */
+typedef struct idt_desc {
+    uint16_t offset1;   /* offset bits 0..15 */
+    uint16_t selector;
+    uint8_t ist;
+    uint8_t type_attr;
+    uint16_t offset2;   /* offset bits 16..31 */
+    uint32_t offset3;   /* offset bits 32..63 */
+    uint32_t rsrvd;
+} __attribute__ ((packed)) idt_desc_t;
+
+/* Reassemble the 64-bit handler address from the three offset fields. */
+static uint64_t idt_desc_addr(idt_desc_t desc)
+{
+    uint64_t low = desc.offset1;
+    uint64_t mid = desc.offset2;
+    uint64_t high = desc.offset3;
+
+    return low | (mid << 16) | (high << 32);
+}
+
+/*
+ * Fixed guest virtual address that the code below resolves both to test
+ * whether the current page-table base works and as the base for the
+ * KUSD_OFFSET_* reads.
+ */
+static const uint64_t SharedUserData = 0xfffff78000000000;
+
+/* Byte offsets from SharedUserData read when filling the dump header. */
+#define KUSD_OFFSET_SUITE_MASK 0x2d0
+#define KUSD_OFFSET_PRODUCT_TYPE 0x264
+
+/*
+ * Resolve the symbol named like variable `s` through PDB reader `r` for an
+ * image loaded at `base`, store the result in `s` and log the outcome.
+ * The expression evaluates to the resolved address, or 0 on failure.
+ *
+ * `s` receives pdb_resolve()'s uint64_t result, so it is printed with
+ * PRIx64; "%lx" is only correct on hosts where long is 64 bits wide.
+ */
+#define SYM_RESOLVE(base, r, s) ((s = pdb_resolve(base, r, #s)),\
+    s ? printf(#s" = 0x%016"PRIx64"\n", s) :\
+        eprintf("Failed to resolve "#s"\n"), s)
+
+/*
+ * Rotate x left by y bits.
+ *
+ * The count is taken modulo 64.  The caller passes (uint8_t)kwn, i.e. any
+ * value in 0..255, and shifting a 64-bit value by 64 or more bits (which a
+ * naive "x >> (64 - y)" does for y == 0) is undefined behaviour in C.
+ */
+static uint64_t rol(uint64_t x, uint64_t y)
+{
+    y &= 63;
+
+    return (x << y) | (x >> ((64 - y) & 63));
+}
+
+/*
+ * Decode `size` bytes of KDBG data from `src` into `dst`, one 64-bit word
+ * at a time; `size` must be a multiple of 8.  `src` and `dst` may be the
+ * same buffer.  The algorithm is the one documented by the Volatility
+ * project.
+ */
+static void kdbg_decode(uint64_t *dst, uint64_t *src, size_t size,
+        uint64_t kwn, uint64_t kwa, uint64_t kdbe)
+{
+    const size_t nr_words = size / sizeof(uint64_t);
+    size_t n;
+
+    assert(size % sizeof(uint64_t) == 0);
+
+    for (n = 0; n < nr_words; n++) {
+        uint64_t word = rol(src[n] ^ kwn, (uint8_t)kwn);
+
+        dst[n] = __builtin_bswap64(word ^ kdbe) ^ kwa;
+    }
+}
+
+/*
+ * Read the kernel debugger data block (KDBG) found at guest virtual address
+ * KdDebuggerDataBlock into a malloc()ed buffer.
+ *
+ * If the header's OwnerTag is not "KDBG" the block is treated as encoded:
+ * KiWaitNever, KiWaitAlways and KdpDataBlockEncoded are resolved through
+ * the PDB, the header and then the whole block are decoded with
+ * kdbg_decode(), and the decoded block is written back to the guest image.
+ *
+ * Returns the buffer, which the caller must free(), or NULL on failure.
+ */
+static KDDEBUGGER_DATA64 *get_kdbg(uint64_t KernBase, struct pdb_reader *pdb,
+        struct va_space *vs, uint64_t KdDebuggerDataBlock)
+{
+    const char OwnerTag[4] = "KDBG";
+    KDDEBUGGER_DATA64 *kdbg = NULL;
+    DBGKD_DEBUG_DATA_HEADER64 kdbg_hdr;
+    bool decode = false;
+    uint64_t kwn = 0, kwa = 0, KdpDataBlockEncoded = 0;
+
+    if (va_space_rw(vs,
+                KdDebuggerDataBlock + offsetof(KDDEBUGGER_DATA64, Header),
+                &kdbg_hdr, sizeof(kdbg_hdr), 0)) {
+        eprintf("Failed to extract KDBG header\n");
+        return NULL;
+    }
+
+    if (memcmp(&kdbg_hdr.OwnerTag, OwnerTag, sizeof(OwnerTag))) {
+        uint64_t KiWaitNever, KiWaitAlways;
+
+        decode = true;
+
+        if (!SYM_RESOLVE(KernBase, pdb, KiWaitNever) ||
+                !SYM_RESOLVE(KernBase, pdb, KiWaitAlways) ||
+                !SYM_RESOLVE(KernBase, pdb, KdpDataBlockEncoded)) {
+            return NULL;
+        }
+
+        if (va_space_rw(vs, KiWaitNever, &kwn, sizeof(kwn), 0) ||
+                va_space_rw(vs, KiWaitAlways, &kwa, sizeof(kwa), 0)) {
+            return NULL;
+        }
+
+        printf("[KiWaitNever] = 0x%016"PRIx64"\n", kwn);
+        printf("[KiWaitAlways] = 0x%016"PRIx64"\n", kwa);
+
+        /*
+         * If KDBG header can be decoded, KDBG size is available
+         * and entire KDBG can be decoded.
+         */
+        printf("Decoding KDBG header...\n");
+        kdbg_decode((uint64_t *)&kdbg_hdr, (uint64_t *)&kdbg_hdr,
+                sizeof(kdbg_hdr), kwn, kwa, KdpDataBlockEncoded);
+
+        printf("Owner tag is \'%.4s\'\n", (char *)&kdbg_hdr.OwnerTag);
+        if (memcmp(&kdbg_hdr.OwnerTag, OwnerTag, sizeof(OwnerTag))) {
+            eprintf("Failed to decode KDBG header\n");
+            return NULL;
+        }
+
+        /*
+         * kdbg_decode() only handles whole 64-bit words and asserts
+         * otherwise; fail cleanly instead of aborting on a bogus size.
+         */
+        if (kdbg_hdr.Size % sizeof(uint64_t)) {
+            eprintf("Unexpected KDBG size\n");
+            return NULL;
+        }
+    }
+
+    kdbg = malloc(kdbg_hdr.Size);
+    if (!kdbg) {
+        return NULL;
+    }
+
+    if (va_space_rw(vs, KdDebuggerDataBlock, kdbg, kdbg_hdr.Size, 0)) {
+        eprintf("Failed to extract entire KDBG\n");
+        free(kdbg);     /* do not leak the buffer on the error path */
+        return NULL;
+    }
+
+    if (!decode) {
+        return kdbg;
+    }
+
+    printf("Decoding KdDebuggerDataBlock...\n");
+    kdbg_decode((uint64_t *)kdbg, (uint64_t *)kdbg, kdbg_hdr.Size,
+                kwn, kwa, KdpDataBlockEncoded);
+
+    /*
+     * Store the decoded block back into the guest image.
+     * NOTE(review): the result of this write is not checked.
+     */
+    va_space_rw(vs, KdDebuggerDataBlock, kdbg, kdbg_hdr.Size, 1);
+
+    return kdbg;
+}
+
+/*
+ * Fill *ctx from the CPU state `s` saved by QEMU: segment selectors, the
+ * low 32 bits of RFLAGS, the general purpose registers and RIP are copied,
+ * both MxCsr fields are set to INITIAL_MXCSR and every other member is
+ * zeroed.
+ */
+static void win_context_init_from_qemu_cpu_state(WinContext *ctx,
+        QEMUCPUState *s)
+{
+    *ctx = (WinContext){
+        .ContextFlags = WIN_CTX_X64 | WIN_CTX_INT | WIN_CTX_SEG | WIN_CTX_CTL,
+        .MxCsr = INITIAL_MXCSR,
+        .FltSave = {
+            .MxCsr = INITIAL_MXCSR,
+        },
+
+        /* segment selectors and flags */
+        .SegCs = s->cs.selector,
+        .SegDs = s->ds.selector,
+        .SegEs = s->es.selector,
+        .SegFs = s->fs.selector,
+        .SegGs = s->gs.selector,
+        .SegSs = s->ss.selector,
+        .EFlags = (uint32_t)s->rflags,
+
+        /* general purpose registers */
+        .Rax = s->rax,
+        .Rcx = s->rcx,
+        .Rdx = s->rdx,
+        .Rbx = s->rbx,
+        .Rsp = s->rsp,
+        .Rbp = s->rbp,
+        .Rsi = s->rsi,
+        .Rdi = s->rdi,
+        .R8  = s->r8,
+        .R9  = s->r9,
+        .R10 = s->r10,
+        .R11 = s->r11,
+        .R12 = s->r12,
+        .R13 = s->r13,
+        .R14 = s->r14,
+        .R15 = s->r15,
+
+        /* instruction pointer */
+        .Rip = s->rip,
+    };
+}
+
+/*
+ * Make sure `vs` uses a paging-structure base through which
+ * SharedUserData can be resolved, trying alternatives if the base that is
+ * already set does not work.
+ *
+ * Returns 0 when SharedUserData resolves, non-zero otherwise.
+ */
+static int fix_dtb(struct va_space *vs, QEMU_Elf *qe)
+{
+    size_t cpu;
+
+    /* Step 1: the DTB that is already set may simply work. */
+    if (va_space_resolve(vs, SharedUserData)) {
+        return 0;
+    }
+
+    /* Step 2: take CR3 of the first CPU that is_system() accepts. */
+    for (cpu = 0; cpu < qe->state_nr; cpu++) {
+        QEMUCPUState *st = qe->state[cpu];
+
+        if (!is_system(st)) {
+            continue;
+        }
+
+        va_space_set_dtb(vs, st->cr[3]);
+        printf("DTB 0x%016lx has been found from CPU #%zu"
+                " as system task CR3\n", vs->dtb, cpu);
+        return !(va_space_resolve(vs, SharedUserData));
+    }
+
+    /*
+     * Step 3: use KERNEL_GS_BASE from CPU #0 as PRCB address and
+     * CR3 as [Prcb+0x7000].
+     */
+    if (!qe->has_kernel_gs_base) {
+        return 1;
+    }
+
+    {
+        QEMUCPUState *cpu0 = qe->state[0];
+        uint64_t prcb_va = cpu0->kernel_gs_base;
+        uint64_t *saved_cr3 = va_space_resolve(vs, prcb_va + 0x7000);
+
+        if (saved_cr3 == NULL) {
+            return 1;
+        }
+
+        va_space_set_dtb(vs, *saved_cr3);
+        printf("DirectoryTableBase = 0x%016lx has been found from CPU #0"
+                " as interrupt handling CR3\n", vs->dtb);
+    }
+
+    return !(va_space_resolve(vs, SharedUserData));
+}
+
+/*
+ * Build the crash dump header in *hdr from the guest's KdVersionBlock,
+ * the decoded KDBG, two fields read below SharedUserData and the physical
+ * memory layout described by `ps`.
+ *
+ * Returns 0 on success, 1 if a required guest address cannot be read.
+ */
+static int fill_header(WinDumpHeader64 *hdr, struct pa_space *ps,
+        struct va_space *vs, uint64_t KdDebuggerDataBlock,
+        KDDEBUGGER_DATA64 *kdbg, uint64_t KdVersionBlock, int nr_cpus)
+{
+    uint32_t *suite_mask;
+    int32_t *product_type;
+    DBGKD_GET_VERSION64 kvb;
+    WinDumpHeader64 h;
+    size_t run;
+
+    QEMU_BUILD_BUG_ON(KUSD_OFFSET_SUITE_MASK >= PAGE_SIZE);
+    QEMU_BUILD_BUG_ON(KUSD_OFFSET_PRODUCT_TYPE >= PAGE_SIZE);
+
+    suite_mask = va_space_resolve(vs, SharedUserData +
+            KUSD_OFFSET_SUITE_MASK);
+    product_type = va_space_resolve(vs, SharedUserData +
+            KUSD_OFFSET_PRODUCT_TYPE);
+    if (suite_mask == NULL || product_type == NULL) {
+        return 1;
+    }
+
+    if (va_space_rw(vs, KdVersionBlock, &kvb, sizeof(kvb), 0)) {
+        eprintf("Failed to extract KdVersionBlock\n");
+        return 1;
+    }
+
+    h = (WinDumpHeader64) {
+        .Signature = "PAGE",
+        .ValidDump = "DU64",
+        .MajorVersion = kvb.MajorVersion,
+        .MinorVersion = kvb.MinorVersion,
+        .DirectoryTableBase = vs->dtb,
+        .PfnDatabase = kdbg->MmPfnDatabase,
+        .PsLoadedModuleList = kdbg->PsLoadedModuleList,
+        .PsActiveProcessHead = kdbg->PsActiveProcessHead,
+        .MachineImageType = kvb.MachineType,
+        .NumberProcessors = nr_cpus,
+        .BugcheckCode = LIVE_SYSTEM_DUMP,
+        .KdDebuggerDataBlock = KdDebuggerDataBlock,
+        .DumpType = 1,
+        .Comment = "Hello from elf2dmp!",
+        .SuiteMask = *suite_mask,
+        .ProductType = *product_type,
+        .SecondaryDataState = kvb.KdSecondaryVersion,
+        .PhysicalMemoryBlock = (WinDumpPhyMemDesc64) {
+            .NumberOfRuns = ps->block_nr,
+        },
+        .RequiredDumpSpace = sizeof(h),
+    };
+
+    /* One run per physical block; the page total is accumulated on the way */
+    for (run = 0; run < ps->block_nr; run++) {
+        struct pa_block *blk = &ps->block[run];
+
+        h.PhysicalMemoryBlock.Run[run] = (WinDumpPhyMemRun64) {
+            .BasePage = blk->paddr / PAGE_SIZE,
+            .PageCount = blk->size / PAGE_SIZE,
+        };
+        h.PhysicalMemoryBlock.NumberOfPages += blk->size / PAGE_SIZE;
+    }
+
+    /* Header size (set above) plus the size of all memory pages */
+    h.RequiredDumpSpace += h.PhysicalMemoryBlock.NumberOfPages << PAGE_BITS;
+
+    *hdr = h;
+
+    return 0;
+}
+
+/*
+ * For every CPU in the QEMU ELF dump: read the CPU's PRCB address from the
+ * array at kdbg->KiProcessorBlock, read the context-frame pointer stored
+ * at offset kdbg->OffsetPrcbContext inside that PRCB, and overwrite the
+ * frame in the guest image with a WinContext built from the CPU state that
+ * QEMU saved.
+ *
+ * Returns 0 on success, 1 as soon as a guest memory access fails.
+ */
+static int fill_context(KDDEBUGGER_DATA64 *kdbg,
+        struct va_space *vs, QEMU_Elf *qe)
+{
+    /* size_t like the other loops over qe->state_nr in this file */
+    size_t i;
+
+    for (i = 0; i < qe->state_nr; i++) {
+        uint64_t Prcb;
+        uint64_t Context;
+        WinContext ctx;
+        QEMUCPUState *s = qe->state[i];
+
+        if (va_space_rw(vs, kdbg->KiProcessorBlock + sizeof(Prcb) * i,
+                    &Prcb, sizeof(Prcb), 0)) {
+            eprintf("Failed to read CPU #%zu PRCB location\n", i);
+            return 1;
+        }
+
+        if (va_space_rw(vs, Prcb + kdbg->OffsetPrcbContext,
+                    &Context, sizeof(Context), 0)) {
+            eprintf("Failed to read CPU #%zu ContextFrame location\n", i);
+            return 1;
+        }
+
+        printf("Filling context for CPU #%zu...\n", i);
+        win_context_init_from_qemu_cpu_state(&ctx, s);
+
+        if (va_space_rw(vs, Context, &ctx, sizeof(ctx), 1)) {
+            eprintf("Failed to fill CPU #%zu context\n", i);
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Write the dump file `name`: the header *hdr followed by the contents of
+ * every physical memory block of `ps`, in order.
+ *
+ * Returns 0 on success and non-zero on failure (open, write or close).
+ */
+static int write_dump(struct pa_space *ps,
+        WinDumpHeader64 *hdr, const char *name)
+{
+    FILE *dmp_file = fopen(name, "wb");
+    size_t i;
+
+    if (!dmp_file) {
+        eprintf("Failed to open output file \'%s\'\n", name);
+        return 1;
+    }
+
+    printf("Writing header to file...\n");
+
+    if (fwrite(hdr, sizeof(*hdr), 1, dmp_file) != 1) {
+        eprintf("Failed to write dump header\n");
+        fclose(dmp_file);
+        return 1;
+    }
+
+    for (i = 0; i < ps->block_nr; i++) {
+        struct pa_block *b = &ps->block[i];
+
+        printf("Writing block #%zu/%zu to file...\n", i, ps->block_nr);
+
+        /*
+         * fwrite() returns 0 when asked to write a zero-sized item, which
+         * must not be mistaken for an I/O error.
+         */
+        if (b->size && fwrite(b->addr, b->size, 1, dmp_file) != 1) {
+            eprintf("Failed to write block #%zu\n", i);
+            fclose(dmp_file);
+            return 1;
+        }
+    }
+
+    /* fclose() flushes; its failure means the dump may be incomplete */
+    return fclose(dmp_file);
+}
+
+/*
+ * Check that a PE image for x86-64 (machine 0x8664, optional header magic
+ * 0x020b) is mapped at guest virtual address `base`, that its debug
+ * directory carries an RSDS CodeView record naming PDB_NAME, and format
+ * the record's GUID and age into `hash` as the symbol store path component.
+ *
+ * `start_addr` is the host pointer to the first page of the image (it is
+ * only used to read the DOS header).  `hash` receives 32 hex digits for
+ * the GUID followed by the age in hex and a terminating NUL; the caller
+ * must provide a buffer large enough for that.
+ *
+ * Returns 0 on success, 1 on any mismatch or read failure.
+ */
+static int pe_get_pdb_symstore_hash(uint64_t base, void *start_addr,
+        char *hash, struct va_space *vs)
+{
+    const char e_magic[2] = "MZ";
+    const char Signature[4] = "PE\0\0";
+    const char sign_rsds[4] = "RSDS";
+    IMAGE_DOS_HEADER *dos_hdr = start_addr;
+    IMAGE_NT_HEADERS64 nt_hdrs;
+    IMAGE_FILE_HEADER *file_hdr = &nt_hdrs.FileHeader;
+    IMAGE_OPTIONAL_HEADER64 *opt_hdr = &nt_hdrs.OptionalHeader;
+    IMAGE_DATA_DIRECTORY *data_dir = nt_hdrs.OptionalHeader.DataDirectory;
+    IMAGE_DEBUG_DIRECTORY debug_dir;
+    OMFSignatureRSDS rsds;
+    char *pdb_name;
+    size_t pdb_name_sz;
+    size_t i;
+
+    QEMU_BUILD_BUG_ON(sizeof(*dos_hdr) >= PAGE_SIZE);
+
+    /* The caller's search may end without any resolvable page */
+    if (!dos_hdr) {
+        return 1;
+    }
+
+    if (memcmp(&dos_hdr->e_magic, e_magic, sizeof(e_magic))) {
+        return 1;
+    }
+
+    if (va_space_rw(vs, base + dos_hdr->e_lfanew,
+                &nt_hdrs, sizeof(nt_hdrs), 0)) {
+        return 1;
+    }
+
+    if (memcmp(&nt_hdrs.Signature, Signature, sizeof(Signature)) ||
+            file_hdr->Machine != 0x8664 || opt_hdr->Magic != 0x020b) {
+        return 1;
+    }
+
+    printf("Debug Directory RVA = 0x%016x\n",
+            data_dir[IMAGE_FILE_DEBUG_DIRECTORY].VirtualAddress);
+
+    if (va_space_rw(vs,
+                base + data_dir[IMAGE_FILE_DEBUG_DIRECTORY].VirtualAddress,
+                &debug_dir, sizeof(debug_dir), 0)) {
+        return 1;
+    }
+
+    if (debug_dir.Type != IMAGE_DEBUG_TYPE_CODEVIEW) {
+        return 1;
+    }
+
+    if (va_space_rw(vs,
+                base + debug_dir.AddressOfRawData,
+                &rsds, sizeof(rsds), 0)) {
+        return 1;
+    }
+
+    printf("CodeView signature is \'%.4s\'\n", rsds.Signature);
+
+    if (memcmp(&rsds.Signature, sign_rsds, sizeof(sign_rsds))) {
+        return 1;
+    }
+
+    /*
+     * The PDB name follows the fixed part of the record.  Reject records
+     * that are too short, otherwise the subtraction below would wrap.
+     */
+    if (debug_dir.SizeOfData <= sizeof(rsds)) {
+        eprintf("CodeView record is too short to hold a PDB name\n");
+        return 1;
+    }
+
+    pdb_name_sz = debug_dir.SizeOfData - sizeof(rsds);
+    /* One extra byte so the name is NUL-terminated whatever the guest holds */
+    pdb_name = malloc(pdb_name_sz + 1);
+    if (!pdb_name) {
+        return 1;
+    }
+
+    if (va_space_rw(vs, base + debug_dir.AddressOfRawData +
+                offsetof(OMFSignatureRSDS, name), pdb_name, pdb_name_sz, 0)) {
+        free(pdb_name);
+        return 1;
+    }
+    pdb_name[pdb_name_sz] = '\0';
+
+    printf("PDB name is \'%s\', \'%s\' expected\n", pdb_name, PDB_NAME);
+
+    if (strcmp(pdb_name, PDB_NAME)) {
+        eprintf("Unexpected PDB name, it seems the kernel isn't found\n");
+        free(pdb_name);
+        return 1;
+    }
+
+    free(pdb_name);
+
+    /* GUID: 8 + 4 + 4 + 2 + 2 = 20 hex digits, then 6 more bytes */
+    sprintf(hash, "%.08x%.04x%.04x%.02x%.02x", rsds.guid.a, rsds.guid.b,
+            rsds.guid.c, rsds.guid.d[0], rsds.guid.d[1]);
+    hash += 20;
+    for (i = 0; i < 6; i++, hash += 2) {
+        sprintf(hash, "%.02x", rsds.guid.e[i]);
+    }
+
+    /* Age is appended without padding, so its width varies */
+    sprintf(hash, "%.01x", rsds.age);
+
+    return 0;
+}
+
+/*
+ * elf2dmp entry point: convert the QEMU ELF dump argv[1] into the crash
+ * dump file argv[2].
+ *
+ * Steps: open the ELF dump, build physical and virtual address spaces,
+ * search for the kernel image downwards from the first IDT handler,
+ * download the matching PDB, resolve and read the KDBG and version
+ * blocks, patch the per-CPU contexts and write the dump.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+int main(int argc, char *argv[])
+{
+    int err = 0;
+    QEMU_Elf qemu_elf;
+    struct pa_space ps;
+    struct va_space vs;
+    QEMUCPUState *state;
+    idt_desc_t first_idt_desc;
+    uint64_t KernBase;
+    void *nt_start_addr = NULL;
+    bool kernel_found = false;
+    WinDumpHeader64 header;
+    /* 32 hex digits of GUID, room for an age of up to 8 hex digits, NUL */
+    char pdb_hash[32 + 8 + 1];
+    /* SYM_URL_BASE PDB_NAME "/" hash "/" PDB_NAME, sized from its parts */
+    char pdb_url[sizeof(SYM_URL_BASE) + 2 * sizeof(PDB_NAME) +
+        sizeof(pdb_hash)];
+    struct pdb_reader pdb;
+    uint64_t KdDebuggerDataBlock;
+    KDDEBUGGER_DATA64 *kdbg;
+    uint64_t KdVersionBlock;
+
+    if (argc != 3) {
+        eprintf("usage:\n\t%s elf_file dmp_file\n", argv[0]);
+        return 1;
+    }
+
+    if (QEMU_Elf_init(&qemu_elf, argv[1])) {
+        eprintf("Failed to initialize QEMU ELF dump\n");
+        return 1;
+    }
+
+    if (pa_space_create(&ps, &qemu_elf)) {
+        eprintf("Failed to initialize physical address space\n");
+        err = 1;
+        goto out_elf;
+    }
+
+    state = qemu_elf.state[0];
+    printf("CPU #0 CR3 is 0x%016lx\n", state->cr[3]);
+
+    va_space_create(&vs, &ps, state->cr[3]);
+    if (fix_dtb(&vs, &qemu_elf)) {
+        eprintf("Failed to find paging base\n");
+        err = 1;
+        /* ps has been created by now and must be destroyed */
+        goto out_ps;
+    }
+
+    printf("CPU #0 IDT is at 0x%016lx\n", state->idt.base);
+
+    if (va_space_rw(&vs, state->idt.base,
+                &first_idt_desc, sizeof(first_idt_desc), 0)) {
+        eprintf("Failed to get CPU #0 IDT[0]\n");
+        err = 1;
+        goto out_ps;
+    }
+    printf("CPU #0 IDT[0] -> 0x%016"PRIx64"\n",
+            idt_desc_addr(first_idt_desc));
+
+    KernBase = idt_desc_addr(first_idt_desc) & ~(PAGE_SIZE - 1);
+    printf("Searching kernel downwards from 0x%16"PRIx64"...\n", KernBase);
+
+    for (; KernBase >= 0xfffff78000000000; KernBase -= PAGE_SIZE) {
+        nt_start_addr = va_space_resolve(&vs, KernBase);
+        if (!nt_start_addr) {
+            continue;
+        }
+
+        if (*(uint16_t *)nt_start_addr == 0x5a4d) { /* MZ */
+            kernel_found = true;
+            break;
+        }
+    }
+
+    /*
+     * Without this check a failed search would continue with a NULL or
+     * unrelated page pointer.
+     */
+    if (!kernel_found) {
+        eprintf("Failed to find NT kernel image\n");
+        err = 1;
+        goto out_ps;
+    }
+
+    printf("KernBase = 0x%16"PRIx64", signature is \'%.2s\'\n", KernBase,
+            (char *)nt_start_addr);
+
+    if (pe_get_pdb_symstore_hash(KernBase, nt_start_addr, pdb_hash, &vs)) {
+        eprintf("Failed to get PDB symbol store hash\n");
+        err = 1;
+        goto out_ps;
+    }
+
+    snprintf(pdb_url, sizeof(pdb_url), "%s%s/%s/%s", SYM_URL_BASE, PDB_NAME,
+            pdb_hash, PDB_NAME);
+    printf("PDB URL is %s\n", pdb_url);
+
+    if (download_url(PDB_NAME, pdb_url)) {
+        eprintf("Failed to download PDB file\n");
+        err = 1;
+        goto out_ps;
+    }
+
+    if (pdb_init_from_file(PDB_NAME, &pdb)) {
+        eprintf("Failed to initialize PDB reader\n");
+        err = 1;
+        goto out_pdb_file;
+    }
+
+    if (!SYM_RESOLVE(KernBase, &pdb, KdDebuggerDataBlock) ||
+            !SYM_RESOLVE(KernBase, &pdb, KdVersionBlock)) {
+        err = 1;
+        goto out_pdb;
+    }
+
+    kdbg = get_kdbg(KernBase, &pdb, &vs, KdDebuggerDataBlock);
+    if (!kdbg) {
+        err = 1;
+        goto out_pdb;
+    }
+
+    /* From here on kdbg is allocated: every exit goes through out_kdbg */
+    if (fill_header(&header, &ps, &vs, KdDebuggerDataBlock, kdbg,
+            KdVersionBlock, qemu_elf.state_nr)) {
+        err = 1;
+        goto out_kdbg;
+    }
+
+    if (fill_context(kdbg, &vs, &qemu_elf)) {
+        err = 1;
+        goto out_kdbg;
+    }
+
+    if (write_dump(&ps, &header, argv[2])) {
+        eprintf("Failed to save dump\n");
+        err = 1;
+        goto out_kdbg;
+    }
+
+    /* Success falls through the same cleanup chain with err == 0 */
+out_kdbg:
+    free(kdbg);
+out_pdb:
+    pdb_exit(&pdb);
+out_pdb_file:
+    unlink(PDB_NAME);
+out_ps:
+    pa_space_destroy(&ps);
+out_elf:
+    QEMU_Elf_exit(&qemu_elf);
+
+    return err;
+}
diff --git a/contrib/elf2dmp/pdb.c b/contrib/elf2dmp/pdb.c
new file mode 100644
index 0000000..bcb01b4
--- /dev/null
+++ b/contrib/elf2dmp/pdb.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * Based on source of Wine project
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#include "qemu/osdep.h"
+#include "pdb.h"
+#include "err.h"
+
+/* Size of stream `idx` as recorded in the reader's stream directory. */
+static uint32_t pdb_get_file_size(const struct pdb_reader *r, unsigned idx)
+{
+    const PDB_DS_TOC *toc = r->ds.toc;
+
+    return toc->file_size[idx];
+}
+
+/*
+ * Walk r->segs as a sequence of records of 8 + sizeof(pdb_seg) bytes and
+ * return a pointer 8 bytes into the n-th record (n is 1-based).
+ *
+ * No range check is made: for n == 0, or n larger than the number of
+ * records, the returned pointer lies at or beyond r->segs + r->segs_size
+ * (or equals r->segs when the stream is empty), so callers must validate
+ * it before dereferencing.
+ *
+ * NOTE(review): the 8 skipped bytes are presumably a section name, since
+ * the caller prints them with "%.8s" - confirm.
+ */
+static pdb_seg *get_seg_by_num(struct pdb_reader *r, size_t n)
+{
+    size_t i = 0;
+    char *ptr;
+
+    for (ptr = r->segs; (ptr < r->segs + r->segs_size); ) {
+        i++;
+        /* step over the first 8 bytes of the record */
+        ptr += 8;
+        if (i == n) {
+            break;
+        }
+        /* not the wanted record: step over the rest of it */
+        ptr += sizeof(pdb_seg);
+    }
+
+    return (pdb_seg *)ptr;
+}
+
+/*
+ * Scan the global symbol stream (r->modimage) for an S_PUB_V3 record whose
+ * name equals `name` and return the symbol's RVA: dword[1] of the segment
+ * entry it refers to plus the record's offset.
+ *
+ * Returns 0 if the symbol is not found, if the scan hits a malformed
+ * record, or if the record refers to a segment outside r->segs.
+ */
+uint64_t pdb_find_public_v3_symbol(struct pdb_reader *r, const char *name)
+{
+    size_t size = pdb_get_file_size(r, r->symbols->gsym_file);
+    int length;
+    const union codeview_symbol *sym;
+    const uint8_t *root = r->modimage;
+    size_t i;
+
+    for (i = 0; i < size; i += length) {
+        sym = (const void *)(root + i);
+        /* generic.len does not count the 2 bytes of the len field itself */
+        length = sym->generic.len + 2;
+
+        if (!sym->generic.id || length < 4) {
+            break;
+        }
+
+        if (sym->generic.id == S_PUB_V3 &&
+                !strcmp(name, sym->public_v3.name)) {
+            pdb_seg *segment = get_seg_by_num(r, sym->public_v3.segment);
+            size_t seg_off = (char *)segment - r->segs;
+            uint32_t sect_rva;
+            uint64_t rva;
+
+            /*
+             * get_seg_by_num() does no range checking, so make sure the
+             * whole entry lies inside the segments stream before use.
+             */
+            if (seg_off > r->segs_size ||
+                    r->segs_size - seg_off < sizeof(*segment)) {
+                eprintf("%s: segment %d is out of range\n", name,
+                        sym->public_v3.segment);
+                break;
+            }
+
+            sect_rva = segment->dword[1];
+            rva = sect_rva + sym->public_v3.offset;
+
+            printf("%s: 0x%016x(%d:\'%.8s\') + 0x%08x = 0x%09"PRIx64"\n",
+                    name, sect_rva, sym->public_v3.segment,
+                    ((char *)segment - 8), sym->public_v3.offset, rva);
+            return rva;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Look up public symbol `name` and return its address in an image loaded
+ * at `img_base`, or 0 if the lookup yields no RVA.
+ */
+uint64_t pdb_resolve(uint64_t img_base, struct pdb_reader *r, const char *name)
+{
+    const uint64_t sym_rva = pdb_find_public_v3_symbol(r, name);
+
+    return sym_rva ? img_base + sym_rva : 0;
+}
+
+/* Release the stream directory copy allocated by pdb_reader_ds_init(). */
+static void pdb_reader_ds_exit(struct pdb_reader *r)
+{
+    free(r->ds.toc);
+}
+
+/* Release the two stream copies loaded by pdb_init_symbols(). */
+static void pdb_exit_symbols(struct pdb_reader *r)
+{
+    free(r->modimage);
+    free(r->symbols);
+}
+
+/* Release the segments stream copy loaded by pdb_init_segments(). */
+static void pdb_exit_segments(struct pdb_reader *r)
+{
+    free(r->segs);
+}
+
+/*
+ * Gather a stream of `size` bytes into one contiguous malloc()ed buffer.
+ *
+ * `header` is the start of the PDB file image and `block_list` holds the
+ * indexes of the file blocks that make up the stream, in order.  Whole
+ * blocks are copied, so the buffer is `size` rounded up to a multiple of
+ * the block size.
+ *
+ * Returns the buffer (caller frees), or NULL when `size` is not positive,
+ * the header's block size is zero, a block index is not below
+ * header->num_pages, or memory is exhausted.
+ */
+static void *pdb_ds_read(const PDB_DS_HEADER *header,
+        const uint32_t *block_list, int size)
+{
+    int i, nBlocks;
+    uint8_t *buffer;
+
+    /* A zero block size would divide by zero below */
+    if (size <= 0 || !header->block_size) {
+        return NULL;
+    }
+
+    nBlocks = (size + header->block_size - 1) / header->block_size;
+
+    buffer = malloc((size_t)nBlocks * header->block_size);
+    if (!buffer) {
+        return NULL;
+    }
+
+    for (i = 0; i < nBlocks; i++) {
+        /* Never copy from beyond the blocks the file claims to contain */
+        if (block_list[i] >= header->num_pages) {
+            free(buffer);
+            return NULL;
+        }
+
+        memcpy(buffer + (size_t)i * header->block_size,
+                (const char *)header +
+                (size_t)block_list[i] * header->block_size,
+                header->block_size);
+    }
+
+    return buffer;
+}
+
+/*
+ * Return a malloc()ed copy of stream `file_number`, or NULL if there is no
+ * stream directory, the number is out of range, or the stream's recorded
+ * size is 0 or 0xFFFFFFFF.  The stream is also marked in r->file_used.
+ *
+ * The directory stores num_files sizes followed by the block lists of all
+ * streams back to back, so the wanted list is found by skipping the lists
+ * of all preceding streams.
+ */
+static void *pdb_ds_read_file(struct pdb_reader* r, uint32_t file_number)
+{
+    const uint32_t *block_list;
+    uint32_t block_size;
+    const uint32_t *file_size;
+    size_t i;
+
+    if (!r->ds.toc || file_number >= r->ds.toc->num_files) {
+        return NULL;
+    }
+
+    /* file_used is a fixed-size bitmap: refuse streams it cannot track */
+    if (file_number / 32 >= sizeof(r->file_used) / sizeof(r->file_used[0])) {
+        return NULL;
+    }
+
+    file_size = r->ds.toc->file_size;
+    /* unsigned constant: "1 << 31" would overflow a signed int */
+    r->file_used[file_number / 32] |= 1u << (file_number % 32);
+
+    if (file_size[file_number] == 0 || file_size[file_number] == 0xFFFFFFFF) {
+        return NULL;
+    }
+
+    block_list = file_size + r->ds.toc->num_files;
+    block_size = r->ds.header->block_size;
+
+    for (i = 0; i < file_number; i++) {
+        block_list += (file_size[i] + block_size - 1) / block_size;
+    }
+
+    return pdb_ds_read(r->ds.header, block_list, file_size[file_number]);
+}
+
+/*
+ * Load the stream whose number is r->sidx.segments into r->segs and
+ * record its size in r->segs_size.  Returns 0 on success, 1 if the stream
+ * cannot be read.
+ */
+static int pdb_init_segments(struct pdb_reader *r)
+{
+    const unsigned idx = r->sidx.segments;
+    char *data = pdb_ds_read_file(r, idx);
+
+    if (data == NULL) {
+        return 1;
+    }
+
+    r->segs_size = pdb_get_file_size(r, idx);
+    r->segs = data;
+
+    return 0;
+}
+
+/*
+ * Load stream 3 (kept in r->symbols), copy the PDB_STREAM_INDEXES table
+ * that follows its variable-sized parts into r->sidx, and load the global
+ * symbol stream named by gsym_file into r->modimage.
+ *
+ * Returns 0 on success.  On failure returns 1 with the stream 3 copy freed
+ * again; r->sidx is filled with 0xff bytes until the table is copied.
+ */
+static int pdb_init_symbols(struct pdb_reader *r)
+{
+    PDB_SYMBOLS *dbi;
+    size_t idx_off;
+
+    memset(&r->sidx, -1, sizeof(r->sidx));
+
+    dbi = pdb_ds_read_file(r, 3);
+    if (dbi == NULL) {
+        return 1;
+    }
+
+    r->symbols = dbi;
+
+    if (dbi->stream_index_size == sizeof(PDB_STREAM_INDEXES)) {
+        /* The index table sits behind the header and six sized parts */
+        idx_off = sizeof(PDB_SYMBOLS);
+        idx_off += dbi->module_size;
+        idx_off += dbi->offset_size;
+        idx_off += dbi->hash_size;
+        idx_off += dbi->srcmodule_size;
+        idx_off += dbi->pdbimport_size;
+        idx_off += dbi->unknown2_size;
+        memcpy(&r->sidx, (const char *)dbi + idx_off, sizeof(r->sidx));
+
+        /* Read global symbol table */
+        r->modimage = pdb_ds_read_file(r, dbi->gsym_file);
+        if (r->modimage != NULL) {
+            return 0;
+        }
+    }
+
+    free(dbi);
+
+    return 1;
+}
+
+/*
+ * Attach the reader to the PDB file image starting at `hdr` and load the
+ * stream directory: its block list is stored in block number toc_page and
+ * it is toc_size bytes long.  Returns 0 on success, 1 if the directory
+ * cannot be read.
+ */
+static int pdb_reader_ds_init(struct pdb_reader *r, PDB_DS_HEADER *hdr)
+{
+    uint32_t toc_list_off = hdr->toc_page * hdr->block_size;
+    uint32_t *toc_blocks = (uint32_t *)((uint8_t *)hdr + toc_list_off);
+
+    memset(r->file_used, 0, sizeof(r->file_used));
+    r->ds.header = hdr;
+    r->ds.toc = pdb_ds_read(hdr, toc_blocks, hdr->toc_size);
+
+    return r->ds.toc ? 0 : 1;
+}
+
+/*
+ * Initialise reader `r` from the PDB file image at `data`: check the MSF
+ * 7.00 signature, then load the stream directory, stream 1, the symbols
+ * and the segments.  Returns 0 on success; on failure returns 1 after
+ * releasing whatever had been loaded so far.
+ */
+static int pdb_reader_init(struct pdb_reader *r, void *data)
+{
+    const char pdb7[] = "Microsoft C/C++ MSF 7.00";
+
+    /* Compare without the terminating NUL of the literal */
+    if (memcmp(data, pdb7, sizeof(pdb7) - 1) != 0) {
+        return 1;
+    }
+
+    if (pdb_reader_ds_init(r, data)) {
+        return 1;
+    }
+
+    r->ds.root = pdb_ds_read_file(r, 1);
+    if (r->ds.root == NULL) {
+        goto fail_ds;
+    }
+
+    if (pdb_init_symbols(r)) {
+        goto fail_root;
+    }
+
+    if (pdb_init_segments(r)) {
+        goto fail_symbols;
+    }
+
+    return 0;
+
+    /* Unwind in reverse order of construction */
+fail_symbols:
+    pdb_exit_symbols(r);
+fail_root:
+    free(r->ds.root);
+fail_ds:
+    pdb_reader_ds_exit(r);
+
+    return 1;
+}
+
+/*
+ * Release everything pdb_reader_init() loaded, in reverse order of
+ * construction: segments, symbols, stream 1 and the stream directory.
+ */
+static void pdb_reader_exit(struct pdb_reader *r)
+{
+    pdb_exit_segments(r);
+    pdb_exit_symbols(r);
+    free(r->ds.root);
+    pdb_reader_ds_exit(r);
+}
+
+/*
+ * Open PDB file `name`, map it read-only and initialise `reader` from the
+ * mapping.  On success the descriptor and the mapping stay open until
+ * pdb_exit() is called.
+ *
+ * Returns 0 on success, 1 on failure (nothing is left open in that case).
+ */
+int pdb_init_from_file(const char *name, struct pdb_reader *reader)
+{
+    int err = 0;
+    int fd;
+    void *map;
+    struct stat st;
+
+    fd = open(name, O_RDONLY, 0);
+    if (fd == -1) {
+        eprintf("Failed to open PDB file \'%s\'\n", name);
+        return 1;
+    }
+    reader->fd = fd;
+
+    /* st is only meaningful if fstat() succeeded */
+    if (fstat(fd, &st) == -1) {
+        eprintf("Failed to get size of PDB file \'%s\'\n", name);
+        err = 1;
+        goto out_fd;
+    }
+    reader->file_size = st.st_size;
+
+    map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (map == MAP_FAILED) {
+        eprintf("Failed to map PDB file\n");
+        err = 1;
+        goto out_fd;
+    }
+
+    if (pdb_reader_init(reader, map)) {
+        err = 1;
+        goto out_unmap;
+    }
+
+    return 0;
+
+out_unmap:
+    munmap(map, st.st_size);
+out_fd:
+    close(fd);
+
+    return err;
+}
+
+/*
+ * Tear down a reader set up by pdb_init_from_file(): free the stream
+ * copies, then unmap the file image and close its descriptor.
+ */
+void pdb_exit(struct pdb_reader *reader)
+{
+    /* The stream copies are separate allocations, not part of the mapping */
+    pdb_reader_exit(reader);
+
+    munmap(reader->ds.header, reader->file_size);
+    close(reader->fd);
+}
diff --git a/contrib/elf2dmp/pdb.h b/contrib/elf2dmp/pdb.h
new file mode 100644
index 0000000..4351a2d
--- /dev/null
+++ b/contrib/elf2dmp/pdb.h
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#ifndef PDB_H
+#define PDB_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/* GUID as embedded in PDB_DS_ROOT below. */
+typedef struct GUID {
+    unsigned int Data1;
+    unsigned short Data2;
+    unsigned short Data3;
+    unsigned char Data4[8];
+} GUID;
+
+/* Not referenced by pdb.c. */
+struct PDB_FILE {
+    uint32_t size;
+    uint32_t unknown;
+};
+
+/*
+ * Header at the very start of a PDB file; pdb.c overlays it on the mapped
+ * file image.  Fields without a comment are not read by pdb.c.
+ */
+typedef struct PDB_DS_HEADER {
+    char signature[32];     /* starts with "Microsoft C/C++ MSF 7.00" */
+    uint32_t block_size;    /* block indexes are multiplied by this */
+    uint32_t unknown1;
+    uint32_t num_pages;
+    uint32_t toc_size;      /* size in bytes of the stream directory */
+    uint32_t unknown2;
+    uint32_t toc_page;      /* block holding the directory's block list */
+} PDB_DS_HEADER;
+
+/*
+ * Stream directory.  file_size really has num_files entries; pdb.c treats
+ * the uint32_t words that follow them as the block lists of all streams,
+ * stored back to back.
+ */
+typedef struct PDB_DS_TOC {
+    uint32_t num_files;
+    uint32_t file_size[1];
+} PDB_DS_TOC;
+
+/*
+ * Layout given to stream 1, which pdb.c loads into pdb_reader.ds.root;
+ * pdb.c does not read any of the fields.
+ */
+typedef struct PDB_DS_ROOT {
+    uint32_t Version;
+    uint32_t TimeDateStamp;
+    uint32_t Age;
+    GUID guid;
+    uint32_t cbNames;
+    char names[1];
+} PDB_DS_ROOT;
+
+/* Not referenced by pdb.c. */
+typedef struct PDB_TYPES_OLD {
+    uint32_t version;
+    uint16_t first_index;
+    uint16_t last_index;
+    uint32_t type_size;
+    uint16_t file;
+    uint16_t pad;
+} PDB_TYPES_OLD;
+
+/* Not referenced by pdb.c. */
+typedef struct PDB_TYPES {
+    uint32_t version;
+    uint32_t type_offset;
+    uint32_t first_index;
+    uint32_t last_index;
+    uint32_t type_size;
+    uint16_t file;
+    uint16_t pad;
+    uint32_t hash_size;
+    uint32_t hash_base;
+    uint32_t hash_offset;
+    uint32_t hash_len;
+    uint32_t search_offset;
+    uint32_t search_len;
+    uint32_t unknown_offset;
+    uint32_t unknown_len;
+} PDB_TYPES;
+
+/* Member of PDB_SYMBOL_FILE below; not referenced by pdb.c. */
+typedef struct PDB_SYMBOL_RANGE {
+    uint16_t segment;
+    uint16_t pad1;
+    uint32_t offset;
+    uint32_t size;
+    uint32_t characteristics;
+    uint16_t index;
+    uint16_t pad2;
+} PDB_SYMBOL_RANGE;
+
+/*
+ * PDB_SYMBOL_RANGE followed by two more fields; member of
+ * PDB_SYMBOL_FILE_EX below.  Not referenced by pdb.c.
+ */
+typedef struct PDB_SYMBOL_RANGE_EX {
+    uint16_t segment;
+    uint16_t pad1;
+    uint32_t offset;
+    uint32_t size;
+    uint32_t characteristics;
+    uint16_t index;
+    uint16_t pad2;
+    uint32_t timestamp;
+    uint32_t unknown;
+} PDB_SYMBOL_RANGE_EX;
+
+/* Not referenced by pdb.c. */
+typedef struct PDB_SYMBOL_FILE {
+    uint32_t unknown1;
+    PDB_SYMBOL_RANGE range;
+    uint16_t flag;
+    uint16_t file;
+    uint32_t symbol_size;
+    uint32_t lineno_size;
+    uint32_t unknown2;
+    uint32_t nSrcFiles;
+    uint32_t attribute;
+    char filename[1];
+} PDB_SYMBOL_FILE;
+
+/* Not referenced by pdb.c. */
+typedef struct PDB_SYMBOL_FILE_EX {
+    uint32_t unknown1;
+    PDB_SYMBOL_RANGE_EX range;
+    uint16_t flag;
+    uint16_t file;
+    uint32_t symbol_size;
+    uint32_t lineno_size;
+    uint32_t unknown2;
+    uint32_t nSrcFiles;
+    uint32_t attribute;
+    uint32_t reserved[2];
+    char filename[1];
+} PDB_SYMBOL_FILE_EX;
+
+/* Not referenced by pdb.c. */
+typedef struct PDB_SYMBOL_SOURCE {
+    uint16_t nModules;
+    uint16_t nSrcFiles;
+    uint16_t table[1];
+} PDB_SYMBOL_SOURCE;
+
+/* Not referenced by pdb.c. */
+typedef struct PDB_SYMBOL_IMPORT {
+    uint32_t unknown1;
+    uint32_t unknown2;
+    uint32_t TimeDateStamp;
+    uint32_t Age;
+    char filename[1];
+} PDB_SYMBOL_IMPORT;
+
+/* Not referenced by pdb.c, which only uses PDB_SYMBOLS. */
+typedef struct PDB_SYMBOLS_OLD {
+    uint16_t hash1_file;
+    uint16_t hash2_file;
+    uint16_t gsym_file;
+    uint16_t pad;
+    uint32_t module_size;
+    uint32_t offset_size;
+    uint32_t hash_size;
+    uint32_t srcmodule_size;
+} PDB_SYMBOLS_OLD;
+
+/*
+ * Header of stream 3 as loaded by pdb.c.  pdb.c finds the
+ * PDB_STREAM_INDEXES table by adding module_size, offset_size, hash_size,
+ * srcmodule_size, pdbimport_size and unknown2_size to the end of this
+ * header.  Fields without a comment are not read by pdb.c.
+ */
+typedef struct PDB_SYMBOLS {
+    uint32_t signature;
+    uint32_t version;
+    uint32_t unknown;
+    uint32_t hash1_file;
+    uint32_t hash2_file;
+    uint16_t gsym_file;         /* stream number of the global symbols */
+    uint16_t unknown1;
+    uint32_t module_size;
+    uint32_t offset_size;
+    uint32_t hash_size;
+    uint32_t srcmodule_size;
+    uint32_t pdbimport_size;
+    uint32_t resvd0;
+    uint32_t stream_index_size; /* must equal sizeof(PDB_STREAM_INDEXES) */
+    uint32_t unknown2_size;
+    uint16_t resvd3;
+    uint16_t machine;
+    uint32_t resvd4;
+} PDB_SYMBOLS;
+
+/* Not referenced by pdb.c, which only uses PDB_STREAM_INDEXES. */
+typedef struct {
+    uint16_t FPO;
+    uint16_t unk0;
+    uint16_t unk1;
+    uint16_t unk2;
+    uint16_t unk3;
+    uint16_t segments;
+} PDB_STREAM_INDEXES_OLD;
+
+/*
+ * Table of stream numbers copied out of stream 3 by pdb.c.  Only
+ * `segments` is read there.
+ */
+typedef struct {
+    uint16_t FPO;
+    uint16_t unk0;
+    uint16_t unk1;
+    uint16_t unk2;
+    uint16_t unk3;
+    uint16_t segments;  /* stream loaded into pdb_reader.segs */
+    uint16_t unk4;
+    uint16_t unk5;
+    uint16_t unk6;
+    uint16_t FPO_EXT;
+    uint16_t unk7;
+} PDB_STREAM_INDEXES;
+
+/*
+ * One record of the global symbol stream.  Every record starts with the
+ * `generic` prefix; `len` does not count its own two bytes, so the next
+ * record starts len + 2 bytes further on.
+ */
+union codeview_symbol {
+    struct {
+        int16_t len;
+        int16_t id;
+    } generic;
+
+    /* Layout of a record whose id is S_PUB_V3 */
+    struct {
+        int16_t len;
+        int16_t id;
+        uint32_t symtype;
+        uint32_t offset;    /* added to the segment's dword[1] by pdb.c */
+        uint16_t segment;   /* 1-based entry number in the segments stream */
+        char name[1];       /* compared with strcmp(), so NUL-terminated */
+    } public_v3;
+};
+
+#define S_PUB_V3        0x110E
+
+/*
+ * The 32 bytes that follow the first 8 bytes of each entry of the segments
+ * stream.  pdb.c uses dword[1] as the RVA of the section.
+ */
+typedef struct pdb_seg {
+    uint32_t dword[8];
+} __attribute__ ((packed)) pdb_seg;
+
+#define IMAGE_FILE_MACHINE_I386 0x014c
+#define IMAGE_FILE_MACHINE_AMD64 0x8664
+
+/*
+ * State of one open PDB file, set up by pdb_init_from_file() and torn down
+ * by pdb_exit().
+ */
+struct pdb_reader {
+    int fd;                     /* descriptor of the open PDB file */
+    size_t file_size;           /* size of the file and of its mapping */
+    struct {
+        PDB_DS_HEADER *header;  /* start of the mapped file image */
+        PDB_DS_TOC *toc;        /* malloc()ed copy of the stream directory */
+        PDB_DS_ROOT *root;      /* malloc()ed copy of stream 1 */
+    } ds;
+    uint32_t file_used[1024];   /* bitmap: one bit per stream requested */
+    PDB_SYMBOLS *symbols;       /* malloc()ed copy of stream 3 */
+    PDB_STREAM_INDEXES sidx;    /* stream numbers copied out of stream 3 */
+    uint8_t *modimage;          /* malloc()ed copy of the gsym_file stream */
+    char *segs;                 /* malloc()ed copy of the segments stream */
+    size_t segs_size;           /* recorded size of the segments stream */
+};
+
+/* Open and parse PDB file `name`; returns 0 on success, 1 on failure. */
+int pdb_init_from_file(const char *name, struct pdb_reader *reader);
+/* Release everything pdb_init_from_file() set up. */
+void pdb_exit(struct pdb_reader *reader);
+/* Address of symbol `name` for an image loaded at img_base, 0 if not found. */
+uint64_t pdb_resolve(uint64_t img_base, struct pdb_reader *r, const char *name);
+/* RVA of public symbol `name`, 0 if not found. */
+uint64_t pdb_find_public_v3_symbol(struct pdb_reader *reader, const char *name);
+
+#endif /* PDB_H */
diff --git a/contrib/elf2dmp/pe.h b/contrib/elf2dmp/pe.h
new file mode 100644
index 0000000..374e06a
--- /dev/null
+++ b/contrib/elf2dmp/pe.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#ifndef PE_H
+#define PE_H
+
+#include <stdint.h>
+
+/*
+ * Packed 64-byte (0x40) header. The comment on each field gives its byte
+ * offset; e_lfanew at 0x3c holds the offset to the extended header.
+ */
+typedef struct IMAGE_DOS_HEADER {
+    uint16_t  e_magic;      /* 0x00: MZ Header signature */
+    uint16_t  e_cblp;       /* 0x02: Bytes on last page of file */
+    uint16_t  e_cp;         /* 0x04: Pages in file */
+    uint16_t  e_crlc;       /* 0x06: Relocations */
+    uint16_t  e_cparhdr;    /* 0x08: Size of header in paragraphs */
+    uint16_t  e_minalloc;   /* 0x0a: Minimum extra paragraphs needed */
+    uint16_t  e_maxalloc;   /* 0x0c: Maximum extra paragraphs needed */
+    uint16_t  e_ss;         /* 0x0e: Initial (relative) SS value */
+    uint16_t  e_sp;         /* 0x10: Initial SP value */
+    uint16_t  e_csum;       /* 0x12: Checksum */
+    uint16_t  e_ip;         /* 0x14: Initial IP value */
+    uint16_t  e_cs;         /* 0x16: Initial (relative) CS value */
+    uint16_t  e_lfarlc;     /* 0x18: File address of relocation table */
+    uint16_t  e_ovno;       /* 0x1a: Overlay number */
+    uint16_t  e_res[4];     /* 0x1c: Reserved words */
+    uint16_t  e_oemid;      /* 0x24: OEM identifier (for e_oeminfo) */
+    uint16_t  e_oeminfo;    /* 0x26: OEM information; e_oemid specific */
+    uint16_t  e_res2[10];   /* 0x28: Reserved words */
+    uint32_t  e_lfanew;     /* 0x3c: Offset to extended header */
+} __attribute__ ((packed)) IMAGE_DOS_HEADER;
+
+/*
+ * Packed 20-byte header, embedded in IMAGE_NT_HEADERS64 as 'FileHeader'
+ * directly after the 32-bit signature.
+ */
+typedef struct IMAGE_FILE_HEADER {
+  uint16_t  Machine;
+  uint16_t  NumberOfSections;
+  uint32_t  TimeDateStamp;
+  uint32_t  PointerToSymbolTable;
+  uint32_t  NumberOfSymbols;
+  uint16_t  SizeOfOptionalHeader;
+  uint16_t  Characteristics;
+} __attribute__ ((packed)) IMAGE_FILE_HEADER;
+
+/*
+ * Packed 8-byte {VirtualAddress, Size} pair. IMAGE_OPTIONAL_HEADER64 ends
+ * with an array of IMAGE_NUMBEROF_DIRECTORY_ENTRIES of these.
+ * NOTE(review): VirtualAddress is presumably relative to the image base,
+ * not a file offset -- confirm against the code that reads it.
+ */
+typedef struct IMAGE_DATA_DIRECTORY {
+  uint32_t VirtualAddress;
+  uint32_t Size;
+} __attribute__ ((packed)) IMAGE_DATA_DIRECTORY;
+
+#define IMAGE_NUMBEROF_DIRECTORY_ENTRIES 16
+
+/*
+ * Packed 240-byte header: 112 bytes of scalar fields followed by
+ * IMAGE_NUMBEROF_DIRECTORY_ENTRIES (16) data directory entries of 8 bytes
+ * each. Embedded in IMAGE_NT_HEADERS64 as 'OptionalHeader'.
+ */
+typedef struct IMAGE_OPTIONAL_HEADER64 {
+  uint16_t  Magic; /* 0x20b */
+  uint8_t   MajorLinkerVersion;
+  uint8_t   MinorLinkerVersion;
+  uint32_t  SizeOfCode;
+  uint32_t  SizeOfInitializedData;
+  uint32_t  SizeOfUninitializedData;
+  uint32_t  AddressOfEntryPoint;
+  uint32_t  BaseOfCode;
+  uint64_t  ImageBase;
+  uint32_t  SectionAlignment;
+  uint32_t  FileAlignment;
+  uint16_t  MajorOperatingSystemVersion;
+  uint16_t  MinorOperatingSystemVersion;
+  uint16_t  MajorImageVersion;
+  uint16_t  MinorImageVersion;
+  uint16_t  MajorSubsystemVersion;
+  uint16_t  MinorSubsystemVersion;
+  uint32_t  Win32VersionValue;
+  uint32_t  SizeOfImage;
+  uint32_t  SizeOfHeaders;
+  uint32_t  CheckSum;
+  uint16_t  Subsystem;
+  uint16_t  DllCharacteristics;
+  uint64_t  SizeOfStackReserve;
+  uint64_t  SizeOfStackCommit;
+  uint64_t  SizeOfHeapReserve;
+  uint64_t  SizeOfHeapCommit;
+  uint32_t  LoaderFlags;
+  uint32_t  NumberOfRvaAndSizes;
+  /* Fixed-size array; see IMAGE_FILE_DEBUG_DIRECTORY for one index. */
+  IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES];
+} __attribute__ ((packed)) IMAGE_OPTIONAL_HEADER64;
+
+/*
+ * Packed 264-byte block: a 32-bit signature, the 20-byte file header and
+ * the 240-byte 64-bit optional header, with no padding in between.
+ */
+typedef struct IMAGE_NT_HEADERS64 {
+  uint32_t Signature;
+  IMAGE_FILE_HEADER FileHeader;
+  IMAGE_OPTIONAL_HEADER64 OptionalHeader;
+} __attribute__ ((packed)) IMAGE_NT_HEADERS64;
+
+#define IMAGE_FILE_DEBUG_DIRECTORY  6
+
+/*
+ * Packed 28-byte debug directory entry.
+ * NOTE(review): 'Type' is presumably matched against
+ * IMAGE_DEBUG_TYPE_CODEVIEW, and AddressOfRawData / PointerToRawData
+ * presumably locate the same data in memory and in the file respectively
+ * -- confirm against the code that walks the directory.
+ */
+typedef struct IMAGE_DEBUG_DIRECTORY {
+  uint32_t Characteristics;
+  uint32_t TimeDateStamp;
+  uint16_t MajorVersion;
+  uint16_t MinorVersion;
+  uint32_t Type;
+  uint32_t SizeOfData;
+  uint32_t AddressOfRawData;
+  uint32_t PointerToRawData;
+} __attribute__ ((packed)) IMAGE_DEBUG_DIRECTORY;
+
+#define IMAGE_DEBUG_TYPE_CODEVIEW   2
+
+/*
+ * Packed 16-byte GUID split into a 32-bit field, two 16-bit fields and
+ * eight trailing bytes (kept as a 2-byte and a 6-byte array).
+ */
+typedef struct guid_t {
+    uint32_t a;
+    uint16_t b;
+    uint16_t c;
+    uint8_t d[2];
+    uint8_t  e[6];
+} __attribute__ ((packed)) guid_t;
+
+/*
+ * Packed record with a 24-byte fixed part (4-byte signature, 16-byte GUID,
+ * 32-bit age) followed by a flexible 'name' array.
+ * NOTE(review): 'Signature' presumably holds the characters "RSDS" and
+ * 'name' presumably is a NUL-terminated PDB file name -- confirm against
+ * the code that parses it.
+ */
+typedef struct OMFSignatureRSDS {
+    char        Signature[4];
+    guid_t      guid;
+    uint32_t    age;
+    char        name[];
+} __attribute__ ((packed)) OMFSignatureRSDS;
+
+#endif /* PE_H */
diff --git a/contrib/elf2dmp/qemu_elf.c b/contrib/elf2dmp/qemu_elf.c
new file mode 100644
index 0000000..e9c0d25
--- /dev/null
+++ b/contrib/elf2dmp/qemu_elf.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "err.h"
+#include "qemu_elf.h"
+
+#define QEMU_NOTE_NAME "QEMU"
+
+/*
+ * Round n up to a multiple of d with a mask, so d must be a power of two.
+ * In "0 ? (n) : (d)" the n operand is never selected; the conditional only
+ * gives -d the common type of n and d, so the mask is as wide as n.
+ */
+#ifndef ROUND_UP
+#define ROUND_UP(n, d) (((n) + (d) - 1) & -(0 ? (n) : (d)))
+#endif
+
+/* Integer division of n by d, rounded up; works for any non-zero d. */
+#ifndef DIV_ROUND_UP
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#endif
+
+/*
+ * Total size in bytes of one note: header, name and descriptor are each
+ * padded up to a multiple of 4 bytes and then added together.
+ */
+#define ELF_NOTE_SIZE(hdr_size, name_size, desc_size)   \
+    ((DIV_ROUND_UP((hdr_size), 4) +                     \
+      DIV_ROUND_UP((name_size), 4) +                    \
+      DIV_ROUND_UP((desc_size), 4)) * 4)
+
+/*
+ * Return bit 63 of the GS segment base saved in @s (1 if set, 0 if clear).
+ * NOTE(review): presumably used to tell whether GS points into the upper
+ * (kernel) half of the address space -- confirm with the callers.
+ */
+int is_system(QEMUCPUState *s)
+{
+    uint64_t gs_base = s->gs.base;
+
+    return gs_base >> 63;
+}
+
+/* Return the note's name, which starts right after the 4-byte-padded header. */
+static char *nhdr_get_name(Elf64_Nhdr *nhdr)
+{
+    char *note = (char *)nhdr;
+
+    return note + ROUND_UP(sizeof(*nhdr), 4);
+}
+
+/* Return the note's descriptor, which follows the 4-byte-padded name. */
+static void *nhdr_get_desc(Elf64_Nhdr *nhdr)
+{
+    char *name = nhdr_get_name(nhdr);
+
+    return name + ROUND_UP(nhdr->n_namesz, 4);
+}
+
+/*
+ * Return the address just past this note, i.e. where the next note header
+ * starts. Nothing here checks that the result is still inside the note
+ * segment; the caller compares it against the segment end.
+ */
+static Elf64_Nhdr *nhdr_get_next(Elf64_Nhdr *nhdr)
+{
+    uint8_t *note = (uint8_t *)nhdr;
+
+    note += ELF_NOTE_SIZE(sizeof(*nhdr), nhdr->n_namesz, nhdr->n_descsz);
+
+    return (void *)note;
+}
+
+/*
+ * Return the program header table of the ELF64 image mapped at @map,
+ * located e_phoff bytes from the start of the image. The offset is taken
+ * from the header as-is, without validation.
+ */
+Elf64_Phdr *elf64_getphdr(void *map)
+{
+    uint8_t *image = map;
+    Elf64_Ehdr *ehdr = map;
+
+    return (void *)(image + ehdr->e_phoff);
+}
+
+/* Return e_phnum from the ELF64 header of the image mapped at @map. */
+Elf64_Half elf_getphdrnum(void *map)
+{
+    return ((Elf64_Ehdr *)map)->e_phnum;
+}
+
+/*
+ * Locate the QEMU CPU state notes in the dump and index them in qe->state.
+ *
+ * The first program header must be PT_NOTE. Its notes are walked twice:
+ * once to count the notes named QEMU_NOTE_NAME (and to detect states too
+ * short to carry KERNEL_GS_BASE), then again to store a pointer to each
+ * state descriptor. The stored pointers refer into qe->map, so they are
+ * only valid while the mapping exists.
+ *
+ * Returns 0 on success, 1 on failure: no PT_NOTE, a note segment that does
+ * not fit inside the mapped file, no CPU state notes, or allocation
+ * failure.
+ */
+static int init_states(QEMU_Elf *qe)
+{
+    Elf64_Phdr *phdr = elf64_getphdr(qe->map);
+    Elf64_Nhdr *start, *end, *nhdr;
+    size_t cpu_nr = 0;
+
+    if (phdr[0].p_type != PT_NOTE) {
+        eprintf("Failed to find PT_NOTE\n");
+        return 1;
+    }
+
+    /*
+     * Refuse a note segment that is not fully contained in the mapping;
+     * otherwise the loops below would walk outside of qe->map.
+     */
+    if (phdr[0].p_offset > qe->size ||
+        phdr[0].p_memsz > qe->size - phdr[0].p_offset) {
+        eprintf("PT_NOTE segment is out of ELF file bounds\n");
+        return 1;
+    }
+
+    start = (void *)((uint8_t *)qe->map + phdr[0].p_offset);
+    end = (void *)((uint8_t *)start + phdr[0].p_memsz);
+
+    qe->has_kernel_gs_base = 1;
+
+    /* First pass: count the CPU states and check their declared size. */
+    for (nhdr = start; nhdr < end; nhdr = nhdr_get_next(nhdr)) {
+        if (!strcmp(nhdr_get_name(nhdr), QEMU_NOTE_NAME)) {
+            QEMUCPUState *state = nhdr_get_desc(nhdr);
+
+            if (state->size < sizeof(*state)) {
+                eprintf("CPU #%zu: QEMU CPU state size %u doesn't match\n",
+                        cpu_nr, state->size);
+                /*
+                 * We assume either every QEMU CPU state has KERNEL_GS_BASE or
+                 * no one has.
+                 */
+                qe->has_kernel_gs_base = 0;
+            }
+            cpu_nr++;
+        }
+    }
+
+    printf("%zu CPU states has been found\n", cpu_nr);
+
+    /*
+     * malloc(0) may return either NULL or a unique pointer, so handle the
+     * empty case explicitly instead of depending on the C library.
+     */
+    if (!cpu_nr) {
+        eprintf("No QEMU CPU states in PT_NOTE\n");
+        return 1;
+    }
+
+    qe->state = malloc(sizeof(*qe->state) * cpu_nr);
+    if (!qe->state) {
+        return 1;
+    }
+
+    cpu_nr = 0;
+
+    /* Second pass: remember where each CPU state descriptor lives. */
+    for (nhdr = start; nhdr < end; nhdr = nhdr_get_next(nhdr)) {
+        if (!strcmp(nhdr_get_name(nhdr), QEMU_NOTE_NAME)) {
+            qe->state[cpu_nr] = nhdr_get_desc(nhdr);
+            cpu_nr++;
+        }
+    }
+
+    qe->state_nr = cpu_nr;
+
+    return 0;
+}
+
+/*
+ * Release the pointer array allocated by init_states(). The CPU states
+ * themselves live inside qe->map and are not freed here.
+ */
+static void exit_states(QEMU_Elf *qe)
+{
+    free(qe->state);
+}
+
+/*
+ * Open the ELF dump at @filename, map the whole file privately into memory
+ * and index its QEMU CPU states via init_states().
+ *
+ * On success qe->fd, qe->size, qe->map and the state array are valid and
+ * must be released with QEMU_Elf_exit(). On failure everything acquired so
+ * far is released again before returning.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+int QEMU_Elf_init(QEMU_Elf *qe, const char *filename)
+{
+    struct stat st;
+
+    qe->fd = open(filename, O_RDONLY, 0);
+    if (qe->fd == -1) {
+        eprintf("Failed to open ELF dump file \'%s\'\n", filename);
+        return 1;
+    }
+
+    /* st is only meaningful if fstat() succeeded. */
+    if (fstat(qe->fd, &st) == -1) {
+        eprintf("Failed to get size of ELF dump file\n");
+        goto out_fd;
+    }
+    qe->size = st.st_size;
+
+    /*
+     * MAP_PRIVATE with PROT_WRITE: the mapping can be modified in memory
+     * without the changes being written back to the file.
+     */
+    qe->map = mmap(NULL, qe->size, PROT_READ | PROT_WRITE,
+            MAP_PRIVATE, qe->fd, 0);
+    if (qe->map == MAP_FAILED) {
+        eprintf("Failed to map ELF file\n");
+        goto out_fd;
+    }
+
+    if (init_states(qe)) {
+        eprintf("Failed to extract QEMU CPU states\n");
+        goto out_unmap;
+    }
+
+    return 0;
+
+    /* Error unwinding: every path that reaches the labels has failed. */
+out_unmap:
+    munmap(qe->map, qe->size);
+out_fd:
+    close(qe->fd);
+
+    return 1;
+}
+
+/*
+ * Undo a successful QEMU_Elf_init(): free the state pointer array, unmap
+ * the file and close its descriptor. The CPU state pointers refer into the
+ * mapping, so they must not be used after this call.
+ */
+void QEMU_Elf_exit(QEMU_Elf *qe)
+{
+    exit_states(qe);
+    munmap(qe->map, qe->size);
+    close(qe->fd);
+}
diff --git a/contrib/elf2dmp/qemu_elf.h b/contrib/elf2dmp/qemu_elf.h
new file mode 100644
index 0000000..d85d655
--- /dev/null
+++ b/contrib/elf2dmp/qemu_elf.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#ifndef QEMU_ELF_H
+#define QEMU_ELF_H
+
+#include <stdint.h>
+#include <elf.h>
+
+/*
+ * One segment/table register as stored in a QEMUCPUState. 'pad' keeps the
+ * 64-bit 'base' at an 8-byte offset (four 32-bit fields precede it).
+ */
+typedef struct QEMUCPUSegment {
+    uint32_t selector;
+    uint32_t limit;
+    uint32_t flags;
+    uint32_t pad;
+    uint64_t base;
+} QEMUCPUSegment;
+
+/*
+ * Per-CPU register state found in the descriptor of a note named "QEMU".
+ * 'size' is the length the dump declares for this state: a value smaller
+ * than sizeof(QEMUCPUState) is taken to mean the trailing kernel_gs_base
+ * field is absent.
+ */
+typedef struct QEMUCPUState {
+    uint32_t version;
+    uint32_t size;
+    uint64_t rax, rbx, rcx, rdx, rsi, rdi, rsp, rbp;
+    uint64_t r8, r9, r10, r11, r12, r13, r14, r15;
+    uint64_t rip, rflags;
+    QEMUCPUSegment cs, ds, es, fs, gs, ss;
+    QEMUCPUSegment ldt, tr, gdt, idt;
+    uint64_t cr[5];
+    /* Last field; only present when 'size' covers the whole structure. */
+    uint64_t kernel_gs_base;
+} QEMUCPUState;
+
+int is_system(QEMUCPUState *s);
+
+/*
+ * An opened and mapped ELF dump, set up by QEMU_Elf_init() and torn down
+ * by QEMU_Elf_exit().
+ */
+typedef struct QEMU_Elf {
+    int fd;                 /* descriptor of the dump file */
+    size_t size;            /* file size, also the length of 'map' */
+    void *map;              /* private mapping of the whole file */
+    QEMUCPUState **state;   /* malloc'ed array of pointers into 'map' */
+    size_t state_nr;        /* number of entries in 'state' */
+    int has_kernel_gs_base; /* 0 if any state is shorter than the struct */
+} QEMU_Elf;
+
+int QEMU_Elf_init(QEMU_Elf *qe, const char *filename);
+void QEMU_Elf_exit(QEMU_Elf *qe);
+
+Elf64_Phdr *elf64_getphdr(void *map);
+Elf64_Half elf_getphdrnum(void *map);
+
+#endif /* QEMU_ELF_H */
diff --git a/cpus.c b/cpus.c
index 7197883..3978f63 100644
--- a/cpus.c
+++ b/cpus.c
@@ -211,12 +211,12 @@
                 error_setg(errp, "No MTTCG when icount is enabled");
             } else {
 #ifndef TARGET_SUPPORTS_MTTCG
-                error_report("Guest not yet converted to MTTCG - "
-                             "you may get unexpected results");
+                warn_report("Guest not yet converted to MTTCG - "
+                            "you may get unexpected results");
 #endif
                 if (!check_tcg_memory_orders_compatible()) {
-                    error_report("Guest expects a stronger memory ordering "
-                                 "than the host provides");
+                    warn_report("Guest expects a stronger memory ordering "
+                                "than the host provides");
                     error_printf("This may cause strange/hard to debug errors\n");
                 }
                 mttcg_enabled = true;
@@ -245,21 +245,27 @@
  * account executed instructions. This is done by the TCG vCPU
  * thread so the main-loop can see time has moved forward.
  */
-void cpu_update_icount(CPUState *cpu)
+static void cpu_update_icount_locked(CPUState *cpu)
 {
     int64_t executed = cpu_get_icount_executed(cpu);
     cpu->icount_budget -= executed;
 
-#ifndef CONFIG_ATOMIC64
+    atomic_set_i64(&timers_state.qemu_icount,
+                   timers_state.qemu_icount + executed);
+}
+
+/*
+ * Update the global shared timers_state.qemu_icount to take into
+ * account executed instructions. This is done by the TCG vCPU
+ * thread so the main-loop can see time has moved forward.
+ */
+void cpu_update_icount(CPUState *cpu)
+{
     seqlock_write_lock(&timers_state.vm_clock_seqlock,
                        &timers_state.vm_clock_lock);
-#endif
-    atomic_set__nocheck(&timers_state.qemu_icount,
-                        timers_state.qemu_icount + executed);
-#ifndef CONFIG_ATOMIC64
+    cpu_update_icount_locked(cpu);
     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                          &timers_state.vm_clock_lock);
-#endif
 }
 
 static int64_t cpu_get_icount_raw_locked(void)
@@ -272,16 +278,17 @@
             exit(1);
         }
         /* Take into account what has run */
-        cpu_update_icount(cpu);
+        cpu_update_icount_locked(cpu);
     }
-    /* The read is protected by the seqlock, so __nocheck is okay.  */
-    return atomic_read__nocheck(&timers_state.qemu_icount);
+    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
+    return atomic_read_i64(&timers_state.qemu_icount);
 }
 
 static int64_t cpu_get_icount_locked(void)
 {
     int64_t icount = cpu_get_icount_raw_locked();
-    return atomic_read__nocheck(&timers_state.qemu_icount_bias) + cpu_icount_to_ns(icount);
+    return atomic_read_i64(&timers_state.qemu_icount_bias) +
+        cpu_icount_to_ns(icount);
 }
 
 int64_t cpu_get_icount_raw(void)
@@ -454,9 +461,9 @@
                    timers_state.icount_time_shift + 1);
     }
     last_delta = delta;
-    atomic_set__nocheck(&timers_state.qemu_icount_bias,
-                        cur_icount - (timers_state.qemu_icount
-                                      << timers_state.icount_time_shift));
+    atomic_set_i64(&timers_state.qemu_icount_bias,
+                   cur_icount - (timers_state.qemu_icount
+                                 << timers_state.icount_time_shift));
     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                          &timers_state.vm_clock_lock);
 }
@@ -502,8 +509,8 @@
     seqlock_write_lock(&timers_state.vm_clock_seqlock,
                        &timers_state.vm_clock_lock);
     if (runstate_is_running()) {
-        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
-                                     cpu_get_clock_locked());
+        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
+                                            cpu_get_clock_locked());
         int64_t warp_delta;
 
         warp_delta = clock - timers_state.vm_clock_warp_start;
@@ -516,8 +523,8 @@
             int64_t delta = clock - cur_icount;
             warp_delta = MIN(warp_delta, delta);
         }
-        atomic_set__nocheck(&timers_state.qemu_icount_bias,
-                            timers_state.qemu_icount_bias + warp_delta);
+        atomic_set_i64(&timers_state.qemu_icount_bias,
+                       timers_state.qemu_icount_bias + warp_delta);
     }
     timers_state.vm_clock_warp_start = -1;
     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
@@ -548,8 +555,8 @@
 
         seqlock_write_lock(&timers_state.vm_clock_seqlock,
                            &timers_state.vm_clock_lock);
-        atomic_set__nocheck(&timers_state.qemu_icount_bias,
-                            timers_state.qemu_icount_bias + warp);
+        atomic_set_i64(&timers_state.qemu_icount_bias,
+                       timers_state.qemu_icount_bias + warp);
         seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                              &timers_state.vm_clock_lock);
 
@@ -576,18 +583,29 @@
         return;
     }
 
-    /* warp clock deterministically in record/replay mode */
-    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
-        return;
-    }
+    if (replay_mode != REPLAY_MODE_PLAY) {
+        if (!all_cpu_threads_idle()) {
+            return;
+        }
 
-    if (!all_cpu_threads_idle()) {
-        return;
-    }
+        if (qtest_enabled()) {
+            /* When testing, qtest commands advance icount.  */
+            return;
+        }
 
-    if (qtest_enabled()) {
-        /* When testing, qtest commands advance icount.  */
-        return;
+        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
+    } else {
+        /* warp clock deterministically in record/replay mode */
+        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
+            /* vCPU is sleeping and warp can't be started.
+               It is probably a race condition: notification sent
+               to vCPU was processed in advance and vCPU went to sleep.
+               Therefore we have to wake it up for doing something. */
+            if (replay_has_checkpoint()) {
+                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+            }
+            return;
+        }
     }
 
     /* We want to use the earliest deadline from ALL vm_clocks */
@@ -620,8 +638,8 @@
              */
             seqlock_write_lock(&timers_state.vm_clock_seqlock,
                                &timers_state.vm_clock_lock);
-            atomic_set__nocheck(&timers_state.qemu_icount_bias,
-                                timers_state.qemu_icount_bias + deadline);
+            atomic_set_i64(&timers_state.qemu_icount_bias,
+                           timers_state.qemu_icount_bias + deadline);
             seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                  &timers_state.vm_clock_lock);
             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
@@ -823,6 +841,7 @@
 void cpu_ticks_init(void)
 {
     seqlock_init(&timers_state.vm_clock_seqlock);
+    qemu_spin_init(&timers_state.vm_clock_lock);
     vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
     throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                            cpu_throttle_timer_tick, NULL);
@@ -964,6 +983,8 @@
     if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
         tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                            kick_tcg_thread, NULL);
+    }
+    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
         timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
     }
 }
@@ -971,9 +992,8 @@
 static void stop_tcg_kick_timer(void)
 {
     assert(!mttcg_enabled);
-    if (tcg_kick_vcpu_timer) {
+    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
         timer_del(tcg_kick_vcpu_timer);
-        tcg_kick_vcpu_timer = NULL;
     }
 }
 
@@ -1405,7 +1425,8 @@
     ret = cpu_exec(cpu);
     cpu_exec_end(cpu);
 #ifdef CONFIG_PROFILER
-    tcg_time += profile_getclock() - ti;
+    atomic_set(&tcg_ctx->prof.cpu_exec_time,
+               tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
 #endif
     return ret;
 }
diff --git a/crypto/Makefile.objs b/crypto/Makefile.objs
index 756bab1..256c9ac 100644
--- a/crypto/Makefile.objs
+++ b/crypto/Makefile.objs
@@ -20,11 +20,11 @@
 crypto-obj-y += tlssession.o
 crypto-obj-y += secret.o
 crypto-obj-$(CONFIG_GCRYPT) += random-gcrypt.o
-crypto-obj-$(if $(CONFIG_GCRYPT),n,$(CONFIG_GNUTLS_RND)) += random-gnutls.o
-crypto-obj-$(if $(CONFIG_GCRYPT),n,$(if $(CONFIG_GNUTLS_RND),n,y)) += random-platform.o
+crypto-obj-$(if $(CONFIG_GCRYPT),n,$(CONFIG_GNUTLS)) += random-gnutls.o
+crypto-obj-$(if $(CONFIG_GCRYPT),n,$(if $(CONFIG_GNUTLS),n,y)) += random-platform.o
 crypto-obj-y += pbkdf.o
-crypto-obj-$(CONFIG_NETTLE_KDF) += pbkdf-nettle.o
-crypto-obj-$(if $(CONFIG_NETTLE_KDF),n,$(CONFIG_GCRYPT_KDF)) += pbkdf-gcrypt.o
+crypto-obj-$(CONFIG_NETTLE) += pbkdf-nettle.o
+crypto-obj-$(if $(CONFIG_NETTLE),n,$(CONFIG_GCRYPT)) += pbkdf-gcrypt.o
 crypto-obj-y += ivgen.o
 crypto-obj-y += ivgen-essiv.o
 crypto-obj-y += ivgen-plain.o
diff --git a/crypto/init.c b/crypto/init.c
index f131c42..c301564 100644
--- a/crypto/init.c
+++ b/crypto/init.c
@@ -37,33 +37,14 @@
 /* #define DEBUG_GNUTLS */
 
 /*
- * If GNUTLS is built against GCrypt then
- *
- *  - When GNUTLS >= 2.12, we must not initialize gcrypt threading
- *    because GNUTLS will do that itself
- *  - When GNUTLS < 2.12 we must always initialize gcrypt threading
- *  - When GNUTLS is disabled we must always initialize gcrypt threading
- *
- * But....
- *
- *    When gcrypt >= 1.6.0 we must not initialize gcrypt threading
- *    because gcrypt will do that itself.
- *
- * So we need to init gcrypt threading if
+ * We need to init gcrypt threading if
  *
  *   - gcrypt < 1.6.0
- * AND
- *      - gnutls < 2.12
- *   OR
- *      - gnutls is disabled
  *
  */
 
 #if (defined(CONFIG_GCRYPT) &&                  \
-     (!defined(CONFIG_GNUTLS) ||                \
-     (LIBGNUTLS_VERSION_NUMBER < 0x020c00)) &&    \
-     (!defined(GCRYPT_VERSION_NUMBER) ||        \
-      (GCRYPT_VERSION_NUMBER < 0x010600)))
+     (GCRYPT_VERSION_NUMBER < 0x010600))
 #define QCRYPTO_INIT_GCRYPT_THREADS
 #else
 #undef QCRYPTO_INIT_GCRYPT_THREADS
diff --git a/crypto/tlscredsx509.c b/crypto/tlscredsx509.c
index 98ee042..d6ab4a9 100644
--- a/crypto/tlscredsx509.c
+++ b/crypto/tlscredsx509.c
@@ -72,14 +72,6 @@
 }
 
 
-#if LIBGNUTLS_VERSION_NUMBER >= 2
-/*
- * The gnutls_x509_crt_get_basic_constraints function isn't
- * available in GNUTLS 1.0.x branches. This isn't critical
- * though, since gnutls_certificate_verify_peers2 will do
- * pretty much the same check at runtime, so we can just
- * disable this code
- */
 static int
 qcrypto_tls_creds_check_cert_basic_constraints(QCryptoTLSCredsX509 *creds,
                                                gnutls_x509_crt_t cert,
@@ -130,7 +122,6 @@
 
     return 0;
 }
-#endif
 
 
 static int
@@ -299,14 +290,12 @@
         return -1;
     }
 
-#if LIBGNUTLS_VERSION_NUMBER >= 2
     if (qcrypto_tls_creds_check_cert_basic_constraints(creds,
                                                        cert, certFile,
                                                        isServer, isCA,
                                                        errp) < 0) {
         return -1;
     }
-#endif
 
     if (qcrypto_tls_creds_check_cert_key_usage(creds,
                                                cert, certFile,
@@ -615,7 +604,6 @@
     }
 
     if (cert != NULL && key != NULL) {
-#if LIBGNUTLS_VERSION_NUMBER >= 0x030111
         char *password = NULL;
         if (creds->passwordid) {
             password = qcrypto_secret_lookup_as_utf8(creds->passwordid,
@@ -630,15 +618,6 @@
                                                     password,
                                                     0);
         g_free(password);
-#else /* LIBGNUTLS_VERSION_NUMBER < 0x030111 */
-        if (creds->passwordid) {
-            error_setg(errp, "PKCS8 decryption requires GNUTLS >= 3.1.11");
-            goto cleanup;
-        }
-        ret = gnutls_certificate_set_x509_key_file(creds->data,
-                                                   cert, key,
-                                                   GNUTLS_X509_FMT_PEM);
-#endif
         if (ret < 0) {
             error_setg(errp, "Cannot load certificate '%s' & key '%s': %s",
                        cert, key, gnutls_strerror(ret));
diff --git a/crypto/tlssession.c b/crypto/tlssession.c
index 66a6fbe1..2f28fa7 100644
--- a/crypto/tlssession.c
+++ b/crypto/tlssession.c
@@ -90,13 +90,7 @@
 }
 
 #define TLS_PRIORITY_ADDITIONAL_ANON "+ANON-DH"
-
-#if GNUTLS_VERSION_MAJOR >= 3
-#define TLS_ECDHE_PSK "+ECDHE-PSK:"
-#else
-#define TLS_ECDHE_PSK ""
-#endif
-#define TLS_PRIORITY_ADDITIONAL_PSK TLS_ECDHE_PSK "+DHE-PSK:+PSK"
+#define TLS_PRIORITY_ADDITIONAL_PSK "+ECDHE-PSK:+DHE-PSK:+PSK"
 
 QCryptoTLSSession *
 qcrypto_tls_session_new(QCryptoTLSCreds *creds,
diff --git a/crypto/xts.c b/crypto/xts.c
index 9521234..4277ad4 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -24,52 +24,75 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/bswap.h"
 #include "crypto/xts.h"
 
-static void xts_mult_x(uint8_t *I)
-{
-    int x;
-    uint8_t t, tt;
+typedef union {
+    uint8_t b[XTS_BLOCK_SIZE];
+    uint64_t u[2];
+} xts_uint128;
 
-    for (x = t = 0; x < 16; x++) {
-        tt = I[x] >> 7;
-        I[x] = ((I[x] << 1) | t) & 0xFF;
-        t = tt;
+static inline void xts_uint128_xor(xts_uint128 *D,
+                                   const xts_uint128 *S1,
+                                   const xts_uint128 *S2)
+{
+    D->u[0] = S1->u[0] ^ S2->u[0];
+    D->u[1] = S1->u[1] ^ S2->u[1];
+}
+
+static inline void xts_uint128_cpu_to_les(xts_uint128 *v)
+{
+    cpu_to_le64s(&v->u[0]);
+    cpu_to_le64s(&v->u[1]);
+}
+
+static inline void xts_uint128_le_to_cpus(xts_uint128 *v)
+{
+    le64_to_cpus(&v->u[0]);
+    le64_to_cpus(&v->u[1]);
+}
+
+static void xts_mult_x(xts_uint128 *I)
+{
+    uint64_t tt;
+
+    xts_uint128_le_to_cpus(I);
+
+    tt = I->u[0] >> 63;
+    I->u[0] <<= 1;
+
+    if (I->u[1] >> 63) {
+        I->u[0] ^= 0x87;
     }
-    if (tt) {
-        I[0] ^= 0x87;
-    }
+    I->u[1] <<= 1;
+    I->u[1] |= tt;
+
+    xts_uint128_cpu_to_les(I);
 }
 
 
 /**
- * xts_tweak_uncrypt:
+ * xts_tweak_encdec:
  * @param ctxt: the cipher context
  * @param func: the cipher function
- * @src: buffer providing the cipher text of XTS_BLOCK_SIZE bytes
- * @dst: buffer to output the plain text of XTS_BLOCK_SIZE bytes
+ * @src: buffer providing the input text of XTS_BLOCK_SIZE bytes
+ * @dst: buffer to output the output text of XTS_BLOCK_SIZE bytes
  * @iv: the initialization vector tweak of XTS_BLOCK_SIZE bytes
  *
- * Decrypt data with a tweak
+ * Encrypt/decrypt data with a tweak
  */
-static void xts_tweak_decrypt(const void *ctx,
-                              xts_cipher_func *func,
-                              const uint8_t *src,
-                              uint8_t *dst,
-                              uint8_t *iv)
+static inline void xts_tweak_encdec(const void *ctx,
+                                    xts_cipher_func *func,
+                                    const xts_uint128 *src,
+                                    xts_uint128 *dst,
+                                    xts_uint128 *iv)
 {
-    unsigned long x;
-
     /* tweak encrypt block i */
-    for (x = 0; x < XTS_BLOCK_SIZE; x++) {
-        dst[x] = src[x] ^ iv[x];
-    }
+    xts_uint128_xor(dst, src, iv);
 
-    func(ctx, XTS_BLOCK_SIZE, dst, dst);
+    func(ctx, XTS_BLOCK_SIZE, dst->b, dst->b);
 
-    for (x = 0; x < XTS_BLOCK_SIZE; x++) {
-        dst[x] = dst[x] ^ iv[x];
-    }
+    xts_uint128_xor(dst, dst, iv);
 
     /* LFSR the tweak */
     xts_mult_x(iv);
@@ -85,7 +108,7 @@
                  uint8_t *dst,
                  const uint8_t *src)
 {
-    uint8_t PP[XTS_BLOCK_SIZE], CC[XTS_BLOCK_SIZE], T[XTS_BLOCK_SIZE];
+    xts_uint128 PP, CC, T;
     unsigned long i, m, mo, lim;
 
     /* get number of blocks */
@@ -102,72 +125,53 @@
     }
 
     /* encrypt the iv */
-    encfunc(tweakctx, XTS_BLOCK_SIZE, T, iv);
+    encfunc(tweakctx, XTS_BLOCK_SIZE, T.b, iv);
 
-    for (i = 0; i < lim; i++) {
-        xts_tweak_decrypt(datactx, decfunc, src, dst, T);
+    if (QEMU_PTR_IS_ALIGNED(src, sizeof(uint64_t)) &&
+        QEMU_PTR_IS_ALIGNED(dst, sizeof(uint64_t))) {
+        xts_uint128 *S = (xts_uint128 *)src;
+        xts_uint128 *D = (xts_uint128 *)dst;
+        for (i = 0; i < lim; i++, S++, D++) {
+            xts_tweak_encdec(datactx, decfunc, S, D, &T);
+        }
+    } else {
+        xts_uint128 D;
 
-        src += XTS_BLOCK_SIZE;
-        dst += XTS_BLOCK_SIZE;
+        for (i = 0; i < lim; i++) {
+            memcpy(&D, src, XTS_BLOCK_SIZE);
+            xts_tweak_encdec(datactx, decfunc, &D, &D, &T);
+            memcpy(dst, &D, XTS_BLOCK_SIZE);
+            src += XTS_BLOCK_SIZE;
+            dst += XTS_BLOCK_SIZE;
+        }
     }
 
     /* if length is not a multiple of XTS_BLOCK_SIZE then */
     if (mo > 0) {
-        memcpy(CC, T, XTS_BLOCK_SIZE);
-        xts_mult_x(CC);
+        xts_uint128 S, D;
+        memcpy(&CC, &T, XTS_BLOCK_SIZE);
+        xts_mult_x(&CC);
 
         /* PP = tweak decrypt block m-1 */
-        xts_tweak_decrypt(datactx, decfunc, src, PP, CC);
+        memcpy(&S, src, XTS_BLOCK_SIZE);
+        xts_tweak_encdec(datactx, decfunc, &S, &PP, &CC);
 
         /* Pm = first length % XTS_BLOCK_SIZE bytes of PP */
         for (i = 0; i < mo; i++) {
-            CC[i] = src[XTS_BLOCK_SIZE + i];
-            dst[XTS_BLOCK_SIZE + i] = PP[i];
+            CC.b[i] = src[XTS_BLOCK_SIZE + i];
+            dst[XTS_BLOCK_SIZE + i] = PP.b[i];
         }
         for (; i < XTS_BLOCK_SIZE; i++) {
-            CC[i] = PP[i];
+            CC.b[i] = PP.b[i];
         }
 
         /* Pm-1 = Tweak uncrypt CC */
-        xts_tweak_decrypt(datactx, decfunc, CC, dst, T);
+        xts_tweak_encdec(datactx, decfunc, &CC, &D, &T);
+        memcpy(dst, &D, XTS_BLOCK_SIZE);
     }
 
     /* Decrypt the iv back */
-    decfunc(tweakctx, XTS_BLOCK_SIZE, iv, T);
-}
-
-
-/**
- * xts_tweak_crypt:
- * @param ctxt: the cipher context
- * @param func: the cipher function
- * @src: buffer providing the plain text of XTS_BLOCK_SIZE bytes
- * @dst: buffer to output the cipher text of XTS_BLOCK_SIZE bytes
- * @iv: the initialization vector tweak of XTS_BLOCK_SIZE bytes
- *
- * Encrypt data with a tweak
- */
-static void xts_tweak_encrypt(const void *ctx,
-                              xts_cipher_func *func,
-                              const uint8_t *src,
-                              uint8_t *dst,
-                              uint8_t *iv)
-{
-    unsigned long x;
-
-    /* tweak encrypt block i */
-    for (x = 0; x < XTS_BLOCK_SIZE; x++) {
-        dst[x] = src[x] ^ iv[x];
-    }
-
-    func(ctx, XTS_BLOCK_SIZE, dst, dst);
-
-    for (x = 0; x < XTS_BLOCK_SIZE; x++) {
-        dst[x] = dst[x] ^ iv[x];
-    }
-
-    /* LFSR the tweak */
-    xts_mult_x(iv);
+    decfunc(tweakctx, XTS_BLOCK_SIZE, iv, T.b);
 }
 
 
@@ -180,7 +184,7 @@
                  uint8_t *dst,
                  const uint8_t *src)
 {
-    uint8_t PP[XTS_BLOCK_SIZE], CC[XTS_BLOCK_SIZE], T[XTS_BLOCK_SIZE];
+    xts_uint128 PP, CC, T;
     unsigned long i, m, mo, lim;
 
     /* get number of blocks */
@@ -197,34 +201,50 @@
     }
 
     /* encrypt the iv */
-    encfunc(tweakctx, XTS_BLOCK_SIZE, T, iv);
+    encfunc(tweakctx, XTS_BLOCK_SIZE, T.b, iv);
 
-    for (i = 0; i < lim; i++) {
-        xts_tweak_encrypt(datactx, encfunc, src, dst, T);
+    if (QEMU_PTR_IS_ALIGNED(src, sizeof(uint64_t)) &&
+        QEMU_PTR_IS_ALIGNED(dst, sizeof(uint64_t))) {
+        xts_uint128 *S = (xts_uint128 *)src;
+        xts_uint128 *D = (xts_uint128 *)dst;
+        for (i = 0; i < lim; i++, S++, D++) {
+            xts_tweak_encdec(datactx, encfunc, S, D, &T);
+        }
+    } else {
+        xts_uint128 D;
 
-        dst += XTS_BLOCK_SIZE;
-        src += XTS_BLOCK_SIZE;
+        for (i = 0; i < lim; i++) {
+            memcpy(&D, src, XTS_BLOCK_SIZE);
+            xts_tweak_encdec(datactx, encfunc, &D, &D, &T);
+            memcpy(dst, &D, XTS_BLOCK_SIZE);
+
+            dst += XTS_BLOCK_SIZE;
+            src += XTS_BLOCK_SIZE;
+        }
     }
 
     /* if length is not a multiple of XTS_BLOCK_SIZE then */
     if (mo > 0) {
+        xts_uint128 S, D;
         /* CC = tweak encrypt block m-1 */
-        xts_tweak_encrypt(datactx, encfunc, src, CC, T);
+        memcpy(&S, src, XTS_BLOCK_SIZE);
+        xts_tweak_encdec(datactx, encfunc, &S, &CC, &T);
 
         /* Cm = first length % XTS_BLOCK_SIZE bytes of CC */
         for (i = 0; i < mo; i++) {
-            PP[i] = src[XTS_BLOCK_SIZE + i];
-            dst[XTS_BLOCK_SIZE + i] = CC[i];
+            PP.b[i] = src[XTS_BLOCK_SIZE + i];
+            dst[XTS_BLOCK_SIZE + i] = CC.b[i];
         }
 
         for (; i < XTS_BLOCK_SIZE; i++) {
-            PP[i] = CC[i];
+            PP.b[i] = CC.b[i];
         }
 
         /* Cm-1 = Tweak encrypt PP */
-        xts_tweak_encrypt(datactx, encfunc, PP, dst, T);
+        xts_tweak_encdec(datactx, encfunc, &PP, &D, &T);
+        memcpy(dst, &D, XTS_BLOCK_SIZE);
     }
 
     /* Decrypt the iv back */
-    decfunc(tweakctx, XTS_BLOCK_SIZE, iv, T);
+    decfunc(tweakctx, XTS_BLOCK_SIZE, iv, T.b);
 }
diff --git a/default-configs/alpha-softmmu.mak b/default-configs/alpha-softmmu.mak
index bbe361f..4d654ea 100644
--- a/default-configs/alpha-softmmu.mak
+++ b/default-configs/alpha-softmmu.mak
@@ -8,7 +8,6 @@
 CONFIG_I8254=y
 CONFIG_I8257=y
 CONFIG_PARALLEL=y
-CONFIG_PARALLEL_ISA=y
 CONFIG_FDC=y
 CONFIG_PCKBD=y
 CONFIG_VGA_CIRRUS=y
@@ -19,3 +18,4 @@
 CONFIG_I8259=y
 CONFIG_MC146818RTC=y
 CONFIG_ISA_TESTDEV=y
+CONFIG_SMC37C669=y
diff --git a/default-configs/hyperv.mak b/default-configs/hyperv.mak
new file mode 100644
index 0000000..5d0d9fd
--- /dev/null
+++ b/default-configs/hyperv.mak
@@ -0,0 +1,2 @@
+CONFIG_HYPERV=$(CONFIG_KVM)
+CONFIG_HYPERV_TESTDEV=y
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 8c7d4a0..64c998c 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -3,6 +3,7 @@
 include pci.mak
 include sound.mak
 include usb.mak
+include hyperv.mak
 CONFIG_QXL=$(CONFIG_SPICE)
 CONFIG_VGA_ISA=y
 CONFIG_VGA_CIRRUS=y
@@ -50,7 +51,8 @@
 CONFIG_APIC=y
 CONFIG_IOAPIC=y
 CONFIG_PVPANIC=y
-CONFIG_MEM_HOTPLUG=y
+CONFIG_MEM_DEVICE=y
+CONFIG_DIMM=y
 CONFIG_NVDIMM=y
 CONFIG_ACPI_NVDIMM=y
 CONFIG_PCIE_PORT=y
@@ -58,7 +60,6 @@
 CONFIG_IOH3420=y
 CONFIG_I82801B11=y
 CONFIG_SMBIOS=y
-CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM)
 CONFIG_PXB=y
 CONFIG_ACPI_VMGENID=y
 CONFIG_FW_CFG_DMA=y
diff --git a/default-configs/pci.mak b/default-configs/pci.mak
index de53d20..6c7be12 100644
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -44,5 +44,6 @@
 CONFIG_EDU=y
 CONFIG_VGA=y
 CONFIG_VGA_PCI=y
+CONFIG_BOCHS_DISPLAY=y
 CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM)
 CONFIG_ROCKER=y
diff --git a/default-configs/ppc-softmmu.mak b/default-configs/ppc-softmmu.mak
index 3181bbf..23d871f 100644
--- a/default-configs/ppc-softmmu.mak
+++ b/default-configs/ppc-softmmu.mak
@@ -28,6 +28,7 @@
 CONFIG_DDC=y
 CONFIG_IDE_SII3112=y
 CONFIG_I2C=y
+CONFIG_AT24C=y
 CONFIG_BITBANG_I2C=y
 CONFIG_M41T80=y
 CONFIG_VGA_CIRRUS=y
diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index b94af6c..f550573 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -16,4 +16,5 @@
 CONFIG_XICS=$(CONFIG_PSERIES)
 CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
 CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
-CONFIG_MEM_HOTPLUG=y
+CONFIG_MEM_DEVICE=y
+CONFIG_DIMM=y
diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak
index d6b67d5..5eef375 100644
--- a/default-configs/s390x-softmmu.mak
+++ b/default-configs/s390x-softmmu.mak
@@ -7,3 +7,4 @@
 CONFIG_S390_FLIC_KVM=$(CONFIG_KVM)
 CONFIG_VFIO_CCW=$(CONFIG_LINUX)
 CONFIG_WDT_DIAG288=y
+CONFIG_VFIO_AP=$(CONFIG_LINUX)
diff --git a/default-configs/sparc64-softmmu.mak b/default-configs/sparc64-softmmu.mak
index 52edafe..ce63d47 100644
--- a/default-configs/sparc64-softmmu.mak
+++ b/default-configs/sparc64-softmmu.mak
@@ -16,5 +16,4 @@
 CONFIG_SUNHME=y
 CONFIG_MC146818RTC=y
 CONFIG_ISA_TESTDEV=y
-CONFIG_EMPTY_SLOT=y
 CONFIG_SUN4V_RTC=y
diff --git a/device-hotplug.c b/device-hotplug.c
index cd427e2..6090d5f 100644
--- a/device-hotplug.c
+++ b/device-hotplug.c
@@ -28,6 +28,7 @@
 #include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
 #include "qapi/qmp/qdict.h"
+#include "qapi/error.h"
 #include "qemu/config-file.h"
 #include "qemu/option.h"
 #include "sysemu/sysemu.h"
@@ -36,6 +37,7 @@
 
 static DriveInfo *add_init_drive(const char *optstr)
 {
+    Error *err = NULL;
     DriveInfo *dinfo;
     QemuOpts *opts;
     MachineClass *mc;
@@ -45,8 +47,9 @@
         return NULL;
 
     mc = MACHINE_GET_CLASS(current_machine);
-    dinfo = drive_new(opts, mc->block_default_type);
+    dinfo = drive_new(opts, mc->block_default_type, &err);
     if (!dinfo) {
+        error_report_err(err);
         qemu_opts_del(opts);
         return NULL;
     }
diff --git a/disas/Makefile.objs b/disas/Makefile.objs
index 213be2f..3c1cdce 100644
--- a/disas/Makefile.objs
+++ b/disas/Makefile.objs
@@ -14,6 +14,7 @@
 common-obj-$(CONFIG_M68K_DIS) += m68k.o
 common-obj-$(CONFIG_MICROBLAZE_DIS) += microblaze.o
 common-obj-$(CONFIG_MIPS_DIS) += mips.o
+common-obj-$(CONFIG_NANOMIPS_DIS) += nanomips.o
 common-obj-$(CONFIG_NIOS2_DIS) += nios2.o
 common-obj-$(CONFIG_MOXIE_DIS) += moxie.o
 common-obj-$(CONFIG_PPC_DIS) += ppc.o
diff --git a/disas/nanomips.cpp b/disas/nanomips.cpp
new file mode 100644
index 0000000..1238c2f
--- /dev/null
+++ b/disas/nanomips.cpp
@@ -0,0 +1,22242 @@
+/*
+ *  Source file for nanoMIPS disassembler component of QEMU
+ *
+ *  Copyright (C) 2018  Wave Computing
+ *  Copyright (C) 2018  Matthew Fortune <matthew.fortune@mips.com>
+ *  Copyright (C) 2018  Aleksandar Markovic <aleksandar.markovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+extern "C" {
+#include "qemu/osdep.h"
+#include "disas/bfd.h"
+}
+
+#include <cstring>
+#include <stdexcept>
+#include <sstream>
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "nanomips.h"
+
+#define IMGASSERTONCE(test)
+
+
+int nanomips_dis(char *buf,
+                 unsigned address,
+                 unsigned short one,
+                 unsigned short two,
+                 unsigned short three)
+{
+    /* The halfwords are handed to the decoder in instruction order. */
+    uint16 halfwords[3] = {one, two, three};
+    NMD::TABLE_ENTRY_TYPE entry_type;
+    std::string text;
+    NMD decoder(address, NMD::ALL_ATTRIBUTES);
+
+    const int result = decoder.Disassemble(halfwords, text, entry_type);
+    strcpy(buf, text.c_str());   /* buf must be large enough for the text */
+    return result;
+}
+
+int print_insn_nanomips(bfd_vma memaddr, struct disassemble_info *info)
+{
+    int status;
+    bfd_byte buffer[2];
+    uint16_t insn1 = 0, insn2 = 0, insn3 = 0;
+    char buf[200];
+    /* Prints the raw halfwords, then the text; returns bytes used or -1. */
+    info->bytes_per_chunk = 2;
+    info->display_endian = info->endian;
+    info->insn_info_valid = 1;
+    info->branch_delay_insns = 0;
+    info->data_size = 0;
+    info->insn_type = dis_nonbranch;
+    info->target = 0;
+    info->target2 = 0;
+
+    status = (*info->read_memory_func)(memaddr, buffer, 2, info);
+    if (status != 0) {
+        (*info->memory_error_func)(status, memaddr, info);
+        return -1;
+    }
+
+    if (info->endian == BFD_ENDIAN_BIG) {
+        insn1 = bfd_getb16(buffer);
+    } else {
+        insn1 = bfd_getl16(buffer);
+    }
+    (*info->fprintf_func)(info->stream, "%04x ", insn1);
+
+    /* Bit 12 clear: the opcode has (at least) a second halfword.  */
+    if ((insn1 & 0x1000) == 0) {
+        status = (*info->read_memory_func)(memaddr + 2, buffer, 2, info);
+        if (status != 0) {
+            (*info->memory_error_func)(status, memaddr + 2, info);
+            return -1;
+        }
+
+        if (info->endian == BFD_ENDIAN_BIG) {
+            insn2 = bfd_getb16(buffer);
+        } else {
+            insn2 = bfd_getl16(buffer);
+        }
+        (*info->fprintf_func)(info->stream, "%04x ", insn2);
+    } else {
+        (*info->fprintf_func)(info->stream, "     ");
+    }
+    /* Top six bits equal to 0x18: 48-bit opcode, fetch a third halfword.  */
+    if ((insn1 >> 10) == 0x18) {
+        status = (*info->read_memory_func)(memaddr + 4, buffer, 2, info);
+        if (status != 0) {
+            (*info->memory_error_func)(status, memaddr + 4, info);
+            return -1;
+        }
+
+        if (info->endian == BFD_ENDIAN_BIG) {
+            insn3 = bfd_getb16(buffer);
+        } else {
+            insn3 = bfd_getl16(buffer);
+        }
+        (*info->fprintf_func)(info->stream, "%04x ", insn3);
+    } else {
+        (*info->fprintf_func)(info->stream, "     ");
+    }
+
+    int length = nanomips_dis(buf, memaddr, insn1, insn2, insn3);
+
+    /* FIXME: Should probably use a hash table on the major opcode here.  */
+    /* A positive length is in bits; otherwise buf holds an error text.  */
+    (*info->fprintf_func) (info->stream, "%s", buf);
+    if (length > 0) {
+        return length / 8;
+    }
+
+    info->insn_type = dis_noninsn;
+    /* Skip exactly the halfwords fetched above, whatever their values.  */
+    return (insn1 >> 10) == 0x18 ? 6 : (insn1 & 0x1000) == 0 ? 4 : 2;
+}
+
+
+namespace img
+{
+    address addr32(address a)
+    {
+        return a;       /* returned unchanged */
+    }
+
+    std::string format(const char *format, ...)
+    {
+        char buffer[256] = "";  /* stays a valid string if formatting fails */
+        va_list args;
+        va_start(args, format);
+        int err = vsnprintf(buffer, sizeof(buffer), format, args);
+        if (err < 0) {
+            perror("img::format");
+        }
+        va_end(args);
+        return buffer;
+    }
+
+    std::string format(const char *format,
+                       std::string s)
+    {
+        char buffer[256];
+        /* Bounded write: over-long output is truncated, never overrun. */
+        snprintf(buffer, sizeof(buffer), format, s.c_str());
+
+        return buffer;
+    }
+
+    std::string format(const char *format,
+                       std::string s1,
+                       std::string s2)
+    {
+        char buffer[256];
+        /* Bounded write: over-long output is truncated, never overrun. */
+        snprintf(buffer, sizeof(buffer), format, s1.c_str(), s2.c_str());
+
+        return buffer;
+    }
+
+    std::string format(const char *format,
+                       std::string s1,
+                       std::string s2,
+                       std::string s3)
+    {
+        char buffer[256];
+        /* Bounded write: over-long output is truncated, never overrun. */
+        snprintf(buffer, sizeof(buffer), format,
+                 s1.c_str(), s2.c_str(), s3.c_str());
+        return buffer;
+    }
+
+    std::string format(const char *format,
+                       std::string s1,
+                       std::string s2,
+                       std::string s3,
+                       std::string s4)
+    {
+        char buffer[256];
+        /* Bounded write: over-long output is truncated, never overrun. */
+        snprintf(buffer, sizeof(buffer), format,
+                 s1.c_str(), s2.c_str(), s3.c_str(), s4.c_str());
+
+        return buffer;
+    }
+
+    std::string format(const char *format,
+                       std::string s1,
+                       std::string s2,
+                       std::string s3,
+                       std::string s4,
+                       std::string s5)
+    {
+        char buffer[256];
+        /* Bounded write: over-long output is truncated, never overrun. */
+        snprintf(buffer, sizeof(buffer), format, s1.c_str(), s2.c_str(),
+                 s3.c_str(), s4.c_str(), s5.c_str());
+
+        return buffer;
+    }
+
+    std::string format(const char *format,
+                       uint64 d,
+                       std::string s2)
+    {
+        char buffer[256];
+        /* 'format' must use a 64-bit conversion for d. */
+        snprintf(buffer, sizeof(buffer), format, d, s2.c_str());
+
+        return buffer;
+    }
+
+    std::string format(const char *format,
+                       std::string s1,
+                       uint64 d,
+                       std::string s2)
+    {
+        char buffer[256];
+        /* 'format' must use a 64-bit conversion for d. */
+        snprintf(buffer, sizeof(buffer), format, s1.c_str(), d, s2.c_str());
+
+        return buffer;
+    }
+
+    std::string format(const char *format,
+                       std::string s1,
+                       std::string s2,
+                       uint64 d)
+    {
+        char buffer[256];
+        /* 'format' must use a 64-bit conversion for d. */
+        snprintf(buffer, sizeof(buffer), format, s1.c_str(), s2.c_str(), d);
+
+        return buffer;
+    }
+
+    char as_char(int c)
+    {
+        return static_cast<char>(c);
+    }
+};
+
+
+std::string to_string(img::address a)
+{
+    char buffer[256];   /* far more than "0x" plus 16 hex digits needs */
+    snprintf(buffer, sizeof(buffer), "0x%08llx", (unsigned long long)a);
+    return buffer;
+}
+
+
+uint64 extract_bits(uint64 data, uint32 bit_offset, uint32 bit_size)
+{
+    return (data << (64 - bit_offset - bit_size)) >> (64 - bit_size);
+}
+
+
+int64 sign_extend(int64 data, int msb)
+{
+    const int shift = 63 - msb;     /* moves bit 'msb' up to the sign bit */
+    return static_cast<int64>(static_cast<uint64>(data) << shift) >> shift;
+}
+
+
+uint64 NMD::renumber_registers(uint64 index, uint64 *register_list,
+                               size_t register_list_size)
+{
+    if (index < register_list_size) {
+        return register_list[index];
+    }
+    /* Out of range: report it; the casts match the %llu conversions. */
+    throw std::runtime_error(img::format(
+        "Invalid register mapping index %llu, size of list = %llu",
+        (unsigned long long)index, (unsigned long long)register_list_size));
+}
+
+
+/*
+ * These functions should be decode functions, but the JSON does not have
+ * decode sections, so they are based on the encode ones; the equivalent
+ * decode functions need writing eventually.
+ */
+uint64 NMD::encode_gpr3(uint64 d)
+{
+    static uint64 gpr3_map[] = { 16, 17, 18, 19,  4,  5,  6,  7 };
+    const size_t map_len = sizeof(gpr3_map) / sizeof(gpr3_map[0]);
+    return renumber_registers(d, gpr3_map, map_len);    /* d indexes map */
+}
+
+
+uint64 NMD::encode_gpr3_store(uint64 d)
+{
+    static uint64 store_map[] = {  0, 17, 18, 19,  4,  5,  6,  7 };
+    const size_t map_len = sizeof(store_map) / sizeof(store_map[0]);
+    return renumber_registers(d, store_map, map_len);   /* d indexes map */
+}
+
+
+uint64 NMD::encode_rd1_from_rd(uint64 d)
+{
+    static uint64 rd1_map[] = {  4,  5 };
+    const size_t map_len = sizeof(rd1_map) / sizeof(rd1_map[0]);
+    return renumber_registers(d, rd1_map, map_len);     /* d indexes map */
+}
+
+
+uint64 NMD::encode_gpr4_zero(uint64 d)
+{
+    static uint64 gpr4z_map[] = {  8,  9, 10,  0,  4,  5,  6,  7,
+                                  16, 17, 18, 19, 20, 21, 22, 23 };
+    const size_t map_len = sizeof(gpr4z_map) / sizeof(gpr4z_map[0]);
+    return renumber_registers(d, gpr4z_map, map_len);   /* d indexes map */
+}
+
+
+uint64 NMD::encode_gpr4(uint64 d)
+{
+    static uint64 gpr4_map[] = {  8,  9, 10, 11,  4,  5,  6,  7,
+                                 16, 17, 18, 19, 20, 21, 22, 23 };
+    const size_t map_len = sizeof(gpr4_map) / sizeof(gpr4_map[0]);
+    return renumber_registers(d, gpr4_map, map_len);    /* d indexes map */
+}
+
+
+uint64 NMD::encode_rd2_reg1(uint64 d)
+{
+    static uint64 reg1_map[] = {  4,  5,  6,  7 };
+    const size_t map_len = sizeof(reg1_map) / sizeof(reg1_map[0]);
+    return renumber_registers(d, reg1_map, map_len);    /* d indexes map */
+}
+
+
+uint64 NMD::encode_rd2_reg2(uint64 d)
+{
+    static uint64 reg2_map[] = {  5,  6,  7,  8 };
+    const size_t map_len = sizeof(reg2_map) / sizeof(reg2_map[0]);
+    return renumber_registers(d, reg2_map, map_len);    /* d indexes map */
+}
+
+
+uint64 NMD::copy(uint64 d)
+{
+    return d;   /* unsigned operand is passed through unchanged */
+}
+
+
+int64 NMD::copy(int64 d)
+{
+    return d;   /* signed operand is passed through unchanged */
+}
+
+
+int64 NMD::neg_copy(uint64 d)
+{
+    return 0ll - d;     /* negated operand, returned as a signed value */
+}
+
+
+int64 NMD::neg_copy(int64 d)
+{
+    return -d;  /* arithmetic negation of the signed operand */
+}
+
+
+/* strange wrapper around  gpr3 */
+uint64 NMD::encode_rs3_and_check_rs3_ge_rt3(uint64 d)
+{
+    return encode_gpr3(d);      /* no rs3 >= rt3 check is made here */
+}
+
+
+/* strange wrapper around  gpr3 */
+uint64 NMD::encode_rs3_and_check_rs3_lt_rt3(uint64 d)
+{
+    return encode_gpr3(d);      /* no rs3 < rt3 check is made here */
+}
+
+
+/* nop - done by extraction function */
+uint64 NMD::encode_s_from_address(uint64 d)
+{
+    return d;   /* pass-through: no further conversion happens here */
+}
+
+
+/* nop - done by extraction function */
+uint64 NMD::encode_u_from_address(uint64 d)
+{
+    return d;   /* pass-through: no further conversion happens here */
+}
+
+
+/* nop - done by extraction function */
+uint64 NMD::encode_s_from_s_hi(uint64 d)
+{
+    return d;   /* pass-through: no further conversion happens here */
+}
+
+
+uint64 NMD::encode_count3_from_count(uint64 d)
+{
+    IMGASSERTONCE(d < 8);
+    return d != 0ull ? d : 8ull;        /* encoding 0 stands for 8 */
+}
+
+
+uint64 NMD::encode_shift3_from_shift(uint64 d)
+{
+    IMGASSERTONCE(d < 8);
+    return d != 0ull ? d : 8ull;        /* encoding 0 stands for 8 */
+}
+
+
+/* special value for load literal */
+int64 NMD::encode_eu_from_s_li16(uint64 d)
+{
+    IMGASSERTONCE(d < 128);
+    return d != 127 ? static_cast<int64>(d) : -1;   /* 127 stands for -1 */
+}
+
+
+uint64 NMD::encode_msbd_from_size(uint64 d)
+{
+    IMGASSERTONCE(d < 32);      /* expands to nothing in this file */
+    return d + 1;               /* result is the input plus one */
+}
+
+
+uint64 NMD::encode_eu_from_u_andi16(uint64 d)
+{
+    IMGASSERTONCE(d < 16);
+
+    /*
+     * Encodings 12 and 13 select the masks 0x00ff and 0xffff; any other
+     * encoding is returned as it is.
+     */
+    return d == 12 ? 0x00ffull
+         : d == 13 ? 0xffffull : d;
+}
+
+
+uint64 NMD::encode_msbd_from_pos_and_size(uint64 d)
+{
+    IMGASSERTONCE(0);   /* expands to nothing in this file */
+    return d;           /* value is returned unchanged */
+}
+
+
+/* save16 / restore16   ???? */
+uint64 NMD::encode_rt1_from_rt(uint64 d)
+{
+    return (d != 0) ? 31 : 30;  /* register 31 when d is set, else 30 */
+}
+
+
+/* ? */
+uint64 NMD::encode_lsb_from_pos_and_size(uint64 d)
+{
+    return d;   /* value is returned unchanged */
+}
+
+
+std::string NMD::save_restore_list(uint64 rt, uint64 count, uint64 gp)
+{
+    std::string list;       /* every entry is emitted with a leading comma */
+    for (uint64 n = 0; n != count; n++) {
+        uint64 reg = ((rt & 0x10) | (rt + n)) & 0x1f;
+        if (gp && (n + 1 == count)) {   /* last slot is register 28 (gp) */
+            reg = 28;
+        }
+        list += img::format(",%s", GPR(reg));
+    }
+    return list;
+}
+
+
+std::string NMD::GPR(uint64 reg)
+{
+    static const char *gpr_reg[32] = {
+        "zero", "at",   "v0",   "v1",   "a0",   "a1",   "a2",   "a3",
+        "a4",   "a5",   "a6",   "a7",   "r12",  "r13",  "r14",  "r15",
+        "s0",   "s1",   "s2",   "s3",   "s4",   "s5",   "s6",   "s7",
+        "r24",  "r25",  "k0",   "k1",   "gp",   "sp",   "fp",   "ra"
+    };
+    /* Register name; an index above 31 throws (the cast matches %llu). */
+    if (reg < 32) {
+        return gpr_reg[reg];
+    }
+    throw std::runtime_error(img::format("Invalid GPR register index %llu",
+                                         (unsigned long long)reg));
+}
+
+
+std::string NMD::FPR(uint64 reg)
+{
+    static const char *fpr_reg[32] = {
+        "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
+        "f8",  "f9",  "f10", "f11", "f12", "f13", "f14", "f15",
+        "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
+        "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
+    };
+    /* Register name; an index above 31 throws (the cast matches %llu). */
+    if (reg < 32) {
+        return fpr_reg[reg];
+    }
+    throw std::runtime_error(img::format("Invalid FPR register index %llu",
+                                         (unsigned long long)reg));
+}
+
+
+std::string NMD::AC(uint64 reg)
+{
+    static const char *ac_reg[4] = {
+        "ac0",  "ac1",  "ac2",  "ac3"
+    };
+    /* Register name; an index above 3 throws (the cast matches %llu). */
+    if (reg < 4) {
+        return ac_reg[reg];
+    }
+    throw std::runtime_error(img::format("Invalid AC register index %llu",
+                                         (unsigned long long)reg));
+}
+
+/* Unsigned immediates print in hex; the cast matches the %llx conversion. */
+std::string NMD::IMMEDIATE(uint64 value)
+{
+    return img::format("0x%llx", (unsigned long long)value);
+}
+
+/* Signed immediates print in decimal; the cast matches %lld. */
+std::string NMD::IMMEDIATE(int64 value)
+{
+    return img::format("%lld", (long long)value);
+}
+
+
+std::string NMD::CPR(uint64 reg)
+{
+    /* needs more work; for now "CP" plus the index (cast matches %llu) */
+    return img::format("CP%llu", (unsigned long long)reg);
+}
+
+
+std::string NMD::ADDRESS(uint64 value, int instruction_size)
+{
+    /*
+     * The printed address is m_pc + value + instruction_size.  A disabled
+     * variant prefixed the result with a 0xa2 token for later symbol
+     * replacement; that is not done here.
+     */
+    return to_string(m_pc + value + instruction_size);
+}
+
+
+uint64 NMD::extract_op_code_value(const uint16 * data, int size)
+{
+    /* The first halfword always ends up in the most significant bits. */
+    if (size == 32) {
+        return ((uint64)data[0] << 16) | data[1];
+    }
+    if (size == 48) {
+        return ((uint64)data[0] << 32) | ((uint64)data[1] << 16) | data[2];
+    }
+
+    /* Size 16, and any unrecognised size, yields the first halfword. */
+    return data[0];
+}
+
+
+int NMD::Disassemble(const uint16 * data, std::string & dis,
+                     NMD::TABLE_ENTRY_TYPE & type)
+{
+    return Disassemble(data, dis, type, MAJOR, 2);  /* MAJOR: 2 entries */
+}
+
+
+/*
+ * Recurse through tables until the instruction is found then return
+ * the string and size
+ *
+ * inputs:
+ *      pointer to a word stream,
+ *      disassembler table and size
+ * returns:
+ *      instruction size    - negative is error
+ *      disassembly string  - on error will contain error string
+ */
+int NMD::Disassemble(const uint16 * data, std::string & dis,
+                     NMD::TABLE_ENTRY_TYPE & type, const Pool *table,
+                     int table_size)
+{
+    try
+    {
+        for (int i = 0; i < table_size; i++) {
+            uint64 op_code = extract_op_code_value(data,
+                                 table[i].instructions_size);
+            if ((op_code & table[i].mask) == table[i].value) {
+                /* opcode bits match; an optional condition must agree too */
+                conditional_function cond = table[i].condition;
+                if ((cond == 0) || (this->*cond)(op_code)) {
+                    try
+                    {
+                        if (table[i].type == pool) {
+                            return Disassemble(data, dis, type,
+                                               table[i].next_table,
+                                               table[i].next_table_size);
+                        } else if ((table[i].type == instruction) ||
+                                   (table[i].type == call_instruction) ||
+                                   (table[i].type == branch_instruction) ||
+                                   (table[i].type == return_instruction)) {
+                            if ((table[i].attributes != 0) &&
+                                (m_requested_instruction_categories &
+                                 table[i].attributes) == 0) {
+                                /*
+                                 * failed due to instruction having
+                                 * an ASE attribute and the requested version
+                                 * not having that attribute
+                                 */
+                                dis = "ASE attribute missmatch";
+                                return -5;
+                            }
+                            disassembly_function dis_fn = table[i].disassembly;
+                            if (dis_fn == 0) {
+                                dis = "disassembler failure - bad table entry";
+                                return -6;
+                            }
+                            type = table[i].type;
+                            dis = (this->*dis_fn)(op_code);
+                            return table[i].instructions_size;
+                        } else {
+                            dis = "reserved instruction";
+                            return -2;
+                        }
+                    }
+                    catch (std::runtime_error & e)
+                    {
+                        dis = e.what();
+                        return -3;          /* std::runtime_error was thrown */
+                    }
+                }
+            }
+        }
+    }
+    catch (std::exception & e)
+    {
+        dis = e.what();
+        return -4;          /* exception not handled by the inner catch */
+    }
+
+    dis = "failed to disassemble";
+    return -1;      /* no table entry matched */
+}
+
+
+uint64 NMD::extract_code_18_to_0(uint64 instruction)
+{
+    /* 19-bit field: instruction bits 18..0. */
+    const uint64 field = extract_bits(instruction, 0, 19);
+    return field;
+}
+
+
+uint64 NMD::extract_shift3_2_1_0(uint64 instruction)
+{
+    /* 3-bit field: instruction bits 2..0. */
+    const uint64 field = extract_bits(instruction, 0, 3);
+    return field;
+}
+
+
+uint64 NMD::extr_uil3il3bs9Fmsb11(uint64 instruction)
+{
+    /* Instruction bits 11..3, kept at bits 11..3 (low three bits zero). */
+    const uint64 field = extract_bits(instruction, 3, 9) << 3;
+    return field;
+}
+
+
+uint64 NMD::extract_count_3_2_1_0(uint64 instruction)
+{
+    /* 4-bit field: instruction bits 3..0. */
+    const uint64 field = extract_bits(instruction, 0, 4);
+    return field;
+}
+
+
+uint64 NMD::extract_rtz3_9_8_7(uint64 instruction)
+{
+    /* 3-bit field: instruction bits 9..7. */
+    const uint64 field = extract_bits(instruction, 7, 3);
+    return field;
+}
+
+
+uint64 NMD::extr_uil1il1bs17Fmsb17(uint64 instruction)
+{
+    /* Instruction bits 17..1, kept at bits 17..1 (bit 0 zero). */
+    const uint64 field = extract_bits(instruction, 1, 17) << 1;
+    return field;
+}
+
+
+int64 NMD::extr_sil11il0bs10Tmsb9(uint64 instruction)
+{
+    /* Signed 10-bit field taken from instruction bits 20..11. */
+    const int64 raw = extract_bits(instruction, 11, 10);
+
+    return sign_extend(raw, 9);
+}
+
+
+int64 NMD::extr_sil0il11bs1_il1il1bs10Tmsb11(uint64 instruction)
+{
+    /* Bit 0 becomes the sign (bit 11); bits 10..1 stay where they are. */
+    const uint64 sign_part = extract_bits(instruction, 0, 1) << 11;
+    const uint64 body_part = extract_bits(instruction, 1, 10) << 1;
+    const int64 raw = sign_part | body_part;
+    return sign_extend(raw, 11);
+}
+
+
+uint64 NMD::extract_u_10(uint64 instruction)
+{
+    /* Single bit: instruction bit 10. */
+    const uint64 field = extract_bits(instruction, 10, 1);
+    return field;
+}
+
+
+uint64 NMD::extract_rtz4_27_26_25_23_22_21(uint64 instruction)
+{
+    /* Bits 23..21 form result bits 2..0; bit 25 becomes result bit 3. */
+    const uint64 low = extract_bits(instruction, 21, 3);
+    const uint64 high = extract_bits(instruction, 25, 1) << 3;
+    return low | high;
+}
+
+
+uint64 NMD::extract_sa_15_14_13_12_11(uint64 instruction)
+{
+    /* 5-bit field: instruction bits 15..11. */
+    const uint64 field = extract_bits(instruction, 11, 5);
+    return field;
+}
+
+
+uint64 NMD::extract_shift_4_3_2_1_0(uint64 instruction)
+{
+    /* 5-bit field: instruction bits 4..0. */
+    const uint64 field = extract_bits(instruction, 0, 5);
+    return field;
+}
+
+
+uint64 NMD::extr_shiftxil7il1bs4Fmsb4(uint64 instruction)
+{
+    /* Instruction bits 10..7, placed at result bits 4..1 (bit 0 zero). */
+    const uint64 field = extract_bits(instruction, 7, 4) << 1;
+    return field;
+}
+
+
+uint64 NMD::extract_hint_25_24_23_22_21(uint64 instruction)
+{
+    /* 5-bit field: instruction bits 25..21. */
+    const uint64 field = extract_bits(instruction, 21, 5);
+    return field;
+}
+
+
+uint64 NMD::extract_count3_14_13_12(uint64 instruction)
+{
+    /* 3-bit field: instruction bits 14..12. */
+    const uint64 field = extract_bits(instruction, 12, 3);
+    return field;
+}
+
+
+int64 NMD::extr_sil0il31bs1_il2il21bs10_il12il12bs9Tmsb31(uint64 instruction)
+{
+    /* Bit 0 -> bit 31 (sign); bits 11..2 -> 30..21; bits 20..12 stay put. */
+    const uint64 sign_part = extract_bits(instruction, 0, 1) << 31;
+    const uint64 high_part = extract_bits(instruction, 2, 10) << 21;
+    const uint64 low_part = extract_bits(instruction, 12, 9) << 12;
+    const int64 raw = sign_part | high_part | low_part;
+    return sign_extend(raw, 31);
+}
+
+
+int64 NMD::extr_sil0il7bs1_il1il1bs6Tmsb7(uint64 instruction)
+{
+    /* Bit 0 becomes the sign (bit 7); bits 6..1 stay where they are. */
+    const uint64 sign_part = extract_bits(instruction, 0, 1) << 7;
+    const uint64 body_part = extract_bits(instruction, 1, 6) << 1;
+    const int64 raw = sign_part | body_part;
+    return sign_extend(raw, 7);
+}
+
+
+uint64 NMD::extract_u2_10_9(uint64 instruction)
+{
+    /* 2-bit field: instruction bits 10..9. */
+    const uint64 field = extract_bits(instruction, 9, 2);
+    return field;
+}
+
+
+uint64 NMD::extract_code_25_24_23_22_21_20_19_18_17_16(uint64 instruction)
+{
+    /* 10-bit field: instruction bits 25..16. */
+    const uint64 field = extract_bits(instruction, 16, 10);
+    return field;
+}
+
+
+uint64 NMD::extract_rs_20_19_18_17_16(uint64 instruction)
+{
+    /* 5-bit field: instruction bits 20..16. */
+    const uint64 field = extract_bits(instruction, 16, 5);
+    return field;
+}
+
+
+uint64 NMD::extr_uil1il1bs2Fmsb2(uint64 instruction)
+{
+    /* Instruction bits 2..1, kept at bits 2..1 (bit 0 zero). */
+    const uint64 field = extract_bits(instruction, 1, 2) << 1;
+    return field;
+}
+
+
+uint64 NMD::extract_stripe_6(uint64 instruction)
+{
+    /* Single bit: instruction bit 6. */
+    const uint64 field = extract_bits(instruction, 6, 1);
+    return field;
+}
+
+
+uint64 NMD::extr_xil17il0bs1Fmsb0(uint64 instruction)
+{
+    /* Single bit: instruction bit 17. */
+    const uint64 field = extract_bits(instruction, 17, 1);
+    return field;
+}
+
+
+uint64 NMD::extr_xil2il0bs1_il15il0bs1Fmsb0(uint64 instruction)
+{
+    /* Instruction bits 2 and 15 are OR-ed together into result bit 0. */
+    const uint64 first = extract_bits(instruction, 2, 1);
+    const uint64 second = extract_bits(instruction, 15, 1);
+    return first | second;
+}
+
+
+uint64 NMD::extract_ac_13_12(uint64 instruction)
+{
+    /* NOTE(review): reads bits 15..14 although the name says 13..12. */
+    const uint64 field = extract_bits(instruction, 14, 2);
+    return field;
+}
+
+
+uint64 NMD::extract_shift_20_19_18_17_16(uint64 instruction)
+{
+    /* 5-bit field: instruction bits 20..16. */
+    const uint64 field = extract_bits(instruction, 16, 5);
+    return field;
+}
+
+
+uint64 NMD::extract_rdl_25_24(uint64 instruction)
+{
+    /* NOTE(review): only bit 24 is read although the name lists 25..24. */
+    const uint64 field = extract_bits(instruction, 24, 1);
+    return field;
+}
+
+
+int64 NMD::extr_sil0il10bs1_il1il1bs9Tmsb10(uint64 instruction)
+{
+    /* Bit 0 becomes the sign (bit 10); bits 9..1 stay where they are. */
+    const uint64 sign_part = extract_bits(instruction, 0, 1) << 10;
+    const uint64 body_part = extract_bits(instruction, 1, 9) << 1;
+    const int64 raw = sign_part | body_part;
+    return sign_extend(raw, 10);
+}
+
+
+uint64 NMD::extract_eu_6_5_4_3_2_1_0(uint64 instruction)
+{
+    /* 7-bit field: instruction bits 6..0. */
+    const uint64 field = extract_bits(instruction, 0, 7);
+    return field;
+}
+
+
+uint64 NMD::extract_shift_5_4_3_2_1_0(uint64 instruction)
+{
+    /* 6-bit field: instruction bits 5..0. */
+    const uint64 field = extract_bits(instruction, 0, 6);
+    return field;
+}
+
+
+uint64 NMD::extr_xil10il0bs6Fmsb5(uint64 instruction)
+{
+    /* 6-bit field: instruction bits 15..10. */
+    const uint64 field = extract_bits(instruction, 10, 6);
+    return field;
+}
+
+
+uint64 NMD::extract_count_19_18_17_16(uint64 instruction)
+{
+    /* 4-bit field: instruction bits 19..16. */
+    const uint64 field = extract_bits(instruction, 16, 4);
+    return field;
+}
+
+
+uint64 NMD::extract_code_2_1_0(uint64 instruction)
+{
+    /* 3-bit field: instruction bits 2..0. */
+    const uint64 field = extract_bits(instruction, 0, 3);
+    return field;
+}
+
+
+uint64 NMD::extr_xil10il0bs4_il22il0bs4Fmsb3(uint64 instruction)
+{
+    /* Bits 13..10 and bits 25..22 are OR-ed into result bits 3..0. */
+    const uint64 first = extract_bits(instruction, 10, 4);
+    const uint64 second = extract_bits(instruction, 22, 4);
+    return first | second;
+}
+
+
+uint64 NMD::extract_u_11_10_9_8_7_6_5_4_3_2_1_0(uint64 instruction)
+{
+    /* 12-bit field: instruction bits 11..0. */
+    const uint64 field = extract_bits(instruction, 0, 12);
+    return field;
+}
+
+
+uint64 NMD::extract_rs_4_3_2_1_0(uint64 instruction)
+{
+    /* 5-bit field: instruction bits 4..0. */
+    const uint64 field = extract_bits(instruction, 0, 5);
+    return field;
+}
+
+
+uint64 NMD::extr_uil3il3bs18Fmsb20(uint64 instruction)
+{
+    /* Instruction bits 20..3, kept at bits 20..3 (low three bits zero). */
+    const uint64 field = extract_bits(instruction, 3, 18) << 3;
+    return field;
+}
+
+
+uint64 NMD::extr_xil12il0bs1Fmsb0(uint64 instruction)
+{
+    /* Single bit: instruction bit 12. */
+    const uint64 field = extract_bits(instruction, 12, 1);
+    return field;
+}
+
+
+uint64 NMD::extr_uil0il2bs4Fmsb5(uint64 instruction)
+{
+    /* Instruction bits 3..0, placed at result bits 5..2. */
+    const uint64 field = extract_bits(instruction, 0, 4) << 2;
+    return field;
+}
+
+
+uint64 NMD::extract_cofun_25_24_23(uint64 instruction)
+{
+    /* NOTE(review): reads the 23 bits 25..3; the name suggests three. */
+    const uint64 field = extract_bits(instruction, 3, 23);
+    return field;
+}
+
+
+uint64 NMD::extr_uil0il2bs3Fmsb4(uint64 instruction)
+{
+    /* Instruction bits 2..0, placed at result bits 4..2. */
+    const uint64 field = extract_bits(instruction, 0, 3) << 2;
+    return field;
+}
+
+
+uint64 NMD::extr_xil10il0bs1Fmsb0(uint64 instruction)
+{
+    /* Single bit: instruction bit 10. */
+    const uint64 field = extract_bits(instruction, 10, 1);
+    return field;
+}
+
+
+uint64 NMD::extract_rd3_3_2_1(uint64 instruction)
+{
+    /* 3-bit field: instruction bits 3..1. */
+    const uint64 field = extract_bits(instruction, 1, 3);
+    return field;
+}
+
+
+uint64 NMD::extract_sa_15_14_13_12(uint64 instruction)
+{
+    /* 4-bit field: instruction bits 15..12. */
+    const uint64 field = extract_bits(instruction, 12, 4);
+    return field;
+}
+
+
+uint64 NMD::extract_rt_25_24_23_22_21(uint64 instruction)
+{
+    /* 5-bit field: instruction bits 25..21. */
+    const uint64 field = extract_bits(instruction, 21, 5);
+    return field;
+}
+
+
+uint64 NMD::extract_ru_7_6_5_4_3(uint64 instruction)
+{
+    /* 5-bit field: instruction bits 7..3. */
+    const uint64 field = extract_bits(instruction, 3, 5);
+    return field;
+}
+
+
+uint64 NMD::extr_xil21il0bs5Fmsb4(uint64 instruction)
+{
+    /* 5-bit field: instruction bits 25..21. */
+    const uint64 field = extract_bits(instruction, 21, 5);
+    return field;
+}
+
+
+uint64 NMD::extr_xil9il0bs3Fmsb2(uint64 instruction)
+{
+    /* 3-bit field: instruction bits 11..9. */
+    const uint64 field = extract_bits(instruction, 9, 3);
+    return field;
+}
+
+
+uint64 NMD::extract_u_17_to_0(uint64 instruction)
+{
+    /* 18-bit field: instruction bits 17..0. */
+    const uint64 field = extract_bits(instruction, 0, 18);
+    return field;
+}
+
+
+uint64 NMD::extr_xil14il0bs1_il15il0bs1Fmsb0(uint64 instruction)
+{
+    /* Instruction bits 14 and 15 are OR-ed together into result bit 0. */
+    const uint64 first = extract_bits(instruction, 14, 1);
+    const uint64 second = extract_bits(instruction, 15, 1);
+    return first | second;
+}
+
+
+uint64 NMD::extract_rsz4_4_2_1_0(uint64 instruction)
+{
+    /* Bits 2..0 form result bits 2..0; bit 4 becomes result bit 3. */
+    const uint64 low = extract_bits(instruction, 0, 3);
+    const uint64 high = extract_bits(instruction, 4, 1) << 3;
+    return low | high;
+}
+
+
+uint64 NMD::extr_xil24il0bs1Fmsb0(uint64 instruction)
+{
+    /* Single bit: instruction bit 24. */
+    const uint64 field = extract_bits(instruction, 24, 1);
+    return field;
+}
+
+
+int64 NMD::extr_sil0il21bs1_il1il1bs20Tmsb21(uint64 instruction)
+{
+    /* Bit 0 becomes the sign (bit 21); bits 20..1 stay where they are. */
+    const uint64 sign_part = extract_bits(instruction, 0, 1) << 21;
+    const uint64 body_part = extract_bits(instruction, 1, 20) << 1;
+    const int64 raw = sign_part | body_part;
+    return sign_extend(raw, 21);
+}
+
+
+uint64 NMD::extract_op_25_to_3(uint64 instruction)
+{
+    /* 23-bit field: instruction bits 25..3. */
+    const uint64 field = extract_bits(instruction, 3, 23);
+    return field;
+}
+
+
+uint64 NMD::extract_rs4_4_2_1_0(uint64 instruction)
+{
+    /* Bits 2..0 form result bits 2..0; bit 4 becomes result bit 3. */
+    const uint64 low = extract_bits(instruction, 0, 3);
+    const uint64 high = extract_bits(instruction, 4, 1) << 3;
+    return low | high;
+}
+
+
+uint64 NMD::extract_bit_23_22_21(uint64 instruction)
+{
+    /* 3-bit field: instruction bits 23..21. */
+    const uint64 field = extract_bits(instruction, 21, 3);
+    return field;
+}
+
+
+uint64 NMD::extract_rt_41_40_39_38_37(uint64 instruction)
+{
+    /* 5-bit field: instruction bits 41..37. */
+    const uint64 field = extract_bits(instruction, 37, 5);
+    return field;
+}
+
+
+int64 NMD::extract_shift_21_20_19_18_17_16(uint64 instruction)
+{
+    /* Signed 6-bit field taken from instruction bits 21..16. */
+    const int64 raw = extract_bits(instruction, 16, 6);
+
+    return sign_extend(raw, 5);
+}
+
+
+uint64 NMD::extr_xil6il0bs3_il10il0bs1Fmsb2(uint64 instruction)
+{
+    /* Bits 8..6 give result bits 2..0; bit 10 is OR-ed into result bit 0. */
+    const uint64 first = extract_bits(instruction, 6, 3);
+    const uint64 second = extract_bits(instruction, 10, 1);
+    return first | second;
+}
+
+
+uint64 NMD::extract_rd2_3_8(uint64 instruction)
+{
+    /* Bit 3 becomes result bit 1; bit 8 becomes result bit 0. */
+    const uint64 high = extract_bits(instruction, 3, 1) << 1;
+    const uint64 low = extract_bits(instruction, 8, 1);
+    return high | low;
+}
+
+
+uint64 NMD::extr_xil16il0bs5Fmsb4(uint64 instruction)
+{
+    /* 5-bit field: instruction bits 20..16. */
+    const uint64 field = extract_bits(instruction, 16, 5);
+    return field;
+}
+
+
+uint64 NMD::extract_code_17_to_0(uint64 instruction)
+{
+    /* 18-bit field: instruction bits 17..0. */
+    const uint64 field = extract_bits(instruction, 0, 18);
+    return field;
+}
+
+
+uint64 NMD::extr_xil0il0bs12Fmsb11(uint64 instruction)
+{
+    /* 12-bit field: instruction bits 11..0. */
+    const uint64 field = extract_bits(instruction, 0, 12);
+    return field;
+}
+
+
+uint64 NMD::extract_size_20_19_18_17_16(uint64 instruction)
+{
+    /* 5-bit field: instruction bits 20..16. */
+    const uint64 field = extract_bits(instruction, 16, 5);
+    return field;
+}
+
+
+int64 NMD::extr_sil2il2bs6_il15il8bs1Tmsb8(uint64 instruction)
+{
+    /* Bits 7..2 stay where they are; bit 15 becomes the sign (bit 8). */
+    const uint64 body_part = extract_bits(instruction, 2, 6) << 2;
+    const uint64 sign_part = extract_bits(instruction, 15, 1) << 8;
+    const int64 raw = body_part | sign_part;
+    return sign_extend(raw, 8);
+}
+
+
+uint64 NMD::extract_u_15_to_0(uint64 instruction)
+{
+    /* Unsigned 16-bit field taken from instruction bits 15..0. */
+    uint64 u_field = extract_bits(instruction, 0, 16);
+    return u_field;
+}
+
+
+uint64 NMD::extract_fs_15_14_13_12_11(uint64 instruction)
+{
+    /* NOTE: reads 5 bits from bit 16 (bits 20..16), not 15..11 as named. */
+    uint64 fs_field = extract_bits(instruction, 16, 5);
+    return fs_field;
+}
+
+
+int64 NMD::extr_sil0il0bs8_il15il8bs1Tmsb8(uint64 instruction)
+{
+    /* Bits 7..0 form the magnitude; bit 15 becomes bit 8 (the sign). */
+    int64 raw = extract_bits(instruction, 0, 8);
+    raw |= extract_bits(instruction, 15, 1) << 8;
+
+    return sign_extend(raw, 8);
+}
+
+
+uint64 NMD::extract_stype_20_19_18_17_16(uint64 instruction)
+{
+    /* Unsigned 5-bit field taken from instruction bits 20..16. */
+    uint64 stype_field = extract_bits(instruction, 16, 5);
+    return stype_field;
+}
+
+
+uint64 NMD::extract_rtl_11(uint64 instruction)
+{
+    /* NOTE: reads the single bit at position 9, not bit 11 as named. */
+    uint64 rtl_flag = extract_bits(instruction, 9, 1);
+    return rtl_flag;
+}
+
+
+uint64 NMD::extract_hs_20_19_18_17_16(uint64 instruction)
+{
+    /* Unsigned 5-bit field taken from instruction bits 20..16. */
+    uint64 hs_field = extract_bits(instruction, 16, 5);
+    return hs_field;
+}
+
+
+uint64 NMD::extr_xil10il0bs1_il14il0bs2Fmsb1(uint64 instruction)
+{
+    /* ORs bit 10 with bits 15..14, both unshifted; non-zero if any set. */
+    uint64 first = extract_bits(instruction, 10, 1);
+    uint64 second = extract_bits(instruction, 14, 2);
+    return first | second;
+}
+
+
+uint64 NMD::extract_sel_13_12_11(uint64 instruction)
+{
+    /* Unsigned 3-bit field taken from instruction bits 13..11. */
+    uint64 sel_field = extract_bits(instruction, 11, 3);
+    return sel_field;
+}
+
+
+uint64 NMD::extract_lsb_4_3_2_1_0(uint64 instruction)
+{
+    /* Unsigned 5-bit field taken from instruction bits 4..0. */
+    uint64 lsb_field = extract_bits(instruction, 0, 5);
+    return lsb_field;
+}
+
+
+uint64 NMD::extr_xil14il0bs2Fmsb1(uint64 instruction)
+{
+    /* Unsigned 2-bit field taken from instruction bits 15..14. */
+    uint64 x_field = extract_bits(instruction, 14, 2);
+    return x_field;
+}
+
+
+uint64 NMD::extract_gp_2(uint64 instruction)
+{
+    /* Single-bit flag taken from instruction bit 2. */
+    uint64 gp_flag = extract_bits(instruction, 2, 1);
+    return gp_flag;
+}
+
+
+uint64 NMD::extract_rt3_9_8_7(uint64 instruction)
+{
+    /* Unsigned 3-bit field taken from instruction bits 9..7. */
+    uint64 rt3_field = extract_bits(instruction, 7, 3);
+    return rt3_field;
+}
+
+
+uint64 NMD::extract_ft_20_19_18_17_16(uint64 instruction)
+{
+    /* NOTE: reads 5 bits from bit 21 (bits 25..21), not 20..16 as named. */
+    uint64 ft_field = extract_bits(instruction, 21, 5);
+    return ft_field;
+}
+
+
+uint64 NMD::extract_u_17_16_15_14_13_12_11(uint64 instruction)
+{
+    /* Unsigned 7-bit field taken from instruction bits 17..11. */
+    uint64 u_field = extract_bits(instruction, 11, 7);
+    return u_field;
+}
+
+
+uint64 NMD::extract_cs_20_19_18_17_16(uint64 instruction)
+{
+    /* Unsigned 5-bit field taken from instruction bits 20..16. */
+    uint64 cs_field = extract_bits(instruction, 16, 5);
+    return cs_field;
+}
+
+
+uint64 NMD::extr_xil16il0bs10Fmsb9(uint64 instruction)
+{
+    /* Unsigned 10-bit field taken from instruction bits 25..16. */
+    uint64 x_field = extract_bits(instruction, 16, 10);
+    return x_field;
+}
+
+
+uint64 NMD::extract_rt4_9_7_6_5(uint64 instruction)
+{
+    /* 4-bit value: bits 7..5 give bits 2..0, instruction bit 9 gives bit 3. */
+    uint64 low3 = extract_bits(instruction, 5, 3);
+    uint64 top = extract_bits(instruction, 9, 1) << 3;
+    return low3 | top;
+}
+
+
+uint64 NMD::extract_msbt_10_9_8_7_6(uint64 instruction)
+{
+    /* Unsigned 5-bit field taken from instruction bits 10..6. */
+    uint64 msbt_field = extract_bits(instruction, 6, 5);
+    return msbt_field;
+}
+
+
+uint64 NMD::extr_uil0il2bs6Fmsb7(uint64 instruction)
+{
+    /* Bits 5..0 shifted left by 2: the result occupies value bits 7..2. */
+    uint64 scaled = extract_bits(instruction, 0, 6) << 2;
+    return scaled;
+}
+
+
+uint64 NMD::extr_xil17il0bs9Fmsb8(uint64 instruction)
+{
+    /* Unsigned 9-bit field taken from instruction bits 25..17. */
+    uint64 x_field = extract_bits(instruction, 17, 9);
+    return x_field;
+}
+
+
+uint64 NMD::extract_sa_15_14_13(uint64 instruction)
+{
+    /* Unsigned 3-bit field taken from instruction bits 15..13. */
+    uint64 sa_field = extract_bits(instruction, 13, 3);
+    return sa_field;
+}
+
+
+int64 NMD::extr_sil0il14bs1_il1il1bs13Tmsb14(uint64 instruction)
+{
+    /* Bit 0 becomes bit 14 (the sign); bits 13..1 keep their position. */
+    int64 raw = extract_bits(instruction, 0, 1) << 14;
+    raw |= extract_bits(instruction, 1, 13) << 1;
+
+    return sign_extend(raw, 14);
+}
+
+
+uint64 NMD::extract_rs3_6_5_4(uint64 instruction)
+{
+    /* Unsigned 3-bit field taken from instruction bits 6..4. */
+    uint64 rs3_field = extract_bits(instruction, 4, 3);
+    return rs3_field;
+}
+
+
+uint64 NMD::extr_uil0il32bs32Fmsb63(uint64 instruction)
+{
+    /* Bits 31..0 shifted left by 32: the result occupies bits 63..32. */
+    uint64 upper_half = extract_bits(instruction, 0, 32) << 32;
+    return upper_half;
+}
+
+
+uint64 NMD::extract_shift_10_9_8_7_6(uint64 instruction)
+{
+    /* Unsigned 5-bit field taken from instruction bits 10..6. */
+    uint64 shift_field = extract_bits(instruction, 6, 5);
+    return shift_field;
+}
+
+
+uint64 NMD::extract_cs_25_24_23_22_21(uint64 instruction)
+{
+    /* Unsigned 5-bit field taken from instruction bits 25..21. */
+    uint64 cs_field = extract_bits(instruction, 21, 5);
+    return cs_field;
+}
+
+
+uint64 NMD::extract_shiftx_11_10_9_8_7_6(uint64 instruction)
+{
+    /* Unsigned 6-bit field taken from instruction bits 11..6. */
+    uint64 shiftx_field = extract_bits(instruction, 6, 6);
+    return shiftx_field;
+}
+
+
+uint64 NMD::extract_rt_9_8_7_6_5(uint64 instruction)
+{
+    /* Unsigned 5-bit field taken from instruction bits 9..5. */
+    uint64 rt_field = extract_bits(instruction, 5, 5);
+    return rt_field;
+}
+
+
+uint64 NMD::extract_op_25_24_23_22_21(uint64 instruction)
+{
+    /* Unsigned 5-bit field taken from instruction bits 25..21. */
+    uint64 op_field = extract_bits(instruction, 21, 5);
+    return op_field;
+}
+
+
+uint64 NMD::extr_uil0il2bs7Fmsb8(uint64 instruction)
+{
+    /* Bits 6..0 shifted left by 2: the result occupies value bits 8..2. */
+    uint64 scaled = extract_bits(instruction, 0, 7) << 2;
+    return scaled;
+}
+
+
+uint64 NMD::extract_bit_16_15_14_13_12_11(uint64 instruction)
+{
+    /* Unsigned 6-bit field taken from instruction bits 16..11. */
+    uint64 bit_field = extract_bits(instruction, 11, 6);
+    return bit_field;
+}
+
+
+uint64 NMD::extr_xil10il0bs1_il11il0bs5Fmsb4(uint64 instruction)
+{
+    /* ORs bit 10 with bits 15..11, both unshifted; non-zero if any set. */
+    uint64 first = extract_bits(instruction, 10, 1);
+    uint64 second = extract_bits(instruction, 11, 5);
+    return first | second;
+}
+
+
+uint64 NMD::extract_mask_20_19_18_17_16_15_14(uint64 instruction)
+{
+    /* Unsigned 7-bit field taken from instruction bits 20..14. */
+    uint64 mask_field = extract_bits(instruction, 14, 7);
+    return mask_field;
+}
+
+
+uint64 NMD::extract_eu_3_2_1_0(uint64 instruction)
+{
+    /* Unsigned 4-bit field taken from instruction bits 3..0. */
+    uint64 eu_field = extract_bits(instruction, 0, 4);
+    return eu_field;
+}
+
+
+uint64 NMD::extr_uil4il4bs4Fmsb7(uint64 instruction)
+{
+    /* Bits 7..4 kept in place (extracted, then shifted back left by 4). */
+    uint64 in_place = extract_bits(instruction, 4, 4) << 4;
+    return in_place;
+}
+
+
+int64 NMD::extr_sil3il3bs5_il15il8bs1Tmsb8(uint64 instruction)
+{
+    /* Bits 7..3 keep their position; bit 15 becomes bit 8 (the sign). */
+    int64 raw = extract_bits(instruction, 3, 5) << 3;
+    raw |= extract_bits(instruction, 15, 1) << 8;
+
+    return sign_extend(raw, 8);
+}
+
+
+uint64 NMD::extract_ft_15_14_13_12_11(uint64 instruction)
+{
+    /* Unsigned 5-bit field taken from instruction bits 15..11. */
+    uint64 ft_field = extract_bits(instruction, 11, 5);
+    return ft_field;
+}
+
+
+int64 NMD::extr_sil0il16bs16_il16il0bs16Tmsb31(uint64 instruction)
+{
+    /* Swaps the two low halfwords: bits 15..0 go high, bits 31..16 low. */
+    int64 raw = extract_bits(instruction, 0, 16) << 16;
+    raw |= extract_bits(instruction, 16, 16);
+
+    return sign_extend(raw, 31);
+}
+
+
+uint64 NMD::extract_u_20_19_18_17_16_15_14_13(uint64 instruction)
+{
+    /* Unsigned 8-bit field taken from instruction bits 20..13. */
+    uint64 u_field = extract_bits(instruction, 13, 8);
+    return u_field;
+}
+
+
+uint64 NMD::extr_xil15il0bs1Fmsb0(uint64 instruction)
+{
+    /* Single-bit value taken from instruction bit 15. */
+    uint64 x_flag = extract_bits(instruction, 15, 1);
+    return x_flag;
+}
+
+
+uint64 NMD::extr_xil11il0bs5Fmsb4(uint64 instruction)
+{
+    /* Unsigned 5-bit field taken from instruction bits 15..11. */
+    uint64 x_field = extract_bits(instruction, 11, 5);
+    return x_field;
+}
+
+
+uint64 NMD::extr_uil2il2bs16Fmsb17(uint64 instruction)
+{
+    /* Bits 17..2 kept in place (extracted, then shifted back left by 2). */
+    uint64 in_place = extract_bits(instruction, 2, 16) << 2;
+    return in_place;
+}
+
+
+uint64 NMD::extract_rd_20_19_18_17_16(uint64 instruction)
+{
+    /* NOTE: reads 5 bits from bit 11 (bits 15..11), not 20..16 as named. */
+    uint64 rd_field = extract_bits(instruction, 11, 5);
+    return rd_field;
+}
+
+
+uint64 NMD::extract_c0s_20_19_18_17_16(uint64 instruction)
+{
+    /* Unsigned 5-bit field taken from instruction bits 20..16. */
+    uint64 c0s_field = extract_bits(instruction, 16, 5);
+    return c0s_field;
+}
+
+
+uint64 NMD::extract_code_1_0(uint64 instruction)
+{
+    /* Unsigned 2-bit field taken from instruction bits 1..0. */
+    uint64 code_field = extract_bits(instruction, 0, 2);
+    return code_field;
+}
+
+
+int64 NMD::extr_sil0il25bs1_il1il1bs24Tmsb25(uint64 instruction)
+{
+    /* Bit 0 becomes bit 25 (the sign); bits 24..1 keep their position. */
+    int64 raw = extract_bits(instruction, 0, 1) << 25;
+    raw |= extract_bits(instruction, 1, 24) << 1;
+
+    return sign_extend(raw, 25);
+}
+
+
+uint64 NMD::extr_xil0il0bs3_il4il0bs1Fmsb2(uint64 instruction)
+{
+    /* ORs bits 2..0 with bit 4, both unshifted; non-zero if any is set. */
+    uint64 first = extract_bits(instruction, 0, 3);
+    uint64 second = extract_bits(instruction, 4, 1);
+    return first | second;
+}
+
+
+uint64 NMD::extract_u_1_0(uint64 instruction)
+{
+    /* Unsigned 2-bit field taken from instruction bits 1..0. */
+    uint64 u_field = extract_bits(instruction, 0, 2);
+    return u_field;
+}
+
+
+uint64 NMD::extr_uil3il3bs1_il8il2bs1Fmsb3(uint64 instruction)
+{
+    /* Instruction bit 3 stays at bit 3; instruction bit 8 becomes bit 2. */
+    uint64 bit3 = extract_bits(instruction, 3, 1) << 3;
+    uint64 bit2 = extract_bits(instruction, 8, 1) << 2;
+    return bit3 | bit2;
+}
+
+
+uint64 NMD::extr_xil9il0bs3_il16il0bs5Fmsb4(uint64 instruction)
+{
+    /* ORs bits 11..9 with bits 20..16, unshifted; non-zero if any set. */
+    uint64 first = extract_bits(instruction, 9, 3);
+    uint64 second = extract_bits(instruction, 16, 5);
+    return first | second;
+}
+
+
+uint64 NMD::extract_fd_10_9_8_7_6(uint64 instruction)
+{
+    /* NOTE: reads 5 bits from bit 11 (bits 15..11), not 10..6 as named. */
+    uint64 fd_field = extract_bits(instruction, 11, 5);
+    return fd_field;
+}
+
+
+uint64 NMD::extr_xil6il0bs3Fmsb2(uint64 instruction)
+{
+    /* Unsigned 3-bit field taken from instruction bits 8..6. */
+    uint64 x_field = extract_bits(instruction, 6, 3);
+    return x_field;
+}
+
+
+uint64 NMD::extr_uil0il2bs5Fmsb6(uint64 instruction)
+{
+    /* Bits 4..0 shifted left by 2: the result occupies value bits 6..2. */
+    uint64 scaled = extract_bits(instruction, 0, 5) << 2;
+    return scaled;
+}
+
+
+uint64 NMD::extract_rtz4_9_7_6_5(uint64 instruction)
+{
+    /* 4-bit value: bits 7..5 give bits 2..0, instruction bit 9 gives bit 3. */
+    uint64 low3 = extract_bits(instruction, 5, 3);
+    uint64 top = extract_bits(instruction, 9, 1) << 3;
+    return low3 | top;
+}
+
+
+uint64 NMD::extract_sel_15_14_13_12_11(uint64 instruction)
+{
+    /* Unsigned 5-bit field taken from instruction bits 15..11. */
+    uint64 sel_field = extract_bits(instruction, 11, 5);
+    return sel_field;
+}
+
+
+uint64 NMD::extract_ct_25_24_23_22_21(uint64 instruction)
+{
+    /* Unsigned 5-bit field taken from instruction bits 25..21. */
+    uint64 ct_field = extract_bits(instruction, 21, 5);
+    return ct_field;
+}
+
+
+uint64 NMD::extr_xil11il0bs1Fmsb0(uint64 instruction)
+{
+    /* Single-bit value taken from instruction bit 11. */
+    uint64 x_flag = extract_bits(instruction, 11, 1);
+    return x_flag;
+}
+
+
+uint64 NMD::extr_uil2il2bs19Fmsb20(uint64 instruction)
+{
+    /* Bits 20..2 kept in place (extracted, then shifted back left by 2). */
+    uint64 in_place = extract_bits(instruction, 2, 19) << 2;
+    return in_place;
+}
+
+
+int64 NMD::extract_s_4_2_1_0(uint64 instruction)
+{
+    /* Bits 2..0 stay low; instruction bit 4 becomes bit 3 (the sign). */
+    int64 raw = extract_bits(instruction, 0, 3);
+    raw |= extract_bits(instruction, 4, 1) << 3;
+
+    return sign_extend(raw, 3);
+}
+
+
+uint64 NMD::extr_uil0il1bs4Fmsb4(uint64 instruction)
+{
+    /* Bits 3..0 shifted left by 1: the result occupies value bits 4..1. */
+    uint64 scaled = extract_bits(instruction, 0, 4) << 1;
+    return scaled;
+}
+
+
+uint64 NMD::extr_xil9il0bs2Fmsb1(uint64 instruction)
+{
+    /* Unsigned 2-bit field taken from instruction bits 10..9. */
+    uint64 x_field = extract_bits(instruction, 9, 2);
+    return x_field;
+}
+
+
+
+bool NMD::ADDIU_32__cond(uint64 instruction)
+{
+    /* Holds whenever the rt field of the instruction is non-zero. */
+    return extract_rt_25_24_23_22_21(instruction) != 0;
+}
+
+
+bool NMD::ADDIU_RS5__cond(uint64 instruction)
+{
+    /* Holds whenever the rt field (instruction bits 9..5) is non-zero. */
+    return extract_rt_9_8_7_6_5(instruction) != 0;
+}
+
+
+bool NMD::BALRSC_cond(uint64 instruction)
+{
+    /* Holds whenever the rt field of the instruction is non-zero. */
+    return extract_rt_25_24_23_22_21(instruction) != 0;
+}
+
+
+bool NMD::BEQC_16__cond(uint64 instruction)
+{
+    /* True when the rs3 field is below rt3 and the u field is non-zero. */
+    const uint64 rs3_field = extract_rs3_6_5_4(instruction);
+    const uint64 rt3_field = extract_rt3_9_8_7(instruction);
+    return rs3_field < rt3_field && extr_uil0il1bs4Fmsb4(instruction) != 0;
+}
+
+
+bool NMD::BNEC_16__cond(uint64 instruction)
+{
+    /* True when rs3 is not below rt3 and the u field is non-zero. */
+    const uint64 rs3_field = extract_rs3_6_5_4(instruction);
+    const uint64 rt3_field = extract_rt3_9_8_7(instruction);
+    return rs3_field >= rt3_field && extr_uil0il1bs4Fmsb4(instruction) != 0;
+}
+
+
+bool NMD::MOVE_cond(uint64 instruction)
+{
+    /* Holds whenever the rt field (instruction bits 9..5) is non-zero. */
+    return extract_rt_9_8_7_6_5(instruction) != 0;
+}
+
+
+bool NMD::P16_BR1_cond(uint64 instruction)
+{
+    /* Holds whenever the scaled u field (bits 3..0 << 1) is non-zero. */
+    return extr_uil0il1bs4Fmsb4(instruction) != 0;
+}
+
+
+bool NMD::PREF_S9__cond(uint64 instruction)
+{
+    const uint64 hint_field = extract_hint_25_24_23_22_21(instruction);
+    return !(hint_field == 31);   /* every hint value except 31 passes */
+}
+
+
+bool NMD::PREFE_cond(uint64 instruction)
+{
+    const uint64 hint_field = extract_hint_25_24_23_22_21(instruction);
+    return !(hint_field == 31);   /* every hint value except 31 passes */
+}
+
+
+bool NMD::SLTU_cond(uint64 instruction)
+{
+    /* Holds whenever the rd field of the instruction is non-zero. */
+    return extract_rd_20_19_18_17_16(instruction) != 0;
+}
+
+
+
+/*
+ * ABS.D fd, fs - Floating Point Absolute Value
+ * Prints "ABS.D fd, fs"; note fd is fetched via the extract_ft_* helper.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  010001     00000          000101
+ *    fmt -----
+ *               fs -----
+ *                    fd -----
+ */
+std::string NMD::ABS_D(uint64 instruction)
+{
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    uint64 fd_field = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fs_name = FPR(copy(fs_field));
+    std::string fd_name = FPR(copy(fd_field));
+
+    return img::format("ABS.D %s, %s", fd_name, fs_name);
+}
+
+
+/*
+ * ABS.S fd, fs - Floating Point Absolute Value
+ * Prints "ABS.S fd, fs"; note fd is fetched via the extract_ft_* helper.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  010001     00000          000101
+ *    fmt -----
+ *               fd -----
+ *                    fs -----
+ */
+std::string NMD::ABS_S(uint64 instruction)
+{
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    uint64 fd_field = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fs_name = FPR(copy(fs_field));
+    std::string fd_name = FPR(copy(fd_field));
+
+    return img::format("ABS.S %s, %s", fd_name, fs_name);
+}
+
+
+/*
+ * ABSQ_S.PH rt, rs - Find Absolute Value of Two Fractional Halfwords
+ * Prints "ABSQ_S.PH rt, rs" with both operands named via GPR().
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000          0001000100111111
+ *     rt -----
+ *          rs -----
+ */
+std::string NMD::ABSQ_S_PH(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_name = GPR(copy(rt_field));
+    std::string rs_name = GPR(copy(rs_field));
+
+    return img::format("ABSQ_S.PH %s, %s", rt_name, rs_name);
+}
+
+
+/*
+ * ABSQ_S.QB rt, rs - Find Absolute Value of Four Fractional Byte Values
+ * Prints "ABSQ_S.QB rt, rs" with both operands named via GPR().
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000          0000000100111111
+ *     rt -----
+ *          rs -----
+ */
+std::string NMD::ABSQ_S_QB(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_name = GPR(copy(rt_field));
+    std::string rs_name = GPR(copy(rs_field));
+
+    return img::format("ABSQ_S.QB %s, %s", rt_name, rs_name);
+}
+
+
+/*
+ * ABSQ_S.W rt, rs - disassemble ABSQ_S.W with two GPR operands
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000          0010000100111111
+ *     rt -----
+ *          rs -----
+ */
+std::string NMD::ABSQ_S_W(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_name = GPR(copy(rt_field));
+    std::string rs_name = GPR(copy(rs_field));
+
+    return img::format("ABSQ_S.W %s, %s", rt_name, rs_name);
+}
+
+
+/*
+ * ACLR bit, s(rs) - disassemble ACLR as "bit, offset(base register)"
+ * NOTE(review): layout below shows rt/rs only, not bit/s - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000          0010000100111111
+ *     rt -----
+ *          rs -----
+ */
+std::string NMD::ACLR(uint64 instruction)
+{
+    uint64 bit_field = extract_bit_23_22_21(instruction);
+    int64 offset_field = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string bit_text = IMMEDIATE(copy(bit_field));
+    std::string offset_text = IMMEDIATE(copy(offset_field));
+    std::string rs_name = GPR(copy(rs_field));
+
+    return img::format("ACLR %s, %s(%s)", bit_text, offset_text, rs_name);
+}
+
+
+/*
+ * ADD rd, rs, rt - disassemble ADD with three GPR operands
+ * NOTE(review): bit layout below looks like a placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000          0010000100111111
+ *     rt -----
+ *          rs -----
+ */
+std::string NMD::ADD(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+
+    return img::format("ADD %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ADD.D fd, fs, ft - Floating Point Add
+ * Prints "ADD.D fd, fs, ft" with all three operands named via FPR().
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  010001                    000101
+ *    fmt -----
+ *          ft -----
+ *               fs -----
+ *                    fd -----
+ */
+std::string NMD::ADD_D(uint64 instruction)
+{
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    uint64 fd_field = extract_fd_10_9_8_7_6(instruction);
+
+    std::string ft_name = FPR(copy(ft_field));
+    std::string fs_name = FPR(copy(fs_field));
+    std::string fd_name = FPR(copy(fd_field));
+
+    return img::format("ADD.D %s, %s, %s", fd_name, fs_name, ft_name);
+}
+
+
+/*
+ * ADD.S fd, fs, ft - Floating Point Add
+ * Prints "ADD.S fd, fs, ft" with all three operands named via FPR().
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  010001                    000101
+ *    fmt -----
+ *          ft -----
+ *               fs -----
+ *                    fd -----
+ */
+std::string NMD::ADD_S(uint64 instruction)
+{
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    uint64 fd_field = extract_fd_10_9_8_7_6(instruction);
+
+    std::string ft_name = FPR(copy(ft_field));
+    std::string fs_name = FPR(copy(fs_field));
+    std::string fd_name = FPR(copy(fd_field));
+
+    return img::format("ADD.S %s, %s, %s", fd_name, fs_name, ft_name);
+}
+
+
+/*
+ * ADDIU[32] rt, rs, u - disassemble ADDIU with a 16-bit unsigned immediate
+ * NOTE(review): bit layout below looks like a placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000          0010000100111111
+ *     rt -----
+ *          rs -----
+ */
+std::string NMD::ADDIU_32_(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 imm_field = extract_u_15_to_0(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_name = GPR(copy(rt_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string imm_text = IMMEDIATE(copy(imm_field));
+
+    return img::format("ADDIU %s, %s, %s", rt_name, rs_name, imm_text);
+}
+
+
+/*
+ * ADDIU[48] rt, s - disassemble the 48-bit ADDIU (register + signed imm)
+ * NOTE(review): bit layout below looks like a placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000          0010000100111111
+ *     rt -----
+ *          rs -----
+ */
+std::string NMD::ADDIU_48_(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_41_40_39_38_37(instruction);
+    int64 imm_field = extr_sil0il16bs16_il16il0bs16Tmsb31(instruction);
+
+    std::string rt_name = GPR(copy(rt_field));
+    std::string imm_text = IMMEDIATE(copy(imm_field));
+
+    return img::format("ADDIU %s, %s", rt_name, imm_text);
+}
+
+
+/*
+ * ADDIU[GP48] rt, $28, s - 48-bit ADDIU printed with fixed register $28
+ * NOTE(review): bit layout below looks like a placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000          0010000100111111
+ *     rt -----
+ *          rs -----
+ */
+std::string NMD::ADDIU_GP48_(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_41_40_39_38_37(instruction);
+    int64 imm_field = extr_sil0il16bs16_il16il0bs16Tmsb31(instruction);
+
+    std::string rt_name = GPR(copy(rt_field));
+    std::string imm_text = IMMEDIATE(copy(imm_field));
+
+    return img::format("ADDIU %s, $%d, %s", rt_name, 28, imm_text);
+}
+
+
+/*
+ * ADDIU[GP.B] rt, $28, u - ADDIU printed with fixed register $28
+ * NOTE(review): bit layout below looks like a placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000          0010000100111111
+ *     rt -----
+ *          rs -----
+ */
+std::string NMD::ADDIU_GP_B_(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 imm_field = extract_u_17_to_0(instruction);
+
+    std::string rt_name = GPR(copy(rt_field));
+    std::string imm_text = IMMEDIATE(copy(imm_field));
+
+    return img::format("ADDIU %s, $%d, %s", rt_name, 28, imm_text);
+}
+
+
+/*
+ * ADDIU[GP.W] rt, $28, u - ADDIU printed with fixed register $28
+ * u is bits 20..2 kept in place. NOTE(review): layout below - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000          0010000100111111
+ *     rt -----
+ *          rs -----
+ */
+std::string NMD::ADDIU_GP_W_(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 imm_field = extr_uil2il2bs19Fmsb20(instruction);
+
+    std::string rt_name = GPR(copy(rt_field));
+    std::string imm_text = IMMEDIATE(copy(imm_field));
+
+    return img::format("ADDIU %s, $%d, %s", rt_name, 28, imm_text);
+}
+
+
+/*
+ * ADDIU[NEG] rt, rs, u - ADDIU whose immediate goes through neg_copy()
+ * NOTE(review): bit layout below looks like a placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000          0010000100111111
+ *     rt -----
+ *          rs -----
+ */
+std::string NMD::ADDIU_NEG_(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 imm_field = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_name = GPR(copy(rt_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string imm_text = IMMEDIATE(neg_copy(imm_field));
+
+    return img::format("ADDIU %s, %s, %s", rt_name, rs_name, imm_text);
+}
+
+
+/*
+ * ADDIU[R1.SP] rt3, $29, u - ADDIU printed with fixed register $29
+ * NOTE(review): bit layout below looks like a placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000          0010000100111111
+ *     rt -----
+ *          rs -----
+ */
+std::string NMD::ADDIU_R1_SP_(uint64 instruction)
+{
+    uint64 imm_field = extr_uil0il2bs6Fmsb7(instruction);
+    uint64 rt3_field = extract_rt3_9_8_7(instruction);
+
+    std::string rt3_name = GPR(encode_gpr3(rt3_field));
+    std::string imm_text = IMMEDIATE(copy(imm_field));
+
+    return img::format("ADDIU %s, $%d, %s", rt3_name, 29, imm_text);
+}
+
+
+/*
+ * ADDIU[R2] rt3, rs3, u - ADDIU with 3-bit register fields (encode_gpr3)
+ * NOTE(review): bit layout below looks like a placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000          0010000100111111
+ *     rt -----
+ *          rs -----
+ */
+std::string NMD::ADDIU_R2_(uint64 instruction)
+{
+    uint64 imm_field = extr_uil0il2bs3Fmsb4(instruction);
+    uint64 rt3_field = extract_rt3_9_8_7(instruction);
+    uint64 rs3_field = extract_rs3_6_5_4(instruction);
+
+    std::string rt3_name = GPR(encode_gpr3(rt3_field));
+    std::string rs3_name = GPR(encode_gpr3(rs3_field));
+    std::string imm_text = IMMEDIATE(copy(imm_field));
+
+    return img::format("ADDIU %s, %s, %s", rt3_name, rs3_name, imm_text);
+}
+
+
+/*
+ * ADDIU[RS5] rt, s5 - disassemble the 16-bit ADDIU (register, signed imm)
+ * NOTE(review): the old title here duplicated ADDSC's description.
+ *  5432109876543210
+ *  100100      1
+ *     rt -----
+ *           s - ---
+ */
+std::string NMD::ADDIU_RS5_(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_9_8_7_6_5(instruction);
+    int64 imm_field = extract_s_4_2_1_0(instruction);
+
+    std::string rt_name = GPR(copy(rt_field));
+    std::string imm_text = IMMEDIATE(copy(imm_field));
+
+    return img::format("ADDIU %s, %s", rt_name, imm_text);
+}
+
+
+/*
+ * ADDIUPC[32] rt, s - prints rt and an ADDRESS() built with length arg 4
+ * NOTE(review): bit layout below looks copied from ADDSC - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDIUPC_32_(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    int64 offset_field = extr_sil0il21bs1_il1il1bs20Tmsb21(instruction);
+
+    std::string rt_name = GPR(copy(rt_field));
+    std::string target = ADDRESS(encode_s_from_address(offset_field), 4);
+
+    return img::format("ADDIUPC %s, %s", rt_name, target);
+}
+
+
+/*
+ * ADDIUPC[48] rt, s - prints rt and an ADDRESS() built with length arg 6
+ * NOTE(review): bit layout below looks copied from ADDSC - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDIUPC_48_(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_41_40_39_38_37(instruction);
+    int64 offset_field = extr_sil0il16bs16_il16il0bs16Tmsb31(instruction);
+
+    std::string rt_name = GPR(copy(rt_field));
+    std::string target = ADDRESS(encode_s_from_address(offset_field), 6);
+
+    return img::format("ADDIUPC %s, %s", rt_name, target);
+}
+
+
+/*
+ * ADDQ.PH rd, rt, rs - Add Fractional Halfword Vectors
+ * The operands are printed in the order rd, rs, rt (GPR names).
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00000001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDQ_PH(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+
+    return img::format("ADDQ.PH %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ADDQ_S.PH rd, rt, rs - Add Fractional Halfword Vectors
+ * The operands are printed in the order rd, rs, rt (GPR names).
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               10000001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDQ_S_PH(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+
+    return img::format("ADDQ_S.PH %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ADDQ_S.W rd, rt, rs - Add Fractional Words
+ * The operands are printed in the order rd, rs, rt (GPR names).
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1100000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDQ_S_W(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+
+    return img::format("ADDQ_S.W %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ADDQH.PH rd, rt, rs - Add Fractional Halfword Vectors And Shift Right
+ *                       to Halve Results
+ * The operands are printed in the order rd, rs, rt (GPR names).
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDQH_PH(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+
+    return img::format("ADDQH.PH %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ADDQH_R.PH rd, rt, rs - Add Fractional Halfword Vectors And Shift Right
+ *                         to Halve Results
+ * The operands are printed in the order rd, rs, rt (GPR names).
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               10001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDQH_R_PH(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+
+    return img::format("ADDQH_R.PH %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ADDQH_R.W rd, rt, rs - Add Fractional Words And Shift Right to Halve Results
+ * The operands are printed in the order rd, rs, rt (GPR names).
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDQH_R_W(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+
+    return img::format("ADDQH_R.W %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ADDQH.W rd, rt, rs - Add Fractional Words And Shift Right to Halve Results
+ * The operands are printed in the order rd, rs, rt (GPR names).
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               10010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDQH_W(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+
+    return img::format("ADDQH.W %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ADDSC rd, rt, rs - Add Signed Word and Set Carry Bit
+ * The operands are printed in the order rd, rs, rt (GPR names).
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDSC(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+
+    return img::format("ADDSC %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ADDU[16] rd3, rs3, rt3 - disassemble the 16-bit ADDU
+ * All three 3-bit register fields are mapped through encode_gpr3().
+ *  5432109876543210
+ *  101100         0
+ *    rt3 ---
+ *       rs3 ---
+ *          rd3 ---
+ */
+std::string NMD::ADDU_16_(uint64 instruction)
+{
+    uint64 rt3_field = extract_rt3_9_8_7(instruction);
+    uint64 rs3_field = extract_rs3_6_5_4(instruction);
+    uint64 rd3_field = extract_rd3_3_2_1(instruction);
+
+    std::string rt3_name = GPR(encode_gpr3(rt3_field));
+    std::string rs3_name = GPR(encode_gpr3(rs3_field));
+    std::string rd3_name = GPR(encode_gpr3(rd3_field));
+
+    return img::format("ADDU %s, %s, %s", rd3_name, rs3_name, rt3_name);
+}
+
+
+/*
+ * ADDU[32] rd, rs, rt - disassemble the 32-bit ADDU (three GPR operands)
+ * NOTE(review): bit layout below looks copied from ADDSC - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDU_32_(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+
+    return img::format("ADDU %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ADDU[4X4] rs4, rt4 - ADDU with 4-bit register fields (encode_gpr4)
+ * NOTE(review): bit layout below looks copied from ADDSC - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDU_4X4_(uint64 instruction)
+{
+    uint64 rs4_field = extract_rs4_4_2_1_0(instruction);
+    uint64 rt4_field = extract_rt4_9_7_6_5(instruction);
+
+    std::string rs4_name = GPR(encode_gpr4(rs4_field));
+    std::string rt4_name = GPR(encode_gpr4(rt4_field));
+
+    return img::format("ADDU %s, %s", rs4_name, rt4_name);
+}
+
+
+/*
+ * ADDU.PH rd, rt, rs - Unsigned Add Integer Halfwords
+ * The operands are printed in the order rd, rs, rt (GPR names).
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00100001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDU_PH(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+
+    return img::format("ADDU.PH %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ADDU.QB rd, rt, rs - Unsigned Add Quad Byte Vectors
+ * The operands are printed in the order rd, rs, rt (GPR names).
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00011001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDU_QB(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+
+    return img::format("ADDU.QB %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ADDU_S.PH rd, rs, rt - Unsigned Add Integer Halfwords
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               10100001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDU_S_PH(uint64 instruction)
+{
+    /* NOTE(review): rd and rs helper names both cite bits 20..16 - verify. */
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    /* Register names, built in the same order as they are printed. */
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+    return img::format("ADDU_S.PH %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ADDU_S.QB rd, rs, rt - Unsigned Add Quad Byte Vectors
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               10011001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDU_S_QB(uint64 instruction)
+{
+    /* NOTE(review): rd and rs helper names both cite bits 20..16 - verify. */
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    /* Register names, built in the same order as they are printed. */
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+    return img::format("ADDU_S.QB %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ADDUH.QB rd, rs, rt - Unsigned Add Vector Quad-Bytes And Right Shift
+ *                       to Halve Results
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00101001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDUH_QB(uint64 instruction)
+{
+    /* NOTE(review): rd and rs helper names both cite bits 20..16 - verify. */
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    /* Register names, built in the same order as they are printed. */
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+    return img::format("ADDUH.QB %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ADDUH_R.QB rd, rs, rt - Unsigned Add Vector Quad-Bytes And Right Shift
+ *                         to Halve Results
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               10101001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDUH_R_QB(uint64 instruction)
+{
+    /* NOTE(review): rd and rs helper names both cite bits 20..16 - verify. */
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    /* Register names, built in the same order as they are printed. */
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+    return img::format("ADDUH_R.QB %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+/*
+ * ADDWC rd, rs, rt - Add Word with Carry Bit
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1111000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ADDWC(uint64 instruction)
+{
+    /* NOTE(review): rd and rs helper names both cite bits 20..16 - verify. */
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    /* Register names, built in the same order as they are printed. */
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+    return img::format("ADDWC %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ALUIPC - formats the instruction as "ALUIPC rt, %pcrel_hi(address)".
+ *
+ *   rt      - GPR name of the rt field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::ALUIPC(uint64 instruction)
+{
+    /* Raw operand fields; s is held as a signed value. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    int64 s_field = extr_sil0il31bs1_il2il21bs10_il12il12bs9Tmsb31(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("ALUIPC %s, %%pcrel_hi(%s)", rt_name, target);
+}
+
+
+/*
+ * AND[16] rs3, rt3 - printed in that order; the eu row below is not
+ * read by this function (NOTE(review): confirm order and diagram).
+ *  5432109876543210
+ *  101100
+ *    rt3 ---
+ *       rs3 ---
+ *           eu ----
+ */
+std::string NMD::AND_16_(uint64 instruction)
+{
+    /* Two register selector fields. */
+    uint64 rt3_field = extract_rt3_9_8_7(instruction);
+    uint64 rs3_field = extract_rs3_6_5_4(instruction);
+    /* Each selector goes through encode_gpr3() before GPR naming. */
+    std::string rt3_name = GPR(encode_gpr3(rt3_field));
+    std::string rs3_name = GPR(encode_gpr3(rs3_field));
+    return img::format("AND %s, %s", rs3_name, rt3_name);
+}
+
+
+/*
+ * AND[32] - formats the instruction as "AND rd, rs, rt".
+ *
+ *   rd - GPR name of the rd field
+ *   rs - GPR name of the rs field
+ *   rt - GPR name of the rt field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::AND_32_(uint64 instruction)
+{
+    /* NOTE(review): rd and rs helper names both cite bits 20..16 - verify. */
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    /* Register names, built in the same order as they are printed. */
+    std::string rd_name = GPR(copy(rd_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+    return img::format("AND %s, %s, %s", rd_name, rs_name, rt_name);
+}
+
+
+/*
+ * ANDI[16] rt3, rs3, eu - eu is printed via encode_eu_from_u_andi16()
+ *
+ *  5432109876543210
+ *  101100
+ *    rt3 ---
+ *       rs3 ---
+ *           eu ----
+ */
+std::string NMD::ANDI_16_(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt3_field = extract_rt3_9_8_7(instruction);
+    uint64 rs3_field = extract_rs3_6_5_4(instruction);
+    uint64 eu_field = extract_eu_3_2_1_0(instruction);
+    /* Registers go through encode_gpr3(); eu through its own encoder. */
+    std::string rt3_name = GPR(encode_gpr3(rt3_field));
+    std::string rs3_name = GPR(encode_gpr3(rs3_field));
+    std::string eu_text = IMMEDIATE(encode_eu_from_u_andi16(eu_field));
+    return img::format("ANDI %s, %s, %s", rt3_name, rs3_name, eu_text);
+}
+
+
+/*
+ * ANDI[32] - formats the instruction as "ANDI rt, rs, u".
+ *
+ *   rt - GPR name of the rt field
+ *   rs - GPR name of the rs field
+ *   u  - IMMEDIATE() rendering of the u field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::ANDI_32_(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    uint64 u_field = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    /* Textual form of each operand. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string u_text = IMMEDIATE(copy(u_field));
+    return img::format("ANDI %s, %s, %s", rt_name, rs_name, u_text);
+}
+
+
+/*
+ * APPEND - formats the instruction as "APPEND rt, rs, sa".
+ *
+ *   rt - GPR name of the rt field
+ *   rs - GPR name of the rs field
+ *   sa - IMMEDIATE() rendering of the sa field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::APPEND(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    uint64 sa_field = extract_sa_15_14_13_12_11(instruction);
+    /* Textual form of each operand. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string rs_name = GPR(copy(rs_field));
+    std::string sa_text = IMMEDIATE(copy(sa_field));
+    return img::format("APPEND %s, %s, %s", rt_name, rs_name, sa_text);
+}
+
+
+/*
+ * ASET - formats the instruction as "ASET bit, s(rs)".
+ *
+ *   bit - IMMEDIATE() rendering of the bit field
+ *   s   - IMMEDIATE() rendering of the signed s field
+ *   rs  - GPR name of the rs field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::ASET(uint64 instruction)
+{
+    /* Raw operand fields; s is held as a signed value. */
+    uint64 bit_field = extract_bit_23_22_21(instruction);
+    int64 s_field = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    /* Textual form of each operand. */
+    std::string bit_text = IMMEDIATE(copy(bit_field));
+    std::string s_text = IMMEDIATE(copy(s_field));
+    std::string rs_name = GPR(copy(rs_field));
+    return img::format("ASET %s, %s(%s)", bit_text, s_text, rs_name);
+}
+
+
+/*
+ * BALC[16] - formats the instruction as "BALC address".
+ *
+ *   address - ADDRESS(encode_s_from_address(s), 2) for the signed
+ *             s field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helper called below and are not restated
+ * here.
+ */
+std::string NMD::BALC_16_(uint64 instruction)
+{
+    /* The only operand field, held as a signed value. */
+    int64 s_field = extr_sil0il10bs1_il1il1bs9Tmsb10(instruction);
+    /* ADDRESS() is given 2, presumably the instruction size in bytes. */
+    std::string target = ADDRESS(encode_s_from_address(s_field), 2);
+    return img::format("BALC %s", target);
+}
+
+
+/*
+ * BALC[32] - formats the instruction as "BALC address".
+ *
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed
+ *             s field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helper called below and are not restated
+ * here.
+ */
+std::string NMD::BALC_32_(uint64 instruction)
+{
+    /* The only operand field, held as a signed value. */
+    int64 s_field = extr_sil0il25bs1_il1il1bs24Tmsb25(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BALC %s", target);
+}
+
+
+/*
+ * BALRSC - formats the instruction as "BALRSC rt, rs".
+ *
+ *   rt - GPR name of the rt field
+ *   rs - GPR name of the rs field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::BALRSC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    /* Textual form of each operand. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string rs_name = GPR(copy(rs_field));
+    return img::format("BALRSC %s, %s", rt_name, rs_name);
+}
+
+
+/*
+ * BBEQZC - formats the instruction as "BBEQZC rt, bit, address".
+ *
+ *   rt      - GPR name of the rt field
+ *   bit     - IMMEDIATE() rendering of the bit field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BBEQZC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 bit_field = extract_bit_16_15_14_13_12_11(instruction);
+    int64 s_field = extr_sil0il11bs1_il1il1bs10Tmsb11(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string bit_text = IMMEDIATE(copy(bit_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BBEQZC %s, %s, %s", rt_name, bit_text, target);
+}
+
+
+/*
+ * BBNEZC - formats the instruction as "BBNEZC rt, bit, address".
+ *
+ *   rt      - GPR name of the rt field
+ *   bit     - IMMEDIATE() rendering of the bit field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BBNEZC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 bit_field = extract_bit_16_15_14_13_12_11(instruction);
+    int64 s_field = extr_sil0il11bs1_il1il1bs10Tmsb11(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string bit_text = IMMEDIATE(copy(bit_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BBNEZC %s, %s, %s", rt_name, bit_text, target);
+}
+
+
+/*
+ * BC[16] - formats the instruction as "BC address".
+ *
+ *   address - ADDRESS(encode_s_from_address(s), 2) for the signed
+ *             s field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helper called below and are not restated
+ * here.
+ */
+std::string NMD::BC_16_(uint64 instruction)
+{
+    /* The only operand field, held as a signed value. */
+    int64 s_field = extr_sil0il10bs1_il1il1bs9Tmsb10(instruction);
+    /* ADDRESS() is given 2, presumably the instruction size in bytes. */
+    std::string target = ADDRESS(encode_s_from_address(s_field), 2);
+    return img::format("BC %s", target);
+}
+
+
+/*
+ * BC[32] - formats the instruction as "BC address".
+ *
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed
+ *             s field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helper called below and are not restated
+ * here.
+ */
+std::string NMD::BC_32_(uint64 instruction)
+{
+    /* The only operand field, held as a signed value. */
+    int64 s_field = extr_sil0il25bs1_il1il1bs24Tmsb25(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BC %s", target);
+}
+
+
+/*
+ * BC1EQZC - formats the instruction as "BC1EQZC ft, address".
+ *
+ *   ft      - FPR name of the ft field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::BC1EQZC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    int64 s_field = extr_sil0il14bs1_il1il1bs13Tmsb14(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string ft_name = FPR(copy(ft_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BC1EQZC %s, %s", ft_name, target);
+}
+
+
+/*
+ * BC1NEZC - formats the instruction as "BC1NEZC ft, address".
+ *
+ *   ft      - FPR name of the ft field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::BC1NEZC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    int64 s_field = extr_sil0il14bs1_il1il1bs13Tmsb14(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string ft_name = FPR(copy(ft_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BC1NEZC %s, %s", ft_name, target);
+}
+
+
+/*
+ * BC2EQZC - formats the instruction as "BC2EQZC ct, address".
+ *
+ *   ct      - CPR() rendering of the ct field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::BC2EQZC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 ct_field = extract_ct_25_24_23_22_21(instruction);
+    int64 s_field = extr_sil0il14bs1_il1il1bs13Tmsb14(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string ct_name = CPR(copy(ct_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BC2EQZC %s, %s", ct_name, target);
+}
+
+
+/*
+ * BC2NEZC - formats the instruction as "BC2NEZC ct, address".
+ *
+ *   ct      - CPR() rendering of the ct field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::BC2NEZC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 ct_field = extract_ct_25_24_23_22_21(instruction);
+    int64 s_field = extr_sil0il14bs1_il1il1bs13Tmsb14(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string ct_name = CPR(copy(ct_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BC2NEZC %s, %s", ct_name, target);
+}
+
+
+/*
+ * BEQC[16] - formats the instruction as "BEQC rs3, rt3, address".
+ *
+ *   rs3     - GPR name after encode_rs3_and_check_rs3_lt_rt3()
+ *   rt3     - GPR name of the rt3 field after encode_gpr3()
+ *   address - ADDRESS(encode_u_from_address(u), 2) for the u field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BEQC_16_(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rs3_field = extract_rs3_6_5_4(instruction);
+    uint64 rt3_field = extract_rt3_9_8_7(instruction);
+    uint64 u_field = extr_uil0il1bs4Fmsb4(instruction);
+    /* ADDRESS() is given 2, presumably the instruction size in bytes. */
+    std::string rs3_name = GPR(encode_rs3_and_check_rs3_lt_rt3(rs3_field));
+    std::string rt3_name = GPR(encode_gpr3(rt3_field));
+    std::string target = ADDRESS(encode_u_from_address(u_field), 2);
+    return img::format("BEQC %s, %s, %s", rs3_name, rt3_name, target);
+}
+
+
+/*
+ * BEQC[32] - formats the instruction as "BEQC rs, rt, address".
+ *
+ *   rs      - GPR name of the rs field
+ *   rt      - GPR name of the rt field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BEQC_32_(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    int64 s_field = extr_sil0il14bs1_il1il1bs13Tmsb14(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BEQC %s, %s, %s", rs_name, rt_name, target);
+}
+
+
+/*
+ * BEQIC - formats the instruction as "BEQIC rt, u, address".
+ *
+ *   rt      - GPR name of the rt field
+ *   u       - IMMEDIATE() rendering of the u field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BEQIC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_field = extract_u_17_16_15_14_13_12_11(instruction);
+    int64 s_field = extr_sil0il11bs1_il1il1bs10Tmsb11(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string u_text = IMMEDIATE(copy(u_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BEQIC %s, %s, %s", rt_name, u_text, target);
+}
+
+
+/*
+ * BEQZC[16] - formats the instruction as "BEQZC rt3, address".
+ *
+ *   rt3     - GPR name of the rt3 field after encode_gpr3()
+ *   address - ADDRESS(encode_s_from_address(s), 2) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::BEQZC_16_(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt3_field = extract_rt3_9_8_7(instruction);
+    int64 s_field = extr_sil0il7bs1_il1il1bs6Tmsb7(instruction);
+    /* ADDRESS() is given 2, presumably the instruction size in bytes. */
+    std::string rt3_name = GPR(encode_gpr3(rt3_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 2);
+    return img::format("BEQZC %s, %s", rt3_name, target);
+}
+
+
+/*
+ * BGEC - formats the instruction as "BGEC rs, rt, address".
+ *
+ *   rs      - GPR name of the rs field
+ *   rt      - GPR name of the rt field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BGEC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    int64 s_field = extr_sil0il14bs1_il1il1bs13Tmsb14(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BGEC %s, %s, %s", rs_name, rt_name, target);
+}
+
+
+/*
+ * BGEIC - formats the instruction as "BGEIC rt, u, address".
+ *
+ *   rt      - GPR name of the rt field
+ *   u       - IMMEDIATE() rendering of the u field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BGEIC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_field = extract_u_17_16_15_14_13_12_11(instruction);
+    int64 s_field = extr_sil0il11bs1_il1il1bs10Tmsb11(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string u_text = IMMEDIATE(copy(u_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BGEIC %s, %s, %s", rt_name, u_text, target);
+}
+
+
+/*
+ * BGEIUC - formats the instruction as "BGEIUC rt, u, address".
+ *
+ *   rt      - GPR name of the rt field
+ *   u       - IMMEDIATE() rendering of the u field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BGEIUC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_field = extract_u_17_16_15_14_13_12_11(instruction);
+    int64 s_field = extr_sil0il11bs1_il1il1bs10Tmsb11(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string u_text = IMMEDIATE(copy(u_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BGEIUC %s, %s, %s", rt_name, u_text, target);
+}
+
+
+/*
+ * BGEUC - formats the instruction as "BGEUC rs, rt, address".
+ *
+ *   rs      - GPR name of the rs field
+ *   rt      - GPR name of the rt field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BGEUC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    int64 s_field = extr_sil0il14bs1_il1il1bs13Tmsb14(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BGEUC %s, %s, %s", rs_name, rt_name, target);
+}
+
+
+/*
+ * BLTC - formats the instruction as "BLTC rs, rt, address".
+ *
+ *   rs      - GPR name of the rs field
+ *   rt      - GPR name of the rt field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BLTC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    int64 s_field = extr_sil0il14bs1_il1il1bs13Tmsb14(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BLTC %s, %s, %s", rs_name, rt_name, target);
+}
+
+
+/*
+ * BLTIC - formats the instruction as "BLTIC rt, u, address".
+ *
+ *   rt      - GPR name of the rt field
+ *   u       - IMMEDIATE() rendering of the u field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BLTIC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_field = extract_u_17_16_15_14_13_12_11(instruction);
+    int64 s_field = extr_sil0il11bs1_il1il1bs10Tmsb11(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string u_text = IMMEDIATE(copy(u_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BLTIC %s, %s, %s", rt_name, u_text, target);
+}
+
+
+/*
+ * BLTIUC - formats the instruction as "BLTIUC rt, u, address".
+ *
+ *   rt      - GPR name of the rt field
+ *   u       - IMMEDIATE() rendering of the u field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BLTIUC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_field = extract_u_17_16_15_14_13_12_11(instruction);
+    int64 s_field = extr_sil0il11bs1_il1il1bs10Tmsb11(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string u_text = IMMEDIATE(copy(u_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BLTIUC %s, %s, %s", rt_name, u_text, target);
+}
+
+
+/*
+ * BLTUC - formats the instruction as "BLTUC rs, rt, address".
+ *
+ *   rs      - GPR name of the rs field
+ *   rt      - GPR name of the rt field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BLTUC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    int64 s_field = extr_sil0il14bs1_il1il1bs13Tmsb14(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BLTUC %s, %s, %s", rs_name, rt_name, target);
+}
+
+
+/*
+ * BNEC[16] - formats the instruction as "BNEC rs3, rt3, address".
+ *
+ *   rs3     - GPR name after encode_rs3_and_check_rs3_ge_rt3()
+ *   rt3     - GPR name of the rt3 field after encode_gpr3()
+ *   address - ADDRESS(encode_u_from_address(u), 2) for the u field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BNEC_16_(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rs3_field = extract_rs3_6_5_4(instruction);
+    uint64 rt3_field = extract_rt3_9_8_7(instruction);
+    uint64 u_field = extr_uil0il1bs4Fmsb4(instruction);
+    /* ADDRESS() is given 2, presumably the instruction size in bytes. */
+    std::string rs3_name = GPR(encode_rs3_and_check_rs3_ge_rt3(rs3_field));
+    std::string rt3_name = GPR(encode_gpr3(rt3_field));
+    std::string target = ADDRESS(encode_u_from_address(u_field), 2);
+    return img::format("BNEC %s, %s, %s", rs3_name, rt3_name, target);
+}
+
+
+/*
+ * BNEC[32] - formats the instruction as "BNEC rs, rt, address".
+ *
+ *   rs      - GPR name of the rs field
+ *   rt      - GPR name of the rt field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BNEC_32_(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    int64 s_field = extr_sil0il14bs1_il1il1bs13Tmsb14(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string rs_name = GPR(copy(rs_field));
+    std::string rt_name = GPR(copy(rt_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BNEC %s, %s, %s", rs_name, rt_name, target);
+}
+
+
+/*
+ * BNEIC - formats the instruction as "BNEIC rt, u, address".
+ *
+ *   rt      - GPR name of the rt field
+ *   u       - IMMEDIATE() rendering of the u field
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::BNEIC(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_field = extract_u_17_16_15_14_13_12_11(instruction);
+    int64 s_field = extr_sil0il11bs1_il1il1bs10Tmsb11(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string u_text = IMMEDIATE(copy(u_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BNEIC %s, %s, %s", rt_name, u_text, target);
+}
+
+
+/*
+ * BNEZC[16] - formats the instruction as "BNEZC rt3, address".
+ *
+ *   rt3     - GPR name of the rt3 field after encode_gpr3()
+ *   address - ADDRESS(encode_s_from_address(s), 2) for the signed s field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::BNEZC_16_(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt3_field = extract_rt3_9_8_7(instruction);
+    int64 s_field = extr_sil0il7bs1_il1il1bs6Tmsb7(instruction);
+    /* ADDRESS() is given 2, presumably the instruction size in bytes. */
+    std::string rt3_name = GPR(encode_gpr3(rt3_field));
+    std::string target = ADDRESS(encode_s_from_address(s_field), 2);
+    return img::format("BNEZC %s, %s", rt3_name, target);
+}
+
+
+/*
+ * BPOSGE32C - formats the instruction as "BPOSGE32C address".
+ *
+ *   address - ADDRESS(encode_s_from_address(s), 4) for the signed
+ *             s field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helper called below and are not restated
+ * here.
+ */
+std::string NMD::BPOSGE32C(uint64 instruction)
+{
+    /* The only operand field, held as a signed value. */
+    int64 s_field = extr_sil0il14bs1_il1il1bs13Tmsb14(instruction);
+    /* ADDRESS() is given 4, presumably the instruction size in bytes. */
+    std::string target = ADDRESS(encode_s_from_address(s_field), 4);
+    return img::format("BPOSGE32C %s", target);
+}
+
+
+/*
+ * BREAK[16] - formats the instruction as "BREAK code".
+ *
+ *   code - IMMEDIATE() rendering of the code field returned by
+ *          extract_code_2_1_0()
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helper called below and are not restated
+ * here.
+ */
+std::string NMD::BREAK_16_(uint64 instruction)
+{
+    /* The only operand field. */
+    uint64 code_field = extract_code_2_1_0(instruction);
+    /* Rendered as an immediate with no further encoding step. */
+    std::string code_text = IMMEDIATE(copy(code_field));
+    return img::format("BREAK %s", code_text);
+}
+
+
+/*
+ * BREAK[32] code - Break. Cause a Breakpoint exception
+ *
+ *   code - IMMEDIATE() rendering of the code field returned by
+ *          extract_code_18_to_0()
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helper called below and are not restated
+ * here.
+ */
+std::string NMD::BREAK_32_(uint64 instruction)
+{
+    /* The only operand field. */
+    uint64 code_field = extract_code_18_to_0(instruction);
+    /* Rendered as an immediate with no further encoding step. */
+    std::string code_text = IMMEDIATE(copy(code_field));
+    return img::format("BREAK %s", code_text);
+}
+
+
+/*
+ * BRSC - formats the instruction as "BRSC rs".
+ *
+ *   rs - GPR name of the rs field returned by
+ *        extract_rs_20_19_18_17_16()
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helper called below and are not restated
+ * here.
+ */
+std::string NMD::BRSC(uint64 instruction)
+{
+    /* The only operand field. */
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    /* Rendered as a GPR name. */
+    std::string rs_name = GPR(copy(rs_field));
+    return img::format("BRSC %s", rs_name);
+}
+
+
+/*
+ * CACHE - formats the instruction as "CACHE op, s(rs)".
+ *
+ *   op - IMMEDIATE() rendering of the op field
+ *   s  - IMMEDIATE() rendering of the signed s field
+ *   rs - GPR name of the rs field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::CACHE(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 op_field = extract_op_25_24_23_22_21(instruction);
+    int64 s_field = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    /* Textual form of each operand. */
+    std::string op_text = IMMEDIATE(copy(op_field));
+    std::string s_text = IMMEDIATE(copy(s_field));
+    std::string rs_name = GPR(copy(rs_field));
+    return img::format("CACHE %s, %s(%s)", op_text, s_text, rs_name);
+}
+
+
+/*
+ * CACHEE - formats the instruction as "CACHEE op, s(rs)".
+ *
+ *   op - IMMEDIATE() rendering of the op field
+ *   s  - IMMEDIATE() rendering of the signed s field
+ *   rs - GPR name of the rs field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::CACHEE(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 op_field = extract_op_25_24_23_22_21(instruction);
+    int64 s_field = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    /* Textual form of each operand. */
+    std::string op_text = IMMEDIATE(copy(op_field));
+    std::string s_text = IMMEDIATE(copy(s_field));
+    std::string rs_name = GPR(copy(rs_field));
+    return img::format("CACHEE %s, %s(%s)", op_text, s_text, rs_name);
+}
+
+
+/*
+ * CEIL.L.D - formats the instruction as "CEIL.L.D ft, fs".
+ *
+ *   ft - FPR name of the ft field
+ *   fs - FPR name of the fs field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::CEIL_L_D(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    /* Textual form of each operand. */
+    std::string ft_name = FPR(copy(ft_field));
+    std::string fs_name = FPR(copy(fs_field));
+    return img::format("CEIL.L.D %s, %s", ft_name, fs_name);
+}
+
+
+/*
+ * CEIL.L.S - formats the instruction as "CEIL.L.S ft, fs".
+ *
+ *   ft - FPR name of the ft field
+ *   fs - FPR name of the fs field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::CEIL_L_S(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    /* Textual form of each operand. */
+    std::string ft_name = FPR(copy(ft_field));
+    std::string fs_name = FPR(copy(fs_field));
+    return img::format("CEIL.L.S %s, %s", ft_name, fs_name);
+}
+
+
+/*
+ * CEIL.W.D - formats the instruction as "CEIL.W.D ft, fs".
+ *
+ *   ft - FPR name of the ft field
+ *   fs - FPR name of the fs field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::CEIL_W_D(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    /* Textual form of each operand. */
+    std::string ft_name = FPR(copy(ft_field));
+    std::string fs_name = FPR(copy(fs_field));
+    return img::format("CEIL.W.D %s, %s", ft_name, fs_name);
+}
+
+
+/*
+ * CEIL.W.S - formats the instruction as "CEIL.W.S ft, fs".
+ *
+ *   ft - FPR name of the ft field
+ *   fs - FPR name of the fs field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::CEIL_W_S(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    /* Textual form of each operand. */
+    std::string ft_name = FPR(copy(ft_field));
+    std::string fs_name = FPR(copy(fs_field));
+    return img::format("CEIL.W.S %s, %s", ft_name, fs_name);
+}
+
+
+/*
+ * CFC1 - formats the instruction as "CFC1 rt, cs".
+ *
+ *   rt - GPR name of the rt field
+ *   cs - CPR() rendering of the cs field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::CFC1(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 cs_field = extract_cs_20_19_18_17_16(instruction);
+    /* Textual form of each operand. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string cs_name = CPR(copy(cs_field));
+    return img::format("CFC1 %s, %s", rt_name, cs_name);
+}
+
+
+/*
+ * CFC2 - formats the instruction as "CFC2 rt, cs".
+ *
+ *   rt - GPR name of the rt field
+ *   cs - CPR() rendering of the cs field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::CFC2(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 cs_field = extract_cs_20_19_18_17_16(instruction);
+    /* Textual form of each operand. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string cs_name = CPR(copy(cs_field));
+    return img::format("CFC2 %s, %s", rt_name, cs_name);
+}
+
+
+/*
+ * CLASS.D - formats the instruction as "CLASS.D ft, fs".
+ *
+ *   ft - FPR name of the ft field
+ *   fs - FPR name of the fs field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::CLASS_D(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    /* Textual form of each operand. */
+    std::string ft_name = FPR(copy(ft_field));
+    std::string fs_name = FPR(copy(fs_field));
+    return img::format("CLASS.D %s, %s", ft_name, fs_name);
+}
+
+
+/*
+ * CLASS.S - formats the instruction as "CLASS.S ft, fs".
+ *
+ *   ft - FPR name of the ft field
+ *   fs - FPR name of the fs field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::CLASS_S(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    /* Textual form of each operand. */
+    std::string ft_name = FPR(copy(ft_field));
+    std::string fs_name = FPR(copy(fs_field));
+    return img::format("CLASS.S %s, %s", ft_name, fs_name);
+}
+
+
+/*
+ * CLO - formats the instruction as "CLO rt, rs".
+ *
+ *   rt - GPR name of the rt field
+ *   rs - GPR name of the rs field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::CLO(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    /* Textual form of each operand. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string rs_name = GPR(copy(rs_field));
+    return img::format("CLO %s, %s", rt_name, rs_name);
+}
+
+
+/*
+ * CLZ - formats the instruction as "CLZ rt, rs".
+ *
+ *   rt - GPR name of the rt field
+ *   rs - GPR name of the rs field
+ *
+ * TODO: add the encoding diagram for this instruction. Field positions
+ * are defined by the extract helpers called below and are not restated
+ * here.
+ */
+std::string NMD::CLZ(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+    /* Textual form of each operand. */
+    std::string rt_name = GPR(copy(rt_field));
+    std::string rs_name = GPR(copy(rs_field));
+    return img::format("CLZ %s, %s", rt_name, rs_name);
+}
+
+
+/*
+ * CMP.AF.D - formats the instruction as "CMP.AF.D fd, fs, ft".
+ *
+ *   fd - FPR name of the fd field
+ *   fs - FPR name of the fs field
+ *   ft - FPR name of the ft field
+ *
+ * TODO: add the encoding diagram for this instruction; field positions
+ * are defined by the extract helpers called below.
+ */
+std::string NMD::CMP_AF_D(uint64 instruction)
+{
+    /* Raw operand fields, read in the order the operands are printed. */
+    uint64 fd_field = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    /* Textual form of each operand. */
+    std::string fd_name = FPR(copy(fd_field));
+    std::string fs_name = FPR(copy(fs_field));
+    std::string ft_name = FPR(copy(ft_field));
+    return img::format("CMP.AF.D %s, %s, %s", fd_name, fs_name, ft_name);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_AF_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.AF.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_EQ_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.EQ.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_EQ_PH(uint64 instruction)
+{
+    /* Decode the rt and rs fields; operands are printed as "rs, rt". */
+    uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rs_text = GPR(copy(rs_bits));
+    std::string rt_text = GPR(copy(rt_bits));
+    return img::format("CMP.EQ.PH %s, %s", rs_text, rt_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_EQ_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.EQ.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_LE_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.LE.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_LE_PH(uint64 instruction)
+{
+    /* Decode the rt and rs fields; operands are printed as "rs, rt". */
+    uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rs_text = GPR(copy(rs_bits));
+    std::string rt_text = GPR(copy(rt_bits));
+    return img::format("CMP.LE.PH %s, %s", rs_text, rt_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_LE_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.LE.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_LT_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.LT.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_LT_PH(uint64 instruction)
+{
+    /* Decode the rt and rs fields; operands are printed as "rs, rt". */
+    uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rs_text = GPR(copy(rs_bits));
+    std::string rt_text = GPR(copy(rt_bits));
+    return img::format("CMP.LT.PH %s, %s", rs_text, rt_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_LT_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.LT.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_NE_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.NE.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_NE_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.NE.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_OR_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.OR.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_OR_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.OR.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SAF_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SAF.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SAF_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SAF.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SEQ_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SEQ.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SEQ_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SEQ.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SLE_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SLE.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SLE_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SLE.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SLT_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SLT.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SLT_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SLT.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SNE_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SNE.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SNE_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SNE.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SOR_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SOR.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SOR_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SOR.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SUEQ_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SUEQ.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SUEQ_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SUEQ.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SULE_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SULE.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SULE_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SULE.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SULT_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SULT.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SULT_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SULT.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SUN_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SUN.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SUNE_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SUNE.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SUNE_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SUNE.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_SUN_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.SUN.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_UEQ_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.UEQ.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_UEQ_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.UEQ.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_ULE_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.ULE.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_ULE_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.ULE.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_ULT_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.ULT.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_ULT_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.ULT.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_UN_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.UN.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_UNE_D(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.UNE.D %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_UNE_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.UNE.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMP_UN_S(uint64 instruction)
+{
+    /* Decode the fd, fs and ft fields and print them through FPR(). */
+    uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+    uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    std::string fd_text = FPR(copy(fd_bits));
+    std::string fs_text = FPR(copy(fs_bits));
+    std::string ft_text = FPR(copy(ft_bits));
+    return img::format("CMP.UN.S %s, %s, %s", fd_text, fs_text, ft_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMPGDU_EQ_QB(uint64 instruction)
+{
+    /* Prints "rd, rs, rt" via GPR(). NOTE(review): the rd and rs helper */
+    /* names both cite bits 20..16 - confirm against their definitions. */
+    uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    std::string rd_text = GPR(copy(rd_bits));
+    std::string rs_text = GPR(copy(rs_bits));
+    std::string rt_text = GPR(copy(rt_bits));
+    return img::format("CMPGDU.EQ.QB %s, %s, %s", rd_text, rs_text, rt_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMPGDU_LE_QB(uint64 instruction)
+{
+    /* Prints "rd, rs, rt" via GPR(). NOTE(review): the rd and rs helper */
+    /* names both cite bits 20..16 - confirm against their definitions. */
+    uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    std::string rd_text = GPR(copy(rd_bits));
+    std::string rs_text = GPR(copy(rs_bits));
+    std::string rt_text = GPR(copy(rt_bits));
+    return img::format("CMPGDU.LE.QB %s, %s, %s", rd_text, rs_text, rt_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMPGDU_LT_QB(uint64 instruction)
+{
+    /* Prints "rd, rs, rt" via GPR(). NOTE(review): the rd and rs helper */
+    /* names both cite bits 20..16 - confirm against their definitions. */
+    uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    std::string rd_text = GPR(copy(rd_bits));
+    std::string rs_text = GPR(copy(rs_bits));
+    std::string rt_text = GPR(copy(rt_bits));
+    return img::format("CMPGDU.LT.QB %s, %s, %s", rd_text, rs_text, rt_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMPGU_EQ_QB(uint64 instruction)
+{
+    /* Prints "rd, rs, rt" via GPR(). NOTE(review): the rd and rs helper */
+    /* names both cite bits 20..16 - confirm against their definitions. */
+    uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    std::string rd_text = GPR(copy(rd_bits));
+    std::string rs_text = GPR(copy(rs_bits));
+    std::string rt_text = GPR(copy(rt_bits));
+    return img::format("CMPGU.EQ.QB %s, %s, %s", rd_text, rs_text, rt_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMPGU_LE_QB(uint64 instruction)
+{
+    /* Prints "rd, rs, rt" via GPR(). NOTE(review): the rd and rs helper */
+    /* names both cite bits 20..16 - confirm against their definitions. */
+    uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    std::string rd_text = GPR(copy(rd_bits));
+    std::string rs_text = GPR(copy(rs_bits));
+    std::string rt_text = GPR(copy(rt_bits));
+    return img::format("CMPGU.LE.QB %s, %s, %s", rd_text, rs_text, rt_text);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMPGU_LT_QB(uint64 instruction)
+{
+    /* Prints "rd, rs, rt" via GPR(). NOTE(review): the rd and rs helper */
+    /* names both cite bits 20..16 - confirm against their definitions. */
+    uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    std::string rd_text = GPR(copy(rd_bits));
+    std::string rs_text = GPR(copy(rs_bits));
+    std::string rt_text = GPR(copy(rt_bits));
+    return img::format("CMPGU.LT.QB %s, %s, %s", rd_text, rs_text, rt_text);
+}
+
+
+/*
+ * Render "CMPU.EQ.QB rs, rt"; both operands are printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram lists rd, which is never read here.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMPU_EQ_QB(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("CMPU.EQ.QB %s, %s", rs_op, rt_op);
+}
+
+
+/*
+ * Render "CMPU.LE.QB rs, rt"; both operands are printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram lists rd, which is never read here.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMPU_LE_QB(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("CMPU.LE.QB %s, %s", rs_op, rt_op);
+}
+
+
+/*
+ * Render "CMPU.LT.QB rs, rt"; both operands are printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram lists rd, which is never read here.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CMPU_LT_QB(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("CMPU.LT.QB %s, %s", rs_op, rt_op);
+}
+
+
+/*
+ * Render "COP2_1 cofun"; cofun is printed via IMMEDIATE(copy()).
+ * NOTE(review): boilerplate diagram shows rt/rs/rd, none are read here.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::COP2_1(uint64 instruction)
+{
+    uint64 cofun_raw = extract_cofun_25_24_23(instruction);
+
+    std::string cofun_op = IMMEDIATE(copy(cofun_raw));
+
+    return img::format("COP2_1 %s", cofun_op);
+}
+
+
+/*
+ * Render "CTC1 rt, cs"; rt is printed via GPR(), cs via CPR().
+ * NOTE(review): boilerplate diagram lists rs/rd; the code reads rt and cs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CTC1(uint64 instruction)
+{
+    uint64 cs_raw = extract_cs_20_19_18_17_16(instruction);
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string cs_op = CPR(copy(cs_raw));
+
+    return img::format("CTC1 %s, %s", rt_op, cs_op);
+}
+
+
+/*
+ * Render "CTC2 rt, cs"; rt is printed via GPR(), cs via CPR().
+ * NOTE(review): boilerplate diagram lists rs/rd; the code reads rt and cs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CTC2(uint64 instruction)
+{
+    uint64 cs_raw = extract_cs_20_19_18_17_16(instruction);
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string cs_op = CPR(copy(cs_raw));
+
+    return img::format("CTC2 %s, %s", rt_op, cs_op);
+}
+
+
+/*
+ * Render "CVT.D.L ft, fs"; both operands are printed via FPR(copy()).
+ * NOTE(review): boilerplate diagram names rt/rs/rd; code reads ft and fs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CVT_D_L(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_op = FPR(copy(ft_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+
+    return img::format("CVT.D.L %s, %s", ft_op, fs_op);
+}
+
+
+/*
+ * Render "CVT.D.S ft, fs"; both operands are printed via FPR(copy()).
+ * NOTE(review): boilerplate diagram names rt/rs/rd; code reads ft and fs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CVT_D_S(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_op = FPR(copy(ft_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+
+    return img::format("CVT.D.S %s, %s", ft_op, fs_op);
+}
+
+
+/*
+ * Render "CVT.D.W ft, fs"; both operands are printed via FPR(copy()).
+ * NOTE(review): boilerplate diagram names rt/rs/rd; code reads ft and fs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CVT_D_W(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_op = FPR(copy(ft_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+
+    return img::format("CVT.D.W %s, %s", ft_op, fs_op);
+}
+
+
+/*
+ * Render "CVT.L.D ft, fs"; both operands are printed via FPR(copy()).
+ * NOTE(review): boilerplate diagram names rt/rs/rd; code reads ft and fs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CVT_L_D(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_op = FPR(copy(ft_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+
+    return img::format("CVT.L.D %s, %s", ft_op, fs_op);
+}
+
+
+/*
+ * Render "CVT.L.S ft, fs"; both operands are printed via FPR(copy()).
+ * NOTE(review): boilerplate diagram names rt/rs/rd; code reads ft and fs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CVT_L_S(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_op = FPR(copy(ft_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+
+    return img::format("CVT.L.S %s, %s", ft_op, fs_op);
+}
+
+
+/*
+ * Render "CVT.S.D ft, fs"; both operands are printed via FPR(copy()).
+ * NOTE(review): boilerplate diagram names rt/rs/rd; code reads ft and fs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CVT_S_D(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_op = FPR(copy(ft_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+
+    return img::format("CVT.S.D %s, %s", ft_op, fs_op);
+}
+
+
+/*
+ * Render "CVT.S.L ft, fs"; both operands are printed via FPR(copy()).
+ * NOTE(review): boilerplate diagram names rt/rs/rd; code reads ft and fs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CVT_S_L(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_op = FPR(copy(ft_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+
+    return img::format("CVT.S.L %s, %s", ft_op, fs_op);
+}
+
+
+/*
+ * Render "CVT.S.PL ft, fs"; both operands are printed via FPR(copy()).
+ * NOTE(review): boilerplate diagram names rt/rs/rd; code reads ft and fs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CVT_S_PL(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_op = FPR(copy(ft_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+
+    return img::format("CVT.S.PL %s, %s", ft_op, fs_op);
+}
+
+
+/*
+ * Render "CVT.S.PU ft, fs"; both operands are printed via FPR(copy()).
+ * NOTE(review): boilerplate diagram names rt/rs/rd; code reads ft and fs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CVT_S_PU(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_op = FPR(copy(ft_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+
+    return img::format("CVT.S.PU %s, %s", ft_op, fs_op);
+}
+
+
+/*
+ * Render "CVT.S.W ft, fs"; both operands are printed via FPR(copy()).
+ * NOTE(review): boilerplate diagram names rt/rs/rd; code reads ft and fs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CVT_S_W(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_op = FPR(copy(ft_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+
+    return img::format("CVT.S.W %s, %s", ft_op, fs_op);
+}
+
+
+/*
+ * Render "CVT.W.D ft, fs"; both operands are printed via FPR(copy()).
+ * NOTE(review): boilerplate diagram names rt/rs/rd; code reads ft and fs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CVT_W_D(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_op = FPR(copy(ft_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+
+    return img::format("CVT.W.D %s, %s", ft_op, fs_op);
+}
+
+
+/*
+ * Render "CVT.W.S ft, fs"; both operands are printed via FPR(copy()).
+ * NOTE(review): boilerplate diagram names rt/rs/rd; code reads ft and fs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::CVT_W_S(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_op = FPR(copy(ft_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+
+    return img::format("CVT.W.S %s, %s", ft_op, fs_op);
+}
+
+
+/*
+ * Render "DADDIU rt, s"; rt via GPR(copy()), int64 s via IMMEDIATE(copy()).
+ * NOTE(review): boilerplate 32-bit diagram; rt helper names bits 41..37.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DADDIU_48_(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_41_40_39_38_37(instruction);
+    int64 s_raw = extr_sil0il16bs16_il16il0bs16Tmsb31(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string s_op = IMMEDIATE(copy(s_raw));
+
+    return img::format("DADDIU %s, %s", rt_op, s_op);
+}
+
+
+/*
+ * Render "DADDIU rt, rs, u"; u is passed through neg_copy() before printing.
+ * NOTE(review): boilerplate diagram; u (helper names bits 11..0) not shown.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DADDIU_NEG_(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string u_op = IMMEDIATE(neg_copy(u_raw));
+
+    return img::format("DADDIU %s, %s, %s", rt_op, rs_op, u_op);
+}
+
+
+/*
+ * Render "DADDIU rt, rs, u"; rt/rs via GPR(), u via IMMEDIATE(copy()).
+ * NOTE(review): boilerplate diagram; u (helper names bits 11..0) not shown.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DADDIU_U12_(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string u_op = IMMEDIATE(copy(u_raw));
+
+    return img::format("DADDIU %s, %s, %s", rt_op, rs_op, u_op);
+}
+
+
+/*
+ * Render "DADD rd, rs, rt"; every operand is printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram; rd/rs helpers both say 20..16 - verify.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DADD(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("DADD %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * Render "DADDU rd, rs, rt"; every operand is printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram; rd/rs helpers both say 20..16 - verify.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DADDU(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("DADDU %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * Render "DCLO rt, rs"; both operands are printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram lists rd, which is never read here.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DCLO(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+
+    return img::format("DCLO %s, %s", rt_op, rs_op);
+}
+
+
+/*
+ * Render "DCLZ rt, rs"; both operands are printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram lists rd, which is never read here.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DCLZ(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+
+    return img::format("DCLZ %s, %s", rt_op, rs_op);
+}
+
+
+/*
+ * Render "DDIV rd, rs, rt"; every operand is printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram; rd/rs helpers both say 20..16 - verify.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DDIV(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("DDIV %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * Render "DDIVU rd, rs, rt"; every operand is printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram; rd/rs helpers both say 20..16 - verify.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DDIVU(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("DDIVU %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * Return the fixed text "DERET " (note the trailing space); no operands.
+ * NOTE(review): boilerplate diagram; the instruction word is not decoded.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DERET(uint64 instruction)
+{
+    static_cast<void>(instruction);
+
+    return std::string("DERET ");
+}
+
+
+/*
+ * Render "DEXTM rt, rs, lsb, msbd"; msbd goes via encode_msbd_from_size().
+ * NOTE(review): boilerplate diagram; msbd is read with the "msbt" helper.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DEXTM(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 msbd_raw = extract_msbt_10_9_8_7_6(instruction);
+    uint64 lsb_raw = extract_lsb_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string lsb_op = IMMEDIATE(copy(lsb_raw));
+    std::string msbd_op = IMMEDIATE(encode_msbd_from_size(msbd_raw));
+
+    return img::format("DEXTM %s, %s, %s, %s", rt_op, rs_op, lsb_op, msbd_op);
+}
+
+
+/*
+ * Render "DEXT rt, rs, lsb, msbd"; msbd goes via encode_msbd_from_size().
+ * NOTE(review): boilerplate diagram; msbd is read with the "msbt" helper.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DEXT(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 msbd_raw = extract_msbt_10_9_8_7_6(instruction);
+    uint64 lsb_raw = extract_lsb_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string lsb_op = IMMEDIATE(copy(lsb_raw));
+    std::string msbd_op = IMMEDIATE(encode_msbd_from_size(msbd_raw));
+
+    return img::format("DEXT %s, %s, %s, %s", rt_op, rs_op, lsb_op, msbd_op);
+}
+
+
+/*
+ * Render "DEXTU rt, rs, lsb, msbd"; msbd goes via encode_msbd_from_size().
+ * NOTE(review): boilerplate diagram; msbd is read with the "msbt" helper.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DEXTU(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 msbd_raw = extract_msbt_10_9_8_7_6(instruction);
+    uint64 lsb_raw = extract_lsb_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string lsb_op = IMMEDIATE(copy(lsb_raw));
+    std::string msbd_op = IMMEDIATE(encode_msbd_from_size(msbd_raw));
+
+    return img::format("DEXTU %s, %s, %s, %s", rt_op, rs_op, lsb_op, msbd_op);
+}
+
+
+/*
+ * Render "DINSM rt, rs, pos, size"; both immediates use one lsb encoder.
+ * NOTE(review): boilerplate diagram; size conversion is flagged as missing.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DINSM(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 msbd_raw = extract_msbt_10_9_8_7_6(instruction);
+    uint64 lsb_raw = extract_lsb_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string pos_op = IMMEDIATE(encode_lsb_from_pos_and_size(lsb_raw));
+    std::string size_op = IMMEDIATE(encode_lsb_from_pos_and_size(msbd_raw));
+    /* NOTE(review): size reuses the lsb helper - no conversion function yet */
+
+    return img::format("DINSM %s, %s, %s, %s", rt_op, rs_op, pos_op, size_op);
+    /* hand edited */
+}
+
+
+/*
+ * Render "DINS rt, rs, pos, size"; both immediates use one lsb encoder.
+ * NOTE(review): boilerplate diagram; size conversion is flagged as missing.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DINS(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 msbd_raw = extract_msbt_10_9_8_7_6(instruction);
+    uint64 lsb_raw = extract_lsb_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string pos_op = IMMEDIATE(encode_lsb_from_pos_and_size(lsb_raw));
+    std::string size_op = IMMEDIATE(encode_lsb_from_pos_and_size(msbd_raw));
+    /* NOTE(review): size reuses the lsb helper - no conversion function yet */
+
+    return img::format("DINS %s, %s, %s, %s", rt_op, rs_op, pos_op, size_op);
+    /* hand edited */
+}
+
+
+/*
+ * Render "DINSU rt, rs, pos, size"; both immediates use one lsb encoder.
+ * NOTE(review): boilerplate diagram; size conversion is flagged as missing.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DINSU(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 msbd_raw = extract_msbt_10_9_8_7_6(instruction);
+    uint64 lsb_raw = extract_lsb_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string pos_op = IMMEDIATE(encode_lsb_from_pos_and_size(lsb_raw));
+    std::string size_op = IMMEDIATE(encode_lsb_from_pos_and_size(msbd_raw));
+    /* NOTE(review): size reuses the lsb helper - no conversion function yet */
+
+    return img::format("DINSU %s, %s, %s, %s", rt_op, rs_op, pos_op, size_op);
+    /* hand edited */
+}
+
+
+/*
+ * Render "DI rt"; rt is printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram lists rs/rd, which are never read here.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DI(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("DI %s", rt_op);
+}
+
+
+/*
+ * Render "DIV rd, rs, rt"; every operand is printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram; rd/rs helpers both say 20..16 - verify.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DIV(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("DIV %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * Render "DIV.D fd, fs, ft"; every operand is printed via FPR(copy()).
+ * NOTE(review): boilerplate diagram names rt/rs/rd; code reads fd/fs/ft.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DIV_D(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_raw = extract_fd_10_9_8_7_6(instruction);
+
+    std::string fd_op = FPR(copy(fd_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+    std::string ft_op = FPR(copy(ft_raw));
+
+    return img::format("DIV.D %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * Render "DIV.S fd, fs, ft"; every operand is printed via FPR(copy()).
+ * NOTE(review): boilerplate diagram names rt/rs/rd; code reads fd/fs/ft.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DIV_S(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_raw = extract_fd_10_9_8_7_6(instruction);
+
+    std::string fd_op = FPR(copy(fd_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+    std::string ft_op = FPR(copy(ft_raw));
+
+    return img::format("DIV.S %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * Render "DIVU rd, rs, rt"; every operand is printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram; rd/rs helpers both say 20..16 - verify.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DIVU(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("DIVU %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * Render "DLSA rd, rs, rt, u2"; GPR names plus u2 via IMMEDIATE(copy()).
+ * NOTE(review): boilerplate diagram; rd/rs helpers both say 20..16 - verify.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DLSA(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 u2_raw = extract_u2_10_9(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string u2_op = IMMEDIATE(copy(u2_raw));
+
+    return img::format("DLSA %s, %s, %s, %s", rd_op, rs_op, rt_op, u2_op);
+}
+
+
+/*
+ * Render "DLUI rt, u"; rt via GPR(copy()), u via IMMEDIATE(copy()).
+ * NOTE(review): boilerplate 32-bit diagram; rt helper names bits 41..37.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DLUI_48_(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_41_40_39_38_37(instruction);
+    uint64 u_raw = extr_uil0il32bs32Fmsb63(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string u_op = IMMEDIATE(copy(u_raw));
+
+    return img::format("DLUI %s, %s", rt_op, u_op);
+}
+
+
+/*
+ * Render "DMFC0 rt, c0s, sel"; rt via GPR(), c0s via CPR(), sel immediate.
+ * NOTE(review): boilerplate diagram lists rs/rd; code reads c0s and sel.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DMFC0(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 c0s_raw = extract_c0s_20_19_18_17_16(instruction);
+    uint64 sel_raw = extract_sel_15_14_13_12_11(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string c0s_op = CPR(copy(c0s_raw));
+    std::string sel_op = IMMEDIATE(copy(sel_raw));
+
+    return img::format("DMFC0 %s, %s, %s", rt_op, c0s_op, sel_op);
+}
+
+
+/*
+ * Render "DMFC1 rt, fs"; rt is printed via GPR(), fs via FPR().
+ * NOTE(review): boilerplate diagram lists rs/rd; code reads fs instead.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DMFC1(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+
+    return img::format("DMFC1 %s, %s", rt_op, fs_op);
+}
+
+
+/*
+ * Render "DMFC2 rt, cs"; rt is printed via GPR(), cs via CPR().
+ * NOTE(review): boilerplate diagram lists rs/rd; the code reads rt and cs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DMFC2(uint64 instruction)
+{
+    uint64 cs_raw = extract_cs_20_19_18_17_16(instruction);
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string cs_op = CPR(copy(cs_raw));
+
+    return img::format("DMFC2 %s, %s", rt_op, cs_op);
+}
+
+
+/*
+ * Render "DMFGC0 rt, c0s, sel"; rt via GPR(), c0s via CPR(), sel immediate.
+ * NOTE(review): boilerplate diagram lists rs/rd; code reads c0s and sel.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DMFGC0(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 c0s_raw = extract_c0s_20_19_18_17_16(instruction);
+    uint64 sel_raw = extract_sel_15_14_13_12_11(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string c0s_op = CPR(copy(c0s_raw));
+    std::string sel_op = IMMEDIATE(copy(sel_raw));
+
+    return img::format("DMFGC0 %s, %s, %s", rt_op, c0s_op, sel_op);
+}
+
+
+/*
+ * Render "DMOD rd, rs, rt"; every operand is printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram; rd/rs helpers both say 20..16 - verify.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DMOD(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("DMOD %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * Render "DMODU rd, rs, rt"; every operand is printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram; rd/rs helpers both say 20..16 - verify.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DMODU(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("DMODU %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * Render "DMTC0 rt, c0s, sel"; rt via GPR(), c0s via CPR(), sel immediate.
+ * NOTE(review): boilerplate diagram lists rs/rd; code reads c0s and sel.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DMTC0(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 c0s_raw = extract_c0s_20_19_18_17_16(instruction);
+    uint64 sel_raw = extract_sel_15_14_13_12_11(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string c0s_op = CPR(copy(c0s_raw));
+    std::string sel_op = IMMEDIATE(copy(sel_raw));
+
+    return img::format("DMTC0 %s, %s, %s", rt_op, c0s_op, sel_op);
+}
+
+
+/*
+ * Render "DMTC1 rt, fs"; rt is printed via GPR(), fs via FPR().
+ * NOTE(review): boilerplate diagram lists rs/rd; code reads fs instead.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DMTC1(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string fs_op = FPR(copy(fs_raw));
+
+    return img::format("DMTC1 %s, %s", rt_op, fs_op);
+}
+
+
+/*
+ * Render "DMTC2 rt, cs"; rt is printed via GPR(), cs via CPR().
+ * NOTE(review): boilerplate diagram lists rs/rd; the code reads rt and cs.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DMTC2(uint64 instruction)
+{
+    uint64 cs_raw = extract_cs_20_19_18_17_16(instruction);
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string cs_op = CPR(copy(cs_raw));
+
+    return img::format("DMTC2 %s, %s", rt_op, cs_op);
+}
+
+
+/*
+ * Render "DMTGC0 rt, c0s, sel"; rt via GPR(), c0s via CPR(), sel immediate.
+ * NOTE(review): boilerplate diagram lists rs/rd; code reads c0s and sel.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DMTGC0(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 c0s_raw = extract_c0s_20_19_18_17_16(instruction);
+    uint64 sel_raw = extract_sel_15_14_13_12_11(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+    std::string c0s_op = CPR(copy(c0s_raw));
+    std::string sel_op = IMMEDIATE(copy(sel_raw));
+
+    return img::format("DMTGC0 %s, %s, %s", rt_op, c0s_op, sel_op);
+}
+
+
+/*
+ * Render "DMT rt"; rt is printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram lists rs/rd, which are never read here.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DMT(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("DMT %s", rt_op);
+}
+
+
+/*
+ * Render "DMUH rd, rs, rt"; every operand is printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram; rd/rs helpers both say 20..16 - verify.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DMUH(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("DMUH %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * Render "DMUHU rd, rs, rt"; every operand is printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram; rd/rs helpers both say 20..16 - verify.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DMUHU(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("DMUHU %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * Render "DMUL rd, rs, rt"; every operand is printed via GPR(copy()).
+ * NOTE(review): boilerplate diagram; rd/rs helpers both say 20..16 - verify.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DMUL(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_raw));
+    std::string rs_op = GPR(copy(rs_raw));
+    std::string rt_op = GPR(copy(rt_raw));
+
+    return img::format("DMUL %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DMULU(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DMULU %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPA_W_PH(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPA.W.PH %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPAQ_SA_L_W(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPAQ_SA.L.W %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPAQ_S_W_PH(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPAQ_S.W.PH %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPAQX_SA_W_PH(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPAQX_SA.W.PH %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPAQX_S_W_PH(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPAQX_S.W.PH %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPAU_H_QBL(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPAU.H.QBL %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPAU_H_QBR(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPAU.H.QBR %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPAX_W_PH(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPAX.W.PH %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPS_W_PH(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPS.W.PH %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPSQ_SA_L_W(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPSQ_SA.L.W %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPSQ_S_W_PH(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPSQ_S.W.PH %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPSQX_SA_W_PH(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPSQX_SA.W.PH %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPSQX_S_W_PH(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPSQX_S.W.PH %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPSU_H_QBL(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPSU.H.QBL %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPSU_H_QBR(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPSU.H.QBR %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DPSX_W_PH(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DPSX.W.PH %s, %s, %s", ac_str, rs_str, rt_str);
+}
+
+
+/*
+ * DROTR -
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DROTR(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 shift_bits = extract_shift_4_3_2_1_0(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string shift_str = IMMEDIATE(copy(shift_bits));
+    return img::format("DROTR %s, %s, %s", rt_str, rs_str, shift_str);
+}
+
+
+/*
+ * DROTR[32] -
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  10o000          1100xxx0110
+ *     rt -----
+ *          rs -----
+ *                       shift -----
+ */
+std::string NMD::DROTR32(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 shift_bits = extract_shift_4_3_2_1_0(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string shift_str = IMMEDIATE(copy(shift_bits));
+    return img::format("DROTR32 %s, %s, %s", rt_str, rs_str, shift_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DROTRV(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DROTRV %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DROTX(uint64 instruction)
+{
+    /* Decode the raw fields, then turn each into its printed operand. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 shift_bits = extract_shift_5_4_3_2_1_0(instruction);
+    const uint64 shiftx_bits = extract_shiftx_11_10_9_8_7_6(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string shift_str = IMMEDIATE(copy(shift_bits));
+    const std::string shiftx_str = IMMEDIATE(copy(shiftx_bits));
+    return img::format("DROTX %s, %s, %s, %s",
+                       rt_str, rs_str, shift_str, shiftx_str);
+}
+
+
+/*
+ * DSLL -
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  10o000          1100xxx0000
+ *     rt -----
+ *          rs -----
+ *                       shift -----
+ */
+std::string NMD::DSLL(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 shift_bits = extract_shift_4_3_2_1_0(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string shift_str = IMMEDIATE(copy(shift_bits));
+    return img::format("DSLL %s, %s, %s", rt_str, rs_str, shift_str);
+}
+
+
+/*
+ * DSLL[32] -
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  10o000          1100xxx0000
+ *     rt -----
+ *          rs -----
+ *                       shift -----
+ */
+std::string NMD::DSLL32(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 shift_bits = extract_shift_4_3_2_1_0(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string shift_str = IMMEDIATE(copy(shift_bits));
+    return img::format("DSLL32 %s, %s, %s", rt_str, rs_str, shift_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DSLLV(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DSLLV %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * DSRA -
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  10o000          1100xxx0100
+ *     rt -----
+ *          rs -----
+ *                       shift -----
+ */
+std::string NMD::DSRA(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 shift_bits = extract_shift_4_3_2_1_0(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string shift_str = IMMEDIATE(copy(shift_bits));
+    return img::format("DSRA %s, %s, %s", rt_str, rs_str, shift_str);
+}
+
+
+/*
+ * DSRA[32] -
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  10o000          1100xxx0100
+ *     rt -----
+ *          rs -----
+ *                       shift -----
+ */
+std::string NMD::DSRA32(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 shift_bits = extract_shift_4_3_2_1_0(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string shift_str = IMMEDIATE(copy(shift_bits));
+    return img::format("DSRA32 %s, %s, %s", rt_str, rs_str, shift_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DSRAV(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DSRAV %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * DSRL -
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  10o000          1100xxx0100
+ *     rt -----
+ *          rs -----
+ *                       shift -----
+ */
+std::string NMD::DSRL(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 shift_bits = extract_shift_4_3_2_1_0(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string shift_str = IMMEDIATE(copy(shift_bits));
+    return img::format("DSRL %s, %s, %s", rt_str, rs_str, shift_str);
+}
+
+
+/*
+ * DSRL[32] -
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  10o000          1100xxx0010
+ *     rt -----
+ *          rs -----
+ *                       shift -----
+ */
+std::string NMD::DSRL32(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 shift_bits = extract_shift_4_3_2_1_0(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string shift_str = IMMEDIATE(copy(shift_bits));
+    return img::format("DSRL32 %s, %s, %s", rt_str, rs_str, shift_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DSRLV(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DSRLV %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DSUB(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DSUB %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DSUBU(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    return img::format("DSUBU %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DVPE(uint64 instruction)
+{
+    /* Single operand: the rt field, printed through GPR(). */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("DVPE %s", rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::DVP(uint64 instruction)
+{
+    /* Single operand: the rt field, printed through GPR(). */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("DVP %s", rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EHB(uint64 instruction)
+{
+    /* No operands are printed; the instruction word is not inspected. */
+    static_cast<void>(instruction);
+    return std::string("EHB ");
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EI(uint64 instruction)
+{
+    /* Single operand: the rt field, printed through GPR(). */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("EI %s", rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EMT(uint64 instruction)
+{
+    /* Single operand: the rt field, printed through GPR(). */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("EMT %s", rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ERET(uint64 instruction)
+{
+    /* No operands are printed; the instruction word is not inspected. */
+    static_cast<void>(instruction);
+    return std::string("ERET ");
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ERETNC(uint64 instruction)
+{
+    /* No operands are printed; the instruction word is not inspected. */
+    static_cast<void>(instruction);
+    return std::string("ERETNC ");
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EVP(uint64 instruction)
+{
+    /* Single operand: the rt field, printed through GPR(). */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("EVP %s", rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EVPE(uint64 instruction)
+{
+    /* Single operand: the rt field, printed through GPR(). */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("EVPE %s", rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EXT(uint64 instruction)
+{
+    /* Decode the raw fields, then turn each into its printed operand. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 msbd_bits = extract_msbt_10_9_8_7_6(instruction);
+    const uint64 lsb_bits = extract_lsb_4_3_2_1_0(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string lsb_str = IMMEDIATE(copy(lsb_bits));
+    const std::string msbd_str = IMMEDIATE(encode_msbd_from_size(msbd_bits));
+    return img::format("EXT %s, %s, %s, %s",
+                       rt_str, rs_str, lsb_str, msbd_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EXTD(uint64 instruction)
+{
+    /* Decode the raw fields, then turn each into its printed operand. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 shift_bits = extract_shift_10_9_8_7_6(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string shift_str = IMMEDIATE(copy(shift_bits));
+    return img::format("EXTD %s, %s, %s, %s",
+                       rd_str, rs_str, rt_str, shift_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EXTD32(uint64 instruction)
+{
+    /* Decode the raw fields, then turn each into its printed operand. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 shift_bits = extract_shift_10_9_8_7_6(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string shift_str = IMMEDIATE(copy(shift_bits));
+    return img::format("EXTD32 %s, %s, %s, %s",
+                       rd_str, rs_str, rt_str, shift_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EXTPDP(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 size_bits = extract_size_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string size_str = IMMEDIATE(copy(size_bits));
+    return img::format("EXTPDP %s, %s, %s", rt_str, ac_str, size_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EXTPDPV(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    return img::format("EXTPDPV %s, %s, %s", rt_str, ac_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EXTP(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 size_bits = extract_size_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string size_str = IMMEDIATE(copy(size_bits));
+    return img::format("EXTP %s, %s, %s", rt_str, ac_str, size_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EXTPV(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    return img::format("EXTPV %s, %s, %s", rt_str, ac_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EXTR_RS_W(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 shift_bits = extract_shift_20_19_18_17_16(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string shift_str = IMMEDIATE(copy(shift_bits));
+    return img::format("EXTR_RS.W %s, %s, %s", rt_str, ac_str, shift_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EXTR_R_W(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 shift_bits = extract_shift_20_19_18_17_16(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string shift_str = IMMEDIATE(copy(shift_bits));
+    return img::format("EXTR_R.W %s, %s, %s", rt_str, ac_str, shift_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EXTR_S_H(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 shift_bits = extract_shift_20_19_18_17_16(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string shift_str = IMMEDIATE(copy(shift_bits));
+    return img::format("EXTR_S.H %s, %s, %s", rt_str, ac_str, shift_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EXTR_W(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 shift_bits = extract_shift_20_19_18_17_16(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string shift_str = IMMEDIATE(copy(shift_bits));
+    return img::format("EXTR.W %s, %s, %s", rt_str, ac_str, shift_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EXTRV_RS_W(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    return img::format("EXTRV_RS.W %s, %s, %s", rt_str, ac_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EXTRV_R_W(uint64 instruction)
+{
+    /* Raw operand fields taken from the instruction word. */
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* Operand text, built in the order it is printed. */
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string ac_str = AC(copy(ac_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    return img::format("EXTRV_R.W %s, %s, %s", rt_str, ac_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EXTRV_S_H(uint64 instruction)
+{
+    /* Formats the instruction word as "EXTRV_S.H rt, ac, rs". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 ac_raw = extract_ac_13_12(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string ac_str = AC(copy(ac_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("EXTRV_S.H %s, %s, %s", rt_str, ac_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::EXTRV_W(uint64 instruction)
+{
+    /* Formats the instruction word as "EXTRV.W rt, ac, rs". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 ac_raw = extract_ac_13_12(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string ac_str = AC(copy(ac_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("EXTRV.W %s, %s, %s", rt_str, ac_str, rs_str);
+}
+
+
+/*
+ * EXTW - Extract Word
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000                    011111
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ *                 shift -----
+ */
+std::string NMD::EXTW(uint64 instruction)
+{
+    /* "EXTW rd, rs, rt, shift". NOTE(review): rd, rs both named bits 20..16. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 shift_raw = extract_shift_10_9_8_7_6(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    std::string rd_str = GPR(copy(rd_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string shift_str = IMMEDIATE(copy(shift_raw));
+    return img::format("EXTW %s, %s, %s, %s",
+                       rd_str, rs_str, rt_str, shift_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::FLOOR_L_D(uint64 instruction)
+{
+    /* Formats the instruction word as "FLOOR.L.D ft, fs". */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    return img::format("FLOOR.L.D %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::FLOOR_L_S(uint64 instruction)
+{
+    /* Formats the instruction word as "FLOOR.L.S ft, fs". */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    return img::format("FLOOR.L.S %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::FLOOR_W_D(uint64 instruction)
+{
+    /* Formats the instruction word as "FLOOR.W.D ft, fs". */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    return img::format("FLOOR.W.D %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::FLOOR_W_S(uint64 instruction)
+{
+    /* Formats the instruction word as "FLOOR.W.S ft, fs". */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    return img::format("FLOOR.W.S %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::FORK(uint64 instruction)
+{
+    /* Formats the instruction word as "FORK rd, rs, rt". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    /* NOTE(review): rd and rs extractors both name bits 20..16 - verify. */
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    std::string rd_str = GPR(copy(rd_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("FORK %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::HYPCALL(uint64 instruction)
+{
+    /* Formats the instruction word as "HYPCALL code". */
+    uint64 code_raw = extract_code_17_to_0(instruction);
+
+    std::string code_str = IMMEDIATE(copy(code_raw));
+    return img::format("HYPCALL %s", code_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::HYPCALL_16_(uint64 instruction)
+{
+    /* Formats the instruction word as "HYPCALL code" (no "[16]" suffix). */
+    uint64 code_raw = extract_code_1_0(instruction);
+
+    std::string code_str = IMMEDIATE(copy(code_raw));
+    return img::format("HYPCALL %s", code_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::INS(uint64 instruction)
+{
+    /* Formats the instruction word as "INS rt, rs, pos, size". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 msbd_raw = extract_msbt_10_9_8_7_6(instruction);
+    uint64 lsb_raw = extract_lsb_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string pos_str = IMMEDIATE(encode_lsb_from_pos_and_size(lsb_raw));
+    /* Hand edited: no size conversion function, so the lsb one is reused. */
+    std::string size_str = IMMEDIATE(encode_lsb_from_pos_and_size(msbd_raw));
+    return img::format("INS %s, %s, %s, %s",
+                       rt_str, rs_str, pos_str, size_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::INSV(uint64 instruction)
+{
+    /* Formats the instruction word as "INSV rt, rs". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("INSV %s, %s", rt_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::IRET(uint64 instruction)
+{
+    /* No operand fields are decoded; the instruction word is unused. */
+    static_cast<void>(instruction);
+    return "IRET ";
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::JALRC_16_(uint64 instruction)
+{
+    /* Formats the instruction word as "JALRC $31, rt" ($31 is hard-coded). */
+    uint64 rt_raw = extract_rt_9_8_7_6_5(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("JALRC $%d, %s", 31, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::JALRC_32_(uint64 instruction)
+{
+    /* Formats the instruction word as "JALRC rt, rs". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("JALRC %s, %s", rt_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::JALRC_HB(uint64 instruction)
+{
+    /* Formats the instruction word as "JALRC.HB rt, rs". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("JALRC.HB %s, %s", rt_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::JRC(uint64 instruction)
+{
+    /* Formats the instruction word as "JRC rt". */
+    uint64 rt_raw = extract_rt_9_8_7_6_5(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("JRC %s", rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LB_16_(uint64 instruction)
+{
+    /* Formats the instruction word as "LB rt3, u(rs3)". */
+    uint64 u_raw = extract_u_1_0(instruction);
+    uint64 rt3_raw = extract_rt3_9_8_7(instruction);
+    uint64 rs3_raw = extract_rs3_6_5_4(instruction);
+
+    std::string rt3_str = GPR(encode_gpr3(rt3_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string rs3_str = GPR(encode_gpr3(rs3_raw));
+    return img::format("LB %s, %s(%s)", rt3_str, u_str, rs3_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LB_GP_(uint64 instruction)
+{
+    /* Formats the instruction word as "LB rt, u($28)" ($28 is hard-coded). */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_17_to_0(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    return img::format("LB %s, %s($%d)", rt_str, u_str, 28);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LB_S9_(uint64 instruction)
+{
+    /* Formats the instruction word as "LB rt, s(rs)"; s is a signed field. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LB %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LB_U12_(uint64 instruction)
+{
+    /* Formats the instruction word as "LB rt, u(rs)". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LB %s, %s(%s)", rt_str, u_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LBE(uint64 instruction)
+{
+    /* Formats the instruction word as "LBE rt, s(rs)"; s is a signed field. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LBE %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LBU_16_(uint64 instruction)
+{
+    /* Formats the instruction word as "LBU rt3, u(rs3)". */
+    uint64 u_raw = extract_u_1_0(instruction);
+    uint64 rt3_raw = extract_rt3_9_8_7(instruction);
+    uint64 rs3_raw = extract_rs3_6_5_4(instruction);
+
+    std::string rt3_str = GPR(encode_gpr3(rt3_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string rs3_str = GPR(encode_gpr3(rs3_raw));
+    return img::format("LBU %s, %s(%s)", rt3_str, u_str, rs3_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LBU_GP_(uint64 instruction)
+{
+    /* Formats the instruction word as "LBU rt, u($28)" ($28 is hard-coded). */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_17_to_0(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    return img::format("LBU %s, %s($%d)", rt_str, u_str, 28);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LBU_S9_(uint64 instruction)
+{
+    /* Formats the instruction word as "LBU rt, s(rs)"; s is a signed field. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LBU %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LBU_U12_(uint64 instruction)
+{
+    /* Formats the instruction word as "LBU rt, u(rs)". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LBU %s, %s(%s)", rt_str, u_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LBUE(uint64 instruction)
+{
+    /* Formats the instruction word as "LBUE rt, s(rs)"; s is a signed field. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LBUE %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LBUX(uint64 instruction)
+{
+    /* Formats the instruction word as "LBUX rd, rs(rt)". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    /* NOTE(review): rd and rs extractors both name bits 20..16 - verify. */
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    std::string rd_str = GPR(copy(rd_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("LBUX %s, %s(%s)", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LBX(uint64 instruction)
+{
+    /* Formats the instruction word as "LBX rd, rs(rt)". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    /* NOTE(review): rd and rs extractors both name bits 20..16 - verify. */
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    std::string rd_str = GPR(copy(rd_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("LBX %s, %s(%s)", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LD_GP_(uint64 instruction)
+{
+    /* Formats the instruction word as "LD rt, u($28)" ($28 is hard-coded). */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extr_uil3il3bs18Fmsb20(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    return img::format("LD %s, %s($%d)", rt_str, u_str, 28);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LD_S9_(uint64 instruction)
+{
+    /* Formats the instruction word as "LD rt, s(rs)"; s is a signed field. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LD %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LD_U12_(uint64 instruction)
+{
+    /* Formats the instruction word as "LD rt, u(rs)". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LD %s, %s(%s)", rt_str, u_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LDC1_GP_(uint64 instruction)
+{
+    /* Formats the instruction word as "LDC1 ft, u($28)" ($28 is hard-coded). */
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 u_raw = extr_uil2il2bs16Fmsb17(instruction);
+
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    return img::format("LDC1 %s, %s($%d)", ft_str, u_str, 28);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LDC1_S9_(uint64 instruction)
+{
+    /* Formats the instruction word as "LDC1 ft, s(rs)"; s is a signed field. */
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    /* NOTE(review): ft and rs extractors both name bits 20..16 - verify. */
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LDC1 %s, %s(%s)", ft_str, s_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LDC1_U12_(uint64 instruction)
+{
+    /* Formats the instruction word as "LDC1 ft, u(rs)". */
+    /* NOTE(review): ft and rs extractors both name bits 20..16 - verify. */
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LDC1 %s, %s(%s)", ft_str, u_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LDC1XS(uint64 instruction)
+{
+    /* Formats the instruction word as "LDC1XS ft, rs(rt)". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 ft_raw = extract_ft_15_14_13_12_11(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("LDC1XS %s, %s(%s)", ft_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LDC1X(uint64 instruction)
+{
+    /* Formats the instruction word as "LDC1X ft, rs(rt)". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 ft_raw = extract_ft_15_14_13_12_11(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("LDC1X %s, %s(%s)", ft_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LDC2(uint64 instruction)
+{
+    /* Formats the instruction word as "LDC2 ct, s(rs)"; s is a signed field. */
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 ct_raw = extract_ct_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string ct_str = CPR(copy(ct_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LDC2 %s, %s(%s)", ct_str, s_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LDM(uint64 instruction)
+{
+    /* Formats the instruction word as "LDM rt, s(rs), count3". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 count3_raw = extract_count3_14_13_12(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string count3_str = IMMEDIATE(encode_count3_from_count(count3_raw));
+    return img::format("LDM %s, %s(%s), %s",
+                       rt_str, s_str, rs_str, count3_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LDPC_48_(uint64 instruction)
+{
+    /* Formats the instruction word as "LDPC rt, s"; s goes via ADDRESS(). */
+    uint64 rt_raw = extract_rt_41_40_39_38_37(instruction);
+    int64 s_raw = extr_sil0il16bs16_il16il0bs16Tmsb31(instruction);
+    std::string rt_str = GPR(copy(rt_raw));
+    /* NOTE(review): 6 is presumably the instruction size in bytes - confirm. */
+    std::string s_str = ADDRESS(encode_s_from_address(s_raw), 6);
+    return img::format("LDPC %s, %s", rt_str, s_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LDX(uint64 instruction)
+{
+    /* Formats the instruction word as "LDX rd, rs(rt)". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    /* NOTE(review): rd and rs extractors both name bits 20..16 - verify. */
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    std::string rd_str = GPR(copy(rd_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("LDX %s, %s(%s)", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LDXS(uint64 instruction)
+{
+    /* Formats the instruction word as "LDXS rd, rs(rt)". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    /* NOTE(review): rd and rs extractors both name bits 20..16 - verify. */
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    std::string rd_str = GPR(copy(rd_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("LDXS %s, %s(%s)", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LH_16_(uint64 instruction)
+{
+    /* Formats the instruction word as "LH rt3, u(rs3)". */
+    uint64 u_raw = extr_uil1il1bs2Fmsb2(instruction);
+    uint64 rt3_raw = extract_rt3_9_8_7(instruction);
+    uint64 rs3_raw = extract_rs3_6_5_4(instruction);
+
+    std::string rt3_str = GPR(encode_gpr3(rt3_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string rs3_str = GPR(encode_gpr3(rs3_raw));
+    return img::format("LH %s, %s(%s)", rt3_str, u_str, rs3_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LH_GP_(uint64 instruction)
+{
+    /* Formats the instruction word as "LH rt, u($28)" ($28 is hard-coded). */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extr_uil1il1bs17Fmsb17(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    return img::format("LH %s, %s($%d)", rt_str, u_str, 28);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LH_S9_(uint64 instruction)
+{
+    /* Formats the instruction word as "LH rt, s(rs)"; s is a signed field. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LH %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LH_U12_(uint64 instruction)
+{
+    /* Formats the instruction word as "LH rt, u(rs)". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LH %s, %s(%s)", rt_str, u_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LHE(uint64 instruction)
+{
+    /* Formats the instruction word as "LHE rt, s(rs)"; s is a signed field. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LHE %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LHU_16_(uint64 instruction)
+{
+    /* Formats the instruction word as "LHU rt3, u(rs3)". */
+    uint64 u_raw = extr_uil1il1bs2Fmsb2(instruction);
+    uint64 rt3_raw = extract_rt3_9_8_7(instruction);
+    uint64 rs3_raw = extract_rs3_6_5_4(instruction);
+
+    std::string rt3_str = GPR(encode_gpr3(rt3_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string rs3_str = GPR(encode_gpr3(rs3_raw));
+    return img::format("LHU %s, %s(%s)", rt3_str, u_str, rs3_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LHU_GP_(uint64 instruction)
+{
+    /* Formats the instruction word as "LHU rt, u($28)" ($28 is hard-coded). */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extr_uil1il1bs17Fmsb17(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    return img::format("LHU %s, %s($%d)", rt_str, u_str, 28);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LHU_S9_(uint64 instruction)
+{
+    /* Formats the instruction word as "LHU rt, s(rs)"; s is a signed field. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LHU %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LHU_U12_(uint64 instruction)
+{
+    /* Formats the instruction word as "LHU rt, u(rs)". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LHU %s, %s(%s)", rt_str, u_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LHUE(uint64 instruction)
+{
+    /* Formats the instruction word as "LHUE rt, s(rs)"; s is a signed field. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("LHUE %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LHUX(uint64 instruction)
+{
+    /* Formats the instruction word as "LHUX rd, rs(rt)". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    /* NOTE(review): rd and rs extractors both name bits 20..16 - verify. */
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    std::string rd_str = GPR(copy(rd_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("LHUX %s, %s(%s)", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LHUXS(uint64 instruction)
+{
+    /* Formats the instruction word as "LHUXS rd, rs(rt)". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    /* NOTE(review): rd and rs extractors both name bits 20..16 - verify. */
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    std::string rd_str = GPR(copy(rd_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("LHUXS %s, %s(%s)", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ *
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::LHXS(uint64 instruction)
+{
+    /* Formats the instruction word as "LHXS rd, rs(rt)". */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    /* NOTE(review): rd and rs extractors both name bits 20..16 - verify. */
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    std::string rd_str = GPR(copy(rd_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("LHXS %s, %s(%s)", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * NMD::LHX - render an LHX instruction word as text.
+ *
+ * Returns "LHX <rd>, <rs>(<rt>)"; all three operands go through
+ * copy() and GPR().
+ *
+ * NOTE(review): the rd and rs extractors are both named for bits
+ * 20..16 - confirm in their definitions that the fields really are
+ * distinct.
+ */
+std::string NMD::LHX(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rd_op(GPR(copy(rd_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+    const std::string rt_op(GPR(copy(rt_bits)));
+
+    return img::format("LHX %s, %s(%s)", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * NMD::LI_16_ - render this LI variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt3 - extract_rt3_9_8_7(), mapped by encode_gpr3(), via GPR()
+ *   eu  - extract_eu_6_5_4_3_2_1_0(), mapped by
+ *         encode_eu_from_s_li16(), via IMMEDIATE()
+ *
+ * Returns "LI <rt3>, <eu>".
+ */
+std::string NMD::LI_16_(uint64 instruction)
+{
+    const uint64 eu_bits = extract_eu_6_5_4_3_2_1_0(instruction);
+    const uint64 rt3_bits = extract_rt3_9_8_7(instruction);
+
+    const std::string rt3_op(GPR(encode_gpr3(rt3_bits)));
+    const std::string eu_op(IMMEDIATE(encode_eu_from_s_li16(eu_bits)));
+
+    return img::format("LI %s, %s", rt3_op, eu_op);
+}
+
+
+/*
+ * NMD::LI_48_ - render this LI variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_41_40_39_38_37(), shown via GPR()
+ *   s  - extr_sil0il16bs16_il16il0bs16Tmsb31() (int64), IMMEDIATE()
+ *
+ * Returns "LI <rt>, <s>"; the string comes straight from
+ * img::format().
+ */
+std::string NMD::LI_48_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_41_40_39_38_37(instruction);
+    const int64 s_bits = extr_sil0il16bs16_il16il0bs16Tmsb31(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string s_op(IMMEDIATE(copy(s_bits)));
+
+    return img::format("LI %s, %s", rt_op, s_op);
+}
+
+
+/*
+ * NMD::LL - render an LL instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   s  - extr_sil2il2bs6_il15il8bs1Tmsb8() (int64), via IMMEDIATE()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *
+ * Returns "LL <rt>, <s>(<rs>)".
+ */
+std::string NMD::LL(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const int64 s_bits = extr_sil2il2bs6_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string s_op(IMMEDIATE(copy(s_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+
+    return img::format("LL %s, %s(%s)", rt_op, s_op, rs_op);
+}
+
+
+/*
+ * NMD::LLD - render an LLD instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   s  - extr_sil3il3bs5_il15il8bs1Tmsb8() (int64), via IMMEDIATE()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *
+ * Returns "LLD <rt>, <s>(<rs>)".
+ */
+std::string NMD::LLD(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const int64 s_bits = extr_sil3il3bs5_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string s_op(IMMEDIATE(copy(s_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+
+    return img::format("LLD %s, %s(%s)", rt_op, s_op, rs_op);
+}
+
+
+/*
+ * NMD::LLDP - render an LLDP instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   ru - extract_ru_7_6_5_4_3(), shown via GPR()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *
+ * Returns "LLDP <rt>, <ru>, (<rs>)".
+ */
+std::string NMD::LLDP(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ru_bits = extract_ru_7_6_5_4_3(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string ru_op(GPR(copy(ru_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+
+    return img::format("LLDP %s, %s, (%s)", rt_op, ru_op, rs_op);
+}
+
+
+/*
+ * NMD::LLE - render an LLE instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   s  - extr_sil2il2bs6_il15il8bs1Tmsb8() (int64), via IMMEDIATE()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *
+ * Returns "LLE <rt>, <s>(<rs>)".
+ */
+std::string NMD::LLE(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const int64 s_bits = extr_sil2il2bs6_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string s_op(IMMEDIATE(copy(s_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+
+    return img::format("LLE %s, %s(%s)", rt_op, s_op, rs_op);
+}
+
+
+/*
+ * NMD::LLWP - render an LLWP instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   ru - extract_ru_7_6_5_4_3(), shown via GPR()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *
+ * Returns "LLWP <rt>, <ru>, (<rs>)".
+ */
+std::string NMD::LLWP(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ru_bits = extract_ru_7_6_5_4_3(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string ru_op(GPR(copy(ru_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+
+    return img::format("LLWP %s, %s, (%s)", rt_op, ru_op, rs_op);
+}
+
+
+/*
+ * NMD::LLWPE - render an LLWPE instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   ru - extract_ru_7_6_5_4_3(), shown via GPR()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *
+ * Returns "LLWPE <rt>, <ru>, (<rs>)".
+ */
+std::string NMD::LLWPE(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ru_bits = extract_ru_7_6_5_4_3(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string ru_op(GPR(copy(ru_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+
+    return img::format("LLWPE %s, %s, (%s)", rt_op, ru_op, rs_op);
+}
+
+
+/*
+ * NMD::LSA - render an LSA instruction word as text.
+ *
+ * Returns "LSA <rd>, <rs>, <rt>, <u2>": rd, rs and rt go through
+ * GPR(), and u2 (extract_u2_10_9) through IMMEDIATE().
+ *
+ * NOTE(review): the rd and rs extractors are both named for bits
+ * 20..16 - confirm in their definitions that the fields really are
+ * distinct.
+ */
+std::string NMD::LSA(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 u2_bits = extract_u2_10_9(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rd_op(GPR(copy(rd_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string u2_op(IMMEDIATE(copy(u2_bits)));
+
+    return img::format("LSA %s, %s, %s, %s", rd_op, rs_op, rt_op, u2_op);
+}
+
+
+/*
+ * NMD::LUI - render an LUI instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   s  - extr_sil0il31bs1_il2il21bs10_il12il12bs9Tmsb31() (int64),
+ *        shown via IMMEDIATE()
+ *
+ * Returns "LUI <rt>, %hi(<s>)" ("%%" in the format is a literal '%').
+ */
+std::string NMD::LUI(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    int64 s_bits = extr_sil0il31bs1_il2il21bs10_il12il12bs9Tmsb31(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string s_op(IMMEDIATE(copy(s_bits)));
+
+    return img::format("LUI %s, %%hi(%s)", rt_op, s_op);
+}
+
+
+/*
+ * NMD::LW_16_ - render this LW variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt3 - extract_rt3_9_8_7(), mapped by encode_gpr3(), via GPR()
+ *   u   - extr_uil0il2bs4Fmsb5(), shown via IMMEDIATE()
+ *   rs3 - extract_rs3_6_5_4(), mapped by encode_gpr3(), via GPR()
+ *
+ * Returns "LW <rt3>, <u>(<rs3>)".
+ */
+std::string NMD::LW_16_(uint64 instruction)
+{
+    const uint64 u_bits = extr_uil0il2bs4Fmsb5(instruction);
+    const uint64 rt3_bits = extract_rt3_9_8_7(instruction);
+    const uint64 rs3_bits = extract_rs3_6_5_4(instruction);
+
+    const std::string rt3_op(GPR(encode_gpr3(rt3_bits)));
+    const std::string u_op(IMMEDIATE(copy(u_bits)));
+    const std::string rs3_op(GPR(encode_gpr3(rs3_bits)));
+
+    return img::format("LW %s, %s(%s)", rt3_op, u_op, rs3_op);
+}
+
+
+/*
+ * NMD::LW_4X4_ - render this LW variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt4 - extract_rt4_9_7_6_5(), mapped by encode_gpr4(), via GPR()
+ *   u   - extr_uil3il3bs1_il8il2bs1Fmsb3(), shown via IMMEDIATE()
+ *   rs4 - extract_rs4_4_2_1_0(), mapped by encode_gpr4(), via GPR()
+ *
+ * Returns "LW <rt4>, <u>(<rs4>)".
+ */
+std::string NMD::LW_4X4_(uint64 instruction)
+{
+    const uint64 rs4_bits = extract_rs4_4_2_1_0(instruction);
+    const uint64 rt4_bits = extract_rt4_9_7_6_5(instruction);
+    const uint64 u_bits = extr_uil3il3bs1_il8il2bs1Fmsb3(instruction);
+
+    const std::string rt4_op(GPR(encode_gpr4(rt4_bits)));
+    const std::string u_op(IMMEDIATE(copy(u_bits)));
+    const std::string rs4_op(GPR(encode_gpr4(rs4_bits)));
+
+    return img::format("LW %s, %s(%s)", rt4_op, u_op, rs4_op);
+}
+
+
+/*
+ * NMD::LW_GP_ - render this LW variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   u  - extr_uil2il2bs19Fmsb20(), shown via IMMEDIATE()
+ *
+ * Returns "LW <rt>, <u>($28)"; the register number 28 is a fixed
+ * part of the output, not decoded from the instruction.
+ */
+std::string NMD::LW_GP_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 u_bits = extr_uil2il2bs19Fmsb20(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string u_op(IMMEDIATE(copy(u_bits)));
+
+    return img::format("LW %s, %s($%d)", rt_op, u_op, 28);
+}
+
+
+/*
+ * NMD::LW_GP16_ - render this LW variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt3 - extract_rt3_9_8_7(), mapped by encode_gpr3(), via GPR()
+ *   u   - extr_uil0il2bs7Fmsb8(), shown via IMMEDIATE()
+ *
+ * Returns "LW <rt3>, <u>($28)"; the register number 28 is a fixed
+ * part of the output, not decoded from the instruction.
+ */
+std::string NMD::LW_GP16_(uint64 instruction)
+{
+    const uint64 u_bits = extr_uil0il2bs7Fmsb8(instruction);
+    const uint64 rt3_bits = extract_rt3_9_8_7(instruction);
+
+    const std::string rt3_op(GPR(encode_gpr3(rt3_bits)));
+    const std::string u_op(IMMEDIATE(copy(u_bits)));
+
+    return img::format("LW %s, %s($%d)", rt3_op, u_op, 28);
+}
+
+
+/*
+ * NMD::LW_S9_ - render this LW variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   s  - extr_sil0il0bs8_il15il8bs1Tmsb8() (int64), via IMMEDIATE()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *
+ * Returns "LW <rt>, <s>(<rs>)".
+ */
+std::string NMD::LW_S9_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const int64 s_bits = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string s_op(IMMEDIATE(copy(s_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+
+    return img::format("LW %s, %s(%s)", rt_op, s_op, rs_op);
+}
+
+
+/*
+ * NMD::LW_SP_ - render this LW variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_9_8_7_6_5(), shown via GPR()
+ *   u  - extr_uil0il2bs5Fmsb6(), shown via IMMEDIATE()
+ *
+ * Returns "LW <rt>, <u>($29)"; the register number 29 is a fixed
+ * part of the output, not decoded from the instruction.
+ */
+std::string NMD::LW_SP_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_9_8_7_6_5(instruction);
+    const uint64 u_bits = extr_uil0il2bs5Fmsb6(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string u_op(IMMEDIATE(copy(u_bits)));
+
+    return img::format("LW %s, %s($%d)", rt_op, u_op, 29);
+}
+
+
+/*
+ * NMD::LW_U12_ - render this LW variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   u  - extract_u_11_10_9_8_7_6_5_4_3_2_1_0(), via IMMEDIATE()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *
+ * Returns "LW <rt>, <u>(<rs>)".
+ */
+std::string NMD::LW_U12_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 u_bits = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string u_op(IMMEDIATE(copy(u_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+
+    return img::format("LW %s, %s(%s)", rt_op, u_op, rs_op);
+}
+
+
+/*
+ * NMD::LWC1_GP_ - render this LWC1 variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   ft - extract_ft_20_19_18_17_16(), shown via FPR()
+ *   u  - extr_uil2il2bs16Fmsb17(), shown via IMMEDIATE()
+ *
+ * Returns "LWC1 <ft>, <u>($28)"; the register number 28 is a fixed
+ * part of the output, not decoded from the instruction.
+ */
+std::string NMD::LWC1_GP_(uint64 instruction)
+{
+    const uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+    const uint64 u_bits = extr_uil2il2bs16Fmsb17(instruction);
+
+    const std::string ft_op(FPR(copy(ft_bits)));
+    const std::string u_op(IMMEDIATE(copy(u_bits)));
+
+    return img::format("LWC1 %s, %s($%d)", ft_op, u_op, 28);
+}
+
+
+/*
+ * NMD::LWC1_S9_ - render this LWC1 variant's instruction word as text.
+ *
+ * Returns "LWC1 <ft>, <s>(<rs>)": ft goes through FPR(), the int64
+ * value s through IMMEDIATE() and rs through GPR().
+ *
+ * NOTE(review): the ft and rs extractors are both named for bits
+ * 20..16 - confirm in their definitions that the fields really are
+ * distinct.
+ */
+std::string NMD::LWC1_S9_(uint64 instruction)
+{
+    const int64 s_bits = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string ft_op(FPR(copy(ft_bits)));
+    const std::string s_op(IMMEDIATE(copy(s_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+
+    return img::format("LWC1 %s, %s(%s)", ft_op, s_op, rs_op);
+}
+
+
+/*
+ * NMD::LWC1_U12_ - render this LWC1 variant's instruction word as text.
+ *
+ * Returns "LWC1 <ft>, <u>(<rs>)": ft goes through FPR(), u through
+ * IMMEDIATE() and rs through GPR().
+ *
+ * NOTE(review): the ft and rs extractors are both named for bits
+ * 20..16 - confirm in their definitions that the fields really are
+ * distinct.
+ */
+std::string NMD::LWC1_U12_(uint64 instruction)
+{
+    const uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+    const uint64 u_bits = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string ft_op(FPR(copy(ft_bits)));
+    const std::string u_op(IMMEDIATE(copy(u_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+
+    return img::format("LWC1 %s, %s(%s)", ft_op, u_op, rs_op);
+}
+
+
+/*
+ * NMD::LWC1X - render an LWC1X instruction word as text.
+ *
+ * Operands, in output order:
+ *   ft - extract_ft_15_14_13_12_11(), shown via FPR()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *
+ * Returns "LWC1X <ft>, <rs>(<rt>)".
+ */
+std::string NMD::LWC1X(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ft_bits = extract_ft_15_14_13_12_11(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string ft_op(FPR(copy(ft_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+    const std::string rt_op(GPR(copy(rt_bits)));
+
+    return img::format("LWC1X %s, %s(%s)", ft_op, rs_op, rt_op);
+}
+
+
+/*
+ * NMD::LWC1XS - render an LWC1XS instruction word as text.
+ *
+ * Operands, in output order:
+ *   ft - extract_ft_15_14_13_12_11(), shown via FPR()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *
+ * Returns "LWC1XS <ft>, <rs>(<rt>)".
+ */
+std::string NMD::LWC1XS(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ft_bits = extract_ft_15_14_13_12_11(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string ft_op(FPR(copy(ft_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+    const std::string rt_op(GPR(copy(rt_bits)));
+
+    return img::format("LWC1XS %s, %s(%s)", ft_op, rs_op, rt_op);
+}
+
+
+/*
+ * NMD::LWC2 - render an LWC2 instruction word as text.
+ *
+ * Operands, in output order:
+ *   ct - extract_ct_25_24_23_22_21(), shown via CPR()
+ *   s  - extr_sil0il0bs8_il15il8bs1Tmsb8() (int64), via IMMEDIATE()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *
+ * Returns "LWC2 <ct>, <s>(<rs>)".
+ */
+std::string NMD::LWC2(uint64 instruction)
+{
+    const int64 s_bits = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 ct_bits = extract_ct_25_24_23_22_21(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string ct_op(CPR(copy(ct_bits)));
+    const std::string s_op(IMMEDIATE(copy(s_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+
+    return img::format("LWC2 %s, %s(%s)", ct_op, s_op, rs_op);
+}
+
+
+/*
+ * NMD::LWE - render an LWE instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   s  - extr_sil0il0bs8_il15il8bs1Tmsb8() (int64), via IMMEDIATE()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *
+ * Returns "LWE <rt>, <s>(<rs>)".
+ */
+std::string NMD::LWE(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const int64 s_bits = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string s_op(IMMEDIATE(copy(s_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+
+    return img::format("LWE %s, %s(%s)", rt_op, s_op, rs_op);
+}
+
+
+/*
+ * NMD::LWM - render an LWM instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt     - extract_rt_25_24_23_22_21(), via GPR()
+ *   s      - extr_sil0il0bs8_il15il8bs1Tmsb8() (int64), IMMEDIATE()
+ *   rs     - extract_rs_20_19_18_17_16(), via GPR()
+ *   count3 - extract_count3_14_13_12(), encode_count3_from_count()
+ * Returns "LWM <rt>, <s>(<rs>), <count3>".
+ */
+std::string NMD::LWM(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 count3_bits = extract_count3_14_13_12(instruction);
+    const int64 s_bits = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string s_op(IMMEDIATE(copy(s_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+    std::string count3_op(IMMEDIATE(encode_count3_from_count(count3_bits)));
+
+    return img::format("LWM %s, %s(%s), %s", rt_op, s_op, rs_op, count3_op);
+}
+
+
+/*
+ * NMD::LWPC_48_ - render this LWPC variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_41_40_39_38_37(), shown via GPR()
+ *   s  - extr_sil0il16bs16_il16il0bs16Tmsb31() (int64), mapped by
+ *        encode_s_from_address(), shown via ADDRESS(..., 6)
+ *
+ * Returns "LWPC <rt>, <s>".
+ */
+std::string NMD::LWPC_48_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_41_40_39_38_37(instruction);
+    const int64 s_bits = extr_sil0il16bs16_il16il0bs16Tmsb31(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string s_op(ADDRESS(encode_s_from_address(s_bits), 6));
+
+    return img::format("LWPC %s, %s", rt_op, s_op);
+}
+
+
+/*
+ * NMD::LWU_GP_ - render this LWU variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   u  - extr_uil2il2bs16Fmsb17(), shown via IMMEDIATE()
+ *
+ * Returns "LWU <rt>, <u>($28)"; the register number 28 is a fixed
+ * part of the output, not decoded from the instruction.
+ */
+std::string NMD::LWU_GP_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 u_bits = extr_uil2il2bs16Fmsb17(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string u_op(IMMEDIATE(copy(u_bits)));
+
+    return img::format("LWU %s, %s($%d)", rt_op, u_op, 28);
+}
+
+
+/*
+ * NMD::LWU_S9_ - render this LWU variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   s  - extr_sil0il0bs8_il15il8bs1Tmsb8() (int64), via IMMEDIATE()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *
+ * Returns "LWU <rt>, <s>(<rs>)".
+ */
+std::string NMD::LWU_S9_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const int64 s_bits = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string s_op(IMMEDIATE(copy(s_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+
+    return img::format("LWU %s, %s(%s)", rt_op, s_op, rs_op);
+}
+
+
+/*
+ * NMD::LWU_U12_ - render this LWU variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   u  - extract_u_11_10_9_8_7_6_5_4_3_2_1_0(), via IMMEDIATE()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *
+ * Returns "LWU <rt>, <u>(<rs>)".
+ */
+std::string NMD::LWU_U12_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 u_bits = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string u_op(IMMEDIATE(copy(u_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+
+    return img::format("LWU %s, %s(%s)", rt_op, u_op, rs_op);
+}
+
+
+/*
+ * NMD::LWUX - render an LWUX instruction word as text.
+ *
+ * Returns "LWUX <rd>, <rs>(<rt>)"; all three operands go through
+ * copy() and GPR().
+ *
+ * NOTE(review): the rd and rs extractors are both named for bits
+ * 20..16 - confirm in their definitions that the fields really are
+ * distinct.
+ */
+std::string NMD::LWUX(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rd_op(GPR(copy(rd_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+    const std::string rt_op(GPR(copy(rt_bits)));
+
+    return img::format("LWUX %s, %s(%s)", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * NMD::LWUXS - render an LWUXS instruction word as text.
+ *
+ * Returns "LWUXS <rd>, <rs>(<rt>)"; all three operands go through
+ * copy() and GPR().
+ *
+ * NOTE(review): the rd and rs extractors are both named for bits
+ * 20..16 - confirm in their definitions that the fields really are
+ * distinct.
+ */
+std::string NMD::LWUXS(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rd_op(GPR(copy(rd_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+    const std::string rt_op(GPR(copy(rt_bits)));
+
+    return img::format("LWUXS %s, %s(%s)", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * NMD::LWX - render an LWX instruction word as text.
+ *
+ * Returns "LWX <rd>, <rs>(<rt>)"; all three operands go through
+ * copy() and GPR().
+ *
+ * NOTE(review): the rd and rs extractors are both named for bits
+ * 20..16 - confirm in their definitions that the fields really are
+ * distinct.
+ */
+std::string NMD::LWX(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rd_op(GPR(copy(rd_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+    const std::string rt_op(GPR(copy(rt_bits)));
+
+    return img::format("LWX %s, %s(%s)", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * NMD::LWXS_16_ - render this LWXS variant's instruction word as text.
+ *
+ * Returns "LWXS <rd3>, <rs3>(<rt3>)". Each of the three fields is
+ * mapped by encode_gpr3() and then shown via GPR().
+ *
+ * rt3 names a register, so it is rendered with GPR() like the other
+ * two operands (and like rt in LWXS_32_), not with IMMEDIATE(), which
+ * would format the mapped register number as an immediate.
+ */
+std::string NMD::LWXS_16_(uint64 instruction)
+{
+    uint64 rd3_value = extract_rd3_3_2_1(instruction);
+    uint64 rt3_value = extract_rt3_9_8_7(instruction);
+    uint64 rs3_value = extract_rs3_6_5_4(instruction);
+
+    std::string rd3 = GPR(encode_gpr3(rd3_value));
+    std::string rs3 = GPR(encode_gpr3(rs3_value));
+    std::string rt3 = GPR(encode_gpr3(rt3_value));
+
+    return img::format("LWXS %s, %s(%s)", rd3, rs3, rt3);
+}
+
+
+/*
+ * NMD::LWXS_32_ - render this LWXS variant's instruction word as text.
+ *
+ * Returns "LWXS <rd>, <rs>(<rt>)"; all three operands go through
+ * copy() and GPR().
+ *
+ * NOTE(review): the rd and rs extractors are both named for bits
+ * 20..16 - confirm in their definitions that the fields really are
+ * distinct.
+ */
+std::string NMD::LWXS_32_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rd_op(GPR(copy(rd_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+    const std::string rt_op(GPR(copy(rt_bits)));
+
+    return img::format("LWXS %s, %s(%s)", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * NMD::MADD_DSP_ - render this MADD variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   ac - extract_ac_13_12(), shown via AC()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *
+ * Returns "MADD <ac>, <rs>, <rt>".
+ */
+std::string NMD::MADD_DSP_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string ac_op(AC(copy(ac_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+    const std::string rt_op(GPR(copy(rt_bits)));
+
+    return img::format("MADD %s, %s, %s", ac_op, rs_op, rt_op);
+}
+
+
+/*
+ * NMD::MADDF_D - render a MADDF.D instruction word as text.
+ *
+ * Operands, in output order:
+ *   fd - extract_fd_10_9_8_7_6(), shown via FPR()
+ *   fs - extract_fs_15_14_13_12_11(), shown via FPR()
+ *   ft - extract_ft_20_19_18_17_16(), shown via FPR()
+ *
+ * Returns "MADDF.D <fd>, <fs>, <ft>".
+ */
+std::string NMD::MADDF_D(uint64 instruction)
+{
+    const uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    const uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+    const uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+
+    const std::string fd_op(FPR(copy(fd_bits)));
+    const std::string fs_op(FPR(copy(fs_bits)));
+    const std::string ft_op(FPR(copy(ft_bits)));
+
+    return img::format("MADDF.D %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * NMD::MADDF_S - render a MADDF.S instruction word as text.
+ *
+ * Operands, in output order:
+ *   fd - extract_fd_10_9_8_7_6(), shown via FPR()
+ *   fs - extract_fs_15_14_13_12_11(), shown via FPR()
+ *   ft - extract_ft_20_19_18_17_16(), shown via FPR()
+ *
+ * Returns "MADDF.S <fd>, <fs>, <ft>".
+ */
+std::string NMD::MADDF_S(uint64 instruction)
+{
+    const uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    const uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+    const uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+
+    const std::string fd_op(FPR(copy(fd_bits)));
+    const std::string fs_op(FPR(copy(fs_bits)));
+    const std::string ft_op(FPR(copy(ft_bits)));
+
+    return img::format("MADDF.S %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * NMD::MADDU_DSP_ - render this MADDU variant's instruction word as text.
+ *
+ * Operands, in output order:
+ *   ac - extract_ac_13_12(), shown via AC()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *
+ * Returns "MADDU <ac>, <rs>, <rt>".
+ */
+std::string NMD::MADDU_DSP_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string ac_op(AC(copy(ac_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+    const std::string rt_op(GPR(copy(rt_bits)));
+
+    return img::format("MADDU %s, %s, %s", ac_op, rs_op, rt_op);
+}
+
+
+/*
+ * NMD::MAQ_S_W_PHL - render a MAQ_S.W.PHL instruction word as text.
+ *
+ * Operands, in output order:
+ *   ac - extract_ac_13_12(), shown via AC()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *
+ * Returns "MAQ_S.W.PHL <ac>, <rs>, <rt>".
+ */
+std::string NMD::MAQ_S_W_PHL(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string ac_op(AC(copy(ac_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+    const std::string rt_op(GPR(copy(rt_bits)));
+
+    return img::format("MAQ_S.W.PHL %s, %s, %s", ac_op, rs_op, rt_op);
+}
+
+
+/*
+ * NMD::MAQ_S_W_PHR - render a MAQ_S.W.PHR instruction word as text.
+ *
+ * Operands, in output order:
+ *   ac - extract_ac_13_12(), shown via AC()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *
+ * Returns "MAQ_S.W.PHR <ac>, <rs>, <rt>".
+ */
+std::string NMD::MAQ_S_W_PHR(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string ac_op(AC(copy(ac_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+    const std::string rt_op(GPR(copy(rt_bits)));
+
+    return img::format("MAQ_S.W.PHR %s, %s, %s", ac_op, rs_op, rt_op);
+}
+
+
+/*
+ * NMD::MAQ_SA_W_PHL - render a MAQ_SA.W.PHL instruction word as text.
+ *
+ * Operands, in output order:
+ *   ac - extract_ac_13_12(), shown via AC()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *
+ * Returns "MAQ_SA.W.PHL <ac>, <rs>, <rt>".
+ */
+std::string NMD::MAQ_SA_W_PHL(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string ac_op(AC(copy(ac_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+    const std::string rt_op(GPR(copy(rt_bits)));
+
+    return img::format("MAQ_SA.W.PHL %s, %s, %s", ac_op, rs_op, rt_op);
+}
+
+
+/*
+ * NMD::MAQ_SA_W_PHR - render a MAQ_SA.W.PHR instruction word as text.
+ *
+ * Operands, in output order:
+ *   ac - extract_ac_13_12(), shown via AC()
+ *   rs - extract_rs_20_19_18_17_16(), shown via GPR()
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *
+ * Returns "MAQ_SA.W.PHR <ac>, <rs>, <rt>".
+ */
+std::string NMD::MAQ_SA_W_PHR(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ac_bits = extract_ac_13_12(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string ac_op(AC(copy(ac_bits)));
+    const std::string rs_op(GPR(copy(rs_bits)));
+    const std::string rt_op(GPR(copy(rt_bits)));
+
+    return img::format("MAQ_SA.W.PHR %s, %s, %s", ac_op, rs_op, rt_op);
+}
+
+
+/*
+ * NMD::MAX_D - render a MAX.D instruction word as text.
+ *
+ * Operands, in output order:
+ *   fd - extract_fd_10_9_8_7_6(), shown via FPR()
+ *   fs - extract_fs_15_14_13_12_11(), shown via FPR()
+ *   ft - extract_ft_20_19_18_17_16(), shown via FPR()
+ *
+ * Returns "MAX.D <fd>, <fs>, <ft>".
+ */
+std::string NMD::MAX_D(uint64 instruction)
+{
+    const uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    const uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+    const uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+
+    const std::string fd_op(FPR(copy(fd_bits)));
+    const std::string fs_op(FPR(copy(fs_bits)));
+    const std::string ft_op(FPR(copy(ft_bits)));
+
+    return img::format("MAX.D %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * NMD::MAX_S - render a MAX.S instruction word as text.
+ *
+ * Operands, in output order:
+ *   fd - extract_fd_10_9_8_7_6(), shown via FPR()
+ *   fs - extract_fs_15_14_13_12_11(), shown via FPR()
+ *   ft - extract_ft_20_19_18_17_16(), shown via FPR()
+ *
+ * Returns "MAX.S <fd>, <fs>, <ft>".
+ */
+std::string NMD::MAX_S(uint64 instruction)
+{
+    const uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    const uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+    const uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+
+    const std::string fd_op(FPR(copy(fd_bits)));
+    const std::string fs_op(FPR(copy(fs_bits)));
+    const std::string ft_op(FPR(copy(ft_bits)));
+
+    return img::format("MAX.S %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * NMD::MAXA_D - render a MAXA.D instruction word as text.
+ *
+ * Operands, in output order:
+ *   fd - extract_fd_10_9_8_7_6(), shown via FPR()
+ *   fs - extract_fs_15_14_13_12_11(), shown via FPR()
+ *   ft - extract_ft_20_19_18_17_16(), shown via FPR()
+ *
+ * Returns "MAXA.D <fd>, <fs>, <ft>".
+ */
+std::string NMD::MAXA_D(uint64 instruction)
+{
+    const uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    const uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+    const uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+
+    const std::string fd_op(FPR(copy(fd_bits)));
+    const std::string fs_op(FPR(copy(fs_bits)));
+    const std::string ft_op(FPR(copy(ft_bits)));
+
+    return img::format("MAXA.D %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * NMD::MAXA_S - render a MAXA.S instruction word as text.
+ *
+ * Operands, in output order:
+ *   fd - extract_fd_10_9_8_7_6(), shown via FPR()
+ *   fs - extract_fs_15_14_13_12_11(), shown via FPR()
+ *   ft - extract_ft_20_19_18_17_16(), shown via FPR()
+ *
+ * Returns "MAXA.S <fd>, <fs>, <ft>".
+ */
+std::string NMD::MAXA_S(uint64 instruction)
+{
+    const uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    const uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+    const uint64 fd_bits = extract_fd_10_9_8_7_6(instruction);
+
+    const std::string fd_op(FPR(copy(fd_bits)));
+    const std::string fs_op(FPR(copy(fs_bits)));
+    const std::string ft_op(FPR(copy(ft_bits)));
+
+    return img::format("MAXA.S %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * NMD::MFC0 - render an MFC0 instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt  - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   c0s - extract_c0s_20_19_18_17_16(), shown via CPR()
+ *   sel - extract_sel_15_14_13_12_11(), shown via IMMEDIATE()
+ *
+ * Returns "MFC0 <rt>, <c0s>, <sel>".
+ */
+std::string NMD::MFC0(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 c0s_bits = extract_c0s_20_19_18_17_16(instruction);
+    const uint64 sel_bits = extract_sel_15_14_13_12_11(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string c0s_op(CPR(copy(c0s_bits)));
+    const std::string sel_op(IMMEDIATE(copy(sel_bits)));
+
+    return img::format("MFC0 %s, %s, %s", rt_op, c0s_op, sel_op);
+}
+
+
+/*
+ * NMD::MFC1 - render an MFC1 instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   fs - extract_fs_15_14_13_12_11(), shown via FPR()
+ *
+ * Returns "MFC1 <rt>, <fs>"; the string comes straight from
+ * img::format().
+ */
+std::string NMD::MFC1(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string fs_op(FPR(copy(fs_bits)));
+
+    return img::format("MFC1 %s, %s", rt_op, fs_op);
+}
+
+
+/*
+ * NMD::MFC2 - render an MFC2 instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   cs - extract_cs_20_19_18_17_16(), shown via CPR()
+ *
+ * Returns "MFC2 <rt>, <cs>"; the string comes straight from
+ * img::format().
+ */
+std::string NMD::MFC2(uint64 instruction)
+{
+    const uint64 cs_bits = extract_cs_20_19_18_17_16(instruction);
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string cs_op(CPR(copy(cs_bits)));
+
+    return img::format("MFC2 %s, %s", rt_op, cs_op);
+}
+
+
+/*
+ * NMD::MFGC0 - render an MFGC0 instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt  - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   c0s - extract_c0s_20_19_18_17_16(), shown via CPR()
+ *   sel - extract_sel_15_14_13_12_11(), shown via IMMEDIATE()
+ *
+ * Returns "MFGC0 <rt>, <c0s>, <sel>".
+ */
+std::string NMD::MFGC0(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 c0s_bits = extract_c0s_20_19_18_17_16(instruction);
+    const uint64 sel_bits = extract_sel_15_14_13_12_11(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string c0s_op(CPR(copy(c0s_bits)));
+    const std::string sel_op(IMMEDIATE(copy(sel_bits)));
+
+    return img::format("MFGC0 %s, %s, %s", rt_op, c0s_op, sel_op);
+}
+
+
+/*
+ * NMD::MFHC0 - render an MFHC0 instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt  - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   c0s - extract_c0s_20_19_18_17_16(), shown via CPR()
+ *   sel - extract_sel_15_14_13_12_11(), shown via IMMEDIATE()
+ *
+ * Returns "MFHC0 <rt>, <c0s>, <sel>".
+ */
+std::string NMD::MFHC0(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 c0s_bits = extract_c0s_20_19_18_17_16(instruction);
+    const uint64 sel_bits = extract_sel_15_14_13_12_11(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string c0s_op(CPR(copy(c0s_bits)));
+    const std::string sel_op(IMMEDIATE(copy(sel_bits)));
+
+    return img::format("MFHC0 %s, %s, %s", rt_op, c0s_op, sel_op);
+}
+
+
+/*
+ * NMD::MFHC1 - render an MFHC1 instruction word as text.
+ *
+ * Operands, in output order:
+ *   rt - extract_rt_25_24_23_22_21(), shown via GPR()
+ *   fs - extract_fs_15_14_13_12_11(), shown via FPR()
+ *
+ * Returns "MFHC1 <rt>, <fs>"; the string comes straight from
+ * img::format().
+ */
+std::string NMD::MFHC1(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+
+    const std::string rt_op(GPR(copy(rt_bits)));
+    const std::string fs_op(FPR(copy(fs_bits)));
+
+    return img::format("MFHC1 %s, %s", rt_op, fs_op);
+}
+
+
+/*
+ * MFHC2 rt, cs - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MFHC2(uint64 instruction)
+{
+    uint64 cs_field = extract_cs_20_19_18_17_16(instruction);
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string cs_op = CPR(copy(cs_field));
+
+    return img::format("MFHC2 %s, %s", rt_op, cs_op);
+}
+
+
+/*
+ * MFHGC0 rt, c0s, sel - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MFHGC0(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 c0s_field = extract_c0s_20_19_18_17_16(instruction);
+    uint64 sel_field = extract_sel_15_14_13_12_11(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string c0s_op = CPR(copy(c0s_field));
+    std::string sel_op = IMMEDIATE(copy(sel_field));
+
+    return img::format("MFHGC0 %s, %s, %s", rt_op, c0s_op, sel_op);
+}
+
+
+/*
+ * MFHI rt, ac - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MFHI_DSP_(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 ac_field = extract_ac_13_12(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string ac_op = AC(copy(ac_field));
+
+    return img::format("MFHI %s, %s", rt_op, ac_op);
+}
+
+
+/*
+ * MFHTR rt, c0s, u, sel - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MFHTR(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 c0s_field = extract_c0s_20_19_18_17_16(instruction);
+    uint64 sel_field = extract_sel_15_14_13_12_11(instruction);
+    uint64 u_field = extract_u_10(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string c0s_op = IMMEDIATE(copy(c0s_field));
+    std::string u_op = IMMEDIATE(copy(u_field));
+    std::string sel_op = IMMEDIATE(copy(sel_field));
+
+    return img::format("MFHTR %s, %s, %s, %s", rt_op, c0s_op, u_op, sel_op);
+}
+
+
+/*
+ * MFLO rt, ac - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MFLO_DSP_(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 ac_field = extract_ac_13_12(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string ac_op = AC(copy(ac_field));
+
+    return img::format("MFLO %s, %s", rt_op, ac_op);
+}
+
+
+/*
+ * MFTR rt, c0s, u, sel - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MFTR(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 c0s_field = extract_c0s_20_19_18_17_16(instruction);
+    uint64 sel_field = extract_sel_15_14_13_12_11(instruction);
+    uint64 u_field = extract_u_10(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string c0s_op = IMMEDIATE(copy(c0s_field));
+    std::string u_op = IMMEDIATE(copy(u_field));
+    std::string sel_op = IMMEDIATE(copy(sel_field));
+
+    return img::format("MFTR %s, %s, %s, %s", rt_op, c0s_op, u_op, sel_op);
+}
+
+
+/*
+ * MIN.D fd, fs, ft - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MIN_D(uint64 instruction)
+{
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_field = extract_fd_10_9_8_7_6(instruction);
+
+    std::string fd_op = FPR(copy(fd_field));
+    std::string fs_op = FPR(copy(fs_field));
+    std::string ft_op = FPR(copy(ft_field));
+
+    return img::format("MIN.D %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * MIN.S fd, fs, ft - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MIN_S(uint64 instruction)
+{
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_field = extract_fd_10_9_8_7_6(instruction);
+
+    std::string fd_op = FPR(copy(fd_field));
+    std::string fs_op = FPR(copy(fs_field));
+    std::string ft_op = FPR(copy(ft_field));
+
+    return img::format("MIN.S %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * MINA.D fd, fs, ft - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MINA_D(uint64 instruction)
+{
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_field = extract_fd_10_9_8_7_6(instruction);
+
+    std::string fd_op = FPR(copy(fd_field));
+    std::string fs_op = FPR(copy(fs_field));
+    std::string ft_op = FPR(copy(ft_field));
+
+    return img::format("MINA.D %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * MINA.S fd, fs, ft - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MINA_S(uint64 instruction)
+{
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_field = extract_fd_10_9_8_7_6(instruction);
+
+    std::string fd_op = FPR(copy(fd_field));
+    std::string fs_op = FPR(copy(fs_field));
+    std::string ft_op = FPR(copy(ft_field));
+
+    return img::format("MINA.S %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * MOD rd, rs, rt - return the formatted instruction text.
+ * NOTE(review): diagram is boilerplate; rd/rs extractors both say 20..16.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MOD(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_field));
+    std::string rs_op = GPR(copy(rs_field));
+    std::string rt_op = GPR(copy(rt_field));
+
+    return img::format("MOD %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * MODSUB rd, rs, rt - return the formatted instruction text.
+ * NOTE(review): diagram is boilerplate; rd/rs extractors both say 20..16.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MODSUB(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_field));
+    std::string rs_op = GPR(copy(rs_field));
+    std::string rt_op = GPR(copy(rt_field));
+
+    return img::format("MODSUB %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * MODU rd, rs, rt - return the formatted instruction text.
+ * NOTE(review): diagram is boilerplate; rd/rs extractors both say 20..16.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MODU(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_field));
+    std::string rs_op = GPR(copy(rs_field));
+    std::string rt_op = GPR(copy(rt_field));
+
+    return img::format("MODU %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * MOV.D ft, fs - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MOV_D(uint64 instruction)
+{
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_op = FPR(copy(ft_field));
+    std::string fs_op = FPR(copy(fs_field));
+
+    return img::format("MOV.D %s, %s", ft_op, fs_op);
+}
+
+
+/*
+ * MOV.S ft, fs - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MOV_S(uint64 instruction)
+{
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_op = FPR(copy(ft_field));
+    std::string fs_op = FPR(copy(fs_field));
+
+    return img::format("MOV.S %s, %s", ft_op, fs_op);
+}
+
+
+/*
+ * MOVE.BALC rd1, rtz4, s - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MOVE_BALC(uint64 instruction)
+{
+    uint64 rd1_field = extract_rdl_25_24(instruction);
+    int64 s_field = extr_sil0il21bs1_il1il1bs20Tmsb21(instruction);
+    uint64 rtz4_field = extract_rtz4_27_26_25_23_22_21(instruction);
+
+    std::string rd1_op = GPR(encode_rd1_from_rd(rd1_field));
+    std::string rtz4_op = GPR(encode_gpr4_zero(rtz4_field));
+    std::string s_op = ADDRESS(encode_s_from_address(s_field), 4);
+
+    return img::format("MOVE.BALC %s, %s, %s", rd1_op, rtz4_op, s_op);
+}
+
+
+/*
+ * MOVEP rd2, re2, rsz4, rtz4 - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MOVEP(uint64 instruction)
+{
+    uint64 rsz4_field = extract_rsz4_4_2_1_0(instruction);
+    uint64 rtz4_field = extract_rtz4_9_7_6_5(instruction);
+    uint64 rd2_field = extract_rd2_3_8(instruction);
+
+    std::string rd2_s = GPR(encode_rd2_reg1(rd2_field));
+    std::string re2_s = GPR(encode_rd2_reg2(rd2_field));
+    /* !!!!!!!!!! - no conversion function */
+    std::string rsz4_s = GPR(encode_gpr4_zero(rsz4_field));
+    std::string rtz4_s = GPR(encode_gpr4_zero(rtz4_field));
+
+    return img::format("MOVEP %s, %s, %s, %s", rd2_s, re2_s, rsz4_s, rtz4_s);
+    /* hand edited */
+}
+
+
+/*
+ * MOVEP rs4, rt4, rd2, rs2 - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MOVEP_REV_(uint64 instruction)
+{
+    uint64 rs4_field = extract_rs4_4_2_1_0(instruction);
+    uint64 rt4_field = extract_rt4_9_7_6_5(instruction);
+    uint64 rd2_field = extract_rd2_3_8(instruction);
+
+    std::string rs4_s = GPR(encode_gpr4(rs4_field));
+    std::string rt4_s = GPR(encode_gpr4(rt4_field));
+    std::string rd2_s = GPR(encode_rd2_reg1(rd2_field));
+    std::string rs2_s = GPR(encode_rd2_reg2(rd2_field));
+    /* !!!!!!!!!! - no conversion function */
+
+    return img::format("MOVEP %s, %s, %s, %s", rs4_s, rt4_s, rd2_s, rs2_s);
+    /* hand edited */
+}
+
+
+/*
+ * MOVE rt, rs - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MOVE(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_9_8_7_6_5(instruction);
+    uint64 rs_field = extract_rs_4_3_2_1_0(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string rs_op = GPR(copy(rs_field));
+
+    return img::format("MOVE %s, %s", rt_op, rs_op);
+}
+
+
+/*
+ * MOVN rd, rs, rt - return the formatted instruction text.
+ * NOTE(review): diagram is boilerplate; rd/rs extractors both say 20..16.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MOVN(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_field));
+    std::string rs_op = GPR(copy(rs_field));
+    std::string rt_op = GPR(copy(rt_field));
+
+    return img::format("MOVN %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * MOVZ rd, rs, rt - return the formatted instruction text.
+ * NOTE(review): diagram is boilerplate; rd/rs extractors both say 20..16.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MOVZ(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_field));
+    std::string rs_op = GPR(copy(rs_field));
+    std::string rt_op = GPR(copy(rt_field));
+
+    return img::format("MOVZ %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * MSUB ac, rs, rt - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MSUB_DSP_(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 ac_field = extract_ac_13_12(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string ac_op = AC(copy(ac_field));
+    std::string rs_op = GPR(copy(rs_field));
+    std::string rt_op = GPR(copy(rt_field));
+
+    return img::format("MSUB %s, %s, %s", ac_op, rs_op, rt_op);
+}
+
+
+/*
+ * MSUBF.D fd, fs, ft - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MSUBF_D(uint64 instruction)
+{
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_field = extract_fd_10_9_8_7_6(instruction);
+
+    std::string fd_op = FPR(copy(fd_field));
+    std::string fs_op = FPR(copy(fs_field));
+    std::string ft_op = FPR(copy(ft_field));
+
+    return img::format("MSUBF.D %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * MSUBF.S fd, fs, ft - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MSUBF_S(uint64 instruction)
+{
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_field = extract_fd_10_9_8_7_6(instruction);
+
+    std::string fd_op = FPR(copy(fd_field));
+    std::string fs_op = FPR(copy(fs_field));
+    std::string ft_op = FPR(copy(ft_field));
+
+    return img::format("MSUBF.S %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * MSUBU ac, rs, rt - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MSUBU_DSP_(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 ac_field = extract_ac_13_12(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string ac_op = AC(copy(ac_field));
+    std::string rs_op = GPR(copy(rs_field));
+    std::string rt_op = GPR(copy(rt_field));
+
+    return img::format("MSUBU %s, %s, %s", ac_op, rs_op, rt_op);
+}
+
+
+/*
+ * MTC0 rt, c0s, sel - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MTC0(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 c0s_field = extract_c0s_20_19_18_17_16(instruction);
+    uint64 sel_field = extract_sel_15_14_13_12_11(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string c0s_op = CPR(copy(c0s_field));
+    std::string sel_op = IMMEDIATE(copy(sel_field));
+
+    return img::format("MTC0 %s, %s, %s", rt_op, c0s_op, sel_op);
+}
+
+
+/*
+ * MTC1 rt, fs - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MTC1(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string fs_op = FPR(copy(fs_field));
+
+    return img::format("MTC1 %s, %s", rt_op, fs_op);
+}
+
+
+/*
+ * MTC2 rt, cs - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MTC2(uint64 instruction)
+{
+    uint64 cs_field = extract_cs_20_19_18_17_16(instruction);
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string cs_op = CPR(copy(cs_field));
+
+    return img::format("MTC2 %s, %s", rt_op, cs_op);
+}
+
+
+/*
+ * MTGC0 rt, c0s, sel - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MTGC0(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 c0s_field = extract_c0s_20_19_18_17_16(instruction);
+    uint64 sel_field = extract_sel_15_14_13_12_11(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string c0s_op = CPR(copy(c0s_field));
+    std::string sel_op = IMMEDIATE(copy(sel_field));
+
+    return img::format("MTGC0 %s, %s, %s", rt_op, c0s_op, sel_op);
+}
+
+
+/*
+ * MTHC0 rt, c0s, sel - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MTHC0(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 c0s_field = extract_c0s_20_19_18_17_16(instruction);
+    uint64 sel_field = extract_sel_15_14_13_12_11(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string c0s_op = CPR(copy(c0s_field));
+    std::string sel_op = IMMEDIATE(copy(sel_field));
+
+    return img::format("MTHC0 %s, %s, %s", rt_op, c0s_op, sel_op);
+}
+
+
+/*
+ * MTHC1 rt, fs - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MTHC1(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string fs_op = FPR(copy(fs_field));
+
+    return img::format("MTHC1 %s, %s", rt_op, fs_op);
+}
+
+
+/*
+ * MTHC2 rt, cs - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MTHC2(uint64 instruction)
+{
+    uint64 cs_field = extract_cs_20_19_18_17_16(instruction);
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string cs_op = CPR(copy(cs_field));
+
+    return img::format("MTHC2 %s, %s", rt_op, cs_op);
+}
+
+
+/*
+ * MTHGC0 rt, c0s, sel - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MTHGC0(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 c0s_field = extract_c0s_20_19_18_17_16(instruction);
+    uint64 sel_field = extract_sel_15_14_13_12_11(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string c0s_op = CPR(copy(c0s_field));
+    std::string sel_op = IMMEDIATE(copy(sel_field));
+
+    return img::format("MTHGC0 %s, %s, %s", rt_op, c0s_op, sel_op);
+}
+
+
+/*
+ * MTHI rs, ac - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MTHI_DSP_(uint64 instruction)
+{
+    uint64 ac_field = extract_ac_13_12(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rs_op = GPR(copy(rs_field));
+    std::string ac_op = AC(copy(ac_field));
+
+    return img::format("MTHI %s, %s", rs_op, ac_op);
+}
+
+
+/*
+ * MTHLIP rs, ac - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MTHLIP(uint64 instruction)
+{
+    uint64 ac_field = extract_ac_13_12(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rs_op = GPR(copy(rs_field));
+    std::string ac_op = AC(copy(ac_field));
+
+    return img::format("MTHLIP %s, %s", rs_op, ac_op);
+}
+
+
+/*
+ * MTHTR rt, c0s, u, sel - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MTHTR(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 c0s_field = extract_c0s_20_19_18_17_16(instruction);
+    uint64 sel_field = extract_sel_15_14_13_12_11(instruction);
+    uint64 u_field = extract_u_10(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string c0s_op = IMMEDIATE(copy(c0s_field));
+    std::string u_op = IMMEDIATE(copy(u_field));
+    std::string sel_op = IMMEDIATE(copy(sel_field));
+
+    return img::format("MTHTR %s, %s, %s, %s", rt_op, c0s_op, u_op, sel_op);
+}
+
+
+/*
+ * MTLO rs, ac - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MTLO_DSP_(uint64 instruction)
+{
+    uint64 ac_field = extract_ac_13_12(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rs_op = GPR(copy(rs_field));
+    std::string ac_op = AC(copy(ac_field));
+
+    return img::format("MTLO %s, %s", rs_op, ac_op);
+}
+
+
+/*
+ * MTTR rt, c0s, u, sel - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MTTR(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 c0s_field = extract_c0s_20_19_18_17_16(instruction);
+    uint64 sel_field = extract_sel_15_14_13_12_11(instruction);
+    uint64 u_field = extract_u_10(instruction);
+
+    std::string rt_op = GPR(copy(rt_field));
+    std::string c0s_op = IMMEDIATE(copy(c0s_field));
+    std::string u_op = IMMEDIATE(copy(u_field));
+    std::string sel_op = IMMEDIATE(copy(sel_field));
+
+    return img::format("MTTR %s, %s, %s, %s", rt_op, c0s_op, u_op, sel_op);
+}
+
+
+/*
+ * MUH rd, rs, rt - return the formatted instruction text.
+ * NOTE(review): diagram is boilerplate; rd/rs extractors both say 20..16.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MUH(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_field));
+    std::string rs_op = GPR(copy(rs_field));
+    std::string rt_op = GPR(copy(rt_field));
+
+    return img::format("MUH %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * MUHU rd, rs, rt - return the formatted instruction text.
+ * NOTE(review): diagram is boilerplate; rd/rs extractors both say 20..16.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MUHU(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_field));
+    std::string rs_op = GPR(copy(rs_field));
+    std::string rt_op = GPR(copy(rt_field));
+
+    return img::format("MUHU %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * MUL rd, rs, rt - return the formatted instruction text.
+ * NOTE(review): diagram is boilerplate; rd/rs extractors both say 20..16.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MUL_32_(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_field));
+    std::string rs_op = GPR(copy(rs_field));
+    std::string rt_op = GPR(copy(rt_field));
+
+    return img::format("MUL %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * MUL rs4, rt4 - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MUL_4X4_(uint64 instruction)
+{
+    uint64 rs4_field = extract_rs4_4_2_1_0(instruction);
+    uint64 rt4_field = extract_rt4_9_7_6_5(instruction);
+
+    std::string rs4_op = GPR(encode_gpr4(rs4_field));
+    std::string rt4_op = GPR(encode_gpr4(rt4_field));
+
+    return img::format("MUL %s, %s", rs4_op, rt4_op);
+}
+
+
+/*
+ * MUL.D fd, fs, ft - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MUL_D(uint64 instruction)
+{
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_field = extract_fd_10_9_8_7_6(instruction);
+
+    std::string fd_op = FPR(copy(fd_field));
+    std::string fs_op = FPR(copy(fs_field));
+    std::string ft_op = FPR(copy(ft_field));
+
+    return img::format("MUL.D %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * MUL.PH rd, rs, rt - return the formatted instruction text.
+ * NOTE(review): diagram is boilerplate; rd/rs extractors both say 20..16.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MUL_PH(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_field));
+    std::string rs_op = GPR(copy(rs_field));
+    std::string rt_op = GPR(copy(rt_field));
+
+    return img::format("MUL.PH %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * MUL_S.PH rd, rs, rt - return the formatted instruction text.
+ * NOTE(review): diagram is boilerplate; rd/rs extractors both say 20..16.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MUL_S_PH(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_field));
+    std::string rs_op = GPR(copy(rs_field));
+    std::string rt_op = GPR(copy(rt_field));
+
+    return img::format("MUL_S.PH %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * MUL.S fd, fs, ft - return the formatted instruction text.
+ * NOTE(review): the bit diagram below is boilerplate - TODO confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MUL_S(uint64 instruction)
+{
+    uint64 fs_field = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_field = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_field = extract_fd_10_9_8_7_6(instruction);
+
+    std::string fd_op = FPR(copy(fd_field));
+    std::string fs_op = FPR(copy(fs_field));
+    std::string ft_op = FPR(copy(ft_field));
+
+    return img::format("MUL.S %s, %s, %s", fd_op, fs_op, ft_op);
+}
+
+
+/*
+ * MULEQ_S.W.PHL rd, rs, rt - return the formatted instruction text.
+ * NOTE(review): diagram is boilerplate; rd/rs extractors both say 20..16.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MULEQ_S_W_PHL(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_field));
+    std::string rs_op = GPR(copy(rs_field));
+    std::string rt_op = GPR(copy(rt_field));
+
+    return img::format("MULEQ_S.W.PHL %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * MULEQ_S.W.PHR rd, rs, rt - return the formatted instruction text.
+ * NOTE(review): diagram is boilerplate; rd/rs extractors both say 20..16.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MULEQ_S_W_PHR(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_field));
+    std::string rs_op = GPR(copy(rs_field));
+    std::string rt_op = GPR(copy(rt_field));
+
+    return img::format("MULEQ_S.W.PHR %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * MULEU_S.PH.QBL rd, rs, rt - return the formatted instruction text.
+ * NOTE(review): diagram is boilerplate; rd/rs extractors both say 20..16.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MULEU_S_PH_QBL(uint64 instruction)
+{
+    uint64 rt_field = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_field = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_field = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_op = GPR(copy(rd_field));
+    std::string rs_op = GPR(copy(rs_field));
+    std::string rt_op = GPR(copy(rt_field));
+
+    return img::format("MULEU_S.PH.QBL %s, %s, %s", rd_op, rs_op, rt_op);
+}
+
+
+/*
+ * MULEU_S.PH.QBR rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MULEU_S_PH_QBR(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("MULEU_S.PH.QBR %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * MULQ_RS.PH rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MULQ_RS_PH(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("MULQ_RS.PH %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * MULQ_RS.W rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MULQ_RS_W(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("MULQ_RS.W %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * MULQ_S.PH rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MULQ_S_PH(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("MULQ_S.PH %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * MULQ_S.W rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MULQ_S_W(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("MULQ_S.W %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * MULSA.W.PH ac, rs, rt - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MULSA_W_PH(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 ac_value = extract_ac_13_12(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string ac = AC(copy(ac_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("MULSA.W.PH %s, %s, %s", ac, rs, rt);
+}
+
+
+/*
+ * MULSAQ_S.W.PH ac, rs, rt - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MULSAQ_S_W_PH(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 ac_value = extract_ac_13_12(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string ac = AC(copy(ac_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("MULSAQ_S.W.PH %s, %s, %s", ac, rs, rt);
+}
+
+
+/*
+ * MULT ac, rs, rt - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MULT_DSP_(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 ac_value = extract_ac_13_12(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string ac = AC(copy(ac_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("MULT %s, %s, %s", ac, rs, rt);
+}
+
+
+/*
+ * MULTU ac, rs, rt - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MULTU_DSP_(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 ac_value = extract_ac_13_12(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string ac = AC(copy(ac_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("MULTU %s, %s, %s", ac, rs, rt);
+}
+
+
+/*
+ * MULU rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::MULU(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("MULU %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * NEG.D ft, fs - produce the disassembly string (FPR operands)
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::NEG_D(uint64 instruction)
+{
+    uint64 fs_value = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_value = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft = FPR(copy(ft_value));
+    std::string fs = FPR(copy(fs_value));
+
+    return img::format("NEG.D %s, %s", ft, fs);
+}
+
+
+/*
+ * NEG.S ft, fs - produce the disassembly string (FPR operands)
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::NEG_S(uint64 instruction)
+{
+    uint64 fs_value = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_value = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft = FPR(copy(ft_value));
+    std::string fs = FPR(copy(fs_value));
+
+    return img::format("NEG.S %s, %s", ft, fs);
+}
+
+
+/*
+ * NOP_16_ - return the fixed string "NOP "; the instruction word is unused
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::NOP_16_(uint64 instruction)
+{
+    (void)instruction;
+
+    return "NOP ";
+}
+
+
+/*
+ * NOP_32_ - return the fixed string "NOP "; the instruction word is unused
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::NOP_32_(uint64 instruction)
+{
+    (void)instruction;
+
+    return "NOP ";
+}
+
+
+/*
+ * NOR rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::NOR(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("NOR %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * NOT rt3, rs3 - produce the disassembly string (3-bit register fields)
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::NOT_16_(uint64 instruction)
+{
+    uint64 rt3_value = extract_rt3_9_8_7(instruction);
+    uint64 rs3_value = extract_rs3_6_5_4(instruction);
+
+    std::string rt3 = GPR(encode_gpr3(rt3_value));
+    std::string rs3 = GPR(encode_gpr3(rs3_value));
+
+    return img::format("NOT %s, %s", rt3, rs3);
+}
+
+
+/*
+ * OR rs3, rt3 - produce the disassembly string (rs3 is printed first)
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::OR_16_(uint64 instruction)
+{
+    uint64 rt3_value = extract_rt3_9_8_7(instruction);
+    uint64 rs3_value = extract_rs3_6_5_4(instruction);
+
+    std::string rs3 = GPR(encode_gpr3(rs3_value));
+    std::string rt3 = GPR(encode_gpr3(rt3_value));
+
+    return img::format("OR %s, %s", rs3, rt3);
+}
+
+
+/*
+ * OR rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::OR_32_(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("OR %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * ORI rt, rs, u - produce the disassembly string (u printed as immediate)
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ORI(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_value = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string u = IMMEDIATE(copy(u_value));
+
+    return img::format("ORI %s, %s, %s", rt, rs, u);
+}
+
+
+/*
+ * PACKRL.PH rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PACKRL_PH(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("PACKRL.PH %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * PAUSE - return the fixed string "PAUSE "; the instruction word is unused
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PAUSE(uint64 instruction)
+{
+    (void)instruction;
+
+    return "PAUSE ";
+}
+
+
+/*
+ * PICK.PH rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PICK_PH(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("PICK.PH %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * PICK.QB rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PICK_QB(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("PICK.QB %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * PRECEQ.W.PHL rt, rs - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECEQ_W_PHL(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+
+    return img::format("PRECEQ.W.PHL %s, %s", rt, rs);
+}
+
+
+/*
+ * PRECEQ.W.PHR rt, rs - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECEQ_W_PHR(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+
+    return img::format("PRECEQ.W.PHR %s, %s", rt, rs);
+}
+
+
+/*
+ * PRECEQU.PH.QBLA rt, rs - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECEQU_PH_QBLA(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+
+    return img::format("PRECEQU.PH.QBLA %s, %s", rt, rs);
+}
+
+
+/*
+ * PRECEQU.PH.QBL rt, rs - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECEQU_PH_QBL(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+
+    return img::format("PRECEQU.PH.QBL %s, %s", rt, rs);
+}
+
+
+/*
+ * PRECEQU.PH.QBRA rt, rs - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECEQU_PH_QBRA(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+
+    return img::format("PRECEQU.PH.QBRA %s, %s", rt, rs);
+}
+
+
+/*
+ * PRECEQU.PH.QBR rt, rs - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECEQU_PH_QBR(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+
+    return img::format("PRECEQU.PH.QBR %s, %s", rt, rs);
+}
+
+
+/*
+ * PRECEU.PH.QBLA rt, rs - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECEU_PH_QBLA(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+
+    return img::format("PRECEU.PH.QBLA %s, %s", rt, rs);
+}
+
+
+/*
+ * PRECEU.PH.QBL rt, rs - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECEU_PH_QBL(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+
+    return img::format("PRECEU.PH.QBL %s, %s", rt, rs);
+}
+
+
+/*
+ * PRECEU.PH.QBRA rt, rs - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECEU_PH_QBRA(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+
+    return img::format("PRECEU.PH.QBRA %s, %s", rt, rs);
+}
+
+
+/*
+ * PRECEU.PH.QBR rt, rs - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECEU_PH_QBR(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+
+    return img::format("PRECEU.PH.QBR %s, %s", rt, rs);
+}
+
+
+/*
+ * PRECR.QB.PH rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               00010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECR_QB_PH(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("PRECR.QB.PH %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * PRECR_SRA.PH.W rt, rs, sa - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECR_SRA_PH_W(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 sa_value = extract_sa_15_14_13_12_11(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string sa = IMMEDIATE(copy(sa_value));
+
+    return img::format("PRECR_SRA.PH.W %s, %s, %s", rt, rs, sa);
+}
+
+
+/*
+ * PRECR_SRA_R.PH.W rt, rs, sa - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECR_SRA_R_PH_W(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 sa_value = extract_sa_15_14_13_12_11(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string sa = IMMEDIATE(copy(sa_value));
+
+    return img::format("PRECR_SRA_R.PH.W %s, %s, %s", rt, rs, sa);
+}
+
+
+/*
+ * PRECRQ.PH.W rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECRQ_PH_W(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("PRECRQ.PH.W %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * PRECRQ.QB.PH rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECRQ_QB_PH(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("PRECRQ.QB.PH %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * PRECRQ_RS.PH.W rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECRQ_RS_PH_W(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("PRECRQ_RS.PH.W %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * PRECRQU_S.QB.PH rd, rs, rt - produce the disassembly string
+ * NOTE(review): boilerplate diagram; rd/rs extractors name same bits - verify
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PRECRQU_S_QB_PH(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_value = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd = GPR(copy(rd_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string rt = GPR(copy(rt_value));
+
+    return img::format("PRECRQU_S.QB.PH %s, %s, %s", rd, rs, rt);
+}
+
+
+/*
+ * PREF hint, s(rs) - produce the disassembly string (s is an int64 offset)
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PREF_S9_(uint64 instruction)
+{
+    int64 s_value = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 hint_value = extract_hint_25_24_23_22_21(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string hint = IMMEDIATE(copy(hint_value));
+    std::string s = IMMEDIATE(copy(s_value));
+    std::string rs = GPR(copy(rs_value));
+
+    return img::format("PREF %s, %s(%s)", hint, s, rs);
+}
+
+
+/*
+ * PREF hint, u(rs) - produce the disassembly string (u is a uint64 offset)
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PREF_U12_(uint64 instruction)
+{
+    uint64 hint_value = extract_hint_25_24_23_22_21(instruction);
+    uint64 u_value = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string hint = IMMEDIATE(copy(hint_value));
+    std::string u = IMMEDIATE(copy(u_value));
+    std::string rs = GPR(copy(rs_value));
+
+    return img::format("PREF %s, %s(%s)", hint, u, rs);
+}
+
+
+/*
+ * PREFE hint, s(rs) - produce the disassembly string (s is an int64 offset)
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PREFE(uint64 instruction)
+{
+    int64 s_value = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 hint_value = extract_hint_25_24_23_22_21(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string hint = IMMEDIATE(copy(hint_value));
+    std::string s = IMMEDIATE(copy(s_value));
+    std::string rs = GPR(copy(rs_value));
+
+    return img::format("PREFE %s, %s(%s)", hint, s, rs);
+}
+
+
+/*
+ * PREPEND rt, rs, sa - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::PREPEND(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 sa_value = extract_sa_15_14_13_12_11(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+    std::string sa = IMMEDIATE(copy(sa_value));
+
+    return img::format("PREPEND %s, %s, %s", rt, rs, sa);
+}
+
+
+/*
+ * RADDU.W.QB rt, rs - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::RADDU_W_QB(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+
+    return img::format("RADDU.W.QB %s, %s", rt, rs);
+}
+
+
+/*
+ * RDDSP rt, mask - produce the disassembly string (mask as immediate)
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::RDDSP(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 mask_value = extract_mask_20_19_18_17_16_15_14(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string mask = IMMEDIATE(copy(mask_value));
+
+    return img::format("RDDSP %s, %s", rt, mask);
+}
+
+
+/*
+ * RDHWR rt, hs, sel - produce the disassembly string (hs named via CPR())
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::RDHWR(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 hs_value = extract_hs_20_19_18_17_16(instruction);
+    uint64 sel_value = extract_sel_13_12_11(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string hs = CPR(copy(hs_value));
+    std::string sel = IMMEDIATE(copy(sel_value));
+
+    return img::format("RDHWR %s, %s, %s", rt, hs, sel);
+}
+
+
+/*
+ * RDPGPR rt, rs - produce the disassembly string
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::RDPGPR(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_value = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string rs = GPR(copy(rs_value));
+
+    return img::format("RDPGPR %s, %s", rt, rs);
+}
+
+
+/*
+ * RECIP.D ft, fs - produce the disassembly string (FPR operands)
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::RECIP_D(uint64 instruction)
+{
+    uint64 fs_value = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_value = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft = FPR(copy(ft_value));
+    std::string fs = FPR(copy(fs_value));
+
+    return img::format("RECIP.D %s, %s", ft, fs);
+}
+
+
+/*
+ * RECIP.S ft, fs - produce the disassembly string (FPR operands)
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::RECIP_S(uint64 instruction)
+{
+    uint64 fs_value = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_value = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft = FPR(copy(ft_value));
+    std::string fs = FPR(copy(fs_value));
+
+    return img::format("RECIP.S %s, %s", ft, fs);
+}
+
+
+/*
+ * REPL.PH rt, s - produce the disassembly string (s is an int64 immediate)
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::REPL_PH(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    int64 s_value = extr_sil11il0bs10Tmsb9(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string s = IMMEDIATE(copy(s_value));
+
+    return img::format("REPL.PH %s, %s", rt, s);
+}
+
+
+/*
+ * REPL.QB rt, u - produce the disassembly string (u is a uint64 immediate)
+ * NOTE(review): bit diagram below is copied boilerplate - TODO confirm
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::REPL_QB(uint64 instruction)
+{
+    uint64 rt_value = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_value = extract_u_20_19_18_17_16_15_14_13(instruction);
+
+    std::string rt = GPR(copy(rt_value));
+    std::string u = IMMEDIATE(copy(u_value));
+
+    return img::format("REPL.QB %s, %s", rt, u);
+}
+
+
+/*
+ * REPLV_PH: render `instruction` as the text "REPLV.PH rt, rs".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::REPLV_PH(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* GPR() supplies the printable form of each field. */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("REPLV.PH %s, %s", rt_str, rs_str);
+}
+
+
+/*
+ * REPLV_QB: render `instruction` as the text "REPLV.QB rt, rs".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::REPLV_QB(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* GPR() supplies the printable form of each field. */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("REPLV.QB %s, %s", rt_str, rs_str);
+}
+
+
+/*
+ * RESTORE_32_: text "RESTORE u" + save_restore_list(rt, count, gp).
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::RESTORE_32_(uint64 instruction)
+{
+    /* Only u is formatted here; the rest feed save_restore_list(). */
+    uint64 count_raw = extract_count_19_18_17_16(instruction);
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extr_uil3il3bs9Fmsb11(instruction);
+    uint64 gp_raw = extract_gp_2(instruction);
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    return img::format("RESTORE %s%s", u_str,
+               save_restore_list(rt_raw, count_raw, gp_raw));
+}
+
+
+/*
+ * RESTORE_JRC_16_: text "RESTORE.JRC u" + the save_restore_list() text.
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::RESTORE_JRC_16_(uint64 instruction)
+{
+    /* rt1 is mapped by encode_rt1_from_rt(); the gp argument is 0. */
+    uint64 count_raw = extract_count_3_2_1_0(instruction);
+    uint64 rt1_raw = extract_rtl_11(instruction);
+    uint64 u_raw = extr_uil4il4bs4Fmsb7(instruction);
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    return img::format("RESTORE.JRC %s%s", u_str,
+        save_restore_list(encode_rt1_from_rt(rt1_raw), count_raw, 0));
+}
+
+
+/*
+ * RESTORE_JRC_32_: "RESTORE.JRC u" + save_restore_list(rt, count, gp).
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::RESTORE_JRC_32_(uint64 instruction)
+{
+    /* Only u is formatted here; the rest feed save_restore_list(). */
+    uint64 count_raw = extract_count_19_18_17_16(instruction);
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extr_uil3il3bs9Fmsb11(instruction);
+    uint64 gp_raw = extract_gp_2(instruction);
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    return img::format("RESTORE.JRC %s%s", u_str,
+               save_restore_list(rt_raw, count_raw, gp_raw));
+}
+
+
+/*
+ * RESTOREF: render `instruction` as the text "RESTOREF u, count".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::RESTOREF(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 count_raw = extract_count_19_18_17_16(instruction);
+    uint64 u_raw = extr_uil3il3bs9Fmsb11(instruction);
+    /* Both operands are printed through IMMEDIATE(). */
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string count_str = IMMEDIATE(copy(count_raw));
+    return img::format("RESTOREF %s, %s", u_str, count_str);
+}
+
+
+/*
+ * RINT_D: render `instruction` as the text "RINT.D ft, fs".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::RINT_D(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    /* FPR() supplies the printable form of each field. */
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    return img::format("RINT.D %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ * RINT_S: render `instruction` as the text "RINT.S ft, fs".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::RINT_S(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    /* FPR() supplies the printable form of each field. */
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    return img::format("RINT.S %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ * ROTR: render `instruction` as the text "ROTR rt, rs, shift".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ROTR(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 shift_raw = extract_shift_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* GPR() formats rt and rs; IMMEDIATE() formats the shift operand. */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string shift_str = IMMEDIATE(copy(shift_raw));
+    return img::format("ROTR %s, %s, %s", rt_str, rs_str, shift_str);
+}
+
+
+/*
+ * ROTRV: render `instruction` as the text "ROTRV rd, rs, rt".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ROTRV(uint64 instruction)
+{
+    /* NOTE(review): rd and rs helpers both name bits 20..16 - confirm. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* All three operands are printed through GPR(). */
+    std::string rd_str = GPR(copy(rd_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("ROTRV %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * ROTX: render `instruction` as "ROTX rt, rs, shift, shiftx, stripe".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ROTX(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 shift_raw = extract_shift_4_3_2_1_0(instruction);
+    uint64 shiftx_raw = extr_shiftxil7il1bs4Fmsb4(instruction);
+    uint64 stripe_raw = extract_stripe_6(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* GPR() formats rt and rs; the other three go through IMMEDIATE(). */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string shift_str = IMMEDIATE(copy(shift_raw));
+    std::string shiftx_str = IMMEDIATE(copy(shiftx_raw));
+    std::string stripe_str = IMMEDIATE(copy(stripe_raw));
+    return img::format("ROTX %s, %s, %s, %s, %s",
+                       rt_str, rs_str, shift_str, shiftx_str, stripe_str);
+}
+
+
+/*
+ * ROUND_L_D: render `instruction` as the text "ROUND.L.D ft, fs".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ROUND_L_D(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    /* FPR() supplies the printable form of each field. */
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    return img::format("ROUND.L.D %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ * ROUND_L_S: render `instruction` as the text "ROUND.L.S ft, fs".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ROUND_L_S(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    /* FPR() supplies the printable form of each field. */
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    return img::format("ROUND.L.S %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ * ROUND_W_D: render `instruction` as the text "ROUND.W.D ft, fs".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ROUND_W_D(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    /* FPR() supplies the printable form of each field. */
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    return img::format("ROUND.W.D %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ * ROUND_W_S: render `instruction` as the text "ROUND.W.S ft, fs".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::ROUND_W_S(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    /* FPR() supplies the printable form of each field. */
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    return img::format("ROUND.W.S %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ * RSQRT_D: render `instruction` as the text "RSQRT.D ft, fs".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::RSQRT_D(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    /* FPR() supplies the printable form of each field. */
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    return img::format("RSQRT.D %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ * RSQRT_S: render `instruction` as the text "RSQRT.S ft, fs".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               x1110000101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::RSQRT_S(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    /* FPR() supplies the printable form of each field. */
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    return img::format("RSQRT.S %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ * SAVE_16_: text "SAVE u" followed by the save_restore_list() text.
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SAVE_16_(uint64 instruction)
+{
+    /* rt1 is mapped by encode_rt1_from_rt(); the gp argument is 0. */
+    uint64 count_raw = extract_count_3_2_1_0(instruction);
+    uint64 rt1_raw = extract_rtl_11(instruction);
+    uint64 u_raw = extr_uil4il4bs4Fmsb7(instruction);
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    return img::format("SAVE %s%s", u_str,
+        save_restore_list(encode_rt1_from_rt(rt1_raw), count_raw, 0));
+}
+
+
+/*
+ * SAVE_32_: text "SAVE u" + save_restore_list(rt, count, gp).
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SAVE_32_(uint64 instruction)
+{
+    /* Only u is formatted here; the rest feed save_restore_list(). */
+    uint64 count_raw = extract_count_19_18_17_16(instruction);
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extr_uil3il3bs9Fmsb11(instruction);
+    uint64 gp_raw = extract_gp_2(instruction);
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    return img::format("SAVE %s%s", u_str,
+               save_restore_list(rt_raw, count_raw, gp_raw));
+}
+
+
+/*
+ * SAVEF: render `instruction` as the text "SAVEF u, count".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SAVEF(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 count_raw = extract_count_19_18_17_16(instruction);
+    uint64 u_raw = extr_uil3il3bs9Fmsb11(instruction);
+    /* Both operands are printed through IMMEDIATE(). */
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string count_str = IMMEDIATE(copy(count_raw));
+    return img::format("SAVEF %s, %s", u_str, count_str);
+}
+
+
+/*
+ * SB_16_: render `instruction` as the text "SB rtz3, u(rs3)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SB_16_(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rtz3_raw = extract_rtz3_9_8_7(instruction);
+    uint64 u_raw = extract_u_1_0(instruction);
+    uint64 rs3_raw = extract_rs3_6_5_4(instruction);
+    /* rtz3 maps through encode_gpr3_store(), rs3 through encode_gpr3(). */
+    std::string rtz3_str = GPR(encode_gpr3_store(rtz3_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string rs3_str = GPR(encode_gpr3(rs3_raw));
+    return img::format("SB %s, %s(%s)", rtz3_str, u_str, rs3_str);
+}
+
+
+/*
+ * SB_GP_: render `instruction` as the text "SB rt, u($28)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SB_GP_(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_17_to_0(instruction);
+    /* The base register is not decoded: the constant 28 fills "$%d". */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    return img::format("SB %s, %s($%d)", rt_str, u_str, 28);
+}
+
+
+/*
+ * SB_S9_: render `instruction` as the text "SB rt, s(rs)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SB_S9_(uint64 instruction)
+{
+    /* rt and rs are unsigned; s is kept as a signed 64-bit value. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* GPR() formats rt and rs; IMMEDIATE() formats s. */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SB %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ * SB_U12_: render `instruction` as the text "SB rt, u(rs)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SB_U12_(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* GPR() formats rt and rs; IMMEDIATE() formats u. */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SB %s, %s(%s)", rt_str, u_str, rs_str);
+}
+
+
+/*
+ * SBE: render `instruction` as the text "SBE rt, s(rs)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SBE(uint64 instruction)
+{
+    /* rt and rs are unsigned; s is kept as a signed 64-bit value. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* GPR() formats rt and rs; IMMEDIATE() formats s. */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SBE %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ * SBX: render `instruction` as the text "SBX rd, rs(rt)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SBX(uint64 instruction)
+{
+    /* NOTE(review): rd and rs helpers both name bits 20..16 - confirm. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* All three operands are printed through GPR(). */
+    std::string rd_str = GPR(copy(rd_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("SBX %s, %s(%s)", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SC: render `instruction` as the text "SC rt, s(rs)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SC(uint64 instruction)
+{
+    /* rt and rs are unsigned; s is kept as a signed 64-bit value. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil2il2bs6_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* GPR() formats rt and rs; IMMEDIATE() formats s. */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SC %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ * SCD: render `instruction` as the text "SCD rt, s(rs)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SCD(uint64 instruction)
+{
+    /* rt and rs are unsigned; s is kept as a signed 64-bit value. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil3il3bs5_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* GPR() formats rt and rs; IMMEDIATE() formats s. */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SCD %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ * SCDP: render `instruction` as the text "SCDP rt, ru, (rs)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SCDP(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 ru_raw = extract_ru_7_6_5_4_3(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* All three operands are printed through GPR(). */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string ru_str = GPR(copy(ru_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SCDP %s, %s, (%s)", rt_str, ru_str, rs_str);
+}
+
+
+/*
+ * SCE: render `instruction` as the text "SCE rt, s(rs)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SCE(uint64 instruction)
+{
+    /* rt and rs are unsigned; s is kept as a signed 64-bit value. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil2il2bs6_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* GPR() formats rt and rs; IMMEDIATE() formats s. */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SCE %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ * SCWP: render `instruction` as the text "SCWP rt, ru, (rs)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SCWP(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 ru_raw = extract_ru_7_6_5_4_3(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* All three operands are printed through GPR(). */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string ru_str = GPR(copy(ru_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SCWP %s, %s, (%s)", rt_str, ru_str, rs_str);
+}
+
+
+/*
+ * SCWPE: render `instruction` as the text "SCWPE rt, ru, (rs)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SCWPE(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 ru_raw = extract_ru_7_6_5_4_3(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* All three operands are printed through GPR(). */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string ru_str = GPR(copy(ru_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SCWPE %s, %s, (%s)", rt_str, ru_str, rs_str);
+}
+
+
+/*
+ * SD_GP_: render `instruction` as the text "SD rt, u($28)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SD_GP_(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extr_uil3il3bs18Fmsb20(instruction);
+    /* The base register is not decoded: the constant 28 fills "$%d". */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    return img::format("SD %s, %s($%d)", rt_str, u_str, 28);
+}
+
+
+/*
+ * SD_S9_: render `instruction` as the text "SD rt, s(rs)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SD_S9_(uint64 instruction)
+{
+    /* rt and rs are unsigned; s is kept as a signed 64-bit value. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* GPR() formats rt and rs; IMMEDIATE() formats s. */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SD %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ * SD_U12_: render `instruction` as the text "SD rt, u(rs)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SD_U12_(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* GPR() formats rt and rs; IMMEDIATE() formats u. */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SD %s, %s(%s)", rt_str, u_str, rs_str);
+}
+
+
+/*
+ * SDBBP_16_: render `instruction` as the text "SDBBP code".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SDBBP_16_(uint64 instruction)
+{
+    /* The only operand comes from extract_code_2_1_0(). */
+    uint64 code_raw = extract_code_2_1_0(instruction);
+    /* IMMEDIATE() supplies its printable form. */
+    std::string code_str = IMMEDIATE(copy(code_raw));
+    return img::format("SDBBP %s", code_str);
+}
+
+
+/*
+ * SDBBP_32_: render `instruction` as the text "SDBBP code".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SDBBP_32_(uint64 instruction)
+{
+    /* The only operand comes from extract_code_18_to_0(). */
+    uint64 code_raw = extract_code_18_to_0(instruction);
+    /* IMMEDIATE() supplies its printable form. */
+    std::string code_str = IMMEDIATE(copy(code_raw));
+    return img::format("SDBBP %s", code_str);
+}
+
+
+/*
+ * SDC1_GP_: render `instruction` as the text "SDC1 ft, u($28)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SDC1_GP_(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 u_raw = extr_uil2il2bs16Fmsb17(instruction);
+    /* The base register is not decoded: the constant 28 fills "$%d". */
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    return img::format("SDC1 %s, %s($%d)", ft_str, u_str, 28);
+}
+
+
+/*
+ * SDC1_S9_: render `instruction` as the text "SDC1 ft, s(rs)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SDC1_S9_(uint64 instruction)
+{
+    /* NOTE(review): ft and rs helpers both name bits 20..16 - confirm. */
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* FPR() formats ft, IMMEDIATE() the signed s, GPR() formats rs. */
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SDC1 %s, %s(%s)", ft_str, s_str, rs_str);
+}
+
+
+/*
+ * SDC1_U12_: render `instruction` as the text "SDC1 ft, u(rs)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SDC1_U12_(uint64 instruction)
+{
+    /* NOTE(review): ft and rs helpers both name bits 20..16 - confirm. */
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* FPR() formats ft, IMMEDIATE() formats u, GPR() formats rs. */
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SDC1 %s, %s(%s)", ft_str, u_str, rs_str);
+}
+
+
+/*
+ * SDC1X: render `instruction` as the text "SDC1X ft, rs(rt)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SDC1X(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 ft_raw = extract_ft_15_14_13_12_11(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* FPR() formats ft; GPR() formats rs and rt. */
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("SDC1X %s, %s(%s)", ft_str, rs_str, rt_str);
+}
+
+
+/*
+ * SDC1XS: render `instruction` as the text "SDC1XS ft, rs(rt)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SDC1XS(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 ft_raw = extract_ft_15_14_13_12_11(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* FPR() formats ft; GPR() formats rs and rt. */
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("SDC1XS %s, %s(%s)", ft_str, rs_str, rt_str);
+}
+
+
+/*
+ * SDC2: render `instruction` as the text "SDC2 cs, s(rs)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SDC2(uint64 instruction)
+{
+    /* cs and rs are unsigned; s is kept as a signed 64-bit value. */
+    uint64 cs_raw = extract_cs_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* CPR() formats cs, IMMEDIATE() formats s, GPR() formats rs. */
+    std::string cs_str = CPR(copy(cs_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SDC2 %s, %s(%s)", cs_str, s_str, rs_str);
+}
+
+
+/*
+ * SDM: render `instruction` as the text "SDM rt, s(rs), count3".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SDM(uint64 instruction)
+{
+    /* s is kept as a signed 64-bit value; the others are unsigned. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 count3_raw = extract_count3_14_13_12(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* count3 is mapped by encode_count3_from_count() before printing. */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string count3_str = IMMEDIATE(encode_count3_from_count(count3_raw));
+    return img::format("SDM %s, %s(%s), %s", rt_str, s_str, rs_str, count3_str);
+}
+
+
+/*
+ * SDPC_48_: render `instruction` as the text "SDPC rt, s".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SDPC_48_(uint64 instruction)
+{
+    /* rt is unsigned; s is kept as a signed 64-bit value. */
+    uint64 rt_raw = extract_rt_41_40_39_38_37(instruction);
+    int64 s_raw = extr_sil0il16bs16_il16il0bs16Tmsb31(instruction);
+    /* s goes through encode_s_from_address() and ADDRESS(..., 6). */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = ADDRESS(encode_s_from_address(s_raw), 6);
+    return img::format("SDPC %s, %s", rt_str, s_str);
+}
+
+
+/*
+ * SDXS: render `instruction` as the text "SDXS rd, rs(rt)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SDXS(uint64 instruction)
+{
+    /* NOTE(review): rd and rs helpers both name bits 20..16 - confirm. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* All three operands are printed through GPR(). */
+    std::string rd_str = GPR(copy(rd_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("SDXS %s, %s(%s)", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SDX: render `instruction` as the text "SDX rd, rs(rt)".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SDX(uint64 instruction)
+{
+    /* NOTE(review): rd and rs helpers both name bits 20..16 - confirm. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* All three operands are printed through GPR(). */
+    std::string rd_str = GPR(copy(rd_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+    return img::format("SDX %s, %s(%s)", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SEB: render `instruction` as the text "SEB rt, rs".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SEB(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* GPR() supplies the printable form of each field. */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SEB %s, %s", rt_str, rs_str);
+}
+
+
+/*
+ * SEH: render `instruction` as the text "SEH rt, rs".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SEH(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    /* GPR() supplies the printable form of each field. */
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    return img::format("SEH %s, %s", rt_str, rs_str);
+}
+
+
+/*
+ * SEL_D: render `instruction` as the text "SEL.D fd, fs, ft".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SEL_D(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_raw = extract_fd_10_9_8_7_6(instruction);
+    /* All three operands are printed through FPR(). */
+    std::string fd_str = FPR(copy(fd_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    std::string ft_str = FPR(copy(ft_raw));
+    return img::format("SEL.D %s, %s, %s", fd_str, fs_str, ft_str);
+}
+
+
+/*
+ * SEL_S: render `instruction` as the text "SEL.S fd, fs, ft".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SEL_S(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_raw = extract_fd_10_9_8_7_6(instruction);
+    /* All three operands are printed through FPR(). */
+    std::string fd_str = FPR(copy(fd_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    std::string ft_str = FPR(copy(ft_raw));
+    return img::format("SEL.S %s, %s, %s", fd_str, fs_str, ft_str);
+}
+
+
+/*
+ * SELEQZ_D: render `instruction` as the text "SELEQZ.D fd, fs, ft".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SELEQZ_D(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_raw = extract_fd_10_9_8_7_6(instruction);
+    /* All three operands are printed through FPR(). */
+    std::string fd_str = FPR(copy(fd_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    std::string ft_str = FPR(copy(ft_raw));
+    return img::format("SELEQZ.D %s, %s, %s", fd_str, fs_str, ft_str);
+}
+
+
+/*
+ * SELEQZ_S: render `instruction` as the text "SELEQZ.S fd, fs, ft".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SELEQZ_S(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_raw = extract_fd_10_9_8_7_6(instruction);
+    /* All three operands are printed through FPR(). */
+    std::string fd_str = FPR(copy(fd_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    std::string ft_str = FPR(copy(ft_raw));
+    return img::format("SELEQZ.S %s, %s, %s", fd_str, fs_str, ft_str);
+}
+
+
+/*
+ * SELNEZ_D: render `instruction` as the text "SELNEZ.D fd, fs, ft".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SELNEZ_D(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_raw = extract_fd_10_9_8_7_6(instruction);
+    /* All three operands are printed through FPR(). */
+    std::string fd_str = FPR(copy(fd_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    std::string ft_str = FPR(copy(ft_raw));
+    return img::format("SELNEZ.D %s, %s, %s", fd_str, fs_str, ft_str);
+}
+
+
+/*
+ * SELNEZ_S: render `instruction` as the text "SELNEZ.S fd, fs, ft".
+ * NOTE(review): diagram below looks like a shared placeholder - confirm.
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SELNEZ_S(uint64 instruction)
+{
+    /* Operand fields decoded from the instruction word. */
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_raw = extract_fd_10_9_8_7_6(instruction);
+    /* All three operands are printed through FPR(). */
+    std::string fd_str = FPR(copy(fd_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+    std::string ft_str = FPR(copy(ft_raw));
+    return img::format("SELNEZ.S %s, %s, %s", fd_str, fs_str, ft_str);
+}
+
+
+/*
+ * SEQI rt, rs, u
+ *
+ * Formats the SEQI disassembly line: two GPR operands (rt, rs) followed
+ * by the immediate u, each decoded from the instruction word.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, rs: extract_rs_20_19_18_17_16,
+ *   u: extract_u_11_10_9_8_7_6_5_4_3_2_1_0
+ */
+std::string NMD::SEQI(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string u_txt = IMMEDIATE(copy(u_raw));
+
+    return img::format("SEQI %s, %s, %s", rt_txt, rs_txt, u_txt);
+}
+
+
+/*
+ * SH rtz3, u(rs3)  (SH_16_ form)
+ *
+ * Formats the text "SH rtz3, u(rs3)". The register fields go through
+ * encode_gpr3_store (rtz3) and encode_gpr3 (rs3) before GPR().
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rtz3: extract_rtz3_9_8_7, u: extr_uil1il1bs2Fmsb2,
+ *   rs3: extract_rs3_6_5_4
+ */
+std::string NMD::SH_16_(uint64 instruction)
+{
+    uint64 rtz3_raw = extract_rtz3_9_8_7(instruction);
+    uint64 u_raw = extr_uil1il1bs2Fmsb2(instruction);
+    uint64 rs3_raw = extract_rs3_6_5_4(instruction);
+
+    std::string rtz3_txt = GPR(encode_gpr3_store(rtz3_raw));
+    std::string u_txt = IMMEDIATE(copy(u_raw));
+    std::string rs3_txt = GPR(encode_gpr3(rs3_raw));
+
+    return img::format("SH %s, %s(%s)", rtz3_txt, u_txt, rs3_txt);
+}
+
+
+/*
+ * SH rt, u($28)  (SH_GP_ form)
+ *
+ * Formats the text "SH rt, u($28)": rt is a GPR operand and u an
+ * immediate, both decoded from the instruction word. The base register
+ * is not decoded; the constant 28 is handed to the formatter.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, u: extr_uil1il1bs17Fmsb17
+ */
+std::string NMD::SH_GP_(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extr_uil1il1bs17Fmsb17(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string u_txt = IMMEDIATE(copy(u_raw));
+
+    return img::format("SH %s, %s($%d)", rt_txt, u_txt, 28);
+}
+
+
+/*
+ * SH rt, s(rs)  (SH_S9_ form)
+ *
+ * Formats the text "SH rt, s(rs)": rt and rs are GPR operands, s is an
+ * immediate held in a signed int64 before it is printed.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, s: extr_sil0il0bs8_il15il8bs1Tmsb8,
+ *   rs: extract_rs_20_19_18_17_16
+ */
+std::string NMD::SH_S9_(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string s_txt = IMMEDIATE(copy(s_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+
+    return img::format("SH %s, %s(%s)", rt_txt, s_txt, rs_txt);
+}
+
+
+/*
+ * SH rt, u(rs)  (SH_U12_ form)
+ *
+ * Formats the text "SH rt, u(rs)": rt and rs are GPR operands, u is an
+ * immediate; all three are decoded from the instruction word.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, rs: extract_rs_20_19_18_17_16,
+ *   u: extract_u_11_10_9_8_7_6_5_4_3_2_1_0
+ */
+std::string NMD::SH_U12_(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string u_txt = IMMEDIATE(copy(u_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+
+    return img::format("SH %s, %s(%s)", rt_txt, u_txt, rs_txt);
+}
+
+
+/*
+ * SHE rt, s(rs)
+ *
+ * Formats the text "SHE rt, s(rs)": rt and rs are GPR operands, s is an
+ * immediate held in a signed int64 before it is printed.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, s: extr_sil0il0bs8_il15il8bs1Tmsb8,
+ *   rs: extract_rs_20_19_18_17_16
+ */
+std::string NMD::SHE(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string s_txt = IMMEDIATE(copy(s_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+
+    return img::format("SHE %s, %s(%s)", rt_txt, s_txt, rs_txt);
+}
+
+
+/*
+ * SHILO ac, shift - Shift an Accumulator Value Leaving the Result in the Same
+ *                     Accumulator
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000xxxx        xxxx0000011101
+ *      shift ------
+ *               ac --
+ */
+std::string NMD::SHILO(uint64 instruction)
+{
+    int64 shift_raw = extract_shift_21_20_19_18_17_16(instruction);
+    uint64 ac_raw = extract_ac_13_12(instruction);
+    /* NOTE(review): sketch above draws ac at bits 15..14 - confirm helper */
+    std::string shift_txt = IMMEDIATE(copy(shift_raw));
+    std::string ac_txt = AC(copy(ac_raw));
+
+    return img::format("SHILO %s, %s", ac_txt, shift_txt);
+}
+
+
+/*
+ * SHILOV ac, rs - Variable Shift of Accumulator Value Leaving the Result in
+ *                   the Same Accumulator
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000xxxxx       01001001111111
+ *          rs -----
+ *               ac --
+ */
+std::string NMD::SHILOV(uint64 instruction)
+{
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    uint64 ac_raw = extract_ac_13_12(instruction);
+    /* NOTE(review): sketch above draws ac at bits 15..14 - confirm helper */
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string ac_txt = AC(copy(ac_raw));
+
+    return img::format("SHILOV %s, %s", ac_txt, rs_txt);
+}
+
+
+/*
+ * SHLL.PH rt, rs, sa - Shift Left Logical Vector Pair Halfwords
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000              001110110101
+ *     rt -----
+ *          rs -----
+ *               sa ----
+ */
+std::string NMD::SHLL_PH(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    uint64 sa_raw = extract_sa_15_14_13_12(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string sa_txt = IMMEDIATE(copy(sa_raw));
+
+    return img::format("SHLL.PH %s, %s, %s", rt_txt, rs_txt, sa_txt);
+}
+
+
+/*
+ * SHLL.QB rt, rs, sa - Shift Left Logical Vector Quad Bytes
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000             0100001111111
+ *     rt -----
+ *          rs -----
+ *               sa ---
+ */
+std::string NMD::SHLL_QB(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    uint64 sa_raw = extract_sa_15_14_13(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string sa_txt = IMMEDIATE(copy(sa_raw));
+
+    return img::format("SHLL.QB %s, %s, %s", rt_txt, rs_txt, sa_txt);
+}
+
+
+/*
+ * SHLL_S.PH rt, rs, sa - Shift Left Logical Vector Pair Halfwords (saturated)
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000              001110110101
+ *     rt -----
+ *          rs -----
+ *               sa ----
+ */
+std::string NMD::SHLL_S_PH(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    uint64 sa_raw = extract_sa_15_14_13_12(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string sa_txt = IMMEDIATE(copy(sa_raw));
+
+    return img::format("SHLL_S.PH %s, %s, %s", rt_txt, rs_txt, sa_txt);
+}
+
+
+/*
+ * SHLL_S.W rt, rs, sa
+ *
+ * Formats the SHLL_S.W disassembly line: two GPR operands (rt, rs)
+ * followed by the immediate sa, each decoded from the instruction word.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, rs: extract_rs_20_19_18_17_16,
+ *   sa: extract_sa_15_14_13_12_11
+ */
+std::string NMD::SHLL_S_W(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 sa_raw = extract_sa_15_14_13_12_11(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string sa_txt = IMMEDIATE(copy(sa_raw));
+
+    return img::format("SHLL_S.W %s, %s, %s", rt_txt, rs_txt, sa_txt);
+}
+
+
+/*
+ * SHLLV.PH rd, rt, rs
+ *
+ * Formats the SHLLV.PH disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rt, rs.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SHLLV_PH(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+
+    return img::format("SHLLV.PH %s, %s, %s", rd_txt, rt_txt, rs_txt);
+}
+
+
+/*
+ * SHLLV.QB rd, rt, rs
+ *
+ * Formats the SHLLV.QB disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rt, rs.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SHLLV_QB(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+
+    return img::format("SHLLV.QB %s, %s, %s", rd_txt, rt_txt, rs_txt);
+}
+
+
+/*
+ * SHLLV_S.PH rd, rt, rs
+ *
+ * Formats the SHLLV_S.PH disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rt, rs.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SHLLV_S_PH(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+
+    return img::format("SHLLV_S.PH %s, %s, %s", rd_txt, rt_txt, rs_txt);
+}
+
+
+/*
+ * SHLLV_S.W rd, rt, rs
+ *
+ * Formats the SHLLV_S.W disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rt, rs.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SHLLV_S_W(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+
+    return img::format("SHLLV_S.W %s, %s, %s", rd_txt, rt_txt, rs_txt);
+}
+
+
+/*
+ * SHRA.PH rt, rs, sa
+ *
+ * Formats the SHRA.PH disassembly line: two GPR operands (rt, rs)
+ * followed by the immediate sa, each decoded from the instruction word.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, rs: extract_rs_20_19_18_17_16,
+ *   sa: extract_sa_15_14_13_12
+ */
+std::string NMD::SHRA_PH(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 sa_raw = extract_sa_15_14_13_12(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string sa_txt = IMMEDIATE(copy(sa_raw));
+
+    return img::format("SHRA.PH %s, %s, %s", rt_txt, rs_txt, sa_txt);
+}
+
+
+/*
+ * SHRA.QB rt, rs, sa
+ *
+ * Formats the SHRA.QB disassembly line: two GPR operands (rt, rs)
+ * followed by the immediate sa, each decoded from the instruction word.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, rs: extract_rs_20_19_18_17_16,
+ *   sa: extract_sa_15_14_13
+ */
+std::string NMD::SHRA_QB(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 sa_raw = extract_sa_15_14_13(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string sa_txt = IMMEDIATE(copy(sa_raw));
+
+    return img::format("SHRA.QB %s, %s, %s", rt_txt, rs_txt, sa_txt);
+}
+
+
+/*
+ * SHRA_R.PH rt, rs, sa
+ *
+ * Formats the SHRA_R.PH disassembly line: two GPR operands (rt, rs)
+ * followed by the immediate sa, each decoded from the instruction word.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, rs: extract_rs_20_19_18_17_16,
+ *   sa: extract_sa_15_14_13_12
+ */
+std::string NMD::SHRA_R_PH(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 sa_raw = extract_sa_15_14_13_12(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string sa_txt = IMMEDIATE(copy(sa_raw));
+
+    return img::format("SHRA_R.PH %s, %s, %s", rt_txt, rs_txt, sa_txt);
+}
+
+
+/*
+ * SHRA_R.QB rt, rs, sa
+ *
+ * Formats the SHRA_R.QB disassembly line: two GPR operands (rt, rs)
+ * followed by the immediate sa, each decoded from the instruction word.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, rs: extract_rs_20_19_18_17_16,
+ *   sa: extract_sa_15_14_13
+ */
+std::string NMD::SHRA_R_QB(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 sa_raw = extract_sa_15_14_13(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string sa_txt = IMMEDIATE(copy(sa_raw));
+
+    return img::format("SHRA_R.QB %s, %s, %s", rt_txt, rs_txt, sa_txt);
+}
+
+
+/*
+ * SHRA_R.W rt, rs, sa
+ *
+ * Formats the SHRA_R.W disassembly line: two GPR operands (rt, rs)
+ * followed by the immediate sa, each decoded from the instruction word.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, rs: extract_rs_20_19_18_17_16,
+ *   sa: extract_sa_15_14_13_12_11
+ */
+std::string NMD::SHRA_R_W(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 sa_raw = extract_sa_15_14_13_12_11(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string sa_txt = IMMEDIATE(copy(sa_raw));
+
+    return img::format("SHRA_R.W %s, %s, %s", rt_txt, rs_txt, sa_txt);
+}
+
+
+/*
+ * SHRAV.PH rd, rt, rs
+ *
+ * Formats the SHRAV.PH disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rt, rs.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SHRAV_PH(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+
+    return img::format("SHRAV.PH %s, %s, %s", rd_txt, rt_txt, rs_txt);
+}
+
+
+/*
+ * SHRAV.QB rd, rt, rs
+ *
+ * Formats the SHRAV.QB disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rt, rs.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SHRAV_QB(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+
+    return img::format("SHRAV.QB %s, %s, %s", rd_txt, rt_txt, rs_txt);
+}
+
+
+/*
+ * SHRAV_R.PH rd, rt, rs
+ *
+ * Formats the SHRAV_R.PH disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rt, rs.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SHRAV_R_PH(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+
+    return img::format("SHRAV_R.PH %s, %s, %s", rd_txt, rt_txt, rs_txt);
+}
+
+
+/*
+ * SHRAV_R.QB rd, rt, rs
+ *
+ * Formats the SHRAV_R.QB disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rt, rs.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SHRAV_R_QB(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+
+    return img::format("SHRAV_R.QB %s, %s, %s", rd_txt, rt_txt, rs_txt);
+}
+
+
+/*
+ * SHRAV_R.W rd, rt, rs
+ *
+ * Formats the SHRAV_R.W disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rt, rs.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SHRAV_R_W(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+
+    return img::format("SHRAV_R.W %s, %s, %s", rd_txt, rt_txt, rs_txt);
+}
+
+
+/*
+ * SHRL.PH rt, rs, sa
+ *
+ * Formats the SHRL.PH disassembly line: two GPR operands (rt, rs)
+ * followed by the immediate sa, each decoded from the instruction word.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, rs: extract_rs_20_19_18_17_16,
+ *   sa: extract_sa_15_14_13_12
+ */
+std::string NMD::SHRL_PH(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 sa_raw = extract_sa_15_14_13_12(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string sa_txt = IMMEDIATE(copy(sa_raw));
+
+    return img::format("SHRL.PH %s, %s, %s", rt_txt, rs_txt, sa_txt);
+}
+
+
+/*
+ * SHRL.QB rt, rs, sa
+ *
+ * Formats the SHRL.QB disassembly line: two GPR operands (rt, rs)
+ * followed by the immediate sa, each decoded from the instruction word.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, rs: extract_rs_20_19_18_17_16,
+ *   sa: extract_sa_15_14_13
+ */
+std::string NMD::SHRL_QB(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 sa_raw = extract_sa_15_14_13(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string sa_txt = IMMEDIATE(copy(sa_raw));
+
+    return img::format("SHRL.QB %s, %s, %s", rt_txt, rs_txt, sa_txt);
+}
+
+
+/*
+ * SHRLV.PH rd, rt, rs
+ *
+ * Formats the SHRLV.PH disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rt, rs.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SHRLV_PH(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+
+    return img::format("SHRLV.PH %s, %s, %s", rd_txt, rt_txt, rs_txt);
+}
+
+
+/*
+ * SHRLV.QB rd, rt, rs
+ *
+ * Formats the SHRLV.QB disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rt, rs.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SHRLV_QB(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+
+    return img::format("SHRLV.QB %s, %s, %s", rd_txt, rt_txt, rs_txt);
+}
+
+
+/*
+ * SHX rd, rs(rt)
+ *
+ * Formats the text "SHX rd, rs(rt)" from three GPR fields of the
+ * instruction word.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SHX(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+
+    return img::format("SHX %s, %s(%s)", rd_txt, rs_txt, rt_txt);
+}
+
+
+/*
+ * SHXS rd, rs(rt)
+ *
+ * Formats the text "SHXS rd, rs(rt)" from three GPR fields of the
+ * instruction word.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SHXS(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+
+    return img::format("SHXS %s, %s(%s)", rd_txt, rs_txt, rt_txt);
+}
+
+
+/*
+ * SIGRIE code
+ *
+ * Formats the SIGRIE disassembly line. The only operand is the
+ * immediate "code", decoded from the instruction word and printed via
+ * IMMEDIATE().
+ *
+ * Field helper used (name as it appears in the code):
+ *   code: extract_code_18_to_0
+ */
+std::string NMD::SIGRIE(uint64 instruction)
+{
+    uint64 code_raw = extract_code_18_to_0(instruction);
+
+    std::string code_txt = IMMEDIATE(copy(code_raw));
+
+    return img::format("SIGRIE %s", code_txt);
+}
+
+
+/*
+ * SLL rt3, rs3, shift3  (SLL_16_ form)
+ *
+ * Formats the text "SLL rt3, rs3, shift3". Register fields go through
+ * encode_gpr3, the shift field through encode_shift3_from_shift.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   shift3: extract_shift3_2_1_0, rt3: extract_rt3_9_8_7,
+ *   rs3: extract_rs3_6_5_4
+ */
+std::string NMD::SLL_16_(uint64 instruction)
+{
+    uint64 shift3_raw = extract_shift3_2_1_0(instruction);
+    uint64 rt3_raw = extract_rt3_9_8_7(instruction);
+    uint64 rs3_raw = extract_rs3_6_5_4(instruction);
+
+    std::string rt3_txt = GPR(encode_gpr3(rt3_raw));
+    std::string rs3_txt = GPR(encode_gpr3(rs3_raw));
+    std::string shift3_txt = IMMEDIATE(encode_shift3_from_shift(shift3_raw));
+
+    return img::format("SLL %s, %s, %s", rt3_txt, rs3_txt, shift3_txt);
+}
+
+
+/*
+ * SLL rt, rs, shift  (SLL_32_ form)
+ *
+ * Formats the SLL disassembly line: two GPR operands (rt, rs) followed
+ * by an immediate shift amount, each decoded from the instruction word.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, rs: extract_rs_20_19_18_17_16,
+ *   shift: extract_shift_4_3_2_1_0
+ */
+std::string NMD::SLL_32_(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 shift_raw = extract_shift_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string shift_txt = IMMEDIATE(copy(shift_raw));
+
+    return img::format("SLL %s, %s, %s", rt_txt, rs_txt, shift_txt);
+}
+
+
+/*
+ * SLLV rd, rs, rt
+ *
+ * Formats the SLLV disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rs, rt.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SLLV(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+
+    return img::format("SLLV %s, %s, %s", rd_txt, rs_txt, rt_txt);
+}
+
+
+/*
+ * SLT rd, rs, rt
+ *
+ * Formats the SLT disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rs, rt.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SLT(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+
+    return img::format("SLT %s, %s, %s", rd_txt, rs_txt, rt_txt);
+}
+
+
+/*
+ * SLTI rt, rs, u
+ *
+ * Formats the SLTI disassembly line: two GPR operands (rt, rs) followed
+ * by the immediate u, each decoded from the instruction word.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, rs: extract_rs_20_19_18_17_16,
+ *   u: extract_u_11_10_9_8_7_6_5_4_3_2_1_0
+ */
+std::string NMD::SLTI(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string u_txt = IMMEDIATE(copy(u_raw));
+
+    return img::format("SLTI %s, %s, %s", rt_txt, rs_txt, u_txt);
+}
+
+
+/*
+ * SLTIU rt, rs, u
+ *
+ * Formats the SLTIU disassembly line: two GPR operands (rt, rs) followed
+ * by the immediate u, each decoded from the instruction word.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, rs: extract_rs_20_19_18_17_16,
+ *   u: extract_u_11_10_9_8_7_6_5_4_3_2_1_0
+ */
+std::string NMD::SLTIU(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string u_txt = IMMEDIATE(copy(u_raw));
+
+    return img::format("SLTIU %s, %s, %s", rt_txt, rs_txt, u_txt);
+}
+
+
+/*
+ * SLTU rd, rs, rt
+ *
+ * Formats the SLTU disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rs, rt.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SLTU(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+
+    return img::format("SLTU %s, %s, %s", rd_txt, rs_txt, rt_txt);
+}
+
+
+/*
+ * SOV rd, rs, rt
+ *
+ * Formats the SOV disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rs, rt.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SOV(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+
+    return img::format("SOV %s, %s, %s", rd_txt, rs_txt, rt_txt);
+}
+
+
+/*
+ * SPECIAL2 op
+ *
+ * Formats the SPECIAL2 disassembly line. The only operand is the
+ * immediate "op", decoded from the instruction word and printed via
+ * IMMEDIATE().
+ *
+ * Field helper used (name as it appears in the code):
+ *   op: extract_op_25_to_3
+ */
+std::string NMD::SPECIAL2(uint64 instruction)
+{
+    uint64 op_raw = extract_op_25_to_3(instruction);
+
+    std::string op_txt = IMMEDIATE(copy(op_raw));
+
+    return img::format("SPECIAL2 %s", op_txt);
+}
+
+
+/*
+ * SQRT.D ft, fs
+ *
+ * Formats the SQRT.D disassembly line from two FPR fields of the
+ * instruction word. The operands are emitted in the order ft, fs,
+ * i.e. the reverse of the order in which the fields are extracted.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   fs: extract_fs_15_14_13_12_11, ft: extract_ft_20_19_18_17_16
+ */
+std::string NMD::SQRT_D(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_txt = FPR(copy(ft_raw));
+    std::string fs_txt = FPR(copy(fs_raw));
+
+    return img::format("SQRT.D %s, %s", ft_txt, fs_txt);
+}
+
+
+/*
+ * SQRT.S ft, fs
+ *
+ * Formats the SQRT.S disassembly line from two FPR fields of the
+ * instruction word. The operands are emitted in the order ft, fs,
+ * i.e. the reverse of the order in which the fields are extracted.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   fs: extract_fs_15_14_13_12_11, ft: extract_ft_20_19_18_17_16
+ */
+std::string NMD::SQRT_S(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_txt = FPR(copy(ft_raw));
+    std::string fs_txt = FPR(copy(fs_raw));
+
+    return img::format("SQRT.S %s, %s", ft_txt, fs_txt);
+}
+
+
+/*
+ * SRA rt, rs, shift - Shift Word Right Arithmetic
+ *
+ * Formats the SRA disassembly line: two GPR operands (rt, rs) followed
+ * by an immediate shift amount, each decoded from the instruction word.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, rs: extract_rs_20_19_18_17_16,
+ *   shift: extract_shift_4_3_2_1_0
+ */
+std::string NMD::SRA(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+    uint64 shift_raw = extract_shift_4_3_2_1_0(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string shift_txt = IMMEDIATE(copy(shift_raw));
+
+    return img::format("SRA %s, %s, %s", rt_txt, rs_txt, shift_txt);
+}
+
+
+/*
+ * SRAV rd, rs, rt - Shift Word Right Arithmetic Variable
+ *
+ * Formats the SRAV disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rs, rt.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SRAV(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+
+    return img::format("SRAV %s, %s, %s", rd_txt, rs_txt, rt_txt);
+}
+
+
+/*
+ * SRL rt3, rs3, shift3  (SRL_16_ form)
+ *
+ * Formats the text "SRL rt3, rs3, shift3". Register fields go through
+ * encode_gpr3, the shift field through encode_shift3_from_shift.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   shift3: extract_shift3_2_1_0, rt3: extract_rt3_9_8_7,
+ *   rs3: extract_rs3_6_5_4
+ */
+std::string NMD::SRL_16_(uint64 instruction)
+{
+    uint64 shift3_raw = extract_shift3_2_1_0(instruction);
+    uint64 rt3_raw = extract_rt3_9_8_7(instruction);
+    uint64 rs3_raw = extract_rs3_6_5_4(instruction);
+
+    std::string rt3_txt = GPR(encode_gpr3(rt3_raw));
+    std::string rs3_txt = GPR(encode_gpr3(rs3_raw));
+    std::string shift3_txt = IMMEDIATE(encode_shift3_from_shift(shift3_raw));
+
+    return img::format("SRL %s, %s, %s", rt3_txt, rs3_txt, shift3_txt);
+}
+
+
+/*
+ * SRL rt, rs, shift  (SRL_32_ form)
+ *
+ * Formats the SRL disassembly line: two GPR operands (rt, rs) followed
+ * by an immediate shift amount, each decoded from the instruction word.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   rt: extract_rt_25_24_23_22_21, rs: extract_rs_20_19_18_17_16,
+ *   shift: extract_shift_4_3_2_1_0
+ */
+std::string NMD::SRL_32_(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 shift_raw = extract_shift_4_3_2_1_0(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_txt = GPR(copy(rt_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string shift_txt = IMMEDIATE(copy(shift_raw));
+
+    return img::format("SRL %s, %s, %s", rt_txt, rs_txt, shift_txt);
+}
+
+
+/*
+ * SRLV rd, rs, rt
+ *
+ * Formats the SRLV disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rs, rt.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SRLV(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+
+    return img::format("SRLV %s, %s, %s", rd_txt, rs_txt, rt_txt);
+}
+
+
+/*
+ * SUB rd, rs, rt
+ *
+ * Formats the SUB disassembly line from three GPR fields of the
+ * instruction word; operands are emitted in the order rd, rs, rt.
+ *
+ * NOTE(review): the rd and rs helpers are both named for bits 20..16
+ * yet feed different operands; rd presumably sits at bits 15..11 -
+ * confirm against the helper's definition.
+ */
+std::string NMD::SUB(uint64 instruction)
+{
+    uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_txt = GPR(copy(rd_raw));
+    std::string rs_txt = GPR(copy(rs_raw));
+    std::string rt_txt = GPR(copy(rt_raw));
+
+    return img::format("SUB %s, %s, %s", rd_txt, rs_txt, rt_txt);
+}
+
+
+/*
+ * SUB.D fd, fs, ft
+ *
+ * Formats the SUB.D disassembly line from three FPR fields of the
+ * instruction word; operands are emitted in the order fd, fs, ft.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   fs: extract_fs_15_14_13_12_11, ft: extract_ft_20_19_18_17_16,
+ *   fd: extract_fd_10_9_8_7_6
+ */
+std::string NMD::SUB_D(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_raw = extract_fd_10_9_8_7_6(instruction);
+
+    std::string fd_txt = FPR(copy(fd_raw));
+    std::string fs_txt = FPR(copy(fs_raw));
+    std::string ft_txt = FPR(copy(ft_raw));
+
+    return img::format("SUB.D %s, %s, %s", fd_txt, fs_txt, ft_txt);
+}
+
+
+/*
+ * SUB.S fd, fs, ft
+ *
+ * Formats the SUB.S disassembly line from three FPR fields of the
+ * instruction word; operands are emitted in the order fd, fs, ft.
+ *
+ * Field helpers used (names as they appear in the code):
+ *   fs: extract_fs_15_14_13_12_11, ft: extract_ft_20_19_18_17_16,
+ *   fd: extract_fd_10_9_8_7_6
+ */
+std::string NMD::SUB_S(uint64 instruction)
+{
+    uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+    uint64 fd_raw = extract_fd_10_9_8_7_6(instruction);
+
+    std::string fd_txt = FPR(copy(fd_raw));
+    std::string fs_txt = FPR(copy(fs_raw));
+    std::string ft_txt = FPR(copy(ft_raw));
+
+    return img::format("SUB.S %s, %s, %s", fd_txt, fs_txt, ft_txt);
+}
+
+
+/*
+ * SUBQ.PH rd, rs, rt
+ *
+ * Builds the disassembly text for a SUBQ.PH instruction word: the rt, rd
+ * and rs fields are pulled out with the extract_* helpers and each is
+ * converted to a register name with GPR().
+ *
+ * NOTE(review): the rd and rs helpers both carry a 20..16 suffix; one of
+ * the names is presumably a misnomer - confirm against their definitions.
+ */
+std::string NMD::SUBQ_PH(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SUBQ.PH %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SUBQ_S.PH rd, rs, rt
+ *
+ * Builds the disassembly text for a SUBQ_S.PH instruction word: the rt,
+ * rd and rs fields are pulled out with the extract_* helpers and each is
+ * converted to a register name with GPR().
+ *
+ * NOTE(review): the rd and rs helpers both carry a 20..16 suffix; one of
+ * the names is presumably a misnomer - confirm against their definitions
+ * before treating the suffix as the real field position.
+ */
+std::string NMD::SUBQ_S_PH(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SUBQ_S.PH %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SUBQ_S.W rd, rs, rt
+ *
+ * Builds the disassembly text for a SUBQ_S.W instruction word: the rt,
+ * rd and rs fields are pulled out with the extract_* helpers and each is
+ * converted to a register name with GPR().
+ *
+ * NOTE(review): the rd and rs helpers both carry a 20..16 suffix; one of
+ * the names is presumably a misnomer - confirm against their definitions
+ * before treating the suffix as the real field position.
+ */
+std::string NMD::SUBQ_S_W(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SUBQ_S.W %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SUBQH.PH rd, rs, rt - Subtract Fractional Halfword Vectors And Shift Right
+ *                         to Halve Results
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SUBQH_PH(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* NOTE(review): diagram shows rd at 15..11; helper name says 20..16. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SUBQH.PH %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SUBQH_R.PH rd, rs, rt - Subtract Fractional Halfword Vectors And Shift Right
+ *                           to Halve Results (rounding)
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               11001001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SUBQH_R_PH(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* NOTE(review): diagram shows rd at 15..11; helper name says 20..16. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SUBQH_R.PH %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SUBQH_R.W rd, rs, rt
+ *
+ * Builds the disassembly text for a SUBQH_R.W instruction word: the rt,
+ * rd and rs fields are pulled out with the extract_* helpers and each is
+ * converted to a register name with GPR().
+ *
+ * NOTE(review): the rd and rs helpers both carry a 20..16 suffix; one of
+ * the names is presumably a misnomer - confirm against their definitions
+ * before treating the suffix as the real field position.
+ */
+std::string NMD::SUBQH_R_W(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SUBQH_R.W %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SUBQH.W rd, rs, rt - Subtract Fractional Words And Shift Right to Halve
+ *                        Results
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01010001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SUBQH_W(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* NOTE(review): diagram shows rd at 15..11; helper name says 20..16. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SUBQH.W %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SUBU rd3, rs3, rt3 (SUBU_16_ form)
+ *
+ * Builds the disassembly text for this SUBU encoding: the rd3, rt3 and
+ * rs3 fields are pulled out with the extract_* helpers, mapped through
+ * encode_gpr3() and converted to register names with GPR().
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::SUBU_16_(uint64 instruction)
+{
+    const uint64 rd3_bits = extract_rd3_3_2_1(instruction);
+    const uint64 rt3_bits = extract_rt3_9_8_7(instruction);
+    const uint64 rs3_bits = extract_rs3_6_5_4(instruction);
+
+    const std::string rd3_str = GPR(encode_gpr3(rd3_bits));
+    const std::string rs3_str = GPR(encode_gpr3(rs3_bits));
+    const std::string rt3_str = GPR(encode_gpr3(rt3_bits));
+
+    return img::format("SUBU %s, %s, %s", rd3_str, rs3_str, rt3_str);
+}
+
+
+/*
+ * SUBU rd, rs, rt (SUBU_32_ form)
+ *
+ * Builds the disassembly text for this SUBU encoding: the rt, rd and
+ * rs fields are pulled out with the extract_* helpers and each is
+ * converted to a register name with GPR().
+ *
+ * NOTE(review): the rd and rs helpers both carry a 20..16 suffix; one of
+ * the names is presumably a misnomer - confirm against their definitions.
+ */
+std::string NMD::SUBU_32_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SUBU %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SUBU.PH rd, rs, rt - Subtract Unsigned Integer Halfwords
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01100001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SUBU_PH(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* NOTE(review): diagram shows rd at 15..11; helper name says 20..16. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SUBU.PH %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SUBU.QB rd, rs, rt - Subtract Unsigned Quad Byte Vector
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01011001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SUBU_QB(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* NOTE(review): diagram shows rd at 15..11; helper name says 20..16. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SUBU.QB %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SUBU_S.PH rd, rs, rt - Subtract Unsigned Integer Halfwords (saturating)
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               11100001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SUBU_S_PH(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* NOTE(review): diagram shows rd at 15..11; helper name says 20..16. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SUBU_S.PH %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SUBU_S.QB rd, rs, rt - Subtract Unsigned Quad Byte Vector (saturating)
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               11011001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SUBU_S_QB(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* NOTE(review): diagram shows rd at 15..11; helper name says 20..16. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SUBU_S.QB %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SUBUH.QB rd, rs, rt - Subtract Unsigned Bytes And Right Shift to Halve
+ *                         Results
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               01101001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SUBUH_QB(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* NOTE(review): diagram shows rd at 15..11; helper name says 20..16. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SUBUH.QB %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SUBUH_R.QB rd, rs, rt - Subtract Unsigned Bytes And Right Shift to Halve
+ *                           Results (rounding)
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000               11101001101
+ *     rt -----
+ *          rs -----
+ *               rd -----
+ */
+std::string NMD::SUBUH_R_QB(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+    /* NOTE(review): diagram shows rd at 15..11; helper name says 20..16. */
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SUBUH_R.QB %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SW rtz3, u(rs3) (SW_16_ form)
+ *
+ * Builds the disassembly text for this SW encoding.  rtz3 is mapped by
+ * encode_gpr3_store() and rs3 by encode_gpr3() before GPR() names them;
+ * u is printed with IMMEDIATE().
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::SW_16_(uint64 instruction)
+{
+    const uint64 rtz3_bits = extract_rtz3_9_8_7(instruction);
+    const uint64 u_imm = extr_uil0il2bs4Fmsb5(instruction);
+    const uint64 rs3_bits = extract_rs3_6_5_4(instruction);
+
+    const std::string rtz3_str = GPR(encode_gpr3_store(rtz3_bits));
+    const std::string u_str = IMMEDIATE(copy(u_imm));
+    const std::string rs3_str = GPR(encode_gpr3(rs3_bits));
+
+    return img::format("SW %s, %s(%s)", rtz3_str, u_str, rs3_str);
+}
+
+
+/*
+ * SW rtz4, u(rs4) (SW_4X4_ form)
+ *
+ * Builds the disassembly text for this SW encoding.  rtz4 is mapped by
+ * encode_gpr4_zero() and rs4 by encode_gpr4() before GPR() names them;
+ * u is printed with IMMEDIATE().
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::SW_4X4_(uint64 instruction)
+{
+    const uint64 rs4_bits = extract_rs4_4_2_1_0(instruction);
+    const uint64 rtz4_bits = extract_rtz4_9_7_6_5(instruction);
+    const uint64 u_imm = extr_uil3il3bs1_il8il2bs1Fmsb3(instruction);
+
+    const std::string rtz4_str = GPR(encode_gpr4_zero(rtz4_bits));
+    const std::string u_str = IMMEDIATE(copy(u_imm));
+    const std::string rs4_str = GPR(encode_gpr4(rs4_bits));
+
+    return img::format("SW %s, %s(%s)", rtz4_str, u_str, rs4_str);
+}
+
+
+/*
+ * SW rtz3, u($28) (SW_GP16_ form)
+ *
+ * Builds the disassembly text for this SW encoding.  rtz3 is mapped by
+ * encode_gpr3_store() and converted to a register name with GPR(); u is
+ * printed with IMMEDIATE().
+ *
+ * The base register is not decoded from the instruction word: the text
+ * always names register number 28 (printed as "$28").
+ */
+std::string NMD::SW_GP16_(uint64 instruction)
+{
+    const uint64 rtz3_bits = extract_rtz3_9_8_7(instruction);
+    const uint64 u_imm = extr_uil0il2bs7Fmsb8(instruction);
+
+    const std::string rtz3_str = GPR(encode_gpr3_store(rtz3_bits));
+    const std::string u_str = IMMEDIATE(copy(u_imm));
+
+    return img::format("SW %s, %s($%d)", rtz3_str, u_str, 28);
+}
+
+
+/*
+ * SW rt, u($28) (SW_GP_ form)
+ *
+ * Builds the disassembly text for this SW encoding.  rt is converted to
+ * a register name with GPR(); u is printed with IMMEDIATE().  Both are
+ * read from the instruction word by the helpers called below.
+ *
+ * The base register is not decoded from the instruction word: the text
+ * always names register number 28 (printed as "$28").
+ */
+std::string NMD::SW_GP_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 u_imm = extr_uil2il2bs19Fmsb20(instruction);
+
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string u_str = IMMEDIATE(copy(u_imm));
+
+    return img::format("SW %s, %s($%d)", rt_str, u_str, 28);
+}
+
+
+/*
+ * SW rt, s(rs) (SW_S9_ form)
+ *
+ * Builds the disassembly text for this SW encoding.  rt and rs are
+ * converted to register names with GPR(); s is kept as a signed int64
+ * and printed with IMMEDIATE().
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::SW_S9_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const int64 s_imm = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string s_str = IMMEDIATE(copy(s_imm));
+    const std::string rs_str = GPR(copy(rs_bits));
+
+    return img::format("SW %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ * SW rt, u($29) (SW_SP_ form)
+ *
+ * Builds the disassembly text for this SW encoding.  rt is converted to
+ * a register name with GPR(); u is printed with IMMEDIATE().  Both are
+ * read from the instruction word by the helpers called below.
+ *
+ * The base register is not decoded from the instruction word: the text
+ * always names register number 29 (printed as "$29").
+ */
+std::string NMD::SW_SP_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_9_8_7_6_5(instruction);
+    const uint64 u_imm = extr_uil0il2bs5Fmsb6(instruction);
+
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string u_str = IMMEDIATE(copy(u_imm));
+
+    return img::format("SW %s, %s($%d)", rt_str, u_str, 29);
+}
+
+
+/*
+ * SW rt, u(rs) (SW_U12_ form)
+ *
+ * Builds the disassembly text for this SW encoding.  rt and rs are
+ * converted to register names with GPR(); u is kept as an unsigned
+ * value and printed with IMMEDIATE().
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::SW_U12_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 u_imm = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string u_str = IMMEDIATE(copy(u_imm));
+    const std::string rs_str = GPR(copy(rs_bits));
+
+    return img::format("SW %s, %s(%s)", rt_str, u_str, rs_str);
+}
+
+
+/*
+ * SWC1 ft, u($28) (SWC1_GP_ form)
+ *
+ * Builds the disassembly text for this SWC1 encoding.  ft is converted
+ * to a register name with FPR(); u is printed with IMMEDIATE().  Both
+ * are read from the instruction word by the helpers called below.
+ *
+ * The base register is not decoded from the instruction word: the text
+ * always names register number 28 (printed as "$28").
+ */
+std::string NMD::SWC1_GP_(uint64 instruction)
+{
+    const uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+    const uint64 u_imm = extr_uil2il2bs16Fmsb17(instruction);
+
+    const std::string ft_str = FPR(copy(ft_bits));
+    const std::string u_str = IMMEDIATE(copy(u_imm));
+
+    return img::format("SWC1 %s, %s($%d)", ft_str, u_str, 28);
+}
+
+
+/*
+ * SWC1 ft, s(rs) (SWC1_S9_ form)
+ *
+ * Builds the disassembly text for this SWC1 encoding.  ft is converted
+ * with FPR() and rs with GPR(); s is kept as a signed int64 and printed
+ * with IMMEDIATE().
+ *
+ * NOTE(review): the ft and rs helpers both carry a 20..16 suffix; one of
+ * the names is presumably a misnomer - confirm against their definitions.
+ */
+std::string NMD::SWC1_S9_(uint64 instruction)
+{
+    const int64 s_imm = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string ft_str = FPR(copy(ft_bits));
+    const std::string s_str = IMMEDIATE(copy(s_imm));
+    const std::string rs_str = GPR(copy(rs_bits));
+
+    return img::format("SWC1 %s, %s(%s)", ft_str, s_str, rs_str);
+}
+
+
+/*
+ * SWC1 ft, u(rs) (SWC1_U12_ form)
+ *
+ * Builds the disassembly text for this SWC1 encoding.  ft is converted
+ * with FPR() and rs with GPR(); u is kept as an unsigned value and
+ * printed with IMMEDIATE().
+ *
+ * NOTE(review): the ft and rs helpers both carry a 20..16 suffix; one of
+ * the names is presumably a misnomer - confirm against their definitions.
+ */
+std::string NMD::SWC1_U12_(uint64 instruction)
+{
+    const uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+    const uint64 u_imm = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string ft_str = FPR(copy(ft_bits));
+    const std::string u_str = IMMEDIATE(copy(u_imm));
+    const std::string rs_str = GPR(copy(rs_bits));
+
+    return img::format("SWC1 %s, %s(%s)", ft_str, u_str, rs_str);
+}
+
+
+/*
+ * SWC1X ft, rs(rt)
+ *
+ * Builds the disassembly text for a SWC1X instruction word: the rt, ft
+ * and rs fields are pulled out with the extract_* helpers; ft is
+ * converted with FPR(), rs and rt with GPR().
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::SWC1X(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ft_bits = extract_ft_15_14_13_12_11(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string ft_str = FPR(copy(ft_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SWC1X %s, %s(%s)", ft_str, rs_str, rt_str);
+}
+
+
+/*
+ * SWC1XS ft, rs(rt)
+ *
+ * Builds the disassembly text for a SWC1XS instruction word: the rt, ft
+ * and rs fields are pulled out with the extract_* helpers; ft is
+ * converted with FPR(), rs and rt with GPR().
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::SWC1XS(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 ft_bits = extract_ft_15_14_13_12_11(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string ft_str = FPR(copy(ft_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SWC1XS %s, %s(%s)", ft_str, rs_str, rt_str);
+}
+
+
+/*
+ * SWC2 cs, s(rs)
+ *
+ * Builds the disassembly text for a SWC2 instruction word.  cs is
+ * converted with CPR() and rs with GPR(); s is kept as a signed int64
+ * and printed with IMMEDIATE().
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::SWC2(uint64 instruction)
+{
+    const uint64 cs_bits = extract_cs_25_24_23_22_21(instruction);
+    const int64 s_imm = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string cs_str = CPR(copy(cs_bits));
+    const std::string s_str = IMMEDIATE(copy(s_imm));
+    const std::string rs_str = GPR(copy(rs_bits));
+
+    return img::format("SWC2 %s, %s(%s)", cs_str, s_str, rs_str);
+}
+
+
+/*
+ * SWE rt, s(rs)
+ *
+ * Builds the disassembly text for a SWE instruction word.  rt and rs
+ * are converted to register names with GPR(); s is kept as a signed
+ * int64 and printed with IMMEDIATE().
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::SWE(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const int64 s_imm = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string s_str = IMMEDIATE(copy(s_imm));
+    const std::string rs_str = GPR(copy(rs_bits));
+
+    return img::format("SWE %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ * SWM rt, s(rs), count3
+ *
+ * Builds the disassembly text for a SWM instruction word.  rt and rs are
+ * converted to register names with GPR(), s is printed with IMMEDIATE(),
+ * and count3 is mapped by encode_count3_from_count() before IMMEDIATE().
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::SWM(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 cnt_bits = extract_count3_14_13_12(instruction);
+    const int64 s_imm = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string s_str = IMMEDIATE(copy(s_imm));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string cnt_str = IMMEDIATE(encode_count3_from_count(cnt_bits));
+
+    return img::format("SWM %s, %s(%s), %s", rt_str, s_str, rs_str, cnt_str);
+}
+
+
+/*
+ * SWPC rt, s (SWPC_48_ form)
+ *
+ * Builds the disassembly text for this SWPC encoding.  rt is converted
+ * to a register name with GPR(); s is mapped by encode_s_from_address()
+ * and printed with ADDRESS(), whose second argument here is 6.
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::SWPC_48_(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_41_40_39_38_37(instruction);
+    const int64 s_imm = extr_sil0il16bs16_il16il0bs16Tmsb31(instruction);
+
+    const std::string rt_str = GPR(copy(rt_bits));
+    const std::string s_str = ADDRESS(encode_s_from_address(s_imm), 6);
+
+    return img::format("SWPC %s, %s", rt_str, s_str);
+}
+
+
+/*
+ * SWX rd, rs(rt)
+ *
+ * Builds the disassembly text for a SWX instruction word: the rt, rd and
+ * rs fields are pulled out with the extract_* helpers and each is
+ * converted to a register name with GPR().
+ *
+ * NOTE(review): the rd and rs helpers both carry a 20..16 suffix; one of
+ * the names is presumably a misnomer - confirm against their definitions.
+ */
+std::string NMD::SWX(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SWX %s, %s(%s)", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SWXS rd, rs(rt)
+ *
+ * Builds the disassembly text for a SWXS instruction word: the rt, rd
+ * and rs fields are pulled out with the extract_* helpers and each is
+ * converted to a register name with GPR().
+ *
+ * NOTE(review): the rd and rs helpers both carry a 20..16 suffix; one of
+ * the names is presumably a misnomer - confirm against their definitions.
+ */
+std::string NMD::SWXS(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_bits = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rd_str = GPR(copy(rd_bits));
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("SWXS %s, %s(%s)", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * SYNC stype
+ *
+ * Builds the disassembly text for a SYNC instruction word: the stype
+ * field is pulled out with extract_stype_20_19_18_17_16() and printed
+ * with IMMEDIATE().
+ *
+ * NOTE(review): the numeric suffix in the helper name has not been
+ * checked against the helper body - confirm before relying on it.
+ */
+std::string NMD::SYNC(uint64 instruction)
+{
+    const uint64 stype_bits = extract_stype_20_19_18_17_16(instruction);
+
+    const std::string stype_str = IMMEDIATE(copy(stype_bits));
+
+    return img::format("SYNC %s", stype_str);
+}
+
+
+/*
+ * SYNCI s(rs)
+ *
+ * Builds the disassembly text for a SYNCI instruction word.  rs is
+ * converted to a register name with GPR(); s is kept as a signed int64
+ * and printed with IMMEDIATE().
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::SYNCI(uint64 instruction)
+{
+    const int64 s_imm = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string s_str = IMMEDIATE(copy(s_imm));
+    const std::string rs_str = GPR(copy(rs_bits));
+
+    return img::format("SYNCI %s(%s)", s_str, rs_str);
+}
+
+
+/*
+ * SYNCIE s(rs)
+ *
+ * Builds the disassembly text for a SYNCIE instruction word.  rs is
+ * converted to a register name with GPR(); s is kept as a signed int64
+ * and printed with IMMEDIATE().
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::SYNCIE(uint64 instruction)
+{
+    const int64 s_imm = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string s_str = IMMEDIATE(copy(s_imm));
+    const std::string rs_str = GPR(copy(rs_bits));
+
+    return img::format("SYNCIE %s(%s)", s_str, rs_str);
+}
+
+
+/*
+ * SYSCALL code (SYSCALL_16_ form)
+ *
+ * Builds the disassembly text for this SYSCALL encoding: the code
+ * field is pulled out with extract_code_1_0() and printed with
+ * IMMEDIATE().
+ *
+ * NOTE(review): the numeric suffix in the helper name has not been
+ * checked against the helper body - confirm before relying on it.
+ */
+std::string NMD::SYSCALL_16_(uint64 instruction)
+{
+    const uint64 code_bits = extract_code_1_0(instruction);
+
+    const std::string code_str = IMMEDIATE(copy(code_bits));
+
+    return img::format("SYSCALL %s", code_str);
+}
+
+
+/*
+ * SYSCALL code (32-bit form) - System Call. Causes a System Call Exception
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  00000000000010
+ *           code ------------------
+ */
+std::string NMD::SYSCALL_32_(uint64 instruction)
+{
+    const uint64 code_bits = extract_code_17_to_0(instruction);
+
+    const std::string code_str = IMMEDIATE(copy(code_bits));
+
+    return img::format("SYSCALL %s", code_str);
+}
+
+
+/*
+ * TEQ rs, rt
+ *
+ * Builds the disassembly text for a TEQ instruction word: the rt and rs
+ * fields are pulled out with the extract_* helpers and each is
+ * converted to a register name with GPR().
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::TEQ(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("TEQ %s, %s", rs_str, rt_str);
+}
+
+
+/*
+ * TLBGINV
+ *
+ * Returns the fixed disassembly text "TLBGINV " (the mnemonic followed
+ * by one space).  The instruction word is not inspected: no fields are
+ * decoded for this form.
+ *
+ * The trailing space is part of the returned string and is kept as is,
+ * since callers may depend on the exact text.
+ */
+std::string NMD::TLBGINV(uint64 instruction)
+{
+    static_cast<void>(instruction);
+
+    return std::string("TLBGINV ");
+}
+
+
+/*
+ * TLBGINVF
+ *
+ * Returns the fixed disassembly text "TLBGINVF " (the mnemonic followed
+ * by one space).  The instruction word is not inspected: no fields are
+ * decoded for this form.
+ *
+ * The trailing space is part of the returned string and is kept as is,
+ * since callers may depend on the exact text.
+ */
+std::string NMD::TLBGINVF(uint64 instruction)
+{
+    static_cast<void>(instruction);
+
+    return std::string("TLBGINVF ");
+}
+
+
+/*
+ * TLBGP
+ *
+ * Returns the fixed disassembly text "TLBGP " (the mnemonic followed
+ * by one space).  The instruction word is not inspected: no fields are
+ * decoded for this form.
+ *
+ * The trailing space is part of the returned string and is kept as is,
+ * since callers may depend on the exact text.
+ */
+std::string NMD::TLBGP(uint64 instruction)
+{
+    static_cast<void>(instruction);
+
+    return std::string("TLBGP ");
+}
+
+
+/*
+ * TLBGR
+ *
+ * Returns the fixed disassembly text "TLBGR " (the mnemonic followed
+ * by one space).  The instruction word is not inspected: no fields are
+ * decoded for this form.
+ *
+ * The trailing space is part of the returned string and is kept as is,
+ * since callers may depend on the exact text.
+ */
+std::string NMD::TLBGR(uint64 instruction)
+{
+    static_cast<void>(instruction);
+
+    return std::string("TLBGR ");
+}
+
+
+/*
+ * TLBGWI
+ *
+ * Returns the fixed disassembly text "TLBGWI " (the mnemonic followed
+ * by one space).  The instruction word is not inspected: no fields are
+ * decoded for this form.
+ *
+ * The trailing space is part of the returned string and is kept as is,
+ * since callers may depend on the exact text.
+ */
+std::string NMD::TLBGWI(uint64 instruction)
+{
+    static_cast<void>(instruction);
+
+    return std::string("TLBGWI ");
+}
+
+
+/*
+ * TLBGWR
+ *
+ * Returns the fixed disassembly text "TLBGWR " (the mnemonic followed
+ * by one space).  The instruction word is not inspected: no fields are
+ * decoded for this form.
+ *
+ * The trailing space is part of the returned string and is kept as is,
+ * since callers may depend on the exact text.
+ */
+std::string NMD::TLBGWR(uint64 instruction)
+{
+    static_cast<void>(instruction);
+
+    return std::string("TLBGWR ");
+}
+
+
+/*
+ * TLBINV
+ *
+ * Returns the fixed disassembly text "TLBINV " (the mnemonic followed
+ * by one space).  The instruction word is not inspected: no fields are
+ * decoded for this form.
+ *
+ * The trailing space is part of the returned string and is kept as is,
+ * since callers may depend on the exact text.
+ */
+std::string NMD::TLBINV(uint64 instruction)
+{
+    static_cast<void>(instruction);
+
+    return std::string("TLBINV ");
+}
+
+
+/*
+ * TLBINVF
+ *
+ * Returns the fixed disassembly text "TLBINVF " (the mnemonic followed
+ * by one space).  The instruction word is not inspected: no fields are
+ * decoded for this form.
+ *
+ * The trailing space is part of the returned string and is kept as is,
+ * since callers may depend on the exact text.
+ */
+std::string NMD::TLBINVF(uint64 instruction)
+{
+    static_cast<void>(instruction);
+
+    return std::string("TLBINVF ");
+}
+
+
+/*
+ * TLBP
+ *
+ * Returns the fixed disassembly text "TLBP " (the mnemonic followed
+ * by one space).  The instruction word is not inspected: no fields are
+ * decoded for this form.
+ *
+ * The trailing space is part of the returned string and is kept as is,
+ * since callers may depend on the exact text.
+ */
+std::string NMD::TLBP(uint64 instruction)
+{
+    static_cast<void>(instruction);
+
+    return std::string("TLBP ");
+}
+
+
+/*
+ * TLBR
+ *
+ * Returns the fixed disassembly text "TLBR " (the mnemonic followed
+ * by one space).  The instruction word is not inspected: no fields are
+ * decoded for this form.
+ *
+ * The trailing space is part of the returned string and is kept as is,
+ * since callers may depend on the exact text.
+ */
+std::string NMD::TLBR(uint64 instruction)
+{
+    static_cast<void>(instruction);
+
+    return std::string("TLBR ");
+}
+
+
+/*
+ * TLBWI
+ *
+ * Returns the fixed disassembly text "TLBWI " (the mnemonic followed
+ * by one space).  The instruction word is not inspected: no fields are
+ * decoded for this form.
+ *
+ * The trailing space is part of the returned string and is kept as is,
+ * since callers may depend on the exact text.
+ */
+std::string NMD::TLBWI(uint64 instruction)
+{
+    static_cast<void>(instruction);
+
+    return std::string("TLBWI ");
+}
+
+
+/*
+ * TLBWR
+ *
+ * Returns the fixed disassembly text "TLBWR " (the mnemonic followed
+ * by one space).  The instruction word is not inspected: no fields are
+ * decoded for this form.
+ *
+ * The trailing space is part of the returned string and is kept as is,
+ * since callers may depend on the exact text.
+ */
+std::string NMD::TLBWR(uint64 instruction)
+{
+    static_cast<void>(instruction);
+
+    return std::string("TLBWR ");
+}
+
+
+/*
+ * TNE rs, rt
+ *
+ * Builds the disassembly text for a TNE instruction word: the rt and rs
+ * fields are pulled out with the extract_* helpers and each is
+ * converted to a register name with GPR().
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::TNE(uint64 instruction)
+{
+    const uint64 rt_bits = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rs_bits = extract_rs_20_19_18_17_16(instruction);
+
+    const std::string rs_str = GPR(copy(rs_bits));
+    const std::string rt_str = GPR(copy(rt_bits));
+
+    return img::format("TNE %s, %s", rs_str, rt_str);
+}
+
+
+/*
+ * TRUNC.L.D ft, fs
+ *
+ * Builds the disassembly text for a TRUNC.L.D instruction word: the fs
+ * and ft fields are pulled out with the extract_* helpers and each is
+ * converted to a register name with FPR().
+ *
+ * NOTE(review): the numeric suffixes in the helper names have not been
+ * checked against the helper bodies - confirm before relying on them.
+ */
+std::string NMD::TRUNC_L_D(uint64 instruction)
+{
+    const uint64 fs_bits = extract_fs_15_14_13_12_11(instruction);
+    const uint64 ft_bits = extract_ft_20_19_18_17_16(instruction);
+
+    const std::string ft_str = FPR(copy(ft_bits));
+    const std::string fs_str = FPR(copy(fs_bits));
+
+    return img::format("TRUNC.L.D %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ * TRUNC.L.S ft, fs
+ *
+ * Disassembly handler for TRUNC.L.S.  Two register operands are
+ * formatted through FPR() and printed in the order ft, fs.
+ *
+ * TODO: add the encoding diagram once it has been checked against the
+ * pool table entry for this opcode; the field positions are whatever
+ * the two extractor helpers called below implement.
+ */
+std::string NMD::TRUNC_L_S(uint64 instruction)
+{
+    const uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    const uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+
+    return img::format("TRUNC.L.S %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ * TRUNC.W.D ft, fs
+ *
+ * Disassembly handler for TRUNC.W.D.  Two register operands are
+ * formatted through FPR() and printed in the order ft, fs.
+ *
+ * TODO: add the encoding diagram once it has been checked against the
+ * pool table entry for this opcode; the field positions are whatever
+ * the two extractor helpers called below implement.
+ */
+std::string NMD::TRUNC_W_D(uint64 instruction)
+{
+    const uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    const uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+
+    return img::format("TRUNC.W.D %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ * TRUNC.W.S ft, fs
+ *
+ * Disassembly handler for TRUNC.W.S.  Two register operands are
+ * formatted through FPR() and printed in the order ft, fs.
+ *
+ * TODO: add the encoding diagram once it has been checked against the
+ * pool table entry for this opcode; the field positions are whatever
+ * the two extractor helpers called below implement.
+ */
+std::string NMD::TRUNC_W_S(uint64 instruction)
+{
+    const uint64 fs_raw = extract_fs_15_14_13_12_11(instruction);
+    const uint64 ft_raw = extract_ft_20_19_18_17_16(instruction);
+
+    std::string ft_str = FPR(copy(ft_raw));
+    std::string fs_str = FPR(copy(fs_raw));
+
+    return img::format("TRUNC.W.S %s, %s", ft_str, fs_str);
+}
+
+
+/*
+ * UALDM rt, s(rs), count3
+ *
+ * Disassembly handler for UALDM.  rt and rs are formatted as GPR
+ * names; s is a signed immediate and count3 is the 3-bit count field
+ * after mapping through encode_count3_from_count().
+ *
+ * TODO: add the encoding diagram once it has been checked against the
+ * pool table entry for this opcode.
+ */
+std::string NMD::UALDM(uint64 instruction)
+{
+    const uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    const uint64 count3_raw = extract_count3_14_13_12(instruction);
+    const int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string cnt_str = IMMEDIATE(encode_count3_from_count(count3_raw));
+
+    return img::format("UALDM %s, %s(%s), %s", rt_str, s_str, rs_str, cnt_str);
+}
+
+
+/*
+ * UALH rt, s(rs)
+ *
+ * Disassembly handler for UALH.  rt and rs are formatted as GPR
+ * names; s is a signed immediate printed in front of the
+ * parenthesised rs.
+ *
+ * TODO: add the encoding diagram once it has been checked against the
+ * pool table entry for this opcode.
+ */
+std::string NMD::UALH(uint64 instruction)
+{
+    const uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    const int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+
+    return img::format("UALH %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ * UALWM rt, s(rs), count3
+ *
+ * Disassembly handler for UALWM.  rt and rs are formatted as GPR
+ * names; s is a signed immediate and count3 is the 3-bit count field
+ * after mapping through encode_count3_from_count().
+ *
+ * TODO: add the encoding diagram once it has been checked against the
+ * pool table entry for this opcode.
+ */
+std::string NMD::UALWM(uint64 instruction)
+{
+    const uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    const uint64 count3_raw = extract_count3_14_13_12(instruction);
+    const int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string cnt_str = IMMEDIATE(encode_count3_from_count(count3_raw));
+
+    return img::format("UALWM %s, %s(%s), %s", rt_str, s_str, rs_str, cnt_str);
+}
+
+
+/*
+ * UASDM rt, s(rs), count3
+ *
+ * Disassembly handler for UASDM.  rt and rs are formatted as GPR
+ * names; s is a signed immediate and count3 is the 3-bit count field
+ * after mapping through encode_count3_from_count().
+ *
+ * TODO: add the encoding diagram once it has been checked against the
+ * pool table entry for this opcode.
+ */
+std::string NMD::UASDM(uint64 instruction)
+{
+    const uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    const uint64 count3_raw = extract_count3_14_13_12(instruction);
+    const int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string cnt_str = IMMEDIATE(encode_count3_from_count(count3_raw));
+
+    return img::format("UASDM %s, %s(%s), %s", rt_str, s_str, rs_str, cnt_str);
+}
+
+
+/*
+ * UASH rt, s(rs)
+ *
+ * Disassembly handler for UASH.  rt and rs are formatted as GPR
+ * names; s is a signed immediate printed in front of the
+ * parenthesised rs.
+ *
+ * TODO: add the encoding diagram once it has been checked against the
+ * pool table entry for this opcode.
+ */
+std::string NMD::UASH(uint64 instruction)
+{
+    const uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    const int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+
+    return img::format("UASH %s, %s(%s)", rt_str, s_str, rs_str);
+}
+
+
+/*
+ * UASWM rt, s(rs), count3
+ *
+ * Disassembly handler for UASWM.  rt and rs are formatted as GPR
+ * names; s is a signed immediate and count3 is the 3-bit count field
+ * after mapping through encode_count3_from_count().
+ *
+ * TODO: add the encoding diagram once it has been checked against the
+ * pool table entry for this opcode.
+ */
+std::string NMD::UASWM(uint64 instruction)
+{
+    const uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    const uint64 count3_raw = extract_count3_14_13_12(instruction);
+    const int64 s_raw = extr_sil0il0bs8_il15il8bs1Tmsb8(instruction);
+    const uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string s_str = IMMEDIATE(copy(s_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string cnt_str = IMMEDIATE(encode_count3_from_count(count3_raw));
+
+    return img::format("UASWM %s, %s(%s), %s", rt_str, s_str, rs_str, cnt_str);
+}
+
+
+/*
+ * UDI op
+ *
+ * Disassembly handler for UDI.  The single operand is the value
+ * returned by extract_op_25_to_3(), printed as an unsigned
+ * immediate after the mnemonic.
+ *
+ * TODO: add the encoding diagram once it has been checked against the
+ * pool table entry for this opcode.
+ */
+std::string NMD::UDI(uint64 instruction)
+{
+    const uint64 op_raw = extract_op_25_to_3(instruction);
+
+    std::string op_str = IMMEDIATE(copy(op_raw));
+
+    return img::format("UDI %s", op_str);
+}
+
+
+/*
+ * WAIT code - Enter Wait State
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000          1100001101111111
+ *   code ----------
+ */
+std::string NMD::WAIT(uint64 instruction)
+{
+    uint64 code_raw = extract_code_25_24_23_22_21_20_19_18_17_16(instruction);
+
+    std::string code_str = IMMEDIATE(copy(code_raw));   /* sole operand */
+
+    return img::format("WAIT %s", code_str);
+}
+
+
+/*
+ * WRDSP rt, mask - Write Fields to DSPControl Register from a GPR
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000            01011001111111
+ *     rt -----
+ *        mask -------
+ */
+std::string NMD::WRDSP(uint64 instruction)
+{
+    const uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    const uint64 mask_raw = extract_mask_20_19_18_17_16_15_14(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));             /* register name */
+    std::string mask_str = IMMEDIATE(copy(mask_raw));   /* plain immediate */
+
+    return img::format("WRDSP %s, %s", rt_str, mask_str);
+}
+
+
+/*
+ * WRPGPR rt, rs
+ *
+ * Disassembly handler for WRPGPR.  Both operands are formatted as
+ * GPR names and printed in the order rt, rs.
+ *
+ * TODO: add the encoding diagram once it has been checked against the
+ * pool table entry for this opcode; the field positions are whatever
+ * the two extractor helpers called below implement.
+ */
+std::string NMD::WRPGPR(uint64 instruction)
+{
+    const uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+
+    return img::format("WRPGPR %s, %s", rt_str, rs_str);
+}
+
+
+/*
+ * XOR rs3, rt3 (16-bit encoding)
+ *
+ * Disassembly handler for XOR[16].  Both 3-bit register fields are
+ * mapped through encode_gpr3() before being formatted as GPR names,
+ * and are printed in the order rs3, rt3.
+ *
+ * TODO: add the encoding diagram once it has been checked against the
+ * pool table entry for this opcode.
+ */
+std::string NMD::XOR_16_(uint64 instruction)
+{
+    const uint64 rt3_raw = extract_rt3_9_8_7(instruction);
+    const uint64 rs3_raw = extract_rs3_6_5_4(instruction);
+
+    std::string rs3_str = GPR(encode_gpr3(rs3_raw));
+    std::string rt3_str = GPR(encode_gpr3(rt3_raw));
+
+    return img::format("XOR %s, %s", rs3_str, rt3_str);
+}
+
+
+/*
+ * XOR rd, rs, rt (32-bit encoding)
+ *
+ * Disassembly handler for XOR[32]; prints three GPR names in the
+ * order rd, rs, rt.
+ *
+ * NOTE(review): the rd and rs extractor helpers are both named for
+ * bits 20..16.  Presumably one of the names is a misnomer - confirm
+ * against the helper bodies before adding an encoding diagram.
+ */
+std::string NMD::XOR_32_(uint64 instruction)
+{
+    const uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rd_raw = extract_rd_20_19_18_17_16(instruction);
+    const uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rd_str = GPR(copy(rd_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string rt_str = GPR(copy(rt_raw));
+
+    return img::format("XOR %s, %s, %s", rd_str, rs_str, rt_str);
+}
+
+
+/*
+ * XORI rt, rs, u
+ *
+ * Disassembly handler for XORI.  rt and rs are formatted as GPR
+ * names; u is the unsigned immediate returned by the 12-bit
+ * extractor helper called below.
+ *
+ * TODO: add the encoding diagram once it has been checked against the
+ * pool table entry for this opcode.
+ */
+std::string NMD::XORI(uint64 instruction)
+{
+    const uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    const uint64 u_raw = extract_u_11_10_9_8_7_6_5_4_3_2_1_0(instruction);
+    const uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+    std::string u_str = IMMEDIATE(copy(u_raw));
+
+    return img::format("XORI %s, %s, %s", rt_str, rs_str, u_str);
+}
+
+
+/*
+ * YIELD rt, rs - MT instruction (attribute MT_ in its _POOL32A0 entry)
+ *
+ *   3         2         1
+ *  10987654321098765432109876543210
+ *  001000                1001101000
+ *     rt -----
+ *          rs -----
+ */
+std::string NMD::YIELD(uint64 instruction)
+{
+    const uint64 rt_raw = extract_rt_25_24_23_22_21(instruction);
+    const uint64 rs_raw = extract_rs_20_19_18_17_16(instruction);
+
+    std::string rt_str = GPR(copy(rt_raw));
+    std::string rs_str = GPR(copy(rs_raw));
+
+    return img::format("YIELD %s, %s", rt_str, rs_str);
+}
+
+
+
+NMD::Pool NMD::P_SYSCALL[2] = {       /* sub-pool of P.RI; bit 18 selects */
+    { instruction         , 0                   , 0   , 32,
+       0xfffc0000, 0x00080000, &NMD::SYSCALL_32_      , 0,
+       0x0                 },        /* SYSCALL[32] */
+    { instruction         , 0                   , 0   , 32,
+       0xfffc0000, 0x000c0000, &NMD::HYPCALL          , 0,
+       CP0_ | VZ_          },        /* HYPCALL */
+};
+
+
+NMD::Pool NMD::P_RI[4] = {        /* sub-pool of P.ADDIU; bits 20..19 select */
+    { instruction         , 0                   , 0   , 32,
+       0xfff80000, 0x00000000, &NMD::SIGRIE           , 0,
+       0x0                 },        /* SIGRIE */
+    { pool                , P_SYSCALL           , 2   , 32,
+       0xfff80000, 0x00080000, 0                      , 0,
+       0x0                 },        /* P.SYSCALL */
+    { instruction         , 0                   , 0   , 32,
+       0xfff80000, 0x00100000, &NMD::BREAK_32_        , 0,
+       0x0                 },        /* BREAK[32] */
+    { instruction         , 0                   , 0   , 32,
+       0xfff80000, 0x00180000, &NMD::SDBBP_32_        , 0,
+       EJTAG_              },        /* SDBBP[32] */
+};
+
+
+NMD::Pool NMD::P_ADDIU[2] = {  /* P.RI (stricter mask) listed before ADDIU */
+    { pool                , P_RI                , 4   , 32,
+       0xffe00000, 0x00000000, 0                      , 0,
+       0x0                 },        /* P.RI */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000000, 0x00000000, &NMD::ADDIU_32_        , &NMD::ADDIU_32__cond   ,
+       0x0                 },        /* ADDIU[32] */
+};
+
+
+NMD::Pool NMD::P_TRAP[2] = {           /* bit 10 selects TEQ vs. TNE */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000000, &NMD::TEQ              , 0,
+       XMMS_               },        /* TEQ */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000400, &NMD::TNE              , 0,
+       XMMS_               },        /* TNE */
+};
+
+
+NMD::Pool NMD::P_CMOVE[2] = {          /* bit 10 selects MOVZ vs. MOVN */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000210, &NMD::MOVZ             , 0,
+       0x0                 },        /* MOVZ */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000610, &NMD::MOVN             , 0,
+       0x0                 },        /* MOVN */
+};
+
+
+NMD::Pool NMD::P_D_MT_VPE[2] = {       /* bit 16 selects DVPE vs. DMT */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1f3fff, 0x20010ab0, &NMD::DMT              , 0,
+       MT_                 },        /* DMT */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1f3fff, 0x20000ab0, &NMD::DVPE             , 0,
+       MT_                 },        /* DVPE */
+};
+
+
+NMD::Pool NMD::P_E_MT_VPE[2] = {       /* bit 16 selects EVPE vs. EMT */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1f3fff, 0x20010eb0, &NMD::EMT              , 0,
+       MT_                 },        /* EMT */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1f3fff, 0x20000eb0, &NMD::EVPE             , 0,
+       MT_                 },        /* EVPE */
+};
+
+
+NMD::Pool NMD::_P_MT_VPE[2] = {        /* bit 10 selects the D or E sub-pool */
+    { pool                , P_D_MT_VPE          , 2   , 32,
+       0xfc003fff, 0x20000ab0, 0                      , 0,
+       0x0                 },        /* P.D_MT_VPE */
+    { pool                , P_E_MT_VPE          , 2   , 32,
+       0xfc003fff, 0x20000eb0, 0                      , 0,
+       0x0                 },        /* P.E_MT_VPE */
+};
+
+
+NMD::Pool NMD::P_MT_VPE[8] = {   /* bits 13..11 select; only slot 1 is used */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003bff, 0x200002b0, 0                      , 0,
+       0x0                 },        /* P.MT_VPE~*(0) */
+    { pool                , _P_MT_VPE           , 2   , 32,
+       0xfc003bff, 0x20000ab0, 0                      , 0,
+       0x0                 },        /* _P.MT_VPE */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003bff, 0x200012b0, 0                      , 0,
+       0x0                 },        /* P.MT_VPE~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003bff, 0x20001ab0, 0                      , 0,
+       0x0                 },        /* P.MT_VPE~*(3) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003bff, 0x200022b0, 0                      , 0,
+       0x0                 },        /* P.MT_VPE~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003bff, 0x20002ab0, 0                      , 0,
+       0x0                 },        /* P.MT_VPE~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003bff, 0x200032b0, 0                      , 0,
+       0x0                 },        /* P.MT_VPE~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003bff, 0x20003ab0, 0                      , 0,
+       0x0                 },        /* P.MT_VPE~*(7) */
+};
+
+
+NMD::Pool NMD::P_DVP[2] = {            /* bit 10 selects DVP vs. EVP */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x20000390, &NMD::DVP              , 0,
+       0x0                 },        /* DVP */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x20000790, &NMD::EVP              , 0,
+       0x0                 },        /* EVP */
+};
+
+
+NMD::Pool NMD::P_SLTU[2] = {   /* P.DVP (stricter mask) listed before SLTU */
+    { pool                , P_DVP               , 2   , 32,
+       0xfc00fbff, 0x20000390, 0                      , 0,
+       0x0                 },        /* P.DVP */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000390, &NMD::SLTU             , &NMD::SLTU_cond        ,
+       0x0                 },        /* SLTU */
+};
+
+
+NMD::Pool NMD::_POOL32A0[128] = {
+    { pool                , P_TRAP              , 2   , 32,
+       0xfc0003ff, 0x20000000, 0                      , 0,
+       0x0                 },        /* P.TRAP */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000008, &NMD::SEB              , 0,
+       XMMS_               },        /* SEB */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000010, &NMD::SLLV             , 0,
+       0x0                 },        /* SLLV */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000018, &NMD::MUL_32_          , 0,
+       0x0                 },        /* MUL[32] */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000020, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000028, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(5) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000030, &NMD::MFC0             , 0,
+       0x0                 },        /* MFC0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000038, &NMD::MFHC0            , 0,
+       CP0_ | MVH_         },        /* MFHC0 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000040, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(8) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000048, &NMD::SEH              , 0,
+       0x0                 },        /* SEH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000050, &NMD::SRLV             , 0,
+       0x0                 },        /* SRLV */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000058, &NMD::MUH              , 0,
+       0x0                 },        /* MUH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000060, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(12) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000068, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(13) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000070, &NMD::MTC0             , 0,
+       CP0_                },        /* MTC0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000078, &NMD::MTHC0            , 0,
+       CP0_ | MVH_         },        /* MTHC0 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000080, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(16) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000088, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(17) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000090, &NMD::SRAV             , 0,
+       0x0                 },        /* SRAV */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000098, &NMD::MULU             , 0,
+       0x0                 },        /* MULU */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000a0, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(20) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000a8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(21) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000b0, &NMD::MFGC0            , 0,
+       CP0_ | VZ_          },        /* MFGC0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000b8, &NMD::MFHGC0           , 0,
+       CP0_ | VZ_ | MVH_   },        /* MFHGC0 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000c0, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(24) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000c8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(25) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000d0, &NMD::ROTRV            , 0,
+       0x0                 },        /* ROTRV */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000d8, &NMD::MUHU             , 0,
+       0x0                 },        /* MUHU */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000e0, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(28) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000e8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(29) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000f0, &NMD::MTGC0            , 0,
+       CP0_ | VZ_          },        /* MTGC0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000f8, &NMD::MTHGC0           , 0,
+       CP0_ | VZ_ | MVH_   },        /* MTHGC0 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000100, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(32) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000108, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(33) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000110, &NMD::ADD              , 0,
+       XMMS_               },        /* ADD */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000118, &NMD::DIV              , 0,
+       0x0                 },        /* DIV */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000120, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(36) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000128, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(37) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000130, &NMD::DMFC0            , 0,
+       CP0_ | MIPS64_      },        /* DMFC0 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000138, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(39) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000140, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(40) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000148, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(41) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000150, &NMD::ADDU_32_         , 0,
+       0x0                 },        /* ADDU[32] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000158, &NMD::MOD              , 0,
+       0x0                 },        /* MOD */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000160, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(44) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000168, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(45) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000170, &NMD::DMTC0            , 0,
+       CP0_ | MIPS64_      },        /* DMTC0 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000178, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(47) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000180, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(48) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000188, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(49) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000190, &NMD::SUB              , 0,
+       XMMS_               },        /* SUB */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000198, &NMD::DIVU             , 0,
+       0x0                 },        /* DIVU */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001a0, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(52) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001a8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(53) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001b0, &NMD::DMFGC0           , 0,
+       CP0_ | MIPS64_ | VZ_},        /* DMFGC0 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001b8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(55) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001c0, &NMD::RDHWR            , 0,
+       XMMS_               },        /* RDHWR */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001c8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(57) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001d0, &NMD::SUBU_32_         , 0,
+       0x0                 },        /* SUBU[32] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001d8, &NMD::MODU             , 0,
+       0x0                 },        /* MODU */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001e0, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(60) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001e8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(61) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001f0, &NMD::DMTGC0           , 0,
+       CP0_ | MIPS64_ | VZ_},        /* DMTGC0 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001f8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(63) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000200, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(64) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000208, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(65) */
+    { pool                , P_CMOVE             , 2   , 32,
+       0xfc0003ff, 0x20000210, 0                      , 0,
+       0x0                 },        /* P.CMOVE */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000218, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(67) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000220, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(68) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000228, &NMD::FORK             , 0,
+       MT_                 },        /* FORK */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000230, &NMD::MFTR             , 0,
+       MT_                 },        /* MFTR */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000238, &NMD::MFHTR            , 0,
+       MT_                 },        /* MFHTR */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000240, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(72) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000248, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(73) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000250, &NMD::AND_32_          , 0,
+       0x0                 },        /* AND[32] */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000258, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(75) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000260, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(76) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000268, &NMD::YIELD            , 0,
+       MT_                 },        /* YIELD */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000270, &NMD::MTTR             , 0,
+       MT_                 },        /* MTTR */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000278, &NMD::MTHTR            , 0,
+       MT_                 },        /* MTHTR */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000280, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(80) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000288, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(81) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000290, &NMD::OR_32_           , 0,
+       0x0                 },        /* OR[32] */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000298, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(83) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002a0, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(84) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002a8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(85) */
+    { pool                , P_MT_VPE            , 8   , 32,
+       0xfc0003ff, 0x200002b0, 0                      , 0,
+       0x0                 },        /* P.MT_VPE */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002b8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(87) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002c0, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(88) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002c8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(89) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002d0, &NMD::NOR              , 0,
+       0x0                 },        /* NOR */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002d8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(91) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002e0, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(92) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002e8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(93) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002f0, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(94) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002f8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(95) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000300, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(96) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000308, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(97) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000310, &NMD::XOR_32_          , 0,
+       0x0                 },        /* XOR[32] */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000318, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(99) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000320, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(100) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000328, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(101) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000330, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(102) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000338, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(103) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000340, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(104) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000348, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(105) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000350, &NMD::SLT              , 0,
+       0x0                 },        /* SLT */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000358, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(107) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000360, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(108) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000368, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(109) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000370, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(110) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000378, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(111) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000380, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(112) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000388, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(113) */
+    { pool                , P_SLTU              , 2   , 32,
+       0xfc0003ff, 0x20000390, 0                      , 0,
+       0x0                 },        /* P.SLTU */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000398, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(115) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003a0, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(116) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003a8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(117) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003b0, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(118) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003b8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(119) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003c0, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(120) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003c8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(121) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003d0, &NMD::SOV              , 0,
+       0x0                 },        /* SOV */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003d8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(123) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003e0, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(124) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003e8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(125) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003f0, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(126) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003f8, 0                      , 0,
+       0x0                 },        /* _POOL32A0~*(127) */
+};
+
+
+NMD::Pool NMD::ADDQ__S__PH[2] = { /* sub-pool of _POOL32A5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000000d, &NMD::ADDQ_PH          , 0,
+       DSP_                },        /* ADDQ.PH: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000040d, &NMD::ADDQ_S_PH        , 0,
+       DSP_                },        /* ADDQ_S.PH: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::MUL__S__PH[2] = { /* sub-pool of _POOL32A5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000002d, &NMD::MUL_PH           , 0,
+       DSP_                },        /* MUL.PH: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000042d, &NMD::MUL_S_PH         , 0,
+       DSP_                },        /* MUL_S.PH: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::ADDQH__R__PH[2] = { /* sub-pool of _POOL32A5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000004d, &NMD::ADDQH_PH         , 0,
+       DSP_                },        /* ADDQH.PH: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000044d, &NMD::ADDQH_R_PH       , 0,
+       DSP_                },        /* ADDQH_R.PH: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::ADDQH__R__W[2] = { /* sub-pool of _POOL32A5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000008d, &NMD::ADDQH_W          , 0,
+       DSP_                },        /* ADDQH.W: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000048d, &NMD::ADDQH_R_W        , 0,
+       DSP_                },        /* ADDQH_R.W: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::ADDU__S__QB[2] = { /* sub-pool of _POOL32A5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x200000cd, &NMD::ADDU_QB          , 0,
+       DSP_                },        /* ADDU.QB: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x200004cd, &NMD::ADDU_S_QB        , 0,
+       DSP_                },        /* ADDU_S.QB: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::ADDU__S__PH[2] = { /* sub-pool of _POOL32A5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000010d, &NMD::ADDU_PH          , 0,
+       DSP_                },        /* ADDU.PH: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000050d, &NMD::ADDU_S_PH        , 0,
+       DSP_                },        /* ADDU_S.PH: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::ADDUH__R__QB[2] = { /* sub-pool of _POOL32A5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000014d, &NMD::ADDUH_QB         , 0,
+       DSP_                },        /* ADDUH.QB: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000054d, &NMD::ADDUH_R_QB       , 0,
+       DSP_                },        /* ADDUH_R.QB: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::SHRAV__R__PH[2] = { /* sub-pool of _POOL32A5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000018d, &NMD::SHRAV_PH         , 0,
+       DSP_                },        /* SHRAV.PH: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000058d, &NMD::SHRAV_R_PH       , 0,
+       DSP_                },        /* SHRAV_R.PH: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::SHRAV__R__QB[2] = { /* sub-pool of _POOL32A5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x200001cd, &NMD::SHRAV_QB         , 0,
+       DSP_                },        /* SHRAV.QB: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x200005cd, &NMD::SHRAV_R_QB       , 0,
+       DSP_                },        /* SHRAV_R.QB: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::SUBQ__S__PH[2] = { /* sub-pool of _POOL32A5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000020d, &NMD::SUBQ_PH          , 0,
+       DSP_                },        /* SUBQ.PH: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000060d, &NMD::SUBQ_S_PH        , 0,
+       DSP_                },        /* SUBQ_S.PH: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::SUBQH__R__PH[2] = { /* sub-pool of _POOL32A5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000024d, &NMD::SUBQH_PH         , 0,
+       DSP_                },        /* SUBQH.PH: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000064d, &NMD::SUBQH_R_PH       , 0,
+       DSP_                },        /* SUBQH_R.PH: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::SUBQH__R__W[2] = { /* sub-pool of _POOL32A5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000028d, &NMD::SUBQH_W          , 0,
+       DSP_                },        /* SUBQH.W: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000068d, &NMD::SUBQH_R_W        , 0,
+       DSP_                },        /* SUBQH_R.W: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::SUBU__S__QB[2] = { /* sub-pool of _POOL32A5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x200002cd, &NMD::SUBU_QB          , 0,
+       DSP_                },        /* SUBU.QB: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x200006cd, &NMD::SUBU_S_QB        , 0,
+       DSP_                },        /* SUBU_S.QB: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::SUBU__S__PH[2] = { /* sub-pool of _POOL32A5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000030d, &NMD::SUBU_PH          , 0,
+       DSP_                },        /* SUBU.PH: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000070d, &NMD::SUBU_S_PH        , 0,
+       DSP_                },        /* SUBU_S.PH: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::SHRA__R__PH[2] = { /* sub-pool of _POOL32A5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000335, &NMD::SHRA_PH          , 0,
+       DSP_                },        /* SHRA.PH: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000735, &NMD::SHRA_R_PH        , 0,
+       DSP_                },        /* SHRA_R.PH: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::SUBUH__R__QB[2] = { /* presumably a _POOL32A5 sub-pool */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000034d, &NMD::SUBUH_QB         , 0,
+       DSP_                },        /* SUBUH.QB: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000074d, &NMD::SUBUH_R_QB       , 0,
+       DSP_                },        /* SUBUH_R.QB: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::SHLLV__S__PH[2] = { /* presumably a _POOL32A5 sub-pool */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000038d, &NMD::SHLLV_PH         , 0,
+       DSP_                },        /* SHLLV.PH: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x2000078d, &NMD::SHLLV_S_PH       , 0,
+       DSP_                },        /* SHLLV_S.PH: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::SHLL__S__PH[4] = { /* presumably a _POOL32A5 sub-pool */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000fff, 0x200003b5, &NMD::SHLL_PH          , 0,
+       DSP_                },        /* SHLL.PH: bits 11..10 = 00 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000fff, 0x200007b5, 0                      , 0,
+       0x0                 },        /* SHLL[_S].PH~*(1): bits 11..10 = 01 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000fff, 0x20000bb5, &NMD::SHLL_S_PH        , 0,
+       DSP_                },        /* SHLL_S.PH: bits 11..10 = 10 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000fff, 0x20000fb5, 0                      , 0,
+       0x0                 },        /* SHLL[_S].PH~*(3): bits 11..10 = 11 */
+};
+
+
+NMD::Pool NMD::PRECR_SRA__R__PH_W[2] = { /* presumably a _POOL32A5 sub-pool */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x200003cd, &NMD::PRECR_SRA_PH_W   , 0,
+       DSP_                },        /* PRECR_SRA.PH.W: bit 10 = 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x200007cd, &NMD::PRECR_SRA_R_PH_W , 0,
+       DSP_                },        /* PRECR_SRA_R.PH.W: bit 10 = 1 */
+};
+
+
+NMD::Pool NMD::_POOL32A5[128] = {
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000005, &NMD::CMP_EQ_PH        , 0,
+       DSP_                },        /* CMP.EQ.PH */
+    { pool                , ADDQ__S__PH         , 2   , 32,
+       0xfc0003ff, 0x2000000d, 0                      , 0,
+       0x0                 },        /* ADDQ[_S].PH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000015, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(2) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000001d, &NMD::SHILO            , 0,
+       DSP_                },        /* SHILO */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000025, &NMD::MULEQ_S_W_PHL    , 0,
+       DSP_                },        /* MULEQ_S.W.PHL */
+    { pool                , MUL__S__PH          , 2   , 32,
+       0xfc0003ff, 0x2000002d, 0                      , 0,
+       0x0                 },        /* MUL[_S].PH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000035, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(6) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000003d, &NMD::REPL_PH          , 0,
+       DSP_                },        /* REPL.PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000045, &NMD::CMP_LT_PH        , 0,
+       DSP_                },        /* CMP.LT.PH */
+    { pool                , ADDQH__R__PH        , 2   , 32,
+       0xfc0003ff, 0x2000004d, 0                      , 0,
+       0x0                 },        /* ADDQH[_R].PH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000055, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(10) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000005d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(11) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000065, &NMD::MULEQ_S_W_PHR    , 0,
+       DSP_                },        /* MULEQ_S.W.PHR */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000006d, &NMD::PRECR_QB_PH      , 0,
+       DSP_                },        /* PRECR.QB.PH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000075, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(14) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000007d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(15) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000085, &NMD::CMP_LE_PH        , 0,
+       DSP_                },        /* CMP.LE.PH */
+    { pool                , ADDQH__R__W         , 2   , 32,
+       0xfc0003ff, 0x2000008d, 0                      , 0,
+       0x0                 },        /* ADDQH[_R].W */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000095, &NMD::MULEU_S_PH_QBL   , 0,
+       DSP_                },        /* MULEU_S.PH.QBL */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000009d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(19) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000a5, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(20) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000ad, &NMD::PRECRQ_QB_PH     , 0,
+       DSP_                },        /* PRECRQ.QB.PH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000b5, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(22) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000bd, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(23) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000c5, &NMD::CMPGU_EQ_QB      , 0,
+       DSP_                },        /* CMPGU.EQ.QB */
+    { pool                , ADDU__S__QB         , 2   , 32,
+       0xfc0003ff, 0x200000cd, 0                      , 0,
+       0x0                 },        /* ADDU[_S].QB */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000d5, &NMD::MULEU_S_PH_QBR   , 0,
+       DSP_                },        /* MULEU_S.PH.QBR */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000dd, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(27) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000e5, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(28) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000ed, &NMD::PRECRQ_PH_W      , 0,
+       DSP_                },        /* PRECRQ.PH.W */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000f5, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(30) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200000fd, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(31) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000105, &NMD::CMPGU_LT_QB      , 0,
+       DSP_                },        /* CMPGU.LT.QB */
+    { pool                , ADDU__S__PH         , 2   , 32,
+       0xfc0003ff, 0x2000010d, 0                      , 0,
+       0x0                 },        /* ADDU[_S].PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000115, &NMD::MULQ_RS_PH       , 0,
+       DSP_                },        /* MULQ_RS.PH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000011d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(35) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000125, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(36) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000012d, &NMD::PRECRQ_RS_PH_W   , 0,
+       DSP_                },        /* PRECRQ_RS.PH.W */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000135, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(38) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000013d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(39) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000145, &NMD::CMPGU_LE_QB      , 0,
+       DSP_                },        /* CMPGU.LE.QB */
+    { pool                , ADDUH__R__QB        , 2   , 32,
+       0xfc0003ff, 0x2000014d, 0                      , 0,
+       0x0                 },        /* ADDUH[_R].QB */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000155, &NMD::MULQ_S_PH        , 0,
+       DSP_                },        /* MULQ_S.PH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000015d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(43) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000165, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(44) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000016d, &NMD::PRECRQU_S_QB_PH  , 0,
+       DSP_                },        /* PRECRQU_S.QB.PH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000175, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(46) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000017d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(47) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000185, &NMD::CMPGDU_EQ_QB     , 0,
+       DSP_                },        /* CMPGDU.EQ.QB */
+    { pool                , SHRAV__R__PH        , 2   , 32,
+       0xfc0003ff, 0x2000018d, 0                      , 0,
+       0x0                 },        /* SHRAV[_R].PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000195, &NMD::MULQ_RS_W        , 0,
+       DSP_                },        /* MULQ_RS.W */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000019d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(51) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001a5, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(52) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001ad, &NMD::PACKRL_PH        , 0,
+       DSP_                },        /* PACKRL.PH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001b5, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(54) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001bd, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(55) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001c5, &NMD::CMPGDU_LT_QB     , 0,
+       DSP_                },        /* CMPGDU.LT.QB */
+    { pool                , SHRAV__R__QB        , 2   , 32,
+       0xfc0003ff, 0x200001cd, 0                      , 0,
+       0x0                 },        /* SHRAV[_R].QB */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001d5, &NMD::MULQ_S_W         , 0,
+       DSP_                },        /* MULQ_S.W */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001dd, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(59) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001e5, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(60) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001ed, &NMD::PICK_QB          , 0,
+       DSP_                },        /* PICK.QB */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001f5, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(62) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200001fd, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(63) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000205, &NMD::CMPGDU_LE_QB     , 0,
+       DSP_                },        /* CMPGDU.LE.QB */
+    { pool                , SUBQ__S__PH         , 2   , 32,
+       0xfc0003ff, 0x2000020d, 0                      , 0,
+       0x0                 },        /* SUBQ[_S].PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000215, &NMD::APPEND           , 0,
+       DSP_                },        /* APPEND */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000021d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(67) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000225, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(68) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000022d, &NMD::PICK_PH          , 0,
+       DSP_                },        /* PICK.PH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000235, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(70) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000023d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(71) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000245, &NMD::CMPU_EQ_QB       , 0,
+       DSP_                },        /* CMPU.EQ.QB */
+    { pool                , SUBQH__R__PH        , 2   , 32,
+       0xfc0003ff, 0x2000024d, 0                      , 0,
+       0x0                 },        /* SUBQH[_R].PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000255, &NMD::PREPEND          , 0,
+       DSP_                },        /* PREPEND */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000025d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(75) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000265, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(76) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000026d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(77) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000275, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(78) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000027d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(79) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000285, &NMD::CMPU_LT_QB       , 0,
+       DSP_                },        /* CMPU.LT.QB */
+    { pool                , SUBQH__R__W         , 2   , 32,
+       0xfc0003ff, 0x2000028d, 0                      , 0,
+       0x0                 },        /* SUBQH[_R].W */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000295, &NMD::MODSUB           , 0,
+       DSP_                },        /* MODSUB */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000029d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(83) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002a5, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(84) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002ad, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(85) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002b5, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(86) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002bd, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(87) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002c5, &NMD::CMPU_LE_QB       , 0,
+       DSP_                },        /* CMPU.LE.QB */
+    { pool                , SUBU__S__QB         , 2   , 32,
+       0xfc0003ff, 0x200002cd, 0                      , 0,
+       0x0                 },        /* SUBU[_S].QB */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002d5, &NMD::SHRAV_R_W        , 0,
+       DSP_                },        /* SHRAV_R.W */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002dd, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(91) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002e5, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(92) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002ed, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(93) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002f5, &NMD::SHRA_R_W         , 0,
+       DSP_                },        /* SHRA_R.W */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200002fd, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(95) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000305, &NMD::ADDQ_S_W         , 0,
+       DSP_                },        /* ADDQ_S.W */
+    { pool                , SUBU__S__PH         , 2   , 32,
+       0xfc0003ff, 0x2000030d, 0                      , 0,
+       0x0                 },        /* SUBU[_S].PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000315, &NMD::SHRLV_PH         , 0,
+       DSP_                },        /* SHRLV.PH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000031d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(99) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000325, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(100) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000032d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(101) */
+    { pool                , SHRA__R__PH         , 2   , 32,
+       0xfc0003ff, 0x20000335, 0                      , 0,
+       0x0                 },        /* SHRA[_R].PH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000033d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(103) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000345, &NMD::SUBQ_S_W         , 0,
+       DSP_                },        /* SUBQ_S.W */
+    { pool                , SUBUH__R__QB        , 2   , 32,
+       0xfc0003ff, 0x2000034d, 0                      , 0,
+       0x0                 },        /* SUBUH[_R].QB */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000355, &NMD::SHRLV_QB         , 0,
+       DSP_                },        /* SHRLV.QB */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000035d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(107) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000365, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(108) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000036d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(109) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000375, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(110) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000037d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(111) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000385, &NMD::ADDSC            , 0,
+       DSP_                },        /* ADDSC */
+    { pool                , SHLLV__S__PH        , 2   , 32,
+       0xfc0003ff, 0x2000038d, 0                      , 0,
+       0x0                 },        /* SHLLV[_S].PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x20000395, &NMD::SHLLV_QB         , 0,
+       DSP_                },        /* SHLLV.QB */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x2000039d, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(115) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003a5, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(116) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003ad, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(117) */
+    { pool                , SHLL__S__PH         , 4   , 32,
+       0xfc0003ff, 0x200003b5, 0                      , 0,
+       0x0                 },        /* SHLL[_S].PH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003bd, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(119) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003c5, &NMD::ADDWC            , 0,
+       DSP_                },        /* ADDWC */
+    { pool                , PRECR_SRA__R__PH_W  , 2   , 32,
+       0xfc0003ff, 0x200003cd, 0                      , 0,
+       0x0                 },        /* PRECR_SRA[_R].PH.W */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003d5, &NMD::SHLLV_S_W        , 0,
+       DSP_                },        /* SHLLV_S.W */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003dd, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(123) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003e5, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(124) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003ed, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(125) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003f5, &NMD::SHLL_S_W         , 0,
+       DSP_                },        /* SHLL_S.W */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0x200003fd, 0                      , 0,
+       0x0                 },        /* _POOL32A5~*(127) */
+};
+
+
+NMD::Pool NMD::PP_LSX[16] = {    /* values differ only in bits 10..7 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000007, &NMD::LBX              , 0,
+       0x0                 },        /* LBX */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000087, &NMD::SBX              , 0,
+       XMMS_               },        /* SBX */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000107, &NMD::LBUX             , 0,
+       0x0                 },        /* LBUX */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000187, 0                      , 0,
+       0x0                 },        /* PP.LSX~*(3) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000207, &NMD::LHX              , 0,
+       0x0                 },        /* LHX */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000287, &NMD::SHX              , 0,
+       XMMS_               },        /* SHX */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000307, &NMD::LHUX             , 0,
+       0x0                 },        /* LHUX */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000387, &NMD::LWUX             , 0,
+       MIPS64_             },        /* LWUX */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000407, &NMD::LWX              , 0,
+       0x0                 },        /* LWX */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000487, &NMD::SWX              , 0,
+       XMMS_               },        /* SWX */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000507, &NMD::LWC1X            , 0,
+       CP1_                },        /* LWC1X */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000587, &NMD::SWC1X            , 0,
+       CP1_                },        /* SWC1X */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000607, &NMD::LDX              , 0,
+       MIPS64_             },        /* LDX */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000687, &NMD::SDX              , 0,
+       MIPS64_             },        /* SDX */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000707, &NMD::LDC1X            , 0,
+       CP1_                },        /* LDC1X */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000787, &NMD::SDC1X            , 0,
+       CP1_                },        /* SDC1X */
+};  /* PP_LSX: slot 3 is a reserved_block; the rest point at NMD members */
+
+
+NMD::Pool NMD::PP_LSXS[16] = {   /* values differ only in bits 10..7 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000047, 0                      , 0,
+       0x0                 },        /* PP.LSXS~*(0) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0x200000c7, 0                      , 0,
+       0x0                 },        /* PP.LSXS~*(1) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000147, 0                      , 0,
+       0x0                 },        /* PP.LSXS~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0x200001c7, 0                      , 0,
+       0x0                 },        /* PP.LSXS~*(3) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000247, &NMD::LHXS             , 0,
+       0x0                 },        /* LHXS */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x200002c7, &NMD::SHXS             , 0,
+       XMMS_               },        /* SHXS */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000347, &NMD::LHUXS            , 0,
+       0x0                 },        /* LHUXS */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x200003c7, &NMD::LWUXS            , 0,
+       MIPS64_             },        /* LWUXS */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000447, &NMD::LWXS_32_         , 0,
+       0x0                 },        /* LWXS[32] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x200004c7, &NMD::SWXS             , 0,
+       XMMS_               },        /* SWXS */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000547, &NMD::LWC1XS           , 0,
+       CP1_                },        /* LWC1XS */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x200005c7, &NMD::SWC1XS           , 0,
+       CP1_                },        /* SWC1XS */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000647, &NMD::LDXS             , 0,
+       MIPS64_             },        /* LDXS */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x200006c7, &NMD::SDXS             , 0,
+       MIPS64_             },        /* SDXS */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x20000747, &NMD::LDC1XS           , 0,
+       CP1_                },        /* LDC1XS */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0x200007c7, &NMD::SDC1XS           , 0,
+       CP1_                },        /* SDC1XS */
+};  /* PP_LSXS: slots 0-3 are reserved_block; low 7 value bits are 0x47 */
+
+
+NMD::Pool NMD::P_LSX[2] = {    /* values differ in bit 6 only */
+    { pool                , PP_LSX              , 16  , 32,
+       0xfc00007f, 0x20000007, 0                      , 0,
+       0x0                 },        /* PP.LSX */
+    { pool                , PP_LSXS             , 16  , 32,
+       0xfc00007f, 0x20000047, 0                      , 0,
+       0x0                 },        /* PP.LSXS */
+};  /* P_LSX: both slots reference a 16-entry table (PP_LSX, PP_LSXS) */
+
+
+NMD::Pool NMD::POOL32Axf_1_0[4] = {  /* values differ in bits 13..12 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x2000007f, &NMD::MFHI_DSP_        , 0,
+       DSP_                },        /* MFHI[DSP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x2000107f, &NMD::MFLO_DSP_        , 0,
+       DSP_                },        /* MFLO[DSP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x2000207f, &NMD::MTHI_DSP_        , 0,
+       DSP_                },        /* MTHI[DSP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x2000307f, &NMD::MTLO_DSP_        , 0,
+       DSP_                },        /* MTLO[DSP] */
+};  /* POOL32Axf_1_0: all four slots are DSP_-tagged instructions */
+
+
+NMD::Pool NMD::POOL32Axf_1_1[4] = {  /* values differ in bits 13..12 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x2000027f, &NMD::MTHLIP           , 0,
+       DSP_                },        /* MTHLIP */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x2000127f, &NMD::SHILOV           , 0,
+       DSP_                },        /* SHILOV */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0x2000227f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_1_1~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0x2000327f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_1_1~*(3) */
+};  /* POOL32Axf_1_1: slots 2 and 3 are reserved_block */
+
+
+NMD::Pool NMD::POOL32Axf_1_3[4] = {  /* values differ in bits 13..12 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x2000067f, &NMD::RDDSP            , 0,
+       DSP_                },        /* RDDSP */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x2000167f, &NMD::WRDSP            , 0,
+       DSP_                },        /* WRDSP */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x2000267f, &NMD::EXTP             , 0,
+       DSP_                },        /* EXTP */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x2000367f, &NMD::EXTPDP           , 0,
+       DSP_                },        /* EXTPDP */
+};  /* POOL32Axf_1_3: all four slots are DSP_-tagged instructions */
+
+
+NMD::Pool NMD::POOL32Axf_1_4[2] = {  /* values differ in bit 12 only */
+    { instruction         , 0                   , 0   , 32,
+       0xfc001fff, 0x2000087f, &NMD::SHLL_QB          , 0,
+       DSP_                },        /* SHLL.QB */
+    { instruction         , 0                   , 0   , 32,
+       0xfc001fff, 0x2000187f, &NMD::SHRL_QB          , 0,
+       DSP_                },        /* SHRL.QB */
+};  /* POOL32Axf_1_4: mask 0xfc001fff has bit 13 clear */
+
+
+NMD::Pool NMD::MAQ_S_A__W_PHR[2] = {  /* values differ in bit 13 only */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20000a7f, &NMD::MAQ_S_W_PHR      , 0,
+       DSP_                },        /* MAQ_S.W.PHR */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20002a7f, &NMD::MAQ_SA_W_PHR     , 0,
+       DSP_                },        /* MAQ_SA.W.PHR */
+};  /* MAQ_S_A__W_PHR: bit 12 of both values is 0 */
+
+
+NMD::Pool NMD::MAQ_S_A__W_PHL[2] = {  /* values differ in bit 13 only */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20001a7f, &NMD::MAQ_S_W_PHL      , 0,
+       DSP_                },        /* MAQ_S.W.PHL */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20003a7f, &NMD::MAQ_SA_W_PHL     , 0,
+       DSP_                },        /* MAQ_SA.W.PHL */
+};  /* MAQ_S_A__W_PHL: bit 12 of both values is 1 */
+
+
+NMD::Pool NMD::POOL32Axf_1_5[2] = {  /* values differ in bit 12 only */
+    { pool                , MAQ_S_A__W_PHR      , 2   , 32,
+       0xfc001fff, 0x20000a7f, 0                      , 0,
+       0x0                 },        /* MAQ_S[A].W.PHR */
+    { pool                , MAQ_S_A__W_PHL      , 2   , 32,
+       0xfc001fff, 0x20001a7f, 0                      , 0,
+       0x0                 },        /* MAQ_S[A].W.PHL */
+};  /* POOL32Axf_1_5: slots link the 2-entry MAQ_S_A__W_PHR/_PHL tables */
+
+
+NMD::Pool NMD::POOL32Axf_1_7[4] = {  /* values differ in bits 13..12 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20000e7f, &NMD::EXTR_W           , 0,
+       DSP_                },        /* EXTR.W */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20001e7f, &NMD::EXTR_R_W         , 0,
+       DSP_                },        /* EXTR_R.W */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20002e7f, &NMD::EXTR_RS_W        , 0,
+       DSP_                },        /* EXTR_RS.W */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20003e7f, &NMD::EXTR_S_H         , 0,
+       DSP_                },        /* EXTR_S.H */
+};  /* POOL32Axf_1_7: all four slots are DSP_-tagged instructions */
+
+
+NMD::Pool NMD::POOL32Axf_1[8] = {  /* values differ in bits 11..9 only */
+    { pool                , POOL32Axf_1_0       , 4   , 32,
+       0xfc000fff, 0x2000007f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_1_0 */
+    { pool                , POOL32Axf_1_1       , 4   , 32,
+       0xfc000fff, 0x2000027f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_1_1 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000fff, 0x2000047f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_1~*(2) */
+    { pool                , POOL32Axf_1_3       , 4   , 32,
+       0xfc000fff, 0x2000067f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_1_3 */
+    { pool                , POOL32Axf_1_4       , 2   , 32,
+       0xfc000fff, 0x2000087f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_1_4 */
+    { pool                , POOL32Axf_1_5       , 2   , 32,
+       0xfc000fff, 0x20000a7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_1_5 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000fff, 0x20000c7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_1~*(6) */
+    { pool                , POOL32Axf_1_7       , 4   , 32,
+       0xfc000fff, 0x20000e7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_1_7 */
+};  /* POOL32Axf_1: slots 2 and 6 are reserved_block; the rest are pools */
+
+
+NMD::Pool NMD::POOL32Axf_2_DSP__0_7[8] = {  /* values differ in bits 11..9 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200000bf, &NMD::DPA_W_PH         , 0,
+       DSP_                },        /* DPA.W.PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200002bf, &NMD::DPAQ_S_W_PH      , 0,
+       DSP_                },        /* DPAQ_S.W.PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200004bf, &NMD::DPS_W_PH         , 0,
+       DSP_                },        /* DPS.W.PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200006bf, &NMD::DPSQ_S_W_PH      , 0,
+       DSP_                },        /* DPSQ_S.W.PH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0x200008bf, 0                      , 0,
+       0x0                 },        /* POOL32Axf_2(DSP)_0_7~*(4) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20000abf, &NMD::MADD_DSP_        , 0,
+       DSP_                },        /* MADD[DSP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20000cbf, &NMD::MULT_DSP_        , 0,
+       DSP_                },        /* MULT[DSP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20000ebf, &NMD::EXTRV_W          , 0,
+       DSP_                },        /* EXTRV.W */
+};  /* POOL32Axf_2_DSP__0_7: bits 13..12 of value are 00; slot 4 reserved */
+
+
+NMD::Pool NMD::POOL32Axf_2_DSP__8_15[8] = {  /* values differ in bits 11..9 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200010bf, &NMD::DPAX_W_PH        , 0,
+       DSP_                },        /* DPAX.W.PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200012bf, &NMD::DPAQ_SA_L_W      , 0,
+       DSP_                },        /* DPAQ_SA.L.W */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200014bf, &NMD::DPSX_W_PH        , 0,
+       DSP_                },        /* DPSX.W.PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200016bf, &NMD::DPSQ_SA_L_W      , 0,
+       DSP_                },        /* DPSQ_SA.L.W */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0x200018bf, 0                      , 0,
+       0x0                 },        /* POOL32Axf_2(DSP)_8_15~*(4) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20001abf, &NMD::MADDU_DSP_       , 0,
+       DSP_                },        /* MADDU[DSP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20001cbf, &NMD::MULTU_DSP_       , 0,
+       DSP_                },        /* MULTU[DSP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20001ebf, &NMD::EXTRV_R_W        , 0,
+       DSP_                },        /* EXTRV_R.W */
+};  /* POOL32Axf_2_DSP__8_15: bits 13..12 of value are 01; slot 4 reserved */
+
+
+NMD::Pool NMD::POOL32Axf_2_DSP__16_23[8] = {  /* values differ in bits 11..9 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200020bf, &NMD::DPAU_H_QBL       , 0,
+       DSP_                },        /* DPAU.H.QBL */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200022bf, &NMD::DPAQX_S_W_PH     , 0,
+       DSP_                },        /* DPAQX_S.W.PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200024bf, &NMD::DPSU_H_QBL       , 0,
+       DSP_                },        /* DPSU.H.QBL */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200026bf, &NMD::DPSQX_S_W_PH     , 0,
+       DSP_                },        /* DPSQX_S.W.PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200028bf, &NMD::EXTPV            , 0,
+       DSP_                },        /* EXTPV */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20002abf, &NMD::MSUB_DSP_        , 0,
+       DSP_                },        /* MSUB[DSP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20002cbf, &NMD::MULSA_W_PH       , 0,
+       DSP_                },        /* MULSA.W.PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20002ebf, &NMD::EXTRV_RS_W       , 0,
+       DSP_                },        /* EXTRV_RS.W */
+};  /* POOL32Axf_2_DSP__16_23: bits 13..12 of value are 10; none reserved */
+
+
+NMD::Pool NMD::POOL32Axf_2_DSP__24_31[8] = {  /* values differ in bits 11..9 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200030bf, &NMD::DPAU_H_QBR       , 0,
+       DSP_                },        /* DPAU.H.QBR */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200032bf, &NMD::DPAQX_SA_W_PH    , 0,
+       DSP_                },        /* DPAQX_SA.W.PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200034bf, &NMD::DPSU_H_QBR       , 0,
+       DSP_                },        /* DPSU.H.QBR */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200036bf, &NMD::DPSQX_SA_W_PH    , 0,
+       DSP_                },        /* DPSQX_SA.W.PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x200038bf, &NMD::EXTPDPV          , 0,
+       DSP_                },        /* EXTPDPV */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20003abf, &NMD::MSUBU_DSP_       , 0,
+       DSP_                },        /* MSUBU[DSP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20003cbf, &NMD::MULSAQ_S_W_PH    , 0,
+       DSP_                },        /* MULSAQ_S.W.PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0x20003ebf, &NMD::EXTRV_S_H        , 0,
+       DSP_                },        /* EXTRV_S.H */
+};  /* POOL32Axf_2_DSP__24_31: bits 13..12 of value are 11; none reserved */
+
+
+NMD::Pool NMD::POOL32Axf_2[4] = {  /* values differ in bits 13..12 only */
+    { pool                , POOL32Axf_2_DSP__0_7, 8   , 32,
+       0xfc0031ff, 0x200000bf, 0                      , 0,
+       0x0                 },        /* POOL32Axf_2(DSP)_0_7 */
+    { pool                , POOL32Axf_2_DSP__8_15, 8   , 32,
+       0xfc0031ff, 0x200010bf, 0                      , 0,
+       0x0                 },        /* POOL32Axf_2(DSP)_8_15 */
+    { pool                , POOL32Axf_2_DSP__16_23, 8   , 32,
+       0xfc0031ff, 0x200020bf, 0                      , 0,
+       0x0                 },        /* POOL32Axf_2(DSP)_16_23 */
+    { pool                , POOL32Axf_2_DSP__24_31, 8   , 32,
+       0xfc0031ff, 0x200030bf, 0                      , 0,
+       0x0                 },        /* POOL32Axf_2(DSP)_24_31 */
+};  /* POOL32Axf_2: mask 0xfc0031ff has bits 11..9 clear */
+
+
+NMD::Pool NMD::POOL32Axf_4[128] = {
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000013f, &NMD::ABSQ_S_QB        , 0,
+       DSP_                },        /* ABSQ_S.QB */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000033f, &NMD::REPLV_PH         , 0,
+       DSP_                },        /* REPLV.PH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000053f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000073f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(3) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000093f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20000b3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20000d3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20000f3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(7) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000113f, &NMD::ABSQ_S_PH        , 0,
+       DSP_                },        /* ABSQ_S.PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000133f, &NMD::REPLV_QB         , 0,
+       DSP_                },        /* REPLV.QB */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000153f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(10) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000173f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(11) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000193f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(12) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20001b3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(13) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20001d3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(14) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20001f3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(15) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000213f, &NMD::ABSQ_S_W         , 0,
+       DSP_                },        /* ABSQ_S.W */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000233f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(17) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000253f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(18) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000273f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(19) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000293f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(20) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20002b3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(21) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20002d3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(22) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20002f3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(23) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000313f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(24) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000333f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(25) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000353f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(26) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000373f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(27) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000393f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(28) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20003b3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(29) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20003d3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(30) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20003f3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(31) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000413f, &NMD::INSV             , 0,
+       DSP_                },        /* INSV */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000433f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(33) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000453f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(34) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000473f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(35) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000493f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(36) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x20004b3f, &NMD::CLO              , 0,
+       XMMS_               },        /* CLO */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x20004d3f, &NMD::MFC2             , 0,
+       CP2_                },        /* MFC2 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20004f3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(39) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000513f, &NMD::PRECEQ_W_PHL     , 0,
+       DSP_                },        /* PRECEQ.W.PHL */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000533f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(41) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000553f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(42) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000573f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(43) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000593f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(44) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x20005b3f, &NMD::CLZ              , 0,
+       XMMS_               },        /* CLZ */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x20005d3f, &NMD::MTC2             , 0,
+       CP2_                },        /* MTC2 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20005f3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(47) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000613f, &NMD::PRECEQ_W_PHR     , 0,
+       DSP_                },        /* PRECEQ.W.PHR */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000633f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(49) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000653f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(50) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000673f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(51) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000693f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(52) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20006b3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(53) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x20006d3f, &NMD::DMFC2            , 0,
+       CP2_                },        /* DMFC2 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20006f3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(55) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000713f, &NMD::PRECEQU_PH_QBL   , 0,
+       DSP_                },        /* PRECEQU.PH.QBL */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000733f, &NMD::PRECEQU_PH_QBLA  , 0,
+       DSP_                },        /* PRECEQU.PH.QBLA */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000753f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(58) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000773f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(59) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000793f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(60) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20007b3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(61) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x20007d3f, &NMD::DMTC2            , 0,
+       CP2_                },        /* DMTC2 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20007f3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(63) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000813f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(64) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000833f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(65) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000853f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(66) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000873f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(67) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000893f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(68) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20008b3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(69) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x20008d3f, &NMD::MFHC2            , 0,
+       CP2_                },        /* MFHC2 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20008f3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(71) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000913f, &NMD::PRECEQU_PH_QBR   , 0,
+       DSP_                },        /* PRECEQU.PH.QBR */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000933f, &NMD::PRECEQU_PH_QBRA  , 0,
+       DSP_                },        /* PRECEQU.PH.QBRA */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000953f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(74) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000973f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(75) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000993f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(76) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20009b3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(77) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x20009d3f, &NMD::MTHC2            , 0,
+       CP2_                },        /* MTHC2 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20009f3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(79) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000a13f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(80) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000a33f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(81) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000a53f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(82) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000a73f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(83) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000a93f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(84) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000ab3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(85) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000ad3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(86) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000af3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(87) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000b13f, &NMD::PRECEU_PH_QBL    , 0,
+       DSP_                },        /* PRECEU.PH.QBL */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000b33f, &NMD::PRECEU_PH_QBLA   , 0,
+       DSP_                },        /* PRECEU.PH.QBLA */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000b53f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(90) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000b73f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(91) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000b93f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(92) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000bb3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(93) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000bd3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(94) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000bf3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(95) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000c13f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(96) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000c33f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(97) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000c53f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(98) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000c73f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(99) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000c93f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(100) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000cb3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(101) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000cd3f, &NMD::CFC2             , 0,
+       CP2_                },        /* CFC2 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000cf3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(103) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000d13f, &NMD::PRECEU_PH_QBR    , 0,
+       DSP_                },        /* PRECEU.PH.QBR */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000d33f, &NMD::PRECEU_PH_QBRA   , 0,
+       DSP_                },        /* PRECEU.PH.QBRA */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000d53f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(106) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000d73f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(107) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000d93f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(108) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000db3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(109) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000dd3f, &NMD::CTC2             , 0,
+       CP2_                },        /* CTC2 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000df3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(111) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000e13f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(112) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000e33f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(113) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000e53f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(114) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000e73f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(115) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000e93f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(116) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000eb3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(117) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000ed3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(118) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000ef3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(119) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000f13f, &NMD::RADDU_W_QB       , 0,
+       DSP_                },        /* RADDU.W.QB */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000f33f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(121) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000f53f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(122) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000f73f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(123) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000f93f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(124) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000fb3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(125) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000fd3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(126) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000ff3f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4~*(127) */
+};
+
+
+NMD::Pool NMD::POOL32Axf_5_group0[32] = { /* entry i: bits 13..9 == i */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000017f, &NMD::TLBGP            , 0,
+       CP0_ | VZ_ | TLB_   },        /* TLBGP (slot 0) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000037f, &NMD::TLBP             , 0,
+       CP0_ | TLB_         },        /* TLBP (slot 1) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000057f, &NMD::TLBGINV          , 0,
+       CP0_ | VZ_ | TLB_ | TLBINV_},        /* TLBGINV (slot 2) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000077f, &NMD::TLBINV           , 0,
+       CP0_ | TLB_ | TLBINV_},        /* TLBINV (slot 3) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000097f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20000b7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20000d7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20000f7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(7) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000117f, &NMD::TLBGR            , 0,
+       CP0_ | VZ_ | TLB_   },        /* TLBGR (slot 8) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000137f, &NMD::TLBR             , 0,
+       CP0_ | TLB_         },        /* TLBR (slot 9) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000157f, &NMD::TLBGINVF         , 0,
+       CP0_ | VZ_ | TLB_ | TLBINV_},        /* TLBGINVF (slot 10) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000177f, &NMD::TLBINVF          , 0,
+       CP0_ | TLB_ | TLBINV_},        /* TLBINVF (slot 11) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000197f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(12) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20001b7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(13) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20001d7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(14) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20001f7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(15) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000217f, &NMD::TLBGWI           , 0,
+       CP0_ | VZ_ | TLB_   },        /* TLBGWI (slot 16) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000237f, &NMD::TLBWI            , 0,
+       CP0_ | TLB_         },        /* TLBWI (slot 17) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000257f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(18) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000277f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(19) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000297f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(20) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20002b7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(21) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20002d7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(22) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20002f7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(23) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000317f, &NMD::TLBGWR           , 0,
+       CP0_ | VZ_ | TLB_   },        /* TLBGWR (slot 24) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000337f, &NMD::TLBWR            , 0,
+       CP0_ | TLB_         },        /* TLBWR (slot 25) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000357f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(26) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000377f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(27) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000397f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(28) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20003b7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(29) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20003d7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(30) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20003f7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0~*(31) */
+};  /* all 32 slots share mask 0xfc00ffff; value bits 15..14 are 00 */
+
+
+NMD::Pool NMD::POOL32Axf_5_group1[32] = { /* entry i: bits 13..9 == i */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000417f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(0) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000437f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(1) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000457f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(2) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000477f, &NMD::DI               , 0,
+       0x0                 },        /* DI (slot 3) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000497f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20004b7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20004d7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20004f7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(7) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000517f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(8) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000537f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(9) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000557f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(10) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000577f, &NMD::EI               , 0,
+       0x0                 },        /* EI (slot 11) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000597f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(12) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20005b7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(13) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20005d7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(14) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20005f7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(15) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000617f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(16) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000637f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(17) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000657f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(18) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000677f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(19) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000697f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(20) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20006b7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(21) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20006d7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(22) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20006f7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(23) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000717f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(24) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000737f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(25) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000757f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(26) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000777f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(27) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000797f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(28) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20007b7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(29) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20007d7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(30) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x20007f7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1~*(31) */
+};  /* all 32 slots share mask 0xfc00ffff; value bits 15..14 are 01 */
+
+
+NMD::Pool NMD::ERETx[2] = {    /* entries differ only in value bit 16 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc01ffff, 0x2000f37f, &NMD::ERET             , 0,
+       0x0                 },        /* ERET: bit 16 clear */
+    { instruction         , 0                   , 0   , 32,
+       0xfc01ffff, 0x2001f37f, &NMD::ERETNC           , 0,
+       0x0                 },        /* ERETNC: bit 16 set */
+};  /* mask 0xfc01ffff adds bit 16 to the 0xfc00ffff used by sibling pools */
+
+
+NMD::Pool NMD::POOL32Axf_5_group3[32] = {
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000c17f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(0) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000c37f, &NMD::WAIT             , 0,
+       0x0                 },        /* WAIT */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000c57f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000c77f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(3) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000c97f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000cb7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000cd7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000cf7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(7) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000d17f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(8) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000d37f, &NMD::IRET             , 0,
+       MCU_                },        /* IRET */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000d57f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(10) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000d77f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(11) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000d97f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(12) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000db7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(13) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000dd7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(14) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000df7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(15) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000e17f, &NMD::RDPGPR           , 0,
+       CP0_                },        /* RDPGPR */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000e37f, &NMD::DERET            , 0,
+       EJTAG_              },        /* DERET */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000e57f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(18) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000e77f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(19) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000e97f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(20) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000eb7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(21) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000ed7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(22) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000ef7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(23) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000f17f, &NMD::WRPGPR           , 0,
+       CP0_                },        /* WRPGPR */
+    { pool                , ERETx               , 2   , 32,
+       0xfc00ffff, 0x2000f37f, 0                      , 0,
+       0x0                 },        /* ERETx */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000f57f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(26) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000f77f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(27) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000f97f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(28) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000fb7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(29) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000fd7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(30) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0x2000ff7f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3~*(31) */
+};
+
+
+NMD::Pool NMD::POOL32Axf_5[4] = {  /* 4-way split on bits 15..14 */
+    { pool                , POOL32Axf_5_group0  , 32  , 32,
+       0xfc00c1ff, 0x2000017f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group0 */
+    { pool                , POOL32Axf_5_group1  , 32  , 32,
+       0xfc00c1ff, 0x2000417f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group1 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00c1ff, 0x2000817f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5~*(2) */
+    { pool                , POOL32Axf_5_group3  , 32  , 32,
+       0xfc00c1ff, 0x2000c17f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5_group3 */
+};
+
+
+NMD::Pool NMD::SHRA__R__QB[2] = {  /* bit 12 selects the _R variant */
+    { instruction         , 0                   , 0   , 32,
+       0xfc001fff, 0x200001ff, &NMD::SHRA_QB          , 0,
+       DSP_                },        /* SHRA.QB */
+    { instruction         , 0                   , 0   , 32,
+       0xfc001fff, 0x200011ff, &NMD::SHRA_R_QB        , 0,
+       DSP_                },        /* SHRA_R.QB */
+};
+
+
+NMD::Pool NMD::POOL32Axf_7[8] = {  /* 8-way split on bits 11..9 */
+    { pool                , SHRA__R__QB         , 2   , 32,
+       0xfc000fff, 0x200001ff, 0                      , 0,
+       0x0                 },        /* SHRA[_R].QB */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000fff, 0x200003ff, &NMD::SHRL_PH          , 0,
+       DSP_                },        /* SHRL.PH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000fff, 0x200005ff, &NMD::REPL_QB          , 0,
+       DSP_                },        /* REPL.QB */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000fff, 0x200007ff, 0                      , 0,
+       0x0                 },        /* POOL32Axf_7~*(3) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000fff, 0x200009ff, 0                      , 0,
+       0x0                 },        /* POOL32Axf_7~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000fff, 0x20000bff, 0                      , 0,
+       0x0                 },        /* POOL32Axf_7~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000fff, 0x20000dff, 0                      , 0,
+       0x0                 },        /* POOL32Axf_7~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000fff, 0x20000fff, 0                      , 0,
+       0x0                 },        /* POOL32Axf_7~*(7) */
+};
+
+
+NMD::Pool NMD::POOL32Axf[8] = {  /* 8-way split on bits 8..6 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0x2000003f, 0                      , 0,
+       0x0                 },        /* POOL32Axf~*(0) */
+    { pool                , POOL32Axf_1         , 8   , 32,
+       0xfc0001ff, 0x2000007f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_1 */
+    { pool                , POOL32Axf_2         , 4   , 32,
+       0xfc0001ff, 0x200000bf, 0                      , 0,
+       0x0                 },        /* POOL32Axf_2 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0x200000ff, 0                      , 0,
+       0x0                 },        /* POOL32Axf~*(3) */
+    { pool                , POOL32Axf_4         , 128 , 32,
+       0xfc0001ff, 0x2000013f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_4 */
+    { pool                , POOL32Axf_5         , 4   , 32,
+       0xfc0001ff, 0x2000017f, 0                      , 0,
+       0x0                 },        /* POOL32Axf_5 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0x200001bf, 0                      , 0,
+       0x0                 },        /* POOL32Axf~*(6) */
+    { pool                , POOL32Axf_7         , 8   , 32,
+       0xfc0001ff, 0x200001ff, 0                      , 0,
+       0x0                 },        /* POOL32Axf_7 */
+};
+
+
+NMD::Pool NMD::_POOL32A7[8] = {  /* 8-way split on bits 5..3 */
+    { pool                , P_LSX               , 2   , 32,
+       0xfc00003f, 0x20000007, 0                      , 0,
+       0x0                 },        /* P.LSX */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00003f, 0x2000000f, &NMD::LSA              , 0,
+       0x0                 },        /* LSA */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0x20000017, 0                      , 0,
+       0x0                 },        /* _POOL32A7~*(2) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00003f, 0x2000001f, &NMD::EXTW             , 0,
+       0x0                 },        /* EXTW */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0x20000027, 0                      , 0,
+       0x0                 },        /* _POOL32A7~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0x2000002f, 0                      , 0,
+       0x0                 },        /* _POOL32A7~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0x20000037, 0                      , 0,
+       0x0                 },        /* _POOL32A7~*(6) */
+    { pool                , POOL32Axf           , 8   , 32,
+       0xfc00003f, 0x2000003f, 0                      , 0,
+       0x0                 },        /* POOL32Axf */
+};
+
+
+NMD::Pool NMD::P32A[8] = {  /* 8-way split on bits 2..0 */
+    { pool                , _POOL32A0           , 128 , 32,
+       0xfc000007, 0x20000000, 0                      , 0,
+       0x0                 },        /* _POOL32A0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000007, 0x20000001, &NMD::SPECIAL2         , 0,
+       UDI_                },        /* SPECIAL2 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000007, 0x20000002, &NMD::COP2_1           , 0,
+       CP2_                },        /* COP2_1 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000007, 0x20000003, &NMD::UDI              , 0,
+       UDI_                },        /* UDI */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000007, 0x20000004, 0                      , 0,
+       0x0                 },        /* P32A~*(4) */
+    { pool                , _POOL32A5           , 128 , 32,
+       0xfc000007, 0x20000005, 0                      , 0,
+       0x0                 },        /* _POOL32A5 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000007, 0x20000006, 0                      , 0,
+       0x0                 },        /* P32A~*(6) */
+    { pool                , _POOL32A7           , 8   , 32,
+       0xfc000007, 0x20000007, 0                      , 0,
+       0x0                 },        /* _POOL32A7 */
+};
+
+
+NMD::Pool NMD::P_GP_D[2] = {  /* bit 2: 0 = LD[GP], 1 = SD[GP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000007, 0x40000001, &NMD::LD_GP_           , 0,
+       MIPS64_             },        /* LD[GP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000007, 0x40000005, &NMD::SD_GP_           , 0,
+       MIPS64_             },        /* SD[GP] */
+};
+
+
+NMD::Pool NMD::P_GP_W[4] = {  /* 4-way split on bits 1..0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000003, 0x40000000, &NMD::ADDIU_GP_W_      , 0,
+       0x0                 },        /* ADDIU[GP.W] */
+    { pool                , P_GP_D              , 2   , 32,
+       0xfc000003, 0x40000001, 0                      , 0,
+       0x0                 },        /* P.GP.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000003, 0x40000002, &NMD::LW_GP_           , 0,
+       0x0                 },        /* LW[GP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000003, 0x40000003, &NMD::SW_GP_           , 0,
+       0x0                 },        /* SW[GP] */
+};
+
+
+NMD::Pool NMD::POOL48I[32] = {  /* 48-bit words: 32-way split on bits 36..32 */
+    { instruction         , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600000000000ull, &NMD::LI_48_           , 0,
+       XMMS_               },        /* LI[48] */
+    { instruction         , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600100000000ull, &NMD::ADDIU_48_        , 0,
+       XMMS_               },        /* ADDIU[48] */
+    { instruction         , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600200000000ull, &NMD::ADDIU_GP48_      , 0,
+       XMMS_               },        /* ADDIU[GP48] */
+    { instruction         , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600300000000ull, &NMD::ADDIUPC_48_      , 0,
+       XMMS_               },        /* ADDIUPC[48] */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600400000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(4) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600500000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(5) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600600000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(6) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600700000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(7) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600800000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(8) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600900000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(9) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600a00000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(10) */
+    { instruction         , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600b00000000ull, &NMD::LWPC_48_         , 0,
+       XMMS_               },        /* LWPC[48] */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600c00000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(12) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600d00000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(13) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600e00000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(14) */
+    { instruction         , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x600f00000000ull, &NMD::SWPC_48_         , 0,
+       XMMS_               },        /* SWPC[48] */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601000000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(16) */
+    { instruction         , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601100000000ull, &NMD::DADDIU_48_       , 0,
+       MIPS64_             },        /* DADDIU[48] */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601200000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(18) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601300000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(19) */
+    { instruction         , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601400000000ull, &NMD::DLUI_48_         , 0,
+       MIPS64_             },        /* DLUI[48] */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601500000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(21) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601600000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(22) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601700000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(23) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601800000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(24) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601900000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(25) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601a00000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(26) */
+    { instruction         , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601b00000000ull, &NMD::LDPC_48_         , 0,
+       MIPS64_             },        /* LDPC[48] */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601c00000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(28) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601d00000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(29) */
+    { reserved_block      , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601e00000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I~*(30) */
+    { instruction         , 0                   , 0   , 48,
+       0xfc1f00000000ull, 0x601f00000000ull, &NMD::SDPC_48_         , 0,
+       MIPS64_             },        /* SDPC[48] */
+};
+
+
+NMD::Pool NMD::PP_SR[4] = {  /* 4-way split on bits 1..0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc10f003, 0x80003000, &NMD::SAVE_32_         , 0,
+       0x0                 },        /* SAVE[32] */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc10f003, 0x80003001, 0                      , 0,
+       0x0                 },        /* PP.SR~*(1) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc10f003, 0x80003002, &NMD::RESTORE_32_      , 0,
+       0x0                 },        /* RESTORE[32] */
+    { return_instruction  , 0                   , 0   , 32,
+       0xfc10f003, 0x80003003, &NMD::RESTORE_JRC_32_  , 0,
+       0x0                 },        /* RESTORE.JRC[32] */
+};
+
+
+NMD::Pool NMD::P_SR_F[8] = {  /* 8-way split on bits 2..0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc10f007, 0x80103000, &NMD::SAVEF            , 0,
+       CP1_                },        /* SAVEF */
+    { instruction         , 0                   , 0   , 32,
+       0xfc10f007, 0x80103001, &NMD::RESTOREF         , 0,
+       CP1_                },        /* RESTOREF */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc10f007, 0x80103002, 0                      , 0,
+       0x0                 },        /* P.SR.F~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc10f007, 0x80103003, 0                      , 0,
+       0x0                 },        /* P.SR.F~*(3) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc10f007, 0x80103004, 0                      , 0,
+       0x0                 },        /* P.SR.F~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc10f007, 0x80103005, 0                      , 0,
+       0x0                 },        /* P.SR.F~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc10f007, 0x80103006, 0                      , 0,
+       0x0                 },        /* P.SR.F~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc10f007, 0x80103007, 0                      , 0,
+       0x0                 },        /* P.SR.F~*(7) */
+};
+
+
+NMD::Pool NMD::P_SR[2] = {  /* bit 20: 0 = PP.SR, 1 = P.SR.F */
+    { pool                , PP_SR               , 4   , 32,
+       0xfc10f000, 0x80003000, 0                      , 0,
+       0x0                 },        /* PP.SR */
+    { pool                , P_SR_F              , 8   , 32,
+       0xfc10f000, 0x80103000, 0                      , 0,
+       0x0                 },        /* P.SR.F */
+};
+
+
+NMD::Pool NMD::P_SLL[5] = {  /* 4 exact encodings, then generic SLL[32] */
+    { instruction         , 0                   , 0   , 32,
+       0xffe0f1ff, 0x8000c000, &NMD::NOP_32_          , 0,
+       0x0                 },        /* NOP[32] */
+    { instruction         , 0                   , 0   , 32,
+       0xffe0f1ff, 0x8000c003, &NMD::EHB              , 0,
+       0x0                 },        /* EHB */
+    { instruction         , 0                   , 0   , 32,
+       0xffe0f1ff, 0x8000c005, &NMD::PAUSE            , 0,
+       0x0                 },        /* PAUSE */
+    { instruction         , 0                   , 0   , 32,
+       0xffe0f1ff, 0x8000c006, &NMD::SYNC             , 0,
+       0x0                 },        /* SYNC */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c000, &NMD::SLL_32_          , 0,
+       0x0                 },        /* SLL[32]: also matches the 4 above */
+};  /* NOTE(review): entry order looks significant (first match?) - confirm */
+
+
+NMD::Pool NMD::P_SHIFT[16] = {  /* 16-way split on bits 8..5 */
+    { pool                , P_SLL               , 5   , 32,
+       0xfc00f1e0, 0x8000c000, 0                      , 0,
+       0x0                 },        /* P.SLL */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c020, 0                      , 0,
+       0x0                 },        /* P.SHIFT~*(1) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c040, &NMD::SRL_32_          , 0,
+       0x0                 },        /* SRL[32] */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c060, 0                      , 0,
+       0x0                 },        /* P.SHIFT~*(3) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c080, &NMD::SRA              , 0,
+       0x0                 },        /* SRA */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c0a0, 0                      , 0,
+       0x0                 },        /* P.SHIFT~*(5) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c0c0, &NMD::ROTR             , 0,
+       0x0                 },        /* ROTR */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c0e0, 0                      , 0,
+       0x0                 },        /* P.SHIFT~*(7) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c100, &NMD::DSLL             , 0,
+       MIPS64_             },        /* DSLL */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c120, &NMD::DSLL32           , 0,
+       MIPS64_             },        /* DSLL32 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c140, &NMD::DSRL             , 0,
+       MIPS64_             },        /* DSRL */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c160, &NMD::DSRL32           , 0,
+       MIPS64_             },        /* DSRL32 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c180, &NMD::DSRA             , 0,
+       MIPS64_             },        /* DSRA */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c1a0, &NMD::DSRA32           , 0,
+       MIPS64_             },        /* DSRA32 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c1c0, &NMD::DROTR            , 0,
+       MIPS64_             },        /* DROTR */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f1e0, 0x8000c1e0, &NMD::DROTR32          , 0,
+       MIPS64_             },        /* DROTR32 */
+};
+
+
+NMD::Pool NMD::P_ROTX[4] = {  /* 4-way split on bits 11 and 5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f820, 0x8000d000, &NMD::ROTX             , 0,
+       XMMS_               },        /* ROTX */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f820, 0x8000d020, 0                      , 0,
+       0x0                 },        /* P.ROTX~*(1) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f820, 0x8000d800, 0                      , 0,
+       0x0                 },        /* P.ROTX~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f820, 0x8000d820, 0                      , 0,
+       0x0                 },        /* P.ROTX~*(3) */
+};
+
+
+NMD::Pool NMD::P_INS[4] = {  /* 4-way split on bits 11 and 5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f820, 0x8000e000, &NMD::INS              , 0,
+       XMMS_               },        /* INS */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f820, 0x8000e020, &NMD::DINSU            , 0,
+       MIPS64_             },        /* DINSU */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f820, 0x8000e800, &NMD::DINSM            , 0,
+       MIPS64_             },        /* DINSM */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f820, 0x8000e820, &NMD::DINS             , 0,
+       MIPS64_             },        /* DINS */
+};
+
+
+NMD::Pool NMD::P_EXT[4] = {  /* 4-way split on bits 11 and 5 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f820, 0x8000f000, &NMD::EXT              , 0,
+       XMMS_               },        /* EXT */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f820, 0x8000f020, &NMD::DEXTU            , 0,
+       MIPS64_             },        /* DEXTU */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f820, 0x8000f800, &NMD::DEXTM            , 0,
+       MIPS64_             },        /* DEXTM */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f820, 0x8000f820, &NMD::DEXT             , 0,
+       MIPS64_             },        /* DEXT */
+};
+
+
+NMD::Pool NMD::P_U12[16] = {  /* 16-way split on bits 15..12 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x80000000, &NMD::ORI              , 0,
+       0x0                 },        /* ORI */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x80001000, &NMD::XORI             , 0,
+       0x0                 },        /* XORI */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x80002000, &NMD::ANDI_32_         , 0,
+       0x0                 },        /* ANDI[32] */
+    { pool                , P_SR                , 2   , 32,
+       0xfc00f000, 0x80003000, 0                      , 0,
+       0x0                 },        /* P.SR */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x80004000, &NMD::SLTI             , 0,
+       0x0                 },        /* SLTI */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x80005000, &NMD::SLTIU            , 0,
+       0x0                 },        /* SLTIU */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x80006000, &NMD::SEQI             , 0,
+       0x0                 },        /* SEQI */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f000, 0x80007000, 0                      , 0,
+       0x0                 },        /* P.U12~*(7) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x80008000, &NMD::ADDIU_NEG_       , 0,
+       0x0                 },        /* ADDIU[NEG] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x80009000, &NMD::DADDIU_U12_      , 0,
+       MIPS64_             },        /* DADDIU[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x8000a000, &NMD::DADDIU_NEG_      , 0,
+       MIPS64_             },        /* DADDIU[NEG] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x8000b000, &NMD::DROTX            , 0,
+       MIPS64_             },        /* DROTX */
+    { pool                , P_SHIFT             , 16  , 32,
+       0xfc00f000, 0x8000c000, 0                      , 0,
+       0x0                 },        /* P.SHIFT */
+    { pool                , P_ROTX              , 4   , 32,
+       0xfc00f000, 0x8000d000, 0                      , 0,
+       0x0                 },        /* P.ROTX */
+    { pool                , P_INS               , 4   , 32,
+       0xfc00f000, 0x8000e000, 0                      , 0,
+       0x0                 },        /* P.INS */
+    { pool                , P_EXT               , 4   , 32,
+       0xfc00f000, 0x8000f000, 0                      , 0,
+       0x0                 },        /* P.EXT */
+};
+
+
+NMD::Pool NMD::RINT_fmt[2] = {  /* bit 9: 0 = RINT.S, 1 = RINT.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000020, &NMD::RINT_S           , 0,
+       CP1_                },        /* RINT.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000220, &NMD::RINT_D           , 0,
+       CP1_                },        /* RINT.D */
+};
+
+
+NMD::Pool NMD::ADD_fmt0[2] = {  /* bit 9: 0 = ADD.S, 1 = reserved */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000030, &NMD::ADD_S            , 0,
+       CP1_                },        /* ADD.S */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000230, 0                      , 0,
+       CP1_                },        /* ADD.fmt0~*(1) */
+};
+
+
+NMD::Pool NMD::SELEQZ_fmt[2] = {  /* bit 9: 0 = SELEQZ.S, 1 = SELEQZ.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000038, &NMD::SELEQZ_S         , 0,
+       CP1_                },        /* SELEQZ.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000238, &NMD::SELEQZ_D         , 0,
+       CP1_                },        /* SELEQZ.D */
+};
+
+
+NMD::Pool NMD::CLASS_fmt[2] = {  /* bit 9: 0 = CLASS.S, 1 = CLASS.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000060, &NMD::CLASS_S          , 0,
+       CP1_                },        /* CLASS.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000260, &NMD::CLASS_D          , 0,
+       CP1_                },        /* CLASS.D */
+};
+
+
+NMD::Pool NMD::SUB_fmt0[2] = {  /* bit 9: 0 = SUB.S, 1 = reserved */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000070, &NMD::SUB_S            , 0,
+       CP1_                },        /* SUB.S */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000270, 0                      , 0,
+       CP1_                },        /* SUB.fmt0~*(1) */
+};
+
+
+NMD::Pool NMD::SELNEZ_fmt[2] = {  /* bit 9: 0 = SELNEZ.S, 1 = SELNEZ.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000078, &NMD::SELNEZ_S         , 0,
+       CP1_                },        /* SELNEZ.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000278, &NMD::SELNEZ_D         , 0,
+       CP1_                },        /* SELNEZ.D */
+};
+
+
+NMD::Pool NMD::MUL_fmt0[2] = {  /* bit 9: 0 = MUL.S, 1 = reserved */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa00000b0, &NMD::MUL_S            , 0,
+       CP1_                },        /* MUL.S */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0xa00002b0, 0                      , 0,
+       CP1_                },        /* MUL.fmt0~*(1) */
+};
+
+
+NMD::Pool NMD::SEL_fmt[2] = {  /* bit 9: 0 = SEL.S, 1 = SEL.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa00000b8, &NMD::SEL_S            , 0,
+       CP1_                },        /* SEL.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa00002b8, &NMD::SEL_D            , 0,
+       CP1_                },        /* SEL.D */
+};
+
+
+NMD::Pool NMD::DIV_fmt0[2] = {  /* bit 9: 0 = DIV.S, 1 = reserved */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa00000f0, &NMD::DIV_S            , 0,
+       CP1_                },        /* DIV.S */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0xa00002f0, 0                      , 0,
+       CP1_                },        /* DIV.fmt0~*(1) */
+};
+
+
+NMD::Pool NMD::ADD_fmt1[2] = {  /* bit 9: 0 = ADD.D, 1 = reserved */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000130, &NMD::ADD_D            , 0,
+       CP1_                },        /* ADD.D */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000330, 0                      , 0,
+       CP1_                },        /* ADD.fmt1~*(1) */
+};
+
+
+NMD::Pool NMD::SUB_fmt1[2] = {  /* SUB.D + reserved entry (2nd const +0x200) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000170, &NMD::SUB_D            , 0,
+       CP1_                },        /* SUB.D */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0xa0000370, 0                      , 0,
+       CP1_                },        /* SUB.fmt1~*(1) */
+};
+
+
+NMD::Pool NMD::MUL_fmt1[2] = {  /* MUL.D + reserved entry (2nd const +0x200) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa00001b0, &NMD::MUL_D            , 0,
+       CP1_                },        /* MUL.D */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0xa00003b0, 0                      , 0,
+       CP1_                },        /* MUL.fmt1~*(1) */
+};
+
+
+NMD::Pool NMD::MADDF_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x200 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa00001b8, &NMD::MADDF_S          , 0,
+       CP1_                },        /* MADDF.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa00003b8, &NMD::MADDF_D          , 0,
+       CP1_                },        /* MADDF.D */
+};
+
+
+NMD::Pool NMD::DIV_fmt1[2] = {  /* DIV.D + reserved entry (2nd const +0x200) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa00001f0, &NMD::DIV_D            , 0,
+       CP1_                },        /* DIV.D */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0003ff, 0xa00003f0, 0                      , 0,
+       CP1_                },        /* DIV.fmt1~*(1) */
+};
+
+
+NMD::Pool NMD::MSUBF_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x200 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa00001f8, &NMD::MSUBF_S          , 0,
+       CP1_                },        /* MSUBF.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0003ff, 0xa00003f8, &NMD::MSUBF_D          , 0,
+       CP1_                },        /* MSUBF.D */
+};
+
+
+NMD::Pool NMD::POOL32F_0[64] = {  /* 64 slots; 2nd constant steps by 0x8 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000000, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(0) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000008, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(1) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000010, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000018, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(3) */
+    { pool                , RINT_fmt            , 2   , 32,
+       0xfc0001ff, 0xa0000020, 0                      , 0,
+       CP1_                },        /* RINT.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000028, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(5) */
+    { pool                , ADD_fmt0            , 2   , 32,
+       0xfc0001ff, 0xa0000030, 0                      , 0,
+       CP1_                },        /* ADD.fmt0 */
+    { pool                , SELEQZ_fmt          , 2   , 32,
+       0xfc0001ff, 0xa0000038, 0                      , 0,
+       CP1_                },        /* SELEQZ.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000040, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(8) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000048, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(9) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000050, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(10) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000058, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(11) */
+    { pool                , CLASS_fmt           , 2   , 32,
+       0xfc0001ff, 0xa0000060, 0                      , 0,
+       CP1_                },        /* CLASS.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000068, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(13) */
+    { pool                , SUB_fmt0            , 2   , 32,
+       0xfc0001ff, 0xa0000070, 0                      , 0,
+       CP1_                },        /* SUB.fmt0 */
+    { pool                , SELNEZ_fmt          , 2   , 32,
+       0xfc0001ff, 0xa0000078, 0                      , 0,
+       CP1_                },        /* SELNEZ.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000080, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(16) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000088, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(17) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000090, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(18) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000098, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(19) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00000a0, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(20) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00000a8, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(21) */
+    { pool                , MUL_fmt0            , 2   , 32,
+       0xfc0001ff, 0xa00000b0, 0                      , 0,
+       CP1_                },        /* MUL.fmt0 */
+    { pool                , SEL_fmt             , 2   , 32,
+       0xfc0001ff, 0xa00000b8, 0                      , 0,
+       CP1_                },        /* SEL.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00000c0, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(24) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00000c8, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(25) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00000d0, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(26) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00000d8, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(27) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00000e0, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(28) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00000e8, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(29) */
+    { pool                , DIV_fmt0            , 2   , 32,
+       0xfc0001ff, 0xa00000f0, 0                      , 0,
+       CP1_                },        /* DIV.fmt0 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00000f8, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(31) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000100, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(32) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000108, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(33) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000110, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(34) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000118, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(35) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000120, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(36) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000128, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(37) */
+    { pool                , ADD_fmt1            , 2   , 32,
+       0xfc0001ff, 0xa0000130, 0                      , 0,
+       CP1_                },        /* ADD.fmt1 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000138, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(39) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000140, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(40) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000148, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(41) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000150, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(42) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000158, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(43) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000160, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(44) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000168, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(45) */
+    { pool                , SUB_fmt1            , 2   , 32,
+       0xfc0001ff, 0xa0000170, 0                      , 0,
+       CP1_                },        /* SUB.fmt1 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000178, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(47) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000180, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(48) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000188, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(49) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000190, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(50) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa0000198, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(51) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00001a0, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(52) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00001a8, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(53) */
+    { pool                , MUL_fmt1            , 2   , 32,
+       0xfc0001ff, 0xa00001b0, 0                      , 0,
+       CP1_                },        /* MUL.fmt1 */
+    { pool                , MADDF_fmt           , 2   , 32,
+       0xfc0001ff, 0xa00001b8, 0                      , 0,
+       CP1_                },        /* MADDF.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00001c0, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(56) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00001c8, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(57) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00001d0, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(58) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00001d8, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(59) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00001e0, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(60) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xa00001e8, 0                      , 0,
+       CP1_                },        /* POOL32F_0~*(61) */
+    { pool                , DIV_fmt1            , 2   , 32,
+       0xfc0001ff, 0xa00001f0, 0                      , 0,
+       CP1_                },        /* DIV.fmt1 */
+    { pool                , MSUBF_fmt           , 2   , 32,
+       0xfc0001ff, 0xa00001f8, 0                      , 0,
+       CP1_                },        /* MSUBF.fmt */
+};
+
+
+NMD::Pool NMD::MIN_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x200 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00023f, 0xa0000003, &NMD::MIN_S            , 0,
+       CP1_                },        /* MIN.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00023f, 0xa0000203, &NMD::MIN_D            , 0,
+       CP1_                },        /* MIN.D */
+};
+
+
+NMD::Pool NMD::MAX_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x200 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00023f, 0xa000000b, &NMD::MAX_S            , 0,
+       CP1_                },        /* MAX.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00023f, 0xa000020b, &NMD::MAX_D            , 0,
+       CP1_                },        /* MAX.D */
+};
+
+
+NMD::Pool NMD::MINA_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x200 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00023f, 0xa0000023, &NMD::MINA_S           , 0,
+       CP1_                },        /* MINA.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00023f, 0xa0000223, &NMD::MINA_D           , 0,
+       CP1_                },        /* MINA.D */
+};
+
+
+NMD::Pool NMD::MAXA_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x200 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00023f, 0xa000002b, &NMD::MAXA_S           , 0,
+       CP1_                },        /* MAXA.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00023f, 0xa000022b, &NMD::MAXA_D           , 0,
+       CP1_                },        /* MAXA.D */
+};
+
+
+NMD::Pool NMD::CVT_L_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x4000 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000013b, &NMD::CVT_L_S          , 0,
+       CP1_                },        /* CVT.L.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000413b, &NMD::CVT_L_D          , 0,
+       CP1_                },        /* CVT.L.D */
+};
+
+
+NMD::Pool NMD::RSQRT_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x4000 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000023b, &NMD::RSQRT_S          , 0,
+       CP1_                },        /* RSQRT.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000423b, &NMD::RSQRT_D          , 0,
+       CP1_                },        /* RSQRT.D */
+};
+
+
+NMD::Pool NMD::FLOOR_L_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x4000 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000033b, &NMD::FLOOR_L_S        , 0,
+       CP1_                },        /* FLOOR.L.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000433b, &NMD::FLOOR_L_D        , 0,
+       CP1_                },        /* FLOOR.L.D */
+};
+
+
+NMD::Pool NMD::CVT_W_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x4000 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000093b, &NMD::CVT_W_S          , 0,
+       CP1_                },        /* CVT.W.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000493b, &NMD::CVT_W_D          , 0,
+       CP1_                },        /* CVT.W.D */
+};
+
+
+NMD::Pool NMD::SQRT_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x4000 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa0000a3b, &NMD::SQRT_S           , 0,
+       CP1_                },        /* SQRT.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa0004a3b, &NMD::SQRT_D           , 0,
+       CP1_                },        /* SQRT.D */
+};
+
+
+NMD::Pool NMD::FLOOR_W_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x4000 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa0000b3b, &NMD::FLOOR_W_S        , 0,
+       CP1_                },        /* FLOOR.W.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa0004b3b, &NMD::FLOOR_W_D        , 0,
+       CP1_                },        /* FLOOR.W.D */
+};
+
+
+NMD::Pool NMD::RECIP_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x4000 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000123b, &NMD::RECIP_S          , 0,
+       CP1_                },        /* RECIP.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000523b, &NMD::RECIP_D          , 0,
+       CP1_                },        /* RECIP.D */
+};
+
+
+NMD::Pool NMD::CEIL_L_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x4000 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000133b, &NMD::CEIL_L_S         , 0,
+       CP1_                },        /* CEIL.L.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000533b, &NMD::CEIL_L_D         , 0,
+       CP1_                },        /* CEIL.L.D */
+};
+
+
+NMD::Pool NMD::CEIL_W_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x4000 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa0001b3b, &NMD::CEIL_W_S         , 0,
+       CP1_                },        /* CEIL.W.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa0005b3b, &NMD::CEIL_W_D         , 0,
+       CP1_                },        /* CEIL.W.D */
+};
+
+
+NMD::Pool NMD::TRUNC_L_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x4000 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000233b, &NMD::TRUNC_L_S        , 0,
+       CP1_                },        /* TRUNC.L.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000633b, &NMD::TRUNC_L_D        , 0,
+       CP1_                },        /* TRUNC.L.D */
+};
+
+
+NMD::Pool NMD::TRUNC_W_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x4000 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa0002b3b, &NMD::TRUNC_W_S        , 0,
+       CP1_                },        /* TRUNC.W.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa0006b3b, &NMD::TRUNC_W_D        , 0,
+       CP1_                },        /* TRUNC.W.D */
+};
+
+
+NMD::Pool NMD::ROUND_L_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x4000 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000333b, &NMD::ROUND_L_S        , 0,
+       CP1_                },        /* ROUND.L.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000733b, &NMD::ROUND_L_D        , 0,
+       CP1_                },        /* ROUND.L.D */
+};
+
+
+NMD::Pool NMD::ROUND_W_fmt[2] = {  /* .S, .D: 2nd constant differs by 0x4000 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa0003b3b, &NMD::ROUND_W_S        , 0,
+       CP1_                },        /* ROUND.W.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa0007b3b, &NMD::ROUND_W_D        , 0,
+       CP1_                },        /* ROUND.W.D */
+};
+
+
+NMD::Pool NMD::POOL32Fxf_0[64] = { /* 64 slots; 2nd constant steps by 0x100 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000003b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(0) */
+    { pool                , CVT_L_fmt           , 2   , 32,
+       0xfc003fff, 0xa000013b, 0                      , 0,
+       CP1_                },        /* CVT.L.fmt */
+    { pool                , RSQRT_fmt           , 2   , 32,
+       0xfc003fff, 0xa000023b, 0                      , 0,
+       CP1_                },        /* RSQRT.fmt */
+    { pool                , FLOOR_L_fmt         , 2   , 32,
+       0xfc003fff, 0xa000033b, 0                      , 0,
+       CP1_                },        /* FLOOR.L.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000043b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000053b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000063b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000073b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(7) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000083b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(8) */
+    { pool                , CVT_W_fmt           , 2   , 32,
+       0xfc003fff, 0xa000093b, 0                      , 0,
+       CP1_                },        /* CVT.W.fmt */
+    { pool                , SQRT_fmt            , 2   , 32,
+       0xfc003fff, 0xa0000a3b, 0                      , 0,
+       CP1_                },        /* SQRT.fmt */
+    { pool                , FLOOR_W_fmt         , 2   , 32,
+       0xfc003fff, 0xa0000b3b, 0                      , 0,
+       CP1_                },        /* FLOOR.W.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0000c3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(12) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0000d3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(13) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0000e3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(14) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0000f3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(15) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0xa000103b, &NMD::CFC1             , 0,
+       CP1_                },        /* CFC1 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000113b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(17) */
+    { pool                , RECIP_fmt           , 2   , 32,
+       0xfc003fff, 0xa000123b, 0                      , 0,
+       CP1_                },        /* RECIP.fmt */
+    { pool                , CEIL_L_fmt          , 2   , 32,
+       0xfc003fff, 0xa000133b, 0                      , 0,
+       CP1_                },        /* CEIL.L.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000143b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(20) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000153b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(21) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000163b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(22) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000173b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(23) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0xa000183b, &NMD::CTC1             , 0,
+       CP1_                },        /* CTC1 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000193b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(25) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0001a3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(26) */
+    { pool                , CEIL_W_fmt          , 2   , 32,
+       0xfc003fff, 0xa0001b3b, 0                      , 0,
+       CP1_                },        /* CEIL.W.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0001c3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(28) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0001d3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(29) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0001e3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(30) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0001f3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(31) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0xa000203b, &NMD::MFC1             , 0,
+       CP1_                },        /* MFC1 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0xa000213b, &NMD::CVT_S_PL         , 0,
+       CP1_                },        /* CVT.S.PL */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000223b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(34) */
+    { pool                , TRUNC_L_fmt         , 2   , 32,
+       0xfc003fff, 0xa000233b, 0                      , 0,
+       CP1_                },        /* TRUNC.L.fmt */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0xa000243b, &NMD::DMFC1            , 0,
+       CP1_ | MIPS64_      },        /* DMFC1 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000253b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(37) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000263b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(38) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000273b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(39) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0xa000283b, &NMD::MTC1             , 0,
+       CP1_                },        /* MTC1 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0xa000293b, &NMD::CVT_S_PU         , 0,
+       CP1_                },        /* CVT.S.PU */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0002a3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(42) */
+    { pool                , TRUNC_W_fmt         , 2   , 32,
+       0xfc003fff, 0xa0002b3b, 0                      , 0,
+       CP1_                },        /* TRUNC.W.fmt */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0xa0002c3b, &NMD::DMTC1            , 0,
+       CP1_ | MIPS64_      },        /* DMTC1 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0002d3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(45) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0002e3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(46) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0002f3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(47) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0xa000303b, &NMD::MFHC1            , 0,
+       CP1_                },        /* MFHC1 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000313b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(49) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000323b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(50) */
+    { pool                , ROUND_L_fmt         , 2   , 32,
+       0xfc003fff, 0xa000333b, 0                      , 0,
+       CP1_                },        /* ROUND.L.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000343b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(52) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000353b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(53) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000363b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(54) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000373b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(55) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc003fff, 0xa000383b, &NMD::MTHC1            , 0,
+       CP1_                },        /* MTHC1 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa000393b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(57) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0003a3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(58) */
+    { pool                , ROUND_W_fmt         , 2   , 32,
+       0xfc003fff, 0xa0003b3b, 0                      , 0,
+       CP1_                },        /* ROUND.W.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0003c3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(60) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0003d3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(61) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0003e3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(62) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc003fff, 0xa0003f3b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0~*(63) */
+};
+
+
+NMD::Pool NMD::MOV_fmt[4] = {  /* .S, .D, 2 reserved; steps of 0x2000 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000007b, &NMD::MOV_S            , 0,
+       CP1_                },        /* MOV.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000207b, &NMD::MOV_D            , 0,
+       CP1_                },        /* MOV.D */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007fff, 0xa000407b, 0                      , 0,
+       CP1_                },        /* MOV.fmt~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007fff, 0xa000607b, 0                      , 0,
+       CP1_                },        /* MOV.fmt~*(3) */
+};
+
+
+NMD::Pool NMD::ABS_fmt[4] = {  /* match values step 0x2000: bits 14:13 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000037b, &NMD::ABS_S            , 0,
+       CP1_                },        /* ABS.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000237b, &NMD::ABS_D            , 0,
+       CP1_                },        /* ABS.D */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007fff, 0xa000437b, 0                      , 0,
+       CP1_                },        /* ABS.fmt~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007fff, 0xa000637b, 0                      , 0,
+       CP1_                },        /* ABS.fmt~*(3) */
+};  /* slots 0-1 are instructions, slots 2-3 are reserved_block */
+
+
+NMD::Pool NMD::NEG_fmt[4] = {  /* match values step 0x2000: bits 14:13 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa0000b7b, &NMD::NEG_S            , 0,
+       CP1_                },        /* NEG.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa0002b7b, &NMD::NEG_D            , 0,
+       CP1_                },        /* NEG.D */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007fff, 0xa0004b7b, 0                      , 0,
+       CP1_                },        /* NEG.fmt~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007fff, 0xa0006b7b, 0                      , 0,
+       CP1_                },        /* NEG.fmt~*(3) */
+};  /* slots 0-1 are instructions, slots 2-3 are reserved_block */
+
+
+NMD::Pool NMD::CVT_D_fmt[4] = {  /* match values step 0x2000: bits 14:13 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000137b, &NMD::CVT_D_S          , 0,
+       CP1_                },        /* CVT.D.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000337b, &NMD::CVT_D_W          , 0,
+       CP1_                },        /* CVT.D.W */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa000537b, &NMD::CVT_D_L          , 0,
+       CP1_                },        /* CVT.D.L */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007fff, 0xa000737b, 0                      , 0,
+       CP1_                },        /* CVT.D.fmt~*(3) */
+};  /* slots 0-2 are instructions, slot 3 is reserved_block */
+
+
+NMD::Pool NMD::CVT_S_fmt[4] = {  /* match values step 0x2000: bits 14:13 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa0001b7b, &NMD::CVT_S_D          , 0,
+       CP1_                },        /* CVT.S.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa0003b7b, &NMD::CVT_S_W          , 0,
+       CP1_                },        /* CVT.S.W */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007fff, 0xa0005b7b, &NMD::CVT_S_L          , 0,
+       CP1_                },        /* CVT.S.L */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007fff, 0xa0007b7b, 0                      , 0,
+       CP1_                },        /* CVT.S.fmt~*(3) */
+};  /* slots 0-2 are instructions, slot 3 is reserved_block */
+
+
+NMD::Pool NMD::POOL32Fxf_1[32] = {  /* match values step 0x100: bits 12:8 */
+    { pool                , MOV_fmt             , 4   , 32,
+       0xfc001fff, 0xa000007b, 0                      , 0,
+       CP1_                },        /* MOV.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000017b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(1) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000027b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(2) */
+    { pool                , ABS_fmt             , 4   , 32,
+       0xfc001fff, 0xa000037b, 0                      , 0,
+       CP1_                },        /* ABS.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000047b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000057b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000067b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000077b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(7) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000087b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(8) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000097b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(9) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa0000a7b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(10) */
+    { pool                , NEG_fmt             , 4   , 32,
+       0xfc001fff, 0xa0000b7b, 0                      , 0,
+       CP1_                },        /* NEG.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa0000c7b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(12) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa0000d7b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(13) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa0000e7b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(14) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa0000f7b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(15) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000107b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(16) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000117b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(17) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000127b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(18) */
+    { pool                , CVT_D_fmt           , 4   , 32,
+       0xfc001fff, 0xa000137b, 0                      , 0,
+       CP1_                },        /* CVT.D.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000147b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(20) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000157b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(21) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000167b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(22) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000177b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(23) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000187b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(24) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa000197b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(25) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa0001a7b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(26) */
+    { pool                , CVT_S_fmt           , 4   , 32,
+       0xfc001fff, 0xa0001b7b, 0                      , 0,
+       CP1_                },        /* CVT.S.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa0001c7b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(28) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa0001d7b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(29) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa0001e7b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(30) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc001fff, 0xa0001f7b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1~*(31) */
+};  /* sub-pools at slots 0, 3, 11, 19, 27; all others reserved_block */
+
+
+NMD::Pool NMD::POOL32Fxf[4] = {  /* match values step 0x40: bits 7:6 */
+    { pool                , POOL32Fxf_0         , 64  , 32,
+       0xfc0000ff, 0xa000003b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_0 */
+    { pool                , POOL32Fxf_1         , 32  , 32,
+       0xfc0000ff, 0xa000007b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf_1 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0000ff, 0xa00000bb, 0                      , 0,
+       CP1_                },        /* POOL32Fxf~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0000ff, 0xa00000fb, 0                      , 0,
+       CP1_                },        /* POOL32Fxf~*(3) */
+};  /* slots 0-1 are sub-pools, slots 2-3 are reserved_block */
+
+
+NMD::Pool NMD::POOL32F_3[8] = {  /* match values step 0x08: bits 5:3 */
+    { pool                , MIN_fmt             , 2   , 32,
+       0xfc00003f, 0xa0000003, 0                      , 0,
+       CP1_                },        /* MIN.fmt */
+    { pool                , MAX_fmt             , 2   , 32,
+       0xfc00003f, 0xa000000b, 0                      , 0,
+       CP1_                },        /* MAX.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0xa0000013, 0                      , 0,
+       CP1_                },        /* POOL32F_3~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0xa000001b, 0                      , 0,
+       CP1_                },        /* POOL32F_3~*(3) */
+    { pool                , MINA_fmt            , 2   , 32,
+       0xfc00003f, 0xa0000023, 0                      , 0,
+       CP1_                },        /* MINA.fmt */
+    { pool                , MAXA_fmt            , 2   , 32,
+       0xfc00003f, 0xa000002b, 0                      , 0,
+       CP1_                },        /* MAXA.fmt */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0xa0000033, 0                      , 0,
+       CP1_                },        /* POOL32F_3~*(6) */
+    { pool                , POOL32Fxf           , 4   , 32,
+       0xfc00003f, 0xa000003b, 0                      , 0,
+       CP1_                },        /* POOL32Fxf */
+};  /* sub-pools at slots 0, 1, 4, 5, 7; slots 2, 3, 6 are reserved_block */
+
+
+NMD::Pool NMD::CMP_condn_S[32] = {  /* match values step 0x40: bits 10:6 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000005, &NMD::CMP_AF_S         , 0,
+       CP1_                },        /* CMP.AF.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000045, &NMD::CMP_UN_S         , 0,
+       CP1_                },        /* CMP.UN.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000085, &NMD::CMP_EQ_S         , 0,
+       CP1_                },        /* CMP.EQ.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00000c5, &NMD::CMP_UEQ_S        , 0,
+       CP1_                },        /* CMP.UEQ.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000105, &NMD::CMP_LT_S         , 0,
+       CP1_                },        /* CMP.LT.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000145, &NMD::CMP_ULT_S        , 0,
+       CP1_                },        /* CMP.ULT.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000185, &NMD::CMP_LE_S         , 0,
+       CP1_                },        /* CMP.LE.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00001c5, &NMD::CMP_ULE_S        , 0,
+       CP1_                },        /* CMP.ULE.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000205, &NMD::CMP_SAF_S        , 0,
+       CP1_                },        /* CMP.SAF.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000245, &NMD::CMP_SUN_S        , 0,
+       CP1_                },        /* CMP.SUN.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000285, &NMD::CMP_SEQ_S        , 0,
+       CP1_                },        /* CMP.SEQ.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00002c5, &NMD::CMP_SUEQ_S       , 0,
+       CP1_                },        /* CMP.SUEQ.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000305, &NMD::CMP_SLT_S        , 0,
+       CP1_                },        /* CMP.SLT.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000345, &NMD::CMP_SULT_S       , 0,
+       CP1_                },        /* CMP.SULT.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000385, &NMD::CMP_SLE_S        , 0,
+       CP1_                },        /* CMP.SLE.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00003c5, &NMD::CMP_SULE_S       , 0,
+       CP1_                },        /* CMP.SULE.S */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000405, 0                      , 0,
+       CP1_                },        /* CMP.condn.S~*(16) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000445, &NMD::CMP_OR_S         , 0,
+       CP1_                },        /* CMP.OR.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000485, &NMD::CMP_UNE_S        , 0,
+       CP1_                },        /* CMP.UNE.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00004c5, &NMD::CMP_NE_S         , 0,
+       CP1_                },        /* CMP.NE.S */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000505, 0                      , 0,
+       CP1_                },        /* CMP.condn.S~*(20) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000545, 0                      , 0,
+       CP1_                },        /* CMP.condn.S~*(21) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000585, 0                      , 0,
+       CP1_                },        /* CMP.condn.S~*(22) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00005c5, 0                      , 0,
+       CP1_                },        /* CMP.condn.S~*(23) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000605, 0                      , 0,
+       CP1_                },        /* CMP.condn.S~*(24) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000645, &NMD::CMP_SOR_S        , 0,
+       CP1_                },        /* CMP.SOR.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000685, &NMD::CMP_SUNE_S       , 0,
+       CP1_                },        /* CMP.SUNE.S */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00006c5, &NMD::CMP_SNE_S        , 0,
+       CP1_                },        /* CMP.SNE.S */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000705, 0                      , 0,
+       CP1_                },        /* CMP.condn.S~*(28) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000745, 0                      , 0,
+       CP1_                },        /* CMP.condn.S~*(29) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000785, 0                      , 0,
+       CP1_                },        /* CMP.condn.S~*(30) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00007c5, 0                      , 0,
+       CP1_                },        /* CMP.condn.S~*(31) */
+};  /* reserved_block slots: 16, 20-24, 28-31; the rest are instructions */
+
+
+NMD::Pool NMD::CMP_condn_D[32] = {  /* match values step 0x40: bits 10:6 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000015, &NMD::CMP_AF_D         , 0,
+       CP1_                },        /* CMP.AF.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000055, &NMD::CMP_UN_D         , 0,
+       CP1_                },        /* CMP.UN.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000095, &NMD::CMP_EQ_D         , 0,
+       CP1_                },        /* CMP.EQ.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00000d5, &NMD::CMP_UEQ_D        , 0,
+       CP1_                },        /* CMP.UEQ.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000115, &NMD::CMP_LT_D         , 0,
+       CP1_                },        /* CMP.LT.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000155, &NMD::CMP_ULT_D        , 0,
+       CP1_                },        /* CMP.ULT.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000195, &NMD::CMP_LE_D         , 0,
+       CP1_                },        /* CMP.LE.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00001d5, &NMD::CMP_ULE_D        , 0,
+       CP1_                },        /* CMP.ULE.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000215, &NMD::CMP_SAF_D        , 0,
+       CP1_                },        /* CMP.SAF.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000255, &NMD::CMP_SUN_D        , 0,
+       CP1_                },        /* CMP.SUN.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000295, &NMD::CMP_SEQ_D        , 0,
+       CP1_                },        /* CMP.SEQ.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00002d5, &NMD::CMP_SUEQ_D       , 0,
+       CP1_                },        /* CMP.SUEQ.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000315, &NMD::CMP_SLT_D        , 0,
+       CP1_                },        /* CMP.SLT.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000355, &NMD::CMP_SULT_D       , 0,
+       CP1_                },        /* CMP.SULT.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000395, &NMD::CMP_SLE_D        , 0,
+       CP1_                },        /* CMP.SLE.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00003d5, &NMD::CMP_SULE_D       , 0,
+       CP1_                },        /* CMP.SULE.D */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000415, 0                      , 0,
+       CP1_                },        /* CMP.condn.D~*(16) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000455, &NMD::CMP_OR_D         , 0,
+       CP1_                },        /* CMP.OR.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000495, &NMD::CMP_UNE_D        , 0,
+       CP1_                },        /* CMP.UNE.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00004d5, &NMD::CMP_NE_D         , 0,
+       CP1_                },        /* CMP.NE.D */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000515, 0                      , 0,
+       CP1_                },        /* CMP.condn.D~*(20) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000555, 0                      , 0,
+       CP1_                },        /* CMP.condn.D~*(21) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000595, 0                      , 0,
+       CP1_                },        /* CMP.condn.D~*(22) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00005d5, 0                      , 0,
+       CP1_                },        /* CMP.condn.D~*(23) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000615, 0                      , 0,
+       CP1_                },        /* CMP.condn.D~*(24) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000655, &NMD::CMP_SOR_D        , 0,
+       CP1_                },        /* CMP.SOR.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000695, &NMD::CMP_SUNE_D       , 0,
+       CP1_                },        /* CMP.SUNE.D */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00006d5, &NMD::CMP_SNE_D        , 0,
+       CP1_                },        /* CMP.SNE.D */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000715, 0                      , 0,
+       CP1_                },        /* CMP.condn.D~*(28) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000755, 0                      , 0,
+       CP1_                },        /* CMP.condn.D~*(29) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa0000795, 0                      , 0,
+       CP1_                },        /* CMP.condn.D~*(30) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0007ff, 0xa00007d5, 0                      , 0,
+       CP1_                },        /* CMP.condn.D~*(31) */
+};  /* reserved_block slots: 16, 20-24, 28-31; the rest are instructions */
+
+
+NMD::Pool NMD::POOL32F_5[8] = {  /* match values step 0x08: bits 5:3 */
+    { pool                , CMP_condn_S         , 32  , 32,
+       0xfc00003f, 0xa0000005, 0                      , 0,
+       CP1_                },        /* CMP.condn.S */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0xa000000d, 0                      , 0,
+       CP1_                },        /* POOL32F_5~*(1) */
+    { pool                , CMP_condn_D         , 32  , 32,
+       0xfc00003f, 0xa0000015, 0                      , 0,
+       CP1_                },        /* CMP.condn.D */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0xa000001d, 0                      , 0,
+       CP1_                },        /* POOL32F_5~*(3) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0xa0000025, 0                      , 0,
+       CP1_                },        /* POOL32F_5~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0xa000002d, 0                      , 0,
+       CP1_                },        /* POOL32F_5~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0xa0000035, 0                      , 0,
+       CP1_                },        /* POOL32F_5~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0xa000003d, 0                      , 0,
+       CP1_                },        /* POOL32F_5~*(7) */
+};  /* sub-pools at slots 0 and 2; all others are reserved_block */
+
+
+NMD::Pool NMD::POOL32F[8] = {  /* match values step 0x01: bits 2:0 */
+    { pool                , POOL32F_0           , 64  , 32,
+       0xfc000007, 0xa0000000, 0                      , 0,
+       CP1_                },        /* POOL32F_0 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000007, 0xa0000001, 0                      , 0,
+       CP1_                },        /* POOL32F~*(1) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000007, 0xa0000002, 0                      , 0,
+       CP1_                },        /* POOL32F~*(2) */
+    { pool                , POOL32F_3           , 8   , 32,
+       0xfc000007, 0xa0000003, 0                      , 0,
+       CP1_                },        /* POOL32F_3 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000007, 0xa0000004, 0                      , 0,
+       CP1_                },        /* POOL32F~*(4) */
+    { pool                , POOL32F_5           , 8   , 32,
+       0xfc000007, 0xa0000005, 0                      , 0,
+       CP1_                },        /* POOL32F_5 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000007, 0xa0000006, 0                      , 0,
+       CP1_                },        /* POOL32F~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000007, 0xa0000007, 0                      , 0,
+       CP1_                },        /* POOL32F~*(7) */
+};  /* sub-pools at slots 0, 3, 5; all others are reserved_block */
+
+
+NMD::Pool NMD::POOL32S_0[64] = {
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000000, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(0) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000008, &NMD::DLSA             , 0,
+       MIPS64_             },        /* DLSA */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000010, &NMD::DSLLV            , 0,
+       MIPS64_             },        /* DSLLV */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000018, &NMD::DMUL             , 0,
+       MIPS64_             },        /* DMUL */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000020, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000028, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000030, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000038, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(7) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000040, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(8) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000048, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(9) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000050, &NMD::DSRLV            , 0,
+       MIPS64_             },        /* DSRLV */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000058, &NMD::DMUH             , 0,
+       MIPS64_             },        /* DMUH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000060, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(12) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000068, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(13) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000070, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(14) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000078, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(15) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000080, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(16) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000088, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(17) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000090, &NMD::DSRAV            , 0,
+       MIPS64_             },        /* DSRAV */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000098, &NMD::DMULU            , 0,
+       MIPS64_             },        /* DMULU */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00000a0, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(20) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00000a8, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(21) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00000b0, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(22) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00000b8, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(23) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00000c0, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(24) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00000c8, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(25) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00000d0, &NMD::DROTRV           , 0,
+       MIPS64_             },        /* DROTRV */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00000d8, &NMD::DMUHU            , 0,
+       MIPS64_             },        /* DMUHU */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00000e0, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(28) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00000e8, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(29) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00000f0, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(30) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00000f8, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(31) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000100, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(32) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000108, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(33) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000110, &NMD::DADD             , 0,
+       MIPS64_             },        /* DADD */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000118, &NMD::DDIV             , 0,
+       MIPS64_             },        /* DDIV */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000120, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(36) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000128, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(37) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000130, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(38) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000138, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(39) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000140, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(40) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000148, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(41) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000150, &NMD::DADDU            , 0,
+       MIPS64_             },        /* DADDU */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000158, &NMD::DMOD             , 0,
+       MIPS64_             },        /* DMOD */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000160, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(44) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000168, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(45) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000170, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(46) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000178, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(47) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000180, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(48) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000188, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(49) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000190, &NMD::DSUB             , 0,
+       MIPS64_             },        /* DSUB */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc0000198, &NMD::DDIVU            , 0,
+       MIPS64_             },        /* DDIVU */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00001a0, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(52) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00001a8, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(53) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00001b0, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(54) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00001b8, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(55) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00001c0, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(56) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00001c8, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(57) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00001d0, &NMD::DSUBU            , 0,
+       MIPS64_             },        /* DSUBU */
+    { instruction         , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00001d8, &NMD::DMODU            , 0,
+       MIPS64_             },        /* DMODU */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00001e0, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(60) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00001e8, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(61) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00001f0, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(62) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00001f8, 0                      , 0,
+       0x0                 },        /* POOL32S_0~*(63) */
+};
+
+
+NMD::Pool NMD::POOL32Sxf_4[128] = { /* entry i has 0xc000013c + (i << 9) as its 6th field; only 37 (DCLO) and 45 (DCLZ) are instructions */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000013c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(0) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000033c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(1) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000053c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000073c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(3) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000093c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0000b3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0000d3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0000f3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(7) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000113c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(8) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000133c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(9) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000153c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(10) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000173c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(11) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000193c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(12) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0001b3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(13) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0001d3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(14) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0001f3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(15) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000213c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(16) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000233c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(17) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000253c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(18) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000273c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(19) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000293c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(20) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0002b3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(21) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0002d3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(22) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0002f3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(23) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000313c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(24) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000333c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(25) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000353c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(26) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000373c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(27) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000393c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(28) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0003b3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(29) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0003d3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(30) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0003f3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(31) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000413c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(32) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000433c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(33) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000453c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(34) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000473c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(35) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000493c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(36) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0004b3c, &NMD::DCLO             , 0,
+       MIPS64_             },        /* DCLO */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0004d3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(38) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0004f3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(39) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000513c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(40) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000533c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(41) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000553c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(42) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000573c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(43) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000593c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(44) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0005b3c, &NMD::DCLZ             , 0,
+       MIPS64_             },        /* DCLZ */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0005d3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(46) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0005f3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(47) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000613c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(48) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000633c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(49) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000653c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(50) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000673c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(51) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000693c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(52) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0006b3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(53) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0006d3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(54) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0006f3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(55) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000713c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(56) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000733c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(57) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000753c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(58) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000773c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(59) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000793c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(60) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0007b3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(61) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0007d3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(62) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0007f3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(63) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000813c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(64) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000833c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(65) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000853c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(66) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000873c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(67) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000893c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(68) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0008b3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(69) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0008d3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(70) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0008f3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(71) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000913c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(72) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000933c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(73) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000953c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(74) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000973c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(75) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000993c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(76) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0009b3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(77) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0009d3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(78) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc0009f3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(79) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000a13c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(80) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000a33c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(81) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000a53c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(82) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000a73c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(83) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000a93c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(84) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000ab3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(85) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000ad3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(86) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000af3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(87) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000b13c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(88) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000b33c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(89) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000b53c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(90) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000b73c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(91) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000b93c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(92) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000bb3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(93) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000bd3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(94) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000bf3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(95) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000c13c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(96) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000c33c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(97) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000c53c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(98) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000c73c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(99) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000c93c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(100) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000cb3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(101) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000cd3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(102) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000cf3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(103) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000d13c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(104) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000d33c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(105) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000d53c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(106) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000d73c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(107) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000d93c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(108) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000db3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(109) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000dd3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(110) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000df3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(111) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000e13c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(112) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000e33c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(113) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000e53c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(114) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000e73c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(115) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000e93c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(116) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000eb3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(117) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000ed3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(118) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000ef3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(119) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000f13c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(120) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000f33c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(121) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000f53c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(122) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000f73c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(123) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000f93c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(124) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000fb3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(125) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000fd3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(126) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00ffff, 0xc000ff3c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4~*(127) */
+};
+
+
+NMD::Pool NMD::POOL32Sxf[8] = { /* entry i has 0xc000003c + (i << 6) as its 6th field; only entry 4 is used (pool POOL32Sxf_4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc000003c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf~*(0) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc000007c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf~*(1) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00000bc, 0                      , 0,
+       0x0                 },        /* POOL32Sxf~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00000fc, 0                      , 0,
+       0x0                 },        /* POOL32Sxf~*(3) */
+    { pool                , POOL32Sxf_4         , 128 , 32,
+       0xfc0001ff, 0xc000013c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf_4 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc000017c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00001bc, 0                      , 0,
+       0x0                 },        /* POOL32Sxf~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc0001ff, 0xc00001fc, 0                      , 0,
+       0x0                 },        /* POOL32Sxf~*(7) */
+};
+
+
+NMD::Pool NMD::POOL32S_4[8] = { /* entry i has 0xc0000004 + (i << 3) as its 6th field: EXTD, EXTD32, five reserved, then pool POOL32Sxf */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00003f, 0xc0000004, &NMD::EXTD             , 0,
+       MIPS64_             },        /* EXTD */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00003f, 0xc000000c, &NMD::EXTD32           , 0,
+       MIPS64_             },        /* EXTD32 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0xc0000014, 0                      , 0,
+       0x0                 },        /* POOL32S_4~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0xc000001c, 0                      , 0,
+       0x0                 },        /* POOL32S_4~*(3) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0xc0000024, 0                      , 0,
+       0x0                 },        /* POOL32S_4~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0xc000002c, 0                      , 0,
+       0x0                 },        /* POOL32S_4~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00003f, 0xc0000034, 0                      , 0,
+       0x0                 },        /* POOL32S_4~*(6) */
+    { pool                , POOL32Sxf           , 8   , 32,
+       0xfc00003f, 0xc000003c, 0                      , 0,
+       0x0                 },        /* POOL32Sxf */
+};
+
+
+NMD::Pool NMD::POOL32S[8] = { /* entry i has 0xc0000000 + i as its 6th field; entries 0 and 4 are pools (POOL32S_0, POOL32S_4), the rest reserved */
+    { pool                , POOL32S_0           , 64  , 32,
+       0xfc000007, 0xc0000000, 0                      , 0,
+       0x0                 },        /* POOL32S_0 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000007, 0xc0000001, 0                      , 0,
+       0x0                 },        /* POOL32S~*(1) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000007, 0xc0000002, 0                      , 0,
+       0x0                 },        /* POOL32S~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000007, 0xc0000003, 0                      , 0,
+       0x0                 },        /* POOL32S~*(3) */
+    { pool                , POOL32S_4           , 8   , 32,
+       0xfc000007, 0xc0000004, 0                      , 0,
+       0x0                 },        /* POOL32S_4 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000007, 0xc0000005, 0                      , 0,
+       0x0                 },        /* POOL32S~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000007, 0xc0000006, 0                      , 0,
+       0x0                 },        /* POOL32S~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000007, 0xc0000007, 0                      , 0,
+       0x0                 },        /* POOL32S~*(7) */
+};
+
+
+NMD::Pool NMD::P_LUI[2] = {  /* entries split by bit 1 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000002, 0xe0000000, &NMD::LUI              , 0,
+       0x0                 },        /* LUI */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000002, 0xe0000002, &NMD::ALUIPC           , 0,
+       0x0                 },        /* ALUIPC */
+};
+
+
+NMD::Pool NMD::P_GP_LH[2] = {  /* under P_GP_BH; split by bit 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1c0001, 0x44100000, &NMD::LH_GP_           , 0,
+       0x0                 },        /* LH[GP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1c0001, 0x44100001, &NMD::LHU_GP_          , 0,
+       0x0                 },        /* LHU[GP] */
+};
+
+
+NMD::Pool NMD::P_GP_SH[2] = {  /* under P_GP_BH; split by bit 0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1c0001, 0x44140000, &NMD::SH_GP_           , 0,
+       0x0                 },        /* SH[GP] */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1c0001, 0x44140001, 0                      , 0,
+       0x0                 },        /* P.GP.SH~*(1) */
+};
+
+
+NMD::Pool NMD::P_GP_CP1[4] = {  /* under P_GP_BH; split by bits 1..0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1c0003, 0x44180000, &NMD::LWC1_GP_         , 0,
+       CP1_                },        /* LWC1[GP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1c0003, 0x44180001, &NMD::SWC1_GP_         , 0,
+       CP1_                },        /* SWC1[GP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1c0003, 0x44180002, &NMD::LDC1_GP_         , 0,
+       CP1_                },        /* LDC1[GP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1c0003, 0x44180003, &NMD::SDC1_GP_         , 0,
+       CP1_                },        /* SDC1[GP] */
+};
+
+
+NMD::Pool NMD::P_GP_M64[4] = {  /* under P_GP_BH; split by bits 1..0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1c0003, 0x441c0000, &NMD::LWU_GP_          , 0,
+       MIPS64_             },        /* LWU[GP] */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1c0003, 0x441c0001, 0                      , 0,
+       0x0                 },        /* P.GP.M64~*(1) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1c0003, 0x441c0002, 0                      , 0,
+       0x0                 },        /* P.GP.M64~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1c0003, 0x441c0003, 0                      , 0,
+       0x0                 },        /* P.GP.M64~*(3) */
+};
+
+
+NMD::Pool NMD::P_GP_BH[8] = {  /* entries split by bits 20..18 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1c0000, 0x44000000, &NMD::LB_GP_           , 0,
+       0x0                 },        /* LB[GP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1c0000, 0x44040000, &NMD::SB_GP_           , 0,
+       0x0                 },        /* SB[GP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1c0000, 0x44080000, &NMD::LBU_GP_          , 0,
+       0x0                 },        /* LBU[GP] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc1c0000, 0x440c0000, &NMD::ADDIU_GP_B_      , 0,
+       0x0                 },        /* ADDIU[GP.B] */
+    { pool                , P_GP_LH             , 2   , 32,
+       0xfc1c0000, 0x44100000, 0                      , 0,
+       0x0                 },        /* P.GP.LH */
+    { pool                , P_GP_SH             , 2   , 32,
+       0xfc1c0000, 0x44140000, 0                      , 0,
+       0x0                 },        /* P.GP.SH */
+    { pool                , P_GP_CP1            , 4   , 32,
+       0xfc1c0000, 0x44180000, 0                      , 0,
+       0x0                 },        /* P.GP.CP1 */
+    { pool                , P_GP_M64            , 4   , 32,
+       0xfc1c0000, 0x441c0000, 0                      , 0,
+       0x0                 },        /* P.GP.M64 */
+};
+
+
+NMD::Pool NMD::P_LS_U12[16] = {  /* entries split by bits 15..12 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x84000000, &NMD::LB_U12_          , 0,
+       0x0                 },        /* LB[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x84001000, &NMD::SB_U12_          , 0,
+       0x0                 },        /* SB[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x84002000, &NMD::LBU_U12_         , 0,
+       0x0                 },        /* LBU[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x84003000, &NMD::PREF_U12_        , 0,
+       0x0                 },        /* PREF[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x84004000, &NMD::LH_U12_          , 0,
+       0x0                 },        /* LH[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x84005000, &NMD::SH_U12_          , 0,
+       0x0                 },        /* SH[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x84006000, &NMD::LHU_U12_         , 0,
+       0x0                 },        /* LHU[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x84007000, &NMD::LWU_U12_         , 0,
+       MIPS64_             },        /* LWU[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x84008000, &NMD::LW_U12_          , 0,
+       0x0                 },        /* LW[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x84009000, &NMD::SW_U12_          , 0,
+       0x0                 },        /* SW[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x8400a000, &NMD::LWC1_U12_        , 0,
+       CP1_                },        /* LWC1[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x8400b000, &NMD::SWC1_U12_        , 0,
+       CP1_                },        /* SWC1[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x8400c000, &NMD::LD_U12_          , 0,
+       MIPS64_             },        /* LD[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x8400d000, &NMD::SD_U12_          , 0,
+       MIPS64_             },        /* SD[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x8400e000, &NMD::LDC1_U12_        , 0,
+       CP1_                },        /* LDC1[U12] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc00f000, 0x8400f000, &NMD::SDC1_U12_        , 0,
+       CP1_                },        /* SDC1[U12] */
+};
+
+
+NMD::Pool NMD::P_PREF_S9_[2] = {  /* under P_LS_S0; SYNCI: bits 25..21 set */
+    { instruction         , 0                   , 0   , 32,
+       0xffe07f00, 0xa7e01800, &NMD::SYNCI            , 0,
+       0x0                 },        /* SYNCI */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4001800, &NMD::PREF_S9_         , &NMD::PREF_S9__cond    ,
+       0x0                 },        /* PREF[S9] */
+};
+
+
+NMD::Pool NMD::P_LS_S0[16] = {  /* under P_LS_S9; split by bits 14..11 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4000000, &NMD::LB_S9_           , 0,
+       0x0                 },        /* LB[S9] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4000800, &NMD::SB_S9_           , 0,
+       0x0                 },        /* SB[S9] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4001000, &NMD::LBU_S9_          , 0,
+       0x0                 },        /* LBU[S9] */
+    { pool                , P_PREF_S9_          , 2   , 32,
+       0xfc007f00, 0xa4001800, 0                      , 0,
+       0x0                 },        /* P.PREF[S9] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4002000, &NMD::LH_S9_           , 0,
+       0x0                 },        /* LH[S9] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4002800, &NMD::SH_S9_           , 0,
+       0x0                 },        /* SH[S9] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4003000, &NMD::LHU_S9_          , 0,
+       0x0                 },        /* LHU[S9] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4003800, &NMD::LWU_S9_          , 0,
+       MIPS64_             },        /* LWU[S9] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4004000, &NMD::LW_S9_           , 0,
+       0x0                 },        /* LW[S9] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4004800, &NMD::SW_S9_           , 0,
+       0x0                 },        /* SW[S9] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4005000, &NMD::LWC1_S9_         , 0,
+       CP1_                },        /* LWC1[S9] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4005800, &NMD::SWC1_S9_         , 0,
+       CP1_                },        /* SWC1[S9] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4006000, &NMD::LD_S9_           , 0,
+       MIPS64_             },        /* LD[S9] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4006800, &NMD::SD_S9_           , 0,
+       MIPS64_             },        /* SD[S9] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4007000, &NMD::LDC1_S9_         , 0,
+       CP1_                },        /* LDC1[S9] */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4007800, &NMD::SDC1_S9_         , 0,
+       CP1_                },        /* SDC1[S9] */
+};
+
+
+NMD::Pool NMD::ASET_ACLR[2] = {  /* under P_LS_S1; split by bit 25 */
+    { instruction         , 0                   , 0   , 32,
+       0xfe007f00, 0xa4001100, &NMD::ASET             , 0,
+       MCU_                },        /* ASET */
+    { instruction         , 0                   , 0   , 32,
+       0xfe007f00, 0xa6001100, &NMD::ACLR             , 0,
+       MCU_                },        /* ACLR */
+};
+
+
+NMD::Pool NMD::P_LL[4] = {  /* under P_LS_S1; split by bits 1..0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005100, &NMD::LL               , 0,
+       0x0                 },        /* LL */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005101, &NMD::LLWP             , 0,
+       XNP_                },        /* LLWP */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005102, 0                      , 0,
+       0x0                 },        /* P.LL~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005103, 0                      , 0,
+       0x0                 },        /* P.LL~*(3) */
+};
+
+
+NMD::Pool NMD::P_SC[4] = {  /* under P_LS_S1; split by bits 1..0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005900, &NMD::SC               , 0,
+       0x0                 },        /* SC */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005901, &NMD::SCWP             , 0,
+       XNP_                },        /* SCWP */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005902, 0                      , 0,
+       0x0                 },        /* P.SC~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005903, 0                      , 0,
+       0x0                 },        /* P.SC~*(3) */
+};
+
+
+NMD::Pool NMD::P_LLD[8] = {  /* under P_LS_S1; split by bits 2..0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007100, &NMD::LLD              , 0,
+       MIPS64_             },        /* LLD */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007101, &NMD::LLDP             , 0,
+       MIPS64_             },        /* LLDP */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007102, 0                      , 0,
+       0x0                 },        /* P.LLD~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007103, 0                      , 0,
+       0x0                 },        /* P.LLD~*(3) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007104, 0                      , 0,
+       0x0                 },        /* P.LLD~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007105, 0                      , 0,
+       0x0                 },        /* P.LLD~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007106, 0                      , 0,
+       0x0                 },        /* P.LLD~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007107, 0                      , 0,
+       0x0                 },        /* P.LLD~*(7) */
+};
+
+
+NMD::Pool NMD::P_SCD[8] = {  /* under P_LS_S1; split by bits 2..0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007900, &NMD::SCD              , 0,
+       MIPS64_             },        /* SCD */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007901, &NMD::SCDP             , 0,
+       MIPS64_             },        /* SCDP */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007902, 0                      , 0,
+       0x0                 },        /* P.SCD~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007903, 0                      , 0,
+       0x0                 },        /* P.SCD~*(3) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007904, 0                      , 0,
+       0x0                 },        /* P.SCD~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007905, 0                      , 0,
+       0x0                 },        /* P.SCD~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007906, 0                      , 0,
+       0x0                 },        /* P.SCD~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f07, 0xa4007907, 0                      , 0,
+       0x0                 },        /* P.SCD~*(7) */
+};
+
+
+NMD::Pool NMD::P_LS_S1[16] = {  /* under P_LS_S9; split by bits 14..11 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f00, 0xa4000100, 0                      , 0,
+       0x0                 },        /* P.LS.S1~*(0) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f00, 0xa4000900, 0                      , 0,
+       0x0                 },        /* P.LS.S1~*(1) */
+    { pool                , ASET_ACLR           , 2   , 32,
+       0xfc007f00, 0xa4001100, 0                      , 0,
+       0x0                 },        /* ASET_ACLR */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f00, 0xa4001900, 0                      , 0,
+       0x0                 },        /* P.LS.S1~*(3) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4002100, &NMD::UALH             , 0,
+       XMMS_               },        /* UALH */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4002900, &NMD::UASH             , 0,
+       XMMS_               },        /* UASH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f00, 0xa4003100, 0                      , 0,
+       0x0                 },        /* P.LS.S1~*(6) */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4003900, &NMD::CACHE            , 0,
+       CP0_                },        /* CACHE */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4004100, &NMD::LWC2             , 0,
+       CP2_                },        /* LWC2 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4004900, &NMD::SWC2             , 0,
+       CP2_                },        /* SWC2 */
+    { pool                , P_LL                , 4   , 32,
+       0xfc007f00, 0xa4005100, 0                      , 0,
+       0x0                 },        /* P.LL */
+    { pool                , P_SC                , 4   , 32,
+       0xfc007f00, 0xa4005900, 0                      , 0,
+       0x0                 },        /* P.SC */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4006100, &NMD::LDC2             , 0,
+       CP2_                },        /* LDC2 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4006900, &NMD::SDC2             , 0,
+       CP2_                },        /* SDC2 */
+    { pool                , P_LLD               , 8   , 32,
+       0xfc007f00, 0xa4007100, 0                      , 0,
+       0x0                 },        /* P.LLD */
+    { pool                , P_SCD               , 8   , 32,
+       0xfc007f00, 0xa4007900, 0                      , 0,
+       0x0                 },        /* P.SCD */
+};
+
+
+NMD::Pool NMD::P_PREFE[2] = {  /* under P_LS_E0; SYNCIE: bits 25..21 set */
+    { instruction         , 0                   , 0   , 32,
+       0xffe07f00, 0xa7e01a00, &NMD::SYNCIE           , 0,
+       CP0_ | EVA_         },        /* SYNCIE */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4001a00, &NMD::PREFE            , &NMD::PREFE_cond       ,
+       CP0_ | EVA_         },        /* PREFE */
+};
+
+
+NMD::Pool NMD::P_LLE[4] = {  /* under P_LS_E0; split by bits 1..0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005200, &NMD::LLE              , 0,
+       CP0_ | EVA_         },        /* LLE */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005201, &NMD::LLWPE            , 0,
+       CP0_ | EVA_         },        /* LLWPE */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005202, 0                      , 0,
+       0x0                 },        /* P.LLE~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005203, 0                      , 0,
+       0x0                 },        /* P.LLE~*(3) */
+};
+
+
+NMD::Pool NMD::P_SCE[4] = {  /* under P_LS_E0; split by bits 1..0 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005a00, &NMD::SCE              , 0,
+       CP0_ | EVA_         },        /* SCE */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005a01, &NMD::SCWPE            , 0,
+       CP0_ | EVA_         },        /* SCWPE */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005a02, 0                      , 0,
+       0x0                 },        /* P.SCE~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f03, 0xa4005a03, 0                      , 0,
+       0x0                 },        /* P.SCE~*(3) */
+};
+
+
+NMD::Pool NMD::P_LS_E0[16] = {  /* under P_LS_S9; split by bits 14..11 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4000200, &NMD::LBE              , 0,
+       CP0_ | EVA_         },        /* LBE */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4000a00, &NMD::SBE              , 0,
+       CP0_ | EVA_         },        /* SBE */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4001200, &NMD::LBUE             , 0,
+       CP0_ | EVA_         },        /* LBUE */
+    { pool                , P_PREFE             , 2   , 32,
+       0xfc007f00, 0xa4001a00, 0                      , 0,
+       0x0                 },        /* P.PREFE */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4002200, &NMD::LHE              , 0,
+       CP0_ | EVA_         },        /* LHE */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4002a00, &NMD::SHE              , 0,
+       CP0_ | EVA_         },        /* SHE */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4003200, &NMD::LHUE             , 0,
+       CP0_ | EVA_         },        /* LHUE */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4003a00, &NMD::CACHEE           , 0,
+       CP0_ | EVA_         },        /* CACHEE */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4004200, &NMD::LWE              , 0,
+       CP0_ | EVA_         },        /* LWE */
+    { instruction         , 0                   , 0   , 32,
+       0xfc007f00, 0xa4004a00, &NMD::SWE              , 0,
+       CP0_ | EVA_         },        /* SWE */
+    { pool                , P_LLE               , 4   , 32,
+       0xfc007f00, 0xa4005200, 0                      , 0,
+       0x0                 },        /* P.LLE */
+    { pool                , P_SCE               , 4   , 32,
+       0xfc007f00, 0xa4005a00, 0                      , 0,
+       0x0                 },        /* P.SCE */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f00, 0xa4006200, 0                      , 0,
+       0x0                 },        /* P.LS.E0~*(12) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f00, 0xa4006a00, 0                      , 0,
+       0x0                 },        /* P.LS.E0~*(13) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f00, 0xa4007200, 0                      , 0,
+       0x0                 },        /* P.LS.E0~*(14) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc007f00, 0xa4007a00, 0                      , 0,
+       0x0                 },        /* P.LS.E0~*(15) */
+};
+
+
+NMD::Pool NMD::P_LS_WM[2] = {  /* under P_LS_S9; split by bit 11 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000f00, 0xa4000400, &NMD::LWM              , 0,
+       XMMS_               },        /* LWM */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000f00, 0xa4000c00, &NMD::SWM              , 0,
+       XMMS_               },        /* SWM */
+};
+
+
+NMD::Pool NMD::P_LS_UAWM[2] = {  /* under P_LS_S9; split by bit 11 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000f00, 0xa4000500, &NMD::UALWM            , 0,
+       XMMS_               },        /* UALWM */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000f00, 0xa4000d00, &NMD::UASWM            , 0,
+       XMMS_               },        /* UASWM */
+};
+
+
+NMD::Pool NMD::P_LS_DM[2] = {  /* under P_LS_S9; split by bit 11 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000f00, 0xa4000600, &NMD::LDM              , 0,
+       MIPS64_             },        /* LDM */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000f00, 0xa4000e00, &NMD::SDM              , 0,
+       MIPS64_             },        /* SDM */
+};
+
+
+NMD::Pool NMD::P_LS_UADM[2] = {  /* under P_LS_S9; split by bit 11 */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000f00, 0xa4000700, &NMD::UALDM            , 0,
+       MIPS64_             },        /* UALDM */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000f00, 0xa4000f00, &NMD::UASDM            , 0,
+       MIPS64_             },        /* UASDM */
+};
+
+
+NMD::Pool NMD::P_LS_S9[8] = {  /* entries split by bits 10..8 */
+    { pool                , P_LS_S0             , 16  , 32,
+       0xfc000700, 0xa4000000, 0                      , 0,
+       0x0                 },        /* P.LS.S0 */
+    { pool                , P_LS_S1             , 16  , 32,
+       0xfc000700, 0xa4000100, 0                      , 0,
+       0x0                 },        /* P.LS.S1 */
+    { pool                , P_LS_E0             , 16  , 32,
+       0xfc000700, 0xa4000200, 0                      , 0,
+       0x0                 },        /* P.LS.E0 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000700, 0xa4000300, 0                      , 0,
+       0x0                 },        /* P.LS.S9~*(3) */
+    { pool                , P_LS_WM             , 2   , 32,
+       0xfc000700, 0xa4000400, 0                      , 0,
+       0x0                 },        /* P.LS.WM */
+    { pool                , P_LS_UAWM           , 2   , 32,
+       0xfc000700, 0xa4000500, 0                      , 0,
+       0x0                 },        /* P.LS.UAWM */
+    { pool                , P_LS_DM             , 2   , 32,
+       0xfc000700, 0xa4000600, 0                      , 0,
+       0x0                 },        /* P.LS.DM */
+    { pool                , P_LS_UADM           , 2   , 32,
+       0xfc000700, 0xa4000700, 0                      , 0,
+       0x0                 },        /* P.LS.UADM */
+};
+
+
+NMD::Pool NMD::P_BAL[2] = {  /* entries split by bit 25 */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfe000000, 0x28000000, &NMD::BC_32_           , 0,
+       0x0                 },        /* BC[32] */
+    { call_instruction    , 0                   , 0   , 32,
+       0xfe000000, 0x2a000000, &NMD::BALC_32_         , 0,
+       0x0                 },        /* BALC[32] */
+};
+
+
+NMD::Pool NMD::P_BALRSC[2] = {  /* under P_J; BRSC: bits 25..21 clear */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xffe0f000, 0x48008000, &NMD::BRSC             , 0,
+       0x0                 },        /* BRSC */
+    { call_instruction    , 0                   , 0   , 32,
+       0xfc00f000, 0x48008000, &NMD::BALRSC           , &NMD::BALRSC_cond      ,
+       0x0                 },        /* BALRSC */
+};
+
+
+NMD::Pool NMD::P_J[16] = {  /* entries split by bits 15..12 */
+    { call_instruction    , 0                   , 0   , 32,
+       0xfc00f000, 0x48000000, &NMD::JALRC_32_        , 0,
+       0x0                 },        /* JALRC[32] */
+    { call_instruction    , 0                   , 0   , 32,
+       0xfc00f000, 0x48001000, &NMD::JALRC_HB         , 0,
+       0x0                 },        /* JALRC.HB */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f000, 0x48002000, 0                      , 0,
+       0x0                 },        /* P.J~*(2) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f000, 0x48003000, 0                      , 0,
+       0x0                 },        /* P.J~*(3) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f000, 0x48004000, 0                      , 0,
+       0x0                 },        /* P.J~*(4) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f000, 0x48005000, 0                      , 0,
+       0x0                 },        /* P.J~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f000, 0x48006000, 0                      , 0,
+       0x0                 },        /* P.J~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f000, 0x48007000, 0                      , 0,
+       0x0                 },        /* P.J~*(7) */
+    { pool                , P_BALRSC            , 2   , 32,
+       0xfc00f000, 0x48008000, 0                      , 0,
+       0x0                 },        /* P.BALRSC */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f000, 0x48009000, 0                      , 0,
+       0x0                 },        /* P.J~*(9) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f000, 0x4800a000, 0                      , 0,
+       0x0                 },        /* P.J~*(10) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f000, 0x4800b000, 0                      , 0,
+       0x0                 },        /* P.J~*(11) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f000, 0x4800c000, 0                      , 0,
+       0x0                 },        /* P.J~*(12) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f000, 0x4800d000, 0                      , 0,
+       0x0                 },        /* P.J~*(13) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f000, 0x4800e000, 0                      , 0,
+       0x0                 },        /* P.J~*(14) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00f000, 0x4800f000, 0                      , 0,
+       0x0                 },        /* P.J~*(15) */
+};
+
+
+NMD::Pool NMD::P_BR3A[32] = {
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc1fc000, 0x88004000, &NMD::BC1EQZC          , 0,
+       CP1_                },        /* BC1EQZC */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc1fc000, 0x88014000, &NMD::BC1NEZC          , 0,
+       CP1_                },        /* BC1NEZC */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc1fc000, 0x88024000, &NMD::BC2EQZC          , 0,
+       CP2_                },        /* BC2EQZC */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc1fc000, 0x88034000, &NMD::BC2NEZC          , 0,
+       CP2_                },        /* BC2NEZC */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc1fc000, 0x88044000, &NMD::BPOSGE32C        , 0,
+       DSP_                },        /* BPOSGE32C */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x88054000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(5) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x88064000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(6) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x88074000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(7) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x88084000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(8) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x88094000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(9) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x880a4000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(10) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x880b4000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(11) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x880c4000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(12) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x880d4000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(13) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x880e4000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(14) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x880f4000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(15) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x88104000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(16) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x88114000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(17) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x88124000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(18) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x88134000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(19) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x88144000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(20) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x88154000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(21) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x88164000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(22) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x88174000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(23) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x88184000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(24) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x88194000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(25) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x881a4000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(26) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x881b4000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(27) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x881c4000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(28) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x881d4000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(29) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x881e4000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(30) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc1fc000, 0x881f4000, 0                      , 0,
+       0x0                 },        /* P.BR3A~*(31) */
+};
+
+/* P.BR1: 32-bit entries whose values differ only in bits 15..14. */
+NMD::Pool NMD::P_BR1[4] = {
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc00c000, 0x88000000, &NMD::BEQC_32_         , 0,
+       0x0                 },        /* BEQC[32] */
+    { pool                , P_BR3A              , 32  , 32,
+       0xfc00c000, 0x88004000, 0                      , 0,
+       0x0                 },        /* P.BR3A */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc00c000, 0x88008000, &NMD::BGEC             , 0,
+       0x0                 },        /* BGEC */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc00c000, 0x8800c000, &NMD::BGEUC            , 0,
+       0x0                 },        /* BGEUC */
+};
+
+/* P.BR2: 32-bit entries differing in bits 15..14; slot 1 is reserved. */
+NMD::Pool NMD::P_BR2[4] = {
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc00c000, 0xa8000000, &NMD::BNEC_32_         , 0,
+       0x0                 },        /* BNEC[32] */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc00c000, 0xa8004000, 0                      , 0,
+       0x0                 },        /* P.BR2~*(1) */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc00c000, 0xa8008000, &NMD::BLTC             , 0,
+       0x0                 },        /* BLTC */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc00c000, 0xa800c000, &NMD::BLTUC            , 0,
+       0x0                 },        /* BLTUC */
+};
+
+/* P.BRI: 32-bit branch entries whose values differ in bits 20..18. */
+NMD::Pool NMD::P_BRI[8] = {
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc1c0000, 0xc8000000, &NMD::BEQIC            , 0,
+       0x0                 },        /* BEQIC */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc1c0000, 0xc8040000, &NMD::BBEQZC           , 0,
+       XMMS_               },        /* BBEQZC */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc1c0000, 0xc8080000, &NMD::BGEIC            , 0,
+       0x0                 },        /* BGEIC */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc1c0000, 0xc80c0000, &NMD::BGEIUC           , 0,
+       0x0                 },        /* BGEIUC */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc1c0000, 0xc8100000, &NMD::BNEIC            , 0,
+       0x0                 },        /* BNEIC */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc1c0000, 0xc8140000, &NMD::BBNEZC           , 0,
+       XMMS_               },        /* BBNEZC */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc1c0000, 0xc8180000, &NMD::BLTIC            , 0,
+       0x0                 },        /* BLTIC */
+    { branch_instruction  , 0                   , 0   , 32,
+       0xfc1c0000, 0xc81c0000, &NMD::BLTIUC           , 0,
+       0x0                 },        /* BLTIUC */
+};
+
+/* P32: values vary in bits 31..29 and 27..26; POOL48I alone is 48-bit. */
+NMD::Pool NMD::P32[32] = {
+    { pool                , P_ADDIU             , 2   , 32,
+       0xfc000000, 0x00000000, 0                      , 0,
+       0x0                 },        /* P.ADDIU */
+    { pool                , P32A                , 8   , 32,
+       0xfc000000, 0x20000000, 0                      , 0,
+       0x0                 },        /* P32A */
+    { pool                , P_GP_W              , 4   , 32,
+       0xfc000000, 0x40000000, 0                      , 0,
+       0x0                 },        /* P.GP.W */
+    { pool                , POOL48I             , 32  , 48,
+       0xfc0000000000ull, 0x600000000000ull, 0                      , 0,
+       0x0                 },        /* POOL48I */
+    { pool                , P_U12               , 16  , 32,
+       0xfc000000, 0x80000000, 0                      , 0,
+       0x0                 },        /* P.U12 */
+    { pool                , POOL32F             , 8   , 32,
+       0xfc000000, 0xa0000000, 0                      , 0,
+       CP1_                },        /* POOL32F */
+    { pool                , POOL32S             , 8   , 32,
+       0xfc000000, 0xc0000000, 0                      , 0,
+       0x0                 },        /* POOL32S */
+    { pool                , P_LUI               , 2   , 32,
+       0xfc000000, 0xe0000000, 0                      , 0,
+       0x0                 },        /* P.LUI */
+    { instruction         , 0                   , 0   , 32,
+       0xfc000000, 0x04000000, &NMD::ADDIUPC_32_      , 0,
+       0x0                 },        /* ADDIUPC[32] */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000000, 0x24000000, 0                      , 0,
+       0x0                 },        /* P32~*(5) */
+    { pool                , P_GP_BH             , 8   , 32,
+       0xfc000000, 0x44000000, 0                      , 0,
+       0x0                 },        /* P.GP.BH */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000000, 0x64000000, 0                      , 0,
+       0x0                 },        /* P32~*(13) */
+    { pool                , P_LS_U12            , 16  , 32,
+       0xfc000000, 0x84000000, 0                      , 0,
+       0x0                 },        /* P.LS.U12 */
+    { pool                , P_LS_S9             , 8   , 32,
+       0xfc000000, 0xa4000000, 0                      , 0,
+       0x0                 },        /* P.LS.S9 */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000000, 0xc4000000, 0                      , 0,
+       0x0                 },        /* P32~*(25) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000000, 0xe4000000, 0                      , 0,
+       0x0                 },        /* P32~*(29) */
+    { call_instruction    , 0                   , 0   , 32,
+       0xfc000000, 0x08000000, &NMD::MOVE_BALC        , 0,
+       XMMS_               },        /* MOVE.BALC */
+    { pool                , P_BAL               , 2   , 32,
+       0xfc000000, 0x28000000, 0                      , 0,
+       0x0                 },        /* P.BAL */
+    { pool                , P_J                 , 16  , 32,
+       0xfc000000, 0x48000000, 0                      , 0,
+       0x0                 },        /* P.J */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000000, 0x68000000, 0                      , 0,
+       0x0                 },        /* P32~*(14) */
+    { pool                , P_BR1               , 4   , 32,
+       0xfc000000, 0x88000000, 0                      , 0,
+       0x0                 },        /* P.BR1 */
+    { pool                , P_BR2               , 4   , 32,
+       0xfc000000, 0xa8000000, 0                      , 0,
+       0x0                 },        /* P.BR2 */
+    { pool                , P_BRI               , 8   , 32,
+       0xfc000000, 0xc8000000, 0                      , 0,
+       0x0                 },        /* P.BRI */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000000, 0xe8000000, 0                      , 0,
+       0x0                 },        /* P32~*(30) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000000, 0x0c000000, 0                      , 0,
+       0x0                 },        /* P32~*(3) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000000, 0x2c000000, 0                      , 0,
+       0x0                 },        /* P32~*(7) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000000, 0x4c000000, 0                      , 0,
+       0x0                 },        /* P32~*(11) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000000, 0x6c000000, 0                      , 0,
+       0x0                 },        /* P32~*(15) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000000, 0x8c000000, 0                      , 0,
+       0x0                 },        /* P32~*(19) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000000, 0xac000000, 0                      , 0,
+       0x0                 },        /* P32~*(23) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000000, 0xcc000000, 0                      , 0,
+       0x0                 },        /* P32~*(27) */
+    { reserved_block      , 0                   , 0   , 32,
+       0xfc000000, 0xec000000, 0                      , 0,
+       0x0                 },        /* P32~*(31) */
+};
+
+/* P16.SYSCALL: two 16-bit entries whose values differ in bit 2. */
+NMD::Pool NMD::P16_SYSCALL[2] = {
+    { instruction         , 0                   , 0   , 16,
+       0xfffc    , 0x1008    , &NMD::SYSCALL_16_      , 0,
+       0x0                 },        /* SYSCALL[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfffc    , 0x100c    , &NMD::HYPCALL_16_      , 0,
+       CP0_ | VZ_          },        /* HYPCALL[16] */
+};
+
+/* P16.RI: 16-bit entries differing in bits 4..3; slot 0 is reserved. */
+NMD::Pool NMD::P16_RI[4] = {
+    { reserved_block      , 0                   , 0   , 16,
+       0xfff8    , 0x1000    , 0                      , 0,
+       0x0                 },        /* P16.RI~*(0) */
+    { pool                , P16_SYSCALL         , 2   , 16,
+       0xfff8    , 0x1008    , 0                      , 0,
+       0x0                 },        /* P16.SYSCALL */
+    { instruction         , 0                   , 0   , 16,
+       0xfff8    , 0x1010    , &NMD::BREAK_16_        , 0,
+       0x0                 },        /* BREAK[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfff8    , 0x1018    , &NMD::SDBBP_16_        , 0,
+       EJTAG_              },        /* SDBBP[16] */
+};
+
+/* P16.MV: both entries use value 0x1000; MOVE also carries MOVE_cond. */
+NMD::Pool NMD::P16_MV[2] = {
+    { pool                , P16_RI              , 4   , 16,
+       0xffe0    , 0x1000    , 0                      , 0,
+       0x0                 },        /* P16.RI */
+    { instruction         , 0                   , 0   , 16,
+       0xfc00    , 0x1000    , &NMD::MOVE             , &NMD::MOVE_cond        ,
+       0x0                 },        /* MOVE */
+};
+
+/* P16.SHIFT: SLL[16] and SRL[16], whose values differ in bit 3. */
+NMD::Pool NMD::P16_SHIFT[2] = {
+    { instruction         , 0                   , 0   , 16,
+       0xfc08    , 0x3000    , &NMD::SLL_16_          , 0,
+       0x0                 },        /* SLL[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc08    , 0x3008    , &NMD::SRL_16_          , 0,
+       0x0                 },        /* SRL[16] */
+};
+
+/* POOL16C_00: NOT/XOR/AND/OR[16]; values differ in bits 3..2. */
+NMD::Pool NMD::POOL16C_00[4] = {
+    { instruction         , 0                   , 0   , 16,
+       0xfc0f    , 0x5000    , &NMD::NOT_16_          , 0,
+       0x0                 },        /* NOT[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc0f    , 0x5004    , &NMD::XOR_16_          , 0,
+       0x0                 },        /* XOR[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc0f    , 0x5008    , &NMD::AND_16_          , 0,
+       0x0                 },        /* AND[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc0f    , 0x500c    , &NMD::OR_16_           , 0,
+       0x0                 },        /* OR[16] */
+};
+
+/* POOL16C_0: values differ in bit 1; slot 1 is reserved. */
+NMD::Pool NMD::POOL16C_0[2] = {
+    { pool                , POOL16C_00          , 4   , 16,
+       0xfc03    , 0x5000    , 0                      , 0,
+       0x0                 },        /* POOL16C_00 */
+    { reserved_block      , 0                   , 0   , 16,
+       0xfc03    , 0x5002    , 0                      , 0,
+       0x0                 },        /* POOL16C_0~*(1) */
+};
+
+/* P16C: values differ in bit 0 (POOL16C_0 sub-table vs. LWXS[16]). */
+NMD::Pool NMD::P16C[2] = {
+    { pool                , POOL16C_0           , 2   , 16,
+       0xfc01    , 0x5000    , 0                      , 0,
+       0x0                 },        /* POOL16C_0 */
+    { instruction         , 0                   , 0   , 16,
+       0xfc01    , 0x5001    , &NMD::LWXS_16_         , 0,
+       0x0                 },        /* LWXS[16] */
+};
+
+/* P16.A1: values differ in bit 6; slot 0 is reserved. */
+NMD::Pool NMD::P16_A1[2] = {
+    { reserved_block      , 0                   , 0   , 16,
+       0xfc40    , 0x7000    , 0                      , 0,
+       0x0                 },        /* P16.A1~*(0) */
+    { instruction         , 0                   , 0   , 16,
+       0xfc40    , 0x7040    , &NMD::ADDIU_R1_SP_     , 0,
+       0x0                 },        /* ADDIU[R1.SP] */
+};
+
+/* P.ADDIU[RS5]: same value 0x9008; NOP[16] has the wider mask 0xffe8. */
+NMD::Pool NMD::P_ADDIU_RS5_[2] = {
+    { instruction         , 0                   , 0   , 16,
+       0xffe8    , 0x9008    , &NMD::NOP_16_          , 0,
+       0x0                 },        /* NOP[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc08    , 0x9008    , &NMD::ADDIU_RS5_       , &NMD::ADDIU_RS5__cond  ,
+       0x0                 },        /* ADDIU[RS5] */
+};
+
+/* P16.A2: values differ in bit 3 (ADDIU[R2] vs. the P.ADDIU[RS5] table). */
+NMD::Pool NMD::P16_A2[2] = {
+    { instruction         , 0                   , 0   , 16,
+       0xfc08    , 0x9000    , &NMD::ADDIU_R2_        , 0,
+       0x0                 },        /* ADDIU[R2] */
+    { pool                , P_ADDIU_RS5_        , 2   , 16,
+       0xfc08    , 0x9008    , 0                      , 0,
+       0x0                 },        /* P.ADDIU[RS5] */
+};
+
+/* P16.ADDU: ADDU[16] and SUBU[16], whose values differ in bit 0. */
+NMD::Pool NMD::P16_ADDU[2] = {
+    { instruction         , 0                   , 0   , 16,
+       0xfc01    , 0xb000    , &NMD::ADDU_16_         , 0,
+       0x0                 },        /* ADDU[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc01    , 0xb001    , &NMD::SUBU_16_         , 0,
+       0x0                 },        /* SUBU[16] */
+};
+
+/* P16.JRC: JRC and JALRC[16], whose values differ in bit 4. */
+NMD::Pool NMD::P16_JRC[2] = {
+    { branch_instruction  , 0                   , 0   , 16,
+       0xfc1f    , 0xd800    , &NMD::JRC              , 0,
+       0x0                 },        /* JRC */
+    { call_instruction    , 0                   , 0   , 16,
+       0xfc1f    , 0xd810    , &NMD::JALRC_16_        , 0,
+       0x0                 },        /* JALRC[16] */
+};
+
+/* P16.BR1: identical mask/value pairs; entries differ only in *_cond. */
+NMD::Pool NMD::P16_BR1[2] = {
+    { branch_instruction  , 0                   , 0   , 16,
+       0xfc00    , 0xd800    , &NMD::BEQC_16_         , &NMD::BEQC_16__cond    ,
+       XMMS_               },        /* BEQC[16] */
+    { branch_instruction  , 0                   , 0   , 16,
+       0xfc00    , 0xd800    , &NMD::BNEC_16_         , &NMD::BNEC_16__cond    ,
+       XMMS_               },        /* BNEC[16] */
+};
+
+/* P16.BR: both entries use value 0xd800; P16.BR1 adds P16_BR1_cond. */
+NMD::Pool NMD::P16_BR[2] = {
+    { pool                , P16_JRC             , 2   , 16,
+       0xfc0f    , 0xd800    , 0                      , 0,
+       0x0                 },        /* P16.JRC */
+    { pool                , P16_BR1             , 2   , 16,
+       0xfc00    , 0xd800    , 0                      , &NMD::P16_BR1_cond     ,
+       0x0                 },        /* P16.BR1 */
+};
+
+/* P16.SR: SAVE[16] and RESTORE.JRC[16], whose values differ in bit 8. */
+NMD::Pool NMD::P16_SR[2] = {
+    { instruction         , 0                   , 0   , 16,
+       0xfd00    , 0x1c00    , &NMD::SAVE_16_         , 0,
+       0x0                 },        /* SAVE[16] */
+    { return_instruction  , 0                   , 0   , 16,
+       0xfd00    , 0x1d00    , &NMD::RESTORE_JRC_16_  , 0,
+       0x0                 },        /* RESTORE.JRC[16] */
+};
+
+/* P16.4X4: values differ in bits 8 and 3; slots 2 and 3 are reserved. */
+NMD::Pool NMD::P16_4X4[4] = {
+    { instruction         , 0                   , 0   , 16,
+       0xfd08    , 0x3c00    , &NMD::ADDU_4X4_        , 0,
+       XMMS_               },        /* ADDU[4X4] */
+    { instruction         , 0                   , 0   , 16,
+       0xfd08    , 0x3c08    , &NMD::MUL_4X4_         , 0,
+       XMMS_               },        /* MUL[4X4] */
+    { reserved_block      , 0                   , 0   , 16,
+       0xfd08    , 0x3d00    , 0                      , 0,
+       0x0                 },        /* P16.4X4~*(2) */
+    { reserved_block      , 0                   , 0   , 16,
+       0xfd08    , 0x3d08    , 0                      , 0,
+       0x0                 },        /* P16.4X4~*(3) */
+};
+
+/* P16.LB: values differ in bits 3..2; slot 3 is reserved. */
+NMD::Pool NMD::P16_LB[4] = {
+    { instruction         , 0                   , 0   , 16,
+       0xfc0c    , 0x5c00    , &NMD::LB_16_           , 0,
+       0x0                 },        /* LB[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc0c    , 0x5c04    , &NMD::SB_16_           , 0,
+       0x0                 },        /* SB[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc0c    , 0x5c08    , &NMD::LBU_16_          , 0,
+       0x0                 },        /* LBU[16] */
+    { reserved_block      , 0                   , 0   , 16,
+       0xfc0c    , 0x5c0c    , 0                      , 0,
+       0x0                 },        /* P16.LB~*(3) */
+};
+
+/* P16.LH: values differ in bits 3 and 0; slot 3 is reserved. */
+NMD::Pool NMD::P16_LH[4] = {
+    { instruction         , 0                   , 0   , 16,
+       0xfc09    , 0x7c00    , &NMD::LH_16_           , 0,
+       0x0                 },        /* LH[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc09    , 0x7c01    , &NMD::SH_16_           , 0,
+       0x0                 },        /* SH[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc09    , 0x7c08    , &NMD::LHU_16_          , 0,
+       0x0                 },        /* LHU[16] */
+    { reserved_block      , 0                   , 0   , 16,
+       0xfc09    , 0x7c09    , 0                      , 0,
+       0x0                 },        /* P16.LH~*(3) */
+};
+
+/* P16: 32 16-bit entries; values vary in bits 15..13 and 11..10. */
+NMD::Pool NMD::P16[32] = {
+    { pool                , P16_MV              , 2   , 16,
+       0xfc00    , 0x1000    , 0                      , 0,
+       0x0                 },        /* P16.MV */
+    { pool                , P16_SHIFT           , 2   , 16,
+       0xfc00    , 0x3000    , 0                      , 0,
+       0x0                 },        /* P16.SHIFT */
+    { pool                , P16C                , 2   , 16,
+       0xfc00    , 0x5000    , 0                      , 0,
+       0x0                 },        /* P16C */
+    { pool                , P16_A1              , 2   , 16,
+       0xfc00    , 0x7000    , 0                      , 0,
+       0x0                 },        /* P16.A1 */
+    { pool                , P16_A2              , 2   , 16,
+       0xfc00    , 0x9000    , 0                      , 0,
+       0x0                 },        /* P16.A2 */
+    { pool                , P16_ADDU            , 2   , 16,
+       0xfc00    , 0xb000    , 0                      , 0,
+       0x0                 },        /* P16.ADDU */
+    { instruction         , 0                   , 0   , 16,
+       0xfc00    , 0xd000    , &NMD::LI_16_           , 0,
+       0x0                 },        /* LI[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc00    , 0xf000    , &NMD::ANDI_16_         , 0,
+       0x0                 },        /* ANDI[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc00    , 0x1400    , &NMD::LW_16_           , 0,
+       0x0                 },        /* LW[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc00    , 0x3400    , &NMD::LW_SP_           , 0,
+       0x0                 },        /* LW[SP] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc00    , 0x5400    , &NMD::LW_GP16_         , 0,
+       0x0                 },        /* LW[GP16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc00    , 0x7400    , &NMD::LW_4X4_          , 0,
+       XMMS_               },        /* LW[4X4] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc00    , 0x9400    , &NMD::SW_16_           , 0,
+       0x0                 },        /* SW[16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc00    , 0xb400    , &NMD::SW_SP_           , 0,
+       0x0                 },        /* SW[SP] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc00    , 0xd400    , &NMD::SW_GP16_         , 0,
+       0x0                 },        /* SW[GP16] */
+    { instruction         , 0                   , 0   , 16,
+       0xfc00    , 0xf400    , &NMD::SW_4X4_          , 0,
+       XMMS_               },        /* SW[4X4] */
+    { branch_instruction  , 0                   , 0   , 16,
+       0xfc00    , 0x1800    , &NMD::BC_16_           , 0,
+       0x0                 },        /* BC[16] */
+    { call_instruction    , 0                   , 0   , 16,
+       0xfc00    , 0x3800    , &NMD::BALC_16_         , 0,
+       0x0                 },        /* BALC[16] */
+    { reserved_block      , 0                   , 0   , 16,
+       0xfc00    , 0x5800    , 0                      , 0,
+       0x0                 },        /* P16~*(10) */
+    { reserved_block      , 0                   , 0   , 16,
+       0xfc00    , 0x7800    , 0                      , 0,
+       0x0                 },        /* P16~*(14) */
+    { branch_instruction  , 0                   , 0   , 16,
+       0xfc00    , 0x9800    , &NMD::BEQZC_16_        , 0,
+       0x0                 },        /* BEQZC[16] */
+    { branch_instruction  , 0                   , 0   , 16,
+       0xfc00    , 0xb800    , &NMD::BNEZC_16_        , 0,
+       0x0                 },        /* BNEZC[16] */
+    { pool                , P16_BR              , 2   , 16,
+       0xfc00    , 0xd800    , 0                      , 0,
+       0x0                 },        /* P16.BR */
+    { reserved_block      , 0                   , 0   , 16,
+       0xfc00    , 0xf800    , 0                      , 0,
+       0x0                 },        /* P16~*(30) */
+    { pool                , P16_SR              , 2   , 16,
+       0xfc00    , 0x1c00    , 0                      , 0,
+       0x0                 },        /* P16.SR */
+    { pool                , P16_4X4             , 4   , 16,
+       0xfc00    , 0x3c00    , 0                      , 0,
+       0x0                 },        /* P16.4X4 */
+    { pool                , P16_LB              , 4   , 16,
+       0xfc00    , 0x5c00    , 0                      , 0,
+       0x0                 },        /* P16.LB */
+    { pool                , P16_LH              , 4   , 16,
+       0xfc00    , 0x7c00    , 0                      , 0,
+       0x0                 },        /* P16.LH */
+    { reserved_block      , 0                   , 0   , 16,
+       0xfc00    , 0x9c00    , 0                      , 0,
+       0x0                 },        /* P16~*(19) */
+    { instruction         , 0                   , 0   , 16,
+       0xfc00    , 0xbc00    , &NMD::MOVEP            , 0,
+       XMMS_               },        /* MOVEP */
+    { reserved_block      , 0                   , 0   , 16,
+       0xfc00    , 0xdc00    , 0                      , 0,
+       0x0                 },        /* P16~*(27) */
+    { instruction         , 0                   , 0   , 16,
+       0xfc00    , 0xfc00    , &NMD::MOVEP_REV_       , 0,
+       XMMS_               },        /* MOVEP[REV] */
+};
+
+/* MAJOR: pool entries for P32 (32-bit) and P16 (16-bit), 32 slots each. */
+NMD::Pool NMD::MAJOR[2] = {
+    { pool                , P32                 , 32  , 32,
+       0x10000000, 0x00000000, 0                      , 0,
+       0x0                 },        /* P32 */
+    { pool                , P16                 , 32  , 16,
+       0x1000    , 0x1000    , 0                      , 0,
+       0x0                 },        /* P16 */
+};
diff --git a/disas/nanomips.h b/disas/nanomips.h
new file mode 100644
index 0000000..84cc9a6
--- /dev/null
+++ b/disas/nanomips.h
@@ -0,0 +1,1099 @@
+/*
+ *  Header file for nanoMIPS disassembler component of QEMU
+ *
+ *  Copyright (C) 2018  Wave Computing
+ *  Copyright (C) 2018  Matthew Fortune <matthew.fortune@mips.com>
+ *  Copyright (C) 2018  Aleksandar Markovic <aleksandar.markovic@wavecomp.com>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef NANOMIPS_DISASSEMBLER_H
+#define NANOMIPS_DISASSEMBLER_H
+
+#include <string>
+
+typedef unsigned short uint16;      /* element type of Disassemble()'s data */
+typedef unsigned int uint32;        /* NOTE(review): int width not fixed */
+typedef long long int64;            /* signed results, e.g. extr_s*(), copy() */
+typedef unsigned long long uint64;  /* instruction words and field values */
+
+namespace img
+{
+    typedef unsigned long long address;    /* type of NMD's pc / m_pc */
+}
+
+
+class NMD
+{
+public:
+
+    enum TABLE_ENTRY_TYPE {     /* Pool::type; also Disassemble()'s `type` */
+        instruction,            /* rows seen carry a disassembly function */
+        call_instruction,       /* presumably control flow; TODO confirm */
+        branch_instruction,     /* presumably control flow; TODO confirm */
+        return_instruction,     /* presumably control flow; TODO confirm */
+        reserved_block,         /* rows seen have no sub-table, no functions */
+        pool,                   /* row links to a sub-table via next_table */
+    };
+
+    enum TABLE_ATTRIBUTE_TYPE {     /* each named flag below is a single bit */
+        MIPS64_    = 0x00000001,
+        XNP_       = 0x00000002,
+        XMMS_      = 0x00000004,    /* appears in Pool rows, e.g. MOVEP */
+        EVA_       = 0x00000008,
+        DSP_       = 0x00000010,
+        MT_        = 0x00000020,
+        EJTAG_     = 0x00000040,
+        TLBINV_    = 0x00000080,
+        CP0_       = 0x00000100,
+        CP1_       = 0x00000200,
+        CP2_       = 0x00000400,
+        UDI_       = 0x00000800,
+        MCU_       = 0x00001000,
+        VZ_        = 0x00002000,
+        TLB_       = 0x00004000,
+        MVH_       = 0x00008000,
+        ALL_ATTRIBUTES = 0xffffffffull,     /* low 32 bits all set */
+    };
+
+
+    NMD(img::address pc, TABLE_ATTRIBUTE_TYPE requested_instruction_categories)
+        : m_pc(pc)                  /* both arguments are stored unchanged */
+        , m_requested_instruction_categories(requested_instruction_categories)
+    {
+    }                               /* empty body: no other setup is done */
+
+    int Disassemble(const uint16 *data, std::string & dis,
+                    TABLE_ENTRY_TYPE & type);
+
+private:
+
+    img::address           m_pc;    /* copy of the constructor's pc argument */
+    TABLE_ATTRIBUTE_TYPE   m_requested_instruction_categories;  /* ctor arg */
+
+    typedef std::string(NMD:: *disassembly_function)(uint64 instruction);
+    typedef bool(NMD:: *conditional_function)(uint64 instruction);
+
+    struct Pool {                   /* one row of a decode table */
+        TABLE_ENTRY_TYPE     type;
+        struct Pool          *next_table;       /* 0 in non-pool rows seen */
+        int                  next_table_size;   /* presumably row count */
+        int                  instructions_size; /* 16 or 32 in rows seen */
+        uint64               mask;      /* presumably ANDed with the opcode */
+        uint64               value;     /* ... then compared; TODO confirm */
+        disassembly_function disassembly;       /* member fn pointer, or 0 */
+        conditional_function condition;         /* 0 in the rows seen */
+        uint64               attributes;        /* e.g. XMMS_, or 0x0 */
+    };
+
+    uint64 extract_op_code_value(const uint16 *data, int size);
+    int Disassemble(const uint16 *data, std::string & dis,
+                    TABLE_ENTRY_TYPE & type, const Pool *table, int table_size);
+
+    uint64 renumber_registers(uint64 index, uint64 *register_list,
+                              size_t register_list_size);
+    uint64 encode_gpr3(uint64 d);
+    uint64 encode_gpr3_store(uint64 d);
+    uint64 encode_rd1_from_rd(uint64 d);
+    uint64 encode_gpr4_zero(uint64 d);
+    uint64 encode_gpr4(uint64 d);
+    uint64 encode_rd2_reg1(uint64 d);
+    uint64 encode_rd2_reg2(uint64 d);
+
+    uint64 copy(uint64 d);
+    int64 copy(int64 d);
+    int64 neg_copy(uint64 d);
+    int64 neg_copy(int64 d);
+    uint64 encode_rs3_and_check_rs3_ge_rt3(uint64 d);
+    uint64 encode_rs3_and_check_rs3_lt_rt3(uint64 d);
+    uint64 encode_s_from_address(uint64 d);
+    uint64 encode_u_from_address(uint64 d);
+    uint64 encode_s_from_s_hi(uint64 d);
+    uint64 encode_count3_from_count(uint64 d);
+    uint64 encode_shift3_from_shift(uint64 d);
+    int64 encode_eu_from_s_li16(uint64 d);
+    uint64 encode_msbd_from_size(uint64 d);
+    uint64 encode_eu_from_u_andi16(uint64 d);
+
+    uint64 encode_msbd_from_pos_and_size(uint64 d);
+
+    uint64 encode_rt1_from_rt(uint64 d);
+    uint64 encode_lsb_from_pos_and_size(uint64 d);
+
+    std::string save_restore_list(uint64 rt, uint64 count, uint64 gp);
+
+    std::string GPR(uint64 reg);
+    std::string FPR(uint64 reg);
+    std::string AC(uint64 reg);
+    std::string IMMEDIATE(uint64 value);
+    std::string IMMEDIATE(int64 value);
+    std::string CPR(uint64 reg);
+    std::string ADDRESS(uint64 value, int instruction_size);
+
+    int64 extract_s_4_2_1_0(uint64 instruction);
+    int64 extr_sil0il0bs8_il15il8bs1Tmsb8(uint64 instruction);
+    int64 extr_sil0il10bs1_il1il1bs9Tmsb10(uint64 instruction);
+    int64 extr_sil0il11bs1_il1il1bs10Tmsb11(uint64 instruction);
+    int64 extr_sil0il14bs1_il1il1bs13Tmsb14(uint64 instruction);
+    int64 extr_sil0il16bs16_il16il0bs16Tmsb31(uint64 instruction);
+    int64 extr_sil0il21bs1_il1il1bs20Tmsb21(uint64 instruction);
+    int64 extr_sil0il25bs1_il1il1bs24Tmsb25(uint64 instruction);
+    int64 extr_sil0il31bs1_il2il21bs10_il12il12bs9Tmsb31(uint64 instruction);
+    int64 extr_sil0il7bs1_il1il1bs6Tmsb7(uint64 instruction);
+    int64 extr_sil11il0bs10Tmsb9(uint64 instruction);
+    int64 extract_shift_21_20_19_18_17_16(uint64 instruction);
+    int64 extr_sil2il2bs6_il15il8bs1Tmsb8(uint64 instruction);
+    int64 extr_sil3il3bs5_il15il8bs1Tmsb8(uint64 instruction);
+
+    uint64 extract_ac_13_12(uint64 instruction);
+    uint64 extract_bit_16_15_14_13_12_11(uint64 instruction);
+    uint64 extract_bit_23_22_21(uint64 instruction);
+    uint64 extract_c0s_20_19_18_17_16(uint64 instruction);
+    uint64 extract_code_17_to_0(uint64 instruction);
+    uint64 extract_code_18_to_0(uint64 instruction);
+    uint64 extract_code_1_0(uint64 instruction);
+    uint64 extract_code_2_1_0(uint64 instruction);
+    uint64 extract_code_25_24_23_22_21_20_19_18_17_16(uint64 instruction);
+    uint64 extract_cofun_25_24_23(uint64 instruction);
+    uint64 extract_count3_14_13_12(uint64 instruction);
+    uint64 extract_count_3_2_1_0(uint64 instruction);
+    uint64 extract_count_19_18_17_16(uint64 instruction);
+    uint64 extract_cs_20_19_18_17_16(uint64 instruction);
+    uint64 extract_cs_25_24_23_22_21(uint64 instruction);
+    uint64 extract_ct_25_24_23_22_21(uint64 instruction);
+    uint64 extract_eu_3_2_1_0(uint64 instruction);
+    uint64 extract_eu_6_5_4_3_2_1_0(uint64 instruction);
+    uint64 extract_fd_10_9_8_7_6(uint64 instruction);
+    uint64 extract_fs_15_14_13_12_11(uint64 instruction);
+    uint64 extract_ft_15_14_13_12_11(uint64 instruction);
+    uint64 extract_ft_20_19_18_17_16(uint64 instruction);
+    uint64 extract_gp_2(uint64 instruction);
+    uint64 extract_hint_25_24_23_22_21(uint64 instruction);
+    uint64 extract_hs_20_19_18_17_16(uint64 instruction);
+    uint64 extract_lsb_4_3_2_1_0(uint64 instruction);
+    uint64 extract_mask_20_19_18_17_16_15_14(uint64 instruction);
+    uint64 extract_msbt_10_9_8_7_6(uint64 instruction);
+    uint64 extract_op_25_24_23_22_21(uint64 instruction);
+    uint64 extract_op_25_to_3(uint64 instruction);
+    uint64 extract_rdl_25_24(uint64 instruction);
+    uint64 extract_rd2_3_8(uint64 instruction);
+    uint64 extract_rd3_3_2_1(uint64 instruction);
+    uint64 extract_rd_20_19_18_17_16(uint64 instruction);
+    uint64 extract_rs3_6_5_4(uint64 instruction);
+    uint64 extract_rs4_4_2_1_0(uint64 instruction);
+    uint64 extract_rs_4_3_2_1_0(uint64 instruction);
+    uint64 extract_rs_20_19_18_17_16(uint64 instruction);
+    uint64 extract_rsz4_4_2_1_0(uint64 instruction);
+    uint64 extract_rtl_11(uint64 instruction);
+    uint64 extract_rt3_9_8_7(uint64 instruction);
+    uint64 extract_rt4_9_7_6_5(uint64 instruction);
+    uint64 extract_rt_25_24_23_22_21(uint64 instruction);
+    uint64 extract_rt_41_40_39_38_37(uint64 instruction);
+    uint64 extract_rt_9_8_7_6_5(uint64 instruction);
+    uint64 extract_rtz3_9_8_7(uint64 instruction);
+    uint64 extract_rtz4_27_26_25_23_22_21(uint64 instruction);
+    uint64 extract_rtz4_9_7_6_5(uint64 instruction);
+    uint64 extract_ru_7_6_5_4_3(uint64 instruction);
+    uint64 extract_sa_15_14_13_12_11(uint64 instruction);
+    uint64 extract_sa_15_14_13_12(uint64 instruction);
+    uint64 extract_sa_15_14_13(uint64 instruction);
+    uint64 extract_sel_13_12_11(uint64 instruction);
+    uint64 extract_sel_15_14_13_12_11(uint64 instruction);
+    uint64 extract_shift3_2_1_0(uint64 instruction);
+    uint64 extract_shift_4_3_2_1_0(uint64 instruction);
+    uint64 extract_shift_5_4_3_2_1_0(uint64 instruction);
+    uint64 extract_shift_20_19_18_17_16(uint64 instruction);
+    uint64 extract_shift_10_9_8_7_6(uint64 instruction);
+    uint64 extract_shiftx_11_10_9_8_7_6(uint64 instruction);
+    uint64 extr_shiftxil7il1bs4Fmsb4(uint64 instruction);
+    uint64 extract_size_20_19_18_17_16(uint64 instruction);
+    uint64 extract_stripe_6(uint64 instruction);
+    uint64 extract_stype_20_19_18_17_16(uint64 instruction);
+    uint64 extract_u2_10_9(uint64 instruction);
+    uint64 extract_u_11_10_9_8_7_6_5_4_3_2_1_0(uint64 instruction);
+    uint64 extract_u_15_to_0(uint64 instruction);
+    uint64 extract_u_17_to_0(uint64 instruction);
+    uint64 extract_u_1_0(uint64 instruction);
+    uint64 extr_uil0il1bs4Fmsb4(uint64 instruction);
+    uint64 extr_uil0il2bs3Fmsb4(uint64 instruction);
+    uint64 extr_uil0il2bs4Fmsb5(uint64 instruction);
+    uint64 extr_uil0il2bs5Fmsb6(uint64 instruction);
+    uint64 extr_uil0il2bs6Fmsb7(uint64 instruction);
+    uint64 extr_uil0il2bs7Fmsb8(uint64 instruction);
+    uint64 extr_uil0il32bs32Fmsb63(uint64 instruction);
+    uint64 extract_u_10(uint64 instruction);
+    uint64 extract_u_17_16_15_14_13_12_11(uint64 instruction);
+    uint64 extract_u_20_19_18_17_16_15_14_13(uint64 instruction);
+    uint64 extr_uil1il1bs17Fmsb17(uint64 instruction);
+    uint64 extr_uil1il1bs2Fmsb2(uint64 instruction);
+    uint64 extr_uil2il2bs16Fmsb17(uint64 instruction);
+    uint64 extr_uil2il2bs19Fmsb20(uint64 instruction);
+    uint64 extr_uil3il3bs18Fmsb20(uint64 instruction);
+    uint64 extr_uil3il3bs1_il8il2bs1Fmsb3(uint64 instruction);
+    uint64 extr_uil3il3bs9Fmsb11(uint64 instruction);
+    uint64 extr_uil4il4bs4Fmsb7(uint64 instruction);
+    uint64 extr_xil0il0bs12Fmsb11(uint64 instruction);
+    uint64 extr_xil0il0bs3_il4il0bs1Fmsb2(uint64 instruction);
+    uint64 extr_xil10il0bs1Fmsb0(uint64 instruction);
+    uint64 extr_xil10il0bs1_il11il0bs5Fmsb4(uint64 instruction);
+    uint64 extr_xil10il0bs1_il14il0bs2Fmsb1(uint64 instruction);
+    uint64 extr_xil10il0bs4_il22il0bs4Fmsb3(uint64 instruction);
+    uint64 extr_xil10il0bs6Fmsb5(uint64 instruction);
+    uint64 extr_xil11il0bs1Fmsb0(uint64 instruction);
+    uint64 extr_xil11il0bs5Fmsb4(uint64 instruction);
+    uint64 extr_xil12il0bs1Fmsb0(uint64 instruction);
+    uint64 extr_xil14il0bs1_il15il0bs1Fmsb0(uint64 instruction);
+    uint64 extr_xil14il0bs2Fmsb1(uint64 instruction);
+    uint64 extr_xil15il0bs1Fmsb0(uint64 instruction);
+    uint64 extr_xil16il0bs10Fmsb9(uint64 instruction);
+    uint64 extr_xil16il0bs5Fmsb4(uint64 instruction);
+    uint64 extr_xil17il0bs1Fmsb0(uint64 instruction);
+    uint64 extr_xil17il0bs9Fmsb8(uint64 instruction);
+    uint64 extr_xil21il0bs5Fmsb4(uint64 instruction);
+    uint64 extr_xil24il0bs1Fmsb0(uint64 instruction);
+    uint64 extr_xil2il0bs1_il15il0bs1Fmsb0(uint64 instruction);
+    uint64 extr_xil6il0bs3Fmsb2(uint64 instruction);
+    uint64 extr_xil6il0bs3_il10il0bs1Fmsb2(uint64 instruction);
+    uint64 extr_xil9il0bs2Fmsb1(uint64 instruction);
+    uint64 extr_xil9il0bs3Fmsb2(uint64 instruction);
+    uint64 extr_xil9il0bs3_il16il0bs5Fmsb4(uint64 instruction);
+
+    bool ADDIU_32__cond(uint64 instruction);
+    bool ADDIU_RS5__cond(uint64 instruction);
+    bool BALRSC_cond(uint64 instruction);
+    bool BEQC_16__cond(uint64 instruction);
+    bool BNEC_16__cond(uint64 instruction);
+    bool MOVE_cond(uint64 instruction);
+    bool P16_BR1_cond(uint64 instruction);
+    bool PREF_S9__cond(uint64 instruction);
+    bool PREFE_cond(uint64 instruction);
+    bool SLTU_cond(uint64 instruction);
+
+    std::string ABS_D(uint64 instruction);
+    std::string ABS_S(uint64 instruction);
+    std::string ABSQ_S_PH(uint64 instruction);
+    std::string ABSQ_S_QB(uint64 instruction);
+    std::string ABSQ_S_W(uint64 instruction);
+    std::string ACLR(uint64 instruction);
+    std::string ADD(uint64 instruction);
+    std::string ADD_D(uint64 instruction);
+    std::string ADD_S(uint64 instruction);
+    std::string ADDIU_32_(uint64 instruction);
+    std::string ADDIU_48_(uint64 instruction);
+    std::string ADDIU_GP48_(uint64 instruction);
+    std::string ADDIU_GP_B_(uint64 instruction);
+    std::string ADDIU_GP_W_(uint64 instruction);
+    std::string ADDIU_NEG_(uint64 instruction);
+    std::string ADDIU_R1_SP_(uint64 instruction);
+    std::string ADDIU_R2_(uint64 instruction);
+    std::string ADDIU_RS5_(uint64 instruction);
+    std::string ADDIUPC_32_(uint64 instruction);
+    std::string ADDIUPC_48_(uint64 instruction);
+    std::string ADDQ_PH(uint64 instruction);
+    std::string ADDQ_S_PH(uint64 instruction);
+    std::string ADDQ_S_W(uint64 instruction);
+    std::string ADDQH_PH(uint64 instruction);
+    std::string ADDQH_R_PH(uint64 instruction);
+    std::string ADDQH_R_W(uint64 instruction);
+    std::string ADDQH_W(uint64 instruction);
+    std::string ADDSC(uint64 instruction);
+    std::string ADDU_16_(uint64 instruction);
+    std::string ADDU_32_(uint64 instruction);
+    std::string ADDU_4X4_(uint64 instruction);
+    std::string ADDU_PH(uint64 instruction);
+    std::string ADDU_QB(uint64 instruction);
+    std::string ADDU_S_PH(uint64 instruction);
+    std::string ADDU_S_QB(uint64 instruction);
+    std::string ADDUH_QB(uint64 instruction);
+    std::string ADDUH_R_QB(uint64 instruction);
+    std::string ADDWC(uint64 instruction);
+    std::string ALUIPC(uint64 instruction);
+    std::string AND_16_(uint64 instruction);
+    std::string AND_32_(uint64 instruction);
+    std::string ANDI_16_(uint64 instruction);
+    std::string ANDI_32_(uint64 instruction);
+    std::string APPEND(uint64 instruction);
+    std::string ASET(uint64 instruction);
+    std::string BALC_16_(uint64 instruction);
+    std::string BALC_32_(uint64 instruction);
+    std::string BALRSC(uint64 instruction);
+    std::string BBEQZC(uint64 instruction);
+    std::string BBNEZC(uint64 instruction);
+    std::string BC_16_(uint64 instruction);
+    std::string BC_32_(uint64 instruction);
+    std::string BC1EQZC(uint64 instruction);
+    std::string BC1NEZC(uint64 instruction);
+    std::string BC2EQZC(uint64 instruction);
+    std::string BC2NEZC(uint64 instruction);
+    std::string BEQC_16_(uint64 instruction);
+    std::string BEQC_32_(uint64 instruction);
+    std::string BEQIC(uint64 instruction);
+    std::string BEQZC_16_(uint64 instruction);
+    std::string BGEC(uint64 instruction);
+    std::string BGEIC(uint64 instruction);
+    std::string BGEIUC(uint64 instruction);
+    std::string BGEUC(uint64 instruction);
+    std::string BLTC(uint64 instruction);
+    std::string BLTIC(uint64 instruction);
+    std::string BLTIUC(uint64 instruction);
+    std::string BLTUC(uint64 instruction);
+    std::string BNEC_16_(uint64 instruction);
+    std::string BNEC_32_(uint64 instruction);
+    std::string BNEIC(uint64 instruction);
+    std::string BNEZC_16_(uint64 instruction);
+    std::string BPOSGE32C(uint64 instruction);
+    std::string BREAK_16_(uint64 instruction);
+    std::string BREAK_32_(uint64 instruction);
+    std::string BRSC(uint64 instruction);
+    std::string CACHE(uint64 instruction);
+    std::string CACHEE(uint64 instruction);
+    std::string CEIL_L_D(uint64 instruction);
+    std::string CEIL_L_S(uint64 instruction);
+    std::string CEIL_W_D(uint64 instruction);
+    std::string CEIL_W_S(uint64 instruction);
+    std::string CFC1(uint64 instruction);
+    std::string CFC2(uint64 instruction);
+    std::string CLASS_D(uint64 instruction);
+    std::string CLASS_S(uint64 instruction);
+    std::string CLO(uint64 instruction);
+    std::string CLZ(uint64 instruction);
+    std::string CMP_AF_D(uint64 instruction);
+    std::string CMP_AF_S(uint64 instruction);
+    std::string CMP_EQ_D(uint64 instruction);
+    std::string CMP_EQ_PH(uint64 instruction);
+    std::string CMP_EQ_S(uint64 instruction);
+    std::string CMP_LE_D(uint64 instruction);
+    std::string CMP_LE_PH(uint64 instruction);
+    std::string CMP_LE_S(uint64 instruction);
+    std::string CMP_LT_D(uint64 instruction);
+    std::string CMP_LT_PH(uint64 instruction);
+    std::string CMP_LT_S(uint64 instruction);
+    std::string CMP_NE_D(uint64 instruction);
+    std::string CMP_NE_S(uint64 instruction);
+    std::string CMP_OR_D(uint64 instruction);
+    std::string CMP_OR_S(uint64 instruction);
+    std::string CMP_SAF_D(uint64 instruction);
+    std::string CMP_SAF_S(uint64 instruction);
+    std::string CMP_SEQ_D(uint64 instruction);
+    std::string CMP_SEQ_S(uint64 instruction);
+    std::string CMP_SLE_D(uint64 instruction);
+    std::string CMP_SLE_S(uint64 instruction);
+    std::string CMP_SLT_D(uint64 instruction);
+    std::string CMP_SLT_S(uint64 instruction);
+    std::string CMP_SNE_D(uint64 instruction);
+    std::string CMP_SNE_S(uint64 instruction);
+    std::string CMP_SOR_D(uint64 instruction);
+    std::string CMP_SOR_S(uint64 instruction);
+    std::string CMP_SUEQ_D(uint64 instruction);
+    std::string CMP_SUEQ_S(uint64 instruction);
+    std::string CMP_SULE_D(uint64 instruction);
+    std::string CMP_SULE_S(uint64 instruction);
+    std::string CMP_SULT_D(uint64 instruction);
+    std::string CMP_SULT_S(uint64 instruction);
+    std::string CMP_SUN_D(uint64 instruction);
+    std::string CMP_SUN_S(uint64 instruction);
+    std::string CMP_SUNE_D(uint64 instruction);
+    std::string CMP_SUNE_S(uint64 instruction);
+    std::string CMP_UEQ_D(uint64 instruction);
+    std::string CMP_UEQ_S(uint64 instruction);
+    std::string CMP_ULE_D(uint64 instruction);
+    std::string CMP_ULE_S(uint64 instruction);
+    std::string CMP_ULT_D(uint64 instruction);
+    std::string CMP_ULT_S(uint64 instruction);
+    std::string CMP_UN_D(uint64 instruction);
+    std::string CMP_UN_S(uint64 instruction);
+    std::string CMP_UNE_D(uint64 instruction);
+    std::string CMP_UNE_S(uint64 instruction);
+    std::string CMPGDU_EQ_QB(uint64 instruction);
+    std::string CMPGDU_LE_QB(uint64 instruction);
+    std::string CMPGDU_LT_QB(uint64 instruction);
+    std::string CMPGU_EQ_QB(uint64 instruction);
+    std::string CMPGU_LE_QB(uint64 instruction);
+    std::string CMPGU_LT_QB(uint64 instruction);
+    std::string CMPU_EQ_QB(uint64 instruction);
+    std::string CMPU_LE_QB(uint64 instruction);
+    std::string CMPU_LT_QB(uint64 instruction);
+    std::string COP2_1(uint64 instruction);
+    std::string CTC1(uint64 instruction);
+    std::string CTC2(uint64 instruction);
+    std::string CVT_D_L(uint64 instruction);
+    std::string CVT_D_S(uint64 instruction);
+    std::string CVT_D_W(uint64 instruction);
+    std::string CVT_L_D(uint64 instruction);
+    std::string CVT_L_S(uint64 instruction);
+    std::string CVT_S_D(uint64 instruction);
+    std::string CVT_S_L(uint64 instruction);
+    std::string CVT_S_PL(uint64 instruction);
+    std::string CVT_S_PU(uint64 instruction);
+    std::string CVT_S_W(uint64 instruction);
+    std::string CVT_W_D(uint64 instruction);
+    std::string CVT_W_S(uint64 instruction);
+    std::string DADDIU_48_(uint64 instruction);
+    std::string DADDIU_NEG_(uint64 instruction);
+    std::string DADDIU_U12_(uint64 instruction);
+    std::string DADD(uint64 instruction);
+    std::string DADDU(uint64 instruction);
+    std::string DCLO(uint64 instruction);
+    std::string DCLZ(uint64 instruction);
+    std::string DDIV(uint64 instruction);
+    std::string DDIVU(uint64 instruction);
+    std::string DERET(uint64 instruction);
+    std::string DEXTM(uint64 instruction);
+    std::string DEXT(uint64 instruction);
+    std::string DEXTU(uint64 instruction);
+    std::string DINSM(uint64 instruction);
+    std::string DINS(uint64 instruction);
+    std::string DINSU(uint64 instruction);
+    std::string DI(uint64 instruction);
+    std::string DIV(uint64 instruction);
+    std::string DIV_D(uint64 instruction);
+    std::string DIV_S(uint64 instruction);
+    std::string DIVU(uint64 instruction);
+    std::string DLSA(uint64 instruction);
+    std::string DLUI_48_(uint64 instruction);
+    std::string DMFC0(uint64 instruction);
+    std::string DMFC1(uint64 instruction);
+    std::string DMFC2(uint64 instruction);
+    std::string DMFGC0(uint64 instruction);
+    std::string DMOD(uint64 instruction);
+    std::string DMODU(uint64 instruction);
+    std::string DMTC0(uint64 instruction);
+    std::string DMTC1(uint64 instruction);
+    std::string DMTC2(uint64 instruction);
+    std::string DMTGC0(uint64 instruction);
+    std::string DMT(uint64 instruction);
+    std::string DMUH(uint64 instruction);
+    std::string DMUHU(uint64 instruction);
+    std::string DMUL(uint64 instruction);
+    std::string DMULU(uint64 instruction);
+    std::string DPAQ_S_W_PH(uint64 instruction);
+    std::string DPAQ_SA_L_W(uint64 instruction);
+    std::string DPAQX_S_W_PH(uint64 instruction);
+    std::string DPAQX_SA_W_PH(uint64 instruction);
+    std::string DPAU_H_QBL(uint64 instruction);
+    std::string DPAU_H_QBR(uint64 instruction);
+    std::string DPA_W_PH(uint64 instruction);
+    std::string DPAX_W_PH(uint64 instruction);
+    std::string DPS_W_PH(uint64 instruction);
+    std::string DPSQ_SA_L_W(uint64 instruction);
+    std::string DPSQ_S_W_PH(uint64 instruction);
+    std::string DPSQX_SA_W_PH(uint64 instruction);
+    std::string DPSQX_S_W_PH(uint64 instruction);
+    std::string DPSU_H_QBL(uint64 instruction);
+    std::string DPSU_H_QBR(uint64 instruction);
+    std::string DPSX_W_PH(uint64 instruction);
+    std::string DROTR(uint64 instruction);
+    std::string DROTR32(uint64 instruction);
+    std::string DROTRV(uint64 instruction);
+    std::string DROTX(uint64 instruction);
+    std::string DSLL(uint64 instruction);
+    std::string DSLL32(uint64 instruction);
+    std::string DSLLV(uint64 instruction);
+    std::string DSRA(uint64 instruction);
+    std::string DSRA32(uint64 instruction);
+    std::string DSRAV(uint64 instruction);
+    std::string DSRL32(uint64 instruction);
+    std::string DSRL(uint64 instruction);
+    std::string DSRLV(uint64 instruction);
+    std::string DSUB(uint64 instruction);
+    std::string DSUBU(uint64 instruction);
+    std::string DVP(uint64 instruction);
+    std::string DVPE(uint64 instruction);
+    std::string EHB(uint64 instruction);
+    std::string EI(uint64 instruction);
+    std::string EMT(uint64 instruction);
+    std::string ERET(uint64 instruction);
+    std::string ERETNC(uint64 instruction);
+    std::string EVP(uint64 instruction);
+    std::string EVPE(uint64 instruction);
+    std::string EXT(uint64 instruction);
+    std::string EXTD(uint64 instruction);
+    std::string EXTD32(uint64 instruction);
+    std::string EXTP(uint64 instruction);
+    std::string EXTPDP(uint64 instruction);
+    std::string EXTPDPV(uint64 instruction);
+    std::string EXTPV(uint64 instruction);
+    std::string EXTR_RS_W(uint64 instruction);
+    std::string EXTR_R_W(uint64 instruction);
+    std::string EXTR_S_H(uint64 instruction);
+    std::string EXTR_W(uint64 instruction);
+    std::string EXTRV_R_W(uint64 instruction);
+    std::string EXTRV_RS_W(uint64 instruction);
+    std::string EXTRV_S_H(uint64 instruction);
+    std::string EXTRV_W(uint64 instruction);
+    std::string EXTW(uint64 instruction);
+    std::string FLOOR_L_D(uint64 instruction);
+    std::string FLOOR_L_S(uint64 instruction);
+    std::string FLOOR_W_D(uint64 instruction);
+    std::string FLOOR_W_S(uint64 instruction);
+    std::string FORK(uint64 instruction);
+    std::string HYPCALL(uint64 instruction);
+    std::string HYPCALL_16_(uint64 instruction);
+    std::string INS(uint64 instruction);
+    std::string INSV(uint64 instruction);
+    std::string IRET(uint64 instruction);
+    std::string JALRC_16_(uint64 instruction);
+    std::string JALRC_32_(uint64 instruction);
+    std::string JALRC_HB(uint64 instruction);
+    std::string JRC(uint64 instruction);
+    std::string LB_16_(uint64 instruction);
+    std::string LB_GP_(uint64 instruction);
+    std::string LB_S9_(uint64 instruction);
+    std::string LB_U12_(uint64 instruction);
+    std::string LBE(uint64 instruction);
+    std::string LBU_16_(uint64 instruction);
+    std::string LBU_GP_(uint64 instruction);
+    std::string LBU_S9_(uint64 instruction);
+    std::string LBU_U12_(uint64 instruction);
+    std::string LBUE(uint64 instruction);
+    std::string LBUX(uint64 instruction);
+    std::string LBX(uint64 instruction);
+    std::string LD_GP_(uint64 instruction);
+    std::string LD_S9_(uint64 instruction);
+    std::string LD_U12_(uint64 instruction);
+    std::string LDC1_GP_(uint64 instruction);
+    std::string LDC1_S9_(uint64 instruction);
+    std::string LDC1_U12_(uint64 instruction);
+    std::string LDC1X(uint64 instruction);
+    std::string LDC1XS(uint64 instruction);
+    std::string LDC2(uint64 instruction);
+    std::string LDM(uint64 instruction);
+    std::string LDPC_48_(uint64 instruction);
+    std::string LDX(uint64 instruction);
+    std::string LDXS(uint64 instruction);
+    std::string LH_16_(uint64 instruction);
+    std::string LH_GP_(uint64 instruction);
+    std::string LH_S9_(uint64 instruction);
+    std::string LH_U12_(uint64 instruction);
+    std::string LHE(uint64 instruction);
+    std::string LHU_16_(uint64 instruction);
+    std::string LHU_GP_(uint64 instruction);
+    std::string LHU_S9_(uint64 instruction);
+    std::string LHU_U12_(uint64 instruction);
+    std::string LHUE(uint64 instruction);
+    std::string LHUX(uint64 instruction);
+    std::string LHUXS(uint64 instruction);
+    std::string LHX(uint64 instruction);
+    std::string LHXS(uint64 instruction);
+    std::string LI_16_(uint64 instruction);
+    std::string LI_48_(uint64 instruction);
+    std::string LL(uint64 instruction);
+    std::string LLD(uint64 instruction);
+    std::string LLDP(uint64 instruction);
+    std::string LLE(uint64 instruction);
+    std::string LLWP(uint64 instruction);
+    std::string LLWPE(uint64 instruction);
+    std::string LSA(uint64 instruction);
+    std::string LUI(uint64 instruction);
+    std::string LW_16_(uint64 instruction);
+    std::string LW_4X4_(uint64 instruction);
+    std::string LWC1_GP_(uint64 instruction);
+    std::string LWC1_S9_(uint64 instruction);
+    std::string LWC1_U12_(uint64 instruction);
+    std::string LWC1X(uint64 instruction);
+    std::string LWC1XS(uint64 instruction);
+    std::string LWC2(uint64 instruction);
+    std::string LWE(uint64 instruction);
+    std::string LW_GP_(uint64 instruction);
+    std::string LW_GP16_(uint64 instruction);
+    std::string LWM(uint64 instruction);
+    std::string LWPC_48_(uint64 instruction);
+    std::string LW_S9_(uint64 instruction);
+    std::string LW_SP_(uint64 instruction);
+    std::string LW_U12_(uint64 instruction);
+    std::string LWU_GP_(uint64 instruction);
+    std::string LWU_S9_(uint64 instruction);
+    std::string LWU_U12_(uint64 instruction);
+    std::string LWUX(uint64 instruction);
+    std::string LWUXS(uint64 instruction);
+    std::string LWX(uint64 instruction);
+    std::string LWXS_16_(uint64 instruction);
+    std::string LWXS_32_(uint64 instruction);
+    std::string MADD_DSP_(uint64 instruction);
+    std::string MADDF_D(uint64 instruction);
+    std::string MADDF_S(uint64 instruction);
+    std::string MADDU_DSP_(uint64 instruction);
+    std::string MAQ_S_W_PHL(uint64 instruction);
+    std::string MAQ_S_W_PHR(uint64 instruction);
+    std::string MAQ_SA_W_PHL(uint64 instruction);
+    std::string MAQ_SA_W_PHR(uint64 instruction);
+    std::string MAX_D(uint64 instruction);
+    std::string MAX_S(uint64 instruction);
+    std::string MAXA_D(uint64 instruction);
+    std::string MAXA_S(uint64 instruction);
+    std::string MFC0(uint64 instruction);
+    std::string MFC1(uint64 instruction);
+    std::string MFC2(uint64 instruction);
+    std::string MFGC0(uint64 instruction);
+    std::string MFHC0(uint64 instruction);
+    std::string MFHC1(uint64 instruction);
+    std::string MFHC2(uint64 instruction);
+    std::string MFHGC0(uint64 instruction);
+    std::string MFHI_DSP_(uint64 instruction);
+    std::string MFHTR(uint64 instruction);
+    std::string MFLO_DSP_(uint64 instruction);
+    std::string MFTR(uint64 instruction);
+    std::string MIN_D(uint64 instruction);
+    std::string MIN_S(uint64 instruction);
+    std::string MINA_D(uint64 instruction);
+    std::string MINA_S(uint64 instruction);
+    std::string MOD(uint64 instruction);
+    std::string MODSUB(uint64 instruction);
+    std::string MODU(uint64 instruction);
+    std::string MOV_D(uint64 instruction);
+    std::string MOV_S(uint64 instruction);
+    std::string MOVE_BALC(uint64 instruction);
+    std::string MOVEP(uint64 instruction);
+    std::string MOVEP_REV_(uint64 instruction);
+    std::string MOVE(uint64 instruction);
+    std::string MOVN(uint64 instruction);
+    std::string MOVZ(uint64 instruction);
+    std::string MSUB_DSP_(uint64 instruction);
+    std::string MSUBF_D(uint64 instruction);
+    std::string MSUBF_S(uint64 instruction);
+    std::string MSUBU_DSP_(uint64 instruction);
+    std::string MTC0(uint64 instruction);
+    std::string MTC1(uint64 instruction);
+    std::string MTC2(uint64 instruction);
+    std::string MTGC0(uint64 instruction);
+    std::string MTHC0(uint64 instruction);
+    std::string MTHC1(uint64 instruction);
+    std::string MTHC2(uint64 instruction);
+    std::string MTHGC0(uint64 instruction);
+    std::string MTHI_DSP_(uint64 instruction);
+    std::string MTHLIP(uint64 instruction);
+    std::string MTHTR(uint64 instruction);
+    std::string MTLO_DSP_(uint64 instruction);
+    std::string MTTR(uint64 instruction);
+    std::string MUH(uint64 instruction);
+    std::string MUHU(uint64 instruction);
+    std::string MUL_32_(uint64 instruction);
+    std::string MUL_4X4_(uint64 instruction);
+    std::string MUL_D(uint64 instruction);
+    std::string MUL_PH(uint64 instruction);
+    std::string MUL_S(uint64 instruction);
+    std::string MUL_S_PH(uint64 instruction);
+    std::string MULEQ_S_W_PHL(uint64 instruction);
+    std::string MULEQ_S_W_PHR(uint64 instruction);
+    std::string MULEU_S_PH_QBL(uint64 instruction);
+    std::string MULEU_S_PH_QBR(uint64 instruction);
+    std::string MULQ_RS_PH(uint64 instruction);
+    std::string MULQ_RS_W(uint64 instruction);
+    std::string MULQ_S_PH(uint64 instruction);
+    std::string MULQ_S_W(uint64 instruction);
+    std::string MULSA_W_PH(uint64 instruction);
+    std::string MULSAQ_S_W_PH(uint64 instruction);
+    std::string MULT_DSP_(uint64 instruction);
+    std::string MULTU_DSP_(uint64 instruction);
+    std::string MULU(uint64 instruction);
+    std::string NEG_D(uint64 instruction);
+    std::string NEG_S(uint64 instruction);
+    std::string NOP_16_(uint64 instruction);
+    std::string NOP_32_(uint64 instruction);
+    std::string NOR(uint64 instruction);
+    std::string NOT_16_(uint64 instruction);
+    std::string OR_16_(uint64 instruction);
+    std::string OR_32_(uint64 instruction);
+    std::string ORI(uint64 instruction);
+    std::string PACKRL_PH(uint64 instruction);
+    std::string PAUSE(uint64 instruction);
+    std::string PICK_PH(uint64 instruction);
+    std::string PICK_QB(uint64 instruction);
+    std::string PRECEQ_W_PHL(uint64 instruction);
+    std::string PRECEQ_W_PHR(uint64 instruction);
+    std::string PRECEQU_PH_QBL(uint64 instruction);
+    std::string PRECEQU_PH_QBLA(uint64 instruction);
+    std::string PRECEQU_PH_QBR(uint64 instruction);
+    std::string PRECEQU_PH_QBRA(uint64 instruction);
+    std::string PRECEU_PH_QBL(uint64 instruction);
+    std::string PRECEU_PH_QBLA(uint64 instruction);
+    std::string PRECEU_PH_QBR(uint64 instruction);
+    std::string PRECEU_PH_QBRA(uint64 instruction);
+    std::string PRECR_QB_PH(uint64 instruction);
+    std::string PRECR_SRA_PH_W(uint64 instruction);
+    std::string PRECR_SRA_R_PH_W(uint64 instruction);
+    std::string PRECRQ_PH_W(uint64 instruction);
+    std::string PRECRQ_QB_PH(uint64 instruction);
+    std::string PRECRQ_RS_PH_W(uint64 instruction);
+    std::string PRECRQU_S_QB_PH(uint64 instruction);
+    std::string PREF_S9_(uint64 instruction);
+    std::string PREF_U12_(uint64 instruction);
+    std::string PREFE(uint64 instruction);
+    std::string PREPEND(uint64 instruction);
+    std::string RADDU_W_QB(uint64 instruction);
+    std::string RDDSP(uint64 instruction);
+    std::string RDHWR(uint64 instruction);
+    std::string RDPGPR(uint64 instruction);
+    std::string RECIP_D(uint64 instruction);
+    std::string RECIP_S(uint64 instruction);
+    std::string REPL_PH(uint64 instruction);
+    std::string REPL_QB(uint64 instruction);
+    std::string REPLV_PH(uint64 instruction);
+    std::string REPLV_QB(uint64 instruction);
+    std::string RESTORE_32_(uint64 instruction);
+    std::string RESTORE_JRC_16_(uint64 instruction);
+    std::string RESTORE_JRC_32_(uint64 instruction);
+    std::string RESTOREF(uint64 instruction);
+    std::string RINT_D(uint64 instruction);
+    std::string RINT_S(uint64 instruction);
+    std::string ROTR(uint64 instruction);
+    std::string ROTRV(uint64 instruction);
+    std::string ROTX(uint64 instruction);
+    std::string ROUND_L_D(uint64 instruction);
+    std::string ROUND_L_S(uint64 instruction);
+    std::string ROUND_W_D(uint64 instruction);
+    std::string ROUND_W_S(uint64 instruction);
+    std::string RSQRT_D(uint64 instruction);
+    std::string RSQRT_S(uint64 instruction);
+    std::string SAVE_16_(uint64 instruction);
+    std::string SAVE_32_(uint64 instruction);
+    std::string SAVEF(uint64 instruction);
+    std::string SB_16_(uint64 instruction);
+    std::string SB_GP_(uint64 instruction);
+    std::string SB_S9_(uint64 instruction);
+    std::string SB_U12_(uint64 instruction);
+    std::string SBE(uint64 instruction);
+    std::string SBX(uint64 instruction);
+    std::string SC(uint64 instruction);
+    std::string SCD(uint64 instruction);
+    std::string SCDP(uint64 instruction);
+    std::string SCE(uint64 instruction);
+    std::string SCWP(uint64 instruction);
+    std::string SCWPE(uint64 instruction);
+    std::string SD_GP_(uint64 instruction);
+    std::string SD_S9_(uint64 instruction);
+    std::string SD_U12_(uint64 instruction);
+    std::string SDBBP_16_(uint64 instruction);
+    std::string SDBBP_32_(uint64 instruction);
+    std::string SDC1_GP_(uint64 instruction);
+    std::string SDC1_S9_(uint64 instruction);
+    std::string SDC1_U12_(uint64 instruction);
+    std::string SDC1X(uint64 instruction);
+    std::string SDC1XS(uint64 instruction);
+    std::string SDC2(uint64 instruction);
+    std::string SDM(uint64 instruction);
+    std::string SDPC_48_(uint64 instruction);
+    std::string SDX(uint64 instruction);
+    std::string SDXS(uint64 instruction);
+    std::string SEB(uint64 instruction);
+    std::string SEH(uint64 instruction);
+    std::string SEL_D(uint64 instruction);
+    std::string SEL_S(uint64 instruction);
+    std::string SELEQZ_D(uint64 instruction);
+    std::string SELEQZ_S(uint64 instruction);
+    std::string SELNEZ_D(uint64 instruction);
+    std::string SELNEZ_S(uint64 instruction);
+    std::string SEQI(uint64 instruction);
+    std::string SH_16_(uint64 instruction);
+    std::string SH_GP_(uint64 instruction);
+    std::string SH_S9_(uint64 instruction);
+    std::string SH_U12_(uint64 instruction);
+    std::string SHE(uint64 instruction);
+    std::string SHILO(uint64 instruction);
+    std::string SHILOV(uint64 instruction);
+    std::string SHLL_PH(uint64 instruction);
+    std::string SHLL_QB(uint64 instruction);
+    std::string SHLL_S_PH(uint64 instruction);
+    std::string SHLL_S_W(uint64 instruction);
+    std::string SHLLV_PH(uint64 instruction);
+    std::string SHLLV_QB(uint64 instruction);
+    std::string SHLLV_S_PH(uint64 instruction);
+    std::string SHLLV_S_W(uint64 instruction);
+    std::string SHRA_PH(uint64 instruction);
+    std::string SHRA_QB(uint64 instruction);
+    std::string SHRA_R_PH(uint64 instruction);
+    std::string SHRA_R_QB(uint64 instruction);
+    std::string SHRA_R_W(uint64 instruction);
+    std::string SHRAV_PH(uint64 instruction);
+    std::string SHRAV_QB(uint64 instruction);
+    std::string SHRAV_R_PH(uint64 instruction);
+    std::string SHRAV_R_QB(uint64 instruction);
+    std::string SHRAV_R_W(uint64 instruction);
+    std::string SHRL_PH(uint64 instruction);
+    std::string SHRL_QB(uint64 instruction);
+    std::string SHRLV_PH(uint64 instruction);
+    std::string SHRLV_QB(uint64 instruction);
+    std::string SHX(uint64 instruction);
+    std::string SHXS(uint64 instruction);
+    std::string SIGRIE(uint64 instruction);
+    std::string SLL_16_(uint64 instruction);
+    std::string SLL_32_(uint64 instruction);
+    std::string SLLV(uint64 instruction);
+    std::string SLT(uint64 instruction);
+    std::string SLTI(uint64 instruction);
+    std::string SLTIU(uint64 instruction);
+    std::string SLTU(uint64 instruction);
+    std::string SOV(uint64 instruction);
+    std::string SPECIAL2(uint64 instruction);
+    std::string SQRT_D(uint64 instruction);
+    std::string SQRT_S(uint64 instruction);
+    std::string SRA(uint64 instruction);
+    std::string SRAV(uint64 instruction);
+    std::string SRL_16_(uint64 instruction);
+    std::string SRL_32_(uint64 instruction);
+    std::string SRLV(uint64 instruction);
+    std::string SUB(uint64 instruction);
+    std::string SUB_D(uint64 instruction);
+    std::string SUB_S(uint64 instruction);
+    std::string SUBQ_PH(uint64 instruction);
+    std::string SUBQ_S_PH(uint64 instruction);
+    std::string SUBQ_S_W(uint64 instruction);
+    std::string SUBQH_PH(uint64 instruction);
+    std::string SUBQH_R_PH(uint64 instruction);
+    std::string SUBQH_R_W(uint64 instruction);
+    std::string SUBQH_W(uint64 instruction);
+    std::string SUBU_16_(uint64 instruction);
+    std::string SUBU_32_(uint64 instruction);
+    std::string SUBU_PH(uint64 instruction);
+    std::string SUBU_QB(uint64 instruction);
+    std::string SUBU_S_PH(uint64 instruction);
+    std::string SUBU_S_QB(uint64 instruction);
+    std::string SUBUH_QB(uint64 instruction);
+    std::string SUBUH_R_QB(uint64 instruction);
+    std::string SW_16_(uint64 instruction);
+    std::string SW_4X4_(uint64 instruction);
+    std::string SW_GP16_(uint64 instruction);
+    std::string SW_GP_(uint64 instruction);
+    std::string SW_S9_(uint64 instruction);
+    std::string SW_SP_(uint64 instruction);
+    std::string SW_U12_(uint64 instruction);
+    std::string SWC1_GP_(uint64 instruction);
+    std::string SWC1_S9_(uint64 instruction);
+    std::string SWC1_U12_(uint64 instruction);
+    std::string SWC1X(uint64 instruction);
+    std::string SWC1XS(uint64 instruction);
+    std::string SWC2(uint64 instruction);
+    std::string SWE(uint64 instruction);
+    std::string SWM(uint64 instruction);
+    std::string SWPC_48_(uint64 instruction);
+    std::string SWX(uint64 instruction);
+    std::string SWXS(uint64 instruction);
+    std::string SYNC(uint64 instruction);
+    std::string SYNCI(uint64 instruction);
+    std::string SYNCIE(uint64 instruction);
+    std::string SYSCALL_16_(uint64 instruction);
+    std::string SYSCALL_32_(uint64 instruction);
+    std::string TEQ(uint64 instruction);
+    std::string TLBGINV(uint64 instruction);
+    std::string TLBGINVF(uint64 instruction);
+    std::string TLBGP(uint64 instruction);
+    std::string TLBGR(uint64 instruction);
+    std::string TLBGWI(uint64 instruction);
+    std::string TLBGWR(uint64 instruction);
+    std::string TLBINV(uint64 instruction);
+    std::string TLBINVF(uint64 instruction);
+    std::string TLBP(uint64 instruction);
+    std::string TLBR(uint64 instruction);
+    std::string TLBWI(uint64 instruction);
+    std::string TLBWR(uint64 instruction);
+    std::string TNE(uint64 instruction);
+    std::string TRUNC_L_D(uint64 instruction);
+    std::string TRUNC_L_S(uint64 instruction);
+    std::string TRUNC_W_D(uint64 instruction);
+    std::string TRUNC_W_S(uint64 instruction);
+    std::string UALDM(uint64 instruction);
+    std::string UALH(uint64 instruction);
+    std::string UALWM(uint64 instruction);
+    std::string UASDM(uint64 instruction);
+    std::string UASH(uint64 instruction);
+    std::string UASWM(uint64 instruction);
+    std::string UDI(uint64 instruction);
+    std::string WAIT(uint64 instruction);
+    std::string WRDSP(uint64 instruction);
+    std::string WRPGPR(uint64 instruction);
+    std::string XOR_16_(uint64 instruction);
+    std::string XOR_32_(uint64 instruction);
+    std::string XORI(uint64 instruction);
+    std::string YIELD(uint64 instruction);
+
+    static Pool P_SYSCALL[2];
+    static Pool P_RI[4];
+    static Pool P_ADDIU[2];
+    static Pool P_TRAP[2];
+    static Pool P_CMOVE[2];
+    static Pool P_D_MT_VPE[2];
+    static Pool P_E_MT_VPE[2];
+    static Pool _P_MT_VPE[2];
+    static Pool P_MT_VPE[8];
+    static Pool P_DVP[2];
+    static Pool P_SLTU[2];
+    static Pool _POOL32A0[128];
+    static Pool ADDQ__S__PH[2];
+    static Pool MUL__S__PH[2];
+    static Pool ADDQH__R__PH[2];
+    static Pool ADDQH__R__W[2];
+    static Pool ADDU__S__QB[2];
+    static Pool ADDU__S__PH[2];
+    static Pool ADDUH__R__QB[2];
+    static Pool SHRAV__R__PH[2];
+    static Pool SHRAV__R__QB[2];
+    static Pool SUBQ__S__PH[2];
+    static Pool SUBQH__R__PH[2];
+    static Pool SUBQH__R__W[2];
+    static Pool SUBU__S__QB[2];
+    static Pool SUBU__S__PH[2];
+    static Pool SHRA__R__PH[2];
+    static Pool SUBUH__R__QB[2];
+    static Pool SHLLV__S__PH[2];
+    static Pool SHLL__S__PH[4];
+    static Pool PRECR_SRA__R__PH_W[2];
+    static Pool _POOL32A5[128];
+    static Pool PP_LSX[16];
+    static Pool PP_LSXS[16];
+    static Pool P_LSX[2];
+    static Pool POOL32Axf_1_0[4];
+    static Pool POOL32Axf_1_1[4];
+    static Pool POOL32Axf_1_3[4];
+    static Pool POOL32Axf_1_4[2];
+    static Pool MAQ_S_A__W_PHR[2];
+    static Pool MAQ_S_A__W_PHL[2];
+    static Pool POOL32Axf_1_5[2];
+    static Pool POOL32Axf_1_7[4];
+    static Pool POOL32Axf_1[8];
+    static Pool POOL32Axf_2_DSP__0_7[8];
+    static Pool POOL32Axf_2_DSP__8_15[8];
+    static Pool POOL32Axf_2_DSP__16_23[8];
+    static Pool POOL32Axf_2_DSP__24_31[8];
+    static Pool POOL32Axf_2[4];
+    static Pool POOL32Axf_4[128];
+    static Pool POOL32Axf_5_group0[32];
+    static Pool POOL32Axf_5_group1[32];
+    static Pool ERETx[2];
+    static Pool POOL32Axf_5_group3[32];
+    static Pool POOL32Axf_5[4];
+    static Pool SHRA__R__QB[2];
+    static Pool POOL32Axf_7[8];
+    static Pool POOL32Axf[8];
+    static Pool _POOL32A7[8];
+    static Pool P32A[8];
+    static Pool P_GP_D[2];
+    static Pool P_GP_W[4];
+    static Pool POOL48I[32];
+    static Pool PP_SR[4];
+    static Pool P_SR_F[8];
+    static Pool P_SR[2];
+    static Pool P_SLL[5];
+    static Pool P_SHIFT[16];
+    static Pool P_ROTX[4];
+    static Pool P_INS[4];
+    static Pool P_EXT[4];
+    static Pool P_U12[16];
+    static Pool RINT_fmt[2];
+    static Pool ADD_fmt0[2];
+    static Pool SELEQZ_fmt[2];
+    static Pool CLASS_fmt[2];
+    static Pool SUB_fmt0[2];
+    static Pool SELNEZ_fmt[2];
+    static Pool MUL_fmt0[2];
+    static Pool SEL_fmt[2];
+    static Pool DIV_fmt0[2];
+    static Pool ADD_fmt1[2];
+    static Pool SUB_fmt1[2];
+    static Pool MUL_fmt1[2];
+    static Pool MADDF_fmt[2];
+    static Pool DIV_fmt1[2];
+    static Pool MSUBF_fmt[2];
+    static Pool POOL32F_0[64];
+    static Pool MIN_fmt[2];
+    static Pool MAX_fmt[2];
+    static Pool MINA_fmt[2];
+    static Pool MAXA_fmt[2];
+    static Pool CVT_L_fmt[2];
+    static Pool RSQRT_fmt[2];
+    static Pool FLOOR_L_fmt[2];
+    static Pool CVT_W_fmt[2];
+    static Pool SQRT_fmt[2];
+    static Pool FLOOR_W_fmt[2];
+    static Pool RECIP_fmt[2];
+    static Pool CEIL_L_fmt[2];
+    static Pool CEIL_W_fmt[2];
+    static Pool TRUNC_L_fmt[2];
+    static Pool TRUNC_W_fmt[2];
+    static Pool ROUND_L_fmt[2];
+    static Pool ROUND_W_fmt[2];
+    static Pool POOL32Fxf_0[64];
+    static Pool MOV_fmt[4];
+    static Pool ABS_fmt[4];
+    static Pool NEG_fmt[4];
+    static Pool CVT_D_fmt[4];
+    static Pool CVT_S_fmt[4];
+    static Pool POOL32Fxf_1[32];
+    static Pool POOL32Fxf[4];
+    static Pool POOL32F_3[8];
+    static Pool CMP_condn_S[32];
+    static Pool CMP_condn_D[32];
+    static Pool POOL32F_5[8];
+    static Pool POOL32F[8];
+    static Pool POOL32S_0[64];
+    static Pool POOL32Sxf_4[128];
+    static Pool POOL32Sxf[8];
+    static Pool POOL32S_4[8];
+    static Pool POOL32S[8];
+    static Pool P_LUI[2];
+    static Pool P_GP_LH[2];
+    static Pool P_GP_SH[2];
+    static Pool P_GP_CP1[4];
+    static Pool P_GP_M64[4];
+    static Pool P_GP_BH[8];
+    static Pool P_LS_U12[16];
+    static Pool P_PREF_S9_[2];
+    static Pool P_LS_S0[16];
+    static Pool ASET_ACLR[2];
+    static Pool P_LL[4];
+    static Pool P_SC[4];
+    static Pool P_LLD[8];
+    static Pool P_SCD[8];
+    static Pool P_LS_S1[16];
+    static Pool P_PREFE[2];
+    static Pool P_LLE[4];
+    static Pool P_SCE[4];
+    static Pool P_LS_E0[16];
+    static Pool P_LS_WM[2];
+    static Pool P_LS_UAWM[2];
+    static Pool P_LS_DM[2];
+    static Pool P_LS_UADM[2];
+    static Pool P_LS_S9[8];
+    static Pool P_BAL[2];
+    static Pool P_BALRSC[2];
+    static Pool P_J[16];
+    static Pool P_BR3A[32];
+    static Pool P_BR1[4];
+    static Pool P_BR2[4];
+    static Pool P_BRI[8];
+    static Pool P32[32];
+    static Pool P16_SYSCALL[2];
+    static Pool P16_RI[4];
+    static Pool P16_MV[2];
+    static Pool P16_SHIFT[2];
+    static Pool POOL16C_00[4];
+    static Pool POOL16C_0[2];
+    static Pool P16C[2];
+    static Pool P16_A1[2];
+    static Pool P_ADDIU_RS5_[2];
+    static Pool P16_A2[2];
+    static Pool P16_ADDU[2];
+    static Pool P16_JRC[2];
+    static Pool P16_BR1[2];
+    static Pool P16_BR[2];
+    static Pool P16_SR[2];
+    static Pool P16_4X4[4];
+    static Pool P16_LB[4];
+    static Pool P16_LH[4];
+    static Pool P16[32];
+    static Pool MAJOR[2];
+
+};
+
+#endif
diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt
index 70cfb9c..6302469 100644
--- a/docs/COLO-FT.txt
+++ b/docs/COLO-FT.txt
@@ -110,6 +110,40 @@
 HeartBeat has not been implemented yet, so you need to trigger failover process
 by using 'x-colo-lost-heartbeat' command.
 
+== COLO operation status ==
+
++-----------------+
+|                 |
+|    Start COLO   |
+|                 |
++--------+--------+
+         |
+         |  Main qmp command:
+         |  migrate-set-capabilities with x-colo
+         |  migrate
+         |
+         v
++--------+--------+
+|                 |
+|  COLO running   |
+|                 |
++--------+--------+
+         |
+         |  Main qmp command:
+         |  x-colo-lost-heartbeat
+         |  or
+         |  some error happened
+         v
++--------+--------+
+|                 |  send qmp event:
+|  COLO failover  |  COLO_EXIT
+|                 |
++-----------------+
+
+COLO uses qmp commands to switch and report its operation status.
+The diagram shows only the main qmp commands; the details are given
+in the test procedure below.
+
 == Test procedure ==
 1. Startup qemu
 Primary:
diff --git a/docs/devel/memory.txt b/docs/devel/memory.txt
index c1dee12..42577e1 100644
--- a/docs/devel/memory.txt
+++ b/docs/devel/memory.txt
@@ -326,8 +326,15 @@
 MMIO Operations
 ---------------
 
-MMIO regions are provided with ->read() and ->write() callbacks; in addition
-various constraints can be supplied to control how these callbacks are called:
+MMIO regions are provided with ->read() and ->write() callbacks,
+which are sufficient for most devices. Some devices change behaviour
+based on the attributes used for the memory transaction, or need
+to be able to respond that the access should provoke a bus error
+rather than completing successfully; those devices can use the
+->read_with_attrs() and ->write_with_attrs() callbacks instead.
+
+In addition various constraints can be supplied to control how these
+callbacks are called:
 
  - .valid.min_access_size, .valid.max_access_size define the access sizes
    (in bytes) which the device accepts; accesses outside this range will
@@ -342,5 +349,3 @@
  - .impl.unaligned specifies that the *implementation* supports unaligned
    accesses; if false, unaligned accesses will be emulated by two aligned
    accesses.
- - .old_mmio eases the porting of code that was formerly using
-   cpu_register_io_memory(). It should not be used in new code.
diff --git a/docs/devel/testing.rst b/docs/devel/testing.rst
index 727c401..a227754 100644
--- a/docs/devel/testing.rst
+++ b/docs/devel/testing.rst
@@ -43,15 +43,13 @@
 
 3. Add the test to ``tests/Makefile.include``. First, name the unit test
    program and add it to ``$(check-unit-y)``; then add a rule to build the
-   executable. Optionally, you can add a magical variable to support ``gcov``.
-   For example:
+   executable.  For example:
 
 .. code::
 
   check-unit-y += tests/foo-test$(EXESUF)
   tests/foo-test$(EXESUF): tests/foo-test.o $(test-util-obj-y)
   ...
-  gcov-files-foo-test-y = util/foo.c
 
 Since unit tests don't require environment variables, the simplest way to debug
 a unit test failure is often directly invoking it or even running it under
@@ -61,6 +59,7 @@
 and gtester options. If necessary, you can run
 
 .. code::
+
   make check-unit V=1
 
 and copy the actual command line which executes the unit test, then run
@@ -118,6 +117,7 @@
 from the output of
 
 .. code::
+
   make check-qtest V=1
 
 which you can run manually.
diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
index 5f158a6..7231c2d 100644
--- a/docs/nvdimm.txt
+++ b/docs/nvdimm.txt
@@ -49,8 +49,9 @@
 and "-device" are provided.
 
 For above command line options, if the guest OS has the proper NVDIMM
-driver, it should be able to detect a NVDIMM device which is in the
-persistent memory mode and whose size is $NVDIMM_SIZE.
+driver (e.g. "CONFIG_ACPI_NFIT=y" under Linux), it should be able to
+detect a NVDIMM device which is in the persistent memory mode and whose
+size is $NVDIMM_SIZE.
 
 Note:
 
diff --git a/docs/vfio-ap.txt b/docs/vfio-ap.txt
new file mode 100644
index 0000000..1233968
--- /dev/null
+++ b/docs/vfio-ap.txt
@@ -0,0 +1,825 @@
+Adjunct Processor (AP) Device
+=============================
+
+Contents:
+=========
+* Introduction
+* AP Architectural Overview
+* Start Interpretive Execution (SIE) Instruction
+* AP Matrix Configuration on Linux Host
+* Starting a Linux Guest Configured with an AP Matrix
+* Example: Configure AP Matrices for Three Linux Guests
+
+Introduction:
+============
+The IBM Adjunct Processor (AP) Cryptographic Facility is comprised
+of three AP instructions and from 1 to 256 PCIe cryptographic adapter cards.
+These AP devices provide cryptographic functions to all CPUs assigned to a
+linux system running in an IBM Z system LPAR.
+
+On s390x, AP adapter cards are exposed via the AP bus. This document
+describes how those cards may be made available to KVM guests using the
+VFIO mediated device framework.
+
+AP Architectural Overview:
+=========================
+In order to understand the terminology used in the rest of this document, let's
+start with some definitions:
+
+* AP adapter
+
+  An AP adapter is an IBM Z adapter card that can perform cryptographic
+  functions. There can be from 0 to 256 adapters assigned to an LPAR depending
+  on the machine model. Adapters assigned to the LPAR in which a linux host is
+  running will be available to the linux host. Each adapter is identified by a
+  number from 0 to 255; however, the maximum adapter number allowed is
+  determined by machine model. When installed, an AP adapter is accessed by
+  AP instructions executed by any CPU.
+
+* AP domain
+
+  An adapter is partitioned into domains. Each domain can be thought of as
+  a set of hardware registers for processing AP instructions. An adapter can
+  hold up to 256 domains; however, the maximum domain number allowed is
+  determined by machine model. Each domain is identified by a number from 0 to
+  255. Domains can be further classified into two types:
+
+    * Usage domains are domains that can be accessed directly to process AP
+      commands
+
+    * Control domains are domains that are accessed indirectly by AP
+      commands sent to a usage domain to control or change the domain; for
+      example, to set a secure private key for the domain.
+
+* AP Queue
+
+  An AP queue is the means by which an AP command-request message is sent to an
+  AP usage domain inside a specific AP. An AP queue is identified by a tuple
+  comprised of an AP adapter ID (APID) and an AP queue index (APQI). The
+  APQI corresponds to a given usage domain number within the adapter. This tuple
+  forms an AP Queue Number (APQN) uniquely identifying an AP queue. AP
+  instructions include a field containing the APQN to identify the AP queue to
+  which the AP command-request message is to be sent for processing.
+
+* AP Instructions:
+
+  There are three AP instructions:
+
+  * NQAP: to enqueue an AP command-request message to a queue
+  * DQAP: to dequeue an AP command-reply message from a queue
+  * PQAP: to administer the queues
+
+  AP instructions identify the domain that is targeted to process the AP
+  command; this must be one of the usage domains. An AP command may modify a
+  domain that is not one of the usage domains, but the modified domain
+  must be one of the control domains.
+
+Start Interpretive Execution (SIE) Instruction
+==============================================
+A KVM guest is started by executing the Start Interpretive Execution (SIE)
+instruction. The SIE state description is a control block that contains the
+state information for a KVM guest and is supplied as input to the SIE
+instruction. The SIE state description contains a satellite control block called
+the Crypto Control Block (CRYCB). The CRYCB contains three fields to identify
+the adapters, usage domains and control domains assigned to the KVM guest:
+
+* The AP Mask (APM) field is a bit mask that identifies the AP adapters assigned
+  to the KVM guest. Each bit in the mask, from left to right, corresponds to
+  an APID from 0-255. If a bit is set, the corresponding adapter is valid for
+  use by the KVM guest.
+
+* The AP Queue Mask (AQM) field is a bit mask identifying the AP usage domains
+  assigned to the KVM guest. Each bit in the mask, from left to right,
+  corresponds to an AP queue index (APQI) from 0-255. If a bit is set, the
+  corresponding queue is valid for use by the KVM guest.
+
+* The AP Domain Mask (ADM) field is a bit mask that identifies the AP control
+  domains assigned to the KVM guest. The ADM bit mask controls which domains
+  can be changed by an AP command-request message sent to a usage domain from
+  the guest. Each bit in the mask, from left to right, corresponds to a domain
+  from 0-255. If a bit is set, the corresponding domain can be modified by an
+  AP command-request message sent to a usage domain.
+
+If you recall from the description of an AP Queue, AP instructions include
+an APQN to identify the AP adapter and AP queue to which an AP command-request
+message is to be sent (NQAP and PQAP instructions), or from which a
+command-reply message is to be received (DQAP instruction). The validity of an
+APQN is defined by the matrix calculated from the APM and AQM; it is the
+cross product of all assigned adapter numbers (APM) with all assigned queue
+indexes (AQM). For example, if adapters 1 and 2 and usage domains 5 and 6 are
+assigned to a guest, the APQNs (1,5), (1,6), (2,5) and (2,6) will be valid for
+the guest.
+
+The APQNs can provide secure key functionality - i.e., a private key is stored
+on the adapter card for each of its domains - so each APQN must be assigned to
+at most one guest or the linux host.
+
+   Example 1: Valid configuration:
+   ------------------------------
+   Guest1: adapters 1,2  domains 5,6
+   Guest2: adapters 1,2  domain  7
+
+   This is valid because both guests have a unique set of APQNs: Guest1 has
+   APQNs (1,5), (1,6), (2,5) and (2,6); Guest2 has APQNs (1,7) and (2,7).
+
+   Example 2: Valid configuration:
+   ------------------------------
+   Guest1: adapters 1,2 domains 5,6
+   Guest2: adapters 3,4 domains 5,6
+
+   This is also valid because both guests have a unique set of APQNs:
+      Guest1 has APQNs (1,5), (1,6), (2,5), (2,6);
+      Guest2 has APQNs (3,5), (3,6), (4,5), (4,6)
+
+   Example 3: Invalid configuration:
+   --------------------------------
+   Guest1: adapters 1,2  domains 5,6
+   Guest2: adapter  1    domains 6,7
+
+   This is an invalid configuration because both guests have access to
+   APQN (1,6).
+
+AP Matrix Configuration on Linux Host:
+=====================================
+A linux system is a guest of the LPAR in which it is running and has access to
+the AP resources configured for the LPAR. The LPAR's AP matrix is
+configured via its Activation Profile which can be edited on the HMC. When the
+linux system is started, the AP bus will detect the AP devices assigned to the
+LPAR and create the following in sysfs:
+
+/sys/bus/ap
+... [devices]
+...... xx.yyyy
+...... ...
+...... cardxx
+...... ...
+
+Where:
+    cardxx     is AP adapter number xx (in hex)
+    xx.yyyy    is an APQN with xx specifying the APID and yyyy specifying the
+               APQI
+
+For example, if AP adapters 5 and 6 and domains 4, 71 (0x47), 171 (0xab) and
+255 (0xff) are configured for the LPAR, the sysfs representation on the linux
+host system would look like this:
+
+/sys/bus/ap
+... [devices]
+...... 05.0004
+...... 05.0047
+...... 05.00ab
+...... 05.00ff
+...... 06.0004
+...... 06.0047
+...... 06.00ab
+...... 06.00ff
+...... card05
+...... card06
+
+A set of default device drivers is also created to control each type of AP
+device that can be assigned to the LPAR on which a linux host is running:
+
+/sys/bus/ap
+... [drivers]
+...... [cex2acard]        for Crypto Express 2/3 accelerator cards
+...... [cex2aqueue]       for AP queues served by Crypto Express 2/3
+                          accelerator cards
+...... [cex4card]         for Crypto Express 4/5/6 accelerator and coprocessor
+                          cards
+...... [cex4queue]        for AP queues served by Crypto Express 4/5/6
+                          accelerator and coprocessor cards
+...... [pcixcccard]       for Crypto Express 2/3 coprocessor cards
+...... [pcixccqueue]      for AP queues served by Crypto Express 2/3
+                          coprocessor cards
+
+Binding AP devices to device drivers
+------------------------------------
+There are two sysfs files that specify bitmasks marking a subset of the APQN
+range as 'usable by the default AP queue device drivers' or 'not usable by the
+default device drivers' and thus available for use by the alternate device
+driver(s). The sysfs locations of the masks are:
+
+   /sys/bus/ap/apmask
+   /sys/bus/ap/aqmask
+
+   The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs
+   (APID). Each bit in the mask, from left to right (i.e., from most significant
+   to least significant bit in big endian order), corresponds to an APID from
+   0-255. If a bit is set, the APID is marked as usable only by the default AP
+   queue device drivers; otherwise, the APID is usable by the vfio_ap
+   device driver.
+
+   The 'aqmask' is a 256-bit mask that identifies a set of AP queue indexes
+   (APQI). Each bit in the mask, from left to right (i.e., from most significant
+   to least significant bit in big endian order), corresponds to an APQI from
+   0-255. If a bit is set, the APQI is marked as usable only by the default AP
+   queue device drivers; otherwise, the APQI is usable by the vfio_ap device
+   driver.
+
+   Take, for example, the following mask:
+
+      0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+
+    It indicates:
+
+      1, 2, 3, 4, 5, and 7-255 belong to the default drivers' pool, and 0 and 6
+      belong to the vfio_ap device driver's pool.
+
+   The APQN of each AP queue device assigned to the linux host is checked by the
+   AP bus against the set of APQNs derived from the cross product of APIDs
+   and APQIs marked as usable only by the default AP queue device drivers. If a
+   match is detected, only the default AP queue device drivers will be probed;
+   otherwise, the vfio_ap device driver will be probed.
+
+   By default, the two masks are set to reserve all APQNs for use by the default
+   AP queue device drivers. There are two ways the default masks can be changed:
+
+   1. The sysfs mask files can be edited by echoing a string into the
+      respective sysfs mask file in one of two formats:
+
+      * An absolute hex string starting with 0x - like "0x12345678" - sets
+        the mask. If the given string is shorter than the mask, it is padded
+        with 0s on the right; for example, specifying a mask value of 0x41 is
+        the same as specifying:
+
+           0x4100000000000000000000000000000000000000000000000000000000000000
+
+        Keep in mind that the mask reads from left to right (i.e., most
+        significant to least significant bit in big endian order), so the mask
+        above identifies device numbers 1 and 7 (01000001).
+
+        If the string is longer than the mask, the operation is terminated with
+        an error (EINVAL).
+
+      * Individual bits in the mask can be switched on and off by specifying
+        each bit number to be switched in a comma separated list. Each bit
+        number string must be prepended with a plus ('+') or minus ('-') to indicate
+        the corresponding bit is to be switched on ('+') or off ('-'). Some
+        valid values are:
+
+           "+0"    switches bit 0 on
+           "-13"   switches bit 13 off
+           "+0x41" switches bit 65 on
+           "-0xff" switches bit 255 off
+
+           The following example:
+              +0,-6,+0x47,-0xf0
+
+              Switches bits 0 and 71 (0x47) on
+              Switches bits 6 and 240 (0xf0) off
+
+        Note that the bits not specified in the list remain as they were before
+        the operation.
+
+   2. The masks can also be changed at boot time via parameters on the kernel
+      command line like this:
+
+         ap.apmask=0xffff ap.aqmask=0x40
+
+         This would create the following masks:
+
+            apmask:
+            0xffff000000000000000000000000000000000000000000000000000000000000
+
+            aqmask:
+            0x4000000000000000000000000000000000000000000000000000000000000000
+
+         Resulting in these two pools:
+
+            default drivers pool:    adapter 0-15, domain 1
+            alternate drivers pool:  adapter 16-255, domains 0, 2-255
+
+Configuring an AP matrix for a linux guest.
+------------------------------------------
+The sysfs interfaces for configuring an AP matrix for a guest are built on the
+VFIO mediated device framework. To configure an AP matrix for a guest, a
+mediated matrix device must first be created for the /sys/devices/vfio_ap/matrix
+device. When the vfio_ap device driver is loaded, it registers with the VFIO
+mediated device framework. When the driver registers, the sysfs interfaces for
+creating mediated matrix devices are created:
+
+/sys/devices
+... [vfio_ap]
+......[matrix]
+......... [mdev_supported_types]
+............ [vfio_ap-passthrough]
+............... create
+............... [devices]
+
+A mediated AP matrix device is created by writing a UUID to the attribute file
+named 'create', for example:
+
+   uuidgen > create
+
+   or
+
+   echo $uuid > create
+
+When a mediated AP matrix device is created, a sysfs directory named after
+the UUID is created in the 'devices' subdirectory:
+
+/sys/devices
+... [vfio_ap]
+......[matrix]
+......... [mdev_supported_types]
+............ [vfio_ap-passthrough]
+............... create
+............... [devices]
+.................. [$uuid]
+
+There will also be three sets of attribute files created in the mediated
+matrix device's sysfs directory to configure an AP matrix for the
+KVM guest:
+
+/sys/devices
+... [vfio_ap]
+......[matrix]
+......... [mdev_supported_types]
+............ [vfio_ap-passthrough]
+............... create
+............... [devices]
+.................. [$uuid]
+..................... assign_adapter
+..................... assign_control_domain
+..................... assign_domain
+..................... matrix
+..................... unassign_adapter
+..................... unassign_control_domain
+..................... unassign_domain
+
+assign_adapter
+   To assign an AP adapter to the mediated matrix device, its APID is written
+   to the 'assign_adapter' file. This may be done multiple times to assign more
+   than one adapter. The APID may be specified using conventional semantics
+   as a decimal, hexadecimal, or octal number. For example, to assign adapters
+   4, 5 and 16 to a mediated matrix device in decimal, hexadecimal and octal
+   respectively:
+
+       echo 4 > assign_adapter
+       echo 0x5 > assign_adapter
+       echo 020 > assign_adapter
+
+   In order to successfully assign an adapter:
+
+   * The adapter number specified must represent a value from 0 up to the
+     maximum adapter number allowed by the machine model. If an adapter number
+     higher than the maximum is specified, the operation will terminate with
+     an error (ENODEV).
+
+   * All APQNs that can be derived from the adapter ID being assigned and the
+     IDs of the previously assigned domains must be bound to the vfio_ap device
+     driver. If no domains have yet been assigned, then there must be at least
+     one APQN with the specified APID bound to the vfio_ap driver. If no such
+     APQNs are bound to the driver, the operation will terminate with an
+     error (EADDRNOTAVAIL).
+
+     No APQN that can be derived from the adapter ID and the IDs of the
+     previously assigned domains can be assigned to another mediated matrix
+     device. If an APQN is assigned to another mediated matrix device, the
+     operation will terminate with an error (EADDRINUSE).
+
+unassign_adapter
+   To unassign an AP adapter, its APID is written to the 'unassign_adapter'
+   file. This may also be done multiple times to unassign more than one adapter.
+
+assign_domain
+   To assign a usage domain, the domain number is written into the
+   'assign_domain' file. This may be done multiple times to assign more than one
+   usage domain. The domain number is specified using conventional semantics as
+   a decimal, hexadecimal, or octal number. For example, to assign usage domains
+   4, 8, and 71 to a mediated matrix device in decimal, hexadecimal and octal
+   respectively:
+
+      echo 4 > assign_domain
+      echo 0x8 > assign_domain
+      echo 0107 > assign_domain
+
+   In order to successfully assign a domain:
+
+   * The domain number specified must represent a value from 0 up to the
+     maximum domain number allowed by the machine model. If a domain number
+     higher than the maximum is specified, the operation will terminate with
+     an error (ENODEV).
+
+   * All APQNs that can be derived from the domain ID being assigned and the IDs
+     of the previously assigned adapters must be bound to the vfio_ap device
+     driver. If no adapters have yet been assigned, then there must be at least
+     one APQN with the specified APQI bound to the vfio_ap driver. If no such
+     APQNs are bound to the driver, the operation will terminate with an
+     error (EADDRNOTAVAIL).
+
+     No APQN that can be derived from the domain ID being assigned and the IDs
+     of the previously assigned adapters can be assigned to another mediated
+     matrix device. If an APQN is assigned to another mediated matrix device,
+     the operation will terminate with an error (EADDRINUSE).
+
+unassign_domain
+   To unassign a usage domain, the domain number is written into the
+   'unassign_domain' file. This may be done multiple times to unassign more than
+   one usage domain.
+
+assign_control_domain
+   To assign a control domain, the domain number is written into the
+   'assign_control_domain' file. This may be done multiple times to
+   assign more than one control domain. The domain number may be specified using
+   conventional semantics as a decimal, hexadecimal, or octal number. For
+   example, to assign control domains 4, 8, and 71 to a mediated matrix device
+   in decimal, hexadecimal and octal respectively:
+
+      echo 4 > assign_control_domain
+      echo 0x8 > assign_control_domain
+      echo 0107 > assign_control_domain
+
+   In order to successfully assign a control domain, the domain number
+   specified must represent a value from 0 up to the maximum domain number
+   allowed by the machine model. If a control domain number higher than the
+   maximum is specified, the operation will terminate with an error (ENODEV).
+
+unassign_control_domain
+   To unassign a control domain, the domain number is written into the
+   'unassign_control_domain' file. This may be done multiple times to unassign
+   more than one control domain.
+
+Notes: Hot plug/unplug is not currently supported for mediated AP matrix
+devices, so no changes to the AP matrix will be allowed while a guest using
+the mediated matrix device is running. Attempts to assign an adapter,
+domain or control domain will be rejected and an error (EBUSY) returned.
+
+Starting a Linux Guest Configured with an AP Matrix:
+===================================================
+To provide a mediated matrix device for use by a guest, the following option
+must be specified on the QEMU command line:
+
+   -device vfio-ap,sysfsdev=$path-to-mdev
+
+The sysfsdev parameter specifies the path to the mediated matrix device.
+There are a number of ways to specify this path:
+
+/sys/devices/vfio_ap/matrix/$uuid
+/sys/bus/mdev/devices/$uuid
+/sys/bus/mdev/drivers/vfio_mdev/$uuid
+/sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid
+
+When the linux guest is started, the guest will open the mediated
+matrix device's file descriptor to get information about the mediated matrix
+device. The vfio_ap device driver will update the APM, AQM, and ADM fields in
+the guest's CRYCB with the adapter, usage domain and control domains assigned
+via the mediated matrix device's sysfs attribute files. Programs running on the
+linux guest will then:
+
+1. Have direct access to the APQNs derived from the cross product of the AP
+   adapter numbers (APID) and queue indexes (APQI) specified in the APM and AQM
+   fields of the guest's CRYCB respectively. These APQNs identify the AP queues
+   that are valid for use by the guest; meaning, AP commands can be sent by the
+   guest to any of these queues for processing.
+
+2. Have authorization to process AP commands to change a control domain
+   identified in the ADM field of the guest's CRYCB. The AP command must be sent
+   to a valid APQN (see 1 above).
+
+CPU model features:
+
+Three CPU model features are available for controlling guest access to AP
+facilities:
+
+1. AP facilities feature
+
+   The AP facilities feature indicates that AP facilities are installed on the
+   guest. This feature will be exposed for use only if the AP facilities
+   are installed on the host system. The feature is s390-specific and is
+   represented as a parameter of the -cpu option on the QEMU command line:
+
+      qemu-system-s390x -cpu $model,ap=on|off
+
+      Where:
+
+         $model is the CPU model defined for the guest (defaults to the model of
+                the host system if not specified).
+
+         ap=on|off indicates whether AP facilities are installed (on) or not
+                   (off). The default for CPU models zEC12 or newer
+                   is ap=on. AP facilities must be installed on the guest if a
+                   vfio-ap device (-device vfio-ap,sysfsdev=$path) is configured
+                   for the guest, or the guest will fail to start.
+
+2. Query Configuration Information (QCI) facility
+
+   The QCI facility is used by the AP bus running on the guest to query the
+   configuration of the AP facilities. This facility will be available
+   only if the QCI facility is installed on the host system. The feature is
+   s390-specific and is represented as a parameter of the -cpu option on the
+   QEMU command line:
+
+      qemu-system-s390x -cpu $model,apqci=on|off
+
+      Where:
+
+         $model is the CPU model defined for the guest
+
+         apqci=on|off indicates whether the QCI facility is installed (on) or
+                      not (off). The default for CPU models zEC12 or newer
+                      is apqci=on; for older models, QCI will not be installed.
+
+                      If QCI is installed (apqci=on) but AP facilities are not
+                      (ap=off), an error message will be logged, but the guest
+                      will be allowed to start. It makes no sense to have QCI
+                      installed if the AP facilities are not; this is considered
+                      an invalid configuration.
+
+                      If the QCI facility is not installed, APQNs with an APQI
+                      greater than 15 will not be detected by the AP bus
+                      running on the guest.
+
+3. Adjunct Processor Facility Test (APFT) facility
+
+   The APFT facility is used by the AP bus running on the guest to test the
+   AP facilities available for a given AP queue. This facility will be available
+   only if the APFT facility is installed on the host system. The feature is
+   s390-specific and is represented as a parameter of the -cpu option on the
+   QEMU command line:
+
+      qemu-system-s390x -cpu $model,apft=on|off
+
+      Where:
+
+         $model is the CPU model defined for the guest (defaults to the model of
+                the host system if not specified).
+
+         apft=on|off indicates whether the APFT facility is installed (on) or
+                     not (off). The default for CPU models zEC12 and
+                     newer is apft=on; for older models, APFT will not be
+                     installed.
+
+                     If APFT is installed (apft=on) but AP facilities are not
+                     (ap=off), an error message will be logged, but the guest
+                     will be allowed to start. It makes no sense to have APFT
+                     installed if the AP facilities are not; this is considered
+                     an invalid configuration.
+
+                     It also makes no sense to turn APFT off because the AP bus
+                     running on the guest will not detect CEX4 and newer devices
+                     without it. Since only CEX4 and newer devices are supported
+                     for guest usage, no AP devices can be made accessible to a
+                     guest started without APFT installed.
+
+Example: Configure AP Matrixes for Three Linux Guests:
+=====================================================
+Let's now provide an example to illustrate how KVM guests may be given
+access to AP facilities. For this example, we will show how to configure
+three guests such that executing the lszcrypt command on the guests would
+look like this:
+
+Guest1
+------
+CARD.DOMAIN TYPE  MODE
+------------------------------
+05          CEX5C CCA-Coproc
+05.0004     CEX5C CCA-Coproc
+05.00ab     CEX5C CCA-Coproc
+06          CEX5A Accelerator
+06.0004     CEX5A Accelerator
+06.00ab     CEX5A Accelerator
+
+Guest2
+------
+CARD.DOMAIN TYPE  MODE
+------------------------------
+05          CEX5C CCA-Coproc
+05.0047     CEX5C CCA-Coproc
+05.00ff     CEX5C CCA-Coproc
+
+Guest3
+------
+CARD.DOMAIN TYPE  MODE
+------------------------------
+06          CEX5A Accelerator
+06.0047     CEX5A Accelerator
+06.00ff     CEX5A Accelerator
+
+These are the steps:
+
+1. Install the vfio_ap module on the linux host. The dependency chain for the
+   vfio_ap module is:
+   * iommu
+   * s390
+   * zcrypt
+   * vfio
+   * vfio_mdev
+   * vfio_mdev_device
+   * KVM
+
+   To build the vfio_ap module, the kernel build must be configured with the
+   following Kconfig elements selected:
+   * IOMMU_SUPPORT
+   * S390
+   * ZCRYPT
+   * S390_AP_IOMMU
+   * VFIO
+   * VFIO_MDEV
+   * VFIO_MDEV_DEVICE
+   * KVM
+
+   If using make menuconfig select the following to build the vfio_ap module:
+   -> Device Drivers
+      -> IOMMU Hardware Support
+         select S390 AP IOMMU Support
+      -> VFIO Non-Privileged userspace driver framework
+         -> Mediated device driver framework
+            -> VFIO driver for Mediated devices
+   -> I/O subsystem
+      -> VFIO support for AP devices
+
+2. Secure the AP queues to be used by the three guests so that the host can not
+   access them. To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff,
+   06.0004, 06.0047, 06.00ab, and 06.00ff for use by the vfio_ap device driver,
+   the corresponding APQNs must be removed from the default queue drivers pool
+   as follows:
+
+      echo -5,-6 > /sys/bus/ap/apmask
+
+      echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask
+
+   This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004,
+   06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The
+   sysfs directory for the vfio_ap device driver will now contain symbolic links
+   to the AP queue devices bound to it:
+
+   /sys/bus/ap
+   ... [drivers]
+   ...... [vfio_ap]
+   ......... [05.0004]
+   ......... [05.0047]
+   ......... [05.00ab]
+   ......... [05.00ff]
+   ......... [06.0004]
+   ......... [06.0047]
+   ......... [06.00ab]
+   ......... [06.00ff]
+
+   Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later)
+   can be bound to the vfio_ap device driver. The reason for this is to
+   simplify the implementation by not needlessly complicating the design by
+   supporting older devices that will go out of service in the relatively near
+   future, and for which there are few older systems on which to test.
+
+   The administrator, therefore, must take care to secure only AP queues that
+   can be bound to the vfio_ap device driver. The device type for a given AP
+   queue device can be read from the parent card's sysfs directory. For example,
+   to see the hardware type of the queue 05.0004:
+
+   cat /sys/bus/ap/devices/card05/hwtype
+
+   The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the
+   vfio_ap device driver.
+
+3. Create the mediated devices needed to configure the AP matrixes for the
+   three guests and to provide an interface to the vfio_ap driver for
+   use by the guests:
+
+   /sys/devices/vfio_ap/matrix/
+   --- [mdev_supported_types]
+   ------ [vfio_ap-passthrough] (passthrough mediated matrix device type)
+   --------- create
+   --------- [devices]
+
+   To create the mediated devices for the three guests:
+
+       uuidgen > create
+       uuidgen > create
+       uuidgen > create
+
+        or
+
+        echo $uuid1 > create
+        echo $uuid2 > create
+        echo $uuid3 > create
+
+   This will create three mediated devices in the [devices] subdirectory named
+   after the UUID used to create the mediated device. We'll call them $uuid1,
+   $uuid2 and $uuid3 and this is the sysfs directory structure after creation:
+
+   /sys/devices/vfio_ap/matrix/
+   --- [mdev_supported_types]
+   ------ [vfio_ap-passthrough]
+   --------- [devices]
+   ------------ [$uuid1]
+   --------------- assign_adapter
+   --------------- assign_control_domain
+   --------------- assign_domain
+   --------------- matrix
+   --------------- unassign_adapter
+   --------------- unassign_control_domain
+   --------------- unassign_domain
+
+   ------------ [$uuid2]
+   --------------- assign_adapter
+   --------------- assign_control_domain
+   --------------- assign_domain
+   --------------- matrix
+   --------------- unassign_adapter
+   --------------- unassign_control_domain
+   --------------- unassign_domain
+
+   ------------ [$uuid3]
+   --------------- assign_adapter
+   --------------- assign_control_domain
+   --------------- assign_domain
+   --------------- matrix
+   --------------- unassign_adapter
+   --------------- unassign_control_domain
+   --------------- unassign_domain
+
+4. The administrator now needs to configure the matrixes for the mediated
+   devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3).
+
+   This is how the matrix is configured for Guest1:
+
+      echo 5 > assign_adapter
+      echo 6 > assign_adapter
+      echo 4 > assign_domain
+      echo 0xab > assign_domain
+
+      Control domains can similarly be assigned using the assign_control_domain
+      sysfs file.
+
+      If a mistake is made configuring an adapter, domain or control domain,
+      you can use the unassign_xxx interfaces to unassign the adapter, domain or
+      control domain.
+
+      To display the matrix configuration for Guest1:
+
+         cat matrix
+
+         The output will display the APQNs in the format xx.yyyy, where xx is
+         the adapter number and yyyy is the domain number. The output for Guest1
+         will look like this:
+
+         05.0004
+         05.00ab
+         06.0004
+         06.00ab
+
+   This is how the matrix is configured for Guest2:
+
+      echo 5 > assign_adapter
+      echo 0x47 > assign_domain
+      echo 0xff > assign_domain
+
+   This is how the matrix is configured for Guest3:
+
+      echo 6 > assign_adapter
+      echo 0x47 > assign_domain
+      echo 0xff > assign_domain
+
+5. Start Guest1:
+
+   /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+      -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
+
+6. Start Guest2:
+
+   /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+      -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
+
+7. Start Guest3:
+
+   /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+      -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
+
+When the guest is shut down, the mediated matrix devices may be removed.
+
+Using our example again, to remove the mediated matrix device $uuid1:
+
+   /sys/devices/vfio_ap/matrix/
+      --- [mdev_supported_types]
+      ------ [vfio_ap-passthrough]
+      --------- [devices]
+      ------------ [$uuid1]
+      --------------- remove
+
+
+   echo 1 > remove
+
+   This will remove all of the mdev matrix device's sysfs structures including
+   the mdev device itself. To recreate and reconfigure the mdev matrix device,
+   all of the steps starting with step 3 will have to be performed again. Note
+   that the remove will fail if a guest using the mdev is still running.
+
+   It is not necessary to remove an mdev matrix device, but one may want to
+   remove it if no guest will use it during the remaining lifetime of the linux
+   host. If the mdev matrix device is removed, one may want to also reconfigure
+   the pool of adapters and queues reserved for use by the default drivers.
+
+Limitations
+===========
+* The KVM/kernel interfaces do not provide a way to prevent restoring an APQN
+  to the default drivers pool of a queue that is still assigned to a mediated
+  device in use by a guest. It is incumbent upon the administrator to
+  ensure there is no mediated device in use by a guest to which the APQN is
+  assigned lest the host be given access to the private data of the AP queue
+  device, such as a private key configured specifically for the guest.
+
+* Dynamically modifying the AP matrix for a running guest (which would amount to
+  hot(un)plug of AP devices for the guest) is currently not supported.
+
+* Live guest migration is not supported for guests using AP devices.
diff --git a/exec.c b/exec.c
index 6826c83..bb6170d 100644
--- a/exec.c
+++ b/exec.c
@@ -965,6 +965,7 @@
         tcg_target_initialized = true;
         cc->tcg_initialize();
     }
+    tlb_init(cpu);
 
 #ifndef CONFIG_USER_ONLY
     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
@@ -1734,7 +1735,7 @@
 }
 #endif
 
-#ifdef __linux__
+#ifdef CONFIG_POSIX
 static int64_t get_file_size(int fd)
 {
     int64_t size = lseek(fd, 0, SEEK_END);
@@ -2230,7 +2231,7 @@
     }
 }
 
-#ifdef __linux__
+#ifdef CONFIG_POSIX
 RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                  uint32_t ram_flags, int fd,
                                  Error **errp)
@@ -3906,11 +3907,6 @@
 }
 #endif
 
-/*
- * A helper function for the _utterly broken_ virtio device model to find out if
- * it's running on a big endian machine. Don't do this at home kids!
- */
-bool target_words_bigendian(void);
 bool target_words_bigendian(void)
 {
 #if defined(TARGET_WORDS_BIGENDIAN)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 59ca356..e1eef95 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -726,8 +726,7 @@
  * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  */
 
-float16  __attribute__((flatten)) float16_add(float16 a, float16 b,
-                                              float_status *status)
+float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
 {
     FloatParts pa = float16_unpack_canonical(a, status);
     FloatParts pb = float16_unpack_canonical(b, status);
@@ -736,8 +735,7 @@
     return float16_round_pack_canonical(pr, status);
 }
 
-float32 __attribute__((flatten)) float32_add(float32 a, float32 b,
-                                             float_status *status)
+float32 QEMU_FLATTEN float32_add(float32 a, float32 b, float_status *status)
 {
     FloatParts pa = float32_unpack_canonical(a, status);
     FloatParts pb = float32_unpack_canonical(b, status);
@@ -746,8 +744,7 @@
     return float32_round_pack_canonical(pr, status);
 }
 
-float64 __attribute__((flatten)) float64_add(float64 a, float64 b,
-                                             float_status *status)
+float64 QEMU_FLATTEN float64_add(float64 a, float64 b, float_status *status)
 {
     FloatParts pa = float64_unpack_canonical(a, status);
     FloatParts pb = float64_unpack_canonical(b, status);
@@ -756,8 +753,7 @@
     return float64_round_pack_canonical(pr, status);
 }
 
-float16 __attribute__((flatten)) float16_sub(float16 a, float16 b,
-                                             float_status *status)
+float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
 {
     FloatParts pa = float16_unpack_canonical(a, status);
     FloatParts pb = float16_unpack_canonical(b, status);
@@ -766,8 +762,7 @@
     return float16_round_pack_canonical(pr, status);
 }
 
-float32 __attribute__((flatten)) float32_sub(float32 a, float32 b,
-                                             float_status *status)
+float32 QEMU_FLATTEN float32_sub(float32 a, float32 b, float_status *status)
 {
     FloatParts pa = float32_unpack_canonical(a, status);
     FloatParts pb = float32_unpack_canonical(b, status);
@@ -776,8 +771,7 @@
     return float32_round_pack_canonical(pr, status);
 }
 
-float64 __attribute__((flatten)) float64_sub(float64 a, float64 b,
-                                             float_status *status)
+float64 QEMU_FLATTEN float64_sub(float64 a, float64 b, float_status *status)
 {
     FloatParts pa = float64_unpack_canonical(a, status);
     FloatParts pb = float64_unpack_canonical(b, status);
@@ -835,8 +829,7 @@
     g_assert_not_reached();
 }
 
-float16 __attribute__((flatten)) float16_mul(float16 a, float16 b,
-                                             float_status *status)
+float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
 {
     FloatParts pa = float16_unpack_canonical(a, status);
     FloatParts pb = float16_unpack_canonical(b, status);
@@ -845,8 +838,7 @@
     return float16_round_pack_canonical(pr, status);
 }
 
-float32 __attribute__((flatten)) float32_mul(float32 a, float32 b,
-                                             float_status *status)
+float32 QEMU_FLATTEN float32_mul(float32 a, float32 b, float_status *status)
 {
     FloatParts pa = float32_unpack_canonical(a, status);
     FloatParts pb = float32_unpack_canonical(b, status);
@@ -855,8 +847,7 @@
     return float32_round_pack_canonical(pr, status);
 }
 
-float64 __attribute__((flatten)) float64_mul(float64 a, float64 b,
-                                             float_status *status)
+float64 QEMU_FLATTEN float64_mul(float64 a, float64 b, float_status *status)
 {
     FloatParts pa = float64_unpack_canonical(a, status);
     FloatParts pb = float64_unpack_canonical(b, status);
@@ -1068,7 +1059,7 @@
     return a;
 }
 
-float16 __attribute__((flatten)) float16_muladd(float16 a, float16 b, float16 c,
+float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
                                                 int flags, float_status *status)
 {
     FloatParts pa = float16_unpack_canonical(a, status);
@@ -1079,7 +1070,7 @@
     return float16_round_pack_canonical(pr, status);
 }
 
-float32 __attribute__((flatten)) float32_muladd(float32 a, float32 b, float32 c,
+float32 QEMU_FLATTEN float32_muladd(float32 a, float32 b, float32 c,
                                                 int flags, float_status *status)
 {
     FloatParts pa = float32_unpack_canonical(a, status);
@@ -1090,7 +1081,7 @@
     return float32_round_pack_canonical(pr, status);
 }
 
-float64 __attribute__((flatten)) float64_muladd(float64 a, float64 b, float64 c,
+float64 QEMU_FLATTEN float64_muladd(float64 a, float64 b, float64 c,
                                                 int flags, float_status *status)
 {
     FloatParts pa = float64_unpack_canonical(a, status);
@@ -1112,19 +1103,38 @@
     bool sign = a.sign ^ b.sign;
 
     if (a.cls == float_class_normal && b.cls == float_class_normal) {
-        uint64_t temp_lo, temp_hi;
+        uint64_t n0, n1, q, r;
         int exp = a.exp - b.exp;
+
+        /*
+         * We want a 2*N / N-bit division to produce exactly an N-bit
+         * result, so that we do not lose any precision and so that we
+         * do not have to renormalize afterward.  If A.frac < B.frac,
+         * then division would produce an (N-1)-bit result; shift A left
+         * by one to produce an N-bit result, and decrement the
+         * exponent to match.
+         *
+         * The udiv_qrnnd algorithm that we're using requires normalization,
+         * i.e. the msb of the denominator must be set.  Since we know that
+         * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
+         * by one (more), and the remainder must be shifted right by one.
+         */
         if (a.frac < b.frac) {
             exp -= 1;
-            shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1,
-                              &temp_hi, &temp_lo);
+            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
         } else {
-            shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT,
-                              &temp_hi, &temp_lo);
+            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
         }
-        /* LSB of quot is set if inexact which roundandpack will use
-         * to set flags. Yet again we re-use a for the result */
-        a.frac = div128To64(temp_lo, temp_hi, b.frac);
+        q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
+
+        /*
+         * Set lsb if there is a remainder, to set inexact.
+         * As mentioned above, to find the actual value of the remainder we
+         * would need to shift right, but (1) we are only concerned about
+         * non-zero-ness, and (2) the remainder will always be even because
+         * both inputs to the division primitive are even.
+         */
+        a.frac = q | (r != 0);
         a.sign = sign;
         a.exp = exp;
         return a;
@@ -1409,13 +1419,6 @@
     return float64_round_pack_canonical(pr, s);
 }
 
-float64 float64_trunc_to_int(float64 a, float_status *s)
-{
-    FloatParts pa = float64_unpack_canonical(a, s);
-    FloatParts pr = round_to_int(pa, float_round_to_zero, 0, s);
-    return float64_round_pack_canonical(pr, s);
-}
-
 /*
  * Returns the result of converting the floating-point value `a' to
  * the two's complement integer format. The conversion is performed
@@ -2402,21 +2405,21 @@
     return a;
 }
 
-float16 __attribute__((flatten)) float16_sqrt(float16 a, float_status *status)
+float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
 {
     FloatParts pa = float16_unpack_canonical(a, status);
     FloatParts pr = sqrt_float(pa, status, &float16_params);
     return float16_round_pack_canonical(pr, status);
 }
 
-float32 __attribute__((flatten)) float32_sqrt(float32 a, float_status *status)
+float32 QEMU_FLATTEN float32_sqrt(float32 a, float_status *status)
 {
     FloatParts pa = float32_unpack_canonical(a, status);
     FloatParts pr = sqrt_float(pa, status, &float32_params);
     return float32_round_pack_canonical(pr, status);
 }
 
-float64 __attribute__((flatten)) float64_sqrt(float64 a, float_status *status)
+float64 QEMU_FLATTEN float64_sqrt(float64 a, float_status *status)
 {
     FloatParts pa = float64_unpack_canonical(a, status);
     FloatParts pr = sqrt_float(pa, status, &float64_params);
@@ -2690,7 +2693,7 @@
 {
     int8_t shiftCount;
 
-    shiftCount = countLeadingZeros32( aSig ) - 8;
+    shiftCount = clz32(aSig) - 8;
     *zSigPtr = aSig<<shiftCount;
     *zExpPtr = 1 - shiftCount;
 
@@ -2798,7 +2801,7 @@
 {
     int8_t shiftCount;
 
-    shiftCount = countLeadingZeros32( zSig ) - 1;
+    shiftCount = clz32(zSig) - 1;
     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
                                status);
 
@@ -2831,7 +2834,7 @@
 {
     int8_t shiftCount;
 
-    shiftCount = countLeadingZeros64( aSig ) - 11;
+    shiftCount = clz64(aSig) - 11;
     *zSigPtr = aSig<<shiftCount;
     *zExpPtr = 1 - shiftCount;
 
@@ -2969,7 +2972,7 @@
 {
     int8_t shiftCount;
 
-    shiftCount = countLeadingZeros64( zSig ) - 1;
+    shiftCount = clz64(zSig) - 1;
     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
                                status);
 
@@ -2987,7 +2990,7 @@
 {
     int8_t shiftCount;
 
-    shiftCount = countLeadingZeros64( aSig );
+    shiftCount = clz64(aSig);
     *zSigPtr = aSig<<shiftCount;
     *zExpPtr = 1 - shiftCount;
 }
@@ -3226,7 +3229,7 @@
         zSig1 = 0;
         zExp -= 64;
     }
-    shiftCount = countLeadingZeros64( zSig0 );
+    shiftCount = clz64(zSig0);
     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
     zExp -= shiftCount;
     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
@@ -3303,7 +3306,7 @@
     int8_t shiftCount;
 
     if ( aSig0 == 0 ) {
-        shiftCount = countLeadingZeros64( aSig1 ) - 15;
+        shiftCount = clz64(aSig1) - 15;
         if ( shiftCount < 0 ) {
             *zSig0Ptr = aSig1>>( - shiftCount );
             *zSig1Ptr = aSig1<<( shiftCount & 63 );
@@ -3315,7 +3318,7 @@
         *zExpPtr = - shiftCount - 63;
     }
     else {
-        shiftCount = countLeadingZeros64( aSig0 ) - 15;
+        shiftCount = clz64(aSig0) - 15;
         shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
         *zExpPtr = 1 - shiftCount;
     }
@@ -3504,7 +3507,7 @@
         zSig1 = 0;
         zExp -= 64;
     }
-    shiftCount = countLeadingZeros64( zSig0 ) - 15;
+    shiftCount = clz64(zSig0) - 15;
     if ( 0 <= shiftCount ) {
         zSig2 = 0;
         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
@@ -3536,7 +3539,7 @@
     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
     zSign = ( a < 0 );
     absA = zSign ? - a : a;
-    shiftCount = countLeadingZeros32( absA ) + 32;
+    shiftCount = clz32(absA) + 32;
     zSig = absA;
     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
 
@@ -3558,7 +3561,7 @@
     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
     zSign = ( a < 0 );
     absA = zSign ? - a : a;
-    shiftCount = countLeadingZeros32( absA ) + 17;
+    shiftCount = clz32(absA) + 17;
     zSig0 = absA;
     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
 
@@ -3580,7 +3583,7 @@
     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
     zSign = ( a < 0 );
     absA = zSign ? - a : a;
-    shiftCount = countLeadingZeros64( absA );
+    shiftCount = clz64(absA);
     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
 
 }
@@ -3602,7 +3605,7 @@
     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
     zSign = ( a < 0 );
     absA = zSign ? - a : a;
-    shiftCount = countLeadingZeros64( absA ) + 49;
+    shiftCount = clz64(absA) + 49;
     zExp = 0x406E - shiftCount;
     if ( 64 <= shiftCount ) {
         zSig1 = 0;
diff --git a/fsdev/qemu-fsdev-dummy.c b/fsdev/qemu-fsdev-dummy.c
index 6dc0fbc..489cd29 100644
--- a/fsdev/qemu-fsdev-dummy.c
+++ b/fsdev/qemu-fsdev-dummy.c
@@ -15,7 +15,7 @@
 #include "qemu/config-file.h"
 #include "qemu/module.h"
 
-int qemu_fsdev_add(QemuOpts *opts)
+int qemu_fsdev_add(QemuOpts *opts, Error **errp)
 {
     return 0;
 }
diff --git a/fsdev/qemu-fsdev.c b/fsdev/qemu-fsdev.c
index 8a4afbf..7a3b87c 100644
--- a/fsdev/qemu-fsdev.c
+++ b/fsdev/qemu-fsdev.c
@@ -30,7 +30,7 @@
     { .name = "proxy", .ops = &proxy_ops},
 };
 
-int qemu_fsdev_add(QemuOpts *opts)
+int qemu_fsdev_add(QemuOpts *opts, Error **errp)
 {
     int i;
     struct FsDriverListEntry *fsle;
@@ -38,10 +38,9 @@
     const char *fsdriver = qemu_opt_get(opts, "fsdriver");
     const char *writeout = qemu_opt_get(opts, "writeout");
     bool ro = qemu_opt_get_bool(opts, "readonly", 0);
-    Error *local_err = NULL;
 
     if (!fsdev_id) {
-        error_report("fsdev: No id specified");
+        error_setg(errp, "fsdev: No id specified");
         return -1;
     }
 
@@ -53,11 +52,11 @@
         }
 
         if (i == ARRAY_SIZE(FsDrivers)) {
-            error_report("fsdev: fsdriver %s not found", fsdriver);
+            error_setg(errp, "fsdev: fsdriver %s not found", fsdriver);
             return -1;
         }
     } else {
-        error_report("fsdev: No fsdriver specified");
+        error_setg(errp, "fsdev: No fsdriver specified");
         return -1;
     }
 
@@ -76,8 +75,7 @@
     }
 
     if (fsle->fse.ops->parse_opts) {
-        if (fsle->fse.ops->parse_opts(opts, &fsle->fse, &local_err)) {
-            error_report_err(local_err);
+        if (fsle->fse.ops->parse_opts(opts, &fsle->fse, errp)) {
             g_free(fsle->fse.fsdev_id);
             g_free(fsle);
             return -1;
diff --git a/fsdev/qemu-fsdev.h b/fsdev/qemu-fsdev.h
index 65e4b1c..d9716b4 100644
--- a/fsdev/qemu-fsdev.h
+++ b/fsdev/qemu-fsdev.h
@@ -38,7 +38,7 @@
     QTAILQ_ENTRY(FsDriverListEntry) next;
 } FsDriverListEntry;
 
-int qemu_fsdev_add(QemuOpts *opts);
+int qemu_fsdev_add(QemuOpts *opts, Error **errp);
 FsDriverEntry *get_fsdev_fsentry(char *id);
 extern FileOperations local_ops;
 extern FileOperations handle_ops;
diff --git a/gdbstub.c b/gdbstub.c
index d6ab950..c4e4f9f 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -20,7 +20,6 @@
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/cutils.h"
-#include "cpu.h"
 #include "trace-root.h"
 #ifdef CONFIG_USER_ONLY
 #include "qemu.h"
@@ -2038,7 +2037,11 @@
             sigaction(SIGINT, &act, NULL);
         }
 #endif
-        chr = qemu_chr_new_noreplay("gdb", device);
+        /*
+         * FIXME: it's a bit weird to allow using a mux chardev here
+         * and implicitly setup a monitor. We may want to break this.
+         */
+        chr = qemu_chr_new_noreplay("gdb", device, true);
         if (!chr)
             return -1;
     }
diff --git a/hmp.c b/hmp.c
index 61ef120..7828f93 100644
--- a/hmp.c
+++ b/hmp.c
@@ -837,8 +837,10 @@
 
     monitor_printf(mon, ": PCI device %04" PRIx64 ":%04" PRIx64 "\n",
                    dev->id->vendor, dev->id->device);
-    monitor_printf(mon, "      PCI subsystem %04" PRIx64 ":%04" PRIx64 "\n",
-                   dev->id->subsystem_vendor, dev->id->subsystem);
+    if (dev->id->has_subsystem_vendor && dev->id->has_subsystem) {
+        monitor_printf(mon, "      PCI subsystem %04" PRIx64 ":%04" PRIx64 "\n",
+                       dev->id->subsystem_vendor, dev->id->subsystem);
+    }
 
     if (dev->has_irq) {
         monitor_printf(mon, "      IRQ %" PRId64 ".\n", dev->irq);
diff --git a/hw/9pfs/9p-handle.c b/hw/9pfs/9p-handle.c
index f3641db..3465b1e 100644
--- a/hw/9pfs/9p-handle.c
+++ b/hw/9pfs/9p-handle.c
@@ -19,6 +19,7 @@
 #include <grp.h>
 #include <sys/socket.h>
 #include <sys/un.h>
+#include "qapi/error.h"
 #include "qemu/xattr.h"
 #include "qemu/cutils.h"
 #include "qemu/error-report.h"
@@ -655,12 +656,13 @@
     warn_report("handle backend is deprecated");
 
     if (sec_model) {
-        error_report("Invalid argument security_model specified with handle fsdriver");
+        error_setg(errp,
+                   "Invalid argument security_model specified with handle fsdriver");
         return -1;
     }
 
     if (!path) {
-        error_report("fsdev: No path specified");
+        error_setg(errp, "fsdev: No path specified");
         return -1;
     }
     fse->path = g_strdup(path);
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index c30f4f2..08e673a 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -1509,8 +1509,8 @@
 
     fsdev_throttle_parse_opts(opts, &fse->fst, &local_err);
     if (local_err) {
-        error_propagate(errp, local_err);
-        error_prepend(errp, "invalid throttle configuration: ");
+        error_propagate_prepend(errp, local_err,
+                                "invalid throttle configuration: ");
         return -1;
     }
 
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 6026780..3f54a21 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -14,6 +14,7 @@
 #include "hw/9pfs/9p.h"
 #include "hw/xen/xen_backend.h"
 #include "hw/9pfs/xen-9pfs.h"
+#include "qapi/error.h"
 #include "qemu/config-file.h"
 #include "qemu/option.h"
 #include "fsdev/qemu-fsdev.h"
@@ -355,6 +356,7 @@
 
 static int xen_9pfs_connect(struct XenDevice *xendev)
 {
+    Error *err = NULL;
     int i;
     Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev);
     V9fsState *s = &xen_9pdev->state;
@@ -452,7 +454,10 @@
     qemu_opt_set(fsdev, "path", xen_9pdev->path, NULL);
     qemu_opt_set(fsdev, "security_model", xen_9pdev->security_model, NULL);
     qemu_opts_set_id(fsdev, s->fsconf.fsdev_id);
-    qemu_fsdev_add(fsdev);
+    qemu_fsdev_add(fsdev, &err);
+    if (err) {
+        error_report_err(err);
+    }
     v9fs_device_realize_common(s, &xen_9p_transport, NULL);
 
     return 0;
diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index a19c141..39d882a 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -9,6 +9,7 @@
 devices-dirs-$(CONFIG_SOFTMMU) += display/
 devices-dirs-$(CONFIG_SOFTMMU) += dma/
 devices-dirs-$(CONFIG_SOFTMMU) += gpio/
+devices-dirs-$(CONFIG_HYPERV) += hyperv/
 devices-dirs-$(CONFIG_SOFTMMU) += i2c/
 devices-dirs-$(CONFIG_SOFTMMU) += ide/
 devices-dirs-$(CONFIG_SOFTMMU) += input/
@@ -33,7 +34,7 @@
 devices-dirs-$(CONFIG_SOFTMMU) += virtio/
 devices-dirs-$(CONFIG_SOFTMMU) += watchdog/
 devices-dirs-$(CONFIG_SOFTMMU) += xen/
-devices-dirs-$(CONFIG_MEM_HOTPLUG) += mem/
+devices-dirs-$(CONFIG_MEM_DEVICE) += mem/
 devices-dirs-$(CONFIG_SOFTMMU) += smbios/
 devices-dirs-y += core/
 common-obj-y += $(devices-dirs-y)
diff --git a/hw/alpha/dp264.c b/hw/alpha/dp264.c
index 80b987f..dd62f2a 100644
--- a/hw/alpha/dp264.c
+++ b/hw/alpha/dp264.c
@@ -150,7 +150,8 @@
         }
 
         if (initrd_filename) {
-            long initrd_base, initrd_size;
+            long initrd_base;
+            int64_t initrd_size;
 
             initrd_size = get_image_size(initrd_filename);
             if (initrd_size < 0) {
diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c
index d74b5b5..8004afe 100644
--- a/hw/alpha/typhoon.c
+++ b/hw/alpha/typhoon.c
@@ -932,23 +932,10 @@
     return b;
 }
 
-static int typhoon_pcihost_init(SysBusDevice *dev)
-{
-    return 0;
-}
-
-static void typhoon_pcihost_class_init(ObjectClass *klass, void *data)
-{
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
-
-    k->init = typhoon_pcihost_init;
-}
-
 static const TypeInfo typhoon_pcihost_info = {
     .name          = TYPE_TYPHOON_PCI_HOST_BRIDGE,
     .parent        = TYPE_PCI_HOST_BRIDGE,
     .instance_size = sizeof(TyphoonState),
-    .class_init    = typhoon_pcihost_class_init,
 };
 
 static void typhoon_iommu_memory_region_class_init(ObjectClass *klass,
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 20c71d7..586baa9 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -24,6 +24,7 @@
 #include "qemu/config-file.h"
 #include "qemu/option.h"
 #include "exec/address-spaces.h"
+#include "qemu/units.h"
 
 /* Kernel boot protocol is specified in the kernel docs
  * Documentation/arm/Booting and Documentation/arm64/booting.txt
@@ -36,6 +37,8 @@
 #define ARM64_TEXT_OFFSET_OFFSET    8
 #define ARM64_MAGIC_OFFSET          56
 
+#define BOOTLOADER_MAX_SIZE         (4 * KiB)
+
 AddressSpace *arm_boot_address_space(ARMCPU *cpu,
                                      const struct arm_boot_info *info)
 {
@@ -184,6 +187,8 @@
         code[i] = tswap32(insn);
     }
 
+    assert((len * sizeof(uint32_t)) < BOOTLOADER_MAX_SIZE);
+
     rom_add_blob_fixed_as(name, code, len * sizeof(uint32_t), addr, as);
 
     g_free(code);
@@ -919,6 +924,19 @@
         memcpy(&hdrvals, buffer + ARM64_TEXT_OFFSET_OFFSET, sizeof(hdrvals));
         if (hdrvals[1] != 0) {
             kernel_load_offset = le64_to_cpu(hdrvals[0]);
+
+            /*
+             * We write our startup "bootloader" at the very bottom of RAM,
+             * so that bit can't be used for the image. Luckily the Image
+             * format specification is that the image requests only an offset
+             * from a 2MB boundary, not an absolute load address. So if the
+             * image requests an offset that might mean it overlaps with the
+             * bootloader, we can just load it starting at 2MB+offset rather
+             * than 0MB + offset.
+             */
+            if (kernel_load_offset < BOOTLOADER_MAX_SIZE) {
+                kernel_load_offset += 2 * MiB;
+            }
         }
     }
 
diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c
index c807010..9648b3a 100644
--- a/hw/arm/musicpal.c
+++ b/hw/arm/musicpal.c
@@ -1693,9 +1693,10 @@
     }
 
     wm8750_dev = i2c_create_slave(i2c, TYPE_WM8750, MP_WM_ADDR);
-    dev = qdev_create(NULL, "mv88w8618_audio");
+    dev = qdev_create(NULL, TYPE_MV88W8618_AUDIO);
     s = SYS_BUS_DEVICE(dev);
-    qdev_prop_set_ptr(dev, "wm8750", wm8750_dev);
+    object_property_set_link(OBJECT(dev), OBJECT(wm8750_dev),
+                             TYPE_WM8750, NULL);
     qdev_init_nofail(dev);
     sysbus_mmio_map(s, 0, MP_AUDIO_BASE);
     sysbus_connect_irq(s, 0, pic[MP_AUDIO_IRQ]);
diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c
index 43d6a7b..0e24c80 100644
--- a/hw/arm/sysbus-fdt.c
+++ b/hw/arm/sysbus-fdt.c
@@ -50,11 +50,13 @@
     PlatformBusDevice *pbus;
 } PlatformBusFDTData;
 
-/* struct that associates a device type name and a node creation function */
-typedef struct NodeCreationPair {
+/* struct that allows to match a device and create its FDT node */
+typedef struct BindingEntry {
     const char *typename;
-    int (*add_fdt_node_fn)(SysBusDevice *sbdev, void *opaque);
-} NodeCreationPair;
+    const char *compat;
+    int  (*add_fn)(SysBusDevice *sbdev, void *opaque);
+    bool (*match_fn)(SysBusDevice *sbdev, const struct BindingEntry *combo);
+} BindingEntry;
 
 /* helpers */
 
@@ -413,6 +415,27 @@
     return 0;
 }
 
+/* DT compatible matching; false for any sbdev that is not vfio-platform */
+static bool vfio_platform_match(SysBusDevice *sbdev,
+                                const BindingEntry *entry)
+{
+    /* sbdev may be any sysbus device (e.g. ramfb), so no asserting cast */
+    VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)
+        object_dynamic_cast(OBJECT(sbdev), TYPE_VFIO_PLATFORM);
+    const char *compat = vdev ? vdev->compat : NULL;
+    unsigned int n = vdev ? vdev->num_compat : 0;
+    /* compat walks n consecutive NUL-terminated strings; true on a match */
+    for (; n > 0; n--, compat += strlen(compat) + 1) {
+        if (!strcmp(entry->compat, compat)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+#define VFIO_PLATFORM_BINDING(compat, add_fn) \
+    {TYPE_VFIO_PLATFORM, (compat), (add_fn), vfio_platform_match}
+
 #endif /* CONFIG_LINUX */
 
 static int no_fdt_node(SysBusDevice *sbdev, void *opaque)
@@ -420,14 +443,23 @@
     return 0;
 }
 
-/* list of supported dynamic sysbus devices */
-static const NodeCreationPair add_fdt_node_functions[] = {
+/* Type based matching: sbdev's typename must equal entry->typename */
+static bool type_match(SysBusDevice *sbdev, const BindingEntry *entry)
+{
+    return strcmp(entry->typename, object_get_typename(OBJECT(sbdev))) == 0;
+}
+
+#define TYPE_BINDING(type, add_fn) {(type), NULL, (add_fn), type_match}
+
+/* list of supported dynamic sysbus bindings */
+static const BindingEntry bindings[] = {
 #ifdef CONFIG_LINUX
-    {TYPE_VFIO_CALXEDA_XGMAC, add_calxeda_midway_xgmac_fdt_node},
-    {TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node},
+    TYPE_BINDING(TYPE_VFIO_CALXEDA_XGMAC, add_calxeda_midway_xgmac_fdt_node),
+    TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node),
+    VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node),
 #endif
-    {TYPE_RAMFB_DEVICE, no_fdt_node},
-    {"", NULL}, /* last element */
+    TYPE_BINDING(TYPE_RAMFB_DEVICE, no_fdt_node),
+    TYPE_BINDING("", NULL), /* last element */
 };
 
 /* Generic Code */
@@ -446,10 +478,11 @@
 {
     int i, ret;
 
-    for (i = 0; i < ARRAY_SIZE(add_fdt_node_functions); i++) {
-        if (!strcmp(object_get_typename(OBJECT(sbdev)),
-                    add_fdt_node_functions[i].typename)) {
-            ret = add_fdt_node_functions[i].add_fdt_node_fn(sbdev, opaque);
+    for (i = 0; i < ARRAY_SIZE(bindings); i++) {
+        const BindingEntry *iter = &bindings[i];
+
+        if (iter->match_fn(sbdev, iter)) {
+            ret = iter->add_fn(sbdev, opaque);
             assert(!ret);
             return;
         }
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 0b57f87..9f67782 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -712,6 +712,10 @@
         /* Mark as not usable by the normal world */
         qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
         qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
+
+        qemu_fdt_add_subnode(vms->fdt, "/secure-chosen");
+        qemu_fdt_setprop_string(vms->fdt, "/secure-chosen", "stdout-path",
+                                nodename);
     }
 
     g_free(nodename);
@@ -1758,6 +1762,7 @@
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_CALXEDA_XGMAC);
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE);
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
+    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM);
     mc->block_default_type = IF_VIRTIO;
     mc->no_cdrom = 1;
     mc->pci_allow_0_address = true;
@@ -1926,6 +1931,8 @@
 {
     virt_machine_2_11_options(mc);
     SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_10);
+    /* before 2.11 we never faulted accesses to bad addresses */
+    mc->ignore_memory_transaction_failures = true;
 }
 DEFINE_VIRT_MACHINE(2, 10)
 
diff --git a/hw/audio/ac97.c b/hw/audio/ac97.c
index 337402e..d799533 100644
--- a/hw/audio/ac97.c
+++ b/hw/audio/ac97.c
@@ -123,6 +123,10 @@
 
 #define MUTE_SHIFT 15
 
+#define TYPE_AC97 "AC97"
+#define AC97(obj) \
+    OBJECT_CHECK(AC97LinkState, (obj), TYPE_AC97)
+
 #define REC_MASK 7
 enum {
     REC_MIC = 0,
@@ -1340,7 +1344,7 @@
 
 static void ac97_realize(PCIDevice *dev, Error **errp)
 {
-    AC97LinkState *s = DO_UPCAST (AC97LinkState, dev, dev);
+    AC97LinkState *s = AC97(dev);
     uint8_t *c = s->dev.config;
 
     /* TODO: no need to override */
@@ -1389,7 +1393,7 @@
 
 static void ac97_exit(PCIDevice *dev)
 {
-    AC97LinkState *s = DO_UPCAST(AC97LinkState, dev, dev);
+    AC97LinkState *s = AC97(dev);
 
     AUD_close_in(&s->card, s->voice_pi);
     AUD_close_out(&s->card, s->voice_po);
@@ -1399,7 +1403,7 @@
 
 static int ac97_init (PCIBus *bus)
 {
-    pci_create_simple (bus, -1, "AC97");
+    pci_create_simple(bus, -1, TYPE_AC97);
     return 0;
 }
 
@@ -1427,7 +1431,7 @@
 }
 
 static const TypeInfo ac97_info = {
-    .name          = "AC97",
+    .name          = TYPE_AC97,
     .parent        = TYPE_PCI_DEVICE,
     .instance_size = sizeof (AC97LinkState),
     .class_init    = ac97_class_init,
diff --git a/hw/audio/es1370.c b/hw/audio/es1370.c
index dd75c9e..97789a0 100644
--- a/hw/audio/es1370.c
+++ b/hw/audio/es1370.c
@@ -506,10 +506,13 @@
                 d - &s->chan[0], val >> 16, (val & 0xffff));
         break;
 
+    case ES1370_REG_ADC_FRAMEADR:
+        d += 2;
+        goto frameadr;
     case ES1370_REG_DAC1_FRAMEADR:
     case ES1370_REG_DAC2_FRAMEADR:
-    case ES1370_REG_ADC_FRAMEADR:
         d += (addr - ES1370_REG_DAC1_FRAMEADR) >> 3;
+    frameadr:
         d->frame_addr = val;
         ldebug ("chan %td frame address %#x\n", d - &s->chan[0], val);
         break;
@@ -521,10 +524,13 @@
         lwarn ("writing to phantom frame address %#x\n", val);
         break;
 
+    case ES1370_REG_ADC_FRAMECNT:
+        d += 2;
+        goto framecnt;
     case ES1370_REG_DAC1_FRAMECNT:
     case ES1370_REG_DAC2_FRAMECNT:
-    case ES1370_REG_ADC_FRAMECNT:
         d += (addr - ES1370_REG_DAC1_FRAMECNT) >> 3;
+    framecnt:
         d->frame_cnt = val;
         d->leftover = 0;
         ldebug ("chan %td frame count %d, buffer size %d\n",
@@ -579,10 +585,13 @@
 #endif
         break;
 
+    case ES1370_REG_ADC_FRAMECNT:
+        d += 2;
+        goto framecnt;
     case ES1370_REG_DAC1_FRAMECNT:
     case ES1370_REG_DAC2_FRAMECNT:
-    case ES1370_REG_ADC_FRAMECNT:
         d += (addr - ES1370_REG_DAC1_FRAMECNT) >> 3;
+    framecnt:
         val = d->frame_cnt;
 #ifdef DEBUG_ES1370
         {
@@ -596,10 +605,13 @@
 #endif
         break;
 
+    case ES1370_REG_ADC_FRAMEADR:
+        d += 2;
+        goto frameadr;
     case ES1370_REG_DAC1_FRAMEADR:
     case ES1370_REG_DAC2_FRAMEADR:
-    case ES1370_REG_ADC_FRAMEADR:
         d += (addr - ES1370_REG_DAC1_FRAMEADR) >> 3;
+    frameadr:
         val = d->frame_addr;
         break;
 
diff --git a/hw/audio/marvell_88w8618.c b/hw/audio/marvell_88w8618.c
index e546892..6600ab4 100644
--- a/hw/audio/marvell_88w8618.c
+++ b/hw/audio/marvell_88w8618.c
@@ -15,6 +15,7 @@
 #include "hw/i2c/i2c.h"
 #include "hw/audio/wm8750.h"
 #include "audio/audio.h"
+#include "qapi/error.h"
 
 #define MP_AUDIO_SIZE           0x00001000
 
@@ -38,7 +39,6 @@
 #define MP_AUDIO_CLOCK_24MHZ    (1 << 9)
 #define MP_AUDIO_MONO           (1 << 14)
 
-#define TYPE_MV88W8618_AUDIO "mv88w8618_audio"
 #define MV88W8618_AUDIO(obj) \
     OBJECT_CHECK(mv88w8618_audio_state, (obj), TYPE_MV88W8618_AUDIO)
 
@@ -252,6 +252,11 @@
     memory_region_init_io(&s->iomem, obj, &mv88w8618_audio_ops, s,
                           "audio", MP_AUDIO_SIZE);
     sysbus_init_mmio(dev, &s->iomem);
+
+    object_property_add_link(OBJECT(dev), "wm8750", TYPE_WM8750,
+                             (Object **) &s->wm,
+                             qdev_prop_allow_set_link_before_realize,
+                             0, &error_abort);
 }
 
 static void mv88w8618_audio_realize(DeviceState *dev, Error **errp)
@@ -279,11 +284,6 @@
     }
 };
 
-static Property mv88w8618_audio_properties[] = {
-    DEFINE_PROP_PTR("wm8750", mv88w8618_audio_state, wm),
-    {/* end of list */},
-};
-
 static void mv88w8618_audio_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -291,8 +291,6 @@
     dc->realize = mv88w8618_audio_realize;
     dc->reset = mv88w8618_audio_reset;
     dc->vmsd = &mv88w8618_audio_vmsd;
-    dc->props = mv88w8618_audio_properties;
-    /* Reason: pointer property "wm8750" */
     dc->user_creatable = false;
 }
 
diff --git a/hw/char/serial.c b/hw/char/serial.c
index 251f40f..02463e3 100644
--- a/hw/char/serial.c
+++ b/hw/char/serial.c
@@ -345,9 +345,9 @@
     default:
     case 0:
         if (s->lcr & UART_LCR_DLAB) {
-            if (size == 2) {
+            if (size == 1) {
                 s->divider = (s->divider & 0xff00) | val;
-            } else if (size == 4) {
+            } else {
                 s->divider = val;
             }
             serial_update_parameters(s);
diff --git a/hw/char/sh_serial.c b/hw/char/sh_serial.c
index 373a405..1283156 100644
--- a/hw/char/sh_serial.c
+++ b/hw/char/sh_serial.c
@@ -29,6 +29,7 @@
 #include "hw/sh4/sh.h"
 #include "chardev/char-fe.h"
 #include "qapi/error.h"
+#include "qemu/timer.h"
 
 //#define DEBUG_SERIAL
 
@@ -63,6 +64,8 @@
     int rtrg;
 
     CharBackend chr;
+    QEMUTimer *fifo_timeout_timer;
+    uint64_t etu; /* Elementary Time Unit (ns) */
 
     qemu_irq eri;
     qemu_irq rxi;
@@ -314,6 +317,16 @@
     return sh_serial_can_receive(s);
 }
 
+static void sh_serial_timeout_int(void *opaque)
+{
+    sh_serial_state *s = opaque;
+    /* fifo_timeout_timer callback: set RDF, raise rxi if scr bit 6 is set */
+    s->flags |= SH_SERIAL_FLAG_RDF;
+    if (s->scr & (1 << 6) && s->rxi) {
+        qemu_set_irq(s->rxi, 1);
+    }
+}
+
 static void sh_serial_receive1(void *opaque, const uint8_t *buf, int size)
 {
     sh_serial_state *s = opaque;
@@ -330,8 +343,12 @@
                 if (s->rx_cnt >= s->rtrg) {
                     s->flags |= SH_SERIAL_FLAG_RDF;
                     if (s->scr & (1 << 6) && s->rxi) {
+                        timer_del(s->fifo_timeout_timer);
                         qemu_set_irq(s->rxi, 1);
                     }
+                } else {
+                    timer_mod(s->fifo_timeout_timer,
+                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 15 * s->etu);
                 }
             }
         }
@@ -402,6 +419,9 @@
                                  sh_serial_event, NULL, s, NULL, true);
     }
 
+    s->fifo_timeout_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                         sh_serial_timeout_int, s);
+    s->etu = NANOSECONDS_PER_SECOND / 9600;
     s->eri = eri_source;
     s->rxi = rxi_source;
     s->txi = txi_source;
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index d2dd8ab..04e3ebe 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -667,9 +667,9 @@
 
     /* The config space (ignored on the far end in current versions) */
     get_config(vdev, (uint8_t *)&config);
-    qemu_put_be16s(f, &config.cols);
-    qemu_put_be16s(f, &config.rows);
-    qemu_put_be32s(f, &config.max_nr_ports);
+    qemu_put_be16(f, config.cols);
+    qemu_put_be16(f, config.rows);
+    qemu_put_be32(f, config.max_nr_ports);
 
     /* The ports map */
     max_nr_ports = s->serial.max_virtserial_ports;
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index 8b4b4bf..44f7236 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -207,7 +207,11 @@
     } else {
         snprintf(label, sizeof(label), "xencons%d", con->xendev.dev);
         qemu_chr_fe_init(&con->chr,
-                         qemu_chr_new(label, output), &error_abort);
+                         /*
+                          * FIXME: sure we want to support implicit
+                          * muxed monitors here?
+                          */
+                         qemu_chr_new_mux_mon(label, output), &error_abort);
     }
 
     xenstore_store_pv_console_info(con->xendev.dev,
diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs
index eb88ca9..a799c83 100644
--- a/hw/core/Makefile.objs
+++ b/hw/core/Makefile.objs
@@ -20,6 +20,5 @@
 common-obj-$(CONFIG_SOFTMMU) += or-irq.o
 common-obj-$(CONFIG_SOFTMMU) += split-irq.o
 common-obj-$(CONFIG_PLATFORM_BUS) += platform-bus.o
-
-obj-$(CONFIG_SOFTMMU) += generic-loader.o
-obj-$(CONFIG_SOFTMMU) += null-machine.o
+common-obj-$(CONFIG_SOFTMMU) += generic-loader.o
+common-obj-$(CONFIG_SOFTMMU) += null-machine.o
diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c
index fde32cb..fbae05f 100644
--- a/hw/core/generic-loader.c
+++ b/hw/core/generic-loader.c
@@ -130,11 +130,7 @@
         s->cpu = first_cpu;
     }
 
-#ifdef TARGET_WORDS_BIGENDIAN
-    big_endian = 1;
-#else
-    big_endian = 0;
-#endif
+    big_endian = target_words_bigendian();
 
     if (s->file) {
         AddressSpace *as = s->cpu ? s->cpu->as :  NULL;
@@ -204,6 +200,7 @@
     dc->unrealize = generic_loader_unrealize;
     dc->props = generic_loader_props;
     dc->desc = "Generic Loader";
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 }
 
 static TypeInfo generic_loader_info = {
diff --git a/hw/core/hotplug.c b/hw/core/hotplug.c
index 2253072..17ac986 100644
--- a/hw/core/hotplug.c
+++ b/hw/core/hotplug.c
@@ -35,16 +35,6 @@
     }
 }
 
-void hotplug_handler_post_plug(HotplugHandler *plug_handler,
-                               DeviceState *plugged_dev)
-{
-    HotplugHandlerClass *hdc = HOTPLUG_HANDLER_GET_CLASS(plug_handler);
-
-    if (hdc->post_plug) {
-        hdc->post_plug(plug_handler, plugged_dev);
-    }
-}
-
 void hotplug_handler_unplug_request(HotplugHandler *plug_handler,
                                     DeviceState *plugged_dev,
                                     Error **errp)
diff --git a/hw/core/loader.c b/hw/core/loader.c
index 390987a..aa0b3fc 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -61,9 +61,10 @@
 static int roms_loaded;
 
 /* return the size or -1 if error */
-int get_image_size(const char *filename)
+int64_t get_image_size(const char *filename)
 {
-    int fd, size;
+    int fd;
+    int64_t size;
     fd = open(filename, O_RDONLY | O_BINARY);
     if (fd < 0)
         return -1;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 1987557..da50ad6 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -636,7 +636,7 @@
         machine_get_memory_encryption, machine_set_memory_encryption,
         &error_abort);
     object_class_property_set_description(oc, "memory-encryption",
-        "Set memory encyption object to use", &error_abort);
+        "Set memory encryption object to use", &error_abort);
 }
 
 static void machine_class_base_init(ObjectClass *oc, void *data)
diff --git a/hw/core/null-machine.c b/hw/core/null-machine.c
index cde4d3e..76d3f8e 100644
--- a/hw/core/null-machine.c
+++ b/hw/core/null-machine.c
@@ -18,7 +18,7 @@
 #include "hw/boards.h"
 #include "sysemu/sysemu.h"
 #include "exec/address-spaces.h"
-#include "cpu.h"
+#include "qom/cpu.h"
 
 static void machine_none_init(MachineState *mch)
 {
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 046d8f1..6b3cc55 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -832,14 +832,6 @@
 
         DEVICE_LISTENER_CALL(realize, Forward, dev);
 
-        if (hotplug_ctrl) {
-            hotplug_handler_plug(hotplug_ctrl, dev, &local_err);
-        }
-
-        if (local_err != NULL) {
-            goto post_realize_fail;
-        }
-
         /*
          * always free/re-initialize here since the value cannot be cleaned up
          * in device_unrealize due to its usage later on in the unplug path
@@ -869,8 +861,12 @@
         dev->pending_deleted_event = false;
 
         if (hotplug_ctrl) {
-            hotplug_handler_post_plug(hotplug_ctrl, dev);
-        }
+            hotplug_handler_plug(hotplug_ctrl, dev, &local_err);
+            if (local_err != NULL) {
+                goto child_realize_fail;
+            }
+       }
+
     } else if (!value && dev->realized) {
         Error **local_errp = NULL;
         QLIST_FOREACH(bus, &dev->child_bus, sibling) {
diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs
index 780a76b..97acd5b 100644
--- a/hw/display/Makefile.objs
+++ b/hw/display/Makefile.objs
@@ -5,6 +5,7 @@
 
 common-obj-$(CONFIG_ADS7846) += ads7846.o
 common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o
+common-obj-$(call land,$(CONFIG_VGA_CIRRUS),$(CONFIG_VGA_ISA))+=cirrus_vga_isa.o
 common-obj-$(CONFIG_G364FB) += g364fb.o
 common-obj-$(CONFIG_JAZZ_LED) += jazz_led.o
 common-obj-$(CONFIG_PL110) += pl110.o
@@ -14,11 +15,12 @@
 common-obj-$(CONFIG_XEN) += xenfb.o
 
 common-obj-$(CONFIG_VGA_PCI) += vga-pci.o
-common-obj-$(CONFIG_VGA_PCI) += bochs-display.o
 common-obj-$(CONFIG_VGA_PCI) += edid-region.o
 common-obj-$(CONFIG_VGA_ISA) += vga-isa.o
 common-obj-$(CONFIG_VGA_ISA_MM) += vga-isa-mm.o
 common-obj-$(CONFIG_VMWARE_VGA) += vmware_vga.o
+common-obj-$(CONFIG_BOCHS_DISPLAY) += bochs-display.o
+common-obj-$(CONFIG_BOCHS_DISPLAY) += edid-region.o
 
 common-obj-$(CONFIG_BLIZZARD) += blizzard.o
 common-obj-$(CONFIG_EXYNOS4) += exynos4210_fimd.o
diff --git a/hw/display/bcm2835_fb.c b/hw/display/bcm2835_fb.c
index d534d00..599863e 100644
--- a/hw/display/bcm2835_fb.c
+++ b/hw/display/bcm2835_fb.c
@@ -190,7 +190,7 @@
     }
 
     if (s->invalidate) {
-        hwaddr base = s->config.base + xoff + yoff * src_width;
+        hwaddr base = s->config.base + xoff + (hwaddr)yoff * src_width;
         framebuffer_update_memory_section(&s->fbsection, s->dma_mr,
                                           base,
                                           s->config.yres, src_width);
diff --git a/hw/display/bochs-display.c b/hw/display/bochs-display.c
index 09d8944..3d439eb 100644
--- a/hw/display/bochs-display.c
+++ b/hw/display/bochs-display.c
@@ -9,6 +9,7 @@
 #include "hw/hw.h"
 #include "hw/pci/pci.h"
 #include "hw/display/bochs-vbe.h"
+#include "hw/display/edid.h"
 
 #include "qapi/error.h"
 
@@ -35,9 +36,13 @@
     MemoryRegion     mmio;
     MemoryRegion     vbe;
     MemoryRegion     qext;
+    MemoryRegion     edid;
 
     /* device config */
     uint64_t         vgamem;
+    bool             enable_edid;
+    qemu_edid_info   edid_info;
+    uint8_t          edid_blob[256];
 
     /* device registers */
     uint16_t         vbe_regs[VBE_DISPI_INDEX_NB];
@@ -283,6 +288,12 @@
     pci_register_bar(&s->pci, 0, PCI_BASE_ADDRESS_MEM_PREFETCH, &s->vram);
     pci_register_bar(&s->pci, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio);
 
+    if (s->enable_edid) {
+        qemu_edid_generate(s->edid_blob, sizeof(s->edid_blob), &s->edid_info);
+        qemu_edid_region_io(&s->edid, obj, s->edid_blob, sizeof(s->edid_blob));
+        memory_region_add_subregion(&s->mmio, 0, &s->edid);
+    }
+
     if (pci_bus_is_express(pci_get_bus(dev))) {
         dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
         ret = pcie_endpoint_cap_init(dev, 0x80);
@@ -325,6 +336,8 @@
 
 static Property bochs_display_properties[] = {
     DEFINE_PROP_SIZE("vgamem", BochsDisplayState, vgamem, 16 * MiB),
+    DEFINE_PROP_BOOL("edid", BochsDisplayState, enable_edid, false),
+    DEFINE_EDID_PROPERTIES(BochsDisplayState, edid_info),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/display/cg3.c b/hw/display/cg3.c
index 1c199ab..e50d97e 100644
--- a/hw/display/cg3.c
+++ b/hw/display/cg3.c
@@ -307,7 +307,7 @@
         ret = load_image_mr(fcode_filename, &s->rom);
         g_free(fcode_filename);
         if (ret < 0 || ret > FCODE_MAX_ROM_SIZE) {
-            error_report("cg3: could not load prom '%s'", CG3_ROM_FILE);
+            warn_report("cg3: could not load prom '%s'", CG3_ROM_FILE);
         }
     }
 
diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
index 04c87c8..d9b854d 100644
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -33,8 +33,8 @@
 #include "hw/hw.h"
 #include "hw/pci/pci.h"
 #include "ui/pixel_ops.h"
-#include "vga_int.h"
 #include "hw/loader.h"
+#include "cirrus_vga_internal.h"
 
 /*
  * TODO:
@@ -52,16 +52,6 @@
  *
  ***************************************/
 
-// ID
-#define CIRRUS_ID_CLGD5422  (0x23<<2)
-#define CIRRUS_ID_CLGD5426  (0x24<<2)
-#define CIRRUS_ID_CLGD5424  (0x25<<2)
-#define CIRRUS_ID_CLGD5428  (0x26<<2)
-#define CIRRUS_ID_CLGD5430  (0x28<<2)
-#define CIRRUS_ID_CLGD5434  (0x2A<<2)
-#define CIRRUS_ID_CLGD5436  (0x2B<<2)
-#define CIRRUS_ID_CLGD5446  (0x2E<<2)
-
 // sequencer 0x07
 #define CIRRUS_SR7_BPP_VGA            0x00
 #define CIRRUS_SR7_BPP_SVGA           0x01
@@ -176,65 +166,10 @@
 
 #define CIRRUS_PNPMMIO_SIZE         0x1000
 
-struct CirrusVGAState;
-typedef void (*cirrus_bitblt_rop_t) (struct CirrusVGAState *s,
-                                     uint32_t dstaddr, uint32_t srcaddr,
-				     int dstpitch, int srcpitch,
-				     int bltwidth, int bltheight);
 typedef void (*cirrus_fill_t)(struct CirrusVGAState *s,
                               uint32_t dstaddr, int dst_pitch,
                               int width, int height);
 
-typedef struct CirrusVGAState {
-    VGACommonState vga;
-
-    MemoryRegion cirrus_vga_io;
-    MemoryRegion cirrus_linear_io;
-    MemoryRegion cirrus_linear_bitblt_io;
-    MemoryRegion cirrus_mmio_io;
-    MemoryRegion pci_bar;
-    bool linear_vram;  /* vga.vram mapped over cirrus_linear_io */
-    MemoryRegion low_mem_container; /* container for 0xa0000-0xc0000 */
-    MemoryRegion low_mem;           /* always mapped, overridden by: */
-    MemoryRegion cirrus_bank[2];    /*   aliases at 0xa0000-0xb0000  */
-    uint32_t cirrus_addr_mask;
-    uint32_t linear_mmio_mask;
-    uint8_t cirrus_shadow_gr0;
-    uint8_t cirrus_shadow_gr1;
-    uint8_t cirrus_hidden_dac_lockindex;
-    uint8_t cirrus_hidden_dac_data;
-    uint32_t cirrus_bank_base[2];
-    uint32_t cirrus_bank_limit[2];
-    uint8_t cirrus_hidden_palette[48];
-    bool enable_blitter;
-    int cirrus_blt_pixelwidth;
-    int cirrus_blt_width;
-    int cirrus_blt_height;
-    int cirrus_blt_dstpitch;
-    int cirrus_blt_srcpitch;
-    uint32_t cirrus_blt_fgcol;
-    uint32_t cirrus_blt_bgcol;
-    uint32_t cirrus_blt_dstaddr;
-    uint32_t cirrus_blt_srcaddr;
-    uint8_t cirrus_blt_mode;
-    uint8_t cirrus_blt_modeext;
-    cirrus_bitblt_rop_t cirrus_rop;
-#define CIRRUS_BLTBUFSIZE (2048 * 4) /* one line width */
-    uint8_t cirrus_bltbuf[CIRRUS_BLTBUFSIZE];
-    uint8_t *cirrus_srcptr;
-    uint8_t *cirrus_srcptr_end;
-    uint32_t cirrus_srccounter;
-    /* hwcursor display state */
-    int last_hw_cursor_size;
-    int last_hw_cursor_x;
-    int last_hw_cursor_y;
-    int last_hw_cursor_y_start;
-    int last_hw_cursor_y_end;
-    int real_vram_size; /* XXX: suppress that */
-    int device_id;
-    int bustype;
-} CirrusVGAState;
-
 typedef struct PCICirrusVGAState {
     PCIDevice dev;
     CirrusVGAState cirrus_vga;
@@ -244,16 +179,6 @@
 #define PCI_CIRRUS_VGA(obj) \
     OBJECT_CHECK(PCICirrusVGAState, (obj), TYPE_PCI_CIRRUS_VGA)
 
-#define TYPE_ISA_CIRRUS_VGA "isa-cirrus-vga"
-#define ISA_CIRRUS_VGA(obj) \
-    OBJECT_CHECK(ISACirrusVGAState, (obj), TYPE_ISA_CIRRUS_VGA)
-
-typedef struct ISACirrusVGAState {
-    ISADevice parent_obj;
-
-    CirrusVGAState cirrus_vga;
-} ISACirrusVGAState;
-
 static uint8_t rop_to_index[256];
 
 /***************************************
@@ -2829,7 +2754,7 @@
     return 0;
 }
 
-static const VMStateDescription vmstate_cirrus_vga = {
+const VMStateDescription vmstate_cirrus_vga = {
     .name = "cirrus_vga",
     .version_id = 2,
     .minimum_version_id = 1,
@@ -2932,10 +2857,9 @@
     },
 };
 
-static void cirrus_init_common(CirrusVGAState *s, Object *owner,
-                               int device_id, int is_pci,
-                               MemoryRegion *system_memory,
-                               MemoryRegion *system_io)
+void cirrus_init_common(CirrusVGAState *s, Object *owner,
+                        int device_id, int is_pci,
+                        MemoryRegion *system_memory, MemoryRegion *system_io)
 {
     int i;
     static int inited;
@@ -3031,62 +2955,6 @@
 
 /***************************************
  *
- *  ISA bus support
- *
- ***************************************/
-
-static void isa_cirrus_vga_realizefn(DeviceState *dev, Error **errp)
-{
-    ISADevice *isadev = ISA_DEVICE(dev);
-    ISACirrusVGAState *d = ISA_CIRRUS_VGA(dev);
-    VGACommonState *s = &d->cirrus_vga.vga;
-
-    /* follow real hardware, cirrus card emulated has 4 MB video memory.
-       Also accept 8 MB/16 MB for backward compatibility. */
-    if (s->vram_size_mb != 4 && s->vram_size_mb != 8 &&
-        s->vram_size_mb != 16) {
-        error_setg(errp, "Invalid cirrus_vga ram size '%u'",
-                   s->vram_size_mb);
-        return;
-    }
-    s->global_vmstate = true;
-    vga_common_init(s, OBJECT(dev));
-    cirrus_init_common(&d->cirrus_vga, OBJECT(dev), CIRRUS_ID_CLGD5430, 0,
-                       isa_address_space(isadev),
-                       isa_address_space_io(isadev));
-    s->con = graphic_console_init(dev, 0, s->hw_ops, s);
-    rom_add_vga(VGABIOS_CIRRUS_FILENAME);
-    /* XXX ISA-LFB support */
-    /* FIXME not qdev yet */
-}
-
-static Property isa_cirrus_vga_properties[] = {
-    DEFINE_PROP_UINT32("vgamem_mb", struct ISACirrusVGAState,
-                       cirrus_vga.vga.vram_size_mb, 4),
-    DEFINE_PROP_BOOL("blitter", struct ISACirrusVGAState,
-                     cirrus_vga.enable_blitter, true),
-    DEFINE_PROP_END_OF_LIST(),
-};
-
-static void isa_cirrus_vga_class_init(ObjectClass *klass, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(klass);
-
-    dc->vmsd  = &vmstate_cirrus_vga;
-    dc->realize = isa_cirrus_vga_realizefn;
-    dc->props = isa_cirrus_vga_properties;
-    set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
-}
-
-static const TypeInfo isa_cirrus_vga_info = {
-    .name          = TYPE_ISA_CIRRUS_VGA,
-    .parent        = TYPE_ISA_DEVICE,
-    .instance_size = sizeof(ISACirrusVGAState),
-    .class_init = isa_cirrus_vga_class_init,
-};
-
-/***************************************
- *
  *  PCI bus support
  *
  ***************************************/
@@ -3171,7 +3039,6 @@
 
 static void cirrus_vga_register_types(void)
 {
-    type_register_static(&isa_cirrus_vga_info);
     type_register_static(&cirrus_vga_info);
 }
 
diff --git a/hw/display/cirrus_vga_internal.h b/hw/display/cirrus_vga_internal.h
new file mode 100644
index 0000000..a78ebbd
--- /dev/null
+++ b/hw/display/cirrus_vga_internal.h
@@ -0,0 +1,103 @@
+/*
+ * QEMU Cirrus CLGD 54xx VGA Emulator, internal definitions
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ * Copyright (c) 2004 Makoto Suzuki (suzu)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef CIRRUS_VGA_INTERNAL_H
+#define CIRRUS_VGA_INTERNAL_H
+
+#include "vga_int.h"
+
+/* IDs */
+#define CIRRUS_ID_CLGD5422  (0x23 << 2)
+#define CIRRUS_ID_CLGD5426  (0x24 << 2)
+#define CIRRUS_ID_CLGD5424  (0x25 << 2)
+#define CIRRUS_ID_CLGD5428  (0x26 << 2)
+#define CIRRUS_ID_CLGD5430  (0x28 << 2)
+#define CIRRUS_ID_CLGD5434  (0x2A << 2)
+#define CIRRUS_ID_CLGD5436  (0x2B << 2)
+#define CIRRUS_ID_CLGD5446  (0x2E << 2)
+
+extern const VMStateDescription vmstate_cirrus_vga;
+
+struct CirrusVGAState;
+typedef void (*cirrus_bitblt_rop_t)(struct CirrusVGAState *s,
+                                    uint32_t dstaddr, uint32_t srcaddr,
+                                    int dstpitch, int srcpitch,
+                                    int bltwidth, int bltheight);
+
+typedef struct CirrusVGAState {
+    VGACommonState vga;
+
+    MemoryRegion cirrus_vga_io;
+    MemoryRegion cirrus_linear_io;
+    MemoryRegion cirrus_linear_bitblt_io;
+    MemoryRegion cirrus_mmio_io;
+    MemoryRegion pci_bar;
+    bool linear_vram;  /* vga.vram mapped over cirrus_linear_io */
+    MemoryRegion low_mem_container; /* container for 0xa0000-0xc0000 */
+    MemoryRegion low_mem;           /* always mapped, overridden by: */
+    MemoryRegion cirrus_bank[2];    /*   aliases at 0xa0000-0xb0000  */
+    uint32_t cirrus_addr_mask;
+    uint32_t linear_mmio_mask;
+    uint8_t cirrus_shadow_gr0;
+    uint8_t cirrus_shadow_gr1;
+    uint8_t cirrus_hidden_dac_lockindex;
+    uint8_t cirrus_hidden_dac_data;
+    uint32_t cirrus_bank_base[2];
+    uint32_t cirrus_bank_limit[2];
+    uint8_t cirrus_hidden_palette[48];
+    bool enable_blitter;
+    int cirrus_blt_pixelwidth;
+    int cirrus_blt_width;
+    int cirrus_blt_height;
+    int cirrus_blt_dstpitch;
+    int cirrus_blt_srcpitch;
+    uint32_t cirrus_blt_fgcol;
+    uint32_t cirrus_blt_bgcol;
+    uint32_t cirrus_blt_dstaddr;
+    uint32_t cirrus_blt_srcaddr;
+    uint8_t cirrus_blt_mode;
+    uint8_t cirrus_blt_modeext;
+    cirrus_bitblt_rop_t cirrus_rop;
+#define CIRRUS_BLTBUFSIZE (2048 * 4) /* one line width */
+    uint8_t cirrus_bltbuf[CIRRUS_BLTBUFSIZE];
+    uint8_t *cirrus_srcptr;
+    uint8_t *cirrus_srcptr_end;
+    uint32_t cirrus_srccounter;
+    /* hwcursor display state */
+    int last_hw_cursor_size;
+    int last_hw_cursor_x;
+    int last_hw_cursor_y;
+    int last_hw_cursor_y_start;
+    int last_hw_cursor_y_end;
+    int real_vram_size; /* XXX: suppress that */
+    int device_id;
+    int bustype;
+} CirrusVGAState;
+
+void cirrus_init_common(CirrusVGAState *s, Object *owner,
+                        int device_id, int is_pci,
+                        MemoryRegion *system_memory, MemoryRegion *system_io);
+
+#endif
diff --git a/hw/display/cirrus_vga_isa.c b/hw/display/cirrus_vga_isa.c
new file mode 100644
index 0000000..fa10b74
--- /dev/null
+++ b/hw/display/cirrus_vga_isa.c
@@ -0,0 +1,98 @@
+/*
+ * QEMU Cirrus CLGD 54xx VGA Emulator, ISA bus support
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ * Copyright (c) 2004 Makoto Suzuki (suzu)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/hw.h"
+#include "hw/loader.h"
+#include "hw/isa/isa.h"
+#include "cirrus_vga_internal.h"
+
+#define TYPE_ISA_CIRRUS_VGA "isa-cirrus-vga"
+#define ISA_CIRRUS_VGA(obj) \
+    OBJECT_CHECK(ISACirrusVGAState, (obj), TYPE_ISA_CIRRUS_VGA)
+
+typedef struct ISACirrusVGAState {
+    ISADevice parent_obj;
+
+    CirrusVGAState cirrus_vga;
+} ISACirrusVGAState;
+
+static void isa_cirrus_vga_realizefn(DeviceState *dev, Error **errp)
+{
+    ISADevice *isadev = ISA_DEVICE(dev);
+    ISACirrusVGAState *d = ISA_CIRRUS_VGA(dev);
+    VGACommonState *s = &d->cirrus_vga.vga;
+
+    /* follow real hardware, cirrus card emulated has 4 MB video memory.
+       Also accept 8 MB/16 MB for backward compatibility. */
+    if (s->vram_size_mb != 4 && s->vram_size_mb != 8 &&
+        s->vram_size_mb != 16) {
+        error_setg(errp, "Invalid cirrus_vga ram size '%u'",
+                   s->vram_size_mb);
+        return;
+    }
+    s->global_vmstate = true;
+    vga_common_init(s, OBJECT(dev));
+    cirrus_init_common(&d->cirrus_vga, OBJECT(dev), CIRRUS_ID_CLGD5430, 0,
+                       isa_address_space(isadev),
+                       isa_address_space_io(isadev));
+    s->con = graphic_console_init(dev, 0, s->hw_ops, s);
+    rom_add_vga(VGABIOS_CIRRUS_FILENAME);
+    /* XXX ISA-LFB support */
+    /* FIXME not qdev yet */
+}
+
+static Property isa_cirrus_vga_properties[] = {
+    DEFINE_PROP_UINT32("vgamem_mb", struct ISACirrusVGAState,
+                       cirrus_vga.vga.vram_size_mb, 4),
+    DEFINE_PROP_BOOL("blitter", struct ISACirrusVGAState,
+                     cirrus_vga.enable_blitter, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void isa_cirrus_vga_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->vmsd  = &vmstate_cirrus_vga;
+    dc->realize = isa_cirrus_vga_realizefn;
+    dc->props = isa_cirrus_vga_properties;
+    set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
+}
+
+static const TypeInfo isa_cirrus_vga_info = {
+    .name          = TYPE_ISA_CIRRUS_VGA,
+    .parent        = TYPE_ISA_DEVICE,
+    .instance_size = sizeof(ISACirrusVGAState),
+    .class_init = isa_cirrus_vga_class_init,
+};
+
+static void cirrus_vga_isa_register_types(void)
+{
+    type_register_static(&isa_cirrus_vga_info);
+}
+
+type_init(cirrus_vga_isa_register_types)
diff --git a/hw/display/edid-generate.c b/hw/display/edid-generate.c
index c80397e..bdf5e1d 100644
--- a/hw/display/edid-generate.c
+++ b/hw/display/edid-generate.c
@@ -223,7 +223,7 @@
 
     uint32_t clock  = 75 * (xres + xblank) * (yres + yblank);
 
-    *(uint32_t *)(desc) = cpu_to_le32(clock / 10000);
+    stl_le_p(desc, clock / 10000);
 
     desc[2] = xres   & 0xff;
     desc[3] = xblank & 0xff;
@@ -301,7 +301,7 @@
     /* =============== set defaults  =============== */
 
     if (!info->vendor || strlen(info->vendor) != 3) {
-        info->vendor = "EMU";
+        info->vendor = "RHT";
     }
     if (!info->name) {
         info->name = "QEMU Monitor";
@@ -342,9 +342,9 @@
                           (((info->vendor[2] - '@') & 0x1f) <<  0));
     uint16_t model_nr = 0x1234;
     uint32_t serial_nr = info->serial ? atoi(info->serial) : 0;
-    *(uint16_t *)(edid +  8) = cpu_to_be16(vendor_id);
-    *(uint16_t *)(edid + 10) = cpu_to_le16(model_nr);
-    *(uint32_t *)(edid + 12) = cpu_to_le32(serial_nr);
+    stw_be_p(edid +  8, vendor_id);
+    stw_le_p(edid + 10, model_nr);
+    stl_le_p(edid + 12, serial_nr);
 
     /* manufacture week and year */
     edid[16] = 42;
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 7479864..9087db5 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -290,7 +290,7 @@
     }
 
     cfg = qxl_phys2virt(qxl, qxl->guest_monitors_config, MEMSLOT_GROUP_GUEST);
-    if (cfg->count == 1) {
+    if (cfg != NULL && cfg->count == 1) {
         qxl->guest_primary.resized = 1;
         qxl->guest_head0_width  = cfg->heads[0].width;
         qxl->guest_head0_height = cfg->heads[0].height;
@@ -848,7 +848,7 @@
         qxl->guest_primary.commands++;
         qxl_track_command(qxl, ext);
         qxl_log_command(qxl, "csr", ext);
-        if (qxl->id == 0) {
+        if (qxl->have_vga) {
             qxl_render_cursor(qxl, ext);
         }
         trace_qxl_ring_cursor_get(qxl->id, qxl_mode_to_string(qxl->mode));
@@ -1255,7 +1255,7 @@
     d->current_async = QXL_UNDEFINED_IO;
     qemu_mutex_unlock(&d->async_lock);
 
-    if (d->id == 0) {
+    if (d->have_vga) {
         qxl_enter_vga_mode(d);
     } else {
         d->mode = QXL_MODE_UNDEFINED;
@@ -1893,7 +1893,31 @@
         trace_qxl_send_events_vm_stopped(d->id, events);
         return;
     }
-    old_pending = atomic_fetch_or(&d->ram->int_pending, le_events);
+    /*
+     * Older versions of Spice forgot to define the QXLRam struct
+     * with the '__aligned__(4)' attribute. clang 7 and newer will
+     * thus warn that atomic_fetch_or(&d->ram->int_pending, ...)
+     * might be a misaligned atomic access, and will generate an
+     * out-of-line call for it, which results in a link error since
+     * we don't currently link against libatomic.
+     *
+     * In fact we set up d->ram in init_qxl_ram() so it always starts
+     * at a 4K boundary, so we know that &d->ram->int_pending is
+     * naturally aligned for a uint32_t. Newer Spice versions
+     * (with Spice commit beda5ec7a6848be20c0cac2a9a8ef2a41e8069c1)
+     * will fix the bug directly. To deal with older versions,
+     * we tell the compiler to assume the address really is aligned.
+     * Any compiler which cares about the misalignment will have
+     * __builtin_assume_aligned.
+     */
+#ifdef HAS_ASSUME_ALIGNED
+#define ALIGNED_UINT32_PTR(P) ((uint32_t *)__builtin_assume_aligned(P, 4))
+#else
+#define ALIGNED_UINT32_PTR(P) ((uint32_t *)P)
+#endif
+
+    old_pending = atomic_fetch_or(ALIGNED_UINT32_PTR(&d->ram->int_pending),
+                                  le_events);
     if ((old_pending & le_events) == le_events) {
         return;
     }
@@ -2115,7 +2139,7 @@
 
     memory_region_init_io(&qxl->io_bar, OBJECT(qxl), &qxl_io_ops, qxl,
                           "qxl-ioports", io_size);
-    if (qxl->id == 0) {
+    if (qxl->have_vga) {
         vga_dirty_log_start(&qxl->vga);
     }
     memory_region_set_flush_coalesced(&qxl->io_bar);
@@ -2147,7 +2171,7 @@
 
     /* print pci bar details */
     dprint(qxl, 1, "ram/%s: %" PRId64 " MB [region 0]\n",
-           qxl->id == 0 ? "pri" : "sec", qxl->vga.vram_size / MiB);
+           qxl->have_vga ? "pri" : "sec", qxl->vga.vram_size / MiB);
     dprint(qxl, 1, "vram/32: %" PRIx64 " MB [region 1]\n",
            qxl->vram32_size / MiB);
     dprint(qxl, 1, "vram/64: %" PRIx64 " MB %s\n",
@@ -2175,7 +2199,6 @@
     VGACommonState *vga = &qxl->vga;
     Error *local_err = NULL;
 
-    qxl->id = 0;
     qxl_init_ramsize(qxl);
     vga->vbe_size = qxl->vgamem_size;
     vga->vram_size_mb = qxl->vga.vram_size / MiB;
@@ -2186,8 +2209,15 @@
                      vga, "vga");
     portio_list_set_flush_coalesced(&qxl->vga_port_list);
     portio_list_add(&qxl->vga_port_list, pci_address_space_io(dev), 0x3b0);
+    qxl->have_vga = true;
 
     vga->con = graphic_console_init(DEVICE(dev), 0, &qxl_ops, qxl);
+    qxl->id = qemu_console_get_index(vga->con); /* == channel_id */
+    if (qxl->id != 0) {
+        error_setg(errp, "primary qxl-vga device must be console 0 "
+                   "(first display device on the command line)");
+        return;
+    }
 
     qxl_realize_common(qxl, &local_err);
     if (local_err) {
@@ -2202,15 +2232,14 @@
 
 static void qxl_realize_secondary(PCIDevice *dev, Error **errp)
 {
-    static int device_id = 1;
     PCIQXLDevice *qxl = PCI_QXL(dev);
 
-    qxl->id = device_id++;
     qxl_init_ramsize(qxl);
     memory_region_init_ram(&qxl->vga.vram, OBJECT(dev), "qxl.vgavram",
                            qxl->vga.vram_size, &error_fatal);
     qxl->vga.vram_ptr = memory_region_get_ram_ptr(&qxl->vga.vram);
     qxl->vga.con = graphic_console_init(DEVICE(dev), 0, &qxl_ops, qxl);
+    qxl->id = qemu_console_get_index(qxl->vga.con); /* == channel_id */
 
     qxl_realize_common(qxl, errp);
 }
diff --git a/hw/display/qxl.h b/hw/display/qxl.h
index dd9c052..6f9d1f2 100644
--- a/hw/display/qxl.h
+++ b/hw/display/qxl.h
@@ -34,6 +34,7 @@
     PortioList         vga_port_list;
     SimpleSpiceDisplay ssd;
     int                id;
+    bool               have_vga;
     uint32_t           debug;
     uint32_t           guestdebug;
     uint32_t           cmdlog;
diff --git a/hw/display/tcx.c b/hw/display/tcx.c
index b2786ee..66f2459 100644
--- a/hw/display/tcx.c
+++ b/hw/display/tcx.c
@@ -823,7 +823,7 @@
         ret = load_image_mr(fcode_filename, &s->rom);
         g_free(fcode_filename);
         if (ret < 0 || ret > FCODE_MAX_ROM_SIZE) {
-            error_report("tcx: could not load prom '%s'", TCX_ROM_FILE);
+            warn_report("tcx: could not load prom '%s'", TCX_ROM_FILE);
         }
     }
 
diff --git a/hw/display/vga-pci.c b/hw/display/vga-pci.c
index 24ca1b3..a17c96e 100644
--- a/hw/display/vga-pci.c
+++ b/hw/display/vga-pci.c
@@ -309,6 +309,14 @@
     VGACommonState *s = &d->vga;
 
     graphic_console_close(s->con);
+    memory_region_del_subregion(&d->mmio, &d->mrs[0]);
+    memory_region_del_subregion(&d->mmio, &d->mrs[1]);
+    if (d->flags & (1 << PCI_VGA_FLAG_ENABLE_QEXT)) {
+        memory_region_del_subregion(&d->mmio, &d->mrs[2]);
+    }
+    if (d->flags & (1 << PCI_VGA_FLAG_ENABLE_EDID)) {
+        memory_region_del_subregion(&d->mmio, &d->mrs[3]);
+    }
 }
 
 static void pci_secondary_vga_init(Object *obj)
diff --git a/hw/display/vga_int.h b/hw/display/vga_int.h
index 6e4fa48..55c418e 100644
--- a/hw/display/vga_int.h
+++ b/hw/display/vga_int.h
@@ -166,7 +166,6 @@
                           const MemoryRegionPortio **vbe_ports);
 void vga_common_reset(VGACommonState *s);
 
-void vga_sync_dirty_bitmap(VGACommonState *s);
 void vga_dirty_log_start(VGACommonState *s);
 void vga_dirty_log_stop(VGACommonState *s);
 
diff --git a/hw/hppa/dino.c b/hw/hppa/dino.c
index 564b938..31e0994 100644
--- a/hw/hppa/dino.c
+++ b/hw/hppa/dino.c
@@ -488,17 +488,10 @@
     return b;
 }
 
-static int dino_pcihost_init(SysBusDevice *dev)
-{
-    return 0;
-}
-
 static void dino_pcihost_class_init(ObjectClass *klass, void *data)
 {
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
 
-    k->init = dino_pcihost_init;
     dc->vmsd = &vmstate_dino;
 }
 
diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
index 0fb8fb8..ac6dd7f 100644
--- a/hw/hppa/machine.c
+++ b/hw/hppa/machine.c
@@ -191,7 +191,7 @@
 
         if (initrd_filename) {
             ram_addr_t initrd_base;
-            long initrd_size;
+            int64_t initrd_size;
 
             initrd_size = get_image_size(initrd_filename);
             if (initrd_size < 0) {
diff --git a/hw/hyperv/Makefile.objs b/hw/hyperv/Makefile.objs
new file mode 100644
index 0000000..edaca2f
--- /dev/null
+++ b/hw/hyperv/Makefile.objs
@@ -0,0 +1,2 @@
+obj-y += hyperv.o
+obj-$(CONFIG_HYPERV_TESTDEV) += hyperv_testdev.o
diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c
new file mode 100644
index 0000000..a28e724
--- /dev/null
+++ b/hw/hyperv/hyperv.c
@@ -0,0 +1,654 @@
+/*
+ * Hyper-V guest/hypervisor interaction
+ *
+ * Copyright (c) 2015-2018 Virtuozzo International GmbH.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qapi/error.h"
+#include "exec/address-spaces.h"
+#include "sysemu/kvm.h"
+#include "qemu/bitops.h"
+#include "qemu/error-report.h"
+#include "qemu/queue.h"
+#include "qemu/rcu.h"
+#include "qemu/rcu_queue.h"
+#include "hw/hyperv/hyperv.h"
+
+typedef struct SynICState {
+    DeviceState parent_obj;
+
+    CPUState *cs;
+
+    bool enabled;
+    hwaddr msg_page_addr;
+    hwaddr event_page_addr;
+    MemoryRegion msg_page_mr;
+    MemoryRegion event_page_mr;
+    struct hyperv_message_page *msg_page;
+    struct hyperv_event_flags_page *event_page;
+} SynICState;
+
+#define TYPE_SYNIC "hyperv-synic"
+#define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC)
+
+static SynICState *get_synic(CPUState *cs)
+{
+    return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
+}
+
+static void synic_update(SynICState *synic, bool enable,
+                         hwaddr msg_page_addr, hwaddr event_page_addr)
+{
+
+    synic->enabled = enable;
+    if (synic->msg_page_addr != msg_page_addr) {
+        if (synic->msg_page_addr) {
+            memory_region_del_subregion(get_system_memory(),
+                                        &synic->msg_page_mr);
+        }
+        if (msg_page_addr) {
+            memory_region_add_subregion(get_system_memory(), msg_page_addr,
+                                        &synic->msg_page_mr);
+        }
+        synic->msg_page_addr = msg_page_addr;
+    }
+    if (synic->event_page_addr != event_page_addr) {
+        if (synic->event_page_addr) {
+            memory_region_del_subregion(get_system_memory(),
+                                        &synic->event_page_mr);
+        }
+        if (event_page_addr) {
+            memory_region_add_subregion(get_system_memory(), event_page_addr,
+                                        &synic->event_page_mr);
+        }
+        synic->event_page_addr = event_page_addr;
+    }
+}
+
+void hyperv_synic_update(CPUState *cs, bool enable,
+                         hwaddr msg_page_addr, hwaddr event_page_addr)
+{
+    SynICState *synic = get_synic(cs);
+
+    if (!synic) {
+        return;
+    }
+
+    synic_update(synic, enable, msg_page_addr, event_page_addr);
+}
+
+static void synic_realize(DeviceState *dev, Error **errp)
+{
+    Object *obj = OBJECT(dev);
+    SynICState *synic = SYNIC(dev);
+    char *msgp_name, *eventp_name;
+    uint32_t vp_index;
+
+    /* memory region names have to be globally unique */
+    vp_index = hyperv_vp_index(synic->cs);
+    msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
+    eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);
+
+    memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
+                           sizeof(*synic->msg_page), &error_abort);
+    memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
+                           sizeof(*synic->event_page), &error_abort);
+    synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
+    synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
+
+    g_free(msgp_name);
+    g_free(eventp_name);
+}
+static void synic_reset(DeviceState *dev)
+{
+    SynICState *synic = SYNIC(dev);
+    memset(synic->msg_page, 0, sizeof(*synic->msg_page));
+    memset(synic->event_page, 0, sizeof(*synic->event_page));
+    synic_update(synic, false, 0, 0);
+}
+
+static void synic_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = synic_realize;
+    dc->reset = synic_reset;
+    dc->user_creatable = false;
+}
+
+void hyperv_synic_add(CPUState *cs)
+{
+    Object *obj;
+    SynICState *synic;
+
+    obj = object_new(TYPE_SYNIC);
+    synic = SYNIC(obj);
+    synic->cs = cs;
+    object_property_add_child(OBJECT(cs), "synic", obj, &error_abort);
+    object_unref(obj);
+    object_property_set_bool(obj, true, "realized", &error_abort);
+}
+
+void hyperv_synic_reset(CPUState *cs)
+{
+    device_reset(DEVICE(get_synic(cs)));
+}
+
+static const TypeInfo synic_type_info = {
+    .name = TYPE_SYNIC,
+    .parent = TYPE_DEVICE,
+    .instance_size = sizeof(SynICState),
+    .class_init = synic_class_init,
+};
+
+static void synic_register_types(void)
+{
+    type_register_static(&synic_type_info);
+}
+
+type_init(synic_register_types)
+
+/*
+ * KVM has its own message producers (SynIC timers).  To guarantee
+ * serialization with both KVM vcpu and the guest cpu, the messages are first
+ * staged in an intermediate area and then posted to the SynIC message page in
+ * the vcpu thread.
+ */
+typedef struct HvSintStagedMessage {
+    /* message content staged by hyperv_post_msg */
+    struct hyperv_message msg;
+    /* callback + data (r/o) to complete the processing in a BH */
+    HvSintMsgCb cb;
+    void *cb_data;
+    /* message posting status filled by cpu_post_msg */
+    int status;
+    /* passing the buck: */
+    enum {
+        /* initial state */
+        HV_STAGED_MSG_FREE,
+        /*
+         * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
+         * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
+         */
+        HV_STAGED_MSG_BUSY,
+        /*
+         * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
+         * notifies the guest, records the status, marks the posting done
+         * (BUSY -> POSTED), and schedules sint_msg_bh BH
+         */
+        HV_STAGED_MSG_POSTED,
+        /*
+         * sint_msg_bh (BH) verifies that the posting is done, runs the
+         * callback, and starts over (POSTED -> FREE)
+         */
+    } state;
+} HvSintStagedMessage;
+
+struct HvSintRoute {
+    uint32_t sint;
+    SynICState *synic;
+    int gsi;
+    EventNotifier sint_set_notifier;
+    EventNotifier sint_ack_notifier;
+
+    HvSintStagedMessage *staged_msg;
+
+    unsigned refcount;
+};
+
+static CPUState *hyperv_find_vcpu(uint32_t vp_index)
+{
+    CPUState *cs = qemu_get_cpu(vp_index);
+    assert(hyperv_vp_index(cs) == vp_index);
+    return cs;
+}
+
+/*
+ * BH to complete the processing of a staged message.
+ */
+static void sint_msg_bh(void *opaque)
+{
+    HvSintRoute *sint_route = opaque;
+    HvSintStagedMessage *staged_msg = sint_route->staged_msg;
+
+    if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
+        /* status not ready yet (spurious ack from guest?), ignore */
+        return;
+    }
+
+    staged_msg->cb(staged_msg->cb_data, staged_msg->status);
+    staged_msg->status = 0;
+
+    /* staged message processing finished, ready to start over */
+    atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
+    /* drop the reference taken in hyperv_post_msg */
+    hyperv_sint_route_unref(sint_route);
+}
+
+/*
+ * Worker to transfer the message from the staging area into the SynIC message
+ * page in vcpu context.
+ */
+static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
+{
+    HvSintRoute *sint_route = data.host_ptr;
+    HvSintStagedMessage *staged_msg = sint_route->staged_msg;
+    SynICState *synic = sint_route->synic;
+    struct hyperv_message *dst_msg;
+    bool wait_for_sint_ack = false;
+
+    assert(staged_msg->state == HV_STAGED_MSG_BUSY);
+
+    if (!synic->enabled || !synic->msg_page_addr) {
+        staged_msg->status = -ENXIO;
+        goto posted;
+    }
+
+    dst_msg = &synic->msg_page->slot[sint_route->sint];
+
+    if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
+        dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
+        staged_msg->status = -EAGAIN;
+        wait_for_sint_ack = true;
+    } else {
+        memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
+        staged_msg->status = hyperv_sint_route_set_sint(sint_route);
+    }
+
+    memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
+
+posted:
+    atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
+    /*
+     * Notify the msg originator of the progress made; if the slot was busy we
+     * set msg_pending flag in it so it will be the guest who will do EOM and
+     * trigger the notification from KVM via sint_ack_notifier
+     */
+    if (!wait_for_sint_ack) {
+        aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
+                                sint_route);
+    }
+}
+
+/*
+ * Post a Hyper-V message to the staging area, for delivery to guest in the
+ * vcpu thread.
+ */
+int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
+{
+    HvSintStagedMessage *staged_msg = sint_route->staged_msg;
+
+    assert(staged_msg);
+
+    /* grab the staging area */
+    if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
+                       HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
+        return -EAGAIN;
+    }
+
+    memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));
+
+    /* hold a reference on sint_route until the callback is finished */
+    hyperv_sint_route_ref(sint_route);
+
+    /* schedule message posting attempt in vcpu thread */
+    async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
+                     RUN_ON_CPU_HOST_PTR(sint_route));
+    return 0;
+}
+
+static void sint_ack_handler(EventNotifier *notifier)
+{
+    HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
+                                           sint_ack_notifier);
+    event_notifier_test_and_clear(notifier);
+
+    /*
+     * the guest consumed the previous message so complete the current one with
+     * -EAGAIN and let the msg originator retry
+     */
+    aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
+}
+
+/*
+ * Set given event flag for a given sint on a given vcpu, and signal the sint.
+ */
+int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
+{
+    int ret;
+    SynICState *synic = sint_route->synic;
+    unsigned long *flags, set_mask;
+    unsigned set_idx;
+
+    if (eventno >= HV_EVENT_FLAGS_COUNT) { /* flag indices are 0-based */
+        return -EINVAL;
+    }
+    if (!synic->enabled || !synic->event_page_addr) {
+        return -ENXIO;
+    }
+
+    set_idx = BIT_WORD(eventno);
+    set_mask = BIT_MASK(eventno);
+    flags = synic->event_page->slot[sint_route->sint].flags;
+
+    if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
+        memory_region_set_dirty(&synic->event_page_mr, 0,
+                                sizeof(*synic->event_page));
+        ret = hyperv_sint_route_set_sint(sint_route);
+    } else {
+        ret = 0;
+    }
+    return ret;
+}
+
+HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
+                                   HvSintMsgCb cb, void *cb_data)
+{
+    HvSintRoute *sint_route;
+    EventNotifier *ack_notifier;
+    int r, gsi;
+    CPUState *cs;
+    SynICState *synic;
+
+    cs = hyperv_find_vcpu(vp_index);
+    if (!cs) {
+        return NULL;
+    }
+
+    synic = get_synic(cs);
+    if (!synic) {
+        return NULL;
+    }
+
+    sint_route = g_new0(HvSintRoute, 1);
+    r = event_notifier_init(&sint_route->sint_set_notifier, false);
+    if (r) {
+        goto err;
+    }
+
+    /* ack notifier and staging area are only set up when cb is given */
+    ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
+    if (ack_notifier) {
+        sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
+        sint_route->staged_msg->cb = cb;
+        sint_route->staged_msg->cb_data = cb_data;
+
+        r = event_notifier_init(ack_notifier, false);
+        if (r) {
+            goto err_sint_set_notifier;
+        }
+
+        event_notifier_set_handler(ack_notifier, sint_ack_handler);
+    }
+
+    gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
+    if (gsi < 0) {
+        goto err_gsi;
+    }
+
+    r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
+                                           &sint_route->sint_set_notifier,
+                                           ack_notifier, gsi);
+    if (r) {
+        goto err_irqfd;
+    }
+    sint_route->gsi = gsi;
+    sint_route->synic = synic;
+    sint_route->sint = sint;
+    sint_route->refcount = 1;
+
+    return sint_route;
+
+err_irqfd:
+    kvm_irqchip_release_virq(kvm_state, gsi);
+err_gsi:
+    if (ack_notifier) {
+        event_notifier_set_handler(ack_notifier, NULL);
+        event_notifier_cleanup(ack_notifier);
+    }
+err_sint_set_notifier:
+    g_free(sint_route->staged_msg); /* NULL (no-op) when no cb was given */
+    event_notifier_cleanup(&sint_route->sint_set_notifier);
+err:
+    g_free(sint_route);
+
+    return NULL;
+}
+
+void hyperv_sint_route_ref(HvSintRoute *sint_route)
+{
+    sint_route->refcount++;
+}
+
+void hyperv_sint_route_unref(HvSintRoute *sint_route)
+{
+    if (!sint_route) {
+        return;
+    }
+
+    assert(sint_route->refcount > 0);
+
+    if (--sint_route->refcount) {
+        return;
+    }
+
+    kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
+                                          &sint_route->sint_set_notifier,
+                                          sint_route->gsi);
+    kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
+    if (sint_route->staged_msg) {
+        event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
+        event_notifier_cleanup(&sint_route->sint_ack_notifier);
+        g_free(sint_route->staged_msg);
+    }
+    event_notifier_cleanup(&sint_route->sint_set_notifier);
+    g_free(sint_route);
+}
+
+int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
+{
+    return event_notifier_set(&sint_route->sint_set_notifier);
+}
+
+typedef struct MsgHandler {
+    struct rcu_head rcu;
+    QLIST_ENTRY(MsgHandler) link;
+    uint32_t conn_id;
+    HvMsgHandler handler;
+    void *data;
+} MsgHandler;
+
+typedef struct EventFlagHandler {
+    struct rcu_head rcu;
+    QLIST_ENTRY(EventFlagHandler) link;
+    uint32_t conn_id;
+    EventNotifier *notifier;
+} EventFlagHandler;
+
+static QLIST_HEAD(, MsgHandler) msg_handlers;
+static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
+static QemuMutex handlers_mutex;
+
+static void __attribute__((constructor)) hv_init(void)
+{
+    QLIST_INIT(&msg_handlers);
+    QLIST_INIT(&event_flag_handlers);
+    qemu_mutex_init(&handlers_mutex);
+}
+
+int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
+{
+    int ret;
+    MsgHandler *mh;
+
+    qemu_mutex_lock(&handlers_mutex);
+    QLIST_FOREACH(mh, &msg_handlers, link) {
+        if (mh->conn_id == conn_id) {
+            if (handler) {
+                ret = -EEXIST;
+            } else {
+                QLIST_REMOVE_RCU(mh, link);
+                g_free_rcu(mh, rcu);
+                ret = 0;
+            }
+            goto unlock;
+        }
+    }
+
+    if (handler) {
+        mh = g_new(MsgHandler, 1);
+        mh->conn_id = conn_id;
+        mh->handler = handler;
+        mh->data = data;
+        QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
+        ret = 0;
+    } else {
+        ret = -ENOENT;
+    }
+unlock:
+    qemu_mutex_unlock(&handlers_mutex);
+    return ret;
+}
+
+uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
+{
+    uint16_t ret;
+    hwaddr len;
+    struct hyperv_post_message_input *msg;
+    MsgHandler *mh;
+
+    if (fast) {
+        return HV_STATUS_INVALID_HYPERCALL_CODE;
+    }
+    if (param & (__alignof__(*msg) - 1)) {
+        return HV_STATUS_INVALID_ALIGNMENT;
+    }
+
+    len = sizeof(*msg);
+    msg = cpu_physical_memory_map(param, &len, 0); /* TODO: may be NULL? */
+    if (len < sizeof(*msg)) {
+        ret = HV_STATUS_INSUFFICIENT_MEMORY;
+        goto unmap;
+    }
+    if (msg->payload_size > sizeof(msg->payload)) {
+        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+        goto unmap;
+    }
+
+    ret = HV_STATUS_INVALID_CONNECTION_ID;
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
+        if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
+            ret = mh->handler(msg, mh->data);
+            break;
+        }
+    }
+    rcu_read_unlock();
+
+unmap:
+    cpu_physical_memory_unmap(msg, len, 0, 0);
+    return ret;
+}
+
+static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
+{
+    int ret;
+    EventFlagHandler *handler;
+
+    qemu_mutex_lock(&handlers_mutex);
+    QLIST_FOREACH(handler, &event_flag_handlers, link) {
+        if (handler->conn_id == conn_id) {
+            if (notifier) {
+                ret = -EEXIST;
+            } else {
+                QLIST_REMOVE_RCU(handler, link);
+                g_free_rcu(handler, rcu);
+                ret = 0;
+            }
+            goto unlock;
+        }
+    }
+
+    if (notifier) {
+        handler = g_new(EventFlagHandler, 1);
+        handler->conn_id = conn_id;
+        handler->notifier = notifier;
+        QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
+        ret = 0;
+    } else {
+        ret = -ENOENT;
+    }
+unlock:
+    qemu_mutex_unlock(&handlers_mutex);
+    return ret;
+}
+
+static bool process_event_flags_userspace;
+
+int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
+{
+    if (!process_event_flags_userspace &&
+        !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
+        process_event_flags_userspace = true;
+
+        warn_report("Hyper-V event signaling is not supported by this kernel; "
+                    "using slower userspace hypercall processing");
+    }
+
+    if (!process_event_flags_userspace) {
+        struct kvm_hyperv_eventfd hvevfd = {
+            .conn_id = conn_id,
+            .fd = notifier ? event_notifier_get_fd(notifier) : -1,
+            .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
+        };
+
+        return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
+    }
+    return set_event_flag_handler(conn_id, notifier);
+}
+
+uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
+{
+    uint16_t ret;
+    EventFlagHandler *handler;
+
+    if (unlikely(!fast)) {
+        hwaddr addr = param;
+
+        if (addr & (__alignof__(addr) - 1)) {
+            return HV_STATUS_INVALID_ALIGNMENT;
+        }
+
+        param = ldq_phys(&address_space_memory, addr);
+    }
+
+    /*
+     * Per spec, bits 32-47 contain the extra "flag number".  However, we
+     * have no use for it, and in all known usecases it is zero, so just
+     * report lookup failure if it isn't.
+     */
+    if (param & 0xffff00000000ULL) {
+        return HV_STATUS_INVALID_PORT_ID;
+    }
+    /* remaining bits are reserved-zero */
+    if (param & ~HV_CONNECTION_ID_MASK) {
+        return HV_STATUS_INVALID_HYPERCALL_INPUT;
+    }
+
+    ret = HV_STATUS_INVALID_CONNECTION_ID;
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
+        if (handler->conn_id == param) {
+            event_notifier_set(handler->notifier);
+            ret = 0;
+            break;
+        }
+    }
+    rcu_read_unlock();
+    return ret;
+}
diff --git a/hw/hyperv/hyperv_testdev.c b/hw/hyperv/hyperv_testdev.c
new file mode 100644
index 0000000..4880333
--- /dev/null
+++ b/hw/hyperv/hyperv_testdev.c
@@ -0,0 +1,327 @@
+/*
+ * QEMU KVM Hyper-V test device to support Hyper-V kvm-unit-tests
+ *
+ * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * Authors:
+ *  Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/queue.h"
+#include "hw/qdev.h"
+#include "hw/isa/isa.h"
+#include "hw/hyperv/hyperv.h"
+
+typedef struct TestSintRoute {
+    QLIST_ENTRY(TestSintRoute) le;
+    uint8_t vp_index;
+    uint8_t sint;
+    HvSintRoute *sint_route;
+} TestSintRoute;
+
+typedef struct TestMsgConn {
+    QLIST_ENTRY(TestMsgConn) le;
+    uint8_t conn_id;
+    HvSintRoute *sint_route;
+    struct hyperv_message msg;
+} TestMsgConn;
+
+typedef struct TestEvtConn {
+    QLIST_ENTRY(TestEvtConn) le;
+    uint8_t conn_id;
+    HvSintRoute *sint_route;
+    EventNotifier notifier;
+} TestEvtConn;
+
+struct HypervTestDev {
+    ISADevice parent_obj;
+    MemoryRegion sint_control;
+    QLIST_HEAD(, TestSintRoute) sint_routes;
+    QLIST_HEAD(, TestMsgConn) msg_conns;
+    QLIST_HEAD(, TestEvtConn) evt_conns;
+};
+typedef struct HypervTestDev HypervTestDev;
+
+#define TYPE_HYPERV_TEST_DEV "hyperv-testdev"
+#define HYPERV_TEST_DEV(obj) \
+        OBJECT_CHECK(HypervTestDev, (obj), TYPE_HYPERV_TEST_DEV)
+
+enum {
+    HV_TEST_DEV_SINT_ROUTE_CREATE = 1,
+    HV_TEST_DEV_SINT_ROUTE_DESTROY,
+    HV_TEST_DEV_SINT_ROUTE_SET_SINT,
+    HV_TEST_DEV_MSG_CONN_CREATE,
+    HV_TEST_DEV_MSG_CONN_DESTROY,
+    HV_TEST_DEV_EVT_CONN_CREATE,
+    HV_TEST_DEV_EVT_CONN_DESTROY,
+};
+
+static void sint_route_create(HypervTestDev *dev,
+                              uint8_t vp_index, uint8_t sint)
+{
+    TestSintRoute *sint_route;
+
+    sint_route = g_new0(TestSintRoute, 1);
+    assert(sint_route);
+
+    sint_route->vp_index = vp_index;
+    sint_route->sint = sint;
+
+    sint_route->sint_route = hyperv_sint_route_new(vp_index, sint, NULL, NULL);
+    assert(sint_route->sint_route);
+
+    QLIST_INSERT_HEAD(&dev->sint_routes, sint_route, le);
+}
+
+static TestSintRoute *sint_route_find(HypervTestDev *dev,
+                                      uint8_t vp_index, uint8_t sint)
+{
+    TestSintRoute *sint_route;
+
+    QLIST_FOREACH(sint_route, &dev->sint_routes, le) {
+        if (sint_route->vp_index == vp_index && sint_route->sint == sint) {
+            return sint_route;
+        }
+    }
+    assert(false);
+    return NULL;
+}
+
+static void sint_route_destroy(HypervTestDev *dev,
+                               uint8_t vp_index, uint8_t sint)
+{
+    TestSintRoute *sint_route;
+
+    sint_route = sint_route_find(dev, vp_index, sint);
+    QLIST_REMOVE(sint_route, le);
+    hyperv_sint_route_unref(sint_route->sint_route);
+    g_free(sint_route);
+}
+
+static void sint_route_set_sint(HypervTestDev *dev,
+                                uint8_t vp_index, uint8_t sint)
+{
+    TestSintRoute *sint_route;
+
+    sint_route = sint_route_find(dev, vp_index, sint);
+
+    hyperv_sint_route_set_sint(sint_route->sint_route);
+}
+
+static void msg_retry(void *opaque)
+{
+    TestMsgConn *conn = opaque;
+    assert(!hyperv_post_msg(conn->sint_route, &conn->msg));
+}
+
+static void msg_cb(void *data, int status)
+{
+    TestMsgConn *conn = data;
+
+    if (!status) {
+        return;
+    }
+
+    assert(status == -EAGAIN);
+
+    aio_bh_schedule_oneshot(qemu_get_aio_context(), msg_retry, conn);
+}
+
+static uint16_t msg_handler(const struct hyperv_post_message_input *msg,
+                            void *data)
+{
+    int ret;
+    TestMsgConn *conn = data;
+
+    /* post the same message we've got; the payload may fill the buffer */
+    conn->msg.header.message_type = msg->message_type;
+    assert(msg->payload_size <= sizeof(conn->msg.payload));
+    conn->msg.header.payload_size = msg->payload_size;
+    memcpy(&conn->msg.payload, msg->payload, msg->payload_size);
+
+    ret = hyperv_post_msg(conn->sint_route, &conn->msg);
+
+    switch (ret) {
+    case 0:
+        return HV_STATUS_SUCCESS;
+    case -EAGAIN:
+        return HV_STATUS_INSUFFICIENT_BUFFERS;
+    default:
+        return HV_STATUS_INVALID_HYPERCALL_INPUT;
+    }
+}
+
+static void msg_conn_create(HypervTestDev *dev, uint8_t vp_index,
+                            uint8_t sint, uint8_t conn_id)
+{
+    TestMsgConn *conn;
+
+    conn = g_new0(TestMsgConn, 1);
+    assert(conn);
+
+    conn->conn_id = conn_id;
+
+    conn->sint_route = hyperv_sint_route_new(vp_index, sint, msg_cb, conn);
+    assert(conn->sint_route);
+
+    assert(!hyperv_set_msg_handler(conn->conn_id, msg_handler, conn));
+
+    QLIST_INSERT_HEAD(&dev->msg_conns, conn, le);
+}
+
+static void msg_conn_destroy(HypervTestDev *dev, uint8_t conn_id)
+{
+    TestMsgConn *conn;
+
+    QLIST_FOREACH(conn, &dev->msg_conns, le) {
+        if (conn->conn_id == conn_id) {
+            QLIST_REMOVE(conn, le);
+            hyperv_set_msg_handler(conn->conn_id, NULL, NULL);
+            hyperv_sint_route_unref(conn->sint_route);
+            g_free(conn);
+            return;
+        }
+    }
+    assert(false);
+}
+
+static void evt_conn_handler(EventNotifier *notifier)
+{
+    TestEvtConn *conn = container_of(notifier, TestEvtConn, notifier);
+
+    event_notifier_test_and_clear(notifier);
+
+    /* signal the same event flag we've got */
+    assert(!hyperv_set_event_flag(conn->sint_route, conn->conn_id));
+}
+
+static void evt_conn_create(HypervTestDev *dev, uint8_t vp_index,
+                            uint8_t sint, uint8_t conn_id)
+{
+    TestEvtConn *conn;
+
+    conn = g_new0(TestEvtConn, 1);
+    assert(conn);
+
+    conn->conn_id = conn_id;
+
+    conn->sint_route = hyperv_sint_route_new(vp_index, sint, NULL, NULL);
+    assert(conn->sint_route);
+
+    assert(!event_notifier_init(&conn->notifier, false));
+
+    event_notifier_set_handler(&conn->notifier, evt_conn_handler);
+
+    assert(!hyperv_set_event_flag_handler(conn_id, &conn->notifier));
+
+    QLIST_INSERT_HEAD(&dev->evt_conns, conn, le);
+}
+
+static void evt_conn_destroy(HypervTestDev *dev, uint8_t conn_id)
+{
+    TestEvtConn *conn;
+
+    QLIST_FOREACH(conn, &dev->evt_conns, le) {
+        if (conn->conn_id == conn_id) {
+            QLIST_REMOVE(conn, le);
+            hyperv_set_event_flag_handler(conn->conn_id, NULL);
+            event_notifier_set_handler(&conn->notifier, NULL);
+            event_notifier_cleanup(&conn->notifier);
+            hyperv_sint_route_unref(conn->sint_route);
+            g_free(conn);
+            return;
+        }
+    }
+    assert(false);
+}
+
+static uint64_t hv_test_dev_read(void *opaque, hwaddr addr, unsigned size)
+{
+    return 0;
+}
+
+static void hv_test_dev_write(void *opaque, hwaddr addr, uint64_t data,
+                                uint32_t len)
+{
+    HypervTestDev *dev = HYPERV_TEST_DEV(opaque);
+    uint8_t sint = data & 0xFF;
+    uint8_t vp_index = (data >> 8ULL) & 0xFF;
+    uint8_t ctl = (data >> 16ULL) & 0xFF;
+    uint8_t conn_id = (data >> 24ULL) & 0xFF;
+
+    switch (ctl) {
+    case HV_TEST_DEV_SINT_ROUTE_CREATE:
+        sint_route_create(dev, vp_index, sint);
+        break;
+    case HV_TEST_DEV_SINT_ROUTE_DESTROY:
+        sint_route_destroy(dev, vp_index, sint);
+        break;
+    case HV_TEST_DEV_SINT_ROUTE_SET_SINT:
+        sint_route_set_sint(dev, vp_index, sint);
+        break;
+    case HV_TEST_DEV_MSG_CONN_CREATE:
+        msg_conn_create(dev, vp_index, sint, conn_id);
+        break;
+    case HV_TEST_DEV_MSG_CONN_DESTROY:
+        msg_conn_destroy(dev, conn_id);
+        break;
+    case HV_TEST_DEV_EVT_CONN_CREATE:
+        evt_conn_create(dev, vp_index, sint, conn_id);
+        break;
+    case HV_TEST_DEV_EVT_CONN_DESTROY:
+        evt_conn_destroy(dev, conn_id);
+        break;
+    default:
+        break;
+    }
+}
+
+static const MemoryRegionOps synic_test_sint_ops = {
+    .read = hv_test_dev_read,
+    .write = hv_test_dev_write,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void hv_test_dev_realizefn(DeviceState *d, Error **errp)
+{
+    ISADevice *isa = ISA_DEVICE(d);
+    HypervTestDev *dev = HYPERV_TEST_DEV(d);
+    MemoryRegion *io = isa_address_space_io(isa);
+
+    QLIST_INIT(&dev->sint_routes);
+    QLIST_INIT(&dev->msg_conns);
+    QLIST_INIT(&dev->evt_conns);
+    memory_region_init_io(&dev->sint_control, OBJECT(dev),
+                          &synic_test_sint_ops, dev,
+                          "hyperv-testdev-ctl", 4);
+    memory_region_add_subregion(io, 0x3000, &dev->sint_control);
+}
+
+static void hv_test_dev_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    dc->realize = hv_test_dev_realizefn;
+}
+
+static const TypeInfo hv_test_dev_info = {
+    .name           = TYPE_HYPERV_TEST_DEV,
+    .parent         = TYPE_ISA_DEVICE,
+    .instance_size  = sizeof(HypervTestDev),
+    .class_init     = hv_test_dev_class_init,
+};
+
+static void hv_test_dev_register_types(void)
+{
+    type_register_static(&hv_test_dev_info);
+}
+type_init(hv_test_dev_register_types);
diff --git a/hw/i2c/i2c-ddc.c b/hw/i2c/i2c-ddc.c
index bec0c91..be34fe0 100644
--- a/hw/i2c/i2c-ddc.c
+++ b/hw/i2c/i2c-ddc.c
@@ -32,197 +32,6 @@
     }                                                                          \
 } while (0)
 
-/* Structure defining a monitor's characteristics in a
- * readable format: this should be passed to build_edid_blob()
- * to convert it into the 128 byte binary EDID blob.
- * Not all bits of the EDID are customisable here.
- */
-struct EDIDData {
-    char manuf_id[3]; /* three upper case letters */
-    uint16_t product_id;
-    uint32_t serial_no;
-    uint8_t manuf_week;
-    int manuf_year;
-    uint8_t h_cm;
-    uint8_t v_cm;
-    uint8_t gamma;
-    char monitor_name[14];
-    char serial_no_string[14];
-    /* Range limits */
-    uint8_t vmin; /* Hz */
-    uint8_t vmax; /* Hz */
-    uint8_t hmin; /* kHz */
-    uint8_t hmax; /* kHz */
-    uint8_t pixclock; /* MHz / 10 */
-    uint8_t timing_data[18];
-};
-
-typedef struct EDIDData EDIDData;
-
-/* EDID data for a simple LCD monitor */
-static const EDIDData lcd_edid = {
-    /* The manuf_id ought really to be an assigned EISA ID */
-    .manuf_id = "QMU",
-    .product_id = 0,
-    .serial_no = 1,
-    .manuf_week = 1,
-    .manuf_year = 2011,
-    .h_cm = 40,
-    .v_cm = 30,
-    .gamma = 0x78,
-    .monitor_name = "QEMU monitor",
-    .serial_no_string = "1",
-    .vmin = 40,
-    .vmax = 120,
-    .hmin = 30,
-    .hmax = 100,
-    .pixclock = 18,
-    .timing_data = {
-        /* Borrowed from a 21" LCD */
-        0x48, 0x3f, 0x40, 0x30, 0x62, 0xb0, 0x32, 0x40, 0x40,
-        0xc0, 0x13, 0x00, 0x98, 0x32, 0x11, 0x00, 0x00, 0x1e
-    }
-};
-
-static uint8_t manuf_char_to_int(char c)
-{
-    return (c - 'A') & 0x1f;
-}
-
-static void write_ascii_descriptor_block(uint8_t *descblob, uint8_t blocktype,
-                                         const char *string)
-{
-    /* Write an EDID Descriptor Block of the "ascii string" type */
-    int i;
-    descblob[0] = descblob[1] = descblob[2] = descblob[4] = 0;
-    descblob[3] = blocktype;
-    /* The rest is 13 bytes of ASCII; if less then the rest must
-     * be filled with newline then spaces
-     */
-    for (i = 5; i < 19; i++) {
-        descblob[i] = string[i - 5];
-        if (!descblob[i]) {
-            break;
-        }
-    }
-    if (i < 19) {
-        descblob[i++] = '\n';
-    }
-    for ( ; i < 19; i++) {
-        descblob[i] = ' ';
-    }
-}
-
-static void write_range_limits_descriptor(const EDIDData *edid,
-                                          uint8_t *descblob)
-{
-    int i;
-    descblob[0] = descblob[1] = descblob[2] = descblob[4] = 0;
-    descblob[3] = 0xfd;
-    descblob[5] = edid->vmin;
-    descblob[6] = edid->vmax;
-    descblob[7] = edid->hmin;
-    descblob[8] = edid->hmax;
-    descblob[9] = edid->pixclock;
-    descblob[10] = 0;
-    descblob[11] = 0xa;
-    for (i = 12; i < 19; i++) {
-        descblob[i] = 0x20;
-    }
-}
-
-static void build_edid_blob(const EDIDData *edid, uint8_t *blob)
-{
-    /* Write an EDID 1.3 format blob (128 bytes) based
-     * on the EDIDData structure.
-     */
-    int i;
-    uint8_t cksum;
-
-    /* 00-07 : header */
-    blob[0] = blob[7] = 0;
-    for (i = 1 ; i < 7; i++) {
-        blob[i] = 0xff;
-    }
-    /* 08-09 : manufacturer ID */
-    blob[8] = (manuf_char_to_int(edid->manuf_id[0]) << 2)
-        | (manuf_char_to_int(edid->manuf_id[1]) >> 3);
-    blob[9] = (manuf_char_to_int(edid->manuf_id[1]) << 5)
-        | manuf_char_to_int(edid->manuf_id[2]);
-    /* 10-11 : product ID code */
-    blob[10] = edid->product_id;
-    blob[11] = edid->product_id >> 8;
-    blob[12] = edid->serial_no;
-    blob[13] = edid->serial_no >> 8;
-    blob[14] = edid->serial_no >> 16;
-    blob[15] = edid->serial_no >> 24;
-    /* 16 : week of manufacture */
-    blob[16] = edid->manuf_week;
-    /* 17 : year of manufacture - 1990 */
-    blob[17] = edid->manuf_year - 1990;
-    /* 18, 19 : EDID version and revision */
-    blob[18] = 1;
-    blob[19] = 3;
-    /* 20 - 24 : basic display parameters */
-    /* We are always a digital display */
-    blob[20] = 0x80;
-    /* 21, 22 : max h/v size in cm */
-    blob[21] = edid->h_cm;
-    blob[22] = edid->v_cm;
-    /* 23 : gamma (divide by 100 then add 1 for actual value) */
-    blob[23] = edid->gamma;
-    /* 24 feature support: no power management, RGB, preferred timing mode,
-     * standard colour space
-     */
-    blob[24] = 0x0e;
-    /* 25 - 34 : chromaticity coordinates. These are the
-     * standard sRGB chromaticity values
-     */
-    blob[25] = 0xee;
-    blob[26] = 0x91;
-    blob[27] = 0xa3;
-    blob[28] = 0x54;
-    blob[29] = 0x4c;
-    blob[30] = 0x99;
-    blob[31] = 0x26;
-    blob[32] = 0x0f;
-    blob[33] = 0x50;
-    blob[34] = 0x54;
-    /* 35, 36 : Established timings: claim to support everything */
-    blob[35] = blob[36] = 0xff;
-    /* 37 : manufacturer's reserved timing: none */
-    blob[37] = 0;
-    /* 38 - 53 : standard timing identification
-     * don't claim anything beyond what the 'established timings'
-     * already provide. Unused slots must be (0x1, 0x1)
-     */
-    for (i = 38; i < 54; i++) {
-        blob[i] = 0x1;
-    }
-    /* 54 - 71 : descriptor block 1 : must be preferred timing data */
-    memcpy(blob + 54, edid->timing_data, 18);
-    /* 72 - 89, 90 - 107, 108 - 125 : descriptor block 2, 3, 4
-     * Order not important, but we must have a monitor name and a
-     * range limits descriptor.
-     */
-    write_range_limits_descriptor(edid, blob + 72);
-    write_ascii_descriptor_block(blob + 90, 0xfc, edid->monitor_name);
-    write_ascii_descriptor_block(blob + 108, 0xff, edid->serial_no_string);
-
-    /* 126 : extension flag */
-    blob[126] = 0;
-
-    cksum = 0;
-    for (i = 0; i < 127; i++) {
-        cksum += blob[i];
-    }
-    /* 127 : checksum */
-    blob[127] = -cksum;
-    if (DEBUG_I2CDDC) {
-        qemu_hexdump((char *)blob, stdout, "", 128);
-    }
-}
-
 static void i2c_ddc_reset(DeviceState *ds)
 {
     I2CDDCState *s = I2CDDC(ds);
@@ -270,7 +79,8 @@
 static void i2c_ddc_init(Object *obj)
 {
     I2CDDCState *s = I2CDDC(obj);
-    build_edid_blob(&lcd_edid, s->edid_blob);
+
+    qemu_edid_generate(s->edid_blob, sizeof(s->edid_blob), &s->edid_info);
 }
 
 static const VMStateDescription vmstate_i2c_ddc = {
@@ -283,6 +93,11 @@
     }
 };
 
+static Property i2c_ddc_properties[] = {
+    DEFINE_EDID_PROPERTIES(I2CDDCState, edid_info),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void i2c_ddc_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
@@ -290,6 +105,7 @@
 
     dc->reset = i2c_ddc_reset;
     dc->vmsd = &vmstate_i2c_ddc;
+    dc->props = i2c_ddc_properties;
     isc->event = i2c_ddc_event;
     isc->recv = i2c_ddc_rx;
     isc->send = i2c_ddc_tx;
diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index 0bf1c60..25ea783 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -147,6 +147,15 @@
     s->clock_is_reliable = kvm_has_adjust_clock_stable();
 }
 
+static void do_kvmclock_ctrl(CPUState *cpu, run_on_cpu_data data)
+{
+    int ret = kvm_vcpu_ioctl(cpu, KVM_KVMCLOCK_CTRL, 0);
+
+    if (ret && ret != -EINVAL) {
+        fprintf(stderr, "%s: %s\n", __func__, strerror(-ret));
+    }
+}
+
 static void kvmclock_vm_state_change(void *opaque, int running,
                                      RunState state)
 {
@@ -183,13 +192,7 @@
             return;
         }
         CPU_FOREACH(cpu) {
-            ret = kvm_vcpu_ioctl(cpu, KVM_KVMCLOCK_CTRL, 0);
-            if (ret) {
-                if (ret != -EINVAL) {
-                    fprintf(stderr, "%s: %s\n", __func__, strerror(-ret));
-                }
-                return;
-            }
+            run_on_cpu(cpu, do_kvmclock_ctrl, RUN_ON_CPU_NULL);
         }
     } else {
 
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 0314845..f095725 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -838,7 +838,8 @@
                        FWCfgState *fw_cfg)
 {
     uint16_t protocol;
-    int setup_size, kernel_size, initrd_size = 0, cmdline_size;
+    int setup_size, kernel_size, cmdline_size;
+    int64_t initrd_size = 0;
     int dtb_size, setup_data_offset;
     uint32_t initrd_max;
     uint8_t header[8192], *setup, *kernel, *initrd_data;
@@ -974,6 +975,10 @@
             fprintf(stderr, "qemu: error reading initrd %s: %s\n",
                     initrd_filename, strerror(errno));
             exit(1);
+        } else if (initrd_size >= initrd_max) {
+            fprintf(stderr, "qemu: initrd is too large, cannot support."
+                    "(max: %"PRIu32", need %"PRId64")\n", initrd_max, initrd_size);
+            exit(1);
         }
 
         initrd_addr = (initrd_max-initrd_size) & ~4095;
@@ -1699,7 +1704,7 @@
         return;
     }
 
-    pc_dimm_pre_plug(dev, MACHINE(hotplug_dev),
+    pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev),
                      pcmc->enforce_aligned_dimm ? NULL : &legacy_align, errp);
 }
 
@@ -1711,7 +1716,7 @@
     PCMachineState *pcms = PC_MACHINE(hotplug_dev);
     bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
 
-    pc_dimm_plug(dev, MACHINE(pcms), &local_err);
+    pc_dimm_plug(PC_DIMM(dev), MACHINE(pcms), &local_err);
     if (local_err) {
         goto out;
     }
@@ -1771,7 +1776,7 @@
         goto out;
     }
 
-    pc_dimm_unplug(dev, MACHINE(pcms));
+    pc_dimm_unplug(PC_DIMM(dev), MACHINE(pcms));
     object_unparent(OBJECT(dev));
 
  out:
@@ -2204,8 +2209,9 @@
     else if (strcmp(value, "mem-ctrl") == 0)
         nvdimm_state->persistence = 2;
     else {
-        error_report("-machine nvdimm-persistence=%s: unsupported option", value);
-        exit(EXIT_FAILURE);
+        error_setg(errp, "-machine nvdimm-persistence=%s: unsupported option",
+                   value);
+        return;
     }
 
     g_free(nvdimm_state->persistence_string);
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 2c62efc..04e22e7 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -35,6 +35,7 @@
 #include "sysemu/block-backend.h"
 #include "qapi/error.h"
 #include "qemu/cutils.h"
+#include "sysemu/replay.h"
 
 #include "hw/ide/internal.h"
 #include "trace.h"
@@ -479,7 +480,7 @@
 done:
     iocb->aiocb = NULL;
     if (iocb->bh) {
-        qemu_bh_schedule(iocb->bh);
+        replay_bh_schedule_event(iocb->bh);
     }
 }
 
diff --git a/hw/input/ps2.c b/hw/input/ps2.c
index fdfcadf..6c43fc2 100644
--- a/hw/input/ps2.c
+++ b/hw/input/ps2.c
@@ -914,7 +914,12 @@
     uint8_t tmp_data[PS2_QUEUE_SIZE];
 
     /* set the useful data buffer queue size, < PS2_QUEUE_SIZE */
-    size = (q->count < 0 || q->count > PS2_QUEUE_SIZE) ? 0 : q->count;
+    size = q->count;
+    if (q->count < 0) {
+        size = 0;
+    } else if (q->count > PS2_QUEUE_SIZE) {
+        size = PS2_QUEUE_SIZE;
+    }
 
     /* move the queue elements to the start of data array */
     for (i = 0; i < size; i++) {
@@ -929,7 +934,6 @@
     q->rptr = 0;
     q->wptr = (size == PS2_QUEUE_SIZE) ? 0 : size;
     q->count = size;
-    s->update_irq(s->update_arg, q->count != 0);
 }
 
 static void ps2_kbd_reset(void *opaque)
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 0d816fd..0beefb0 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -1055,17 +1055,17 @@
     case 0xd5c: /* MMFR3.  */
         return cpu->id_mmfr3;
     case 0xd60: /* ISAR0.  */
-        return cpu->id_isar0;
+        return cpu->isar.id_isar0;
     case 0xd64: /* ISAR1.  */
-        return cpu->id_isar1;
+        return cpu->isar.id_isar1;
     case 0xd68: /* ISAR2.  */
-        return cpu->id_isar2;
+        return cpu->isar.id_isar2;
     case 0xd6c: /* ISAR3.  */
-        return cpu->id_isar3;
+        return cpu->isar.id_isar3;
     case 0xd70: /* ISAR4.  */
-        return cpu->id_isar4;
+        return cpu->isar.id_isar4;
     case 0xd74: /* ISAR5.  */
-        return cpu->id_isar5;
+        return cpu->isar.id_isar5;
     case 0xd78: /* CLIDR */
         return cpu->clidr;
     case 0xd7c: /* CTR */
diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
index b6896ac..4e52972 100644
--- a/hw/intc/ioapic.c
+++ b/hw/intc/ioapic.c
@@ -21,7 +21,7 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/error-report.h"
+#include "qapi/error.h"
 #include "monitor/monitor.h"
 #include "hw/hw.h"
 #include "hw/i386/pc.h"
@@ -393,9 +393,9 @@
     IOAPICCommonState *s = IOAPIC_COMMON(dev);
 
     if (s->version != 0x11 && s->version != 0x20) {
-        error_report("IOAPIC only supports version 0x11 or 0x20 "
-                     "(default: 0x%x).", IOAPIC_VER_DEF);
-        exit(1);
+        error_setg(errp, "IOAPIC only supports version 0x11 or 0x20 "
+                   "(default: 0x%x).", IOAPIC_VER_DEF);
+        return;
     }
 
     memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s,
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index c90c893..406efee 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -320,8 +320,9 @@
 
     obj = object_property_get_link(OBJECT(dev), ICP_PROP_XICS, &err);
     if (!obj) {
-        error_propagate(errp, err);
-        error_prepend(errp, "required link '" ICP_PROP_XICS "' not found: ");
+        error_propagate_prepend(errp, err,
+                                "required link '" ICP_PROP_XICS
+                                "' not found: ");
         return;
     }
 
@@ -329,8 +330,9 @@
 
     obj = object_property_get_link(OBJECT(dev), ICP_PROP_CPU, &err);
     if (!obj) {
-        error_propagate(errp, err);
-        error_prepend(errp, "required link '" ICP_PROP_CPU "' not found: ");
+        error_propagate_prepend(errp, err,
+                                "required link '" ICP_PROP_CPU
+                                "' not found: ");
         return;
     }
 
@@ -624,8 +626,9 @@
 
     obj = object_property_get_link(OBJECT(dev), ICS_PROP_XICS, &err);
     if (!obj) {
-        error_propagate(errp, err);
-        error_prepend(errp, "required link '" ICS_PROP_XICS "' not found: ");
+        error_propagate_prepend(errp, err,
+                                "required link '" ICS_PROP_XICS
+                                "' not found: ");
         return;
     }
     ics->xics = XICS_FABRIC(obj);
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 30c3769..e8fa9a5 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -198,17 +198,12 @@
 {
     uint64_t state;
     int i;
-    Error *local_err = NULL;
 
     for (i = 0; i < ics->nr_irqs; i++) {
         ICSIRQState *irq = &ics->irqs[i];
 
         kvm_device_access(kernel_xics_fd, KVM_DEV_XICS_GRP_SOURCES,
-                          i + ics->offset, &state, false, &local_err);
-        if (local_err) {
-            error_report_err(local_err);
-            exit(1);
-        }
+                          i + ics->offset, &state, false, &error_fatal);
 
         irq->server = state & KVM_XICS_DESTINATION_MASK;
         irq->saved_priority = (state >> KVM_XICS_PRIORITY_SHIFT)
diff --git a/hw/isa/Makefile.objs b/hw/isa/Makefile.objs
index 83e06f6..9e106df 100644
--- a/hw/isa/Makefile.objs
+++ b/hw/isa/Makefile.objs
@@ -1,9 +1,10 @@
 common-obj-$(CONFIG_ISA_BUS) += isa-bus.o
-common-obj-$(CONFIG_ISA_BUS) += isa-superio.o smc37c669-superio.o
+common-obj-$(CONFIG_ISA_BUS) += isa-superio.o
 common-obj-$(CONFIG_APM) += apm.o
 common-obj-$(CONFIG_I82378) += i82378.o
 common-obj-$(CONFIG_PC87312) += pc87312.o
 common-obj-$(CONFIG_PIIX4) += piix4.o
 common-obj-$(CONFIG_VT82C686) += vt82c686.o
+common-obj-$(CONFIG_SMC37C669) += smc37c669-superio.o
 
 obj-$(CONFIG_LPC_ICH9) += lpc_ich9.o
diff --git a/hw/mem/Makefile.objs b/hw/mem/Makefile.objs
index 10be4df..3e2f7c5 100644
--- a/hw/mem/Makefile.objs
+++ b/hw/mem/Makefile.objs
@@ -1,3 +1,3 @@
-common-obj-$(CONFIG_MEM_HOTPLUG) += pc-dimm.o
-common-obj-$(CONFIG_MEM_HOTPLUG) += memory-device.o
+common-obj-$(CONFIG_DIMM) += pc-dimm.o
+common-obj-$(CONFIG_MEM_DEVICE) += memory-device.o
 common-obj-$(CONFIG_NVDIMM) += nvdimm.o
diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
index 6de4f70..7de1ccd 100644
--- a/hw/mem/memory-device.c
+++ b/hw/mem/memory-device.c
@@ -17,6 +17,7 @@
 #include "qemu/range.h"
 #include "hw/virtio/vhost.h"
 #include "sysemu/kvm.h"
+#include "trace.h"
 
 static gint memory_device_addr_sort(gconstpointer a, gconstpointer b)
 {
@@ -57,10 +58,9 @@
     if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE)) {
         const DeviceState *dev = DEVICE(obj);
         const MemoryDeviceState *md = MEMORY_DEVICE(obj);
-        const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(obj);
 
         if (dev->realized) {
-            *size += mdc->get_region_size(md);
+            *size += memory_device_get_region_size(md, &error_abort);
         }
     }
 
@@ -87,16 +87,17 @@
     memory_device_used_region_size(OBJECT(ms), &used_region_size);
     if (used_region_size + size > ms->maxram_size - ms->ram_size) {
         error_setg(errp, "not enough space, currently 0x%" PRIx64
-                   " in use of total hot pluggable 0x" RAM_ADDR_FMT,
+                   " in use of total space for memory devices 0x" RAM_ADDR_FMT,
                    used_region_size, ms->maxram_size - ms->ram_size);
         return;
     }
 
 }
 
-uint64_t memory_device_get_free_addr(MachineState *ms, const uint64_t *hint,
-                                     uint64_t align, uint64_t size,
-                                     Error **errp)
+static uint64_t memory_device_get_free_addr(MachineState *ms,
+                                            const uint64_t *hint,
+                                            uint64_t align, uint64_t size,
+                                            Error **errp)
 {
     uint64_t address_space_start, address_space_end;
     GSList *list = NULL, *item;
@@ -120,7 +121,7 @@
 
     /* address_space_start indicates the maximum alignment we expect */
     if (QEMU_ALIGN_UP(address_space_start, align) != address_space_start) {
-        error_setg(errp, "the alignment (0%" PRIx64 ") is not supported",
+        error_setg(errp, "the alignment (0x%" PRIx64 ") is not supported",
                    align);
         return 0;
     }
@@ -145,11 +146,12 @@
     if (hint) {
         new_addr = *hint;
         if (new_addr < address_space_start) {
-            error_setg(errp, "can't add memory [0x%" PRIx64 ":0x%" PRIx64
-                       "] at 0x%" PRIx64, new_addr, size, address_space_start);
+            error_setg(errp, "can't add memory device [0x%" PRIx64 ":0x%" PRIx64
+                       "] before 0x%" PRIx64, new_addr, size,
+                       address_space_start);
             return 0;
         } else if ((new_addr + size) > address_space_end) {
-            error_setg(errp, "can't add memory [0x%" PRIx64 ":0x%" PRIx64
+            error_setg(errp, "can't add memory device [0x%" PRIx64 ":0x%" PRIx64
                        "] beyond 0x%" PRIx64, new_addr, size,
                        address_space_end);
             return 0;
@@ -166,15 +168,13 @@
         uint64_t md_size, md_addr;
 
         md_addr = mdc->get_addr(md);
-        md_size = mdc->get_region_size(md);
-        if (*errp) {
-            goto out;
-        }
+        md_size = memory_device_get_region_size(md, &error_abort);
 
         if (ranges_overlap(md_addr, md_size, new_addr, size)) {
             if (hint) {
                 const DeviceState *d = DEVICE(md);
-                error_setg(errp, "address range conflicts with '%s'", d->id);
+                error_setg(errp, "address range conflicts with memory device"
+                           " id='%s'", d->id ? d->id : "(unnamed)");
                 goto out;
             }
             new_addr = QEMU_ALIGN_UP(md_addr + md_size, align);
@@ -232,7 +232,7 @@
         const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(obj);
 
         if (dev->realized) {
-            *size += mdc->get_plugged_size(md);
+            *size += mdc->get_plugged_size(md, &error_abort);
         }
     }
 
@@ -249,22 +249,83 @@
     return size;
 }
 
-void memory_device_plug_region(MachineState *ms, MemoryRegion *mr,
-                               uint64_t addr)
+void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
+                            const uint64_t *legacy_align, Error **errp)
 {
-    /* we expect a previous call to memory_device_get_free_addr() */
+    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    Error *local_err = NULL;
+    uint64_t addr, align;
+    MemoryRegion *mr;
+
+    mr = mdc->get_memory_region(md, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    align = legacy_align ? *legacy_align : memory_region_get_alignment(mr);
+    addr = mdc->get_addr(md);
+    addr = memory_device_get_free_addr(ms, !addr ? NULL : &addr, align,
+                                       memory_region_size(mr), &local_err);
+    if (local_err) {
+        goto out;
+    }
+    mdc->set_addr(md, addr, &local_err);
+    if (!local_err) {
+        trace_memory_device_pre_plug(DEVICE(md)->id ? DEVICE(md)->id : "",
+                                     addr);
+    }
+out:
+    error_propagate(errp, local_err);
+}
+
+void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
+{
+    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    const uint64_t addr = mdc->get_addr(md);
+    MemoryRegion *mr;
+
+    /*
+     * We expect that a previous call to memory_device_pre_plug() succeeded, so
+     * it can't fail at this point.
+     */
+    mr = mdc->get_memory_region(md, &error_abort);
     g_assert(ms->device_memory);
 
     memory_region_add_subregion(&ms->device_memory->mr,
                                 addr - ms->device_memory->base, mr);
+    trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr);
 }
 
-void memory_device_unplug_region(MachineState *ms, MemoryRegion *mr)
+void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
 {
-    /* we expect a previous call to memory_device_get_free_addr() */
+    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    MemoryRegion *mr;
+
+    /*
+     * We expect that a previous call to memory_device_pre_plug() succeeded, so
+     * it can't fail at this point.
+     */
+    mr = mdc->get_memory_region(md, &error_abort);
     g_assert(ms->device_memory);
 
     memory_region_del_subregion(&ms->device_memory->mr, mr);
+    trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "",
+                               mdc->get_addr(md));
+}
+
+uint64_t memory_device_get_region_size(const MemoryDeviceState *md,
+                                       Error **errp)
+{
+    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    MemoryRegion *mr;
+
+    /* dropping const here is fine as we don't touch the memory region */
+    mr = mdc->get_memory_region((MemoryDeviceState *)md, errp);
+    if (!mr) {
+        return 0;
+    }
+
+    return memory_region_size(mr);
 }
 
 static const TypeInfo memory_device_info = {
diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index 1c6674c..49324f3 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -27,6 +27,7 @@
 #include "qapi/error.h"
 #include "qapi/visitor.h"
 #include "hw/mem/nvdimm.h"
+#include "hw/mem/memory-device.h"
 
 static void nvdimm_get_label_size(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
@@ -118,9 +119,10 @@
     nvdimm->nvdimm_mr->align = align;
 }
 
-static MemoryRegion *nvdimm_get_memory_region(PCDIMMDevice *dimm, Error **errp)
+static MemoryRegion *nvdimm_md_get_memory_region(MemoryDeviceState *md,
+                                                 Error **errp)
 {
-    NVDIMMDevice *nvdimm = NVDIMM(dimm);
+    NVDIMMDevice *nvdimm = NVDIMM(md);
     Error *local_err = NULL;
 
     if (!nvdimm->nvdimm_mr) {
@@ -190,11 +192,12 @@
 static void nvdimm_class_init(ObjectClass *oc, void *data)
 {
     PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc);
+    MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(oc);
     NVDIMMClass *nvc = NVDIMM_CLASS(oc);
     DeviceClass *dc = DEVICE_CLASS(oc);
 
     ddc->realize = nvdimm_realize;
-    ddc->get_memory_region = nvdimm_get_memory_region;
+    mdc->get_memory_region = nvdimm_md_get_memory_region;
     dc->props = nvdimm_properties;
 
     nvc->read_label_data = nvdimm_read_label_data;
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index fb6bcae..0c9b9e8 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -29,72 +29,47 @@
 
 static int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp);
 
-void pc_dimm_pre_plug(DeviceState *dev, MachineState *machine,
+void pc_dimm_pre_plug(PCDIMMDevice *dimm, MachineState *machine,
                       const uint64_t *legacy_align, Error **errp)
 {
-    PCDIMMDevice *dimm = PC_DIMM(dev);
-    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
     Error *local_err = NULL;
-    MemoryRegion *mr;
-    uint64_t addr, align;
     int slot;
 
-    slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP,
+    slot = object_property_get_int(OBJECT(dimm), PC_DIMM_SLOT_PROP,
                                    &error_abort);
     slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot,
                                  machine->ram_slots, &local_err);
     if (local_err) {
         goto out;
     }
-    object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &error_abort);
+    object_property_set_int(OBJECT(dimm), slot, PC_DIMM_SLOT_PROP,
+                            &error_abort);
     trace_mhp_pc_dimm_assigned_slot(slot);
 
-    mr = ddc->get_memory_region(dimm, &local_err);
-    if (local_err) {
-        goto out;
-    }
-
-    align = legacy_align ? *legacy_align : memory_region_get_alignment(mr);
-    addr = object_property_get_uint(OBJECT(dev), PC_DIMM_ADDR_PROP,
-                                    &error_abort);
-    addr = memory_device_get_free_addr(machine, !addr ? NULL : &addr, align,
-                                       memory_region_size(mr), &local_err);
-    if (local_err) {
-        goto out;
-    }
-    trace_mhp_pc_dimm_assigned_address(addr);
-    object_property_set_uint(OBJECT(dev), addr, PC_DIMM_ADDR_PROP,
-                             &error_abort);
+    memory_device_pre_plug(MEMORY_DEVICE(dimm), machine, legacy_align,
+                           &local_err);
 out:
     error_propagate(errp, local_err);
 }
 
-void pc_dimm_plug(DeviceState *dev, MachineState *machine, Error **errp)
+void pc_dimm_plug(PCDIMMDevice *dimm, MachineState *machine, Error **errp)
 {
-    PCDIMMDevice *dimm = PC_DIMM(dev);
     PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
     MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm,
                                                               &error_abort);
-    MemoryRegion *mr = ddc->get_memory_region(dimm, &error_abort);
-    uint64_t addr;
 
-    addr = object_property_get_uint(OBJECT(dev), PC_DIMM_ADDR_PROP,
-                                    &error_abort);
-
-    memory_device_plug_region(machine, mr, addr);
-    vmstate_register_ram(vmstate_mr, dev);
+    memory_device_plug(MEMORY_DEVICE(dimm), machine);
+    vmstate_register_ram(vmstate_mr, DEVICE(dimm));
 }
 
-void pc_dimm_unplug(DeviceState *dev, MachineState *machine)
+void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine)
 {
-    PCDIMMDevice *dimm = PC_DIMM(dev);
     PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
     MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm,
                                                               &error_abort);
-    MemoryRegion *mr = ddc->get_memory_region(dimm, &error_abort);
 
-    memory_device_unplug_region(machine, mr);
-    vmstate_unregister_ram(vmstate_mr, dev);
+    memory_device_unplug(MEMORY_DEVICE(dimm), machine);
+    vmstate_unregister_ram(vmstate_mr, DEVICE(dimm));
 }
 
 static int pc_dimm_slot2bitmap(Object *obj, void *opaque)
@@ -163,16 +138,14 @@
 static void pc_dimm_get_size(Object *obj, Visitor *v, const char *name,
                              void *opaque, Error **errp)
 {
+    Error *local_err = NULL;
     uint64_t value;
-    MemoryRegion *mr;
-    PCDIMMDevice *dimm = PC_DIMM(obj);
-    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(obj);
 
-    mr = ddc->get_memory_region(dimm, errp);
-    if (!mr) {
+    value = memory_device_get_region_size(MEMORY_DEVICE(obj), &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
         return;
     }
-    value = memory_region_size(mr);
 
     visit_type_uint64(v, name, &value, errp);
 }
@@ -236,19 +209,16 @@
     return dimm->addr;
 }
 
-static uint64_t pc_dimm_md_get_region_size(const MemoryDeviceState *md)
+static void pc_dimm_md_set_addr(MemoryDeviceState *md, uint64_t addr,
+                                Error **errp)
 {
-    /* dropping const here is fine as we don't touch the memory region */
-    PCDIMMDevice *dimm = PC_DIMM(md);
-    const PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(md);
-    MemoryRegion *mr;
+    object_property_set_uint(OBJECT(md), addr, PC_DIMM_ADDR_PROP, errp);
+}
 
-    mr = ddc->get_memory_region(dimm, &error_abort);
-    if (!mr) {
-        return 0;
-    }
-
-    return memory_region_size(mr);
+static MemoryRegion *pc_dimm_md_get_memory_region(MemoryDeviceState *md,
+                                                  Error **errp)
+{
+    return pc_dimm_get_memory_region(PC_DIMM(md), errp);
 }
 
 static void pc_dimm_md_fill_device_info(const MemoryDeviceState *md,
@@ -292,13 +262,13 @@
     dc->props = pc_dimm_properties;
     dc->desc = "DIMM memory module";
 
-    ddc->get_memory_region = pc_dimm_get_memory_region;
     ddc->get_vmstate_memory_region = pc_dimm_get_memory_region;
 
     mdc->get_addr = pc_dimm_md_get_addr;
+    mdc->set_addr = pc_dimm_md_set_addr;
     /* for a dimm plugged_size == region_size */
-    mdc->get_plugged_size = pc_dimm_md_get_region_size;
-    mdc->get_region_size = pc_dimm_md_get_region_size;
+    mdc->get_plugged_size = memory_device_get_region_size;
+    mdc->get_memory_region = pc_dimm_md_get_memory_region;
     mdc->fill_device_info = pc_dimm_md_fill_device_info;
 }
 
diff --git a/hw/mem/trace-events b/hw/mem/trace-events
index e150dcc..0f2f278 100644
--- a/hw/mem/trace-events
+++ b/hw/mem/trace-events
@@ -2,4 +2,7 @@
 
 # hw/mem/pc-dimm.c
 mhp_pc_dimm_assigned_slot(int slot) "%d"
-mhp_pc_dimm_assigned_address(uint64_t addr) "0x%"PRIx64
+# hw/mem/memory-device.c
+memory_device_pre_plug(const char *id, uint64_t addr) "id=%s addr=0x%"PRIx64
+memory_device_plug(const char *id, uint64_t addr) "id=%s addr=0x%"PRIx64
+memory_device_unplug(const char *id, uint64_t addr) "id=%s addr=0x%"PRIx64
diff --git a/hw/mips/gt64xxx_pci.c b/hw/mips/gt64xxx_pci.c
index 24ad0ad..1cd8aac 100644
--- a/hw/mips/gt64xxx_pci.c
+++ b/hw/mips/gt64xxx_pci.c
@@ -992,9 +992,9 @@
 }
 
 
-static void gt64120_reset(void *opaque)
+static void gt64120_reset(DeviceState *dev)
 {
-    GT64120State *s = opaque;
+    GT64120State *s = GT64120_PCI_HOST_BRIDGE(dev);
 
     /* FIXME: Malta specific hw assumptions ahead */
 
@@ -1184,16 +1184,6 @@
     return phb->bus;
 }
 
-static int gt64120_init(SysBusDevice *dev)
-{
-    GT64120State *s;
-
-    s = GT64120_PCI_HOST_BRIDGE(dev);
-
-    qemu_register_reset(gt64120_reset, s);
-    return 0;
-}
-
 static void gt64120_pci_realize(PCIDevice *d, Error **errp)
 {
     /* FIXME: Malta specific hw assumptions ahead */
@@ -1241,9 +1231,9 @@
 static void gt64120_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
-    SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass);
 
-    sdc->init = gt64120_init;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+    dc->reset = gt64120_reset;
     dc->vmsd = &vmstate_gt64120;
 }
 
diff --git a/hw/mips/mips_fulong2e.c b/hw/mips/mips_fulong2e.c
index c1694c8..2fbba32 100644
--- a/hw/mips/mips_fulong2e.c
+++ b/hw/mips/mips_fulong2e.c
@@ -104,9 +104,9 @@
 
 static int64_t load_kernel (CPUMIPSState *env)
 {
-    int64_t kernel_entry, kernel_low, kernel_high;
+    int64_t kernel_entry, kernel_low, kernel_high, initrd_size;
     int index = 0;
-    long kernel_size, initrd_size;
+    long kernel_size;
     ram_addr_t initrd_offset;
     uint32_t *prom_buf;
     long prom_size;
@@ -150,7 +150,7 @@
 
     prom_set(prom_buf, index++, "%s", loaderparams.kernel_filename);
     if (initrd_size > 0) {
-        prom_set(prom_buf, index++, "rd_start=0x%" PRIx64 " rd_size=%li %s",
+        prom_set(prom_buf, index++, "rd_start=0x%" PRIx64 " rd_size=%" PRId64 " %s",
                  cpu_mips_phys_to_kseg0(NULL, initrd_offset), initrd_size,
                  loaderparams.kernel_cmdline);
     } else {
diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
index 40041d5..c1cf0fe 100644
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c
@@ -995,8 +995,8 @@
 /* Kernel */
 static int64_t load_kernel (void)
 {
-    int64_t kernel_entry, kernel_high;
-    long kernel_size, initrd_size;
+    int64_t kernel_entry, kernel_high, initrd_size;
+    long kernel_size;
     ram_addr_t initrd_offset;
     int big_endian;
     uint32_t *prom_buf;
@@ -1070,7 +1070,7 @@
 
     prom_set(prom_buf, prom_index++, "%s", loaderparams.kernel_filename);
     if (initrd_size > 0) {
-        prom_set(prom_buf, prom_index++, "rd_start=0x%" PRIx64 " rd_size=%li %s",
+        prom_set(prom_buf, prom_index++, "rd_start=0x%" PRIx64 " rd_size=%" PRId64 " %s",
                  xlate_to_kseg0(NULL, initrd_offset), initrd_size,
                  loaderparams.kernel_cmdline);
     } else {
@@ -1422,23 +1422,10 @@
     pci_vga_init(pci_bus);
 }
 
-static int mips_malta_sysbus_device_init(SysBusDevice *sysbusdev)
-{
-    return 0;
-}
-
-static void mips_malta_class_init(ObjectClass *klass, void *data)
-{
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
-
-    k->init = mips_malta_sysbus_device_init;
-}
-
 static const TypeInfo mips_malta_device = {
     .name          = TYPE_MIPS_MALTA,
     .parent        = TYPE_SYS_BUS_DEVICE,
     .instance_size = sizeof(MaltaState),
-    .class_init    = mips_malta_class_init,
 };
 
 static void mips_malta_machine_init(MachineClass *mc)
diff --git a/hw/mips/mips_mipssim.c b/hw/mips/mips_mipssim.c
index 241faa1..f665752 100644
--- a/hw/mips/mips_mipssim.c
+++ b/hw/mips/mips_mipssim.c
@@ -58,9 +58,8 @@
 
 static int64_t load_kernel(void)
 {
-    int64_t entry, kernel_high;
+    int64_t entry, kernel_high, initrd_size;
     long kernel_size;
-    long initrd_size;
     ram_addr_t initrd_offset;
     int big_endian;
 
diff --git a/hw/mips/mips_r4k.c b/hw/mips/mips_r4k.c
index d5725d0..3e852e9 100644
--- a/hw/mips/mips_r4k.c
+++ b/hw/mips/mips_r4k.c
@@ -81,8 +81,8 @@
 static int64_t load_kernel(void)
 {
     const size_t params_size = 264;
-    int64_t entry, kernel_high;
-    long kernel_size, initrd_size;
+    int64_t entry, kernel_high, initrd_size;
+    long kernel_size;
     ram_addr_t initrd_offset;
     uint32_t *params_buf;
     int big_endian;
@@ -136,7 +136,7 @@
     params_buf[1] = tswap32(0x12345678);
 
     if (initrd_size > 0) {
-        snprintf((char *)params_buf + 8, 256, "rd_start=0x%" PRIx64 " rd_size=%li %s",
+        snprintf((char *)params_buf + 8, 256, "rd_start=0x%" PRIx64 " rd_size=%" PRId64 " %s",
                  cpu_mips_phys_to_kseg0(NULL, initrd_offset),
                  initrd_size, loaderparams.kernel_cmdline);
     } else {
diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
index 6d50b03..680350b 100644
--- a/hw/misc/Makefile.objs
+++ b/hw/misc/Makefile.objs
@@ -71,7 +71,6 @@
 obj-$(CONFIG_IOTKIT_SYSINFO) += iotkit-sysinfo.o
 
 obj-$(CONFIG_PVPANIC) += pvpanic.o
-obj-$(CONFIG_HYPERV_TESTDEV) += hyperv_testdev.o
 obj-$(CONFIG_AUX) += auxbus.o
 obj-$(CONFIG_ASPEED_SOC) += aspeed_scu.o aspeed_sdmc.o
 obj-$(CONFIG_MSF2) += msf2-sysreg.o
diff --git a/hw/misc/debugexit.c b/hw/misc/debugexit.c
index 84fa1a5..bed2932 100644
--- a/hw/misc/debugexit.c
+++ b/hw/misc/debugexit.c
@@ -23,6 +23,11 @@
     MemoryRegion io;
 } ISADebugExitState;
 
+static uint64_t debug_exit_read(void *opaque, hwaddr addr, unsigned size)
+{
+    return 0;
+}
+
 static void debug_exit_write(void *opaque, hwaddr addr, uint64_t val,
                              unsigned width)
 {
@@ -30,6 +35,7 @@
 }
 
 static const MemoryRegionOps debug_exit_ops = {
+    .read = debug_exit_read,
     .write = debug_exit_write,
     .valid.min_access_size = 1,
     .valid.max_access_size = 4,
diff --git a/hw/misc/edu.c b/hw/misc/edu.c
index df26a4d..cdcf550 100644
--- a/hw/misc/edu.c
+++ b/hw/misc/edu.c
@@ -30,7 +30,8 @@
 #include "qemu/main-loop.h" /* iothread mutex */
 #include "qapi/visitor.h"
 
-#define EDU(obj)        OBJECT_CHECK(EduState, obj, "edu")
+#define TYPE_PCI_EDU_DEVICE "edu"
+#define EDU(obj)        OBJECT_CHECK(EduState, obj, TYPE_PCI_EDU_DEVICE)
 
 #define FACT_IRQ        0x00000001
 #define DMA_IRQ         0x00000100
@@ -341,7 +342,7 @@
 
 static void pci_edu_realize(PCIDevice *pdev, Error **errp)
 {
-    EduState *edu = DO_UPCAST(EduState, pdev, pdev);
+    EduState *edu = EDU(pdev);
     uint8_t *pci_conf = pdev->config;
 
     pci_config_set_interrupt_pin(pci_conf, 1);
@@ -364,7 +365,7 @@
 
 static void pci_edu_uninit(PCIDevice *pdev)
 {
-    EduState *edu = DO_UPCAST(EduState, pdev, pdev);
+    EduState *edu = EDU(pdev);
 
     qemu_mutex_lock(&edu->thr_mutex);
     edu->stopping = true;
@@ -414,7 +415,7 @@
         { },
     };
     static const TypeInfo edu_info = {
-        .name          = "edu",
+        .name          = TYPE_PCI_EDU_DEVICE,
         .parent        = TYPE_PCI_DEVICE,
         .instance_size = sizeof(EduState),
         .instance_init = edu_instance_init,
diff --git a/hw/misc/hyperv_testdev.c b/hw/misc/hyperv_testdev.c
deleted file mode 100644
index bf6bbfa..0000000
--- a/hw/misc/hyperv_testdev.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * QEMU KVM Hyper-V test device to support Hyper-V kvm-unit-tests
- *
- * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
- *
- * Authors:
- *  Andrey Smetanin <asmetanin@virtuozzo.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include <linux/kvm.h>
-#include "hw/hw.h"
-#include "hw/qdev.h"
-#include "hw/isa/isa.h"
-#include "sysemu/kvm.h"
-#include "target/i386/hyperv.h"
-#include "kvm_i386.h"
-
-#define HV_TEST_DEV_MAX_SINT_ROUTES 64
-
-struct HypervTestDev {
-    ISADevice parent_obj;
-    MemoryRegion sint_control;
-    HvSintRoute *sint_route[HV_TEST_DEV_MAX_SINT_ROUTES];
-};
-typedef struct HypervTestDev HypervTestDev;
-
-#define TYPE_HYPERV_TEST_DEV "hyperv-testdev"
-#define HYPERV_TEST_DEV(obj) \
-        OBJECT_CHECK(HypervTestDev, (obj), TYPE_HYPERV_TEST_DEV)
-
-enum {
-    HV_TEST_DEV_SINT_ROUTE_CREATE = 1,
-    HV_TEST_DEV_SINT_ROUTE_DESTROY,
-    HV_TEST_DEV_SINT_ROUTE_SET_SINT
-};
-
-static int alloc_sint_route_index(HypervTestDev *dev)
-{
-    int i;
-
-    for (i = 0; i < ARRAY_SIZE(dev->sint_route); i++) {
-        if (dev->sint_route[i] == NULL) {
-            return i;
-        }
-    }
-    return -1;
-}
-
-static void free_sint_route_index(HypervTestDev *dev, int i)
-{
-    assert(i >= 0 && i < ARRAY_SIZE(dev->sint_route));
-    dev->sint_route[i] = NULL;
-}
-
-static int find_sint_route_index(HypervTestDev *dev, uint32_t vp_index,
-                                 uint32_t sint)
-{
-    HvSintRoute *sint_route;
-    int i;
-
-    for (i = 0; i < ARRAY_SIZE(dev->sint_route); i++) {
-        sint_route = dev->sint_route[i];
-        if (sint_route && sint_route->vp_index == vp_index &&
-            sint_route->sint == sint) {
-            return i;
-        }
-    }
-    return -1;
-}
-
-static void hv_synic_test_dev_control(HypervTestDev *dev, uint32_t ctl,
-                                      uint32_t vp_index, uint32_t sint)
-{
-    int i;
-    HvSintRoute *sint_route;
-
-    switch (ctl) {
-    case HV_TEST_DEV_SINT_ROUTE_CREATE:
-        i = alloc_sint_route_index(dev);
-        assert(i >= 0);
-        sint_route = kvm_hv_sint_route_create(vp_index, sint, NULL);
-        assert(sint_route);
-        dev->sint_route[i] = sint_route;
-        break;
-    case HV_TEST_DEV_SINT_ROUTE_DESTROY:
-        i = find_sint_route_index(dev, vp_index, sint);
-        assert(i >= 0);
-        sint_route = dev->sint_route[i];
-        kvm_hv_sint_route_destroy(sint_route);
-        free_sint_route_index(dev, i);
-        break;
-    case HV_TEST_DEV_SINT_ROUTE_SET_SINT:
-        i = find_sint_route_index(dev, vp_index, sint);
-        assert(i >= 0);
-        sint_route = dev->sint_route[i];
-        kvm_hv_sint_route_set_sint(sint_route);
-        break;
-    default:
-        break;
-    }
-}
-
-static void hv_test_dev_control(void *opaque, hwaddr addr, uint64_t data,
-                                uint32_t len)
-{
-    HypervTestDev *dev = HYPERV_TEST_DEV(opaque);
-    uint8_t ctl;
-
-    ctl = (data >> 16ULL) & 0xFF;
-    switch (ctl) {
-    case HV_TEST_DEV_SINT_ROUTE_CREATE:
-    case HV_TEST_DEV_SINT_ROUTE_DESTROY:
-    case HV_TEST_DEV_SINT_ROUTE_SET_SINT: {
-        uint8_t sint = data & 0xFF;
-        uint8_t vp_index = (data >> 8ULL) & 0xFF;
-        hv_synic_test_dev_control(dev, ctl, vp_index, sint);
-        break;
-    }
-    default:
-        break;
-    }
-}
-
-static const MemoryRegionOps synic_test_sint_ops = {
-    .write = hv_test_dev_control,
-    .valid.min_access_size = 4,
-    .valid.max_access_size = 4,
-    .endianness = DEVICE_LITTLE_ENDIAN,
-};
-
-static void hv_test_dev_realizefn(DeviceState *d, Error **errp)
-{
-    ISADevice *isa = ISA_DEVICE(d);
-    HypervTestDev *dev = HYPERV_TEST_DEV(d);
-    MemoryRegion *io = isa_address_space_io(isa);
-
-    memset(dev->sint_route, 0, sizeof(dev->sint_route));
-    memory_region_init_io(&dev->sint_control, OBJECT(dev),
-                          &synic_test_sint_ops, dev,
-                          "hyperv-testdev-ctl", 4);
-    memory_region_add_subregion(io, 0x3000, &dev->sint_control);
-}
-
-static void hv_test_dev_class_init(ObjectClass *klass, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(klass);
-
-    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
-    dc->realize = hv_test_dev_realizefn;
-}
-
-static const TypeInfo hv_test_dev_info = {
-    .name           = TYPE_HYPERV_TEST_DEV,
-    .parent         = TYPE_ISA_DEVICE,
-    .instance_size  = sizeof(HypervTestDev),
-    .class_init     = hv_test_dev_class_init,
-};
-
-static void hv_test_dev_register_types(void)
-{
-    type_register_static(&hv_test_dev_info);
-}
-type_init(hv_test_dev_register_types);
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 6febbab..f88910e 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -911,6 +911,7 @@
         IVSHMEM_DPRINTF("using hostmem\n");
 
         s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem);
+        host_memory_backend_set_mapped(s->hostmem, true);
     } else {
         Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr);
         assert(chr);
@@ -993,6 +994,10 @@
         vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev));
     }
 
+    if (s->hostmem) {
+        host_memory_backend_set_mapped(s->hostmem, false);
+    }
+
     if (s->peers) {
         for (i = 0; i < s->nb_peers; i++) {
             close_peer_eventfds(s, i);
@@ -1101,14 +1106,6 @@
     }
 
     ivshmem_common_realize(dev, errp);
-    host_memory_backend_set_mapped(s->hostmem, true);
-}
-
-static void ivshmem_plain_exit(PCIDevice *pci_dev)
-{
-    IVShmemState *s = IVSHMEM_COMMON(pci_dev);
-
-    host_memory_backend_set_mapped(s->hostmem, false);
 }
 
 static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
@@ -1117,7 +1114,6 @@
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
     k->realize = ivshmem_plain_realize;
-    k->exit = ivshmem_plain_exit;
     dc->props = ivshmem_plain_properties;
     dc->vmsd = &ivshmem_plain_vmsd;
 }
@@ -1292,8 +1288,8 @@
     IVShmemState *s = IVSHMEM_COMMON(dev);
 
     if (!qtest_enabled()) {
-        error_report("ivshmem is deprecated, please use ivshmem-plain"
-                     " or ivshmem-doorbell instead");
+        warn_report("ivshmem is deprecated, please use ivshmem-plain"
+                    " or ivshmem-doorbell instead");
     }
 
     if (qemu_chr_fe_backend_connected(&s->server_chr) + !!s->shmobj != 1) {
diff --git a/hw/misc/pc-testdev.c b/hw/misc/pc-testdev.c
index b81d820..697eb88 100644
--- a/hw/misc/pc-testdev.c
+++ b/hw/misc/pc-testdev.c
@@ -58,7 +58,12 @@
 #define TESTDEV(obj) \
      OBJECT_CHECK(PCTestdev, (obj), TYPE_TESTDEV)
 
-static void test_irq_line(void *opaque, hwaddr addr, uint64_t data,
+static uint64_t test_irq_line_read(void *opaque, hwaddr addr, unsigned size)
+{
+    return 0;
+}
+
+static void test_irq_line_write(void *opaque, hwaddr addr, uint64_t data,
                           unsigned len)
 {
     PCTestdev *dev = opaque;
@@ -68,7 +73,8 @@
 }
 
 static const MemoryRegionOps test_irq_ops = {
-    .write = test_irq_line,
+    .read = test_irq_line_read,
+    .write = test_irq_line_write,
     .valid.min_access_size = 1,
     .valid.max_access_size = 1,
     .endianness = DEVICE_LITTLE_ENDIAN,
@@ -110,7 +116,12 @@
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
-static void test_flush_page(void *opaque, hwaddr addr, uint64_t data,
+static uint64_t test_flush_page_read(void *opaque, hwaddr addr, unsigned size)
+{
+    return 0;
+}
+
+static void test_flush_page_write(void *opaque, hwaddr addr, uint64_t data,
                             unsigned len)
 {
     hwaddr page = 4096;
@@ -126,7 +137,8 @@
 }
 
 static const MemoryRegionOps test_flush_ops = {
-    .write = test_flush_page,
+    .read = test_flush_page_read,
+    .write = test_flush_page_write,
     .valid.min_access_size = 4,
     .valid.max_access_size = 4,
     .endianness = DEVICE_LITTLE_ENDIAN,
diff --git a/hw/moxie/moxiesim.c b/hw/moxie/moxiesim.c
index d41247d..4b0ce09 100644
--- a/hw/moxie/moxiesim.c
+++ b/hw/moxie/moxiesim.c
@@ -54,8 +54,8 @@
 static void load_kernel(MoxieCPU *cpu, LoaderParams *loader_params)
 {
     uint64_t entry, kernel_low, kernel_high;
+    int64_t initrd_size;
     long kernel_size;
-    long initrd_size;
     ram_addr_t initrd_offset;
 
     kernel_size = load_elf(loader_params->kernel_filename,  NULL, NULL,
diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
index 0fa4b0d..d95cc27 100644
--- a/hw/net/cadence_gem.c
+++ b/hw/net/cadence_gem.c
@@ -28,6 +28,7 @@
 #include "hw/net/cadence_gem.h"
 #include "qapi/error.h"
 #include "qemu/log.h"
+#include "sysemu/dma.h"
 #include "net/checksum.h"
 
 #ifdef CADENCE_GEM_ERR_DEBUG
@@ -141,6 +142,7 @@
 #define GEM_DESCONF4      (0x0000028C/4)
 #define GEM_DESCONF5      (0x00000290/4)
 #define GEM_DESCONF6      (0x00000294/4)
+#define GEM_DESCONF6_64B_MASK (1U << 23)
 #define GEM_DESCONF7      (0x00000298/4)
 
 #define GEM_INT_Q1_STATUS               (0x00000400 / 4)
@@ -152,6 +154,9 @@
 #define GEM_RECEIVE_Q1_PTR              (0x00000480 / 4)
 #define GEM_RECEIVE_Q7_PTR              (GEM_RECEIVE_Q1_PTR + 6)
 
+#define GEM_TBQPH                       (0x000004C8 / 4)
+#define GEM_RBQPH                       (0x000004D4 / 4)
+
 #define GEM_INT_Q1_ENABLE               (0x00000600 / 4)
 #define GEM_INT_Q7_ENABLE               (GEM_INT_Q1_ENABLE + 6)
 
@@ -207,6 +212,9 @@
 #define GEM_NWCFG_BCAST_REJ    0x00000020 /* Reject broadcast packets */
 #define GEM_NWCFG_PROMISC      0x00000010 /* Accept all packets */
 
+#define GEM_DMACFG_ADDR_64B    (1U << 30)
+#define GEM_DMACFG_TX_BD_EXT   (1U << 29)
+#define GEM_DMACFG_RX_BD_EXT   (1U << 28)
 #define GEM_DMACFG_RBUFSZ_M    0x00FF0000 /* DMA RX Buffer Size mask */
 #define GEM_DMACFG_RBUFSZ_S    16         /* DMA RX Buffer Size shift */
 #define GEM_DMACFG_RBUFSZ_MUL  64         /* DMA RX Buffer Size multiplier */
@@ -302,42 +310,47 @@
 
 #define GEM_MODID_VALUE 0x00020118
 
-static inline unsigned tx_desc_get_buffer(unsigned *desc)
+static inline uint64_t tx_desc_get_buffer(CadenceGEMState *s, uint32_t *desc)
 {
-    return desc[0];
+    uint64_t ret = desc[0];
+
+    if (s->regs[GEM_DMACFG] & GEM_DMACFG_ADDR_64B) {
+        ret |= (uint64_t)desc[2] << 32;
+    }
+    return ret;
 }
 
-static inline unsigned tx_desc_get_used(unsigned *desc)
+static inline unsigned tx_desc_get_used(uint32_t *desc)
 {
     return (desc[1] & DESC_1_USED) ? 1 : 0;
 }
 
-static inline void tx_desc_set_used(unsigned *desc)
+static inline void tx_desc_set_used(uint32_t *desc)
 {
     desc[1] |= DESC_1_USED;
 }
 
-static inline unsigned tx_desc_get_wrap(unsigned *desc)
+static inline unsigned tx_desc_get_wrap(uint32_t *desc)
 {
     return (desc[1] & DESC_1_TX_WRAP) ? 1 : 0;
 }
 
-static inline unsigned tx_desc_get_last(unsigned *desc)
+static inline unsigned tx_desc_get_last(uint32_t *desc)
 {
     return (desc[1] & DESC_1_TX_LAST) ? 1 : 0;
 }
 
-static inline void tx_desc_set_last(unsigned *desc)
+static inline void tx_desc_set_last(uint32_t *desc)
 {
     desc[1] |= DESC_1_TX_LAST;
 }
 
-static inline unsigned tx_desc_get_length(unsigned *desc)
+static inline unsigned tx_desc_get_length(uint32_t *desc)
 {
     return desc[1] & DESC_1_LENGTH;
 }
 
-static inline void print_gem_tx_desc(unsigned *desc, uint8_t queue)
+static inline void print_gem_tx_desc(uint32_t *desc, uint8_t queue)
 {
     DB_PRINT("TXDESC (queue %" PRId8 "):\n", queue);
     DB_PRINT("bufaddr: 0x%08x\n", *desc);
@@ -347,58 +360,79 @@
     DB_PRINT("length:  %d\n", tx_desc_get_length(desc));
 }
 
-static inline unsigned rx_desc_get_buffer(unsigned *desc)
+static inline uint64_t rx_desc_get_buffer(CadenceGEMState *s, uint32_t *desc)
 {
-    return desc[0] & ~0x3UL;
+    uint64_t ret = desc[0] & ~0x3UL;
+
+    if (s->regs[GEM_DMACFG] & GEM_DMACFG_ADDR_64B) {
+        ret |= (uint64_t)desc[2] << 32;
+    }
+    return ret;
 }
 
-static inline unsigned rx_desc_get_wrap(unsigned *desc)
+static inline int gem_get_desc_len(CadenceGEMState *s, bool rx_n_tx)
+{
+    /* Length in 32-bit words: 2 base words, +2 when 64-bit addressing is
+     * enabled, +2 when the extended-BD bit for this direction is set. */
+    const uint32_t dmacfg = s->regs[GEM_DMACFG];
+    const uint32_t ext_bit = rx_n_tx ? GEM_DMACFG_RX_BD_EXT
+                                     : GEM_DMACFG_TX_BD_EXT;
+    int words = 2;
+
+    words += (dmacfg & GEM_DMACFG_ADDR_64B) ? 2 : 0;
+    words += (dmacfg & ext_bit) ? 2 : 0;
+
+    assert(words <= DESC_MAX_NUM_WORDS);
+    return words;
+}
+
+static inline unsigned rx_desc_get_wrap(uint32_t *desc)
 {
     return desc[0] & DESC_0_RX_WRAP ? 1 : 0;
 }
 
-static inline unsigned rx_desc_get_ownership(unsigned *desc)
+static inline unsigned rx_desc_get_ownership(uint32_t *desc)
 {
     return desc[0] & DESC_0_RX_OWNERSHIP ? 1 : 0;
 }
 
-static inline void rx_desc_set_ownership(unsigned *desc)
+static inline void rx_desc_set_ownership(uint32_t *desc)
 {
     desc[0] |= DESC_0_RX_OWNERSHIP;
 }
 
-static inline void rx_desc_set_sof(unsigned *desc)
+static inline void rx_desc_set_sof(uint32_t *desc)
 {
     desc[1] |= DESC_1_RX_SOF;
 }
 
-static inline void rx_desc_set_eof(unsigned *desc)
+static inline void rx_desc_set_eof(uint32_t *desc)
 {
     desc[1] |= DESC_1_RX_EOF;
 }
 
-static inline void rx_desc_set_length(unsigned *desc, unsigned len)
+static inline void rx_desc_set_length(uint32_t *desc, unsigned len)
 {
     desc[1] &= ~DESC_1_LENGTH;
     desc[1] |= len;
 }
 
-static inline void rx_desc_set_broadcast(unsigned *desc)
+static inline void rx_desc_set_broadcast(uint32_t *desc)
 {
     desc[1] |= R_DESC_1_RX_BROADCAST;
 }
 
-static inline void rx_desc_set_unicast_hash(unsigned *desc)
+static inline void rx_desc_set_unicast_hash(uint32_t *desc)
 {
     desc[1] |= R_DESC_1_RX_UNICAST_HASH;
 }
 
-static inline void rx_desc_set_multicast_hash(unsigned *desc)
+static inline void rx_desc_set_multicast_hash(uint32_t *desc)
 {
     desc[1] |= R_DESC_1_RX_MULTICAST_HASH;
 }
 
-static inline void rx_desc_set_sar(unsigned *desc, int sar_idx)
+static inline void rx_desc_set_sar(uint32_t *desc, int sar_idx)
 {
     desc[1] = deposit32(desc[1], R_DESC_1_RX_SAR_SHIFT, R_DESC_1_RX_SAR_LENGTH,
                         sar_idx);
@@ -419,7 +453,7 @@
     memset(&s->regs_ro[0], 0, sizeof(s->regs_ro));
     s->regs_ro[GEM_NWCTRL]   = 0xFFF80000;
     s->regs_ro[GEM_NWSTATUS] = 0xFFFFFFFF;
-    s->regs_ro[GEM_DMACFG]   = 0xFE00F000;
+    s->regs_ro[GEM_DMACFG]   = 0x8E00F000;
     s->regs_ro[GEM_TXSTATUS] = 0xFFFFFE08;
     s->regs_ro[GEM_RXQBASE]  = 0x00000003;
     s->regs_ro[GEM_TXQBASE]  = 0x00000003;
@@ -802,17 +836,42 @@
     return 0;
 }
 
+static hwaddr gem_get_desc_addr(CadenceGEMState *s, bool tx, int q)
+{
+    /* High word is only taken from TBQPH/RBQPH in 64-bit addressing mode */
+    hwaddr high = 0;
+
+    if (s->regs[GEM_DMACFG] & GEM_DMACFG_ADDR_64B) {
+        high = tx ? s->regs[GEM_TBQPH] : s->regs[GEM_RBQPH];
+    }
+
+    return (high << 32) | (tx ? s->tx_desc_addr[q] : s->rx_desc_addr[q]);
+}
+
+static hwaddr gem_get_tx_desc_addr(CadenceGEMState *s, int q)
+{
+    return gem_get_desc_addr(s, true, q);
+}
+
+static hwaddr gem_get_rx_desc_addr(CadenceGEMState *s, int q)
+{
+    return gem_get_desc_addr(s, false, q);
+}
+
 static void gem_get_rx_desc(CadenceGEMState *s, int q)
 {
-    DB_PRINT("read descriptor 0x%x\n", (unsigned)s->rx_desc_addr[q]);
+    hwaddr desc_addr = gem_get_rx_desc_addr(s, q);
+
+    DB_PRINT("read descriptor 0x%" HWADDR_PRIx "\n", desc_addr);
+
     /* read current descriptor */
-    cpu_physical_memory_read(s->rx_desc_addr[q],
-                             (uint8_t *)s->rx_desc[q], sizeof(s->rx_desc[q]));
+    address_space_read(&s->dma_as, desc_addr, MEMTXATTRS_UNSPECIFIED,
+                       (uint8_t *)s->rx_desc[q],
+                       sizeof(uint32_t) * gem_get_desc_len(s, true));
 
     /* Descriptor owned by software ? */
     if (rx_desc_get_ownership(s->rx_desc[q]) == 1) {
-        DB_PRINT("descriptor 0x%x owned by sw.\n",
-                 (unsigned)s->rx_desc_addr[q]);
+        DB_PRINT("descriptor 0x%" HWADDR_PRIx " owned by sw.\n", desc_addr);
         s->regs[GEM_RXSTATUS] |= GEM_RXSTATUS_NOBUF;
         s->regs[GEM_ISR] |= GEM_INT_RXUSED & ~(s->regs[GEM_IMR]);
         /* Handle interrupt consequences */
@@ -916,6 +975,8 @@
     q = get_queue_from_screen(s, rxbuf_ptr, rxbufsize);
 
     while (bytes_to_copy) {
+        hwaddr desc_addr;
+
         /* Do nothing if receive is not enabled. */
         if (!gem_can_receive(nc)) {
             assert(!first_desc);
@@ -926,9 +987,10 @@
                 rx_desc_get_buffer(s->rx_desc[q]));
 
         /* Copy packet data to emulated DMA buffer */
-        cpu_physical_memory_write(rx_desc_get_buffer(s->rx_desc[q]) +
-                                                                 rxbuf_offset,
-                                  rxbuf_ptr, MIN(bytes_to_copy, rxbufsize));
+        address_space_write(&s->dma_as, rx_desc_get_buffer(s, s->rx_desc[q]) +
+                                                                  rxbuf_offset,
+                            MEMTXATTRS_UNSPECIFIED, rxbuf_ptr,
+                            MIN(bytes_to_copy, rxbufsize));
         rxbuf_ptr += MIN(bytes_to_copy, rxbufsize);
         bytes_to_copy -= MIN(bytes_to_copy, rxbufsize);
 
@@ -962,9 +1024,11 @@
         }
 
         /* Descriptor write-back.  */
-        cpu_physical_memory_write(s->rx_desc_addr[q],
-                                  (uint8_t *)s->rx_desc[q],
-                                  sizeof(s->rx_desc[q]));
+        desc_addr = gem_get_rx_desc_addr(s, q);
+        address_space_write(&s->dma_as, desc_addr,
+                            MEMTXATTRS_UNSPECIFIED,
+                            (uint8_t *)s->rx_desc[q],
+                            sizeof(uint32_t) * gem_get_desc_len(s, true));
 
         /* Next descriptor */
         if (rx_desc_get_wrap(s->rx_desc[q])) {
@@ -972,7 +1036,7 @@
             s->rx_desc_addr[q] = s->regs[GEM_RXQBASE];
         } else {
             DB_PRINT("incrementing RX descriptor list\n");
-            s->rx_desc_addr[q] += 8;
+            s->rx_desc_addr[q] += 4 * gem_get_desc_len(s, true);
         }
 
         gem_get_rx_desc(s, q);
@@ -1042,7 +1106,7 @@
  */
 static void gem_transmit(CadenceGEMState *s)
 {
-    unsigned    desc[2];
+    uint32_t desc[DESC_MAX_NUM_WORDS];
     hwaddr packet_desc_addr;
     uint8_t     tx_packet[2048];
     uint8_t     *p;
@@ -1065,11 +1129,12 @@
 
     for (q = s->num_priority_queues - 1; q >= 0; q--) {
         /* read current descriptor */
-        packet_desc_addr = s->tx_desc_addr[q];
+        packet_desc_addr = gem_get_tx_desc_addr(s, q);
 
         DB_PRINT("read descriptor 0x%" HWADDR_PRIx "\n", packet_desc_addr);
-        cpu_physical_memory_read(packet_desc_addr,
-                                 (uint8_t *)desc, sizeof(desc));
+        address_space_read(&s->dma_as, packet_desc_addr,
+                           MEMTXATTRS_UNSPECIFIED, (uint8_t *)desc,
+                           sizeof(uint32_t) * gem_get_desc_len(s, false));
         /* Handle all descriptors owned by hardware */
         while (tx_desc_get_used(desc) == 0) {
 
@@ -1082,7 +1147,7 @@
             /* The real hardware would eat this (and possibly crash).
              * For QEMU let's lend a helping hand.
              */
-            if ((tx_desc_get_buffer(desc) == 0) ||
+            if ((tx_desc_get_buffer(s, desc) == 0) ||
                 (tx_desc_get_length(desc) == 0)) {
                 DB_PRINT("Invalid TX descriptor @ 0x%x\n",
                          (unsigned)packet_desc_addr);
@@ -1101,30 +1166,35 @@
             /* Gather this fragment of the packet from "dma memory" to our
              * contig buffer.
              */
-            cpu_physical_memory_read(tx_desc_get_buffer(desc), p,
-                                     tx_desc_get_length(desc));
+            address_space_read(&s->dma_as, tx_desc_get_buffer(s, desc),
+                               MEMTXATTRS_UNSPECIFIED,
+                               p, tx_desc_get_length(desc));
             p += tx_desc_get_length(desc);
             total_bytes += tx_desc_get_length(desc);
 
             /* Last descriptor for this packet; hand the whole thing off */
             if (tx_desc_get_last(desc)) {
-                unsigned    desc_first[2];
+                uint32_t desc_first[DESC_MAX_NUM_WORDS];
+                hwaddr desc_addr = gem_get_tx_desc_addr(s, q);
 
                 /* Modify the 1st descriptor of this packet to be owned by
                  * the processor.
                  */
-                cpu_physical_memory_read(s->tx_desc_addr[q],
-                                         (uint8_t *)desc_first,
-                                         sizeof(desc_first));
+                /* Only touch the words this descriptor format occupies */
+                address_space_read(&s->dma_as, desc_addr,
+                        MEMTXATTRS_UNSPECIFIED, (uint8_t *)desc_first,
+                        sizeof(uint32_t) * gem_get_desc_len(s, false));
                 tx_desc_set_used(desc_first);
-                cpu_physical_memory_write(s->tx_desc_addr[q],
-                                          (uint8_t *)desc_first,
-                                          sizeof(desc_first));
+                address_space_write(&s->dma_as, desc_addr,
+                        MEMTXATTRS_UNSPECIFIED,
+                        (uint8_t *)desc_first,
+                        sizeof(uint32_t) * gem_get_desc_len(s, false));
                 /* Advance the hardware current descriptor past this packet */
                 if (tx_desc_get_wrap(desc)) {
                     s->tx_desc_addr[q] = s->regs[GEM_TXQBASE];
                 } else {
-                    s->tx_desc_addr[q] = packet_desc_addr + 8;
+                    s->tx_desc_addr[q] = packet_desc_addr +
+                                         4 * gem_get_desc_len(s, false);
                 }
                 DB_PRINT("TX descriptor next: 0x%08x\n", s->tx_desc_addr[q]);
 
@@ -1168,11 +1238,12 @@
                 tx_desc_set_last(desc);
                 packet_desc_addr = s->regs[GEM_TXQBASE];
             } else {
-                packet_desc_addr += 8;
+                packet_desc_addr += 4 * gem_get_desc_len(s, false);
             }
             DB_PRINT("read descriptor 0x%" HWADDR_PRIx "\n", packet_desc_addr);
-            cpu_physical_memory_read(packet_desc_addr,
-                                     (uint8_t *)desc, sizeof(desc));
+            address_space_read(&s->dma_as, packet_desc_addr,
+                              MEMTXATTRS_UNSPECIFIED, (uint8_t *)desc,
+                              sizeof(uint32_t) * gem_get_desc_len(s, false));
         }
 
         if (tx_desc_get_used(desc)) {
@@ -1213,6 +1284,7 @@
     int i;
     CadenceGEMState *s = CADENCE_GEM(d);
     const uint8_t *a;
+    uint32_t queues_mask = 0;
 
     DB_PRINT("\n");
 
@@ -1228,8 +1300,13 @@
     s->regs[GEM_MODID] = s->revision;
     s->regs[GEM_DESCONF] = 0x02500111;
     s->regs[GEM_DESCONF2] = 0x2ab13fff;
-    s->regs[GEM_DESCONF5] = 0x002f2145;
-    s->regs[GEM_DESCONF6] = 0x00000200;
+    s->regs[GEM_DESCONF5] = 0x002f2045;
+    s->regs[GEM_DESCONF6] = GEM_DESCONF6_64B_MASK;
+
+    if (s->num_priority_queues > 1) {
+        queues_mask = MAKE_64BIT_MASK(1, s->num_priority_queues - 1);
+        s->regs[GEM_DESCONF6] |= queues_mask;
+    }
 
     /* Set MAC address */
     a = &s->conf.macaddr.a[0];
@@ -1463,6 +1540,9 @@
     CadenceGEMState *s = CADENCE_GEM(dev);
     int i;
 
+    address_space_init(&s->dma_as,
+                       s->dma_mr ? s->dma_mr : get_system_memory(), "dma");
+
     if (s->num_priority_queues == 0 ||
         s->num_priority_queues > MAX_PRIORITY_QUEUES) {
         error_setg(errp, "Invalid num-priority-queues value: %" PRIx8,
@@ -1500,6 +1580,12 @@
                           "enet", sizeof(s->regs));
 
     sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem);
+
+    object_property_add_link(obj, "dma", TYPE_MEMORY_REGION,
+                             (Object **)&s->dma_mr,
+                             qdev_prop_allow_set_link_before_realize,
+                             OBJ_PROP_LINK_STRONG,
+                             &error_abort);
 }
 
 static const VMStateDescription vmstate_cadence_gem = {
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 13a9494..5e144cb 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -36,6 +36,7 @@
 #include "qemu/range.h"
 
 #include "e1000x_common.h"
+#include "trace.h"
 
 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 
@@ -847,6 +848,15 @@
     return (bah << 32) + bal;
 }
 
+static void
+e1000_receiver_overrun(E1000State *s, size_t size)
+{
+    trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
+    e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
+    e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
+    set_ics(s, 0, E1000_ICS_RXO);
+}
+
 static ssize_t
 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
 {
@@ -916,8 +926,8 @@
     desc_offset = 0;
     total_size = size + e1000x_fcs_len(s->mac_reg);
     if (!e1000_has_rxbufs(s, total_size)) {
-            set_ics(s, 0, E1000_ICS_RXO);
-            return -1;
+        e1000_receiver_overrun(s, total_size);
+        return -1;
     }
     do {
         desc_size = total_size - desc_offset;
@@ -969,7 +979,7 @@
             rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
             DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
                    rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
-            set_ics(s, 0, E1000_ICS_RXO);
+            e1000_receiver_overrun(s, total_size);
             return -1;
         }
     } while (desc_offset < total_size);
diff --git a/hw/net/etraxfs_eth.c b/hw/net/etraxfs_eth.c
index a693243..3685580 100644
--- a/hw/net/etraxfs_eth.c
+++ b/hw/net/etraxfs_eth.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "hw/sysbus.h"
 #include "net/net.h"
 #include "hw/cris/etraxfs.h"
@@ -126,7 +127,7 @@
 }
 
 static void
-tdk_init(struct qemu_phy *phy)
+tdk_reset(struct qemu_phy *phy)
 {
     phy->regs[0] = 0x3100;
     /* PHY Id.  */
@@ -135,9 +136,6 @@
     /* Autonegotiation advertisement reg.  */
     phy->regs[4] = 0x01E1;
     phy->link = 1;
-
-    phy->read = tdk_read;
-    phy->write = tdk_write;
 }
 
 struct qemu_mdio
@@ -584,14 +582,35 @@
     .link_status_changed = eth_set_link,
 };
 
-static int fs_eth_init(SysBusDevice *sbd)
+static void etraxfs_eth_reset(DeviceState *dev)
 {
-    DeviceState *dev = DEVICE(sbd);
+    ETRAXFSEthState *s = ETRAX_FS_ETH(dev);
+
+    memset(s->regs, 0, sizeof(s->regs));
+    memset(s->macaddr, 0, sizeof(s->macaddr));
+    s->duplex_mismatch = 0;
+
+    s->mdio_bus.mdc = 0;
+    s->mdio_bus.mdio = 0;
+    s->mdio_bus.state = 0;
+    s->mdio_bus.drive = 0;
+    s->mdio_bus.cnt = 0;
+    s->mdio_bus.addr = 0;
+    s->mdio_bus.opc = 0;
+    s->mdio_bus.req = 0;
+    s->mdio_bus.data = 0;
+
+    tdk_reset(&s->phy);
+}
+
+static void etraxfs_eth_realize(DeviceState *dev, Error **errp)
+{
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     ETRAXFSEthState *s = ETRAX_FS_ETH(dev);
 
     if (!s->dma_out || !s->dma_in) {
-        error_report("Unconnected ETRAX-FS Ethernet MAC");
-        return -1;
+        error_setg(errp, "Unconnected ETRAX-FS Ethernet MAC");
+        return;
     }
 
     s->dma_out->client.push = eth_tx_push;
@@ -608,10 +627,9 @@
                           object_get_typename(OBJECT(s)), dev->id, s);
     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
 
-
-    tdk_init(&s->phy);
+    s->phy.read = tdk_read;
+    s->phy.write = tdk_write;
     mdio_attach(&s->mdio_bus, &s->phy, s->phyaddr);
-    return 0;
 }
 
 static Property etraxfs_eth_properties[] = {
@@ -625,9 +643,9 @@
 static void etraxfs_eth_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-    k->init = fs_eth_init;
+    dc->realize = etraxfs_eth_realize;
+    dc->reset = etraxfs_eth_reset;
     dc->props = etraxfs_eth_properties;
     /* Reason: pointer properties "dma_out", "dma_in" */
     dc->user_creatable = false;
diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c
index b9032da..a6269d9 100644
--- a/hw/net/lan9118.c
+++ b/hw/net/lan9118.c
@@ -1320,9 +1320,9 @@
     .link_status_changed = lan9118_set_link,
 };
 
-static int lan9118_init1(SysBusDevice *sbd)
+static void lan9118_realize(DeviceState *dev, Error **errp)
 {
-    DeviceState *dev = DEVICE(sbd);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     lan9118_state *s = LAN9118(dev);
     QEMUBH *bh;
     int i;
@@ -1349,8 +1349,6 @@
     s->timer = ptimer_init(bh, PTIMER_POLICY_DEFAULT);
     ptimer_set_freq(s->timer, 10000);
     ptimer_set_limit(s->timer, 0xffff, 1);
-
-    return 0;
 }
 
 static Property lan9118_properties[] = {
@@ -1362,12 +1360,11 @@
 static void lan9118_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-    k->init = lan9118_init1;
     dc->reset = lan9118_reset;
     dc->props = lan9118_properties;
     dc->vmsd = &vmstate_lan9118;
+    dc->realize = lan9118_realize;
 }
 
 static const TypeInfo lan9118_info = {
diff --git a/hw/net/lance.c b/hw/net/lance.c
index a08d5ac..f987b2f 100644
--- a/hw/net/lance.c
+++ b/hw/net/lance.c
@@ -97,9 +97,9 @@
     }
 };
 
-static int lance_init(SysBusDevice *sbd)
+static void lance_realize(DeviceState *dev, Error **errp)
 {
-    DeviceState *dev = DEVICE(sbd);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     SysBusPCNetState *d = SYSBUS_PCNET(dev);
     PCNetState *s = &d->state;
 
@@ -115,7 +115,6 @@
     s->phys_mem_read = ledma_memory_read;
     s->phys_mem_write = ledma_memory_write;
     pcnet_common_init(dev, s, &net_lance_info);
-    return 0;
 }
 
 static void lance_reset(DeviceState *dev)
@@ -144,9 +143,8 @@
 static void lance_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-    k->init = lance_init;
+    dc->realize = lance_realize;
     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
     dc->fw_name = "ethernet";
     dc->reset = lance_reset;
diff --git a/hw/net/milkymist-minimac2.c b/hw/net/milkymist-minimac2.c
index 3eaa19d..85c9fc0 100644
--- a/hw/net/milkymist-minimac2.c
+++ b/hw/net/milkymist-minimac2.c
@@ -30,6 +30,7 @@
 #include "hw/sysbus.h"
 #include "trace.h"
 #include "net/net.h"
+#include "qemu/log.h"
 #include "qemu/error-report.h"
 
 #include <zlib.h>
@@ -214,7 +215,8 @@
     uint32_t crc;
 
     if (size < payload_size + 12) {
-        error_report("milkymist_minimac2: received too big ethernet frame");
+        qemu_log_mask(LOG_GUEST_ERROR, "milkymist_minimac2: frame too big "
+                      "(%zu bytes)\n", payload_size);
         return 0;
     }
 
@@ -347,8 +349,9 @@
         break;
 
     default:
-        error_report("milkymist_minimac2: read access to unknown register 0x"
-                TARGET_FMT_plx, addr << 2);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "milkymist_minimac2_rd%d: 0x%" HWADDR_PRIx "\n",
+                      size, addr << 2);
         break;
     }
 
@@ -413,8 +416,10 @@
         break;
 
     default:
-        error_report("milkymist_minimac2: write access to unknown register 0x"
-                TARGET_FMT_plx, addr << 2);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "milkymist_minimac2_wr%d: 0x%" HWADDR_PRIx
+                      " = 0x%" PRIx64 "\n",
+                      size, addr << 2, value);
         break;
     }
 }
@@ -452,9 +457,9 @@
     .receive = minimac2_rx,
 };
 
-static int milkymist_minimac2_init(SysBusDevice *sbd)
+static void milkymist_minimac2_realize(DeviceState *dev, Error **errp)
 {
-    DeviceState *dev = DEVICE(sbd);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     MilkymistMinimac2State *s = MILKYMIST_MINIMAC2(dev);
     size_t buffers_size = TARGET_PAGE_ALIGN(3 * MINIMAC2_BUFFER_SIZE);
 
@@ -479,8 +484,6 @@
     s->nic = qemu_new_nic(&net_milkymist_minimac2_info, &s->conf,
                           object_get_typename(OBJECT(dev)), dev->id, s);
     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
-
-    return 0;
 }
 
 static const VMStateDescription vmstate_milkymist_minimac2_mdio = {
@@ -521,9 +524,8 @@
 static void milkymist_minimac2_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-    k->init = milkymist_minimac2_init;
+    dc->realize = milkymist_minimac2_realize;
     dc->reset = milkymist_minimac2_reset;
     dc->vmsd = &vmstate_milkymist_minimac2;
     dc->props = milkymist_minimac2_properties;
diff --git a/hw/net/mipsnet.c b/hw/net/mipsnet.c
index 5a63df7..03b3104 100644
--- a/hw/net/mipsnet.c
+++ b/hw/net/mipsnet.c
@@ -236,9 +236,9 @@
     .impl.max_access_size = 4,
 };
 
-static int mipsnet_sysbus_init(SysBusDevice *sbd)
+static void mipsnet_realize(DeviceState *dev, Error **errp)
 {
-    DeviceState *dev = DEVICE(sbd);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     MIPSnetState *s = MIPS_NET(dev);
 
     memory_region_init_io(&s->io, OBJECT(dev), &mipsnet_ioport_ops, s,
@@ -249,8 +249,6 @@
     s->nic = qemu_new_nic(&net_mipsnet_info, &s->conf,
                           object_get_typename(OBJECT(dev)), dev->id, s);
     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
-
-    return 0;
 }
 
 static void mipsnet_sysbus_reset(DeviceState *dev)
@@ -267,9 +265,8 @@
 static void mipsnet_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-    k->init = mipsnet_sysbus_init;
+    dc->realize = mipsnet_realize;
     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
     dc->desc = "MIPS Simulator network device";
     dc->reset = mipsnet_sysbus_reset;
diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c
index 07d79e3..869518e 100644
--- a/hw/net/ne2000.c
+++ b/hw/net/ne2000.c
@@ -174,7 +174,7 @@
 ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
 {
     NE2000State *s = qemu_get_nic_opaque(nc);
-    int size = size_;
+    size_t size = size_;
     uint8_t *p;
     unsigned int total_len, next, avail, len, index, mcast_idx;
     uint8_t buf1[60];
@@ -182,7 +182,7 @@
         { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
 #if defined(DEBUG_NE2000)
-    printf("NE2000: received len=%d\n", size);
+    printf("NE2000: received len=%zu\n", size);
 #endif
 
     if (s->cmd & E8390_STOP || ne2000_buffer_full(s))
diff --git a/hw/net/opencores_eth.c b/hw/net/opencores_eth.c
index d42b79c..d6f54f8 100644
--- a/hw/net/opencores_eth.c
+++ b/hw/net/opencores_eth.c
@@ -715,9 +715,9 @@
     .write = open_eth_desc_write,
 };
 
-static int sysbus_open_eth_init(SysBusDevice *sbd)
+static void sysbus_open_eth_realize(DeviceState *dev, Error **errp)
 {
-    DeviceState *dev = DEVICE(sbd);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     OpenEthState *s = OPEN_ETH(dev);
 
     memory_region_init_io(&s->reg_io, OBJECT(dev), &open_eth_reg_ops, s,
@@ -732,7 +732,6 @@
 
     s->nic = qemu_new_nic(&net_open_eth_info, &s->conf,
                           object_get_typename(OBJECT(s)), dev->id, s);
-    return 0;
 }
 
 static void qdev_open_eth_reset(DeviceState *dev)
@@ -750,9 +749,8 @@
 static void open_eth_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-    k->init = sysbus_open_eth_init;
+    dc->realize = sysbus_open_eth_realize;
     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
     dc->desc = "Opencores 10/100 Mbit Ethernet";
     dc->reset = qdev_open_eth_reset;
diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c
index 0c44554..d9ba04b 100644
--- a/hw/net/pcnet.c
+++ b/hw/net/pcnet.c
@@ -988,14 +988,14 @@
     uint8_t buf1[60];
     int remaining;
     int crc_err = 0;
-    int size = size_;
+    size_t size = size_;
 
     if (CSR_DRX(s) || CSR_STOP(s) || CSR_SPND(s) || !size ||
         (CSR_LOOP(s) && !s->looptest)) {
         return -1;
     }
 #ifdef PCNET_DEBUG
-    printf("pcnet_receive size=%d\n", size);
+    printf("pcnet_receive size=%zu\n", size);
 #endif
 
     /* if too small buffer, then expand it */
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index 46daa16..2342a09 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -817,7 +817,7 @@
     RTL8139State *s = qemu_get_nic_opaque(nc);
     PCIDevice *d = PCI_DEVICE(s);
     /* size is the length of the buffer passed to the driver */
-    int size = size_;
+    size_t size = size_;
     const uint8_t *dot1q_buf = NULL;
 
     uint32_t packet_header = 0;
@@ -826,7 +826,7 @@
     static const uint8_t broadcast_macaddr[6] =
         { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
-    DPRINTF(">>> received len=%d\n", size);
+    DPRINTF(">>> received len=%zu\n", size);
 
     /* test if board clock is stopped */
     if (!s->clock_enabled)
@@ -1035,7 +1035,7 @@
 
         if (size+4 > rx_space)
         {
-            DPRINTF("C+ Rx mode : descriptor %d size %d received %d + 4\n",
+            DPRINTF("C+ Rx mode : descriptor %d size %d received %zu + 4\n",
                 descriptor, rx_space, size);
 
             s->IntrStatus |= RxOverflow;
@@ -1148,7 +1148,7 @@
         if (avail != 0 && RX_ALIGN(size + 8) >= avail)
         {
             DPRINTF("rx overflow: rx buffer length %d head 0x%04x "
-                "read 0x%04x === available 0x%04x need 0x%04x\n",
+                "read 0x%04x === available 0x%04x need 0x%04zx\n",
                 s->RxBufferSize, s->RxBufAddr, s->RxBufPtr, avail, size + 8);
 
             s->IntrStatus |= RxOverflow;
diff --git a/hw/net/smc91c111.c b/hw/net/smc91c111.c
index d2fd204..99da2d9 100644
--- a/hw/net/smc91c111.c
+++ b/hw/net/smc91c111.c
@@ -766,9 +766,9 @@
     .receive = smc91c111_receive,
 };
 
-static int smc91c111_init1(SysBusDevice *sbd)
+static void smc91c111_realize(DeviceState *dev, Error **errp)
 {
-    DeviceState *dev = DEVICE(sbd);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     smc91c111_state *s = SMC91C111(dev);
 
     memory_region_init_io(&s->mmio, OBJECT(s), &smc91c111_mem_ops, s,
@@ -780,7 +780,6 @@
                           object_get_typename(OBJECT(dev)), dev->id, s);
     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
     /* ??? Save/restore.  */
-    return 0;
 }
 
 static Property smc91c111_properties[] = {
@@ -791,9 +790,8 @@
 static void smc91c111_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-    k->init = smc91c111_init1;
+    dc->realize = smc91c111_realize;
     dc->reset = smc91c111_reset;
     dc->vmsd = &vmstate_smc91c111;
     dc->props = smc91c111_properties;
diff --git a/hw/net/stellaris_enet.c b/hw/net/stellaris_enet.c
index 165562d..b3375eb 100644
--- a/hw/net/stellaris_enet.c
+++ b/hw/net/stellaris_enet.c
@@ -457,8 +457,10 @@
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void stellaris_enet_reset(stellaris_enet_state *s)
+static void stellaris_enet_reset(DeviceState *dev)
 {
+    stellaris_enet_state *s =  STELLARIS_ENET(dev);
+
     s->mdv = 0x80;
     s->rctl = SE_RCTL_BADCRC;
     s->im = SE_INT_PHY | SE_INT_MD | SE_INT_RXER | SE_INT_FOV | SE_INT_TXEMP
@@ -473,9 +475,9 @@
     .receive = stellaris_enet_receive,
 };
 
-static int stellaris_enet_init(SysBusDevice *sbd)
+static void stellaris_enet_realize(DeviceState *dev, Error **errp)
 {
-    DeviceState *dev = DEVICE(sbd);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     stellaris_enet_state *s = STELLARIS_ENET(dev);
 
     memory_region_init_io(&s->mmio, OBJECT(s), &stellaris_enet_ops, s,
@@ -487,9 +489,6 @@
     s->nic = qemu_new_nic(&net_stellaris_enet_info, &s->conf,
                           object_get_typename(OBJECT(dev)), dev->id, s);
     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
-
-    stellaris_enet_reset(s);
-    return 0;
 }
 
 static Property stellaris_enet_properties[] = {
@@ -500,9 +499,9 @@
 static void stellaris_enet_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-    k->init = stellaris_enet_init;
+    dc->realize = stellaris_enet_realize;
+    dc->reset = stellaris_enet_reset;
     dc->props = stellaris_enet_properties;
     dc->vmsd = &vmstate_stellaris_enet;
 }
diff --git a/hw/net/trace-events b/hw/net/trace-events
index c1dea4b..9d49f62 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -98,6 +98,9 @@
 net_rx_pkt_rss_hash(size_t rss_length, uint32_t rss_hash) "RSS hash for %zu bytes: 0x%X"
 net_rx_pkt_rss_add_chunk(void* ptr, size_t size, size_t input_offset) "Add RSS chunk %p, %zu bytes, RSS input offset %zu bytes"
 
+# hw/net/e1000.c
+e1000_receiver_overrun(size_t s, uint32_t rdh, uint32_t rdt) "Receiver overrun: dropped packet of %zu bytes, RDH=%u, RDT=%u"
+
 # hw/net/e1000x_common.c
 e1000x_rx_can_recv_disabled(bool link_up, bool rx_enabled, bool pci_master) "link_up: %d, rx_enabled %d, pci_master %d"
 e1000x_vlan_is_vlan_pkt(bool is_vlan_pkt, uint16_t eth_proto, uint16_t vet) "Is VLAN packet: %d, ETH proto: 0x%X, VET: 0x%X"
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 4bdd5b8..385b1a0 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -2020,10 +2020,10 @@
 
     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                        && strcmp(n->net_conf.tx, "bh")) {
-        error_report("virtio-net: "
-                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
-                     n->net_conf.tx);
-        error_report("Defaulting to \"bh\"");
+        warn_report("virtio-net: "
+                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
+                    n->net_conf.tx);
+        error_printf("Defaulting to \"bh\"\n"); /* no implicit newline */
     }
 
     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
diff --git a/hw/net/xgmac.c b/hw/net/xgmac.c
index fa00156..63f5a62 100644
--- a/hw/net/xgmac.c
+++ b/hw/net/xgmac.c
@@ -374,9 +374,9 @@
     .receive = eth_rx,
 };
 
-static int xgmac_enet_init(SysBusDevice *sbd)
+static void xgmac_enet_realize(DeviceState *dev, Error **errp)
 {
-    DeviceState *dev = DEVICE(sbd);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     XgmacState *s = XGMAC(dev);
 
     memory_region_init_io(&s->iomem, OBJECT(s), &enet_mem_ops, s,
@@ -397,8 +397,6 @@
                                  (s->conf.macaddr.a[2] << 16) |
                                  (s->conf.macaddr.a[1] << 8) |
                                   s->conf.macaddr.a[0];
-
-    return 0;
 }
 
 static Property xgmac_properties[] = {
@@ -408,10 +406,9 @@
 
 static void xgmac_enet_class_init(ObjectClass *klass, void *data)
 {
-    SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
 
-    sbc->init = xgmac_enet_init;
+    dc->realize = xgmac_enet_realize;
     dc->vmsd = &vmstate_xgmac;
     dc->props = xgmac_properties;
 }
diff --git a/hw/nvram/Makefile.objs b/hw/nvram/Makefile.objs
index a912d25..b318e53 100644
--- a/hw/nvram/Makefile.objs
+++ b/hw/nvram/Makefile.objs
@@ -1,6 +1,6 @@
 common-obj-$(CONFIG_DS1225Y) += ds1225y.o
 common-obj-y += eeprom93xx.o
-common-obj-$(CONFIG_I2C) += eeprom_at24c.o
+common-obj-$(CONFIG_AT24C) += eeprom_at24c.o
 common-obj-y += fw_cfg.o
 common-obj-y += chrp_nvram.o
 common-obj-$(CONFIG_MAC_NVRAM) += mac_nvram.o
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index d79a568..946f765 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -434,6 +434,11 @@
     return addr == 0;
 }
 
+static uint64_t fw_cfg_ctl_mem_read(void *opaque, hwaddr addr, unsigned size)
+{
+    return 0;
+}
+
 static void fw_cfg_ctl_mem_write(void *opaque, hwaddr addr,
                                  uint64_t value, unsigned size)
 {
@@ -468,6 +473,7 @@
 }
 
 static const MemoryRegionOps fw_cfg_ctl_mem_ops = {
+    .read = fw_cfg_ctl_mem_read,
     .write = fw_cfg_ctl_mem_write,
     .endianness = DEVICE_BIG_ENDIAN,
     .valid.accepts = fw_cfg_ctl_mem_valid,
@@ -1109,12 +1115,7 @@
     sysbus_init_mmio(sbd, &s->ctl_iomem);
 
     if (s->data_width > data_ops->valid.max_access_size) {
-        /* memberwise copy because the "old_mmio" member is const */
-        s->wide_data_ops.read       = data_ops->read;
-        s->wide_data_ops.write      = data_ops->write;
-        s->wide_data_ops.endianness = data_ops->endianness;
-        s->wide_data_ops.valid      = data_ops->valid;
-        s->wide_data_ops.impl       = data_ops->impl;
+        s->wide_data_ops = *data_ops;
 
         s->wide_data_ops.valid.max_access_size = s->data_width;
         s->wide_data_ops.impl.max_access_size  = s->data_width;
diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c
index 9868e2e..9f33582 100644
--- a/hw/pci-host/bonito.c
+++ b/hw/pci-host/bonito.c
@@ -595,7 +595,7 @@
     }
 };
 
-static int bonito_pcihost_initfn(SysBusDevice *dev)
+static void bonito_pcihost_realize(DeviceState *dev, Error **errp)
 {
     PCIHostState *phb = PCI_HOST_BRIDGE(dev);
 
@@ -603,8 +603,6 @@
                                      pci_bonito_set_irq, pci_bonito_map_irq,
                                      dev, get_system_memory(), get_system_io(),
                                      0x28, 32, TYPE_PCI_BUS);
-
-    return 0;
 }
 
 static void bonito_realize(PCIDevice *dev, Error **errp)
@@ -684,7 +682,6 @@
     pcihost->pic = pic;
     qdev_init_nofail(dev);
 
-    /* set the pcihost pointer before bonito_initfn is called */
     d = pci_create(phb->bus, PCI_DEVFN(0, 0), TYPE_PCI_BONITO);
     s = PCI_BONITO(d);
     s->pcihost = pcihost;
@@ -726,9 +723,9 @@
 
 static void bonito_pcihost_class_init(ObjectClass *klass, void *data)
 {
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
-    k->init = bonito_pcihost_initfn;
+    dc->realize = bonito_pcihost_realize;
 }
 
 static const TypeInfo bonito_pcihost_info = {
diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index 0e60834..47293a3 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -144,7 +144,7 @@
     memory_region_transaction_begin();
     for (i = 0; i < 13; i++) {
         pam_update(&d->pam_regions[i], i,
-                   pd->config[I440FX_PAM + (DIV_ROUND_UP(i, 2))]);
+                   pd->config[I440FX_PAM + DIV_ROUND_UP(i, 2)]);
     }
     memory_region_set_enabled(&d->smram_region,
                               !(pd->config[I440FX_SMRAM] & SMRAM_D_OPEN));
@@ -327,6 +327,10 @@
 
     sysbus_add_io(sbd, 0xcfc, &s->data_mem);
     sysbus_init_ioports(sbd, 0xcfc, 4);
+
+    /* register i440fx 0xcf8 port as coalesced pio */
+    memory_region_set_flush_coalesced(&s->data_mem);
+    memory_region_add_coalescing(&s->conf_mem, 0, 4);
 }
 
 static void i440fx_realize(PCIDevice *dev, Error **errp)
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 02f9576..966a7cf 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -51,6 +51,10 @@
     sysbus_add_io(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, &pci->data_mem);
     sysbus_init_ioports(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, 4);
 
+    /* register q35 0xcf8 port as coalesced pio */
+    memory_region_set_flush_coalesced(&pci->data_mem);
+    memory_region_add_coalescing(&pci->conf_mem, 0, 4);
+
     pci->bus = pci_root_bus_new(DEVICE(s), "pcie.0",
                                 s->mch.pci_address_space,
                                 s->mch.address_space_io,
@@ -352,7 +356,7 @@
     memory_region_transaction_begin();
     for (i = 0; i < 13; i++) {
         pam_update(&mch->pam_regions[i], i,
-                   pd->config[MCH_HOST_BRIDGE_PAM0 + (DIV_ROUND_UP(i, 2))]);
+                   pd->config[MCH_HOST_BRIDGE_PAM0 + DIV_ROUND_UP(i, 2)]);
     }
     memory_region_transaction_commit();
 }
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 51d0dec..b937f0d 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1737,9 +1737,6 @@
     info->id = g_new0(PciDeviceId, 1);
     info->id->vendor = pci_get_word(dev->config + PCI_VENDOR_ID);
     info->id->device = pci_get_word(dev->config + PCI_DEVICE_ID);
-    info->id->subsystem = pci_get_word(dev->config + PCI_SUBSYSTEM_ID);
-    info->id->subsystem_vendor =
-        pci_get_word(dev->config + PCI_SUBSYSTEM_VENDOR_ID);
     info->regions = qmp_query_pci_regions(dev);
     info->qdev_id = g_strdup(dev->qdev.id ? dev->qdev.id : "");
 
@@ -1752,6 +1749,16 @@
     if (type == PCI_HEADER_TYPE_BRIDGE) {
         info->has_pci_bridge = true;
         info->pci_bridge = qmp_query_pci_bridge(dev, bus, bus_num);
+    } else if (type == PCI_HEADER_TYPE_NORMAL) {
+        info->id->has_subsystem = info->id->has_subsystem_vendor = true;
+        info->id->subsystem = pci_get_word(dev->config + PCI_SUBSYSTEM_ID);
+        info->id->subsystem_vendor =
+            pci_get_word(dev->config + PCI_SUBSYSTEM_VENDOR_ID);
+    } else if (type == PCI_HEADER_TYPE_CARDBUS) {
+        info->id->has_subsystem = info->id->has_subsystem_vendor = true;
+        info->id->subsystem = pci_get_word(dev->config + PCI_CB_SUBSYSTEM_ID);
+        info->id->subsystem_vendor =
+            pci_get_word(dev->config + PCI_CB_SUBSYSTEM_VENDOR_ID);
     }
 
     return info;
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 9750464..ad1bcc7 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -148,8 +148,8 @@
 
     chip = object_property_get_link(OBJECT(dev), "chip", &local_err);
     if (!chip) {
-        error_propagate(errp, local_err);
-        error_prepend(errp, "required link 'chip' not found: ");
+        error_propagate_prepend(errp, local_err,
+                                "required link 'chip' not found: ");
         return;
     }
 
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 98868d8..c08130f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -3128,14 +3128,12 @@
     Error *local_err = NULL;
     sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
     PCDIMMDevice *dimm = PC_DIMM(dev);
-    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
-    MemoryRegion *mr = ddc->get_memory_region(dimm, &error_abort);
     uint64_t size, addr;
     uint32_t node;
 
-    size = memory_region_size(mr);
+    size = memory_device_get_region_size(MEMORY_DEVICE(dev), &error_abort);
 
-    pc_dimm_plug(dev, MACHINE(ms), &local_err);
+    pc_dimm_plug(dimm, MACHINE(ms), &local_err);
     if (local_err) {
         goto out;
     }
@@ -3158,7 +3156,7 @@
     return;
 
 out_unplug:
-    pc_dimm_unplug(dev, MACHINE(ms));
+    pc_dimm_unplug(dimm, MACHINE(ms));
 out:
     error_propagate(errp, local_err);
 }
@@ -3169,9 +3167,7 @@
     const sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(hotplug_dev);
     sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_dev);
     PCDIMMDevice *dimm = PC_DIMM(dev);
-    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
     Error *local_err = NULL;
-    MemoryRegion *mr;
     uint64_t size;
     Object *memdev;
     hwaddr pagesize;
@@ -3181,11 +3177,11 @@
         return;
     }
 
-    mr = ddc->get_memory_region(dimm, errp);
-    if (!mr) {
+    size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
         return;
     }
-    size = memory_region_size(mr);
 
     if (size % SPAPR_MEMORY_BLOCK_SIZE) {
         error_setg(errp, "Hotplugged memory size must be a multiple of "
@@ -3202,7 +3198,7 @@
         return;
     }
 
-    pc_dimm_pre_plug(dev, MACHINE(hotplug_dev), NULL, errp);
+    pc_dimm_pre_plug(dimm, MACHINE(hotplug_dev), NULL, errp);
 }
 
 struct sPAPRDIMMState {
@@ -3257,9 +3253,8 @@
                                                         PCDIMMDevice *dimm)
 {
     sPAPRDRConnector *drc;
-    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
-    MemoryRegion *mr = ddc->get_memory_region(dimm, &error_abort);
-    uint64_t size = memory_region_size(mr);
+    uint64_t size = memory_device_get_region_size(MEMORY_DEVICE(dimm),
+                                                  &error_abort);
     uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
     uint32_t avail_lmbs = 0;
     uint64_t addr_start, addr;
@@ -3314,7 +3309,7 @@
     sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_dev);
     sPAPRDIMMState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev));
 
-    pc_dimm_unplug(dev, MACHINE(hotplug_dev));
+    pc_dimm_unplug(PC_DIMM(dev), MACHINE(hotplug_dev));
     object_unparent(OBJECT(dev));
     spapr_pending_dimm_unplugs_remove(spapr, ds);
 }
@@ -3325,14 +3320,12 @@
     sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_dev);
     Error *local_err = NULL;
     PCDIMMDevice *dimm = PC_DIMM(dev);
-    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
-    MemoryRegion *mr = ddc->get_memory_region(dimm, &error_abort);
     uint32_t nr_lmbs;
     uint64_t size, addr_start, addr;
     int i;
     sPAPRDRConnector *drc;
 
-    size = memory_region_size(mr);
+    size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &error_abort);
     nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
 
     addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP,
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index c2271e6..58afa46 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1724,16 +1724,15 @@
         if (smc->legacy_irq_allocation) {
             irq = spapr_irq_findone(spapr, &local_err);
             if (local_err) {
-                error_propagate(errp, local_err);
-                error_prepend(errp, "can't allocate LSIs: ");
+                error_propagate_prepend(errp, local_err,
+                                        "can't allocate LSIs: ");
                 return;
             }
         }
 
         spapr_irq_claim(spapr, irq, true, &local_err);
         if (local_err) {
-            error_propagate(errp, local_err);
-            error_prepend(errp, "can't allocate LSIs: ");
+            error_propagate_prepend(errp, local_err, "can't allocate LSIs: ");
             return;
         }
 
diff --git a/hw/riscv/sifive_clint.c b/hw/riscv/sifive_clint.c
index 7cc606e..0d2fd52 100644
--- a/hw/riscv/sifive_clint.c
+++ b/hw/riscv/sifive_clint.c
@@ -47,12 +47,12 @@
     if (cpu->env.timecmp <= rtc_r) {
         /* if we're setting an MTIMECMP value in the "past",
            immediately raise the timer interrupt */
-        riscv_set_local_interrupt(cpu, MIP_MTIP, 1);
+        riscv_cpu_update_mip(cpu, MIP_MTIP, BOOL_TO_MASK(1));
         return;
     }
 
     /* otherwise, set up the future timer interrupt */
-    riscv_set_local_interrupt(cpu, MIP_MTIP, 0);
+    riscv_cpu_update_mip(cpu, MIP_MTIP, BOOL_TO_MASK(0));
     diff = cpu->env.timecmp - rtc_r;
     /* back to ns (note args switched in muldiv64) */
     next = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
@@ -67,7 +67,7 @@
 static void sifive_clint_timer_cb(void *opaque)
 {
     RISCVCPU *cpu = opaque;
-    riscv_set_local_interrupt(cpu, MIP_MTIP, 1);
+    riscv_cpu_update_mip(cpu, MIP_MTIP, BOOL_TO_MASK(1));
 }
 
 /* CPU wants to read rtc or timecmp register */
@@ -132,7 +132,7 @@
         if (!env) {
             error_report("clint: invalid timecmp hartid: %zu", hartid);
         } else if ((addr & 0x3) == 0) {
-            riscv_set_local_interrupt(RISCV_CPU(cpu), MIP_MSIP, value != 0);
+            riscv_cpu_update_mip(RISCV_CPU(cpu), MIP_MSIP, BOOL_TO_MASK(value));
         } else {
             error_report("clint: invalid sip write: %08x", (uint32_t)addr);
         }
diff --git a/hw/riscv/sifive_plic.c b/hw/riscv/sifive_plic.c
index f635e6f..9cf9a1f 100644
--- a/hw/riscv/sifive_plic.c
+++ b/hw/riscv/sifive_plic.c
@@ -142,10 +142,10 @@
         int level = sifive_plic_irqs_pending(plic, addrid);
         switch (mode) {
         case PLICMode_M:
-            riscv_set_local_interrupt(RISCV_CPU(cpu), MIP_MEIP, level);
+            riscv_cpu_update_mip(RISCV_CPU(cpu), MIP_MEIP, BOOL_TO_MASK(level));
             break;
         case PLICMode_S:
-            riscv_set_local_interrupt(RISCV_CPU(cpu), MIP_SEIP, level);
+            riscv_cpu_update_mip(RISCV_CPU(cpu), MIP_SEIP, BOOL_TO_MASK(level));
             break;
         default:
             break;
diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
index 862f8ff..ef07df2 100644
--- a/hw/riscv/sifive_u.c
+++ b/hw/riscv/sifive_u.c
@@ -230,7 +230,9 @@
 
     qemu_fdt_add_subnode(fdt, "/chosen");
     qemu_fdt_setprop_string(fdt, "/chosen", "stdout-path", nodename);
-    qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline);
+    if (cmdline) {
+        qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline);
+    }
     g_free(nodename);
 }
 
diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
index be5ef85..8a712ed 100644
--- a/hw/riscv/spike.c
+++ b/hw/riscv/spike.c
@@ -156,8 +156,10 @@
     g_free(cells);
     g_free(nodename);
 
-    qemu_fdt_add_subnode(fdt, "/chosen");
-    qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline);
+    if (cmdline) {
+        qemu_fdt_add_subnode(fdt, "/chosen");
+        qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline);
+    }
  }
 
 static void spike_v1_10_0_board_init(MachineState *machine)
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 005169e..4a137a5 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -254,7 +254,9 @@
 
     qemu_fdt_add_subnode(fdt, "/chosen");
     qemu_fdt_setprop_string(fdt, "/chosen", "stdout-path", nodename);
-    qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline);
+    if (cmdline) {
+        qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline);
+    }
     g_free(nodename);
 
     return fdt;
@@ -385,6 +387,8 @@
     serial_mm_init(system_memory, memmap[VIRT_UART0].base,
         0, qdev_get_gpio_in(DEVICE(s->plic), UART0_IRQ), 399193,
         serial_hd(0), DEVICE_LITTLE_ENDIAN);
+
+    g_free(plic_hart_config);
 }
 
 static void riscv_virt_board_machine_init(MachineClass *mc)
diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
index 5dbc00c..ca68806 100644
--- a/hw/s390x/Makefile.objs
+++ b/hw/s390x/Makefile.objs
@@ -26,8 +26,10 @@
 obj-y += s390-skeys.o
 obj-y += s390-stattrib.o
 obj-y += tod.o
+obj-y += tod-qemu.o
 obj-$(CONFIG_KVM) += tod-kvm.o
-obj-$(CONFIG_TCG) += tod-qemu.o
 obj-$(CONFIG_KVM) += s390-skeys-kvm.o
 obj-$(CONFIG_KVM) += s390-stattrib-kvm.o
 obj-y += s390-ccw.o
+obj-y += ap-device.o
+obj-y += ap-bridge.o
diff --git a/hw/s390x/ap-bridge.c b/hw/s390x/ap-bridge.c
new file mode 100644
index 0000000..3795d30
--- /dev/null
+++ b/hw/s390x/ap-bridge.c
@@ -0,0 +1,90 @@
+/*
+ * ap bridge
+ *
+ * Copyright 2018 IBM Corp.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/sysbus.h"
+#include "qemu/bitops.h"
+#include "hw/s390x/ap-bridge.h"
+#include "cpu.h"
+
+/*
+ * get_dev_path hook installed on the AP bus class.  The bus is limited to
+ * one device (max_dev is set to 1 in ap_bus_class_init), so the path is a
+ * fixed string.  Returns a newly allocated copy of it.
+ */
+static char *ap_bus_get_dev_path(DeviceState *dev)
+{
+    /* at most one */
+    return g_strdup("/1");
+}
+
+/* Install the AP bus class hooks and limit the bus to a single device. */
+static void ap_bus_class_init(ObjectClass *oc, void *data)
+{
+    BusClass *k = BUS_CLASS(oc);
+
+    k->get_dev_path = ap_bus_get_dev_path;
+    /* More than one ap device does not make sense */
+    k->max_dev = 1;
+}
+
+static const TypeInfo ap_bus_info = {
+    .name = TYPE_AP_BUS,
+    .parent = TYPE_BUS,
+    .instance_size = 0,
+    .class_init = ap_bus_class_init,
+};
+
+/*
+ * Create the AP bridge device as a child of the machine object and create
+ * an AP bus on it.  Does nothing when S390_FEAT_AP is not available.
+ */
+void s390_init_ap(void)
+{
+    DeviceState *dev;
+
+    /* If no AP instructions then no need for AP bridge */
+    if (!s390_has_feat(S390_FEAT_AP)) {
+        return;
+    }
+
+    /* Create bridge device */
+    dev = qdev_create(NULL, TYPE_AP_BRIDGE);
+    object_property_add_child(qdev_get_machine(), TYPE_AP_BRIDGE,
+                              OBJECT(dev), NULL);
+    qdev_init_nofail(dev);
+
+    /* Create bus on bridge device */
+    qbus_create(TYPE_AP_BUS, dev, TYPE_AP_BUS);
+}
+
+/* Put the AP bridge into the bridge device category. */
+static void ap_bridge_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+}
+
+static const TypeInfo ap_bridge_info = {
+    .name          = TYPE_AP_BRIDGE,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = 0,
+    .class_init    = ap_bridge_class_init,
+};
+
+/* Register the AP bridge and AP bus types. */
+static void ap_register(void)
+{
+    type_register_static(&ap_bridge_info);
+    type_register_static(&ap_bus_info);
+}
+
+type_init(ap_register)
diff --git a/hw/s390x/ap-device.c b/hw/s390x/ap-device.c
new file mode 100644
index 0000000..f5ac8db
--- /dev/null
+++ b/hw/s390x/ap-device.c
@@ -0,0 +1,38 @@
+/*
+ * Adjunct Processor (AP) matrix device
+ *
+ * Copyright 2018 IBM Corp.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/module.h"
+#include "qapi/error.h"
+#include "hw/qdev.h"
+#include "hw/s390x/ap-device.h"
+
+static void ap_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->desc = "AP device class";
+    dc->hotpluggable = false;
+}
+
+static const TypeInfo ap_device_info = {
+    .name = AP_DEVICE_TYPE,
+    .parent = TYPE_DEVICE,
+    .instance_size = sizeof(APDevice),
+    .class_size = sizeof(DeviceClass),
+    .class_init = ap_class_init,
+    .abstract = true,
+};
+
+static void ap_device_register(void)
+{
+    type_register_static(&ap_device_info);
+}
+
+type_init(ap_device_register)
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 5a9fe45..04ec5cc 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -750,20 +750,25 @@
 
 }
 
-static void copy_sense_id_to_guest(SenseId *dest, SenseId *src)
+/*
+ * As the SenseId struct cannot be packed (would cause unaligned accesses), we
+ * have to copy the individual fields to an unstructured area using the correct
+ * layout (see SA22-7204-01 "Common I/O-Device Commands").
+ */
+static void copy_sense_id_to_guest(uint8_t *dest, SenseId *src)
 {
     int i;
 
-    dest->reserved = src->reserved;
-    dest->cu_type = cpu_to_be16(src->cu_type);
-    dest->cu_model = src->cu_model;
-    dest->dev_type = cpu_to_be16(src->dev_type);
-    dest->dev_model = src->dev_model;
-    dest->unused = src->unused;
-    for (i = 0; i < ARRAY_SIZE(dest->ciw); i++) {
-        dest->ciw[i].type = src->ciw[i].type;
-        dest->ciw[i].command = src->ciw[i].command;
-        dest->ciw[i].count = cpu_to_be16(src->ciw[i].count);
+    dest[0] = src->reserved;
+    stw_be_p(dest + 1, src->cu_type);
+    dest[3] = src->cu_model;
+    stw_be_p(dest + 4, src->dev_type);
+    dest[6] = src->dev_model;
+    dest[7] = src->unused;
+    for (i = 0; i < ARRAY_SIZE(src->ciw); i++) {
+        dest[8 + i * 4] = src->ciw[i].type;
+        dest[9 + i * 4] = src->ciw[i].command;
+        stw_be_p(dest + 10 + i * 4, src->ciw[i].count);
     }
 }
 
@@ -1044,9 +1049,10 @@
         break;
     case CCW_CMD_SENSE_ID:
     {
-        SenseId sense_id;
+        /* According to SA22-7204-01, Sense-ID can store up to 256 bytes */
+        uint8_t sense_id[256];
 
-        copy_sense_id_to_guest(&sense_id, &sch->id);
+        copy_sense_id_to_guest(sense_id, &sch->id);
         /* Sense ID information is device specific. */
         if (check_len) {
             if (ccw.count != sizeof(sense_id)) {
@@ -1060,11 +1066,11 @@
          * have enough place to store at least bytes 0-3.
          */
         if (len >= 4) {
-            sense_id.reserved = 0xff;
+            sense_id[0] = 0xff;
         } else {
-            sense_id.reserved = 0;
+            sense_id[0] = 0;
         }
-        ccw_dstream_write_buf(&sch->cds, &sense_id, len);
+        ccw_dstream_write_buf(&sch->cds, sense_id, len);
         sch->curr_status.scsw.count = ccw_dstream_residual_count(&sch->cds);
         ret = 0;
         break;
diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
index 4e87b89..b3a07a1 100644
--- a/hw/s390x/ipl.h
+++ b/hw/s390x/ipl.h
@@ -132,15 +132,15 @@
 struct S390IPLState {
     /*< private >*/
     DeviceState parent_obj;
+    IplParameterBlock iplb;
+    QemuIplParameters qipl;
     uint64_t start_addr;
     uint64_t compat_start_addr;
     uint64_t bios_start_addr;
     uint64_t compat_bios_start_addr;
     bool enforce_bios;
-    IplParameterBlock iplb;
     bool iplb_valid;
     bool netboot;
-    QemuIplParameters qipl;
     /* reset related properties don't have to be migrated or reset */
     enum s390_reset reset_type;
     int reset_cpu_index;
@@ -157,6 +157,7 @@
     bool iplbext_migration;
 };
 typedef struct S390IPLState S390IPLState;
+QEMU_BUILD_BUG_MSG(offsetof(S390IPLState, iplb) & 3, "alignment of iplb wrong");
 
 #define S390_IPL_TYPE_FCP 0x00
 #define S390_IPL_TYPE_CCW 0x02
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index e3e0ebb..e42e1b8 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -692,27 +692,35 @@
     object_unref(OBJECT(iommu));
 }
 
-static int s390_pcihost_init(SysBusDevice *dev)
+static void s390_pcihost_realize(DeviceState *dev, Error **errp)
 {
     PCIBus *b;
     BusState *bus;
     PCIHostState *phb = PCI_HOST_BRIDGE(dev);
     S390pciState *s = S390_PCI_HOST_BRIDGE(dev);
+    Error *local_err = NULL;
 
     DPRINTF("host_init\n");
 
-    b = pci_register_root_bus(DEVICE(dev), NULL,
-                              s390_pci_set_irq, s390_pci_map_irq, NULL,
-                              get_system_memory(), get_system_io(), 0, 64,
-                              TYPE_PCI_BUS);
+    b = pci_register_root_bus(dev, NULL, s390_pci_set_irq, s390_pci_map_irq,
+                              NULL, get_system_memory(), get_system_io(), 0,
+                              64, TYPE_PCI_BUS);
     pci_setup_iommu(b, s390_pci_dma_iommu, s);
 
     bus = BUS(b);
-    qbus_set_hotplug_handler(bus, DEVICE(dev), NULL);
+    qbus_set_hotplug_handler(bus, dev, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
     phb->bus = b;
 
-    s->bus = S390_PCI_BUS(qbus_create(TYPE_S390_PCI_BUS, DEVICE(s), NULL));
-    qbus_set_hotplug_handler(BUS(s->bus), DEVICE(s), NULL);
+    s->bus = S390_PCI_BUS(qbus_create(TYPE_S390_PCI_BUS, dev, NULL));
+    qbus_set_hotplug_handler(BUS(s->bus), dev, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
 
     s->iommu_table = g_hash_table_new_full(g_int64_hash, g_int64_equal,
                                            NULL, g_free);
@@ -722,9 +730,10 @@
     QTAILQ_INIT(&s->zpci_devs);
 
     css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false,
-                             S390_ADAPTER_SUPPRESSIBLE, &error_abort);
-
-    return 0;
+                             S390_ADAPTER_SUPPRESSIBLE, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+    }
 }
 
 static int s390_pci_msix_init(S390PCIBusDevice *pbdev)
@@ -1018,12 +1027,11 @@
 
 static void s390_pcihost_class_init(ObjectClass *klass, void *data)
 {
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
 
     dc->reset = s390_pcihost_reset;
-    k->init = s390_pcihost_init;
+    dc->realize = s390_pcihost_realize;
     hc->plug = s390_pcihost_hot_plug;
     hc->unplug = s390_pcihost_hot_unplug;
     msi_nonbroken = true;
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index f0f7fdc..a0615a8 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -32,6 +32,7 @@
 #include "ipl.h"
 #include "hw/s390x/s390-virtio-ccw.h"
 #include "hw/s390x/css-bridge.h"
+#include "hw/s390x/ap-bridge.h"
 #include "migration/register.h"
 #include "cpu_models.h"
 #include "hw/nmi.h"
@@ -263,6 +264,9 @@
     /* init the SIGP facility */
     s390_init_sigp();
 
+    /* create AP bridge and bus(es) */
+    s390_init_ap();
+
     /* get a BUS */
     css_bus = virtual_css_bus_init();
     s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline,
@@ -456,6 +460,7 @@
     s390mc->ri_allowed = true;
     s390mc->cpu_model_allowed = true;
     s390mc->css_migration_enabled = true;
+    s390mc->hpage_1m_allowed = true;
     mc->init = ccw_init;
     mc->reset = s390_machine_reset;
     mc->hot_add_cpu = s390_hot_add_cpu;
@@ -535,6 +540,12 @@
     return get_machine_class()->cpu_model_allowed;
 }
 
+bool hpage_1m_allowed(void)
+{
+    /* for "none" machine this results in true */
+    return get_machine_class()->hpage_1m_allowed;
+}
+
 static char *machine_get_loadparm(Object *obj, Error **errp)
 {
     S390CcwMachineState *ms = S390_CCW_MACHINE(obj);
@@ -747,6 +758,9 @@
 
 static void ccw_machine_3_0_class_options(MachineClass *mc)
 {
+    S390CcwMachineClass *s390mc = S390_MACHINE_CLASS(mc);
+
+    s390mc->hpage_1m_allowed = false;
     ccw_machine_3_1_class_options(mc);
     SET_MACHINE_COMPAT(mc, CCW_COMPAT_3_0);
 }
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index 996b406..d1e6534 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -20,20 +20,7 @@
 #include "hw/scsi/scsi.h"
 #include "sysemu/dma.h"
 #include "qemu/log.h"
-
-//#define DEBUG_LSI
-//#define DEBUG_LSI_REG
-
-#ifdef DEBUG_LSI
-#define DPRINTF(fmt, ...) \
-do { printf("lsi_scsi: " fmt , ## __VA_ARGS__); } while (0)
-#define BADF(fmt, ...) \
-do { fprintf(stderr, "lsi_scsi: error: " fmt , ## __VA_ARGS__); exit(1);} while (0)
-#else
-#define DPRINTF(fmt, ...) do {} while(0)
-#define BADF(fmt, ...) \
-do { fprintf(stderr, "lsi_scsi: error: " fmt , ## __VA_ARGS__);} while (0)
-#endif
+#include "trace.h"
 
 static const char *names[] = {
     "SCNTL0", "SCNTL1", "SCNTL2", "SCNTL3", "SCID", "SXFER", "SDID", "GPREG",
@@ -313,7 +300,7 @@
 
 static void lsi_soft_reset(LSIState *s)
 {
-    DPRINTF("Reset\n");
+    trace_lsi_reset();
     s->carry = 0;
 
     s->msg_action = 0;
@@ -484,15 +471,13 @@
         level = 1;
 
     if (level != last_level) {
-        DPRINTF("Update IRQ level %d dstat %02x sist %02x%02x\n",
-                level, s->dstat, s->sist1, s->sist0);
+        trace_lsi_update_irq(level, s->dstat, s->sist1, s->sist0);
         last_level = level;
     }
     lsi_set_irq(s, level);
 
     if (!level && lsi_irq_on_rsl(s) && !(s->scntl1 & LSI_SCNTL1_CON)) {
-        DPRINTF("Handled IRQs & disconnected, looking for pending "
-                "processes\n");
+        trace_lsi_update_irq_disconnected();
         QTAILQ_FOREACH(p, &s->queue, next) {
             if (p->pending) {
                 lsi_reselect(s, p);
@@ -508,8 +493,7 @@
     uint32_t mask0;
     uint32_t mask1;
 
-    DPRINTF("SCSI Interrupt 0x%02x%02x prev 0x%02x%02x\n",
-            stat1, stat0, s->sist1, s->sist0);
+    trace_lsi_script_scsi_interrupt(stat1, stat0, s->sist1, s->sist0);
     s->sist0 |= stat0;
     s->sist1 |= stat1;
     /* Stop processor on fatal or unmasked interrupt.  As a special hack
@@ -527,7 +511,7 @@
 /* Stop SCRIPTS execution and raise a DMA interrupt.  */
 static void lsi_script_dma_interrupt(LSIState *s, int stat)
 {
-    DPRINTF("DMA Interrupt 0x%x prev 0x%x\n", stat, s->dstat);
+    trace_lsi_script_dma_interrupt(stat, s->dstat);
     s->dstat |= stat;
     lsi_update_irq(s);
     lsi_stop_script(s);
@@ -547,9 +531,9 @@
         } else {
             s->dsp = (s->scntl2 & LSI_SCNTL2_WSR ? s->pmjad2 : s->pmjad1);
         }
-        DPRINTF("Data phase mismatch jump to %08x\n", s->dsp);
+        trace_lsi_bad_phase_jump(s->dsp);
     } else {
-        DPRINTF("Phase mismatch interrupt\n");
+        trace_lsi_bad_phase_interrupt();
         lsi_script_scsi_interrupt(s, LSI_SIST0_MA, 0);
         lsi_stop_script(s);
     }
@@ -576,7 +560,7 @@
 
 static void lsi_bad_selection(LSIState *s, uint32_t id)
 {
-    DPRINTF("Selected absent target %d\n", id);
+    trace_lsi_bad_selection(id);
     lsi_script_scsi_interrupt(s, 0, LSI_SIST1_STO);
     lsi_disconnect(s);
 }
@@ -591,7 +575,7 @@
     assert(s->current);
     if (!s->current->dma_len) {
         /* Wait until data is available.  */
-        DPRINTF("DMA no data available\n");
+        trace_lsi_do_dma_unavailable();
         return;
     }
 
@@ -611,7 +595,7 @@
     else if (s->sbms)
         addr |= ((uint64_t)s->sbms << 32);
 
-    DPRINTF("DMA addr=0x" DMA_ADDR_FMT " len=%d\n", addr, count);
+    trace_lsi_do_dma(addr, count);
     s->csbc += count;
     s->dnad += count;
     s->dbc -= count;
@@ -640,7 +624,7 @@
 {
     lsi_request *p = s->current;
 
-    DPRINTF("Queueing tag=0x%x\n", p->tag);
+    trace_lsi_queue_command(p->tag);
     assert(s->current != NULL);
     assert(s->current->dma_len == 0);
     QTAILQ_INSERT_TAIL(&s->queue, s->current, next);
@@ -654,9 +638,9 @@
 static void lsi_add_msg_byte(LSIState *s, uint8_t data)
 {
     if (s->msg_len >= LSI_MAX_MSGIN_LEN) {
-        BADF("MSG IN data too long\n");
+        trace_lsi_add_msg_byte_error();
     } else {
-        DPRINTF("MSG IN 0x%02x\n", data);
+        trace_lsi_add_msg_byte(data);
         s->msg[s->msg_len++] = data;
     }
 }
@@ -676,7 +660,7 @@
     if (!(s->dcntl & LSI_DCNTL_COM)) {
         s->sfbr = 1 << (id & 0x7);
     }
-    DPRINTF("Reselected target %d\n", id);
+    trace_lsi_reselect(id);
     s->scntl1 |= LSI_SCNTL1_CON;
     lsi_set_phase(s, PHASE_MI);
     s->msg_action = p->out ? 2 : 3;
@@ -732,7 +716,7 @@
     lsi_request *p = req->hba_private;
 
     if (p->pending) {
-        BADF("Multiple IO pending for request %p\n", p);
+        trace_lsi_queue_req_error(p);
     }
     p->pending = len;
     /* Reselect if waiting for it, or if reselection triggers an IRQ
@@ -747,7 +731,7 @@
         lsi_reselect(s, p);
         return 0;
     } else {
-        DPRINTF("Queueing IO tag=0x%x\n", p->tag);
+        trace_lsi_queue_req(p->tag);
         p->pending = len;
         return 1;
     }
@@ -760,7 +744,7 @@
     int out;
 
     out = (s->sstat1 & PHASE_MASK) == PHASE_DO;
-    DPRINTF("Command complete status=%d\n", (int)status);
+    trace_lsi_command_complete(status);
     s->status = status;
     s->command_complete = 2;
     if (s->waiting && s->dbc != 0) {
@@ -795,7 +779,7 @@
     out = (s->sstat1 & PHASE_MASK) == PHASE_DO;
 
     /* host adapter (re)connected */
-    DPRINTF("Data ready tag=0x%x len=%d\n", req->tag, len);
+    trace_lsi_transfer_data(req->tag, len);
     s->current->dma_len = len;
     s->command_complete = 1;
     if (s->waiting) {
@@ -814,7 +798,7 @@
     uint32_t id;
     int n;
 
-    DPRINTF("Send command len=%d\n", s->dbc);
+    trace_lsi_do_command(s->dbc);
     if (s->dbc > 16)
         s->dbc = 16;
     pci_dma_read(PCI_DEVICE(s), s->dnad, buf, s->dbc);
@@ -862,9 +846,10 @@
 static void lsi_do_status(LSIState *s)
 {
     uint8_t status;
-    DPRINTF("Get status len=%d status=%d\n", s->dbc, s->status);
-    if (s->dbc != 1)
-        BADF("Bad Status move\n");
+    trace_lsi_do_status(s->dbc, s->status);
+    if (s->dbc != 1) {
+        trace_lsi_do_status_error();
+    }
     s->dbc = 1;
     status = s->status;
     s->sfbr = status;
@@ -877,7 +862,7 @@
 static void lsi_do_msgin(LSIState *s)
 {
     int len;
-    DPRINTF("Message in len=%d/%d\n", s->dbc, s->msg_len);
+    trace_lsi_do_msgin(s->dbc, s->msg_len);
     s->sfbr = s->msg[0];
     len = s->msg_len;
     if (len > s->dbc)
@@ -942,36 +927,36 @@
         current_req = lsi_find_by_tag(s, current_tag);
     }
 
-    DPRINTF("MSG out len=%d\n", s->dbc);
+    trace_lsi_do_msgout(s->dbc);
     while (s->dbc) {
         msg = lsi_get_msgbyte(s);
         s->sfbr = msg;
 
         switch (msg) {
         case 0x04:
-            DPRINTF("MSG: Disconnect\n");
+            trace_lsi_do_msgout_disconnect();
             lsi_disconnect(s);
             break;
         case 0x08:
-            DPRINTF("MSG: No Operation\n");
+            trace_lsi_do_msgout_noop();
             lsi_set_phase(s, PHASE_CMD);
             break;
         case 0x01:
             len = lsi_get_msgbyte(s);
             msg = lsi_get_msgbyte(s);
             (void)len; /* avoid a warning about unused variable*/
-            DPRINTF("Extended message 0x%x (len %d)\n", msg, len);
+            trace_lsi_do_msgout_extended(msg, len);
             switch (msg) {
             case 1:
-                DPRINTF("SDTR (ignored)\n");
+                trace_lsi_do_msgout_ignored("SDTR");
                 lsi_skip_msgbytes(s, 2);
                 break;
             case 3:
-                DPRINTF("WDTR (ignored)\n");
+                trace_lsi_do_msgout_ignored("WDTR");
                 lsi_skip_msgbytes(s, 1);
                 break;
             case 4:
-                DPRINTF("PPR (ignored)\n");
+                trace_lsi_do_msgout_ignored("PPR");
                 lsi_skip_msgbytes(s, 5);
                 break;
             default:
@@ -980,19 +965,20 @@
             break;
         case 0x20: /* SIMPLE queue */
             s->select_tag |= lsi_get_msgbyte(s) | LSI_TAG_VALID;
-            DPRINTF("SIMPLE queue tag=0x%x\n", s->select_tag & 0xff);
+            trace_lsi_do_msgout_simplequeue(s->select_tag & 0xff);
             break;
         case 0x21: /* HEAD of queue */
-            BADF("HEAD queue not implemented\n");
+            qemu_log_mask(LOG_UNIMP, "lsi_scsi: HEAD queue not implemented\n");
             s->select_tag |= lsi_get_msgbyte(s) | LSI_TAG_VALID;
             break;
         case 0x22: /* ORDERED queue */
-            BADF("ORDERED queue not implemented\n");
+            qemu_log_mask(LOG_UNIMP,
+                          "lsi_scsi: ORDERED queue not implemented\n");
             s->select_tag |= lsi_get_msgbyte(s) | LSI_TAG_VALID;
             break;
         case 0x0d:
             /* The ABORT TAG message clears the current I/O process only. */
-            DPRINTF("MSG: ABORT TAG tag=0x%x\n", current_tag);
+            trace_lsi_do_msgout_abort(current_tag);
             if (current_req) {
                 scsi_req_cancel(current_req->req);
             }
@@ -1004,17 +990,17 @@
             /* The ABORT message clears all I/O processes for the selecting
                initiator on the specified logical unit of the target. */
             if (msg == 0x06) {
-                DPRINTF("MSG: ABORT tag=0x%x\n", current_tag);
+                trace_lsi_do_msgout_abort(current_tag);
             }
             /* The CLEAR QUEUE message clears all I/O processes for all
                initiators on the specified logical unit of the target. */
             if (msg == 0x0e) {
-                DPRINTF("MSG: CLEAR QUEUE tag=0x%x\n", current_tag);
+                trace_lsi_do_msgout_clearqueue(current_tag);
             }
             /* The BUS DEVICE RESET message clears all I/O processes for all
                initiators on all logical units of the target. */
             if (msg == 0x0c) {
-                DPRINTF("MSG: BUS DEVICE RESET tag=0x%x\n", current_tag);
+                trace_lsi_do_msgout_busdevicereset(current_tag);
             }
 
             /* clear the current I/O process */
@@ -1042,14 +1028,14 @@
                 goto bad;
             }
             s->current_lun = msg & 7;
-            DPRINTF("Select LUN %d\n", s->current_lun);
+            trace_lsi_do_msgout_select(s->current_lun);
             lsi_set_phase(s, PHASE_CMD);
             break;
         }
     }
     return;
 bad:
-    BADF("Unimplemented message 0x%02x\n", msg);
+    qemu_log_mask(LOG_UNIMP, "lsi_scsi: Unimplemented message 0x%02x\n", msg);
     lsi_set_phase(s, PHASE_MI);
     lsi_add_msg_byte(s, 7); /* MESSAGE REJECT */
     s->msg_action = 0;
@@ -1061,7 +1047,7 @@
     int n;
     uint8_t buf[LSI_BUF_SIZE];
 
-    DPRINTF("memcpy dest 0x%08x src 0x%08x count %d\n", dest, src, count);
+    trace_lsi_memcpy(dest, src, count);
     while (count) {
         n = (count > LSI_BUF_SIZE) ? LSI_BUF_SIZE : count;
         lsi_mem_read(s, src, buf, n);
@@ -1076,7 +1062,7 @@
 {
     lsi_request *p;
 
-    DPRINTF("Wait Reselect\n");
+    trace_lsi_wait_reselect();
 
     QTAILQ_FOREACH(p, &s->queue, next) {
         if (p->pending) {
@@ -1109,14 +1095,14 @@
     }
     addr = read_dword(s, s->dsp + 4);
     addr_high = 0;
-    DPRINTF("SCRIPTS dsp=%08x opcode %08x arg %08x\n", s->dsp, insn, addr);
+    trace_lsi_execute_script(s->dsp, insn, addr);
     s->dsps = addr;
     s->dcmd = insn >> 24;
     s->dsp += 8;
     switch (insn >> 30) {
     case 0: /* Block move.  */
         if (s->sist1 & LSI_SIST1_STO) {
-            DPRINTF("Delayed select timeout\n");
+            trace_lsi_execute_script_blockmove_delayed();
             lsi_stop_script(s);
             break;
         }
@@ -1171,8 +1157,9 @@
                     addr_high = s->dbms;
                     break;
                 default:
-                    BADF("Illegal selector specified (0x%x > 0x15)"
-                         " for 64-bit DMA block move", selector);
+                    qemu_log_mask(LOG_GUEST_ERROR,
+                          "lsi_scsi: Illegal selector specified (0x%x > 0x15) "
+                          "for 64-bit DMA block move\n", selector);
                     break;
                 }
             }
@@ -1184,8 +1171,8 @@
             s->ia = s->dsp - 12;
         }
         if ((s->sstat1 & PHASE_MASK) != ((insn >> 24) & 7)) {
-            DPRINTF("Wrong phase got %d expected %d\n",
-                    s->sstat1 & PHASE_MASK, (insn >> 24) & 7);
+            trace_lsi_execute_script_blockmove_badphase(s->sstat1 & PHASE_MASK,
+                                                        (insn >> 24) & 7);
             lsi_script_scsi_interrupt(s, LSI_SIST0_MA, 0);
             break;
         }
@@ -1217,8 +1204,8 @@
             lsi_do_msgin(s);
             break;
         default:
-            BADF("Unimplemented phase %d\n", s->sstat1 & PHASE_MASK);
-            exit(1);
+            qemu_log_mask(LOG_UNIMP, "lsi_scsi: Unimplemented phase %d\n",
+                          s->sstat1 & PHASE_MASK);
         }
         s->dfifo = s->dbc & 0xff;
         s->ctest5 = (s->ctest5 & 0xfc) | ((s->dbc >> 8) & 3);
@@ -1246,7 +1233,7 @@
             case 0: /* Select */
                 s->sdid = id;
                 if (s->scntl1 & LSI_SCNTL1_CON) {
-                    DPRINTF("Already reselected, jumping to alternative address\n");
+                    trace_lsi_execute_script_io_alreadyreselected();
                     s->dsp = s->dnad;
                     break;
                 }
@@ -1256,8 +1243,8 @@
                     lsi_bad_selection(s, id);
                     break;
                 }
-                DPRINTF("Selected target %d%s\n",
-                        id, insn & (1 << 3) ? " ATN" : "");
+                trace_lsi_execute_script_io_selected(id,
+                                             insn & (1 << 3) ? " ATN" : "");
                 /* ??? Linux drivers compain when this is set.  Maybe
                    it only applies in low-level mode (unimplemented).
                 lsi_script_scsi_interrupt(s, LSI_SIST0_CMP, 0); */
@@ -1269,7 +1256,7 @@
                 lsi_set_phase(s, PHASE_MO);
                 break;
             case 1: /* Disconnect */
-                DPRINTF("Wait Disconnect\n");
+                trace_lsi_execute_script_io_disconnect();
                 s->scntl1 &= ~LSI_SCNTL1_CON;
                 break;
             case 2: /* Wait Reselect */
@@ -1278,7 +1265,7 @@
                 }
                 break;
             case 3: /* Set */
-                DPRINTF("Set%s%s%s%s\n",
+                trace_lsi_execute_script_io_set(
                         insn & (1 << 3) ? " ATN" : "",
                         insn & (1 << 6) ? " ACK" : "",
                         insn & (1 << 9) ? " TM" : "",
@@ -1288,14 +1275,14 @@
                     lsi_set_phase(s, PHASE_MO);
                 }
                 if (insn & (1 << 9)) {
-                    BADF("Target mode not implemented\n");
-                    exit(1);
+                    qemu_log_mask(LOG_UNIMP,
+                        "lsi_scsi: Target mode not implemented\n");
                 }
                 if (insn & (1 << 10))
                     s->carry = 1;
                 break;
             case 4: /* Clear */
-                DPRINTF("Clear%s%s%s%s\n",
+                trace_lsi_execute_script_io_clear(
                         insn & (1 << 3) ? " ATN" : "",
                         insn & (1 << 6) ? " ACK" : "",
                         insn & (1 << 9) ? " TM" : "",
@@ -1313,18 +1300,17 @@
             uint8_t data8;
             int reg;
             int operator;
-#ifdef DEBUG_LSI
+
             static const char *opcode_names[3] =
                 {"Write", "Read", "Read-Modify-Write"};
             static const char *operator_names[8] =
                 {"MOV", "SHL", "OR", "XOR", "AND", "SHR", "ADD", "ADC"};
-#endif
 
             reg = ((insn >> 16) & 0x7f) | (insn & 0x80);
             data8 = (insn >> 8) & 0xff;
             opcode = (insn >> 27) & 7;
             operator = (insn >> 24) & 7;
-            DPRINTF("%s reg 0x%x %s data8=0x%02x sfbr=0x%02x%s\n",
+            trace_lsi_execute_script_io_opcode(
                     opcode_names[opcode - 5], reg,
                     operator_names[operator], data8, s->sfbr,
                     (insn & (1 << 23)) ? " SFBR" : "");
@@ -1404,21 +1390,21 @@
             int jmp;
 
             if ((insn & 0x002e0000) == 0) {
-                DPRINTF("NOP\n");
+                trace_lsi_execute_script_tc_nop();
                 break;
             }
             if (s->sist1 & LSI_SIST1_STO) {
-                DPRINTF("Delayed select timeout\n");
+                trace_lsi_execute_script_tc_delayedselect_timeout();
                 lsi_stop_script(s);
                 break;
             }
             cond = jmp = (insn & (1 << 19)) != 0;
             if (cond == jmp && (insn & (1 << 21))) {
-                DPRINTF("Compare carry %d\n", s->carry == jmp);
+                trace_lsi_execute_script_tc_compc(s->carry == jmp);
                 cond = s->carry != 0;
             }
             if (cond == jmp && (insn & (1 << 17))) {
-                DPRINTF("Compare phase %d %c= %d\n",
+                trace_lsi_execute_script_tc_compp(
                         (s->sstat1 & PHASE_MASK),
                         jmp ? '=' : '!',
                         ((insn >> 24) & 7));
@@ -1428,7 +1414,7 @@
                 uint8_t mask;
 
                 mask = (~insn >> 8) & 0xff;
-                DPRINTF("Compare data 0x%x & 0x%x %c= 0x%x\n",
+                trace_lsi_execute_script_tc_compd(
                         s->sfbr, mask, jmp ? '=' : '!', insn & mask);
                 cond = (s->sfbr & mask) == (insn & mask);
             }
@@ -1439,21 +1425,21 @@
                 }
                 switch ((insn >> 27) & 7) {
                 case 0: /* Jump */
-                    DPRINTF("Jump to 0x%08x\n", addr);
+                    trace_lsi_execute_script_tc_jump(addr);
                     s->adder = addr;
                     s->dsp = addr;
                     break;
                 case 1: /* Call */
-                    DPRINTF("Call 0x%08x\n", addr);
+                    trace_lsi_execute_script_tc_call(addr);
                     s->temp = s->dsp;
                     s->dsp = addr;
                     break;
                 case 2: /* Return */
-                    DPRINTF("Return to 0x%08x\n", s->temp);
+                    trace_lsi_execute_script_tc_return(s->temp);
                     s->dsp = s->temp;
                     break;
                 case 3: /* Interrupt */
-                    DPRINTF("Interrupt 0x%08x\n", s->dsps);
+                    trace_lsi_execute_script_tc_interrupt(s->dsps);
                     if ((insn & (1 << 20)) != 0) {
                         s->istat0 |= LSI_ISTAT0_INTF;
                         lsi_update_irq(s);
@@ -1462,12 +1448,12 @@
                     }
                     break;
                 default:
-                    DPRINTF("Illegal transfer control\n");
+                    trace_lsi_execute_script_tc_illegal();
                     lsi_script_dma_interrupt(s, LSI_DSTAT_IID);
                     break;
                 }
             } else {
-                DPRINTF("Control condition failed\n");
+                trace_lsi_execute_script_tc_cc_failed();
             }
         }
         break;
@@ -1495,13 +1481,12 @@
             reg = (insn >> 16) & 0xff;
             if (insn & (1 << 24)) {
                 pci_dma_read(pci_dev, addr, data, n);
-                DPRINTF("Load reg 0x%x size %d addr 0x%08x = %08x\n", reg, n,
-                        addr, *(int *)data);
+                trace_lsi_execute_script_mm_load(reg, n, addr, *(int *)data);
                 for (i = 0; i < n; i++) {
                     lsi_reg_writeb(s, reg + i, data[i]);
                 }
             } else {
-                DPRINTF("Store reg 0x%x size %d addr 0x%08x\n", reg, n, addr);
+                trace_lsi_execute_script_mm_store(reg, n, addr);
                 for (i = 0; i < n; i++) {
                     data[i] = lsi_reg_readb(s, reg + i);
                 }
@@ -1515,8 +1500,10 @@
            assume this is the case and force an unexpected device disconnect.
            This is apparently sufficient to beat the drivers into submission.
          */
-        if (!(s->sien0 & LSI_SIST0_UDC))
-            fprintf(stderr, "inf. loop with UDC masked\n");
+        if (!(s->sien0 & LSI_SIST0_UDC)) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "lsi_scsi: inf. loop with UDC masked\n");
+        }
         lsi_script_scsi_interrupt(s, LSI_SIST0_UDC, 0);
         lsi_disconnect(s);
     } else if (s->istat1 & LSI_ISTAT1_SRUN && !s->waiting) {
@@ -1526,7 +1513,7 @@
             goto again;
         }
     }
-    DPRINTF("SCRIPTS execution stopped\n");
+    trace_lsi_execute_script_stop();
 }
 
 static uint8_t lsi_reg_readb(LSIState *s, int offset)
@@ -1761,10 +1748,8 @@
 #undef CASE_GET_REG24
 #undef CASE_GET_REG32
 
-#ifdef DEBUG_LSI_REG
-    DPRINTF("Read reg %s %x = %02x\n",
-            offset < ARRAY_SIZE(names) ? names[offset] : "???", offset, ret);
-#endif
+    trace_lsi_reg_read(offset < ARRAY_SIZE(names) ? names[offset] : "???",
+                       offset, ret);
 
     return ret;
 }
@@ -1782,21 +1767,22 @@
     case addr + 2: s->name &= 0xff00ffff; s->name |= val << 16; break; \
     case addr + 3: s->name &= 0x00ffffff; s->name |= val << 24; break;
 
-#ifdef DEBUG_LSI_REG
-    DPRINTF("Write reg %s %x = %02x\n",
-            offset < ARRAY_SIZE(names) ? names[offset] : "???", offset, val);
-#endif
+    trace_lsi_reg_write(offset < ARRAY_SIZE(names) ? names[offset] : "???",
+                        offset, val);
+
     switch (offset) {
     case 0x00: /* SCNTL0 */
         s->scntl0 = val;
         if (val & LSI_SCNTL0_START) {
-            BADF("Start sequence not implemented\n");
+            qemu_log_mask(LOG_UNIMP,
+                          "lsi_scsi: Start sequence not implemented\n");
         }
         break;
     case 0x01: /* SCNTL1 */
         s->scntl1 = val & ~LSI_SCNTL1_SST;
         if (val & LSI_SCNTL1_IARB) {
-            BADF("Immediate Arbritration not implemented\n");
+            qemu_log_mask(LOG_UNIMP,
+                      "lsi_scsi: Immediate Arbritration not implemented\n");
         }
         if (val & LSI_SCNTL1_RST) {
             if (!(s->sstat0 & LSI_SSTAT0_RST)) {
@@ -1823,7 +1809,8 @@
         break;
     case 0x06: /* SDID */
         if ((s->ssid & 0x80) && (val & 0xf) != (s->ssid & 0xf)) {
-            BADF("Destination ID does not match SSID\n");
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "lsi_scsi: Destination ID does not match SSID\n");
         }
         s->sdid = val & 0xf;
         break;
@@ -1851,7 +1838,7 @@
             lsi_update_irq(s);
         }
         if (s->waiting == 1 && val & LSI_ISTAT0_SIGP) {
-            DPRINTF("Woken by SIGP\n");
+            trace_lsi_awoken();
             s->waiting = 0;
             s->dsp = s->dnad;
             lsi_execute_script(s);
@@ -1878,13 +1865,15 @@
     CASE_SET_REG32(temp, 0x1c)
     case 0x21: /* CTEST4 */
         if (val & 7) {
-           BADF("Unimplemented CTEST4-FBL 0x%x\n", val);
+            qemu_log_mask(LOG_UNIMP,
+                          "lsi_scsi: Unimplemented CTEST4-FBL 0x%x\n", val);
         }
         s->ctest4 = val;
         break;
     case 0x22: /* CTEST5 */
         if (val & (LSI_CTEST5_ADCK | LSI_CTEST5_BBCK)) {
-            BADF("CTEST5 DMA increment not implemented\n");
+            qemu_log_mask(LOG_UNIMP,
+                          "lsi_scsi: CTEST5 DMA increment not implemented\n");
         }
         s->ctest5 = val;
         break;
@@ -1941,7 +1930,8 @@
         break;
     case 0x49: /* STIME1 */
         if (val & 0xf) {
-            DPRINTF("General purpose timer not implemented\n");
+            qemu_log_mask(LOG_UNIMP,
+                          "lsi_scsi: General purpose timer not implemented\n");
             /* ??? Raising the interrupt immediately seems to be sufficient
                to keep the FreeBSD driver happy.  */
             lsi_script_scsi_interrupt(s, 0, LSI_SIST1_GEN);
@@ -1958,13 +1948,15 @@
         break;
     case 0x4e: /* STEST2 */
         if (val & 1) {
-            BADF("Low level mode not implemented\n");
+            qemu_log_mask(LOG_UNIMP,
+                          "lsi_scsi: Low level mode not implemented\n");
         }
         s->stest2 = val;
         break;
     case 0x4f: /* STEST3 */
         if (val & 0x41) {
-            BADF("SCSI FIFO test mode not implemented\n");
+            qemu_log_mask(LOG_UNIMP,
+                          "lsi_scsi: SCSI FIFO test mode not implemented\n");
         }
         s->stest3 = val;
         break;
diff --git a/hw/scsi/mptendian.c b/hw/scsi/mptendian.c
index 8ae39a7..79f9973 100644
--- a/hw/scsi/mptendian.c
+++ b/hw/scsi/mptendian.c
@@ -35,152 +35,155 @@
 
 static void mptsas_fix_sgentry_endianness(MPISGEntry *sge)
 {
-    le32_to_cpus(&sge->FlagsLength);
+    sge->FlagsLength = le32_to_cpu(sge->FlagsLength);
     if (sge->FlagsLength & MPI_SGE_FLAGS_64_BIT_ADDRESSING) {
-       le64_to_cpus(&sge->u.Address64);
+        sge->u.Address64 = le64_to_cpu(sge->u.Address64);
     } else {
-       le32_to_cpus(&sge->u.Address32);
+        sge->u.Address32 = le32_to_cpu(sge->u.Address32);
     }
 }
 
 static void mptsas_fix_sgentry_endianness_reply(MPISGEntry *sge)
 {
     if (sge->FlagsLength & MPI_SGE_FLAGS_64_BIT_ADDRESSING) {
-       cpu_to_le64s(&sge->u.Address64);
+        sge->u.Address64 = cpu_to_le64(sge->u.Address64);
     } else {
-       cpu_to_le32s(&sge->u.Address32);
+        sge->u.Address32 = cpu_to_le32(sge->u.Address32);
     }
-    cpu_to_le32s(&sge->FlagsLength);
+    sge->FlagsLength = cpu_to_le32(sge->FlagsLength);
 }
 
 void mptsas_fix_scsi_io_endianness(MPIMsgSCSIIORequest *req)
 {
-    le32_to_cpus(&req->MsgContext);
-    le32_to_cpus(&req->Control);
-    le32_to_cpus(&req->DataLength);
-    le32_to_cpus(&req->SenseBufferLowAddr);
+    req->MsgContext = le32_to_cpu(req->MsgContext);
+    req->Control = le32_to_cpu(req->Control);
+    req->DataLength = le32_to_cpu(req->DataLength);
+    req->SenseBufferLowAddr = le32_to_cpu(req->SenseBufferLowAddr);
 }
 
 void mptsas_fix_scsi_io_reply_endianness(MPIMsgSCSIIOReply *reply)
 {
-    cpu_to_le32s(&reply->MsgContext);
-    cpu_to_le16s(&reply->IOCStatus);
-    cpu_to_le32s(&reply->IOCLogInfo);
-    cpu_to_le32s(&reply->TransferCount);
-    cpu_to_le32s(&reply->SenseCount);
-    cpu_to_le32s(&reply->ResponseInfo);
-    cpu_to_le16s(&reply->TaskTag);
+    reply->MsgContext = cpu_to_le32(reply->MsgContext);
+    reply->IOCStatus = cpu_to_le16(reply->IOCStatus);
+    reply->IOCLogInfo = cpu_to_le32(reply->IOCLogInfo);
+    reply->TransferCount = cpu_to_le32(reply->TransferCount);
+    reply->SenseCount = cpu_to_le32(reply->SenseCount);
+    reply->ResponseInfo = cpu_to_le32(reply->ResponseInfo);
+    reply->TaskTag = cpu_to_le16(reply->TaskTag);
 }
 
 void mptsas_fix_scsi_task_mgmt_endianness(MPIMsgSCSITaskMgmt *req)
 {
-    le32_to_cpus(&req->MsgContext);
-    le32_to_cpus(&req->TaskMsgContext);
+    req->MsgContext = le32_to_cpu(req->MsgContext);
+    req->TaskMsgContext = le32_to_cpu(req->TaskMsgContext);
 }
 
 void mptsas_fix_scsi_task_mgmt_reply_endianness(MPIMsgSCSITaskMgmtReply *reply)
 {
-    cpu_to_le32s(&reply->MsgContext);
-    cpu_to_le16s(&reply->IOCStatus);
-    cpu_to_le32s(&reply->IOCLogInfo);
-    cpu_to_le32s(&reply->TerminationCount);
+    reply->MsgContext = cpu_to_le32(reply->MsgContext);
+    reply->IOCStatus = cpu_to_le16(reply->IOCStatus);
+    reply->IOCLogInfo = cpu_to_le32(reply->IOCLogInfo);
+    reply->TerminationCount = cpu_to_le32(reply->TerminationCount);
 }
 
 void mptsas_fix_ioc_init_endianness(MPIMsgIOCInit *req)
 {
-    le32_to_cpus(&req->MsgContext);
-    le16_to_cpus(&req->ReplyFrameSize);
-    le32_to_cpus(&req->HostMfaHighAddr);
-    le32_to_cpus(&req->SenseBufferHighAddr);
-    le32_to_cpus(&req->ReplyFifoHostSignalingAddr);
+    req->MsgContext = le32_to_cpu(req->MsgContext);
+    req->ReplyFrameSize = le16_to_cpu(req->ReplyFrameSize);
+    req->HostMfaHighAddr = le32_to_cpu(req->HostMfaHighAddr);
+    req->SenseBufferHighAddr = le32_to_cpu(req->SenseBufferHighAddr);
+    req->ReplyFifoHostSignalingAddr =
+        le32_to_cpu(req->ReplyFifoHostSignalingAddr);
     mptsas_fix_sgentry_endianness(&req->HostPageBufferSGE);
-    le16_to_cpus(&req->MsgVersion);
-    le16_to_cpus(&req->HeaderVersion);
+    req->MsgVersion = le16_to_cpu(req->MsgVersion);
+    req->HeaderVersion = le16_to_cpu(req->HeaderVersion);
 }
 
 void mptsas_fix_ioc_init_reply_endianness(MPIMsgIOCInitReply *reply)
 {
-    cpu_to_le32s(&reply->MsgContext);
-    cpu_to_le16s(&reply->IOCStatus);
-    cpu_to_le32s(&reply->IOCLogInfo);
+    reply->MsgContext = cpu_to_le32(reply->MsgContext);
+    reply->IOCStatus = cpu_to_le16(reply->IOCStatus);
+    reply->IOCLogInfo = cpu_to_le32(reply->IOCLogInfo);
 }
 
 void mptsas_fix_ioc_facts_endianness(MPIMsgIOCFacts *req)
 {
-    le32_to_cpus(&req->MsgContext);
+    req->MsgContext = le32_to_cpu(req->MsgContext);
 }
 
 void mptsas_fix_ioc_facts_reply_endianness(MPIMsgIOCFactsReply *reply)
 {
-    cpu_to_le16s(&reply->MsgVersion);
-    cpu_to_le16s(&reply->HeaderVersion);
-    cpu_to_le32s(&reply->MsgContext);
-    cpu_to_le16s(&reply->IOCExceptions);
-    cpu_to_le16s(&reply->IOCStatus);
-    cpu_to_le32s(&reply->IOCLogInfo);
-    cpu_to_le16s(&reply->ReplyQueueDepth);
-    cpu_to_le16s(&reply->RequestFrameSize);
-    cpu_to_le16s(&reply->ProductID);
-    cpu_to_le32s(&reply->CurrentHostMfaHighAddr);
-    cpu_to_le16s(&reply->GlobalCredits);
-    cpu_to_le32s(&reply->CurrentSenseBufferHighAddr);
-    cpu_to_le16s(&reply->CurReplyFrameSize);
-    cpu_to_le32s(&reply->FWImageSize);
-    cpu_to_le32s(&reply->IOCCapabilities);
-    cpu_to_le16s(&reply->HighPriorityQueueDepth);
+    reply->MsgVersion = cpu_to_le16(reply->MsgVersion);
+    reply->HeaderVersion = cpu_to_le16(reply->HeaderVersion);
+    reply->MsgContext = cpu_to_le32(reply->MsgContext);
+    reply->IOCExceptions = cpu_to_le16(reply->IOCExceptions);
+    reply->IOCStatus = cpu_to_le16(reply->IOCStatus);
+    reply->IOCLogInfo = cpu_to_le32(reply->IOCLogInfo);
+    reply->ReplyQueueDepth = cpu_to_le16(reply->ReplyQueueDepth);
+    reply->RequestFrameSize = cpu_to_le16(reply->RequestFrameSize);
+    reply->ProductID = cpu_to_le16(reply->ProductID);
+    reply->CurrentHostMfaHighAddr = cpu_to_le32(reply->CurrentHostMfaHighAddr);
+    reply->GlobalCredits = cpu_to_le16(reply->GlobalCredits);
+    reply->CurrentSenseBufferHighAddr =
+        cpu_to_le32(reply->CurrentSenseBufferHighAddr);
+    reply->CurReplyFrameSize = cpu_to_le16(reply->CurReplyFrameSize);
+    reply->FWImageSize = cpu_to_le32(reply->FWImageSize);
+    reply->IOCCapabilities = cpu_to_le32(reply->IOCCapabilities);
+    reply->HighPriorityQueueDepth = cpu_to_le16(reply->HighPriorityQueueDepth);
     mptsas_fix_sgentry_endianness_reply(&reply->HostPageBufferSGE);
-    cpu_to_le32s(&reply->ReplyFifoHostSignalingAddr);
+    reply->ReplyFifoHostSignalingAddr =
+        cpu_to_le32(reply->ReplyFifoHostSignalingAddr);
 }
 
 void mptsas_fix_config_endianness(MPIMsgConfig *req)
 {
-    le16_to_cpus(&req->ExtPageLength);
-    le32_to_cpus(&req->MsgContext);
-    le32_to_cpus(&req->PageAddress);
+    req->ExtPageLength = le16_to_cpu(req->ExtPageLength);
+    req->MsgContext = le32_to_cpu(req->MsgContext);
+    req->PageAddress = le32_to_cpu(req->PageAddress);
     mptsas_fix_sgentry_endianness(&req->PageBufferSGE);
 }
 
 void mptsas_fix_config_reply_endianness(MPIMsgConfigReply *reply)
 {
-    cpu_to_le16s(&reply->ExtPageLength);
-    cpu_to_le32s(&reply->MsgContext);
-    cpu_to_le16s(&reply->IOCStatus);
-    cpu_to_le32s(&reply->IOCLogInfo);
+    reply->ExtPageLength = cpu_to_le16(reply->ExtPageLength);
+    reply->MsgContext = cpu_to_le32(reply->MsgContext);
+    reply->IOCStatus = cpu_to_le16(reply->IOCStatus);
+    reply->IOCLogInfo = cpu_to_le32(reply->IOCLogInfo);
 }
 
 void mptsas_fix_port_facts_endianness(MPIMsgPortFacts *req)
 {
-    le32_to_cpus(&req->MsgContext);
+    req->MsgContext = le32_to_cpu(req->MsgContext);
 }
 
 void mptsas_fix_port_facts_reply_endianness(MPIMsgPortFactsReply *reply)
 {
-    cpu_to_le32s(&reply->MsgContext);
-    cpu_to_le16s(&reply->IOCStatus);
-    cpu_to_le32s(&reply->IOCLogInfo);
-    cpu_to_le16s(&reply->MaxDevices);
-    cpu_to_le16s(&reply->PortSCSIID);
-    cpu_to_le16s(&reply->ProtocolFlags);
-    cpu_to_le16s(&reply->MaxPostedCmdBuffers);
-    cpu_to_le16s(&reply->MaxPersistentIDs);
-    cpu_to_le16s(&reply->MaxLanBuckets);
+    reply->MsgContext = cpu_to_le32(reply->MsgContext);
+    reply->IOCStatus = cpu_to_le16(reply->IOCStatus);
+    reply->IOCLogInfo = cpu_to_le32(reply->IOCLogInfo);
+    reply->MaxDevices = cpu_to_le16(reply->MaxDevices);
+    reply->PortSCSIID = cpu_to_le16(reply->PortSCSIID);
+    reply->ProtocolFlags = cpu_to_le16(reply->ProtocolFlags);
+    reply->MaxPostedCmdBuffers = cpu_to_le16(reply->MaxPostedCmdBuffers);
+    reply->MaxPersistentIDs = cpu_to_le16(reply->MaxPersistentIDs);
+    reply->MaxLanBuckets = cpu_to_le16(reply->MaxLanBuckets);
 }
 
 void mptsas_fix_port_enable_endianness(MPIMsgPortEnable *req)
 {
-    le32_to_cpus(&req->MsgContext);
+    req->MsgContext = le32_to_cpu(req->MsgContext);
 }
 
 void mptsas_fix_port_enable_reply_endianness(MPIMsgPortEnableReply *reply)
 {
-    cpu_to_le32s(&reply->MsgContext);
-    cpu_to_le16s(&reply->IOCStatus);
-    cpu_to_le32s(&reply->IOCLogInfo);
+    reply->MsgContext = cpu_to_le32(reply->MsgContext);
+    reply->IOCStatus = cpu_to_le16(reply->IOCStatus);
+    reply->IOCLogInfo = cpu_to_le32(reply->IOCLogInfo);
 }
 
 void mptsas_fix_event_notification_endianness(MPIMsgEventNotify *req)
 {
-    le32_to_cpus(&req->MsgContext);
+    req->MsgContext = le32_to_cpu(req->MsgContext);
 }
 
 void mptsas_fix_event_notification_reply_endianness(MPIMsgEventNotifyReply *reply)
@@ -188,16 +191,16 @@
     int length = reply->EventDataLength;
     int i;
 
-    cpu_to_le16s(&reply->EventDataLength);
-    cpu_to_le32s(&reply->MsgContext);
-    cpu_to_le16s(&reply->IOCStatus);
-    cpu_to_le32s(&reply->IOCLogInfo);
-    cpu_to_le32s(&reply->Event);
-    cpu_to_le32s(&reply->EventContext);
+    reply->EventDataLength = cpu_to_le16(reply->EventDataLength);
+    reply->MsgContext = cpu_to_le32(reply->MsgContext);
+    reply->IOCStatus = cpu_to_le16(reply->IOCStatus);
+    reply->IOCLogInfo = cpu_to_le32(reply->IOCLogInfo);
+    reply->Event = cpu_to_le32(reply->Event);
+    reply->EventContext = cpu_to_le32(reply->EventContext);
 
     /* Really depends on the event kind.  This will do for now.  */
     for (i = 0; i < length; i++) {
-        cpu_to_le32s(&reply->Data[i]);
+        reply->Data[i] = cpu_to_le32(reply->Data[i]);
     }
 }
 
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 5ae7baa..e2c5408 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -441,9 +441,18 @@
         }
         switch (error) {
         case 0:
-            /* The command has run, no need to fake sense.  */
+            /* A passthrough command has run and has produced sense data; check
+             * whether the error has to be handled by the guest or should rather
+             * pause the host.
+             */
             assert(r->status && *r->status);
-            scsi_req_complete(&r->req, *r->status);
+            error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense));
+            if (error == ECANCELED || error == EAGAIN || error == ENOTCONN ||
+                error == 0)  {
+                /* These errors are handled by guest. */
+                scsi_req_complete(&r->req, *r->status);
+                return true;
+            }
             break;
         case ENOMEDIUM:
             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
@@ -462,23 +471,17 @@
             break;
         }
     }
-    if (!error) {
-        assert(r->status && *r->status);
-        error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense));
-
-        if (error == ECANCELED || error == EAGAIN || error == ENOTCONN ||
-            error == 0)  {
-            /* These errors are handled by guest. */
-            scsi_req_complete(&r->req, *r->status);
-            return true;
-        }
-    }
 
     blk_error_action(s->qdev.conf.blk, action, is_read, error);
+    if (action == BLOCK_ERROR_ACTION_IGNORE) {
+        scsi_req_complete(&r->req, 0);
+        return true;
+    }
+
     if (action == BLOCK_ERROR_ACTION_STOP) {
         scsi_req_retry(&r->req);
     }
-    return action != BLOCK_ERROR_ACTION_IGNORE;
+    return false;
 }
 
 static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
@@ -2610,6 +2613,12 @@
         return;
     }
 
+    if (s->rotation_rate) {
+        error_report_once("rotation_rate is specified for scsi-block but is "
+                          "not implemented. This option is deprecated and will "
+                          "be removed in a future version");
+    }
+
     /* check we are using a driver managing SG_IO (version 3 and after) */
     rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version);
     if (rc < 0) {
diff --git a/hw/scsi/trace-events b/hw/scsi/trace-events
index 6e299d0..0fb6a99 100644
--- a/hw/scsi/trace-events
+++ b/hw/scsi/trace-events
@@ -229,3 +229,65 @@
 spapr_vscsi_queue_cmd_no_drive(uint64_t lun) "Command for lun 0x%08" PRIx64 " with no drive"
 spapr_vscsi_queue_cmd(uint32_t qtag, unsigned cdb, const char *cmd, int lun, int ret) "Queued command tag 0x%"PRIx32" CMD 0x%x=%s LUN %d ret: %d"
 spapr_vscsi_do_crq(unsigned c0, unsigned c1) "crq: %02x %02x ..."
+
+# hw/scsi/lsi53c895a.c
+lsi_reset(void) "Reset"
+lsi_update_irq(int level, uint8_t dstat, uint8_t sist1, uint8_t sist0) "Update IRQ level %d dstat 0x%02x sist 0x%02x0x%02x"
+lsi_update_irq_disconnected(void) "Handled IRQs & disconnected, looking for pending processes"
+lsi_script_scsi_interrupt(uint8_t stat1, uint8_t stat0, uint8_t sist1, uint8_t sist0) "SCSI Interrupt 0x%02x0x%02x prev 0x%02x0x%02x"
+lsi_script_dma_interrupt(uint8_t stat, uint8_t dstat) "DMA Interrupt 0x%x prev 0x%x"
+lsi_bad_phase_jump(uint32_t dsp) "Data phase mismatch jump to 0x%"PRIX32
+lsi_bad_phase_interrupt(void) "Phase mismatch interrupt"
+lsi_bad_selection(uint32_t id) "Selected absent target %"PRIu32
+lsi_do_dma_unavailable(void) "DMA no data available"
+lsi_do_dma(uint64_t addr, int len) "DMA addr=0x%"PRIx64" len=%d"
+lsi_queue_command(uint32_t tag) "Queueing tag=0x%"PRIx32
+lsi_add_msg_byte_error(void) "MSG IN data too long"
+lsi_add_msg_byte(uint8_t data) "MSG IN 0x%02x"
+lsi_reselect(int id) "Reselected target %d"
+lsi_queue_req_error(void *p) "Multiple IO pending for request %p"
+lsi_queue_req(uint32_t tag) "Queueing IO tag=0x%"PRIx32
+lsi_command_complete(uint32_t status) "Command complete status=%"PRId32
+lsi_transfer_data(uint32_t tag, uint32_t len) "Data ready tag=0x%"PRIx32" len=%"PRId32
+lsi_do_command(uint32_t dbc) "Send command len=%"PRId32
+lsi_do_status(uint32_t dbc, uint8_t status) "Get status len=%"PRId32" status=%d"
+lsi_do_status_error(void) "Bad Status move"
+lsi_do_msgin(uint32_t dbc, int len) "Message in len=%"PRId32" %d"
+lsi_do_msgout(uint32_t dbc) "MSG out len=%"PRId32
+lsi_do_msgout_disconnect(void) "MSG: Disconnect"
+lsi_do_msgout_noop(void) "MSG: No Operation"
+lsi_do_msgout_extended(uint8_t msg, uint8_t len) "Extended message 0x%x (len %d)"
+lsi_do_msgout_ignored(const char *msg) "%s (ignored)"
+lsi_do_msgout_simplequeue(uint8_t select_tag) "SIMPLE queue tag=0x%x"
+lsi_do_msgout_abort(uint32_t tag) "MSG: ABORT TAG tag=0x%"PRIx32
+lsi_do_msgout_clearqueue(uint32_t tag) "MSG: CLEAR QUEUE tag=0x%"PRIx32
+lsi_do_msgout_busdevicereset(uint32_t tag) "MSG: BUS DEVICE RESET tag=0x%"PRIx32
+lsi_do_msgout_select(int id) "Select LUN %d"
+lsi_memcpy(uint32_t dest, uint32_t src, int count) "memcpy dest 0x%"PRIx32" src 0x%"PRIx32" count %d"
+lsi_wait_reselect(void) "Wait Reselect"
+lsi_execute_script(uint32_t dsp, uint32_t insn, uint32_t addr) "SCRIPTS dsp=0x%"PRIx32" opcode 0x%"PRIx32" arg 0x%"PRIx32
+lsi_execute_script_blockmove_delayed(void) "Delayed select timeout"
+lsi_execute_script_blockmove_badphase(uint8_t phase, uint8_t expected) "Wrong phase got %d expected %d"
+lsi_execute_script_io_alreadyreselected(void) "Already reselected, jumping to alternative address"
+lsi_execute_script_io_selected(uint8_t id, const char *atn) "Selected target %d%s"
+lsi_execute_script_io_disconnect(void) "Wait Disconnect"
+lsi_execute_script_io_set(const char *atn, const char *ack, const char *tm, const char *cc) "Set%s%s%s%s"
+lsi_execute_script_io_clear(const char *atn, const char *ack, const char *tm, const char *cc) "Clear%s%s%s%s"
+lsi_execute_script_io_opcode(const char *opcode, int reg, const char *opname, uint8_t data8, uint32_t sfbr, const char *ssfbr) "%s reg 0x%x %s data8=0x%02x sfbr=0x%02x%s"
+lsi_execute_script_tc_nop(void) "NOP"
+lsi_execute_script_tc_delayedselect_timeout(void) "Delayed select timeout"
+lsi_execute_script_tc_compc(int result) "Compare carry %d"
+lsi_execute_script_tc_compp(uint8_t phase, int op, uint8_t insn_phase) "Compare phase %d %c= %d"
+lsi_execute_script_tc_compd(uint32_t sfbr, uint8_t mask, int op, int result) "Compare data 0x%"PRIx32" & 0x%x %c= 0x%x"
+lsi_execute_script_tc_jump(uint32_t addr) "Jump to 0x%"PRIx32
+lsi_execute_script_tc_call(uint32_t addr) "Call 0x%"PRIx32
+lsi_execute_script_tc_return(uint32_t addr) "Return to 0x%"PRIx32
+lsi_execute_script_tc_interrupt(uint32_t addr) "Interrupt 0x%"PRIx32
+lsi_execute_script_tc_illegal(void) "Illegal transfer control"
+lsi_execute_script_tc_cc_failed(void) "Control condition failed"
+lsi_execute_script_mm_load(int reg, int n, uint32_t addr, int data) "Load reg 0x%x size %d addr 0x%"PRIx32" = 0x%08x"
+lsi_execute_script_mm_store(int reg, int n, uint32_t addr) "Store reg 0x%x size %d addr 0x%"PRIx32
+lsi_execute_script_stop(void) "SCRIPTS execution stopped"
+lsi_awoken(void) "Woken by SIGP"
+lsi_reg_read(const char *name, int offset, uint8_t ret) "Read reg %s 0x%x = 0x%02x"
+lsi_reg_write(const char *name, int offset, uint8_t val) "Write reg %s 0x%x = 0x%02x"
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 5a3057d..3aa9971 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -797,16 +797,8 @@
         virtio_scsi_acquire(s);
         blk_set_aio_context(sd->conf.blk, s->ctx);
         virtio_scsi_release(s);
-    }
-}
 
-/* Announce the new device after it has been plugged */
-static void virtio_scsi_post_hotplug(HotplugHandler *hotplug_dev,
-                                     DeviceState *dev)
-{
-    VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev);
-    VirtIOSCSI *s = VIRTIO_SCSI(vdev);
-    SCSIDevice *sd = SCSI_DEVICE(dev);
+    }
 
     if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
         virtio_scsi_acquire(s);
@@ -976,7 +968,6 @@
     vdc->start_ioeventfd = virtio_scsi_dataplane_start;
     vdc->stop_ioeventfd = virtio_scsi_dataplane_stop;
     hc->plug = virtio_scsi_hotplug;
-    hc->post_plug = virtio_scsi_post_hotplug;
     hc->unplug = virtio_scsi_hotunplug;
 }
 
diff --git a/hw/sd/ssi-sd.c b/hw/sd/ssi-sd.c
index 95a143b..623d033 100644
--- a/hw/sd/ssi-sd.c
+++ b/hw/sd/ssi-sd.c
@@ -284,6 +284,8 @@
     k->cs_polarity = SSI_CS_LOW;
     dc->vmsd = &vmstate_ssi_sd;
     dc->reset = ssi_sd_reset;
+    /* Reason: init() method uses drive_get_next() */
+    dc->user_creatable = false;
 }
 
 static const TypeInfo ssi_sd_info = {
diff --git a/hw/sh4/sh_pci.c b/hw/sh4/sh_pci.c
index 4ec2e35..379d068 100644
--- a/hw/sh4/sh_pci.c
+++ b/hw/sh4/sh_pci.c
@@ -120,16 +120,15 @@
     qemu_set_irq(pic[irq_num], level);
 }
 
-static int sh_pci_device_init(SysBusDevice *dev)
+static void sh_pci_device_realize(DeviceState *dev, Error **errp)
 {
-    PCIHostState *phb;
-    SHPCIState *s;
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+    SHPCIState *s = SH_PCI_HOST_BRIDGE(dev);
+    PCIHostState *phb = PCI_HOST_BRIDGE(s);
     int i;
 
-    s = SH_PCI_HOST_BRIDGE(dev);
-    phb = PCI_HOST_BRIDGE(s);
     for (i = 0; i < 4; i++) {
-        sysbus_init_irq(dev, &s->irq[i]);
+        sysbus_init_irq(sbd, &s->irq[i]);
     }
     phb->bus = pci_register_root_bus(DEVICE(dev), "pci",
                                      sh_pci_set_irq, sh_pci_map_irq,
@@ -143,13 +142,12 @@
                              &s->memconfig_p4, 0, 0x224);
     memory_region_init_alias(&s->isa, OBJECT(s), "sh_pci.isa",
                              get_system_io(), 0, 0x40000);
-    sysbus_init_mmio(dev, &s->memconfig_p4);
-    sysbus_init_mmio(dev, &s->memconfig_a7);
+    sysbus_init_mmio(sbd, &s->memconfig_p4);
+    sysbus_init_mmio(sbd, &s->memconfig_a7);
     s->iobr = 0xfe240000;
     memory_region_add_subregion(get_system_memory(), s->iobr, &s->isa);
 
     s->dev = pci_create_simple(phb->bus, PCI_DEVFN(0, 0), "sh_pci_host");
-    return 0;
 }
 
 static void sh_pci_host_realize(PCIDevice *d, Error **errp)
@@ -187,9 +185,9 @@
 
 static void sh_pci_device_class_init(ObjectClass *klass, void *data)
 {
-    SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
-    sdc->init = sh_pci_device_init;
+    dc->realize = sh_pci_device_realize;
 }
 
 static const TypeInfo sh_pci_device_info = {
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index a27e54b..9209394 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -950,6 +950,7 @@
 
 void smbios_entry_add(QemuOpts *opts, Error **errp)
 {
+    Error *err = NULL;
     const char *val;
 
     assert(!smbios_immutable);
@@ -960,12 +961,16 @@
         int size;
         struct smbios_table *table; /* legacy mode only */
 
-        qemu_opts_validate(opts, qemu_smbios_file_opts, &error_fatal);
+        qemu_opts_validate(opts, qemu_smbios_file_opts, &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
 
         size = get_image_size(val);
         if (size == -1 || size < sizeof(struct smbios_structure_header)) {
-            error_report("Cannot read SMBIOS file %s", val);
-            exit(1);
+            error_setg(errp, "Cannot read SMBIOS file %s", val);
+            return;
         }
 
         /*
@@ -978,14 +983,15 @@
                                                     smbios_tables_len);
 
         if (load_image(val, (uint8_t *)header) != size) {
-            error_report("Failed to load SMBIOS file %s", val);
-            exit(1);
+            error_setg(errp, "Failed to load SMBIOS file %s", val);
+            return;
         }
 
         if (test_bit(header->type, have_fields_bitmap)) {
-            error_report("can't load type %d struct, fields already specified!",
-                         header->type);
-            exit(1);
+            error_setg(errp,
+                       "can't load type %d struct, fields already specified!",
+                       header->type);
+            return;
         }
         set_bit(header->type, have_binfile_bitmap);
 
@@ -1030,19 +1036,23 @@
         unsigned long type = strtoul(val, NULL, 0);
 
         if (type > SMBIOS_MAX_TYPE) {
-            error_report("out of range!");
-            exit(1);
+            error_setg(errp, "out of range!");
+            return;
         }
 
         if (test_bit(type, have_binfile_bitmap)) {
-            error_report("can't add fields, binary file already loaded!");
-            exit(1);
+            error_setg(errp, "can't add fields, binary file already loaded!");
+            return;
         }
         set_bit(type, have_fields_bitmap);
 
         switch (type) {
         case 0:
-            qemu_opts_validate(opts, qemu_smbios_type0_opts, &error_fatal);
+            qemu_opts_validate(opts, qemu_smbios_type0_opts, &err);
+            if (err) {
+                error_propagate(errp, err);
+                return;
+            }
             save_opt(&type0.vendor, opts, "vendor");
             save_opt(&type0.version, opts, "version");
             save_opt(&type0.date, opts, "date");
@@ -1051,14 +1061,18 @@
             val = qemu_opt_get(opts, "release");
             if (val) {
                 if (sscanf(val, "%hhu.%hhu", &type0.major, &type0.minor) != 2) {
-                    error_report("Invalid release");
-                    exit(1);
+                    error_setg(errp, "Invalid release");
+                    return;
                 }
                 type0.have_major_minor = true;
             }
             return;
         case 1:
-            qemu_opts_validate(opts, qemu_smbios_type1_opts, &error_fatal);
+            qemu_opts_validate(opts, qemu_smbios_type1_opts, &err);
+            if (err) {
+                error_propagate(errp, err);
+                return;
+            }
             save_opt(&type1.manufacturer, opts, "manufacturer");
             save_opt(&type1.product, opts, "product");
             save_opt(&type1.version, opts, "version");
@@ -1069,14 +1083,18 @@
             val = qemu_opt_get(opts, "uuid");
             if (val) {
                 if (qemu_uuid_parse(val, &qemu_uuid) != 0) {
-                    error_report("Invalid UUID");
-                    exit(1);
+                    error_setg(errp, "Invalid UUID");
+                    return;
                 }
                 qemu_uuid_set = true;
             }
             return;
         case 2:
-            qemu_opts_validate(opts, qemu_smbios_type2_opts, &error_fatal);
+            qemu_opts_validate(opts, qemu_smbios_type2_opts, &err);
+            if (err) {
+                error_propagate(errp, err);
+                return;
+            }
             save_opt(&type2.manufacturer, opts, "manufacturer");
             save_opt(&type2.product, opts, "product");
             save_opt(&type2.version, opts, "version");
@@ -1085,7 +1103,11 @@
             save_opt(&type2.location, opts, "location");
             return;
         case 3:
-            qemu_opts_validate(opts, qemu_smbios_type3_opts, &error_fatal);
+            qemu_opts_validate(opts, qemu_smbios_type3_opts, &err);
+            if (err) {
+                error_propagate(errp, err);
+                return;
+            }
             save_opt(&type3.manufacturer, opts, "manufacturer");
             save_opt(&type3.version, opts, "version");
             save_opt(&type3.serial, opts, "serial");
@@ -1093,7 +1115,11 @@
             save_opt(&type3.sku, opts, "sku");
             return;
         case 4:
-            qemu_opts_validate(opts, qemu_smbios_type4_opts, &error_fatal);
+            qemu_opts_validate(opts, qemu_smbios_type4_opts, &err);
+            if (err) {
+                error_propagate(errp, err);
+                return;
+            }
             save_opt(&type4.sock_pfx, opts, "sock_pfx");
             save_opt(&type4.manufacturer, opts, "manufacturer");
             save_opt(&type4.version, opts, "version");
@@ -1102,11 +1128,19 @@
             save_opt(&type4.part, opts, "part");
             return;
         case 11:
-            qemu_opts_validate(opts, qemu_smbios_type11_opts, &error_fatal);
+            qemu_opts_validate(opts, qemu_smbios_type11_opts, &err);
+            if (err) {
+                error_propagate(errp, err);
+                return;
+            }
             save_opt_list(&type11.nvalues, &type11.values, opts, "value");
             return;
         case 17:
-            qemu_opts_validate(opts, qemu_smbios_type17_opts, &error_fatal);
+            qemu_opts_validate(opts, qemu_smbios_type17_opts, &err);
+            if (err) {
+                error_propagate(errp, err);
+                return;
+            }
             save_opt(&type17.loc_pfx, opts, "loc_pfx");
             save_opt(&type17.bank, opts, "bank");
             save_opt(&type17.manufacturer, opts, "manufacturer");
@@ -1116,12 +1150,12 @@
             type17.speed = qemu_opt_get_number(opts, "speed", 0);
             return;
         default:
-            error_report("Don't know how to build fields for SMBIOS type %ld",
-                         type);
-            exit(1);
+            error_setg(errp,
+                       "Don't know how to build fields for SMBIOS type %ld",
+                       type);
+            return;
         }
     }
 
-    error_report("Must specify type= or file=");
-    exit(1);
+    error_setg(errp, "Must specify type= or file=");
 }
diff --git a/hw/sparc64/niagara.c b/hw/sparc64/niagara.c
index 4fa8cb2..f8a856f 100644
--- a/hw/sparc64/niagara.c
+++ b/hw/sparc64/niagara.c
@@ -29,7 +29,7 @@
 #include "hw/hw.h"
 #include "hw/boards.h"
 #include "hw/char/serial.h"
-#include "hw/empty_slot.h"
+#include "hw/misc/unimp.h"
 #include "hw/loader.h"
 #include "hw/sparc/sparc64.h"
 #include "hw/timer/sun4v-rtc.h"
@@ -161,7 +161,7 @@
         serial_mm_init(sysmem, NIAGARA_UART_BASE, 0, NULL, 115200,
                        serial_hd(0), DEVICE_BIG_ENDIAN);
     }
-    empty_slot_init(NIAGARA_IOBBASE, NIAGARA_IOBSIZE);
+    create_unimplemented_device("sun4v-iob", NIAGARA_IOBBASE, NIAGARA_IOBSIZE);
     sun4v_rtc_init(NIAGARA_RTC_BASE);
 }
 
diff --git a/hw/ssi/xilinx_spi.c b/hw/ssi/xilinx_spi.c
index 83585bc..3dae303 100644
--- a/hw/ssi/xilinx_spi.c
+++ b/hw/ssi/xilinx_spi.c
@@ -319,9 +319,9 @@
     }
 };
 
-static int xilinx_spi_init(SysBusDevice *sbd)
+static void xilinx_spi_realize(DeviceState *dev, Error **errp)
 {
-    DeviceState *dev = DEVICE(sbd);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     XilinxSPI *s = XILINX_SPI(dev);
     int i;
 
@@ -344,8 +344,6 @@
 
     fifo8_create(&s->tx_fifo, FIFO_CAPACITY);
     fifo8_create(&s->rx_fifo, FIFO_CAPACITY);
-
-    return 0;
 }
 
 static const VMStateDescription vmstate_xilinx_spi = {
@@ -368,9 +366,8 @@
 static void xilinx_spi_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-    k->init = xilinx_spi_init;
+    dc->realize = xilinx_spi_realize;
     dc->reset = xlx_spi_reset;
     dc->props = xilinx_spi_properties;
     dc->vmsd = &vmstate_xilinx_spi;
diff --git a/hw/timer/aspeed_timer.c b/hw/timer/aspeed_timer.c
index 54b400b..5c786e5 100644
--- a/hw/timer/aspeed_timer.c
+++ b/hw/timer/aspeed_timer.c
@@ -454,8 +454,7 @@
 
     obj = object_property_get_link(OBJECT(dev), "scu", &err);
     if (!obj) {
-        error_propagate(errp, err);
-        error_prepend(errp, "required link 'scu' not found: ");
+        error_propagate_prepend(errp, err, "required link 'scu' not found: ");
         return;
     }
     s->scu = ASPEED_SCU(obj);
diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
index acee47d..e4e4de8 100644
--- a/hw/timer/mc146818rtc.c
+++ b/hw/timer/mc146818rtc.c
@@ -34,6 +34,7 @@
 #include "qapi/qapi-commands-misc.h"
 #include "qapi/qapi-events-misc.h"
 #include "qapi/visitor.h"
+#include "exec/address-spaces.h"
 
 #ifdef TARGET_I386
 #include "hw/i386/apic.h"
@@ -70,6 +71,7 @@
     ISADevice parent_obj;
 
     MemoryRegion io;
+    MemoryRegion coalesced_io;
     uint8_t cmos_data[128];
     uint8_t cmos_index;
     int32_t base_year;
@@ -990,6 +992,13 @@
     memory_region_init_io(&s->io, OBJECT(s), &cmos_ops, s, "rtc", 2);
     isa_register_ioport(isadev, &s->io, base);
 
+    /* register rtc 0x70 port for coalesced_pio */
+    memory_region_set_flush_coalesced(&s->io);
+    memory_region_init_io(&s->coalesced_io, OBJECT(s), &cmos_ops,
+                          s, "rtc-index", 1);
+    memory_region_add_subregion(&s->io, 0, &s->coalesced_io);
+    memory_region_add_coalescing(&s->coalesced_io, 0, 1);
+
     qdev_set_legacy_instance_id(dev, base, 3);
     qemu_register_reset(rtc_reset, s);
 
diff --git a/hw/timer/sun4v-rtc.c b/hw/timer/sun4v-rtc.c
index 31052322..4e7f6a1 100644
--- a/hw/timer/sun4v-rtc.c
+++ b/hw/timer/sun4v-rtc.c
@@ -14,15 +14,8 @@
 #include "hw/sysbus.h"
 #include "qemu/timer.h"
 #include "hw/timer/sun4v-rtc.h"
+#include "trace.h"
 
-//#define DEBUG_SUN4V_RTC
-
-#ifdef DEBUG_SUN4V_RTC
-#define DPRINTF(fmt, ...)                                       \
-    do { printf("sun4v_rtc: " fmt , ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) do {} while (0)
-#endif
 
 #define TYPE_SUN4V_RTC "sun4v_rtc"
 #define SUN4V_RTC(obj) OBJECT_CHECK(Sun4vRtc, (obj), TYPE_SUN4V_RTC)
@@ -41,14 +34,14 @@
         /* accessing the high 32 bits */
         val >>= 32;
     }
-    DPRINTF("read from " TARGET_FMT_plx " val %lx\n", addr, val);
+    trace_sun4v_rtc_read(addr, val);
     return val;
 }
 
 static void sun4v_rtc_write(void *opaque, hwaddr addr,
                              uint64_t val, unsigned size)
 {
-    DPRINTF("write 0x%x to " TARGET_FMT_plx "\n", (unsigned)val, addr);
+    trace_sun4v_rtc_write(addr, val);
 }
 
 static const MemoryRegionOps sun4v_rtc_ops = {
@@ -70,21 +63,21 @@
     sysbus_mmio_map(s, 0, addr);
 }
 
-static int sun4v_rtc_init1(SysBusDevice *dev)
+static void sun4v_rtc_realize(DeviceState *dev, Error **errp)
 {
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     Sun4vRtc *s = SUN4V_RTC(dev);
 
     memory_region_init_io(&s->iomem, OBJECT(s), &sun4v_rtc_ops, s,
                           "sun4v-rtc", 0x08ULL);
-    sysbus_init_mmio(dev, &s->iomem);
-    return 0;
+    sysbus_init_mmio(sbd, &s->iomem);
 }
 
 static void sun4v_rtc_class_init(ObjectClass *klass, void *data)
 {
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
-    k->init = sun4v_rtc_init1;
+    dc->realize = sun4v_rtc_realize;
 }
 
 static const TypeInfo sun4v_rtc_info = {
diff --git a/hw/timer/trace-events b/hw/timer/trace-events
index fa4213d..75bd3b1 100644
--- a/hw/timer/trace-events
+++ b/hw/timer/trace-events
@@ -56,7 +56,7 @@
 systick_read(uint64_t addr, uint32_t value, unsigned size) "systick read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
 systick_write(uint64_t addr, uint32_t value, unsigned size) "systick write addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
 
-# hw/char/cmsdk_apb_timer.c
+# hw/timer/cmsdk_apb_timer.c
 cmsdk_apb_timer_read(uint64_t offset, uint64_t data, unsigned size) "CMSDK APB timer read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
 cmsdk_apb_timer_write(uint64_t offset, uint64_t data, unsigned size) "CMSDK APB timer write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
 cmsdk_apb_timer_reset(void) "CMSDK APB timer: reset"
@@ -66,5 +66,9 @@
 cmsdk_apb_dualtimer_write(uint64_t offset, uint64_t data, unsigned size) "CMSDK APB dualtimer write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
 cmsdk_apb_dualtimer_reset(void) "CMSDK APB dualtimer: reset"
 
+# hw/timer/sun4v-rtc.c
+sun4v_rtc_read(uint64_t addr, uint64_t value) "read: addr 0x%" PRIx64 " value 0x%" PRIx64
+sun4v_rtc_write(uint64_t addr, uint64_t value) "write: addr 0x%" PRIx64 " value 0x%" PRIx64
+
 # hw/timer/xlnx-zynqmp-rtc.c
 xlnx_zynqmp_rtc_gettime(int year, int month, int day, int hour, int min, int sec) "Get time from host: %d-%d-%d %2d:%02d:%02d"
diff --git a/hw/usb/bus.c b/hw/usb/bus.c
index 11f7720..bf796d6 100644
--- a/hw/usb/bus.c
+++ b/hw/usb/bus.c
@@ -340,8 +340,9 @@
     }
     object_property_set_bool(OBJECT(dev), true, "realized", &err);
     if (err) {
-        error_propagate(errp, err);
-        error_prepend(errp, "Failed to initialize USB device '%s': ", name);
+        error_propagate_prepend(errp, err,
+                                "Failed to initialize USB device '%s': ",
+                                name);
         return NULL;
     }
     return dev;
diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c
index 5c8b3c9..25976ed 100644
--- a/hw/usb/ccid-card-emulated.c
+++ b/hw/usb/ccid-card-emulated.c
@@ -409,6 +409,12 @@
     return 0;
 }
 
+static void clean_event_notifier(EmulatedState *card)
+{
+    event_notifier_set_handler(&card->notifier, NULL);
+    event_notifier_cleanup(&card->notifier);
+}
+
 #define CERTIFICATES_DEFAULT_DB "/etc/pki/nssdb"
 #define CERTIFICATES_ARGS_TEMPLATE\
     "db=\"%s\" use_hw=no soft=(,Virtual Reader,CAC,,%s,%s,%s)"
@@ -493,7 +499,7 @@
     card->reader = NULL;
     card->quit_apdu_thread = 0;
     if (init_event_notifier(card, errp) < 0) {
-        return;
+        goto out1;
     }
 
     card->backend = 0;
@@ -507,7 +513,7 @@
         for (ptable = backend_enum_table; ptable->name != NULL; ++ptable) {
             error_append_hint(errp, "%s\n", ptable->name);
         }
-        return;
+        goto out2;
     }
 
     /* TODO: a passthru backened that works on local machine. third card type?*/
@@ -517,31 +523,39 @@
         } else {
             error_setg(errp, "%s: you must provide all three certs for"
                        " certificates backend", TYPE_EMULATED_CCID);
-            return;
+            goto out2;
         }
     } else {
         if (card->backend != BACKEND_NSS_EMULATED) {
             error_setg(errp, "%s: bad backend specified. The options are:%s"
                        " (default), %s.", TYPE_EMULATED_CCID,
                        BACKEND_NSS_EMULATED_NAME, BACKEND_CERTIFICATES_NAME);
-            return;
+            goto out2;
         }
         if (card->cert1 != NULL || card->cert2 != NULL || card->cert3 != NULL) {
             error_setg(errp, "%s: unexpected cert parameters to nss emulated "
                        "backend", TYPE_EMULATED_CCID);
-            return;
+            goto out2;
         }
         /* default to mirroring the local hardware readers */
         ret = wrap_vcard_emul_init(NULL);
     }
     if (ret != VCARD_EMUL_OK) {
         error_setg(errp, "%s: failed to initialize vcard", TYPE_EMULATED_CCID);
-        return;
+        goto out2;
     }
     qemu_thread_create(&card->event_thread_id, "ccid/event", event_thread,
                        card, QEMU_THREAD_JOINABLE);
     qemu_thread_create(&card->apdu_thread_id, "ccid/apdu", handle_apdu_thread,
                        card, QEMU_THREAD_JOINABLE);
+    return; /* success: cleanup below is for error paths only */
+out2:
+    clean_event_notifier(card);
+out1:
+    qemu_cond_destroy(&card->handle_apdu_cond);
+    qemu_mutex_destroy(&card->handle_apdu_mutex);
+    qemu_mutex_destroy(&card->vreader_mutex);
+    qemu_mutex_destroy(&card->event_list_mutex);
 }
 
 static void emulated_unrealize(CCIDCardState *base, Error **errp)
@@ -556,6 +570,7 @@
     qemu_cond_signal(&card->handle_apdu_cond);
     qemu_thread_join(&card->apdu_thread_id);
 
+    clean_event_notifier(card);
     /* threads exited, can destroy all condvars/mutexes */
     qemu_cond_destroy(&card->handle_apdu_cond);
     qemu_mutex_destroy(&card->handle_apdu_mutex);
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 66656a1..c34cf5b 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -57,7 +57,7 @@
     qemu_irq irq;
     MemoryRegion mem;
     AddressSpace *as;
-    int num_ports;
+    uint32_t num_ports;
     const char *name;
 
     QEMUTimer *eof_timer;
@@ -1850,7 +1850,7 @@
 };
 
 static void usb_ohci_init(OHCIState *ohci, DeviceState *dev,
-                          int num_ports, dma_addr_t localmem_base,
+                          uint32_t num_ports, dma_addr_t localmem_base,
                           char *masterbus, uint32_t firstport,
                           AddressSpace *as, Error **errp)
 {
@@ -1860,7 +1860,7 @@
     ohci->as = as;
 
     if (num_ports > OHCI_MAX_PORTS) {
-        error_setg(errp, "OHCI num-ports=%d is too big (limit is %d ports)",
+        error_setg(errp, "OHCI num-ports=%u is too big (limit is %u ports)",
                    num_ports, OHCI_MAX_PORTS);
         return;
     }
diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
index a2e7a0a..8b3f664 100644
--- a/hw/vfio/Makefile.objs
+++ b/hw/vfio/Makefile.objs
@@ -6,4 +6,5 @@
 obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o
 obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o
 obj-$(CONFIG_SOFTMMU) += spapr.o
+obj-$(CONFIG_VFIO_AP) += ap.o
 endif
diff --git a/hw/vfio/amd-xgbe.c b/hw/vfio/amd-xgbe.c
index 0c4ec4b..ee64a3b 100644
--- a/hw/vfio/amd-xgbe.c
+++ b/hw/vfio/amd-xgbe.c
@@ -20,6 +20,7 @@
     VFIOAmdXgbeDeviceClass *k = VFIO_AMD_XGBE_DEVICE_GET_CLASS(dev);
 
     vdev->compat = g_strdup("amd,xgbe-seattle-v1a");
+    vdev->num_compat = 1;
 
     k->parent_realize(dev, errp);
 }
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
new file mode 100644
index 0000000..3962bb7
--- /dev/null
+++ b/hw/vfio/ap.c
@@ -0,0 +1,208 @@
+/*
+ * VFIO based AP matrix device assignment
+ *
+ * Copyright 2018 IBM Corp.
+ * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
+ *            Halil Pasic <pasic@linux.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "qemu/osdep.h"
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+#include "qapi/error.h"
+#include "hw/sysbus.h"
+#include "hw/vfio/vfio.h"
+#include "hw/vfio/vfio-common.h"
+#include "hw/s390x/ap-device.h"
+#include "qemu/error-report.h"
+#include "qemu/queue.h"
+#include "qemu/option.h"
+#include "qemu/config-file.h"
+#include "cpu.h"
+#include "kvm_s390x.h"
+#include "sysemu/sysemu.h"
+#include "hw/s390x/ap-bridge.h"
+#include "exec/address-spaces.h"
+
+#define VFIO_AP_DEVICE_TYPE      "vfio-ap"
+
+/* Instance state: the AP bus device followed by the common VFIO device. */
+typedef struct VFIOAPDevice {
+    APDevice apdev;
+    VFIODevice vdev;
+} VFIOAPDevice;
+
+#define VFIO_AP_DEVICE(obj) \
+        OBJECT_CHECK(VFIOAPDevice, (obj), VFIO_AP_DEVICE_TYPE)
+
+/* vfio_compute_needs_reset hook: always clears vdev->needs_reset. */
+static void vfio_ap_compute_needs_reset(VFIODevice *vdev)
+{
+    vdev->needs_reset = false;
+}
+
+/*
+ * We don't need vfio_hot_reset_multi and vfio_eoi operations for
+ * vfio-ap device now.
+ */
+struct VFIODeviceOps vfio_ap_ops = {
+    .vfio_compute_needs_reset = vfio_ap_compute_needs_reset,
+};
+
+/* Free the device name, then call vfio_put_base_device() on the device. */
+static void vfio_ap_put_device(VFIOAPDevice *vapdev)
+{
+    g_free(vapdev->vdev.name);
+    vfio_put_base_device(&vapdev->vdev);
+}
+
+/*
+ * Find the VFIO group of the device whose sysfs path is vdev.sysfsdev.
+ * The group id is the decimal number parsed from the basename of the
+ * target of the <sysfsdev>/iommu_group symlink.
+ *
+ * Returns the result of vfio_get_group(), or NULL with @errp set when the
+ * symlink cannot be read or its basename does not start with a number.
+ */
+static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp)
+{
+    GError *gerror = NULL;
+    char *symlink, *group_path;
+    int groupid;
+
+    symlink = g_strdup_printf("%s/iommu_group", vapdev->vdev.sysfsdev);
+    group_path = g_file_read_link(symlink, &gerror);
+    g_free(symlink);
+
+    if (!group_path) {
+        error_setg(errp, "%s: no iommu_group found for %s: %s",
+                   VFIO_AP_DEVICE_TYPE, vapdev->vdev.sysfsdev, gerror->message);
+        /* error_setg() formatted its own copy; release the GError */
+        g_error_free(gerror);
+        return NULL;
+    }
+
+    if (sscanf(basename(group_path), "%d", &groupid) != 1) {
+        error_setg(errp, "vfio: failed to read %s", group_path);
+        g_free(group_path);
+        return NULL;
+    }
+
+    g_free(group_path);
+
+    return vfio_get_group(groupid, &address_space_memory, errp);
+}
+
+/*
+ * Realize callback.  Looks up the VFIO group, fills in the VFIODevice
+ * fields (the name is the basename of the sysfsdev path) and calls
+ * vfio_get_device().  If that fails, the device and group are put back;
+ * any error is handed to @errp.
+ */
+static void vfio_ap_realize(DeviceState *dev, Error **errp)
+{
+    int ret;
+    char *mdevid;
+    Error *local_err = NULL;
+    VFIOGroup *vfio_group;
+    APDevice *apdev = AP_DEVICE(dev);
+    VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev);
+
+    vfio_group = vfio_ap_get_group(vapdev, &local_err);
+    if (!vfio_group) {
+        goto out_err;
+    }
+
+    vapdev->vdev.ops = &vfio_ap_ops;
+    vapdev->vdev.type = VFIO_DEVICE_TYPE_AP;
+    mdevid = basename(vapdev->vdev.sysfsdev);
+    vapdev->vdev.name = g_strdup_printf("%s", mdevid);
+    vapdev->vdev.dev = dev;
+
+    ret = vfio_get_device(vfio_group, mdevid, &vapdev->vdev, &local_err);
+    if (ret) {
+        goto out_get_dev_err;
+    }
+
+    return;
+
+out_get_dev_err:
+    vfio_ap_put_device(vapdev);
+    vfio_put_group(vfio_group);
+out_err:
+    error_propagate(errp, local_err);
+}
+
+/* Unrealize callback: put the device, then the group it was attached to. */
+static void vfio_ap_unrealize(DeviceState *dev, Error **errp)
+{
+    APDevice *apdev = AP_DEVICE(dev);
+    VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev);
+    VFIOGroup *group = vapdev->vdev.group;
+
+    vfio_ap_put_device(vapdev);
+    vfio_put_group(group);
+}
+
+static Property vfio_ap_properties[] = {
+    DEFINE_PROP_STRING("sysfsdev", VFIOAPDevice, vdev.sysfsdev),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Reset callback: issue VFIO_DEVICE_RESET on the device fd.  A failure is
+ * reported with error_report() only; nothing is returned to the caller.
+ */
+static void vfio_ap_reset(DeviceState *dev)
+{
+    int ret;
+    APDevice *apdev = AP_DEVICE(dev);
+    VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev);
+
+    ret = ioctl(vapdev->vdev.fd, VFIO_DEVICE_RESET);
+    if (ret) {
+        /* ioctl() reports the cause in errno, not in its return value */
+        error_report("%s: failed to reset %s device: %s", __func__,
+                     vapdev->vdev.name, strerror(errno));
+    }
+}
+
+/* The device is marked unmigratable; no state fields are described. */
+static const VMStateDescription vfio_ap_vmstate = {
+    .name = VFIO_AP_DEVICE_TYPE,
+    .unmigratable = 1,
+};
+
+/* Class init: set properties, vmstate and callbacks on the DeviceClass. */
+static void vfio_ap_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->props = vfio_ap_properties;
+    dc->vmsd = &vfio_ap_vmstate;
+    dc->desc = "VFIO-based AP device assignment";
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    dc->realize = vfio_ap_realize;
+    dc->unrealize = vfio_ap_unrealize;
+    dc->hotpluggable = false;
+    dc->reset = vfio_ap_reset;
+    dc->bus_type = TYPE_AP_BUS;
+}
+
+static const TypeInfo vfio_ap_info = {
+    .name = VFIO_AP_DEVICE_TYPE,
+    .parent = AP_DEVICE_TYPE,
+    .instance_size = sizeof(VFIOAPDevice),
+    .class_init = vfio_ap_class_init,
+};
+
+static void vfio_ap_type_init(void)
+{
+    type_register_static(&vfio_ap_info);
+}
+
+type_init(vfio_ap_type_init)
diff --git a/hw/vfio/calxeda-xgmac.c b/hw/vfio/calxeda-xgmac.c
index 24cee6d..e7767c4 100644
--- a/hw/vfio/calxeda-xgmac.c
+++ b/hw/vfio/calxeda-xgmac.c
@@ -20,6 +20,7 @@
     VFIOCalxedaXgmacDeviceClass *k = VFIO_CALXEDA_XGMAC_DEVICE_GET_CLASS(dev);
 
     vdev->compat = g_strdup("calxeda,hb-xgmac");
+    vdev->num_compat = 1;
 
     k->parent_realize(dev, errp);
 }
diff --git a/hw/vfio/display.c b/hw/vfio/display.c
index 59c0e5d..dead30e 100644
--- a/hw/vfio/display.c
+++ b/hw/vfio/display.c
@@ -124,6 +124,9 @@
 
     primary = vfio_display_get_dmabuf(vdev, DRM_PLANE_TYPE_PRIMARY);
     if (primary == NULL) {
+        if (dpy->ramfb) {
+            ramfb_display_update(dpy->con, dpy->ramfb);
+        }
         return;
     }
 
@@ -181,6 +184,9 @@
     vdev->dpy->con = graphic_console_init(DEVICE(vdev), 0,
                                           &vfio_display_dmabuf_ops,
                                           vdev);
+    if (vdev->enable_ramfb) {
+        vdev->dpy->ramfb = ramfb_setup(errp);
+    }
     return 0;
 }
 
@@ -228,6 +234,9 @@
         return;
     }
     if (!plane.drm_format || !plane.size) {
+        if (dpy->ramfb) {
+            ramfb_display_update(dpy->con, dpy->ramfb);
+        }
         return;
     }
     format = qemu_drm_format_to_pixman(plane.drm_format);
@@ -300,6 +309,9 @@
     vdev->dpy->con = graphic_console_init(DEVICE(vdev), 0,
                                           &vfio_display_region_ops,
                                           vdev);
+    if (vdev->enable_ramfb) {
+        vdev->dpy->ramfb = ramfb_setup(errp);
+    }
     return 0;
 }
 
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 481fd08..eae31c7 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1670,7 +1670,7 @@
      * but also no point in us enabling VGA if disabled in hardware.
      */
     if (!(gmch & 0x2) && !vdev->vga && vfio_populate_vga(vdev, &err)) {
-        error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name);
+        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
         error_report("IGD device %s failed to enable VGA access, "
                      "legacy mode disabled", vdev->vbasedev.name);
         goto out;
@@ -1696,7 +1696,7 @@
     ret = vfio_pci_igd_opregion_init(vdev, opregion, &err);
     if (ret) {
         error_append_hint(&err, "IGD legacy mode disabled\n");
-        error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name);
+        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
         goto out;
     }
 
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 866f0de..5c7bd96 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -37,6 +37,9 @@
 
 #define MSIX_CAP_LENGTH 12
 
+#define TYPE_VFIO_PCI "vfio-pci"
+#define PCI_VFIO(obj)    OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
+
 static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
 static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
 
@@ -222,7 +225,7 @@
 
 static void vfio_intx_update(PCIDevice *pdev)
 {
-    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    VFIOPCIDevice *vdev = PCI_VFIO(pdev);
     PCIINTxRoute route;
     Error *err = NULL;
 
@@ -249,7 +252,7 @@
 
     vfio_intx_enable_kvm(vdev, &err);
     if (err) {
-        error_reportf_err(err, WARN_PREFIX, vdev->vbasedev.name);
+        warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
     }
 
     /* Re-enable the interrupt in cased we missed an EOI */
@@ -314,7 +317,7 @@
 
     vfio_intx_enable_kvm(vdev, &err);
     if (err) {
-        error_reportf_err(err, WARN_PREFIX, vdev->vbasedev.name);
+        warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
     }
 
     vdev->interrupt = VFIO_INT_INTx;
@@ -477,7 +480,7 @@
 static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
                                    MSIMessage *msg, IOHandler *handler)
 {
-    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    VFIOPCIDevice *vdev = PCI_VFIO(pdev);
     VFIOMSIVector *vector;
     int ret;
 
@@ -574,7 +577,7 @@
 
 static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
 {
-    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    VFIOPCIDevice *vdev = PCI_VFIO(pdev);
     VFIOMSIVector *vector = &vdev->msi_vectors[nr];
 
     trace_vfio_msix_vector_release(vdev->vbasedev.name, nr);
@@ -742,7 +745,7 @@
 
     vfio_intx_enable(vdev, &err);
     if (err) {
-        error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name);
+        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
     }
 }
 
@@ -1086,7 +1089,7 @@
  */
 static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar)
 {
-    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    VFIOPCIDevice *vdev = PCI_VFIO(pdev);
     VFIORegion *region = &vdev->bars[bar].region;
     MemoryRegion *mmap_mr, *region_mr, *base_mr;
     PCIIORegion *r;
@@ -1132,7 +1135,7 @@
  */
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
 {
-    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    VFIOPCIDevice *vdev = PCI_VFIO(pdev);
     uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val;
 
     memcpy(&emu_bits, vdev->emulated_config_bits + addr, len);
@@ -1165,7 +1168,7 @@
 void vfio_pci_write_config(PCIDevice *pdev,
                            uint32_t addr, uint32_t val, int len)
 {
-    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    VFIOPCIDevice *vdev = PCI_VFIO(pdev);
     uint32_t val_le = cpu_to_le32(val);
 
     trace_vfio_pci_write_config(vdev->vbasedev.name, addr, val, len);
@@ -1280,8 +1283,7 @@
         if (ret == -ENOTSUP) {
             return 0;
         }
-        error_prepend(&err, "msi_init failed: ");
-        error_propagate(errp, err);
+        error_propagate_prepend(errp, err, "msi_init failed: ");
         return ret;
     }
     vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0);
@@ -1555,7 +1557,7 @@
                     &err);
     if (ret < 0) {
         if (ret == -ENOTSUP) {
-            error_report_err(err);
+            warn_report_err(err);
             return 0;
         }
 
@@ -2194,7 +2196,7 @@
 
     vfio_intx_enable(vdev, &err);
     if (err) {
-        error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name);
+        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
     }
 
     for (nr = 0; nr < PCI_NUM_REGIONS - 1; ++nr) {
@@ -2588,9 +2590,9 @@
     } else if (irq_info.count == 1) {
         vdev->pci_aer = true;
     } else {
-        error_report(WARN_PREFIX
-                     "Could not enable error recovery for the device",
-                     vbasedev->name);
+        warn_report(VFIO_MSG_PREFIX
+                    "Could not enable error recovery for the device",
+                    vbasedev->name);
     }
 }
 
@@ -2715,7 +2717,7 @@
 
     qdev_unplug(&vdev->pdev.qdev, &err);
     if (err) {
-        error_reportf_err(err, WARN_PREFIX, vdev->vbasedev.name);
+        warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
     }
 }
 
@@ -2801,7 +2803,7 @@
 
 static void vfio_realize(PCIDevice *pdev, Error **errp)
 {
-    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    VFIOPCIDevice *vdev = PCI_VFIO(pdev);
     VFIODevice *vbasedev_iter;
     VFIOGroup *group;
     char *tmp, *subsys, group_path[PATH_MAX], *group_name;
@@ -2828,7 +2830,7 @@
 
     if (stat(vdev->vbasedev.sysfsdev, &st) < 0) {
         error_setg_errno(errp, errno, "no such host device");
-        error_prepend(errp, ERR_PREFIX, vdev->vbasedev.sysfsdev);
+        error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.sysfsdev);
         return;
     }
 
@@ -3067,6 +3069,10 @@
             goto out_teardown;
         }
     }
+    if (vdev->enable_ramfb && vdev->dpy == NULL) {
+        error_setg(errp, "ramfb=on requires display=on");
+        goto out_teardown;
+    }
 
     vfio_register_err_notifier(vdev);
     vfio_register_req_notifier(vdev);
@@ -3079,13 +3085,12 @@
     vfio_teardown_msi(vdev);
     vfio_bars_exit(vdev);
 error:
-    error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
+    error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
 }
 
 static void vfio_instance_finalize(Object *obj)
 {
-    PCIDevice *pci_dev = PCI_DEVICE(obj);
-    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pci_dev);
+    VFIOPCIDevice *vdev = PCI_VFIO(obj);
     VFIOGroup *group = vdev->vbasedev.group;
 
     vfio_display_finalize(vdev);
@@ -3105,7 +3110,7 @@
 
 static void vfio_exitfn(PCIDevice *pdev)
 {
-    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    VFIOPCIDevice *vdev = PCI_VFIO(pdev);
 
     vfio_unregister_req_notifier(vdev);
     vfio_unregister_err_notifier(vdev);
@@ -3120,8 +3125,7 @@
 
 static void vfio_pci_reset(DeviceState *dev)
 {
-    PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
-    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    VFIOPCIDevice *vdev = PCI_VFIO(dev);
 
     trace_vfio_pci_reset(vdev->vbasedev.name);
 
@@ -3161,7 +3165,7 @@
 static void vfio_instance_init(Object *obj)
 {
     PCIDevice *pci_dev = PCI_DEVICE(obj);
-    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, PCI_DEVICE(obj));
+    VFIOPCIDevice *vdev = PCI_VFIO(obj);
 
     device_add_bootindex_property(obj, &vdev->bootindex,
                                   "bootindex", NULL,
@@ -3245,7 +3249,7 @@
 }
 
 static const TypeInfo vfio_pci_dev_info = {
-    .name = "vfio-pci",
+    .name = TYPE_VFIO_PCI,
     .parent = TYPE_PCI_DEVICE,
     .instance_size = sizeof(VFIOPCIDevice),
     .class_init = vfio_pci_dev_class_init,
@@ -3258,9 +3262,30 @@
     },
 };
 
+static Property vfio_pci_dev_nohotplug_properties[] = {
+    DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vfio_pci_nohotplug_dev_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->props = vfio_pci_dev_nohotplug_properties;
+    dc->hotpluggable = false;
+}
+
+static const TypeInfo vfio_pci_nohotplug_dev_info = {
+    .name = "vfio-pci-nohotplug",
+    .parent = TYPE_VFIO_PCI, /* same macro vfio_pci_dev_info uses as .name */
+    .instance_size = sizeof(VFIOPCIDevice),
+    .class_init = vfio_pci_nohotplug_dev_class_init,
+};
+
 static void register_vfio_pci_dev_type(void)
 {
     type_register_static(&vfio_pci_dev_info);
+    type_register_static(&vfio_pci_nohotplug_dev_info);
 }
 
 type_init(register_vfio_pci_dev_type)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 52b0654..b1ae4c0 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -165,6 +165,7 @@
     bool no_geforce_quirks;
     bool no_kvm_ioeventfd;
     bool no_vfio_ioeventfd;
+    bool enable_ramfb;
     VFIODisplay *dpy;
 } VFIOPCIDevice;
 
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 57c4a0e..398db38 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -655,10 +655,32 @@
         goto out;
     }
 
+    if (!vdev->compat) {
+        GError *gerr = NULL;
+        gchar *contents;
+        gsize length;
+        char *path;
+
+        path = g_strdup_printf("%s/of_node/compatible", vbasedev->sysfsdev);
+        if (!g_file_get_contents(path, &contents, &length, &gerr)) {
+            error_setg(errp, "%s", gerr->message);
+            g_error_free(gerr);
+            g_free(path);
+            return;
+        }
+        g_free(path);
+        vdev->compat = contents;
+        for (vdev->num_compat = 0; length; vdev->num_compat++) {
+            size_t skip = strlen(contents) + 1;
+            contents += skip;
+            length -= skip;
+        }
+    }
+
     for (i = 0; i < vbasedev->num_regions; i++) {
         if (vfio_region_mmap(vdev->regions[i])) {
-            error_report("%s mmap unsupported. Performance may be slow",
-                         memory_region_name(vdev->regions[i]->mem));
+            warn_report("%s mmap unsupported, performance may be slow",
+                        memory_region_name(vdev->regions[i]->mem));
         }
         sysbus_init_mmio(sbdev, vdev->regions[i]->mem);
     }
@@ -668,7 +690,7 @@
     }
 
     if (vdev->vbasedev.name) {
-        error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
+        error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
     } else {
         error_prepend(errp, "vfio error: ");
     }
@@ -700,6 +722,8 @@
     dc->desc = "VFIO-based platform device assignment";
     sbc->connect_irq_notifier = vfio_start_irqfd_injection;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    /* Supported by TYPE_VIRT_MACHINE */
+    dc->user_creatable = true;
 }
 
 static const TypeInfo vfio_platform_dev_info = {
@@ -708,7 +732,6 @@
     .instance_size = sizeof(VFIOPlatformDevice),
     .class_init = vfio_platform_class_init,
     .class_size = sizeof(VFIOPlatformDeviceClass),
-    .abstract   = true,
 };
 
 static void register_vfio_platform_dev_type(void)
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index b041343..e09bed0 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -374,8 +374,6 @@
     int fds[VHOST_MEMORY_MAX_NREGIONS];
     int i, fd;
     size_t fd_num = 0;
-    bool reply_supported = virtio_has_feature(dev->protocol_features,
-                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
     VhostUserMsg msg_reply;
     int region_i, msg_i;
 
@@ -384,10 +382,6 @@
         .hdr.flags = VHOST_USER_VERSION,
     };
 
-    if (reply_supported) {
-        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
-    }
-
     if (u->region_rb_len < dev->mem->nregions) {
         u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
         u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
@@ -503,10 +497,6 @@
         return -1;
     }
 
-    if (reply_supported) {
-        return process_message_reply(dev, &msg);
-    }
-
     return 0;
 }
 
@@ -519,8 +509,7 @@
     size_t fd_num = 0;
     bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
     bool reply_supported = virtio_has_feature(dev->protocol_features,
-                                          VHOST_USER_PROTOCOL_F_REPLY_ACK) &&
-                                          !do_postcopy;
+                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
 
     if (do_postcopy) {
         /* Postcopy has enough differences that it's best done in it's own
@@ -1291,6 +1280,7 @@
         return ret;
     }
     postcopy_unregister_shared_ufd(&u->postcopy_fd);
+    close(u->postcopy_fd.fd);
     u->postcopy_fd.handler = NULL;
 
     trace_vhost_user_postcopy_end_exit();
@@ -1430,6 +1420,12 @@
         postcopy_remove_notifier(&u->postcopy_notifier);
         u->postcopy_notifier.notify = NULL;
     }
+    u->postcopy_listen = false;
+    if (u->postcopy_fd.handler) {
+        postcopy_unregister_shared_ufd(&u->postcopy_fd);
+        close(u->postcopy_fd.fd);
+        u->postcopy_fd.handler = NULL;
+    }
     if (u->slave_fd >= 0) {
         qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
         close(u->slave_fd);
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 3a01fe9..a954799 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1683,8 +1683,8 @@
         if (err) {
             /* Notice when a system that supports MSIx can't initialize it */
             if (err != -ENOTSUP) {
-                error_report("unable to init msix vectors to %" PRIu32,
-                             proxy->nvectors);
+                warn_report("unable to init msix vectors to %" PRIu32,
+                            proxy->nvectors);
             }
             proxy->nvectors = 0;
         }
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index f6a588a..4136d23 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -358,6 +358,10 @@
  * Called within rcu_read_lock().  */
 static int virtio_queue_empty_rcu(VirtQueue *vq)
 {
+    if (unlikely(vq->vdev->broken)) {
+        return 1;
+    }
+
     if (unlikely(!vq->vring.avail)) {
         return 1;
     }
@@ -373,6 +377,10 @@
 {
     bool empty;
 
+    if (unlikely(vq->vdev->broken)) {
+        return 1;
+    }
+
     if (unlikely(!vq->vring.avail)) {
         return 1;
     }
@@ -1161,7 +1169,6 @@
     return 0;
 }
 
-bool target_words_bigendian(void);
 static enum virtio_device_endian virtio_default_endian(void)
 {
     if (target_words_bigendian()) {
@@ -1604,6 +1611,8 @@
 
     vdev->vq[n].vring.num = 0;
     vdev->vq[n].vring.num_default = 0;
+    vdev->vq[n].handle_output = NULL;
+    vdev->vq[n].handle_aio_output = NULL;
 }
 
 static void virtio_set_isr(VirtIODevice *vdev, int value)
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index e5a6eff..f1f3a37 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -830,7 +830,7 @@
     xen_pt_config_init(s, &err);
     if (err) {
         error_append_hint(&err, "PCI Config space initialisation failed");
-        error_report_err(err);
+        error_propagate(errp, err);
         rc = -1;
         goto err_out;
     }
diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index aee31c6..47f9010 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -358,7 +358,7 @@
 static XenPTBarFlag xen_pt_bar_reg_parse(XenPCIPassthroughState *s,
                                          int index)
 {
-    PCIDevice *d = &s->dev;
+    PCIDevice *d = PCI_DEVICE(s);
     XenPTRegion *region = NULL;
     PCIIORegion *r;
 
@@ -469,7 +469,7 @@
 {
     XenPTRegInfo *reg = cfg_entry->reg;
     XenPTRegion *base = NULL;
-    PCIDevice *d = &s->dev;
+    PCIDevice *d = PCI_DEVICE(s);
     const PCIIORegion *r;
     uint32_t writable_mask = 0;
     uint32_t bar_emu_mask = 0;
@@ -543,7 +543,7 @@
 {
     XenPTRegInfo *reg = cfg_entry->reg;
     XenPTRegion *base = NULL;
-    PCIDevice *d = (PCIDevice *)&s->dev;
+    PCIDevice *d = PCI_DEVICE(s);
     uint32_t writable_mask = 0;
     uint32_t throughable_mask = get_throughable_mask(s, reg, valid_mask);
     pcibus_t r_size = 0;
@@ -1587,7 +1587,7 @@
                                  const XenPTRegGroupInfo *grp_reg,
                                  uint32_t base_offset, uint8_t *size)
 {
-    PCIDevice *d = &s->dev;
+    PCIDevice *d = PCI_DEVICE(s);
     uint8_t version = get_capability_version(s, base_offset);
     uint8_t type = get_device_type(s, base_offset);
     uint8_t pcie_size = 0;
diff --git a/include/block/aio.h b/include/block/aio.h
index f08630c..0ca25df 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -388,6 +388,32 @@
 struct LinuxAioState *aio_get_linux_aio(AioContext *ctx);
 
 /**
+ * aio_timer_new_with_attrs:
+ * @ctx: the aio context
+ * @type: the clock type
+ * @scale: the scale
+ * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_<id> values
+ *              to assign
+ * @cb: the callback to call on timer expiry
+ * @opaque: the opaque pointer to pass to the callback
+ *
+ * Allocate a new timer (with attributes) attached to the context @ctx.
+ * The function is responsible for memory allocation.
+ *
+ * The preferred interface is aio_timer_init or aio_timer_init_with_attrs.
+ * Use that unless you really need dynamic memory allocation.
+ *
+ * Returns: a pointer to the new timer
+ */
+static inline QEMUTimer *aio_timer_new_with_attrs(AioContext *ctx,
+                                                  QEMUClockType type,
+                                                  int scale, int attributes,
+                                                  QEMUTimerCB *cb, void *opaque)
+{
+    return timer_new_full(&ctx->tlg, type, scale, attributes, cb, opaque);
+}
+
+/**
  * aio_timer_new:
  * @ctx: the aio context
  * @type: the clock type
@@ -396,10 +422,7 @@
  * @opaque: the opaque pointer to pass to the callback
  *
  * Allocate a new timer attached to the context @ctx.
- * The function is responsible for memory allocation.
- *
- * The preferred interface is aio_timer_init. Use that
- * unless you really need dynamic memory allocation.
+ * See aio_timer_new_with_attrs for details.
  *
  * Returns: a pointer to the new timer
  */
@@ -407,7 +430,29 @@
                                        int scale,
                                        QEMUTimerCB *cb, void *opaque)
 {
-    return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque);
+    return timer_new_full(&ctx->tlg, type, scale, 0, cb, opaque);
+}
+
+/**
+ * aio_timer_init_with_attrs:
+ * @ctx: the aio context
+ * @ts: the timer
+ * @type: the clock type
+ * @scale: the scale
+ * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_<id> values
+ *              to assign
+ * @cb: the callback to call on timer expiry
+ * @opaque: the opaque pointer to pass to the callback
+ *
+ * Initialise a new timer (with attributes) attached to the context @ctx.
+ * The caller is responsible for memory allocation.
+ */
+static inline void aio_timer_init_with_attrs(AioContext *ctx,
+                                             QEMUTimer *ts, QEMUClockType type,
+                                             int scale, int attributes,
+                                             QEMUTimerCB *cb, void *opaque)
+{
+    timer_init_full(ts, &ctx->tlg, type, scale, attributes, cb, opaque);
 }
 
 /**
@@ -420,14 +465,14 @@
  * @opaque: the opaque pointer to pass to the callback
  *
  * Initialise a new timer attached to the context @ctx.
- * The caller is responsible for memory allocation.
+ * See aio_timer_init_with_attrs for details.
  */
 static inline void aio_timer_init(AioContext *ctx,
                                   QEMUTimer *ts, QEMUClockType type,
                                   int scale,
                                   QEMUTimerCB *cb, void *opaque)
 {
-    timer_init_tl(ts, ctx->tlg.tl[type], scale, cb, opaque);
+    timer_init_full(ts, &ctx->tlg, type, scale, 0, cb, opaque);
 }
 
 /**
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 92ecbd8..f605622 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1155,7 +1155,7 @@
 void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
 
 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
-void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in);
+void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup);
 
 void bdrv_inc_in_flight(BlockDriverState *bs);
 void bdrv_dec_in_flight(BlockDriverState *bs);
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 259bd27..8f38a3d 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -26,7 +26,6 @@
                                         const char *name);
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
-void bdrv_release_persistent_dirty_bitmaps(BlockDriverState *bs);
 void bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
                                          const char *name,
                                          Error **errp);
@@ -71,7 +70,8 @@
                                        bool persistent);
 void bdrv_dirty_bitmap_set_qmp_locked(BdrvDirtyBitmap *bitmap, bool qmp_locked);
 void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
-                             Error **errp);
+                             HBitmap **backup, Error **errp);
+void bdrv_dirty_bitmap_set_migration(BdrvDirtyBitmap *bitmap, bool migration);
 
 /* Functions that require manual locking.  */
 void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap);
@@ -94,6 +94,7 @@
 bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_get_persistance(BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_qmp_locked(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_user_locked(BdrvDirtyBitmap *bitmap);
 bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs);
 BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
                                         BdrvDirtyBitmap *bitmap);
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 4638c83..6a5bfe5 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -135,7 +135,9 @@
 #define NBD_FLAG_SEND_TRIM         (1 << 5) /* Send TRIM (discard) */
 #define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */
 #define NBD_FLAG_SEND_DF           (1 << 7) /* Send DF (Do not Fragment) */
-#define NBD_FLAG_SEND_CACHE        (1 << 8) /* Send CACHE (prefetch) */
+#define NBD_FLAG_CAN_MULTI_CONN    (1 << 8) /* Multi-client cache consistent */
+#define NBD_FLAG_SEND_RESIZE       (1 << 9) /* Send resize */
+#define NBD_FLAG_SEND_CACHE        (1 << 10) /* Send CACHE (prefetch) */
 
 /* New-style handshake (global) flags, sent from server to client, and
    control what will happen during handshake phase. */
@@ -308,8 +310,7 @@
 void nbd_export_set_description(NBDExport *exp, const char *description);
 void nbd_export_close_all(void);
 
-void nbd_client_new(NBDExport *exp,
-                    QIOChannelSocket *sioc,
+void nbd_client_new(QIOChannelSocket *sioc,
                     QCryptoTLSCreds *tlscreds,
                     const char *tlsaclname,
                     void (*close_fn)(NBDClient *, bool));
diff --git a/include/chardev/char-fe.h b/include/chardev/char-fe.h
index c67271f..46c997d 100644
--- a/include/chardev/char-fe.h
+++ b/include/chardev/char-fe.h
@@ -20,7 +20,7 @@
 };
 
 /**
- * @qemu_chr_fe_init:
+ * qemu_chr_fe_init:
  *
  * Initializes a front end for the given CharBackend and
  * Chardev. Call qemu_chr_fe_deinit() to remove the association and
@@ -31,7 +31,7 @@
 bool qemu_chr_fe_init(CharBackend *b, Chardev *s, Error **errp);
 
 /**
- * @qemu_chr_fe_deinit:
+ * qemu_chr_fe_deinit:
  * @b: a CharBackend
  * @del: if true, delete the chardev backend
 *
@@ -42,9 +42,9 @@
 void qemu_chr_fe_deinit(CharBackend *b, bool del);
 
 /**
- * @qemu_chr_fe_get_driver:
+ * qemu_chr_fe_get_driver:
  *
- * Returns the driver associated with a CharBackend or NULL if no
+ * Returns: the driver associated with a CharBackend or NULL if no
  * associated Chardev.
  * Note: avoid this function as the driver should never be accessed directly,
  *       especially by the frontends that support chardevice hotswap.
@@ -53,21 +53,21 @@
 Chardev *qemu_chr_fe_get_driver(CharBackend *be);
 
 /**
- * @qemu_chr_fe_backend_connected:
+ * qemu_chr_fe_backend_connected:
  *
- * Returns true if there is a chardevice associated with @be.
+ * Returns: true if there is a chardevice associated with @be.
  */
 bool qemu_chr_fe_backend_connected(CharBackend *be);
 
 /**
- * @qemu_chr_fe_backend_open:
+ * qemu_chr_fe_backend_open:
  *
- * Returns true if chardevice associated with @be is open.
+ * Returns: true if chardevice associated with @be is open.
  */
 bool qemu_chr_fe_backend_open(CharBackend *be);
 
 /**
- * @qemu_chr_fe_set_handlers:
+ * qemu_chr_fe_set_handlers:
  * @b: a CharBackend
  * @fd_can_read: callback to get the amount of data the frontend may
  *               receive
@@ -95,7 +95,7 @@
                               bool set_open);
 
 /**
- * @qemu_chr_fe_take_focus:
+ * qemu_chr_fe_take_focus:
  *
  * Take the focus (if the front end is muxed).
  *
@@ -104,14 +104,14 @@
 void qemu_chr_fe_take_focus(CharBackend *b);
 
 /**
- * @qemu_chr_fe_accept_input:
+ * qemu_chr_fe_accept_input:
  *
  * Notify that the frontend is ready to receive data
  */
 void qemu_chr_fe_accept_input(CharBackend *be);
 
 /**
- * @qemu_chr_fe_disconnect:
+ * qemu_chr_fe_disconnect:
  *
  * Close a fd accepted by character backend.
  * Without associated Chardev, do nothing.
@@ -119,7 +119,7 @@
 void qemu_chr_fe_disconnect(CharBackend *be);
 
 /**
- * @qemu_chr_fe_wait_connected:
+ * qemu_chr_fe_wait_connected:
  *
  * Wait for characted backend to be connected, return < 0 on error or
  * if no associated Chardev.
@@ -127,19 +127,18 @@
 int qemu_chr_fe_wait_connected(CharBackend *be, Error **errp);
 
 /**
- * @qemu_chr_fe_set_echo:
+ * qemu_chr_fe_set_echo:
+ * @echo: true to enable echo, false to disable echo
  *
  * Ask the backend to override its normal echo setting.  This only really
  * applies to the stdio backend and is used by the QMP server such that you
  * can see what you type if you try to type QMP commands.
  * Without associated Chardev, do nothing.
- *
- * @echo true to enable echo, false to disable echo
  */
 void qemu_chr_fe_set_echo(CharBackend *be, bool echo);
 
 /**
- * @qemu_chr_fe_set_open:
+ * qemu_chr_fe_set_open:
  *
  * Set character frontend open status.  This is an indication that the
  * front end is ready (or not) to begin doing I/O.
@@ -148,83 +147,77 @@
 void qemu_chr_fe_set_open(CharBackend *be, int fe_open);
 
 /**
- * @qemu_chr_fe_printf:
+ * qemu_chr_fe_printf:
+ * @fmt: see #printf
  *
  * Write to a character backend using a printf style interface.  This
  * function is thread-safe. It does nothing without associated
  * Chardev.
- *
- * @fmt see #printf
  */
 void qemu_chr_fe_printf(CharBackend *be, const char *fmt, ...)
     GCC_FMT_ATTR(2, 3);
 
 /**
- * @qemu_chr_fe_add_watch:
+ * qemu_chr_fe_add_watch:
+ * @cond: the condition to poll for
+ * @func: the function to call when the condition happens
+ * @user_data: the opaque pointer to pass to @func
  *
  * If the backend is connected, create and add a #GSource that fires
  * when the given condition (typically G_IO_OUT|G_IO_HUP or G_IO_HUP)
  * is active; return the #GSource's tag.  If it is disconnected,
  * or without associated Chardev, return 0.
  *
- * @cond the condition to poll for
- * @func the function to call when the condition happens
- * @user_data the opaque pointer to pass to @func
- *
  * Returns: the source tag
  */
 guint qemu_chr_fe_add_watch(CharBackend *be, GIOCondition cond,
                             GIOFunc func, void *user_data);
 
 /**
- * @qemu_chr_fe_write:
+ * qemu_chr_fe_write:
+ * @buf: the data
+ * @len: the number of bytes to send
  *
  * Write data to a character backend from the front end.  This function
  * will send data from the front end to the back end.  This function
  * is thread-safe.
  *
- * @buf the data
- * @len the number of bytes to send
- *
  * Returns: the number of bytes consumed (0 if no associated Chardev)
  */
 int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, int len);
 
 /**
- * @qemu_chr_fe_write_all:
+ * qemu_chr_fe_write_all:
+ * @buf: the data
+ * @len: the number of bytes to send
  *
  * Write data to a character backend from the front end.  This function will
  * send data from the front end to the back end.  Unlike @qemu_chr_fe_write,
  * this function will block if the back end cannot consume all of the data
  * attempted to be written.  This function is thread-safe.
  *
- * @buf the data
- * @len the number of bytes to send
- *
  * Returns: the number of bytes consumed (0 if no associated Chardev)
  */
 int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len);
 
 /**
- * @qemu_chr_fe_read_all:
+ * qemu_chr_fe_read_all:
+ * @buf: the data buffer
+ * @len: the number of bytes to read
  *
  * Read data to a buffer from the back end.
  *
- * @buf the data buffer
- * @len the number of bytes to read
- *
  * Returns: the number of bytes read (0 if no associated Chardev)
  */
 int qemu_chr_fe_read_all(CharBackend *be, uint8_t *buf, int len);
 
 /**
- * @qemu_chr_fe_ioctl:
+ * qemu_chr_fe_ioctl:
+ * @cmd: see CHR_IOCTL_*
+ * @arg: the data associated with @cmd
  *
  * Issue a device specific ioctl to a backend.  This function is thread-safe.
  *
- * @cmd see CHR_IOCTL_*
- * @arg the data associated with @cmd
- *
  * Returns: if @cmd is not supported by the backend or there is no
  *          associated Chardev, -ENOTSUP, otherwise the return
  *          value depends on the semantics of @cmd
@@ -232,7 +225,7 @@
 int qemu_chr_fe_ioctl(CharBackend *be, int cmd, void *arg);
 
 /**
- * @qemu_chr_fe_get_msgfd:
+ * qemu_chr_fe_get_msgfd:
  *
  * For backends capable of fd passing, return the latest file descriptor passed
  * by a client.
@@ -245,7 +238,7 @@
 int qemu_chr_fe_get_msgfd(CharBackend *be);
 
 /**
- * @qemu_chr_fe_get_msgfds:
+ * qemu_chr_fe_get_msgfds:
  *
  * For backends capable of fd passing, return the number of file received
  * descriptors and fills the fds array up to num elements
@@ -258,7 +251,7 @@
 int qemu_chr_fe_get_msgfds(CharBackend *be, int *fds, int num);
 
 /**
- * @qemu_chr_fe_set_msgfds:
+ * qemu_chr_fe_set_msgfds:
  *
  * For backends capable of fd passing, set an array of fds to be passed with
  * the next send operation.
diff --git a/include/chardev/char.h b/include/chardev/char.h
index 6f0576e..7becd8c 100644
--- a/include/chardev/char.h
+++ b/include/chardev/char.h
@@ -68,12 +68,11 @@
 };
 
 /**
- * @qemu_chr_new_from_opts:
+ * qemu_chr_new_from_opts:
+ * @opts: see qemu-config.c for a list of valid options
  *
  * Create a new character backend from a QemuOpts list.
  *
- * @opts see qemu-config.c for a list of valid options
- *
  * Returns: on success: a new character backend
  *          otherwise:  NULL; @errp specifies the error
  *                            or left untouched in case of help option
@@ -82,17 +81,16 @@
                                 Error **errp);
 
 /**
- * @qemu_chr_parse_common:
+ * qemu_chr_parse_common:
+ * @opts: the options that still need parsing
+ * @backend: a new backend
  *
  * Parse the common options available to all character backends.
- *
- * @opts the options that still need parsing
- * @backend a new backend
  */
 void qemu_chr_parse_common(QemuOpts *opts, ChardevCommon *backend);
 
 /**
- * @qemu_chr_parse_opts:
+ * qemu_chr_parse_opts:
  *
  * Parse the options to the ChardevBackend struct.
  *
@@ -102,49 +100,61 @@
                                     Error **errp);
 
 /**
- * @qemu_chr_new:
+ * qemu_chr_new:
+ * @label: the name of the backend
+ * @filename: the URI
  *
  * Create a new character backend from a URI.
- *
- * @label the name of the backend
- * @filename the URI
+ * Do not implicitly initialize a monitor if the chardev is muxed.
  *
  * Returns: a new character backend
  */
 Chardev *qemu_chr_new(const char *label, const char *filename);
 
 /**
- * @qemu_chr_change:
+ * qemu_chr_new_mux_mon:
+ * @label: the name of the backend
+ * @filename: the URI
+ *
+ * Create a new character backend from a URI.
+ * Implicitly initialize a monitor if the chardev is muxed.
+ *
+ * Returns: a new character backend
+ */
+Chardev *qemu_chr_new_mux_mon(const char *label, const char *filename);
+
+/**
+ * qemu_chr_change:
+ * @opts: the new backend options
  *
  * Change an existing character backend
- *
- * @opts the new backend options
  */
 void qemu_chr_change(QemuOpts *opts, Error **errp);
 
 /**
- * @qemu_chr_cleanup:
+ * qemu_chr_cleanup:
  *
  * Delete all chardevs (when leaving qemu)
  */
 void qemu_chr_cleanup(void);
 
 /**
- * @qemu_chr_new_noreplay:
+ * qemu_chr_new_noreplay:
+ * @label: the name of the backend
+ * @filename: the URI
+ * @permit_mux_mon: if chardev is muxed, initialize a monitor
  *
  * Create a new character backend from a URI.
  * Character device communications are not written
  * into the replay log.
  *
- * @label the name of the backend
- * @filename the URI
- *
  * Returns: a new character backend
  */
-Chardev *qemu_chr_new_noreplay(const char *label, const char *filename);
+Chardev *qemu_chr_new_noreplay(const char *label, const char *filename,
+                               bool permit_mux_mon);
 
 /**
- * @qemu_chr_be_can_write:
+ * qemu_chr_be_can_write:
  *
  * Determine how much data the front end can currently accept.  This function
  * returns the number of bytes the front end can accept.  If it returns 0, the
@@ -156,43 +166,39 @@
 int qemu_chr_be_can_write(Chardev *s);
 
 /**
- * @qemu_chr_be_write:
+ * qemu_chr_be_write:
+ * @buf: a buffer to receive data from the front end
+ * @len: the number of bytes to receive from the front end
  *
  * Write data from the back end to the front end.  Before issuing this call,
  * the caller should call @qemu_chr_be_can_write to determine how much data
  * the front end can currently accept.
- *
- * @buf a buffer to receive data from the front end
- * @len the number of bytes to receive from the front end
  */
 void qemu_chr_be_write(Chardev *s, uint8_t *buf, int len);
 
 /**
- * @qemu_chr_be_write_impl:
+ * qemu_chr_be_write_impl:
+ * @buf: a buffer to receive data from the front end
+ * @len: the number of bytes to receive from the front end
  *
  * Implementation of back end writing. Used by replay module.
- *
- * @buf a buffer to receive data from the front end
- * @len the number of bytes to receive from the front end
  */
 void qemu_chr_be_write_impl(Chardev *s, uint8_t *buf, int len);
 
 /**
- * @qemu_chr_be_update_read_handlers:
+ * qemu_chr_be_update_read_handlers:
+ * @context: the gcontext that will be used to attach the watch sources
  *
  * Invoked when frontend read handlers are setup
- *
- * @context the gcontext that will be used to attach the watch sources
  */
 void qemu_chr_be_update_read_handlers(Chardev *s,
                                       GMainContext *context);
 
 /**
- * @qemu_chr_be_event:
+ * qemu_chr_be_event:
+ * @event: the event to send
  *
  * Send an event from the back end to the front end.
- *
- * @event the event to send
  */
 void qemu_chr_be_event(Chardev *s, int event);
 
@@ -203,7 +209,8 @@
                           ChardevFeature feature);
 void qemu_chr_set_feature(Chardev *chr,
                           ChardevFeature feature);
-QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename);
+QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename,
+                                bool permit_mux_mon);
 int qemu_chr_write(Chardev *s, const uint8_t *buf, int len, bool write_all);
 #define qemu_chr_write_all(s, buf, len) qemu_chr_write(s, buf, len, true)
 int qemu_chr_wait_connected(Chardev *chr, Error **errp);
diff --git a/include/disas/bfd.h b/include/disas/bfd.h
index 1f69a6e..41b61c8 100644
--- a/include/disas/bfd.h
+++ b/include/disas/bfd.h
@@ -387,6 +387,7 @@
 int print_insn_tci(bfd_vma, disassemble_info*);
 int print_insn_big_mips         (bfd_vma, disassemble_info*);
 int print_insn_little_mips      (bfd_vma, disassemble_info*);
+int print_insn_nanomips         (bfd_vma, disassemble_info*);
 int print_insn_i386             (bfd_vma, disassemble_info*);
 int print_insn_m68k             (bfd_vma, disassemble_info*);
 int print_insn_z8001            (bfd_vma, disassemble_info*);
diff --git a/include/elf.h b/include/elf.h
index 312f68a..c151164 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -28,8 +28,11 @@
 #define PT_PHDR    6
 #define PT_LOPROC  0x70000000
 #define PT_HIPROC  0x7fffffff
-#define PT_MIPS_REGINFO		0x70000000
-#define PT_MIPS_OPTIONS		0x70000001
+
+#define PT_MIPS_REGINFO   0x70000000
+#define PT_MIPS_RTPROC    0x70000001
+#define PT_MIPS_OPTIONS   0x70000002
+#define PT_MIPS_ABIFLAGS  0x70000003
 
 /* Flags in the e_flags field of the header */
 /* MIPS architecture level. */
@@ -76,14 +79,40 @@
 #define EF_MIPS_MACH_OCTEON2  0x008d0000  /* Cavium Networks Octeon2         */
 #define EF_MIPS_MACH_OCTEON3  0x008e0000  /* Cavium Networks Octeon3         */
 #define EF_MIPS_MACH_5400     0x00910000  /* NEC VR5400                      */
-#define EF_MIPS_MACH_5900     0x00920000  /* MIPS R5900                      */
+#define EF_MIPS_MACH_5900     0x00920000  /* Toshiba/Sony R5900              */
 #define EF_MIPS_MACH_5500     0x00980000  /* NEC VR5500                      */
-#define EF_MIPS_MACH_9000     0x00990000  /* PMC-Sierra's RM9000             */
+#define EF_MIPS_MACH_9000     0x00990000  /* PMC-Sierra RM9000               */
 #define EF_MIPS_MACH_LS2E     0x00a00000  /* ST Microelectronics Loongson 2E */
 #define EF_MIPS_MACH_LS2F     0x00a10000  /* ST Microelectronics Loongson 2F */
 #define EF_MIPS_MACH_LS3A     0x00a20000  /* ST Microelectronics Loongson 3A */
 #define EF_MIPS_MACH          0x00ff0000  /* EF_MIPS_MACH_xxx selection mask */
 
+#define MIPS_ABI_FP_UNKNOWN   (-1)        /* Unknown FP ABI (internal)       */
+
+#define MIPS_ABI_FP_ANY       0x0         /* FP ABI doesn't matter           */
+#define MIPS_ABI_FP_DOUBLE    0x1         /* -mdouble-float                  */
+#define MIPS_ABI_FP_SINGLE    0x2         /* -msingle-float                  */
+#define MIPS_ABI_FP_SOFT      0x3         /* -msoft-float                    */
+#define MIPS_ABI_FP_OLD_64    0x4         /* -mips32r2 -mfp64                */
+#define MIPS_ABI_FP_XX        0x5         /* -mfpxx                          */
+#define MIPS_ABI_FP_64        0x6         /* -mips32r2 -mfp64                */
+#define MIPS_ABI_FP_64A       0x7         /* -mips32r2 -mfp64 -mno-odd-spreg */
+
+typedef struct mips_elf_abiflags_v0 {
+  uint16_t version;           /* Version of flags structure                  */
+  uint8_t isa_level;          /* The level of the ISA: 1-5, 32, 64           */
+  uint8_t isa_rev;            /* The revision of ISA:                        */
+                              /*   - 0 for MIPS V and below,                 */
+                              /*   - 1-n otherwise.                          */
+  uint8_t gpr_size;           /* The size of general purpose registers       */
+  uint8_t cpr1_size;          /* The size of co-processor 1 registers        */
+  uint8_t cpr2_size;          /* The size of co-processor 2 registers        */
+  uint8_t fp_abi;             /* The floating-point ABI                      */
+  uint32_t isa_ext;           /* Mask of processor-specific extensions       */
+  uint32_t ases;              /* Mask of ASEs used                           */
+  uint32_t flags1;            /* Mask of general flags                       */
+  uint32_t flags2;
+} Mips_elf_abiflags_v0;
 
 /* These constants define the different elf file types */
 #define ET_NONE   0
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index a171ffc..4ff62f3 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -24,6 +24,7 @@
 #endif
 
 #include "qemu/host-utils.h"
+#include "qemu/thread.h"
 #include "qemu/queue.h"
 #ifdef CONFIG_TCG
 #include "tcg-target.h"
@@ -142,6 +143,8 @@
 
 #define CPU_COMMON_TLB \
     /* The meaning of the MMU modes is defined in the target code. */   \
+    /* tlb_lock serializes updates to tlb_table and tlb_v_table */      \
+    QemuSpin tlb_lock;                                                  \
     CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE];                  \
     CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE];               \
     CPUIOTLBEntry iotlb[NB_MMU_MODES][CPU_TLB_SIZE];                    \
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index 41ed052..9590684 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -126,6 +126,29 @@
 /* The memory helpers for tcg-generated code need tcg_target_long etc.  */
 #include "tcg.h"
 
+static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
+{
+#if TCG_OVERSIZED_GUEST
+    return entry->addr_write;
+#else
+    return atomic_read(&entry->addr_write);
+#endif
+}
+
+/* Find the TLB index corresponding to the mmu_idx + address pair.  */
+static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
+                                  target_ulong addr)
+{
+    return (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+}
+
+/* Find the TLB entry corresponding to the mmu_idx + address pair.  */
+static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
+                                     target_ulong addr)
+{
+    return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)];
+}
+
 #ifdef MMU_MODE0_SUFFIX
 #define CPU_MMU_INDEX 0
 #define MEMSUFFIX MMU_MODE0_SUFFIX
@@ -416,8 +439,7 @@
 #if defined(CONFIG_USER_ONLY)
     return g2h(addr);
 #else
-    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index];
+    CPUTLBEntry *tlbentry = tlb_entry(env, mmu_idx, addr);
     abi_ptr tlb_addr;
     uintptr_t haddr;
 
@@ -426,7 +448,7 @@
         tlb_addr = tlbentry->addr_read;
         break;
     case 1:
-        tlb_addr = tlbentry->addr_write;
+        tlb_addr = tlb_addr_write(tlbentry);
         break;
     case 2:
         tlb_addr = tlbentry->addr_code;
@@ -445,7 +467,7 @@
         return NULL;
     }
 
-    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    haddr = addr + tlbentry->addend;
     return (void *)haddr;
 #endif /* defined(CONFIG_USER_ONLY) */
 }
diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h
index 4db2302..0f061d4 100644
--- a/include/exec/cpu_ldst_template.h
+++ b/include/exec/cpu_ldst_template.h
@@ -81,7 +81,7 @@
                                                   target_ulong ptr,
                                                   uintptr_t retaddr)
 {
-    int page_index;
+    CPUTLBEntry *entry;
     RES_TYPE res;
     target_ulong addr;
     int mmu_idx;
@@ -94,15 +94,15 @@
 #endif
 
     addr = ptr;
-    page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     mmu_idx = CPU_MMU_INDEX;
-    if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
+    entry = tlb_entry(env, mmu_idx, addr);
+    if (unlikely(entry->ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
         oi = make_memop_idx(SHIFT, mmu_idx);
         res = glue(glue(helper_ret_ld, URETSUFFIX), MMUSUFFIX)(env, addr,
                                                             oi, retaddr);
     } else {
-        uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
+        uintptr_t hostaddr = addr + entry->addend;
         res = glue(glue(ld, USUFFIX), _p)((uint8_t *)hostaddr);
     }
     return res;
@@ -120,7 +120,8 @@
                                                   target_ulong ptr,
                                                   uintptr_t retaddr)
 {
-    int res, page_index;
+    CPUTLBEntry *entry;
+    int res;
     target_ulong addr;
     int mmu_idx;
     TCGMemOpIdx oi;
@@ -132,15 +133,15 @@
 #endif
 
     addr = ptr;
-    page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     mmu_idx = CPU_MMU_INDEX;
-    if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
+    entry = tlb_entry(env, mmu_idx, addr);
+    if (unlikely(entry->ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
         oi = make_memop_idx(SHIFT, mmu_idx);
         res = (DATA_STYPE)glue(glue(helper_ret_ld, SRETSUFFIX),
                                MMUSUFFIX)(env, addr, oi, retaddr);
     } else {
-        uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
+        uintptr_t hostaddr = addr + entry->addend;
         res = glue(glue(lds, SUFFIX), _p)((uint8_t *)hostaddr);
     }
     return res;
@@ -162,7 +163,7 @@
                                                  target_ulong ptr,
                                                  RES_TYPE v, uintptr_t retaddr)
 {
-    int page_index;
+    CPUTLBEntry *entry;
     target_ulong addr;
     int mmu_idx;
     TCGMemOpIdx oi;
@@ -174,15 +175,15 @@
 #endif
 
     addr = ptr;
-    page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     mmu_idx = CPU_MMU_INDEX;
-    if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
+    entry = tlb_entry(env, mmu_idx, addr);
+    if (unlikely(tlb_addr_write(entry) !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
         oi = make_memop_idx(SHIFT, mmu_idx);
         glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi,
                                                      retaddr);
     } else {
-        uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
+        uintptr_t hostaddr = addr + entry->addend;
         glue(glue(st, SUFFIX), _p)((uint8_t *)hostaddr, v);
     }
 }
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 5f78125..815e5b1 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -100,6 +100,11 @@
 #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
 /* cputlb.c */
 /**
+ * tlb_init - initialize a CPU's TLB
+ * @cpu: CPU whose TLB should be initialized
+ */
+void tlb_init(CPUState *cpu);
+/**
  * tlb_flush_page:
  * @cpu: CPU whose TLB should be flushed
  * @addr: virtual address of page to be flushed
@@ -258,6 +263,9 @@
 void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
                  uintptr_t retaddr);
 #else
+static inline void tlb_init(CPUState *cpu)
+{
+}
 static inline void tlb_flush_page(CPUState *cpu, target_ulong addr)
 {
 }
diff --git a/include/exec/memory.h b/include/exec/memory.h
index eb4f2fb..d0c7f0d 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -201,11 +201,6 @@
          */
         bool unaligned;
     } impl;
-
-    /* If .read and .write are not present, old_mmio may be used for
-     * backwards compatibility with old mmio registration
-     */
-    const MemoryRegionMmio old_mmio;
 };
 
 enum IOMMUMemoryRegionAttr {
@@ -424,9 +419,9 @@
                         bool match_data, uint64_t data, EventNotifier *e);
     void (*eventfd_del)(MemoryListener *listener, MemoryRegionSection *section,
                         bool match_data, uint64_t data, EventNotifier *e);
-    void (*coalesced_mmio_add)(MemoryListener *listener, MemoryRegionSection *section,
+    void (*coalesced_io_add)(MemoryListener *listener, MemoryRegionSection *section,
                                hwaddr addr, hwaddr len);
-    void (*coalesced_mmio_del)(MemoryListener *listener, MemoryRegionSection *section,
+    void (*coalesced_io_del)(MemoryListener *listener, MemoryRegionSection *section,
                                hwaddr addr, hwaddr len);
     /* Lower = earlier (during add), later (during del) */
     unsigned priority;
@@ -633,7 +628,7 @@
                                                        uint64_t length,
                                                        void *host),
                                        Error **errp);
-#ifdef __linux__
+#ifdef CONFIG_POSIX
 
 /**
  * memory_region_init_ram_from_file:  Initialize RAM memory region with a
@@ -940,7 +935,7 @@
 /**
  * memory_region_is_ram: check whether a memory region is random access
  *
- * Returns %true is a memory region is random access.
+ * Returns %true if a memory region is random access.
  *
  * @mr: the memory region being queried
  */
@@ -952,7 +947,7 @@
 /**
  * memory_region_is_ram_device: check whether a memory region is a ram device
  *
- * Returns %true is a memory region is a device backed ram region
+ * Returns %true if a memory region is a device backed ram region
  *
  * @mr: the memory region being queried
  */
@@ -1166,7 +1161,7 @@
 /**
  * memory_region_is_rom: check whether a memory region is ROM
  *
- * Returns %true is a memory region is read-only memory.
+ * Returns %true if a memory region is read-only memory.
  *
  * @mr: the memory region being queried
  */
diff --git a/include/exec/poison.h b/include/exec/poison.h
index 97d3b56..32d5378 100644
--- a/include/exec/poison.h
+++ b/include/exec/poison.h
@@ -75,6 +75,7 @@
 #pragma GCC poison CONFIG_M68K_DIS
 #pragma GCC poison CONFIG_MICROBLAZE_DIS
 #pragma GCC poison CONFIG_MIPS_DIS
+#pragma GCC poison CONFIG_NANOMIPS_DIS
 #pragma GCC poison CONFIG_MOXIE_DIS
 #pragma GCC poison CONFIG_NIOS2_DIS
 #pragma GCC poison CONFIG_PPC_DIS
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 3abb639..9ecd911 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -27,6 +27,7 @@
     struct rcu_head rcu;
     struct MemoryRegion *mr;
     uint8_t *host;
+    uint8_t *colo_cache; /* For colo, VM's ram cache */
     ram_addr_t offset;
     ram_addr_t used_length;
     ram_addr_t max_length;
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
index 35e1603..c86687f 100644
--- a/include/fpu/softfloat-macros.h
+++ b/include/fpu/softfloat-macros.h
@@ -80,17 +80,6 @@
  */
 
 /*----------------------------------------------------------------------------
-| This macro tests for minimum version of the GNU C compiler.
-*----------------------------------------------------------------------------*/
-#if defined(__GNUC__) && defined(__GNUC_MINOR__)
-# define SOFTFLOAT_GNUC_PREREQ(maj, min) \
-         ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
-#else
-# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
-#endif
-
-
-/*----------------------------------------------------------------------------
 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
 | bits are shifted off, they are ``jammed'' into the least significant bit of
 | the result by setting the least significant bit to 1.  The value of `count'
@@ -340,15 +329,30 @@
 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- shortShift128Left(
-     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
+static inline void shortShift128Left(uint64_t a0, uint64_t a1, int count,
+                                     uint64_t *z0Ptr, uint64_t *z1Ptr)
 {
+    *z1Ptr = a1 << count;
+    *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
+}
 
-    *z1Ptr = a1<<count;
-    *z0Ptr =
-        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' may be greater than 64.  The result is broken into two 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
 
+static inline void shift128Left(uint64_t a0, uint64_t a1, int count,
+                                uint64_t *z0Ptr, uint64_t *z1Ptr)
+{
+    if (count < 64) {
+        *z1Ptr = a1 << count;
+        *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
+    } else {
+        *z1Ptr = 0;
+        *z0Ptr = a1 << (count - 64);
+    }
 }
 
 /*----------------------------------------------------------------------------
@@ -630,8 +634,36 @@
  *
  * Licensed under the GPLv2/LGPLv3
  */
-static inline uint64_t div128To64(uint64_t n0, uint64_t n1, uint64_t d)
+static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
+                                  uint64_t n0, uint64_t d)
 {
+#if defined(__x86_64__)
+    uint64_t q;
+    asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
+    return q;
+#elif defined(__s390x__)
+    /* Need to use a TImode type to get an even register pair for DLGR.  */
+    unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
+    asm("dlgr %0, %1" : "+r"(n) : "r"(d));
+    *r = n >> 64;
+    return n;
+#elif defined(_ARCH_PPC64)
+    /* From Power ISA 3.0B, programming note for divdeu.  */
+    uint64_t q1, q2, Q, r1, r2, R;
+    asm("divdeu %0,%2,%4; divdu %1,%3,%4"
+        : "=&r"(q1), "=r"(q2)
+        : "r"(n1), "r"(n0), "r"(d));
+    r1 = -(q1 * d);         /* low part of (n1<<64) - (q1 * d) */
+    r2 = n0 - (q2 * d);
+    Q = q1 + q2;
+    R = r1 + r2;
+    if (R >= d || R < r2) { /* overflow implies R > d */
+        Q += 1;
+        R -= d;
+    }
+    *r = R;
+    return Q;
+#else
     uint64_t d0, d1, q0, q1, r1, r0, m;
 
     d0 = (uint32_t)d;
@@ -669,8 +701,9 @@
     }
     r0 -= m;
 
-    /* Return remainder in LSB */
-    return (q1 << 32) | q0 | (r0 != 0);
+    *r = r0;
+    return (q1 << 32) | q0;
+#endif
 }
 
 /*----------------------------------------------------------------------------
@@ -713,82 +746,6 @@
 }
 
 /*----------------------------------------------------------------------------
-| Returns the number of leading 0 bits before the most-significant 1 bit of
-| `a'.  If `a' is zero, 32 is returned.
-*----------------------------------------------------------------------------*/
-
-static inline int8_t countLeadingZeros32(uint32_t a)
-{
-#if SOFTFLOAT_GNUC_PREREQ(3, 4)
-    if (a) {
-        return __builtin_clz(a);
-    } else {
-        return 32;
-    }
-#else
-    static const int8_t countLeadingZerosHigh[] = {
-        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
-        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-    };
-    int8_t shiftCount;
-
-    shiftCount = 0;
-    if ( a < 0x10000 ) {
-        shiftCount += 16;
-        a <<= 16;
-    }
-    if ( a < 0x1000000 ) {
-        shiftCount += 8;
-        a <<= 8;
-    }
-    shiftCount += countLeadingZerosHigh[ a>>24 ];
-    return shiftCount;
-#endif
-}
-
-/*----------------------------------------------------------------------------
-| Returns the number of leading 0 bits before the most-significant 1 bit of
-| `a'.  If `a' is zero, 64 is returned.
-*----------------------------------------------------------------------------*/
-
-static inline int8_t countLeadingZeros64(uint64_t a)
-{
-#if SOFTFLOAT_GNUC_PREREQ(3, 4)
-    if (a) {
-        return __builtin_clzll(a);
-    } else {
-        return 64;
-    }
-#else
-    int8_t shiftCount;
-
-    shiftCount = 0;
-    if ( a < ( (uint64_t) 1 )<<32 ) {
-        shiftCount += 32;
-    }
-    else {
-        a >>= 32;
-    }
-    shiftCount += countLeadingZeros32( a );
-    return shiftCount;
-#endif
-}
-
-/*----------------------------------------------------------------------------
 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
 | Otherwise, returns 0.
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index cc1b58b..8fd9f9b 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -535,7 +535,6 @@
 | Software IEC/IEEE double-precision operations.
 *----------------------------------------------------------------------------*/
 float64 float64_round_to_int(float64, float_status *status);
-float64 float64_trunc_to_int(float64, float_status *status);
 float64 float64_add(float64, float64, float_status *status);
 float64 float64_sub(float64, float64, float_status *status);
 float64 float64_mul(float64, float64, float_status *status);
diff --git a/include/hw/audio/wm8750.h b/include/hw/audio/wm8750.h
index 84e7a11..e12cb88 100644
--- a/include/hw/audio/wm8750.h
+++ b/include/hw/audio/wm8750.h
@@ -17,6 +17,7 @@
 #include "hw/hw.h"
 
 #define TYPE_WM8750 "wm8750"
+#define TYPE_MV88W8618_AUDIO "mv88w8618_audio"
 
 typedef void data_req_cb(void *opaque, int free_out, int free_in);
 
diff --git a/include/hw/display/edid.h b/include/hw/display/edid.h
index bd51d26..bacf170 100644
--- a/include/hw/display/edid.h
+++ b/include/hw/display/edid.h
@@ -4,7 +4,7 @@
 #include "hw/hw.h"
 
 typedef struct qemu_edid_info {
-    const char *vendor;
+    const char *vendor; /* http://www.uefi.org/pnp_id_list */
     const char *name;
     const char *serial;
     uint32_t    dpi;
diff --git a/include/hw/hotplug.h b/include/hw/hotplug.h
index 51541d6..1a0516a 100644
--- a/include/hw/hotplug.h
+++ b/include/hw/hotplug.h
@@ -47,8 +47,6 @@
  * @parent: Opaque parent interface.
  * @pre_plug: pre plug callback called at start of device.realize(true)
  * @plug: plug callback called at end of device.realize(true).
- * @post_plug: post plug callback called after device.realize(true) and device
- *             reset
  * @unplug_request: unplug request callback.
  *                  Used as a means to initiate device unplug for devices that
  *                  require asynchronous unplug handling.
@@ -63,7 +61,6 @@
     /* <public> */
     hotplug_fn pre_plug;
     hotplug_fn plug;
-    void (*post_plug)(HotplugHandler *plug_handler, DeviceState *plugged_dev);
     hotplug_fn unplug_request;
     hotplug_fn unplug;
 } HotplugHandlerClass;
@@ -87,14 +84,6 @@
                               Error **errp);
 
 /**
- * hotplug_handler_post_plug:
- *
- * Call #HotplugHandlerClass.post_plug callback of @plug_handler.
- */
-void hotplug_handler_post_plug(HotplugHandler *plug_handler,
-                               DeviceState *plugged_dev);
-
-/**
  * hotplug_handler_unplug_request:
  *
  * Calls #HotplugHandlerClass.unplug_request callback of @plug_handler.
diff --git a/include/hw/hyperv/hyperv-proto.h b/include/hw/hyperv/hyperv-proto.h
new file mode 100644
index 0000000..21dc28a
--- /dev/null
+++ b/include/hw/hyperv/hyperv-proto.h
@@ -0,0 +1,130 @@
+/*
+ * Definitions for Hyper-V guest/hypervisor interaction
+ *
+ * Copyright (c) 2017-2018 Virtuozzo International GmbH.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_HYPERV_HYPERV_PROTO_H
+#define HW_HYPERV_HYPERV_PROTO_H
+
+#include "qemu/bitmap.h"
+
+/*
+ * Hypercall status code
+ */
+#define HV_STATUS_SUCCESS                     0
+#define HV_STATUS_INVALID_HYPERCALL_CODE      2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT     3
+#define HV_STATUS_INVALID_ALIGNMENT           4
+#define HV_STATUS_INVALID_PARAMETER           5
+#define HV_STATUS_INSUFFICIENT_MEMORY         11
+#define HV_STATUS_INVALID_PORT_ID             17
+#define HV_STATUS_INVALID_CONNECTION_ID       18
+#define HV_STATUS_INSUFFICIENT_BUFFERS        19
+
+/*
+ * Hypercall numbers
+ */
+#define HV_POST_MESSAGE                       0x005c
+#define HV_SIGNAL_EVENT                       0x005d
+#define HV_HYPERCALL_FAST                     (1u << 16)
+
+/*
+ * Message size
+ */
+#define HV_MESSAGE_PAYLOAD_SIZE               240
+
+/*
+ * Message types
+ */
+#define HV_MESSAGE_NONE                       0x00000000
+#define HV_MESSAGE_VMBUS                      0x00000001
+#define HV_MESSAGE_UNMAPPED_GPA               0x80000000
+#define HV_MESSAGE_GPA_INTERCEPT              0x80000001
+#define HV_MESSAGE_TIMER_EXPIRED              0x80000010
+#define HV_MESSAGE_INVALID_VP_REGISTER_VALUE  0x80000020
+#define HV_MESSAGE_UNRECOVERABLE_EXCEPTION    0x80000021
+#define HV_MESSAGE_UNSUPPORTED_FEATURE        0x80000022
+#define HV_MESSAGE_EVENTLOG_BUFFERCOMPLETE    0x80000040
+#define HV_MESSAGE_X64_IOPORT_INTERCEPT       0x80010000
+#define HV_MESSAGE_X64_MSR_INTERCEPT          0x80010001
+#define HV_MESSAGE_X64_CPUID_INTERCEPT        0x80010002
+#define HV_MESSAGE_X64_EXCEPTION_INTERCEPT    0x80010003
+#define HV_MESSAGE_X64_APIC_EOI               0x80010004
+#define HV_MESSAGE_X64_LEGACY_FP_ERROR        0x80010005
+
+/*
+ * Message flags
+ */
+#define HV_MESSAGE_FLAG_PENDING               0x1
+
+/*
+ * Number of synthetic interrupts
+ */
+#define HV_SINT_COUNT                         16
+
+/*
+ * Number of event flags per SINT
+ */
+#define HV_EVENT_FLAGS_COUNT                  (256 * 8)
+
+/*
+ * Connection id valid bits
+ */
+#define HV_CONNECTION_ID_MASK                 0x00ffffff
+
+/*
+ * Input structure for POST_MESSAGE hypercall
+ */
+struct hyperv_post_message_input {
+    uint32_t connection_id;
+    uint32_t _reserved;
+    uint32_t message_type;
+    uint32_t payload_size;
+    uint8_t  payload[HV_MESSAGE_PAYLOAD_SIZE];
+};
+
+/*
+ * Input structure for SIGNAL_EVENT hypercall
+ */
+struct hyperv_signal_event_input {
+    uint32_t connection_id;
+    uint16_t flag_number;
+    uint16_t _reserved_zero;
+};
+
+/*
+ * SynIC message structures
+ */
+struct hyperv_message_header {
+    uint32_t message_type;
+    uint8_t  payload_size;
+    uint8_t  message_flags; /* HV_MESSAGE_FLAG_XX */
+    uint8_t  _reserved[2];
+    uint64_t sender;
+};
+
+struct hyperv_message {
+    struct hyperv_message_header header;
+    uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE];
+};
+
+struct hyperv_message_page {
+    struct hyperv_message slot[HV_SINT_COUNT];
+};
+
+/*
+ * SynIC event flags structures
+ */
+struct hyperv_event_flags {
+    DECLARE_BITMAP(flags, HV_EVENT_FLAGS_COUNT);
+};
+
+struct hyperv_event_flags_page {
+    struct hyperv_event_flags slot[HV_SINT_COUNT];
+};
+
+#endif
diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h
new file mode 100644
index 0000000..597381c
--- /dev/null
+++ b/include/hw/hyperv/hyperv.h
@@ -0,0 +1,83 @@
+/*
+ * Hyper-V guest/hypervisor interaction
+ *
+ * Copyright (c) 2015-2018 Virtuozzo International GmbH.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_HYPERV_HYPERV_H
+#define HW_HYPERV_HYPERV_H
+
+#include "cpu-qom.h"
+#include "hw/hyperv/hyperv-proto.h"
+
+typedef struct HvSintRoute HvSintRoute;
+
+/*
+ * Callback executed in a bottom-half when the status of posting the message
+ * becomes known, before unblocking the connection for further messages
+ */
+typedef void (*HvSintMsgCb)(void *data, int status);
+
+HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
+                                   HvSintMsgCb cb, void *cb_data);
+void hyperv_sint_route_ref(HvSintRoute *sint_route);
+void hyperv_sint_route_unref(HvSintRoute *sint_route);
+
+int hyperv_sint_route_set_sint(HvSintRoute *sint_route);
+
+/*
+ * Submit a message to be posted in vcpu context.  If the submission succeeds,
+ * the status of posting the message is reported via the callback associated
+ * with the @sint_route; until then no more messages are accepted.
+ */
+int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *msg);
+/*
+ * Set event flag @eventno, and signal the SINT if the flag has changed.
+ */
+int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno);
+
+/*
+ * Handler for messages arriving from the guest via HV_POST_MESSAGE hypercall.
+ * Executed in vcpu context.
+ */
+typedef uint16_t (*HvMsgHandler)(const struct hyperv_post_message_input *msg,
+                                 void *data);
+/*
+ * Associate @handler with the message connection @conn_id, such that @handler
+ * is called with @data when the guest executes HV_POST_MESSAGE hypercall on
+ * @conn_id.  If @handler is NULL clear the association.
+ */
+int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data);
+/*
+ * Associate @notifier with the event connection @conn_id, such that @notifier
+ * is signaled when the guest executes HV_SIGNAL_EVENT hypercall on @conn_id.
+ * If @notifier is NULL clear the association.
+ */
+int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier);
+
+/*
+ * Process HV_POST_MESSAGE hypercall: parse the data in the guest memory as
+ * specified in @param, and call the HvMsgHandler associated with the
+ * connection on the message contained therein.
+ */
+uint16_t hyperv_hcall_post_message(uint64_t param, bool fast);
+/*
+ * Process HV_SIGNAL_EVENT hypercall: signal the EventNotifier associated with
+ * the connection as specified in @param.
+ */
+uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast);
+
+static inline uint32_t hyperv_vp_index(CPUState *cs)
+{
+    return cs->cpu_index;
+}
+
+void hyperv_synic_add(CPUState *cs);
+void hyperv_synic_reset(CPUState *cs);
+void hyperv_synic_update(CPUState *cs, bool enable,
+                         hwaddr msg_page_addr, hwaddr event_page_addr);
+
+#endif
diff --git a/include/hw/i2c/i2c-ddc.h b/include/hw/i2c/i2c-ddc.h
index d9b5f33..c29443c 100644
--- a/include/hw/i2c/i2c-ddc.h
+++ b/include/hw/i2c/i2c-ddc.h
@@ -19,14 +19,16 @@
 #ifndef I2C_DDC_H
 #define I2C_DDC_H
 
-/* A simple I2C slave which just returns the contents of its EDID blob. */
+#include "hw/display/edid.h"
 
+/* A simple I2C slave which just returns the contents of its EDID blob. */
 struct I2CDDCState {
     /*< private >*/
     I2CSlave i2c;
     /*< public >*/
     bool firstbyte;
     uint8_t reg;
+    qemu_edid_info edid_info;
     uint8_t edid_blob[128];
 };
 
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 6894f37..dfe6746 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -294,6 +294,14 @@
 int e820_get_num_entries(void);
 bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
 
+#define PC_COMPAT_3_0 \
+    HW_COMPAT_3_0 \
+    {\
+        .driver   = TYPE_X86_CPU,\
+        .property = "x-hv-synic-kvm-only",\
+        .value    = "on",\
+    }
+
 #define PC_COMPAT_2_12 \
     HW_COMPAT_2_12 \
     {\
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
index b798486..31ec9a1 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -62,7 +62,7 @@
  * avoids bugs where we forget to subtract GIC_INTERNAL from an
  * interrupt number.
  */
-#define GICV3_BMP_SIZE (DIV_ROUND_UP(GICV3_MAXIRQ, 32))
+#define GICV3_BMP_SIZE DIV_ROUND_UP(GICV3_MAXIRQ, 32)
 
 #define GIC_DECLARE_BITMAP(name) \
     uint32_t name[GICV3_BMP_SIZE]
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 3c11297..67a0af8 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -10,7 +10,7 @@
  * Returns the size of the image file on success, -1 otherwise.
  * On error, errno is also set as appropriate.
  */
-int get_image_size(const char *filename);
+int64_t get_image_size(const char *filename);
 int load_image(const char *filename, uint8_t *addr); /* deprecated */
 ssize_t load_image_size(const char *filename, void *addr, size_t size);
 
diff --git a/include/hw/mem/memory-device.h b/include/hw/mem/memory-device.h
index 2853b08..e904e19 100644
--- a/include/hw/mem/memory-device.h
+++ b/include/hw/mem/memory-device.h
@@ -29,23 +29,81 @@
     Object parent_obj;
 } MemoryDeviceState;
 
+/**
+ * MemoryDeviceClass:
+ *
+ * All memory devices need to implement TYPE_MEMORY_DEVICE as an interface.
+ *
+ * A memory device is a device that owns a memory region which is
+ * mapped into guest physical address space at a certain address. The
+ * address in guest physical memory can either be specified explicitly
+ * or get assigned automatically.
+ *
+ * Conceptually, memory devices only span one memory region. If multiple
+ * successive memory regions are used, a covering memory region has to
+ * be provided. Scattered memory regions are not supported for single
+ * devices.
+ */
 typedef struct MemoryDeviceClass {
+    /* private */
     InterfaceClass parent_class;
 
+    /*
+     * Return the address of the memory device in guest physical memory.
+     *
+     * Called when (un)plugging a memory device or when iterating over
+     * all memory devices mapped into guest physical address space.
+     *
+     * If "0" is returned, no address has been specified by the user and
+     * no address has been assigned to this memory device yet.
+     */
     uint64_t (*get_addr)(const MemoryDeviceState *md);
-    uint64_t (*get_plugged_size)(const MemoryDeviceState *md);
-    uint64_t (*get_region_size)(const MemoryDeviceState *md);
+
+    /*
+     * Set the address of the memory device in guest physical memory.
+     *
+     * Called when plugging the memory device to configure the determined
+     * address in guest physical memory.
+     */
+    void (*set_addr)(MemoryDeviceState *md, uint64_t addr, Error **errp);
+
+    /*
+     * Return the amount of memory provided by the memory device currently
+     * usable ("plugged") by the VM.
+     *
+     * Called when calculating the total amount of ram available to the
+     * VM (e.g. to report memory stats to the user).
+     *
+     * This is helpful for devices that dynamically manage the amount of
+     * memory accessible by the guest via the reserved memory region. For
+     * most devices, this corresponds to the size of the memory region.
+     */
+    uint64_t (*get_plugged_size)(const MemoryDeviceState *md, Error **errp);
+
+    /*
+     * Return the memory region of the memory device.
+     *
+     * Called when (un)plugging the memory device, to (un)map the
+     * memory region in guest physical memory, but also to detect the
+     * required alignment during address assignment or when the size of the
+     * memory region is required.
+     */
+    MemoryRegion *(*get_memory_region)(MemoryDeviceState *md, Error **errp);
+
+    /*
+     * Translate the memory device into #MemoryDeviceInfo.
+     */
     void (*fill_device_info)(const MemoryDeviceState *md,
                              MemoryDeviceInfo *info);
 } MemoryDeviceClass;
 
 MemoryDeviceInfoList *qmp_memory_device_list(void);
 uint64_t get_plugged_memory_size(void);
-uint64_t memory_device_get_free_addr(MachineState *ms, const uint64_t *hint,
-                                     uint64_t align, uint64_t size,
-                                     Error **errp);
-void memory_device_plug_region(MachineState *ms, MemoryRegion *mr,
-                               uint64_t addr);
-void memory_device_unplug_region(MachineState *ms, MemoryRegion *mr);
+void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
+                            const uint64_t *legacy_align, Error **errp);
+void memory_device_plug(MemoryDeviceState *md, MachineState *ms);
+void memory_device_unplug(MemoryDeviceState *md, MachineState *ms);
+uint64_t memory_device_get_region_size(const MemoryDeviceState *md,
+                                       Error **errp);
 
 #endif
diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h
index b382eb4..01436b9 100644
--- a/include/hw/mem/pc-dimm.h
+++ b/include/hw/mem/pc-dimm.h
@@ -61,9 +61,6 @@
  * PCDIMMDeviceClass:
  * @realize: called after common dimm is realized so that the dimm based
  * devices get the chance to do specified operations.
- * @get_memory_region: returns #MemoryRegion associated with @dimm which
- * is directly mapped into the physical address space of guest. Will not
- * fail after the device was realized.
  * @get_vmstate_memory_region: returns #MemoryRegion which indicates the
  * memory of @dimm should be kept during live migration. Will not fail
  * after the device was realized.
@@ -74,13 +71,12 @@
 
     /* public */
     void (*realize)(PCDIMMDevice *dimm, Error **errp);
-    MemoryRegion *(*get_memory_region)(PCDIMMDevice *dimm, Error **errp);
     MemoryRegion *(*get_vmstate_memory_region)(PCDIMMDevice *dimm,
                                                Error **errp);
 } PCDIMMDeviceClass;
 
-void pc_dimm_pre_plug(DeviceState *dev, MachineState *machine,
+void pc_dimm_pre_plug(PCDIMMDevice *dimm, MachineState *machine,
                       const uint64_t *legacy_align, Error **errp);
-void pc_dimm_plug(DeviceState *dev, MachineState *machine, Error **errp);
-void pc_dimm_unplug(DeviceState *dev, MachineState *machine);
+void pc_dimm_plug(PCDIMMDevice *dimm, MachineState *machine, Error **errp);
+void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine);
 #endif
diff --git a/include/hw/net/cadence_gem.h b/include/hw/net/cadence_gem.h
index 35de622..5426961 100644
--- a/include/hw/net/cadence_gem.h
+++ b/include/hw/net/cadence_gem.h
@@ -32,6 +32,9 @@
 
 #define CADENCE_GEM_MAXREG        (0x00000800 / 4) /* Last valid GEM address */
 
+/* Max number of words in a DMA descriptor.  */
+#define DESC_MAX_NUM_WORDS              6
+
 #define MAX_PRIORITY_QUEUES             8
 #define MAX_TYPE1_SCREENERS             16
 #define MAX_TYPE2_SCREENERS             16
@@ -42,6 +45,8 @@
 
     /*< public >*/
     MemoryRegion iomem;
+    MemoryRegion *dma_mr;
+    AddressSpace dma_as;
     NICState *nic;
     NICConf conf;
     qemu_irq irq[MAX_PRIORITY_QUEUES];
@@ -74,7 +79,7 @@
 
     uint8_t can_rx_state; /* Debug only */
 
-    unsigned rx_desc[MAX_PRIORITY_QUEUES][2];
+    uint32_t rx_desc[MAX_PRIORITY_QUEUES][DESC_MAX_NUM_WORDS];
 
     bool sar_active[4];
 } CadenceGEMState;
diff --git a/include/hw/s390x/ap-bridge.h b/include/hw/s390x/ap-bridge.h
new file mode 100644
index 0000000..470e439
--- /dev/null
+++ b/include/hw/s390x/ap-bridge.h
@@ -0,0 +1,19 @@
+/*
+ * ap bridge
+ *
+ * Copyright 2018 IBM Corp.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390X_AP_BRIDGE_H
+#define HW_S390X_AP_BRIDGE_H
+
+#define TYPE_AP_BRIDGE "ap-bridge"
+#define TYPE_AP_BUS "ap-bus"
+
+void s390_init_ap(void);
+
+#endif
diff --git a/include/hw/s390x/ap-device.h b/include/hw/s390x/ap-device.h
new file mode 100644
index 0000000..765e908
--- /dev/null
+++ b/include/hw/s390x/ap-device.h
@@ -0,0 +1,22 @@
+/*
+ * Adjunct Processor (AP) matrix device interfaces
+ *
+ * Copyright 2018 IBM Corp.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#ifndef HW_S390X_AP_DEVICE_H
+#define HW_S390X_AP_DEVICE_H
+
+#define AP_DEVICE_TYPE       "ap-device"
+
+typedef struct APDevice {
+    DeviceState parent_obj;
+} APDevice;
+
+#define AP_DEVICE(obj) \
+    OBJECT_CHECK(APDevice, (obj), AP_DEVICE_TYPE)
+
+#endif /* HW_S390X_AP_DEVICE_H */
diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h
index 9da5912..aae19c4 100644
--- a/include/hw/s390x/css.h
+++ b/include/hw/s390x/css.h
@@ -48,7 +48,7 @@
     uint8_t unused;          /* padding byte */
     /* extended part */
     CIW ciw[MAX_CIWS];       /* variable # of CIWs */
-} QEMU_PACKED SenseId;
+} SenseId;                   /* Note: No QEMU_PACKED due to unaligned members */
 
 /* Channel measurements, from linux/drivers/s390/cio/cmf.c. */
 typedef struct CMB {
@@ -118,11 +118,12 @@
 typedef struct SubchDev SubchDev;
 struct SubchDev {
     /* channel-subsystem related things: */
+    SCHIB curr_status;           /* Needs alignment and thus must come first */
+    ORB orb;
     uint8_t cssid;
     uint8_t ssid;
     uint16_t schid;
     uint16_t devno;
-    SCHIB curr_status;
     uint8_t sense_data[32];
     hwaddr channel_prog;
     CCW1 last_cmd;
@@ -131,7 +132,6 @@
     bool thinint_active;
     uint8_t ccw_no_data_cnt;
     uint16_t migrated_schid; /* used for missmatch detection */
-    ORB orb;
     CcwDataStream cds;
     /* transport-provided data: */
     int (*ccw_cb) (SubchDev *, CCW1);
diff --git a/include/hw/s390x/ioinst.h b/include/hw/s390x/ioinst.h
index 5f2db69..c6737a3 100644
--- a/include/hw/s390x/ioinst.h
+++ b/include/hw/s390x/ioinst.h
@@ -25,7 +25,8 @@
     uint8_t dstat;
     uint8_t cstat;
     uint16_t count;
-} QEMU_PACKED SCSW;
+} SCSW;
+QEMU_BUILD_BUG_MSG(sizeof(SCSW) != 12, "size of SCSW is wrong");
 
 #define SCSW_FLAGS_MASK_KEY 0xf000
 #define SCSW_FLAGS_MASK_SCTL 0x0800
@@ -94,7 +95,8 @@
     uint8_t  pam;
     uint8_t  chpid[8];
     uint32_t chars;
-} QEMU_PACKED PMCW;
+} PMCW;
+QEMU_BUILD_BUG_MSG(sizeof(PMCW) != 28, "size of PMCW is wrong");
 
 #define PMCW_FLAGS_MASK_QF 0x8000
 #define PMCW_FLAGS_MASK_W 0x4000
@@ -127,7 +129,8 @@
     uint32_t esw[5];
     uint32_t ecw[8];
     uint32_t emw[8];
-} QEMU_PACKED IRB;
+} IRB;
+QEMU_BUILD_BUG_MSG(sizeof(IRB) != 96, "size of IRB is wrong");
 
 /* operation request block */
 typedef struct ORB {
@@ -136,7 +139,8 @@
     uint8_t lpm;
     uint8_t ctrl1;
     uint32_t cpa;
-} QEMU_PACKED ORB;
+} ORB;
+QEMU_BUILD_BUG_MSG(sizeof(ORB) != 12, "size of ORB is wrong");
 
 #define ORB_CTRL0_MASK_KEY 0xf000
 #define ORB_CTRL0_MASK_SPND 0x0800
@@ -165,7 +169,8 @@
         uint8_t flags;
         uint8_t reserved;
         uint16_t count;
-} QEMU_PACKED CCW0;
+} CCW0;
+QEMU_BUILD_BUG_MSG(sizeof(CCW0) != 8, "size of CCW0 is wrong");
 
 /* channel command word (type 1) */
 typedef struct CCW1 {
@@ -173,7 +178,8 @@
     uint8_t flags;
     uint16_t count;
     uint32_t cda;
-} QEMU_PACKED CCW1;
+} CCW1;
+QEMU_BUILD_BUG_MSG(sizeof(CCW1) != 8, "size of CCW1 is wrong");
 
 #define CCW_FLAG_DC              0x80
 #define CCW_FLAG_CC              0x40
@@ -192,7 +198,8 @@
 typedef struct CRW {
     uint16_t flags;
     uint16_t rsid;
-} QEMU_PACKED CRW;
+} CRW;
+QEMU_BUILD_BUG_MSG(sizeof(CRW) != 4, "size of CRW is wrong");
 
 #define CRW_FLAGS_MASK_S 0x4000
 #define CRW_FLAGS_MASK_R 0x2000
diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h
index e9c4f41..8aa2719 100644
--- a/include/hw/s390x/s390-virtio-ccw.h
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -39,12 +39,15 @@
     bool ri_allowed;
     bool cpu_model_allowed;
     bool css_migration_enabled;
+    bool hpage_1m_allowed;
 } S390CcwMachineClass;
 
 /* runtime-instrumentation allowed by the machine */
 bool ri_allowed(void);
 /* cpu model allowed by the machine */
 bool cpu_model_allowed(void);
+/* 1M huge page mappings allowed by the machine */
+bool hpage_1m_allowed(void);
 
 /**
  * Returns true if (vmstate based) migration of the channel subsystem
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 821def0..1b434d0 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -26,17 +26,18 @@
 #include "qemu/queue.h"
 #include "qemu/notify.h"
 #include "ui/console.h"
+#include "hw/display/ramfb.h"
 #ifdef CONFIG_LINUX
 #include <linux/vfio.h>
 #endif
 
-#define ERR_PREFIX "vfio error: %s: "
-#define WARN_PREFIX "vfio warning: %s: "
+#define VFIO_MSG_PREFIX "vfio %s: "
 
 enum {
     VFIO_DEVICE_TYPE_PCI = 0,
     VFIO_DEVICE_TYPE_PLATFORM = 1,
     VFIO_DEVICE_TYPE_CCW = 2,
+    VFIO_DEVICE_TYPE_AP = 3,
 };
 
 typedef struct VFIOMmap {
@@ -146,6 +147,7 @@
 
 typedef struct VFIODisplay {
     QemuConsole *con;
+    RAMFBState *ramfb;
     struct {
         VFIORegion buffer;
         DisplaySurface *surface;
diff --git a/include/hw/vfio/vfio-platform.h b/include/hw/vfio/vfio-platform.h
index 9baaa2db..0ee10b1 100644
--- a/include/hw/vfio/vfio-platform.h
+++ b/include/hw/vfio/vfio-platform.h
@@ -54,7 +54,8 @@
     QLIST_HEAD(, VFIOINTp) intp_list; /* list of IRQs */
     /* queue of pending IRQs */
     QSIMPLEQ_HEAD(pending_intp_queue, VFIOINTp) pending_intp_queue;
-    char *compat; /* compatibility string */
+    char *compat; /* DT compatible values, separated by NUL */
+    unsigned int num_compat; /* number of compatible values */
     uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
     QEMUTimer *mmap_timer; /* allows fast-path resume after IRQ hit */
     QemuMutex intp_mutex; /* protect the intp_list IRQ state */
diff --git a/include/migration/colo.h b/include/migration/colo.h
index 2fe48ad..99ce17a 100644
--- a/include/migration/colo.h
+++ b/include/migration/colo.h
@@ -16,14 +16,21 @@
 #include "qemu-common.h"
 #include "qapi/qapi-types-migration.h"
 
+enum colo_event {
+    COLO_EVENT_NONE,
+    COLO_EVENT_CHECKPOINT,
+    COLO_EVENT_FAILOVER,
+};
+
 void colo_info_init(void);
 
 void migrate_start_colo_process(MigrationState *s);
 bool migration_in_colo_state(void);
 
 /* loadvm */
-bool migration_incoming_enable_colo(void);
-void migration_incoming_exit_colo(void);
+void migration_incoming_enable_colo(void);
+void migration_incoming_disable_colo(void);
+bool migration_incoming_colo_enabled(void);
 void *colo_process_incoming_thread(void *opaque);
 bool migration_incoming_in_colo_state(void);
 
diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index 2ef5e04..6fd2c53 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -47,4 +47,7 @@
 void monitor_fdset_dup_fd_remove(int dup_fd);
 int monitor_fdset_dup_fd_find(int dup_fd);
 
+void monitor_vfprintf(FILE *stream,
+                      const char *fmt, va_list ap) GCC_FMT_ATTR(2, 0);
+
 #endif /* MONITOR_H */
diff --git a/include/net/filter.h b/include/net/filter.h
index 435acd6..49da666 100644
--- a/include/net/filter.h
+++ b/include/net/filter.h
@@ -38,6 +38,8 @@
 
 typedef void (FilterStatusChanged) (NetFilterState *nf, Error **errp);
 
+typedef void (FilterHandleEvent) (NetFilterState *nf, int event, Error **errp);
+
 typedef struct NetFilterClass {
     ObjectClass parent_class;
 
@@ -45,6 +47,7 @@
     FilterSetup *setup;
     FilterCleanup *cleanup;
     FilterStatusChanged *status_changed;
+    FilterHandleEvent *handle_event;
     /* mandatory */
     FilterReceiveIOV *receive_iov;
 } NetFilterClass;
@@ -77,4 +80,6 @@
                                     int iovcnt,
                                     void *opaque);
 
+void colo_notify_filters_event(int event, Error **errp);
+
 #endif /* QEMU_NET_FILTER_H */
diff --git a/include/qapi/error.h b/include/qapi/error.h
index bcb86a7..51b63dd 100644
--- a/include/qapi/error.h
+++ b/include/qapi/error.h
@@ -52,8 +52,12 @@
  * where Error **errp is a parameter, by convention the last one.
  *
  * Pass an existing error to the caller with the message modified:
+ *     error_propagate_prepend(errp, err, "Could not frobnicate '%s': ", name);
+ *
+ * Avoid
  *     error_propagate(errp, err);
  *     error_prepend(errp, "Could not frobnicate '%s': ", name);
+ * because this fails to prepend when @errp is &error_fatal.
  *
  * Create a new error and pass it to the caller:
  *     error_setg(errp, "situation normal, all fouled up");
@@ -215,6 +219,16 @@
  */
 void error_propagate(Error **dst_errp, Error *local_err);
 
+
+/*
+ * Propagate error object (if any) with some text prepended.
+ * Behaves like
+ *     error_prepend(&local_err, fmt, ...);
+ *     error_propagate(dst_errp, local_err);
+ */
+void error_propagate_prepend(Error **dst_errp, Error *local_err,
+                             const char *fmt, ...);
+
 /*
  * Prepend some text to @errp's human-readable error message.
  * The text is made by formatting @fmt, @ap like vprintf().
diff --git a/include/qapi/qmp/qerror.h b/include/qapi/qmp/qerror.h
index 145571f..7c76e24 100644
--- a/include/qapi/qmp/qerror.h
+++ b/include/qapi/qmp/qerror.h
@@ -79,6 +79,9 @@
 #define QERR_QGA_COMMAND_FAILED \
     "Guest agent command failed, error was '%s'"
 
+#define QERR_REPLAY_NOT_SUPPORTED \
+    "Record/replay feature is not supported for '%s'"
+
 #define QERR_SET_PASSWD_FAILED \
     "Could not set password"
 
@@ -88,7 +91,4 @@
 #define QERR_UNSUPPORTED \
     "this feature or command is not currently supported"
 
-#define QERR_REPLAY_NOT_SUPPORTED \
-    "Record/replay feature is not supported for '%s'"
-
 #endif /* QERROR_H */
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 85f4749..ed60ba2 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -17,7 +17,7 @@
 #define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)
 
 /* Copyright string for -version arguments, About dialogs, etc */
-#define QEMU_COPYRIGHT "Copyright (c) 2003-2017 " \
+#define QEMU_COPYRIGHT "Copyright (c) 2003-2018 " \
     "Fabrice Bellard and the QEMU Project developers"
 
 /* Bug reporting information for --help arguments, About dialogs, etc */
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 9ed39ef..f6993a8 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -98,7 +98,7 @@
  * We'd prefer not want to pull in everything else TCG related, so handle
  * those few cases by hand.
  *
- * Note that x32 is fully detected with __x64_64__ + _ILP32, and that for
+ * Note that x32 is fully detected with __x86_64__ + _ILP32, and that for
  * Sparc we always force the use of sparcv9 in configure.
  */
 #if defined(__x86_64__) || defined(__sparc__)
@@ -450,4 +450,38 @@
     _oldn;                                                              \
 })
 
+/* Abstractions to atomically (i.e. "once") access i64/u64 variables */
+#ifdef CONFIG_ATOMIC64
+static inline int64_t atomic_read_i64(const int64_t *ptr)
+{
+    /* use __nocheck because sizeof(void *) might be < sizeof(u64) */
+    return atomic_read__nocheck(ptr);
+}
+
+static inline uint64_t atomic_read_u64(const uint64_t *ptr)
+{
+    return atomic_read__nocheck(ptr);
+}
+
+static inline void atomic_set_i64(int64_t *ptr, int64_t val)
+{
+    atomic_set__nocheck(ptr, val);
+}
+
+static inline void atomic_set_u64(uint64_t *ptr, uint64_t val)
+{
+    atomic_set__nocheck(ptr, val);
+}
+
+static inline void atomic64_init(void)
+{
+}
+#else /* !CONFIG_ATOMIC64 */
+int64_t  atomic_read_i64(const int64_t *ptr);
+uint64_t atomic_read_u64(const uint64_t *ptr);
+void atomic_set_i64(int64_t *ptr, int64_t val);
+void atomic_set_u64(uint64_t *ptr, uint64_t val);
+void atomic64_init(void);
+#endif /* !CONFIG_ATOMIC64 */
+
 #endif /* QEMU_ATOMIC_H */
diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
new file mode 100644
index 0000000..a6af22f
--- /dev/null
+++ b/include/qemu/atomic128.h
@@ -0,0 +1,153 @@
+/*
+ * Simple interface for 128-bit atomic operations.
+ *
+ * Copyright (C) 2018 Linaro, Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * See docs/devel/atomics.txt for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef QEMU_ATOMIC128_H
+#define QEMU_ATOMIC128_H
+
+/*
+ * GCC is a house divided about supporting large atomic operations.
+ *
+ * For hosts that only have large compare-and-swap, a legalistic reading
+ * of the C++ standard means that one cannot implement __atomic_read on
+ * read-only memory, and thus all atomic operations must synchronize
+ * through libatomic.
+ *
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878
+ *
+ * This interpretation is not especially helpful for QEMU.
+ * For softmmu, all RAM is always read/write from the hypervisor.
+ * For user-only, if the guest doesn't implement such an __atomic_read
+ * then the host need not worry about it either.
+ *
+ * Moreover, using libatomic is not an option, because its interface is
+ * built for std::atomic<T>, and requires that *all* accesses to such an
+ * object go through the library.  In our case we do not have an object
+ * in the C/C++ sense, but a view of memory as seen by the guest.
+ * The guest may issue a large atomic operation and then access those
+ * pieces using word-sized accesses.  From the hypervisor, we have no
+ * way to connect those two actions.
+ *
+ * Therefore, special case each platform.
+ */
+
+#if defined(CONFIG_ATOMIC128)
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
+{
+    return atomic_cmpxchg__nocheck(ptr, cmp, new);
+}
+# define HAVE_CMPXCHG128 1
+#elif defined(CONFIG_CMPXCHG128)
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
+{
+    return __sync_val_compare_and_swap_16(ptr, cmp, new);
+}
+# define HAVE_CMPXCHG128 1
+#elif defined(__aarch64__)
+/* Through gcc 8, aarch64 has no support for 128-bit at all.  */
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
+{
+    uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp);
+    uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
+    uint64_t oldl, oldh;
+    uint32_t tmp;
+    /* Outputs are written before all inputs are consumed: earlyclobber.  */
+    asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
+        "cmp %[oldl], %[cmpl]\n\t"
+        "ccmp %[oldh], %[cmph], #0, eq\n\t"
+        "b.ne 1f\n\t"
+        "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t"
+        "cbnz %w[tmp], 0b\n"
+        "1:"
+        : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
+          [oldl] "=&r"(oldl), [oldh] "=&r"(oldh)
+        : [cmpl] "r"(cmpl), [cmph] "r"(cmph),
+          [newl] "r"(newl), [newh] "r"(newh)
+        : "memory", "cc");
+
+    return int128_make128(oldl, oldh);
+}
+# define HAVE_CMPXCHG128 1
+#else
+/* Fallback definition that must be optimized away, or error.  */
+Int128 QEMU_ERROR("unsupported atomic")
+    atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new);
+# define HAVE_CMPXCHG128 0
+#endif /* Some definition for HAVE_CMPXCHG128 */
+
+
+#if defined(CONFIG_ATOMIC128)
+static inline Int128 atomic16_read(Int128 *ptr)
+{
+    return atomic_read__nocheck(ptr);
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    atomic_set__nocheck(ptr, val);
+}
+
+# define HAVE_ATOMIC128 1
+#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__)
+/* We can do better than cmpxchg for AArch64.  */
+static inline Int128 atomic16_read(Int128 *ptr)
+{
+    uint64_t l, h;
+    uint32_t tmp;
+    /* The load must be paired with the store to guarantee not tearing;
+     * outputs are earlyclobber so none can alias the address of *ptr. */
+    asm("0: ldxp %[l], %[h], %[mem]\n\t"
+        "stxp %w[tmp], %[l], %[h], %[mem]\n\t"
+        "cbnz %w[tmp], 0b"
+        : [mem] "+m"(*ptr), [tmp] "=&r"(tmp), [l] "=&r"(l), [h] "=&r"(h));
+
+    return int128_make128(l, h);
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    uint64_t l = int128_getlo(val), h = int128_gethi(val);
+    uint64_t t1, t2;
+
+    /* Load into temporaries to acquire the exclusive access lock.  */
+    asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
+        "stxp %w[t1], %[l], %[h], %[mem]\n\t"
+        "cbnz %w[t1], 0b"
+        : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
+        : [l] "r"(l), [h] "r"(h));
+}
+
+# define HAVE_ATOMIC128 1
+#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
+static inline Int128 atomic16_read(Int128 *ptr)
+{
+    /* Maybe replace 0 with 0, returning the old value.  */
+    return atomic16_cmpxchg(ptr, 0, 0);
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    Int128 old = *ptr, cmp;
+    do {
+        cmp = old;
+        old = atomic16_cmpxchg(ptr, cmp, val);
+    } while (old != cmp);
+}
+
+# define HAVE_ATOMIC128 1
+#else
+/* Fallback definitions that must be optimized away, or error.  */
+Int128 QEMU_ERROR("unsupported atomic") atomic16_read(Int128 *ptr);
+void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val);
+# define HAVE_ATOMIC128 0
+#endif /* Some definition for HAVE_ATOMIC128 */
+
+#endif /* QEMU_ATOMIC128_H */
diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h
index 5843812..6b92710 100644
--- a/include/qemu/compiler.h
+++ b/include/qemu/compiler.h
@@ -122,6 +122,41 @@
 #ifndef __has_feature
 #define __has_feature(x) 0 /* compatibility with non-clang compilers */
 #endif
+
+#ifndef __has_builtin
+#define __has_builtin(x) 0 /* compatibility with non-clang compilers */
+#endif
+
+#if __has_builtin(__builtin_assume_aligned) || QEMU_GNUC_PREREQ(4, 7)
+#define HAS_ASSUME_ALIGNED
+#endif
+
+#ifndef __has_attribute
+#define __has_attribute(x) 0 /* compatibility with older GCC */
+#endif
+
+/*
+ * GCC doesn't provide __has_attribute() until GCC 5, but we know all the GCC
+ * versions we support have the "flatten" attribute. Clang may not have the
+ * "flatten" attribute but always has __has_attribute() to check for it.
+ */
+#if __has_attribute(flatten) || !defined(__clang__)
+# define QEMU_FLATTEN __attribute__((flatten))
+#else
+# define QEMU_FLATTEN
+#endif
+
+/*
+ * If __attribute__((error)) is present, use it to produce an error at
+ * compile time.  Otherwise, one must wait for the linker to diagnose
+ * the missing symbol.
+ */
+#if __has_attribute(error)
+# define QEMU_ERROR(X) __attribute__((error(X)))
+#else
+# define QEMU_ERROR(X)
+#endif
+
 /* Implement C11 _Generic via GCC builtins.  Example:
  *
  *    QEMU_GENERIC(x, (float, sinf), (long double, sinl), sin) (x)
diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index 47aaa3b..7071bfe 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -169,4 +169,16 @@
 int uleb128_encode_small(uint8_t *out, uint32_t n);
 int uleb128_decode_small(const uint8_t *in, uint32_t *n);
 
+/**
+ * qemu_pstrcmp0:
+ * @str1: a non-NULL pointer to a C string (*str1 can be NULL)
+ * @str2: a non-NULL pointer to a C string (*str2 can be NULL)
+ *
+ * Compares *str1 and *str2 with g_strcmp0().
+ *
+ * Returns: an integer less than, equal to, or greater than zero, if
+ * *str1 is <, == or > than *str2.
+ */
+int qemu_pstrcmp0(const char **str1, const char **str2);
+
 #endif
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index ddca52c..a7cb780 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -73,16 +73,23 @@
 
 /**
  * hbitmap_merge:
- * @a: The bitmap to store the result in.
- * @b: The bitmap to merge into @a.
- * @return true if the merge was successful,
- *         false if it was not attempted.
  *
- * Merge two bitmaps together.
- * A := A (BITOR) B.
- * B is left unmodified.
+ * Store result of merging @a and @b into @result.
+ * @result is allowed to be equal to @a or @b.
+ *
+ * Return true if the merge was successful,
+ *        false if it was not attempted.
  */
-bool hbitmap_merge(HBitmap *a, const HBitmap *b);
+bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result);
+
+/**
+ * hbitmap_can_merge:
+ *
+ * hbitmap_can_merge(a, b) && hbitmap_can_merge(a, result) is necessary and
+ * sufficient for hbitmap_merge() not to fail.
+ *
+ */
+bool hbitmap_can_merge(const HBitmap *a, const HBitmap *b);
 
 /**
  * hbitmap_empty:
diff --git a/include/qemu/memfd.h b/include/qemu/memfd.h
index 49e7963..d551c28 100644
--- a/include/qemu/memfd.h
+++ b/include/qemu/memfd.h
@@ -16,12 +16,28 @@
 #define F_SEAL_WRITE    0x0008  /* prevent writes */
 #endif
 
+#ifndef MFD_CLOEXEC
+#define MFD_CLOEXEC 0x0001U
+#endif
+
+#ifndef MFD_ALLOW_SEALING
+#define MFD_ALLOW_SEALING 0x0002U
+#endif
+
+#ifndef MFD_HUGETLB
+#define MFD_HUGETLB 0x0004U
+#endif
+
+#ifndef MFD_HUGE_SHIFT
+#define MFD_HUGE_SHIFT 26
+#endif
+
 int qemu_memfd_create(const char *name, size_t size, bool hugetlb,
                       uint64_t hugetlbsize, unsigned int seals, Error **errp);
 bool qemu_memfd_alloc_check(void);
 void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
                        int *fd, Error **errp);
 void qemu_memfd_free(void *ptr, size_t size, int fd);
-bool qemu_memfd_check(void);
+bool qemu_memfd_check(unsigned int flags);
 
 #endif /* QEMU_MEMFD_H */
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index a91068d..3bf48bc 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -123,6 +123,18 @@
 #include "qemu/typedefs.h"
 
 /*
+ * For mingw, as of v6.0.0, the function implementing the assert macro is
+ * not marked as noreturn, so the compiler cannot delete code following an
+ * assert(false) as unused.  We rely on this within the code base to delete
+ * code that is unreachable when features are disabled.
+ * All supported versions of Glib's g_assert() satisfy this requirement.
+ */
+#ifdef __MINGW32__
+#undef assert
+#define assert(x)  g_assert(x)
+#endif
+
+/*
  * According to waitpid man page:
  * WCOREDUMP
  *  This  macro  is  not  specified  in POSIX.1-2001 and is not
@@ -448,7 +460,8 @@
 #define FMT_pid "%d"
 #endif
 
-int qemu_create_pidfile(const char *filename);
+bool qemu_write_pidfile(const char *pidfile, Error **errp);
+
 int qemu_get_thread_id(void);
 
 #ifndef CONFIG_IOVEC
@@ -570,6 +583,8 @@
 extern intptr_t qemu_real_host_page_mask;
 
 extern int qemu_icache_linesize;
+extern int qemu_icache_linesize_log;
 extern int qemu_dcache_linesize;
+extern int qemu_dcache_linesize_log;
 
 #endif
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index dacebcf..b2661b6 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -48,6 +48,22 @@
 #define qemu_mutex_trylock__raw(m)                      \
         qemu_mutex_trylock_impl(m, __FILE__, __LINE__)
 
+#ifdef __COVERITY__
+/*
+ * Coverity is severely confused by the indirect function calls, so call
+ * the _impl functions directly.  No trailing ';': usable as expressions.
+ */
+#define qemu_mutex_lock(m)                                              \
+            qemu_mutex_lock_impl(m, __FILE__, __LINE__)
+#define qemu_mutex_trylock(m)                                           \
+            qemu_mutex_trylock_impl(m, __FILE__, __LINE__)
+#define qemu_rec_mutex_lock(m)                                          \
+            qemu_rec_mutex_lock_impl(m, __FILE__, __LINE__)
+#define qemu_rec_mutex_trylock(m)                                       \
+            qemu_rec_mutex_trylock_impl(m, __FILE__, __LINE__)
+#define qemu_cond_wait(c, m)                                            \
+            qemu_cond_wait_impl(c, m, __FILE__, __LINE__)
+#else
 #define qemu_mutex_lock(m) ({                                           \
             QemuMutexLockFunc _f = atomic_read(&qemu_mutex_lock_func);  \
             _f(m, __FILE__, __LINE__);                                  \
@@ -73,6 +89,7 @@
             QemuCondWaitFunc _f = atomic_read(&qemu_cond_wait_func);    \
             _f(c, m, __FILE__, __LINE__);                               \
         })
+#endif
 
 #define qemu_mutex_unlock(mutex) \
         qemu_mutex_unlock_impl(mutex, __FILE__, __LINE__)
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 39ea907..a86330c 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -2,6 +2,7 @@
 #define QEMU_TIMER_H
 
 #include "qemu-common.h"
+#include "qemu/bitops.h"
 #include "qemu/notify.h"
 #include "qemu/host-utils.h"
 
@@ -52,6 +53,24 @@
     QEMU_CLOCK_MAX
 } QEMUClockType;
 
+/**
+ * QEMU Timer attributes:
+ *
+ * An individual timer may be given one or multiple attributes when initialized.
+ * Each attribute corresponds to one bit. Attributes modify the processing
+ * of timers when they fire.
+ *
+ * The following attributes are available:
+ *
+ * QEMU_TIMER_ATTR_EXTERNAL: drives external subsystem
+ *
+ * Timers with this attribute are not recorded in rr mode, so it can be used
+ * for subsystems that operate outside the guest core. Applicable only
+ * with virtual clock type.
+ */
+
+#define QEMU_TIMER_ATTR_EXTERNAL BIT(0)
+
 typedef struct QEMUTimerList QEMUTimerList;
 
 struct QEMUTimerListGroup {
@@ -67,6 +86,7 @@
     QEMUTimerCB *cb;
     void *opaque;
     QEMUTimer *next;
+    int attributes;
     int scale;
 };
 
@@ -418,22 +438,27 @@
  */
 
 /**
- * timer_init_tl:
+ * timer_init_full:
  * @ts: the timer to be initialised
- * @timer_list: the timer list to attach the timer to
+ * @timer_list_group: (optional) the timer list group to attach the timer to
+ * @type: the clock type to use
  * @scale: the scale value for the timer
+ * @attributes: 0, or one or more OR'ed QEMU_TIMER_ATTR_<id> values
  * @cb: the callback to be called when the timer expires
  * @opaque: the opaque pointer to be passed to the callback
  *
- * Initialise a new timer and associate it with @timer_list.
+ * Initialise a timer with the given scale and attributes,
+ * and associate it with timer list for given clock @type in @timer_list_group
+ * (or default timer list group, if NULL).
  * The caller is responsible for allocating the memory.
  *
  * You need not call an explicit deinit call. Simply make
  * sure it is not on a list with timer_del.
  */
-void timer_init_tl(QEMUTimer *ts,
-                   QEMUTimerList *timer_list, int scale,
-                   QEMUTimerCB *cb, void *opaque);
+void timer_init_full(QEMUTimer *ts,
+                     QEMUTimerListGroup *timer_list_group, QEMUClockType type,
+                     int scale, int attributes,
+                     QEMUTimerCB *cb, void *opaque);
 
 /**
  * timer_init:
@@ -445,14 +470,12 @@
  *
  * Initialize a timer with the given scale on the default timer list
  * associated with the clock.
- *
- * You need not call an explicit deinit call. Simply make
- * sure it is not on a list with timer_del.
+ * See timer_init_full for details.
  */
 static inline void timer_init(QEMUTimer *ts, QEMUClockType type, int scale,
                               QEMUTimerCB *cb, void *opaque)
 {
-    timer_init_tl(ts, main_loop_tlg.tl[type], scale, cb, opaque);
+    timer_init_full(ts, NULL, type, scale, 0, cb, opaque);
 }
 
 /**
@@ -464,9 +487,7 @@
  *
  * Initialize a timer with nanosecond scale on the default timer list
  * associated with the clock.
- *
- * You need not call an explicit deinit call. Simply make
- * sure it is not on a list with timer_del.
+ * See timer_init_full for details.
  */
 static inline void timer_init_ns(QEMUTimer *ts, QEMUClockType type,
                                  QEMUTimerCB *cb, void *opaque)
@@ -483,9 +504,7 @@
  *
  * Initialize a timer with microsecond scale on the default timer list
  * associated with the clock.
- *
- * You need not call an explicit deinit call. Simply make
- * sure it is not on a list with timer_del.
+ * See timer_init_full for details.
  */
 static inline void timer_init_us(QEMUTimer *ts, QEMUClockType type,
                                  QEMUTimerCB *cb, void *opaque)
@@ -502,9 +521,7 @@
  *
  * Initialize a timer with millisecond scale on the default timer list
  * associated with the clock.
- *
- * You need not call an explicit deinit call. Simply make
- * sure it is not on a list with timer_del.
+ * See timer_init_full for details.
  */
 static inline void timer_init_ms(QEMUTimer *ts, QEMUClockType type,
                                  QEMUTimerCB *cb, void *opaque)
@@ -513,27 +530,37 @@
 }
 
 /**
- * timer_new_tl:
- * @timer_list: the timer list to attach the timer to
+ * timer_new_full:
+ * @timer_list_group: (optional) the timer list group to attach the timer to
+ * @type: the clock type to use
  * @scale: the scale value for the timer
+ * @attributes: 0, or one or more OR'ed QEMU_TIMER_ATTR_<id> values
  * @cb: the callback to be called when the timer expires
  * @opaque: the opaque pointer to be passed to the callback
  *
- * Create a new timer and associate it with @timer_list.
+ * Create a new timer with the given scale and attributes,
+ * and associate it with timer list for given clock @type in @timer_list_group
+ * (or default timer list group, if NULL).
  * The memory is allocated by the function.
  *
  * This is not the preferred interface unless you know you
- * are going to call timer_free. Use timer_init instead.
+ * are going to call timer_free. Use timer_init or timer_init_full instead.
+ *
+ * The default timer list has one special feature: in icount mode,
+ * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread.  This is
+ * not true of other timer lists, which are typically associated
+ * with an AioContext---each of them runs its timer callbacks in its own
+ * AioContext thread.
  *
  * Returns: a pointer to the timer
  */
-static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list,
-                                      int scale,
-                                      QEMUTimerCB *cb,
-                                      void *opaque)
+static inline QEMUTimer *timer_new_full(QEMUTimerListGroup *timer_list_group,
+                                        QEMUClockType type,
+                                        int scale, int attributes,
+                                        QEMUTimerCB *cb, void *opaque)
 {
     QEMUTimer *ts = g_malloc0(sizeof(QEMUTimer));
-    timer_init_tl(ts, timer_list, scale, cb, opaque);
+    timer_init_full(ts, timer_list_group, type, scale, attributes, cb, opaque);
     return ts;
 }
 
@@ -544,21 +571,16 @@
  * @cb: the callback to be called when the timer expires
  * @opaque: the opaque pointer to be passed to the callback
  *
- * Create a new timer and associate it with the default
- * timer list for the clock type @type.
- *
- * The default timer list has one special feature: in icount mode,
- * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread.  This is
- * not true of other timer lists, which are typically associated
- * with an AioContext---each of them runs its timer callbacks in its own
- * AioContext thread.
+ * Create a new timer with the given scale,
+ * and associate it with the default timer list for the clock type @type.
+ * See timer_new_full for details.
  *
  * Returns: a pointer to the timer
  */
 static inline QEMUTimer *timer_new(QEMUClockType type, int scale,
                                    QEMUTimerCB *cb, void *opaque)
 {
-    return timer_new_tl(main_loop_tlg.tl[type], scale, cb, opaque);
+    return timer_new_full(NULL, type, scale, 0, cb, opaque);
 }
 
 /**
@@ -569,12 +591,7 @@
  *
  * Create a new timer with nanosecond scale on the default timer list
  * associated with the clock.
- *
- * The default timer list has one special feature: in icount mode,
- * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread.  This is
- * not true of other timer lists, which are typically associated
- * with an AioContext---each of them runs its timer callbacks in its own
- * AioContext thread.
+ * See timer_new_full for details.
  *
  * Returns: a pointer to the newly created timer
  */
@@ -590,14 +607,9 @@
  * @cb: the callback to call when the timer expires
  * @opaque: the opaque pointer to pass to the callback
  *
- * The default timer list has one special feature: in icount mode,
- * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread.  This is
- * not true of other timer lists, which are typically associated
- * with an AioContext---each of them runs its timer callbacks in its own
- * AioContext thread.
- *
  * Create a new timer with microsecond scale on the default timer list
  * associated with the clock.
+ * See timer_new_full for details.
  *
  * Returns: a pointer to the newly created timer
  */
@@ -613,14 +625,9 @@
  * @cb: the callback to call when the timer expires
  * @opaque: the opaque pointer to pass to the callback
  *
- * The default timer list has one special feature: in icount mode,
- * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread.  This is
- * not true of other timer lists, which are typically associated
- * with an AioContext---each of them runs its timer callbacks in its own
- * AioContext thread.
- *
  * Create a new timer with millisecond scale on the default timer list
  * associated with the clock.
+ * See timer_new_full for details.
  *
  * Returns: a pointer to the newly created timer
  */
@@ -1037,7 +1044,6 @@
     return get_clock();
 }
 
-extern int64_t tcg_time;
 extern int64_t dev_time;
 #endif
 
diff --git a/include/qemu/win_dump_defs.h b/include/qemu/win_dump_defs.h
new file mode 100644
index 0000000..145096e
--- /dev/null
+++ b/include/qemu/win_dump_defs.h
@@ -0,0 +1,179 @@
+/*
+ * Windows crashdump definitions
+ *
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_WIN_DUMP_DEFS_H
+#define QEMU_WIN_DUMP_DEFS_H
+
+typedef struct WinDumpPhyMemRun64 {
+    uint64_t BasePage;
+    uint64_t PageCount;
+} QEMU_PACKED WinDumpPhyMemRun64;
+
+typedef struct WinDumpPhyMemDesc64 {
+    uint32_t NumberOfRuns;
+    uint32_t unused;
+    uint64_t NumberOfPages;
+    WinDumpPhyMemRun64 Run[43];
+} QEMU_PACKED WinDumpPhyMemDesc64;
+
+typedef struct WinDumpExceptionRecord {
+    uint32_t ExceptionCode;
+    uint32_t ExceptionFlags;
+    uint64_t ExceptionRecord;
+    uint64_t ExceptionAddress;
+    uint32_t NumberParameters;
+    uint32_t unused;
+    uint64_t ExceptionInformation[15];
+} QEMU_PACKED WinDumpExceptionRecord;
+
+typedef struct WinDumpHeader64 {
+    char Signature[4];
+    char ValidDump[4];
+    uint32_t MajorVersion;
+    uint32_t MinorVersion;
+    uint64_t DirectoryTableBase;
+    uint64_t PfnDatabase;
+    uint64_t PsLoadedModuleList;
+    uint64_t PsActiveProcessHead;
+    uint32_t MachineImageType;
+    uint32_t NumberProcessors;
+    union {
+        struct {
+            uint32_t BugcheckCode;
+            uint32_t unused0;
+            uint64_t BugcheckParameter1;
+            uint64_t BugcheckParameter2;
+            uint64_t BugcheckParameter3;
+            uint64_t BugcheckParameter4;
+        };
+        uint8_t BugcheckData[40];
+    };
+    uint8_t VersionUser[32];
+    uint64_t KdDebuggerDataBlock;
+    union {
+        WinDumpPhyMemDesc64 PhysicalMemoryBlock;
+        uint8_t PhysicalMemoryBlockBuffer[704];
+    };
+    union {
+        uint8_t ContextBuffer[3000];
+    };
+    WinDumpExceptionRecord Exception;
+    uint32_t DumpType;
+    uint32_t unused1;
+    uint64_t RequiredDumpSpace;
+    uint64_t SystemTime;
+    char Comment[128];
+    uint64_t SystemUpTime;
+    uint32_t MiniDumpFields;
+    uint32_t SecondaryDataState;
+    uint32_t ProductType;
+    uint32_t SuiteMask;
+    uint32_t WriterStatus;
+    uint8_t unused2;
+    uint8_t KdSecondaryVersion;
+    uint8_t reserved[4018];
+} QEMU_PACKED WinDumpHeader64;
+
+#define KDBG_OWNER_TAG_OFFSET64             0x10
+#define KDBG_MM_PFN_DATABASE_OFFSET64       0xC0
+#define KDBG_KI_BUGCHECK_DATA_OFFSET64      0x88
+#define KDBG_KI_PROCESSOR_BLOCK_OFFSET64    0x218
+#define KDBG_OFFSET_PRCB_CONTEXT_OFFSET64   0x338
+
+#define VMCOREINFO_ELF_NOTE_HDR_SIZE    24
+
+#define WIN_CTX_X64 0x00100000L
+
+#define WIN_CTX_CTL 0x00000001L
+#define WIN_CTX_INT 0x00000002L
+#define WIN_CTX_SEG 0x00000004L
+#define WIN_CTX_FP  0x00000008L
+#define WIN_CTX_DBG 0x00000010L
+
+#define WIN_CTX_FULL    (WIN_CTX_X64 | WIN_CTX_CTL | WIN_CTX_INT | WIN_CTX_FP)
+#define WIN_CTX_ALL     (WIN_CTX_FULL | WIN_CTX_SEG | WIN_CTX_DBG)
+
+#define LIVE_SYSTEM_DUMP    0x00000161
+
+typedef struct WinM128A {
+    uint64_t low;
+    int64_t high;
+} QEMU_ALIGNED(16) WinM128A;
+
+typedef struct WinContext {
+    uint64_t PHome[6];
+
+    uint32_t ContextFlags;
+    uint32_t MxCsr;
+
+    uint16_t SegCs;
+    uint16_t SegDs;
+    uint16_t SegEs;
+    uint16_t SegFs;
+    uint16_t SegGs;
+    uint16_t SegSs;
+    uint32_t EFlags;
+
+    uint64_t Dr0;
+    uint64_t Dr1;
+    uint64_t Dr2;
+    uint64_t Dr3;
+    uint64_t Dr6;
+    uint64_t Dr7;
+
+    uint64_t Rax;
+    uint64_t Rcx;
+    uint64_t Rdx;
+    uint64_t Rbx;
+    uint64_t Rsp;
+    uint64_t Rbp;
+    uint64_t Rsi;
+    uint64_t Rdi;
+    uint64_t R8;
+    uint64_t R9;
+    uint64_t R10;
+    uint64_t R11;
+    uint64_t R12;
+    uint64_t R13;
+    uint64_t R14;
+    uint64_t R15;
+
+    uint64_t Rip;
+
+    struct {
+        uint16_t ControlWord;
+        uint16_t StatusWord;
+        uint8_t TagWord;
+        uint8_t Reserved1;
+        uint16_t ErrorOpcode;
+        uint32_t ErrorOffset;
+        uint16_t ErrorSelector;
+        uint16_t Reserved2;
+        uint32_t DataOffset;
+        uint16_t DataSelector;
+        uint16_t Reserved3;
+        uint32_t MxCsr;
+        uint32_t MxCsr_Mask;
+        WinM128A FloatRegisters[8];
+        WinM128A XmmRegisters[16];
+        uint8_t Reserved4[96];
+    } FltSave;
+
+    WinM128A VectorRegister[26];
+    uint64_t VectorControl;
+
+    uint64_t DebugControl;
+    uint64_t LastBranchToRip;
+    uint64_t LastBranchFromRip;
+    uint64_t LastExceptionToRip;
+    uint64_t LastExceptionFromRip;
+} QEMU_ALIGNED(16) WinContext;
+
+#endif /* QEMU_WIN_DUMP_DEFS_H */
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index dc130cd..def0c64 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -852,7 +852,7 @@
 /**
  * cpu_interrupt:
  * @cpu: The CPU to set an interrupt on.
- * @mask: The interupts to set.
+ * @mask: The interrupts to set.
  *
  * Invokes the interrupt handler.
  */
@@ -1085,6 +1085,17 @@
 void cpu_exec_realizefn(CPUState *cpu, Error **errp);
 void cpu_exec_unrealizefn(CPUState *cpu);
 
+/**
+ * target_words_bigendian:
+ * Returns true if the (default) endianness of the target is big endian,
+ * false otherwise. Note that in target-specific code, you can use
+ * TARGET_WORDS_BIGENDIAN directly instead. On the other hand, common
+ * code should normally never need to know about the endianness of the
+ * target, so please do *not* use this function unless you know very well
+ * what you are doing!
+ */
+bool target_words_bigendian(void);
+
 #ifdef NEED_CPU_H
 
 #ifdef CONFIG_SOFTMMU
diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h
index 6d6128c..c0ad9fc 100644
--- a/include/standard-headers/linux/input.h
+++ b/include/standard-headers/linux/input.h
@@ -267,10 +267,11 @@
 /*
  * MT_TOOL types
  */
-#define MT_TOOL_FINGER		0
-#define MT_TOOL_PEN		1
-#define MT_TOOL_PALM		2
-#define MT_TOOL_MAX		2
+#define MT_TOOL_FINGER		0x00
+#define MT_TOOL_PEN		0x01
+#define MT_TOOL_PALM		0x02
+#define MT_TOOL_DIAL		0x0a
+#define MT_TOOL_MAX		0x0f
 
 /*
  * Values describing the status of a force-feedback effect
diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
index 24954b9..d34c492 100644
--- a/include/sysemu/blockdev.h
+++ b/include/sysemu/blockdev.h
@@ -54,7 +54,8 @@
 QemuOpts *drive_def(const char *optstr);
 QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
                     const char *optstr);
-DriveInfo *drive_new(QemuOpts *arg, BlockInterfaceType block_default_type);
+DriveInfo *drive_new(QemuOpts *arg, BlockInterfaceType block_default_type,
+                     Error **errp);
 
 /* device-hotplug */
 
diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h
index 2411188..aaa51d2 100644
--- a/include/sysemu/hvf.h
+++ b/include/sysemu/hvf.h
@@ -17,7 +17,7 @@
 #include "exec/memory.h"
 #include "sysemu/accel.h"
 
-extern int hvf_disabled;
+extern bool hvf_allowed;
 #ifdef CONFIG_HVF
 #include <Hypervisor/hv.h>
 #include <Hypervisor/hv_vmx.h>
@@ -26,7 +26,7 @@
 #include "hw/hw.h"
 uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
                                  int reg);
-#define hvf_enabled() !hvf_disabled
+#define hvf_enabled() (hvf_allowed)
 #else
 #define hvf_enabled() 0
 #define hvf_get_supported_cpuid(func, idx, reg) 0
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index 7a0ae75..21713b7 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -22,7 +22,6 @@
 };
 
 extern NodeInfo numa_info[MAX_NODES];
-int parse_numa(void *opaque, QemuOpts *opts, Error **errp);
 void parse_numa_opts(MachineState *ms);
 void numa_complete_configuration(MachineState *ms);
 void query_numa_node_mem(NumaNodeMem node_mem[]);
diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index 3ced6bc..3a7c58e 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -100,14 +100,20 @@
 /* Processing clocks and other time sources */
 
 /*! Save the specified clock */
-int64_t replay_save_clock(ReplayClockKind kind, int64_t clock);
+int64_t replay_save_clock(ReplayClockKind kind, int64_t clock,
+                          int64_t raw_icount);
 /*! Read the specified clock from the log or return cached data */
 int64_t replay_read_clock(ReplayClockKind kind);
 /*! Saves or reads the clock depending on the current replay mode. */
 #define REPLAY_CLOCK(clock, value)                                      \
     (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock))       \
         : replay_mode == REPLAY_MODE_RECORD                             \
-            ? replay_save_clock((clock), (value))                       \
+            ? replay_save_clock((clock), (value), cpu_get_icount_raw()) \
+        : (value))
+#define REPLAY_CLOCK_LOCKED(clock, value)                               \
+    (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock))       \
+        : replay_mode == REPLAY_MODE_RECORD                             \
+            ? replay_save_clock((clock), (value), cpu_get_icount_raw_locked()) \
         : (value))
 
 /* Events */
@@ -120,6 +126,9 @@
     Returns 0 in PLAY mode if checkpoint was not found.
     Returns 1 in all other cases. */
 bool replay_checkpoint(ReplayCheckpoint checkpoint);
+/*! Used to determine that checkpoint is pending.
+    Does not proceed to the next event in the log. */
+bool replay_has_checkpoint(void);
 
 /* Asynchronous events queue */
 
diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h
index 9ae1ab6..17a97ed 100644
--- a/include/sysemu/tpm.h
+++ b/include/sysemu/tpm.h
@@ -16,7 +16,7 @@
 #include "qom/object.h"
 
 int tpm_config_parse(QemuOptsList *opts_list, const char *optarg);
-int tpm_init(void);
+void tpm_init(void);
 void tpm_cleanup(void);
 
 typedef enum TPMVersion {
diff --git a/include/ui/console.h b/include/ui/console.h
index fb969ca..c17803c 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -453,7 +453,7 @@
 void qemu_display_init(DisplayState *ds, DisplayOptions *opts);
 
 /* vnc.c */
-void vnc_display_init(const char *id);
+void vnc_display_init(const char *id, Error **errp);
 void vnc_display_open(const char *id, Error **errp);
 void vnc_display_add_client(const char *id, int csock, bool skipauth);
 int vnc_display_password(const char *id, const char *password);
diff --git a/include/ui/gtk.h b/include/ui/gtk.h
index a79780a..99edd3c 100644
--- a/include/ui/gtk.h
+++ b/include/ui/gtk.h
@@ -27,15 +27,6 @@
 #include "ui/egl-context.h"
 #endif
 
-/* Compatibility define to let us build on both Gtk2 and Gtk3 */
-#if GTK_CHECK_VERSION(3, 0, 0)
-static inline void gdk_drawable_get_size(GdkWindow *w, gint *ww, gint *wh)
-{
-    *ww = gdk_window_get_width(w);
-    *wh = gdk_window_get_height(w);
-}
-#endif
-
 typedef struct GtkDisplayState GtkDisplayState;
 
 typedef struct VirtualGfxConsole {
diff --git a/iothread.c b/iothread.c
index aff1281..2fb1cdf 100644
--- a/iothread.c
+++ b/iothread.c
@@ -110,6 +110,7 @@
     IOThread *iothread = IOTHREAD(obj);
 
     iothread->poll_max_ns = IOTHREAD_POLL_MAX_NS_DEFAULT;
+    iothread->thread_id = -1;
 }
 
 static void iothread_instance_finalize(Object *obj)
@@ -117,6 +118,11 @@
     IOThread *iothread = IOTHREAD(obj);
 
     iothread_stop(iothread);
+
+    if (iothread->thread_id != -1) {
+        qemu_cond_destroy(&iothread->init_done_cond);
+        qemu_mutex_destroy(&iothread->init_done_lock);
+    }
     /*
      * Before glib2 2.33.10, there is a glib2 bug that GSource context
      * pointer may not be cleared even if the context has already been
@@ -135,8 +141,6 @@
         g_main_context_unref(iothread->worker_context);
         iothread->worker_context = NULL;
     }
-    qemu_cond_destroy(&iothread->init_done_cond);
-    qemu_mutex_destroy(&iothread->init_done_lock);
 }
 
 static void iothread_complete(UserCreatable *obj, Error **errp)
@@ -147,7 +151,6 @@
 
     iothread->stopping = false;
     iothread->running = true;
-    iothread->thread_id = -1;
     iothread->ctx = aio_context_new(&local_error);
     if (!iothread->ctx) {
         error_propagate(errp, local_error);
diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h
index 72aa226..e1f8b74 100644
--- a/linux-headers/asm-arm/kvm.h
+++ b/linux-headers/asm-arm/kvm.h
@@ -27,6 +27,7 @@
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
@@ -125,6 +126,18 @@
 struct kvm_arch_memory_slot {
 };
 
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+	struct {
+		__u8 serror_pending;
+		__u8 serror_has_esr;
+		/* Align it to 8 bytes */
+		__u8 pad[6];
+		__u64 serror_esr;
+	} exception;
+	__u32 reserved[12];
+};
+
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_SHIFT	16
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index 99cb9ad..e6a98c1 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -39,6 +39,7 @@
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
@@ -154,6 +155,18 @@
 struct kvm_arch_memory_slot {
 };
 
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+	struct {
+		__u8 serror_pending;
+		__u8 serror_has_esr;
+		/* Align it to 8 bytes */
+		__u8 pad[6];
+		__u64 serror_esr;
+	} exception;
+	__u32 reserved[12];
+};
+
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_SHIFT	16
diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
index 1b32b56..8c876c1 100644
--- a/linux-headers/asm-powerpc/kvm.h
+++ b/linux-headers/asm-powerpc/kvm.h
@@ -634,6 +634,7 @@
 
 #define KVM_REG_PPC_DEC_EXPIRY	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
 #define KVM_REG_PPC_ONLINE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
+#define KVM_REG_PPC_PTCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h
index 1ab9901..0265482 100644
--- a/linux-headers/asm-s390/kvm.h
+++ b/linux-headers/asm-s390/kvm.h
@@ -160,6 +160,8 @@
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW	1
 #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW	2
 #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW	3
+#define KVM_S390_VM_CRYPTO_ENABLE_APIE		4
+#define KVM_S390_VM_CRYPTO_DISABLE_APIE		5
 
 /* kvm attributes for migration mode */
 #define KVM_S390_VM_MIGRATION_STOP	0
diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index 86299ef..dabfcf7 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -288,6 +288,7 @@
 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR	0x00000002
 #define KVM_VCPUEVENT_VALID_SHADOW	0x00000004
 #define KVM_VCPUEVENT_VALID_SMM		0x00000008
+#define KVM_VCPUEVENT_VALID_PAYLOAD	0x00000010
 
 /* Interrupt shadow states */
 #define KVM_X86_SHADOW_INT_MOV_SS	0x01
@@ -299,7 +300,7 @@
 		__u8 injected;
 		__u8 nr;
 		__u8 has_error_code;
-		__u8 pad;
+		__u8 pending;
 		__u32 error_code;
 	} exception;
 	struct {
@@ -322,7 +323,9 @@
 		__u8 smm_inside_nmi;
 		__u8 latched_init;
 	} smi;
-	__u32 reserved[9];
+	__u8 reserved[27];
+	__u8 exception_has_payload;
+	__u64 exception_payload;
 };
 
 /* for KVM_GET/SET_DEBUGREGS */
@@ -377,9 +380,11 @@
 
 #define KVM_X86_QUIRK_LINT0_REENABLED	(1 << 0)
 #define KVM_X86_QUIRK_CD_NW_CLEARED	(1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE	(1 << 2)
 
 #define KVM_STATE_NESTED_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
+#define KVM_STATE_NESTED_EVMCS		0x00000004
 
 #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_SMM_VMXON	0x00000002
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 6679072..f11a7eb 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -420,13 +420,19 @@
 struct kvm_coalesced_mmio_zone {
 	__u64 addr;
 	__u32 size;
-	__u32 pad;
+	union {
+		__u32 pad;
+		__u32 pio;
+	};
 };
 
 struct kvm_coalesced_mmio {
 	__u64 phys_addr;
 	__u32 len;
-	__u32 pad;
+	union {
+		__u32 pad;
+		__u32 pio;
+	};
 	__u8  data[8];
 };
 
@@ -719,6 +725,7 @@
 
 #define KVM_PPC_PAGE_SIZES_REAL		0x00000001
 #define KVM_PPC_1T_SEGMENTS		0x00000002
+#define KVM_PPC_NO_HASH			0x00000004
 
 struct kvm_ppc_smmu_info {
 	__u64 flags;
@@ -951,6 +958,13 @@
 #define KVM_CAP_HYPERV_TLBFLUSH 155
 #define KVM_CAP_S390_HPAGE_1M 156
 #define KVM_CAP_NESTED_STATE 157
+#define KVM_CAP_ARM_INJECT_SERROR_ESR 158
+#define KVM_CAP_MSR_PLATFORM_INFO 159
+#define KVM_CAP_PPC_NESTED_HV 160
+#define KVM_CAP_HYPERV_SEND_IPI 161
+#define KVM_CAP_COALESCED_PIO 162
+#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
+#define KVM_CAP_EXCEPTION_PAYLOAD 164
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 3615a26..ceb6453 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -200,6 +200,7 @@
 #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2)	/* vfio-platform device */
 #define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)	/* vfio-amba device */
 #define VFIO_DEVICE_FLAGS_CCW	(1 << 4)	/* vfio-ccw device */
+#define VFIO_DEVICE_FLAGS_AP	(1 << 5)	/* vfio-ap device */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 };
@@ -215,6 +216,7 @@
 #define VFIO_DEVICE_API_PLATFORM_STRING		"vfio-platform"
 #define VFIO_DEVICE_API_AMBA_STRING		"vfio-amba"
 #define VFIO_DEVICE_API_CCW_STRING		"vfio-ccw"
+#define VFIO_DEVICE_API_AP_STRING		"vfio-ap"
 
 /**
  * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h
index 94726cb..c8a8fbe 100644
--- a/linux-headers/linux/vhost.h
+++ b/linux-headers/linux/vhost.h
@@ -176,7 +176,7 @@
 #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
 
 #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
-#define VHOST_GET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x26, __u64)
+#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
 
 /* VHOST_NET specific defines */
 
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
index 07fedfc..f84a9cf 100644
--- a/linux-user/aarch64/signal.c
+++ b/linux-user/aarch64/signal.c
@@ -314,7 +314,7 @@
             break;
 
         case TARGET_SVE_MAGIC:
-            if (arm_feature(env, ARM_FEATURE_SVE)) {
+            if (cpu_isar_feature(aa64_sve, arm_env_get_cpu(env))) {
                 vq = (env->vfp.zcr_el[1] & 0xf) + 1;
                 sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
                 if (!sve && size == sve_size) {
@@ -433,7 +433,7 @@
                                       &layout);
 
     /* SVE state needs saving only if it exists.  */
-    if (arm_feature(env, ARM_FEATURE_SVE)) {
+    if (cpu_isar_feature(aa64_sve, arm_env_get_cpu(env))) {
         vq = (env->vfp.zcr_el[1] & 0xf) + 1;
         sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
         sve_ofs = alloc_sigframe_space(sve_size, &layout);
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 10bca65..5bccd2e 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -458,6 +458,10 @@
     /* probe for the extra features */
 #define GET_FEATURE(feat, hwcap) \
     do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0)
+
+#define GET_FEATURE_ID(feat, hwcap) \
+    do { if (cpu_isar_feature(feat, cpu)) { hwcaps |= hwcap; } } while (0)
+
     /* EDSP is in v5TE and above, but all our v5 CPUs are v5TE */
     GET_FEATURE(ARM_FEATURE_V5, ARM_HWCAP_ARM_EDSP);
     GET_FEATURE(ARM_FEATURE_VFP, ARM_HWCAP_ARM_VFP);
@@ -467,8 +471,8 @@
     GET_FEATURE(ARM_FEATURE_VFP3, ARM_HWCAP_ARM_VFPv3);
     GET_FEATURE(ARM_FEATURE_V6K, ARM_HWCAP_ARM_TLS);
     GET_FEATURE(ARM_FEATURE_VFP4, ARM_HWCAP_ARM_VFPv4);
-    GET_FEATURE(ARM_FEATURE_ARM_DIV, ARM_HWCAP_ARM_IDIVA);
-    GET_FEATURE(ARM_FEATURE_THUMB_DIV, ARM_HWCAP_ARM_IDIVT);
+    GET_FEATURE_ID(arm_div, ARM_HWCAP_ARM_IDIVA);
+    GET_FEATURE_ID(thumb_div, ARM_HWCAP_ARM_IDIVT);
     /* All QEMU's VFPv3 CPUs have 32 registers, see VFP_DREG in translate.c.
      * Note that the ARM_HWCAP_ARM_VFPv3D16 bit is always the inverse of
      * ARM_HWCAP_ARM_VFPD32 (and so always clear for QEMU); it is unrelated
@@ -485,15 +489,16 @@
     ARMCPU *cpu = ARM_CPU(thread_cpu);
     uint32_t hwcaps = 0;
 
-    GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES);
-    GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP2_ARM_PMULL);
-    GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1);
-    GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2);
-    GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32);
+    GET_FEATURE_ID(aa32_aes, ARM_HWCAP2_ARM_AES);
+    GET_FEATURE_ID(aa32_pmull, ARM_HWCAP2_ARM_PMULL);
+    GET_FEATURE_ID(aa32_sha1, ARM_HWCAP2_ARM_SHA1);
+    GET_FEATURE_ID(aa32_sha2, ARM_HWCAP2_ARM_SHA2);
+    GET_FEATURE_ID(aa32_crc32, ARM_HWCAP2_ARM_CRC32);
     return hwcaps;
 }
 
 #undef GET_FEATURE
+#undef GET_FEATURE_ID
 
 #else
 /* 64 bit ARM definitions */
@@ -568,25 +573,26 @@
     hwcaps |= ARM_HWCAP_A64_ASIMD;
 
     /* probe for the extra features */
-#define GET_FEATURE(feat, hwcap) \
-    do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0)
-    GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP_A64_AES);
-    GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP_A64_PMULL);
-    GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP_A64_SHA1);
-    GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP_A64_SHA2);
-    GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP_A64_CRC32);
-    GET_FEATURE(ARM_FEATURE_V8_SHA3, ARM_HWCAP_A64_SHA3);
-    GET_FEATURE(ARM_FEATURE_V8_SM3, ARM_HWCAP_A64_SM3);
-    GET_FEATURE(ARM_FEATURE_V8_SM4, ARM_HWCAP_A64_SM4);
-    GET_FEATURE(ARM_FEATURE_V8_SHA512, ARM_HWCAP_A64_SHA512);
-    GET_FEATURE(ARM_FEATURE_V8_FP16,
-                ARM_HWCAP_A64_FPHP | ARM_HWCAP_A64_ASIMDHP);
-    GET_FEATURE(ARM_FEATURE_V8_ATOMICS, ARM_HWCAP_A64_ATOMICS);
-    GET_FEATURE(ARM_FEATURE_V8_RDM, ARM_HWCAP_A64_ASIMDRDM);
-    GET_FEATURE(ARM_FEATURE_V8_DOTPROD, ARM_HWCAP_A64_ASIMDDP);
-    GET_FEATURE(ARM_FEATURE_V8_FCMA, ARM_HWCAP_A64_FCMA);
-    GET_FEATURE(ARM_FEATURE_SVE, ARM_HWCAP_A64_SVE);
-#undef GET_FEATURE
+#define GET_FEATURE_ID(feat, hwcap) \
+    do { if (cpu_isar_feature(feat, cpu)) { hwcaps |= hwcap; } } while (0)
+
+    GET_FEATURE_ID(aa64_aes, ARM_HWCAP_A64_AES);
+    GET_FEATURE_ID(aa64_pmull, ARM_HWCAP_A64_PMULL);
+    GET_FEATURE_ID(aa64_sha1, ARM_HWCAP_A64_SHA1);
+    GET_FEATURE_ID(aa64_sha256, ARM_HWCAP_A64_SHA2);
+    GET_FEATURE_ID(aa64_sha512, ARM_HWCAP_A64_SHA512);
+    GET_FEATURE_ID(aa64_crc32, ARM_HWCAP_A64_CRC32);
+    GET_FEATURE_ID(aa64_sha3, ARM_HWCAP_A64_SHA3);
+    GET_FEATURE_ID(aa64_sm3, ARM_HWCAP_A64_SM3);
+    GET_FEATURE_ID(aa64_sm4, ARM_HWCAP_A64_SM4);
+    GET_FEATURE_ID(aa64_fp16, ARM_HWCAP_A64_FPHP | ARM_HWCAP_A64_ASIMDHP);
+    GET_FEATURE_ID(aa64_atomics, ARM_HWCAP_A64_ATOMICS);
+    GET_FEATURE_ID(aa64_rdm, ARM_HWCAP_A64_ASIMDRDM);
+    GET_FEATURE_ID(aa64_dp, ARM_HWCAP_A64_ASIMDDP);
+    GET_FEATURE_ID(aa64_fcma, ARM_HWCAP_A64_FCMA);
+    GET_FEATURE_ID(aa64_sve, ARM_HWCAP_A64_SVE);
+
+#undef GET_FEATURE_ID
 
     return hwcaps;
 }
@@ -1511,11 +1517,25 @@
     bswaptls(&sym->st_size);
     bswap16s(&sym->st_shndx);
 }
+
+#ifdef TARGET_MIPS
+static void bswap_mips_abiflags(Mips_elf_abiflags_v0 *abiflags)
+{
+    bswap16s(&abiflags->version);
+    bswap32s(&abiflags->ases);
+    bswap32s(&abiflags->isa_ext);
+    bswap32s(&abiflags->flags1);
+    bswap32s(&abiflags->flags2);
+}
+#endif
 #else
 static inline void bswap_ehdr(struct elfhdr *ehdr) { }
 static inline void bswap_phdr(struct elf_phdr *phdr, int phnum) { }
 static inline void bswap_shdr(struct elf_shdr *shdr, int shnum) { }
 static inline void bswap_sym(struct elf_sym *sym) { }
+#ifdef TARGET_MIPS
+static inline void bswap_mips_abiflags(Mips_elf_abiflags_v0 *abiflags) { }
+#endif
 #endif
 
 #ifdef USE_ELF_CORE_DUMP
@@ -2358,6 +2378,26 @@
                 goto exit_errmsg;
             }
             *pinterp_name = interp_name;
+#ifdef TARGET_MIPS
+        } else if (eppnt->p_type == PT_MIPS_ABIFLAGS) {
+            Mips_elf_abiflags_v0 abiflags;
+            if (eppnt->p_filesz < sizeof(Mips_elf_abiflags_v0)) {
+                errmsg = "Invalid PT_MIPS_ABIFLAGS entry";
+                goto exit_errmsg;
+            }
+            if (eppnt->p_offset + eppnt->p_filesz <= BPRM_BUF_SIZE) {
+                memcpy(&abiflags, bprm_buf + eppnt->p_offset,
+                       sizeof(Mips_elf_abiflags_v0));
+            } else {
+                retval = pread(image_fd, &abiflags, sizeof(Mips_elf_abiflags_v0),
+                               eppnt->p_offset);
+                if (retval != sizeof(Mips_elf_abiflags_v0)) {
+                    goto exit_perror;
+                }
+            }
+            bswap_mips_abiflags(&abiflags);
+            info->fp_abi = abiflags.fp_abi;
+#endif
         }
     }
 
@@ -2669,6 +2709,9 @@
             target_mmap(0, qemu_host_page_size, PROT_READ | PROT_EXEC,
                         MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
         }
+#ifdef TARGET_MIPS
+        info->interp_fp_abi = interp_info.fp_abi;
+#endif
     }
 
     bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &elf_ex,
diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h
index 586c794..ae89516 100644
--- a/linux-user/ioctls.h
+++ b/linux-user/ioctls.h
@@ -131,6 +131,52 @@
      IOCTL(FS_IOC_GETFLAGS, IOC_R, MK_PTR(TYPE_INT))
      IOCTL(FS_IOC_SETFLAGS, IOC_W, MK_PTR(TYPE_INT))
 
+#ifdef CONFIG_USBFS
+  /* USB ioctls */
+  IOCTL(USBDEVFS_CONTROL, IOC_RW,
+        MK_PTR(MK_STRUCT(STRUCT_usbdevfs_ctrltransfer)))
+  IOCTL(USBDEVFS_BULK, IOC_RW,
+        MK_PTR(MK_STRUCT(STRUCT_usbdevfs_bulktransfer)))
+  IOCTL(USBDEVFS_RESETEP, IOC_W, MK_PTR(TYPE_INT))
+  IOCTL(USBDEVFS_SETINTERFACE, IOC_W,
+        MK_PTR(MK_STRUCT(STRUCT_usbdevfs_setinterface)))
+  IOCTL(USBDEVFS_SETCONFIGURATION, IOC_W, MK_PTR(TYPE_INT))
+  IOCTL(USBDEVFS_GETDRIVER, IOC_R,
+        MK_PTR(MK_STRUCT(STRUCT_usbdevfs_getdriver)))
+  IOCTL_SPECIAL(USBDEVFS_SUBMITURB, IOC_W, do_ioctl_usbdevfs_submiturb,
+      MK_PTR(MK_STRUCT(STRUCT_usbdevfs_urb)))
+  IOCTL_SPECIAL(USBDEVFS_DISCARDURB, IOC_RW, do_ioctl_usbdevfs_discardurb,
+      MK_PTR(MK_STRUCT(STRUCT_usbdevfs_urb)))
+  IOCTL_SPECIAL(USBDEVFS_REAPURB, IOC_R, do_ioctl_usbdevfs_reapurb,
+      MK_PTR(TYPE_PTRVOID))
+  IOCTL_SPECIAL(USBDEVFS_REAPURBNDELAY, IOC_R, do_ioctl_usbdevfs_reapurb,
+      MK_PTR(TYPE_PTRVOID))
+  IOCTL(USBDEVFS_DISCSIGNAL, IOC_W,
+        MK_PTR(MK_STRUCT(STRUCT_usbdevfs_disconnectsignal)))
+  IOCTL(USBDEVFS_CLAIMINTERFACE, IOC_W, MK_PTR(TYPE_INT))
+  IOCTL(USBDEVFS_RELEASEINTERFACE, IOC_W, MK_PTR(TYPE_INT))
+  IOCTL(USBDEVFS_CONNECTINFO, IOC_R,
+        MK_PTR(MK_STRUCT(STRUCT_usbdevfs_connectinfo)))
+  IOCTL(USBDEVFS_IOCTL, IOC_RW, MK_PTR(MK_STRUCT(STRUCT_usbdevfs_ioctl)))
+  IOCTL(USBDEVFS_HUB_PORTINFO, IOC_R,
+        MK_PTR(MK_STRUCT(STRUCT_usbdevfs_hub_portinfo)))
+  IOCTL(USBDEVFS_RESET, 0, TYPE_NULL)
+  IOCTL(USBDEVFS_CLEAR_HALT, IOC_W, MK_PTR(TYPE_INT))
+  IOCTL(USBDEVFS_DISCONNECT, 0, TYPE_NULL)
+  IOCTL(USBDEVFS_CONNECT, 0, TYPE_NULL)
+  IOCTL(USBDEVFS_CLAIM_PORT, IOC_W, MK_PTR(TYPE_INT))
+  IOCTL(USBDEVFS_RELEASE_PORT, IOC_W, MK_PTR(TYPE_INT))
+  IOCTL(USBDEVFS_GET_CAPABILITIES, IOC_R, MK_PTR(TYPE_INT))
+  IOCTL(USBDEVFS_DISCONNECT_CLAIM, IOC_W,
+        MK_PTR(MK_STRUCT(STRUCT_usbdevfs_disconnect_claim)))
+#ifdef USBDEVFS_DROP_PRIVILEGES
+  IOCTL(USBDEVFS_DROP_PRIVILEGES, IOC_W, MK_PTR(TYPE_INT))
+#endif
+#ifdef USBDEVFS_GET_SPEED
+  IOCTL(USBDEVFS_GET_SPEED, 0, TYPE_NULL)
+#endif
+#endif /* CONFIG_USBFS */
+
   IOCTL(SIOCATMARK, IOC_R, MK_PTR(TYPE_INT))
   IOCTL(SIOCGIFNAME, IOC_RW, MK_PTR(TYPE_INT))
   IOCTL(SIOCGIFFLAGS, IOC_W | IOC_R, MK_PTR(MK_STRUCT(STRUCT_short_ifreq)))
diff --git a/linux-user/mips/cpu_loop.c b/linux-user/mips/cpu_loop.c
index c9c20cf..97e4957 100644
--- a/linux-user/mips/cpu_loop.c
+++ b/linux-user/mips/cpu_loop.c
@@ -740,6 +740,34 @@
     struct image_info *info = ts->info;
     int i;
 
+    struct mode_req {
+        bool single;
+        bool soft;
+        bool fr1;
+        bool frdefault;
+        bool fre;
+    };
+
+    static const struct mode_req fpu_reqs[] = {
+        [MIPS_ABI_FP_ANY]    = { true,  true,  true,  true,  true  },
+        [MIPS_ABI_FP_DOUBLE] = { false, false, false, true,  true  },
+        [MIPS_ABI_FP_SINGLE] = { true,  false, false, false, false },
+        [MIPS_ABI_FP_SOFT]   = { false, true,  false, false, false },
+        [MIPS_ABI_FP_OLD_64] = { false, false, false, false, false },
+        [MIPS_ABI_FP_XX]     = { false, false, true,  true,  true  },
+        [MIPS_ABI_FP_64]     = { false, false, true,  false, false },
+        [MIPS_ABI_FP_64A]    = { false, false, true,  false, true  }
+    };
+
+    /*
+     * Mode requirements when .MIPS.abiflags is not present in the ELF.
+     * Not present means that everything is acceptable except FR1.
+     */
+    static struct mode_req none_req = { true, true, false, true, true };
+
+    struct mode_req prog_req;
+    struct mode_req interp_req;
+
     for(i = 0; i < 32; i++) {
         env->active_tc.gpr[i] = regs->regs[i];
     }
@@ -747,6 +775,53 @@
     if (regs->cp0_epc & 1) {
         env->hflags |= MIPS_HFLAG_M16;
     }
+
+#ifdef TARGET_ABI_MIPSO32
+# define MAX_FP_ABI MIPS_ABI_FP_64A
+#else
+# define MAX_FP_ABI MIPS_ABI_FP_SOFT
+#endif
+     if ((info->fp_abi > MAX_FP_ABI && info->fp_abi != MIPS_ABI_FP_UNKNOWN)
+        || (info->interp_fp_abi > MAX_FP_ABI &&
+            info->interp_fp_abi != MIPS_ABI_FP_UNKNOWN)) {
+        fprintf(stderr, "qemu: Unexpected FPU mode\n");
+        exit(1);
+    }
+
+    prog_req = (info->fp_abi == MIPS_ABI_FP_UNKNOWN) ? none_req
+                                            : fpu_reqs[info->fp_abi];
+    interp_req = (info->interp_fp_abi == MIPS_ABI_FP_UNKNOWN) ? none_req
+                                            : fpu_reqs[info->interp_fp_abi];
+
+    prog_req.single &= interp_req.single;
+    prog_req.soft &= interp_req.soft;
+    prog_req.fr1 &= interp_req.fr1;
+    prog_req.frdefault &= interp_req.frdefault;
+    prog_req.fre &= interp_req.fre;
+
+    bool cpu_has_mips_r2_r6 = env->insn_flags & ISA_MIPS32R2 ||
+                              env->insn_flags & ISA_MIPS64R2 ||
+                              env->insn_flags & ISA_MIPS32R6 ||
+                              env->insn_flags & ISA_MIPS64R6;
+
+    if (prog_req.fre && !prog_req.frdefault && !prog_req.fr1) {
+        env->CP0_Config5 |= (1 << CP0C5_FRE);
+        if (env->active_fpu.fcr0 & (1 << FCR0_FREP)) {
+            env->hflags |= MIPS_HFLAG_FRE;
+        }
+    } else if ((prog_req.fr1 && prog_req.frdefault) ||
+         (prog_req.single && !prog_req.frdefault)) {
+        if ((env->active_fpu.fcr0 & (1 << FCR0_F64)
+            && cpu_has_mips_r2_r6) || prog_req.fr1) {
+            env->CP0_Status |= (1 << CP0St_FR);
+            env->hflags |= MIPS_HFLAG_F64;
+        }
+    } else  if (!prog_req.fre && !prog_req.frdefault &&
+          !prog_req.fr1 && !prog_req.single && !prog_req.soft) {
+        fprintf(stderr, "qemu: Can't find a matching FPU mode\n");
+        exit(1);
+    }
+
     if (env->insn_flags & ISA_NANOMIPS32) {
         return;
     }
diff --git a/linux-user/mips/target_elf.h b/linux-user/mips/target_elf.h
index fa5d30b..a98c9bd 100644
--- a/linux-user/mips/target_elf.h
+++ b/linux-user/mips/target_elf.h
@@ -12,6 +12,9 @@
     if ((eflags & EF_MIPS_ARCH) == EF_MIPS_ARCH_32R6) {
         return "mips32r6-generic";
     }
+    if ((eflags & EF_MIPS_MACH) == EF_MIPS_MACH_5900) {
+        return "R5900";
+    }
     return "24Kf";
 }
 #endif
diff --git a/linux-user/mips/target_syscall.h b/linux-user/mips/target_syscall.h
index 2fca1c6..d5509a3 100644
--- a/linux-user/mips/target_syscall.h
+++ b/linux-user/mips/target_syscall.h
@@ -244,4 +244,10 @@
     return 0x40000;
 }
 
+/* MIPS-specific prctl() options */
+#define TARGET_PR_SET_FP_MODE  45
+#define TARGET_PR_GET_FP_MODE  46
+#define TARGET_PR_FP_MODE_FR   (1 << 0)
+#define TARGET_PR_FP_MODE_FRE  (1 << 1)
+
 #endif /* MIPS_TARGET_SYSCALL_H */
diff --git a/linux-user/mips64/target_syscall.h b/linux-user/mips64/target_syscall.h
index 078437d..8ccc468 100644
--- a/linux-user/mips64/target_syscall.h
+++ b/linux-user/mips64/target_syscall.h
@@ -241,4 +241,10 @@
     return 0x40000;
 }
 
+/* MIPS-specific prctl() options */
+#define TARGET_PR_SET_FP_MODE  45
+#define TARGET_PR_GET_FP_MODE  46
+#define TARGET_PR_FP_MODE_FR   (1 << 0)
+#define TARGET_PR_FP_MODE_FRE  (1 << 1)
+
 #endif /* MIPS64_TARGET_SYSCALL_H */
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index b4959e4..dd5771c 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -61,6 +61,10 @@
         abi_ulong       interpreter_loadmap_addr;
         abi_ulong       interpreter_pt_dynamic_addr;
         struct image_info *other_info;
+#ifdef TARGET_MIPS
+        int             fp_abi;
+        int             interp_fp_abi;
+#endif
 };
 
 #ifdef TARGET_I386
@@ -143,7 +147,7 @@
     /* Nonzero if process_pending_signals() needs to do something (either
      * handle a pending signal or unblock signals).
      * This flag is written from a signal handler so should be accessed via
-     * the atomic_read() and atomic_write() functions. (It is not accessed
+     * the atomic_read() and atomic_set() functions. (It is not accessed
      * from multiple threads.)
      */
     int signal_pending;
@@ -461,27 +465,59 @@
    These are usually used to access struct data members once the struct has
    been locked - usually with lock_user_struct.  */
 
-/* Tricky points:
-   - Use __builtin_choose_expr to avoid type promotion from ?:,
-   - Invalid sizes result in a compile time error stemming from
-     the fact that abort has no parameters.
-   - It's easier to use the endian-specific unaligned load/store
-     functions than host-endian unaligned load/store plus tswapN.  */
+/*
+ * Tricky points:
+ * - Use __builtin_choose_expr to avoid type promotion from ?:,
+ * - Invalid sizes result in a compile time error stemming from
+ *   the fact that abort has no parameters.
+ * - It's easier to use the endian-specific unaligned load/store
+ *   functions than host-endian unaligned load/store plus tswapN.
+ * - The pragmas are necessary only to silence a clang false-positive
+ *   warning: see https://bugs.llvm.org/show_bug.cgi?id=39113 .
+ * - We have to disable -Wpragmas warnings to avoid a complaint about
+ *   an unknown warning type from older compilers that don't know about
+ *   -Waddress-of-packed-member.
+ * - gcc has bugs in its _Pragma() support in some versions, eg
+ *   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83256 -- so we only
+ *   include the warning-suppression pragmas for clang
+ */
+#ifdef __clang__
+#define PRAGMA_DISABLE_PACKED_WARNING                                   \
+    _Pragma("GCC diagnostic push");                                     \
+    _Pragma("GCC diagnostic ignored \"-Wpragmas\"");                    \
+    _Pragma("GCC diagnostic ignored \"-Waddress-of-packed-member\"")
 
-#define __put_user_e(x, hptr, e)                                        \
-  (__builtin_choose_expr(sizeof(*(hptr)) == 1, stb_p,                   \
-   __builtin_choose_expr(sizeof(*(hptr)) == 2, stw_##e##_p,             \
-   __builtin_choose_expr(sizeof(*(hptr)) == 4, stl_##e##_p,             \
-   __builtin_choose_expr(sizeof(*(hptr)) == 8, stq_##e##_p, abort))))   \
-     ((hptr), (x)), (void)0)
+#define PRAGMA_REENABLE_PACKED_WARNING          \
+    _Pragma("GCC diagnostic pop")
 
-#define __get_user_e(x, hptr, e)                                        \
-  ((x) = (typeof(*hptr))(                                               \
-   __builtin_choose_expr(sizeof(*(hptr)) == 1, ldub_p,                  \
-   __builtin_choose_expr(sizeof(*(hptr)) == 2, lduw_##e##_p,            \
-   __builtin_choose_expr(sizeof(*(hptr)) == 4, ldl_##e##_p,             \
-   __builtin_choose_expr(sizeof(*(hptr)) == 8, ldq_##e##_p, abort))))   \
-     (hptr)), (void)0)
+#else
+#define PRAGMA_DISABLE_PACKED_WARNING
+#define PRAGMA_REENABLE_PACKED_WARNING
+#endif
+
+#define __put_user_e(x, hptr, e)                                            \
+    do {                                                                    \
+        PRAGMA_DISABLE_PACKED_WARNING;                                      \
+        (__builtin_choose_expr(sizeof(*(hptr)) == 1, stb_p,                 \
+        __builtin_choose_expr(sizeof(*(hptr)) == 2, stw_##e##_p,            \
+        __builtin_choose_expr(sizeof(*(hptr)) == 4, stl_##e##_p,            \
+        __builtin_choose_expr(sizeof(*(hptr)) == 8, stq_##e##_p, abort))))  \
+            ((hptr), (x)), (void)0);                                        \
+        PRAGMA_REENABLE_PACKED_WARNING;                                     \
+    } while (0)
+
+#define __get_user_e(x, hptr, e)                                            \
+    do {                                                                    \
+        PRAGMA_DISABLE_PACKED_WARNING;                                      \
+        ((x) = (typeof(*hptr))(                                             \
+        __builtin_choose_expr(sizeof(*(hptr)) == 1, ldub_p,                 \
+        __builtin_choose_expr(sizeof(*(hptr)) == 2, lduw_##e##_p,           \
+        __builtin_choose_expr(sizeof(*(hptr)) == 4, ldl_##e##_p,            \
+        __builtin_choose_expr(sizeof(*(hptr)) == 8, ldq_##e##_p, abort))))  \
+            (hptr)), (void)0);                                              \
+        PRAGMA_REENABLE_PACKED_WARNING;                                     \
+    } while (0)
+
 
 #ifdef TARGET_WORDS_BIGENDIAN
 # define __put_user(x, hptr)  __put_user_e(x, hptr, be)
diff --git a/linux-user/sparc/signal.c b/linux-user/sparc/signal.c
index b4c60aa..e44e999 100644
--- a/linux-user/sparc/signal.c
+++ b/linux-user/sparc/signal.c
@@ -258,10 +258,6 @@
         __put_user(val32, &sf->insns[1]);
         if (err)
             goto sigsegv;
-
-        /* Flush instruction space. */
-        // flush_sig_insns(current->mm, (unsigned long) &(sf->insns[0]));
-        // tb_flush(env);
     }
     unlock_user(sf, sf_addr, sizeof(struct target_signal_frame));
     return;
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index ae3c0df..810a58b 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -94,6 +94,10 @@
 #include <linux/fiemap.h>
 #endif
 #include <linux/fb.h>
+#if defined(CONFIG_USBFS)
+#include <linux/usbdevice_fs.h>
+#include <linux/usb/ch9.h>
+#endif
 #include <linux/vt.h>
 #include <linux/dm-ioctl.h>
 #include <linux/reboot.h>
@@ -4196,6 +4200,182 @@
     return ret;
 }
 
+#if defined(CONFIG_USBFS)
+#if HOST_LONG_BITS > 64
+#error USBDEVFS thunks do not support >64 bit hosts yet.
+#endif
+struct live_urb {
+    uint64_t target_urb_adr;      /* guest urb address; key, keep first */
+    uint64_t target_buf_adr;      /* guest address of the data buffer */
+    char *target_buf_ptr;         /* host view from lock_user() */
+    struct usbdevfs_urb host_urb; /* urb passed to the host ioctl */
+};
+
+static GHashTable *usbdevfs_urb_hashtable(void)
+{
+    static GHashTable *table; /* created on the first call */
+
+    if (table == NULL) {
+        table = g_hash_table_new(g_int64_hash, g_int64_equal);
+    }
+    return table;
+}
+
+static void urb_hashtable_insert(struct live_urb *urb)
+{
+    /* key and value are both the urb; g_int64_hash reads its first 8 bytes */
+    g_hash_table_insert(usbdevfs_urb_hashtable(), urb, urb);
+}
+
+static struct live_urb *urb_hashtable_lookup(uint64_t target_urb_adr)
+{
+    /* g_int64_hash keys are pointers to 64-bit values, hence the address-of */
+    return g_hash_table_lookup(usbdevfs_urb_hashtable(), &target_urb_adr);
+}
+
+static void urb_hashtable_remove(struct live_urb *urb)
+{
+    /* only unlinks the entry: the table was created without destroy hooks */
+    g_hash_table_remove(usbdevfs_urb_hashtable(), urb);
+}
+
+/*
+ * Reap a completed URB: the host ioctl stores the host urb pointer in
+ * buf_temp; map it to its live_urb and copy the results to the guest.
+ */
+static abi_long
+do_ioctl_usbdevfs_reapurb(const IOCTLEntry *ie, uint8_t *buf_temp,
+                          int fd, int cmd, abi_long arg)
+{
+    const argtype usbfsurb_arg_type[] = { MK_STRUCT(STRUCT_usbdevfs_urb) };
+    const argtype ptrvoid_arg_type[] = { TYPE_PTRVOID, 0, 0 };
+    int target_size = thunk_type_size(usbfsurb_arg_type, THUNK_TARGET);
+    struct live_urb *lurb;
+    void *argptr;
+    uintptr_t hurb;
+    uintptr_t target_urb_adr;
+    abi_long ret;
+
+    memset(buf_temp, 0, sizeof(hurb));
+    ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp));
+    if (is_error(ret)) {
+        return ret;
+    }
+
+    /* the kernel stored a host pointer: read exactly pointer-size bytes */
+    memcpy(&hurb, buf_temp, sizeof(hurb));
+    lurb = (void *)(hurb - offsetof(struct live_urb, host_urb));
+    if (!lurb->target_urb_adr) {
+        return -TARGET_EFAULT;
+    }
+    urb_hashtable_remove(lurb);
+    unlock_user(lurb->target_buf_ptr, lurb->target_buf_adr,
+        lurb->host_urb.buffer_length);
+    lurb->target_buf_ptr = NULL;
+
+    /* restore the guest buffer pointer, then update the guest urb struct */
+    lurb->host_urb.buffer = (void *)(uintptr_t)lurb->target_buf_adr;
+    argptr = lock_user(VERIFY_WRITE, lurb->target_urb_adr, target_size, 0);
+    if (!argptr) {
+        g_free(lurb);
+        return -TARGET_EFAULT;
+    }
+    thunk_convert(argptr, &lurb->host_urb, usbfsurb_arg_type, THUNK_TARGET);
+    unlock_user(argptr, lurb->target_urb_adr, target_size);
+
+    target_size = thunk_type_size(ptrvoid_arg_type, THUNK_TARGET);
+    /* write back the urb handle */
+    argptr = lock_user(VERIFY_WRITE, arg, target_size, 0);
+    if (!argptr) {
+        g_free(lurb);
+        return -TARGET_EFAULT;
+    }
+
+    /* GHashTable uses 64-bit keys but thunk_convert expects uintptr_t */
+    target_urb_adr = lurb->target_urb_adr;
+    thunk_convert(argptr, &target_urb_adr, ptrvoid_arg_type, THUNK_TARGET);
+    unlock_user(argptr, arg, target_size);
+    g_free(lurb);
+    return ret;
+}
+
+static abi_long
+do_ioctl_usbdevfs_discardurb(const IOCTLEntry *ie,
+                             uint8_t *buf_temp __attribute__((unused)),
+                             int fd, int cmd, abi_long arg)
+{
+    /*
+     * arg is the guest urb address that keyed the urb at submit time; an
+     * address with no live urb behind it is reported as EFAULT.
+     */
+    struct live_urb *lurb = urb_hashtable_lookup(arg);
+
+    return lurb ? get_errno(safe_ioctl(fd, ie->host_cmd, &lurb->host_urb))
+                : -TARGET_EFAULT;
+}
+
+/*
+ * Submit a guest URB.  Each submitted URB needs a unique ID for the kernel,
+ * and that ID is a pointer to host memory, hence one heap-allocated live_urb
+ * per URB.  NOTE(review): isochronous URBs have a variable-length struct but
+ * only sizeof(struct live_urb) is allocated -- confirm how they are handled.
+ */
+static abi_long
+do_ioctl_usbdevfs_submiturb(const IOCTLEntry *ie, uint8_t *buf_temp,
+                            int fd, int cmd, abi_long arg)
+{
+    const argtype *arg_type = ie->arg_type;
+    int target_size;
+    abi_long ret;
+    void *argptr;
+    int rw_dir;
+    struct live_urb *lurb;
+
+    arg_type++; /* skip the MK_PTR() wrapper of the ioctl's argument type */
+    target_size = thunk_type_size(arg_type, THUNK_TARGET);
+
+    /* construct host copy of urb and metadata */
+    lurb = g_try_malloc0(sizeof(struct live_urb));
+    if (!lurb) {
+        return -TARGET_ENOMEM;
+    }
+
+    argptr = lock_user(VERIFY_READ, arg, target_size, 1);
+    if (!argptr) {
+        g_free(lurb);
+        return -TARGET_EFAULT;
+    }
+    thunk_convert(&lurb->host_urb, argptr, arg_type, THUNK_HOST);
+    unlock_user(argptr, arg, 0);
+
+    lurb->target_urb_adr = arg;
+    lurb->target_buf_adr = (uintptr_t)lurb->host_urb.buffer;
+
+    /* buffer space used depends on endpoint type so lock the entire buffer */
+    /* control type urbs should check the buffer contents for true direction */
+    rw_dir = lurb->host_urb.endpoint & USB_DIR_IN ? VERIFY_WRITE : VERIFY_READ;
+    lurb->target_buf_ptr = lock_user(rw_dir, lurb->target_buf_adr,
+        lurb->host_urb.buffer_length, 1);
+    if (lurb->target_buf_ptr == NULL) {
+        g_free(lurb);
+        return -TARGET_EFAULT;
+    }
+
+    /* update buffer pointer in host copy */
+    lurb->host_urb.buffer = lurb->target_buf_ptr;
+
+    ret = get_errno(safe_ioctl(fd, ie->host_cmd, &lurb->host_urb));
+    if (is_error(ret)) {
+        unlock_user(lurb->target_buf_ptr, lurb->target_buf_adr, 0);
+        g_free(lurb);
+    } else {
+        urb_hashtable_insert(lurb);
+    }
+
+    return ret;
+}
+#endif /* CONFIG_USBFS */
+
 static abi_long do_ioctl_dm(const IOCTLEntry *ie, uint8_t *buf_temp, int fd,
                             int cmd, abi_long arg)
 {
@@ -9347,6 +9527,68 @@
             return ret;
         }
 #endif
+#ifdef TARGET_MIPS
+        case TARGET_PR_GET_FP_MODE:
+        {
+            CPUMIPSState *env = ((CPUMIPSState *)cpu_env);
+            ret = 0;
+            if (env->CP0_Status & (1 << CP0St_FR)) {
+                ret |= TARGET_PR_FP_MODE_FR;
+            }
+            if (env->CP0_Config5 & (1 << CP0C5_FRE)) {
+                ret |= TARGET_PR_FP_MODE_FRE;
+            }
+            return ret;
+        }
+        case TARGET_PR_SET_FP_MODE:
+        {
+            CPUMIPSState *env = ((CPUMIPSState *)cpu_env);
+            bool old_fr = env->CP0_Status & (1 << CP0St_FR);
+            bool new_fr = arg2 & TARGET_PR_FP_MODE_FR;
+            bool new_fre = arg2 & TARGET_PR_FP_MODE_FRE;
+
+            if (new_fr && !(env->active_fpu.fcr0 & (1 << FCR0_F64))) {
+                /* FR1 is not supported */
+                return -TARGET_EOPNOTSUPP;
+            }
+            if (!new_fr && (env->active_fpu.fcr0 & (1 << FCR0_F64))
+                && !(env->CP0_Status_rw_bitmask & (1 << CP0St_FR))) {
+                /* cannot set FR=0 */
+                return -TARGET_EOPNOTSUPP;
+            }
+            if (new_fre && !(env->active_fpu.fcr0 & (1 << FCR0_FREP))) {
+                /* Cannot set FRE=1 */
+                return -TARGET_EOPNOTSUPP;
+            }
+
+            int i;
+            fpr_t *fpr = env->active_fpu.fpr;
+            for (i = 0; i < 32 ; i += 2) {
+                if (!old_fr && new_fr) {
+                    fpr[i].w[!FP_ENDIAN_IDX] = fpr[i + 1].w[FP_ENDIAN_IDX];
+                } else if (old_fr && !new_fr) {
+                    fpr[i + 1].w[FP_ENDIAN_IDX] = fpr[i].w[!FP_ENDIAN_IDX];
+                }
+            }
+
+            if (new_fr) {
+                env->CP0_Status |= (1 << CP0St_FR);
+                env->hflags |= MIPS_HFLAG_F64;
+            } else {
+                env->CP0_Status &= ~(1 << CP0St_FR);
+                env->hflags &= ~MIPS_HFLAG_F64; /* drop the stale hflag too */
+            }
+            if (new_fre) {
+                env->CP0_Config5 |= (1 << CP0C5_FRE);
+                env->hflags |= MIPS_HFLAG_FRE; /* FREP was verified above */
+            } else {
+                env->CP0_Config5 &= ~(1 << CP0C5_FRE);
+                env->hflags &= ~MIPS_HFLAG_FRE;
+            }
+
+            return 0;
+        }
+#endif /* MIPS */
 #ifdef TARGET_AARCH64
         case TARGET_PR_SVE_SET_VL:
             /*
@@ -9356,7 +9598,7 @@
              * even though the current architectural maximum is VQ=16.
              */
             ret = -TARGET_EINVAL;
-            if (arm_feature(cpu_env, ARM_FEATURE_SVE)
+            if (cpu_isar_feature(aa64_sve, arm_env_get_cpu(cpu_env))
                 && arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
                 CPUARMState *env = cpu_env;
                 ARMCPU *cpu = arm_env_get_cpu(env);
@@ -9375,9 +9617,11 @@
             return ret;
         case TARGET_PR_SVE_GET_VL:
             ret = -TARGET_EINVAL;
-            if (arm_feature(cpu_env, ARM_FEATURE_SVE)) {
-                CPUARMState *env = cpu_env;
-                ret = ((env->vfp.zcr_el[1] & 0xf) + 1) * 16;
+            {
+                ARMCPU *cpu = arm_env_get_cpu(cpu_env);
+                if (cpu_isar_feature(aa64_sve, cpu)) {
+                    ret = ((cpu->env.vfp.zcr_el[1] & 0xf) + 1) * 16;
+                }
             }
             return ret;
 #endif /* AARCH64 */
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 18d434d..99bbce0 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -863,6 +863,34 @@
 
 #define TARGET_FS_IOC_FIEMAP TARGET_IOWR('f',11,struct fiemap)
 
+/* usb ioctls */
+#define TARGET_USBDEVFS_CONTROL TARGET_IOWRU('U', 0)
+#define TARGET_USBDEVFS_BULK TARGET_IOWRU('U', 2)
+#define TARGET_USBDEVFS_RESETEP TARGET_IORU('U', 3)
+#define TARGET_USBDEVFS_SETINTERFACE TARGET_IORU('U', 4)
+#define TARGET_USBDEVFS_SETCONFIGURATION TARGET_IORU('U',  5)
+#define TARGET_USBDEVFS_GETDRIVER TARGET_IOWU('U', 8)
+#define TARGET_USBDEVFS_SUBMITURB TARGET_IORU('U', 10)
+#define TARGET_USBDEVFS_DISCARDURB TARGET_IO('U', 11)
+#define TARGET_USBDEVFS_REAPURB TARGET_IOWU('U', 12)
+#define TARGET_USBDEVFS_REAPURBNDELAY TARGET_IOWU('U', 13)
+#define TARGET_USBDEVFS_DISCSIGNAL TARGET_IORU('U', 14)
+#define TARGET_USBDEVFS_CLAIMINTERFACE TARGET_IORU('U', 15)
+#define TARGET_USBDEVFS_RELEASEINTERFACE TARGET_IORU('U', 16)
+#define TARGET_USBDEVFS_CONNECTINFO TARGET_IOWU('U', 17)
+#define TARGET_USBDEVFS_IOCTL TARGET_IOWRU('U', 18)
+#define TARGET_USBDEVFS_HUB_PORTINFO TARGET_IORU('U', 19)
+#define TARGET_USBDEVFS_RESET TARGET_IO('U', 20)
+#define TARGET_USBDEVFS_CLEAR_HALT TARGET_IORU('U', 21)
+#define TARGET_USBDEVFS_DISCONNECT TARGET_IO('U', 22)
+#define TARGET_USBDEVFS_CONNECT TARGET_IO('U', 23)
+#define TARGET_USBDEVFS_CLAIM_PORT TARGET_IORU('U', 24)
+#define TARGET_USBDEVFS_RELEASE_PORT TARGET_IORU('U', 25)
+#define TARGET_USBDEVFS_GET_CAPABILITIES TARGET_IORU('U', 26)
+#define TARGET_USBDEVFS_DISCONNECT_CLAIM TARGET_IORU('U', 27)
+#define TARGET_USBDEVFS_DROP_PRIVILEGES TARGET_IOWU('U', 30)
+#define TARGET_USBDEVFS_GET_SPEED TARGET_IO('U', 31)
+
 /* cdrom commands */
 #define TARGET_CDROMPAUSE		0x5301 /* Pause Audio Operation */
 #define TARGET_CDROMRESUME		0x5302 /* Resume paused Audio Operation */
diff --git a/linux-user/syscall_types.h b/linux-user/syscall_types.h
index 24631b0..b98a23b 100644
--- a/linux-user/syscall_types.h
+++ b/linux-user/syscall_types.h
@@ -266,3 +266,71 @@
        TYPE_INT, /* flags */
        TYPE_INT, /* datalen */
        TYPE_PTRVOID) /* data */
+
+#if defined(CONFIG_USBFS)
+/* usb device ioctls */
+STRUCT(usbdevfs_ctrltransfer,
+        TYPE_CHAR, /* bRequestType */
+        TYPE_CHAR, /* bRequest */
+        TYPE_SHORT, /* wValue */
+        TYPE_SHORT, /* wIndex */
+        TYPE_SHORT, /* wLength */
+        TYPE_INT, /* timeout */
+        TYPE_PTRVOID) /* data */
+
+STRUCT(usbdevfs_bulktransfer,
+        TYPE_INT, /* ep */
+        TYPE_INT, /* len */
+        TYPE_INT, /* timeout */
+        TYPE_PTRVOID) /* data */
+
+STRUCT(usbdevfs_setinterface,
+        TYPE_INT, /* interface */
+        TYPE_INT) /* altsetting */
+
+STRUCT(usbdevfs_disconnectsignal,
+        TYPE_INT, /* signr */
+        TYPE_PTRVOID) /* context */
+
+STRUCT(usbdevfs_getdriver,
+        TYPE_INT, /* interface */
+        MK_ARRAY(TYPE_CHAR, USBDEVFS_MAXDRIVERNAME + 1)) /* driver */
+
+STRUCT(usbdevfs_connectinfo,
+        TYPE_INT, /* devnum */
+        TYPE_CHAR) /* slow */
+
+STRUCT(usbdevfs_iso_packet_desc,
+        TYPE_INT, /* length */
+        TYPE_INT, /* actual_length */
+        TYPE_INT) /* status */
+
+STRUCT(usbdevfs_urb,
+        TYPE_CHAR, /* type */
+        TYPE_CHAR, /* endpoint */
+        TYPE_INT, /* status */
+        TYPE_INT, /* flags */
+        TYPE_PTRVOID, /* buffer */
+        TYPE_INT, /* buffer_length */
+        TYPE_INT, /* actual_length */
+        TYPE_INT, /* start_frame */
+        TYPE_INT, /* union number_of_packets stream_id */
+        TYPE_INT, /* error_count */
+        TYPE_INT, /* signr */
+        TYPE_PTRVOID, /* usercontext */
+        MK_ARRAY(MK_STRUCT(STRUCT_usbdevfs_iso_packet_desc), 0)) /* desc */
+
+STRUCT(usbdevfs_ioctl,
+        TYPE_INT, /* ifno */
+        TYPE_INT, /* ioctl_code */
+        TYPE_PTRVOID) /* data */
+
+STRUCT(usbdevfs_hub_portinfo,
+        TYPE_CHAR, /* nports */
+        MK_ARRAY(TYPE_CHAR, 127)) /* port */
+
+STRUCT(usbdevfs_disconnect_claim,
+        TYPE_INT, /* interface */
+        TYPE_INT, /* flags */
+        MK_ARRAY(TYPE_CHAR, USBDEVFS_MAXDRIVERNAME + 1)) /* driver */
+#endif /* CONFIG_USBFS */
diff --git a/memory.c b/memory.c
index 9b73892..51204aa 100644
--- a/memory.c
+++ b/memory.c
@@ -374,6 +374,33 @@
     }
 }
 
+/* OR (@tmp & @mask) into *@value, shifted left by @shift (right if < 0). */
+static inline void memory_region_shift_read_access(uint64_t *value,
+                                                   signed shift,
+                                                   uint64_t mask,
+                                                   uint64_t tmp)
+{
+    const uint64_t bits = tmp & mask;
+
+    /* a negative @shift moves the bits right by its magnitude */
+    *value |= shift < 0 ? bits >> -shift : bits << shift;
+}
+
+/*
+ * Extract the part of *@value to be written by one access: shift it right
+ * by @shift (left by -@shift when @shift is negative) and apply @mask.
+ */
+static inline uint64_t memory_region_shift_write_access(uint64_t *value,
+                                                        signed shift,
+                                                        uint64_t mask)
+{
+    if (shift < 0) {
+        return (*value << -shift) & mask;
+    }
+
+    return (*value >> shift) & mask;
+}
+
 static hwaddr memory_region_to_absolute_addr(MemoryRegion *mr, hwaddr offset)
 {
     MemoryRegion *root;
@@ -396,37 +423,11 @@
     return -1;
 }
 
-static MemTxResult memory_region_oldmmio_read_accessor(MemoryRegion *mr,
-                                                       hwaddr addr,
-                                                       uint64_t *value,
-                                                       unsigned size,
-                                                       unsigned shift,
-                                                       uint64_t mask,
-                                                       MemTxAttrs attrs)
-{
-    uint64_t tmp;
-
-    tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
-    if (mr->subpage) {
-        trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size);
-    } else if (mr == &io_mem_notdirty) {
-        /* Accesses to code which has previously been translated into a TB show
-         * up in the MMIO path, as accesses to the io_mem_notdirty
-         * MemoryRegion. */
-        trace_memory_region_tb_read(get_cpu_index(), addr, tmp, size);
-    } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) {
-        hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
-        trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size);
-    }
-    *value |= (tmp & mask) << shift;
-    return MEMTX_OK;
-}
-
 static MemTxResult  memory_region_read_accessor(MemoryRegion *mr,
                                                 hwaddr addr,
                                                 uint64_t *value,
                                                 unsigned size,
-                                                unsigned shift,
+                                                signed shift,
                                                 uint64_t mask,
                                                 MemTxAttrs attrs)
 {
@@ -444,7 +445,7 @@
         hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
         trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size);
     }
-    *value |= (tmp & mask) << shift;
+    memory_region_shift_read_access(value, shift, mask, tmp);
     return MEMTX_OK;
 }
 
@@ -452,7 +453,7 @@
                                                           hwaddr addr,
                                                           uint64_t *value,
                                                           unsigned size,
-                                                          unsigned shift,
+                                                          signed shift,
                                                           uint64_t mask,
                                                           MemTxAttrs attrs)
 {
@@ -471,47 +472,20 @@
         hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
         trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size);
     }
-    *value |= (tmp & mask) << shift;
+    memory_region_shift_read_access(value, shift, mask, tmp);
     return r;
 }
 
-static MemTxResult memory_region_oldmmio_write_accessor(MemoryRegion *mr,
-                                                        hwaddr addr,
-                                                        uint64_t *value,
-                                                        unsigned size,
-                                                        unsigned shift,
-                                                        uint64_t mask,
-                                                        MemTxAttrs attrs)
-{
-    uint64_t tmp;
-
-    tmp = (*value >> shift) & mask;
-    if (mr->subpage) {
-        trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size);
-    } else if (mr == &io_mem_notdirty) {
-        /* Accesses to code which has previously been translated into a TB show
-         * up in the MMIO path, as accesses to the io_mem_notdirty
-         * MemoryRegion. */
-        trace_memory_region_tb_write(get_cpu_index(), addr, tmp, size);
-    } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) {
-        hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
-        trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size);
-    }
-    mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp);
-    return MEMTX_OK;
-}
-
 static MemTxResult memory_region_write_accessor(MemoryRegion *mr,
                                                 hwaddr addr,
                                                 uint64_t *value,
                                                 unsigned size,
-                                                unsigned shift,
+                                                signed shift,
                                                 uint64_t mask,
                                                 MemTxAttrs attrs)
 {
-    uint64_t tmp;
+    uint64_t tmp = memory_region_shift_write_access(value, shift, mask);
 
-    tmp = (*value >> shift) & mask;
     if (mr->subpage) {
         trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size);
     } else if (mr == &io_mem_notdirty) {
@@ -531,13 +505,12 @@
                                                            hwaddr addr,
                                                            uint64_t *value,
                                                            unsigned size,
-                                                           unsigned shift,
+                                                           signed shift,
                                                            uint64_t mask,
                                                            MemTxAttrs attrs)
 {
-    uint64_t tmp;
+    uint64_t tmp = memory_region_shift_write_access(value, shift, mask);
 
-    tmp = (*value >> shift) & mask;
     if (mr->subpage) {
         trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size);
     } else if (mr == &io_mem_notdirty) {
@@ -562,7 +535,7 @@
                                                    hwaddr addr,
                                                    uint64_t *value,
                                                    unsigned size,
-                                                   unsigned shift,
+                                                   signed shift,
                                                    uint64_t mask,
                                                    MemTxAttrs attrs),
                                       MemoryRegion *mr,
@@ -582,7 +555,7 @@
 
     /* FIXME: support unaligned access? */
     access_size = MAX(MIN(size, access_size_max), access_size_min);
-    access_mask = -1ULL >> (64 - access_size * 8);
+    access_mask = MAKE_64BIT_MASK(0, access_size * 8);
     if (memory_region_big_endian(mr)) {
         for (i = 0; i < size; i += access_size) {
             r |= access_fn(mr, addr + i, value, access_size,
@@ -1394,16 +1367,12 @@
                                          mr->ops->impl.max_access_size,
                                          memory_region_read_accessor,
                                          mr, attrs);
-    } else if (mr->ops->read_with_attrs) {
+    } else {
         return access_with_adjusted_size(addr, pval, size,
                                          mr->ops->impl.min_access_size,
                                          mr->ops->impl.max_access_size,
                                          memory_region_read_with_attrs_accessor,
                                          mr, attrs);
-    } else {
-        return access_with_adjusted_size(addr, pval, size, 1, 4,
-                                         memory_region_oldmmio_read_accessor,
-                                         mr, attrs);
     }
 }
 
@@ -1475,17 +1444,13 @@
                                          mr->ops->impl.max_access_size,
                                          memory_region_write_accessor, mr,
                                          attrs);
-    } else if (mr->ops->write_with_attrs) {
+    } else {
         return
             access_with_adjusted_size(addr, &data, size,
                                       mr->ops->impl.min_access_size,
                                       mr->ops->impl.max_access_size,
                                       memory_region_write_with_attrs_accessor,
                                       mr, attrs);
-    } else {
-        return access_with_adjusted_size(addr, &data, size, 1, 4,
-                                         memory_region_oldmmio_write_accessor,
-                                         mr, attrs);
     }
 }
 
@@ -1518,12 +1483,18 @@
                                              bool share,
                                              Error **errp)
 {
+    Error *err = NULL;
     memory_region_init(mr, owner, name, size);
     mr->ram = true;
     mr->terminates = true;
     mr->destructor = memory_region_destructor_ram;
-    mr->ram_block = qemu_ram_alloc(size, share, mr, errp);
+    mr->ram_block = qemu_ram_alloc(size, share, mr, &err);
     mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
+    if (err) {
+        mr->size = int128_zero();
+        object_unparent(OBJECT(mr));
+        error_propagate(errp, err);
+    }
 }
 
 void memory_region_init_resizeable_ram(MemoryRegion *mr,
@@ -1536,16 +1507,22 @@
                                                        void *host),
                                        Error **errp)
 {
+    Error *err = NULL;
     memory_region_init(mr, owner, name, size);
     mr->ram = true;
     mr->terminates = true;
     mr->destructor = memory_region_destructor_ram;
     mr->ram_block = qemu_ram_alloc_resizeable(size, max_size, resized,
-                                              mr, errp);
+                                              mr, &err);
     mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
+    if (err) {
+        mr->size = int128_zero();
+        object_unparent(OBJECT(mr));
+        error_propagate(errp, err);
+    }
 }
 
-#ifdef __linux__
+#ifdef CONFIG_POSIX
 void memory_region_init_ram_from_file(MemoryRegion *mr,
                                       struct Object *owner,
                                       const char *name,
@@ -1555,13 +1532,19 @@
                                       const char *path,
                                       Error **errp)
 {
+    Error *err = NULL;
     memory_region_init(mr, owner, name, size);
     mr->ram = true;
     mr->terminates = true;
     mr->destructor = memory_region_destructor_ram;
     mr->align = align;
-    mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path, errp);
+    mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path, &err);
     mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
+    if (err) {
+        mr->size = int128_zero();
+        object_unparent(OBJECT(mr));
+        error_propagate(errp, err);
+    }
 }
 
 void memory_region_init_ram_from_fd(MemoryRegion *mr,
@@ -1572,14 +1555,20 @@
                                     int fd,
                                     Error **errp)
 {
+    Error *err = NULL;
     memory_region_init(mr, owner, name, size);
     mr->ram = true;
     mr->terminates = true;
     mr->destructor = memory_region_destructor_ram;
     mr->ram_block = qemu_ram_alloc_from_fd(size, mr,
                                            share ? RAM_SHARED : 0,
-                                           fd, errp);
+                                           fd, &err);
     mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
+    if (err) {
+        mr->size = int128_zero();
+        object_unparent(OBJECT(mr));
+        error_propagate(errp, err);
+    }
 }
 #endif
 
@@ -1630,13 +1619,19 @@
                                       uint64_t size,
                                       Error **errp)
 {
+    Error *err = NULL;
     memory_region_init(mr, owner, name, size);
     mr->ram = true;
     mr->readonly = true;
     mr->terminates = true;
     mr->destructor = memory_region_destructor_ram;
-    mr->ram_block = qemu_ram_alloc(size, false, mr, errp);
+    mr->ram_block = qemu_ram_alloc(size, false, mr, &err);
     mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
+    if (err) {
+        mr->size = int128_zero();
+        object_unparent(OBJECT(mr));
+        error_propagate(errp, err);
+    }
 }
 
 void memory_region_init_rom_device_nomigrate(MemoryRegion *mr,
@@ -1647,6 +1642,7 @@
                                              uint64_t size,
                                              Error **errp)
 {
+    Error *err = NULL;
     assert(ops);
     memory_region_init(mr, owner, name, size);
     mr->ops = ops;
@@ -1654,7 +1650,12 @@
     mr->terminates = true;
     mr->rom_device = true;
     mr->destructor = memory_region_destructor_ram;
-    mr->ram_block = qemu_ram_alloc(size, false,  mr, errp);
+    mr->ram_block = qemu_ram_alloc(size, false,  mr, &err);
+    if (err) {
+        mr->size = int128_zero();
+        object_unparent(OBJECT(mr));
+        error_propagate(errp, err);
+    }
 }
 
 void memory_region_init_iommu(void *_iommu_mr,
@@ -2128,7 +2129,7 @@
                 .size = fr->addr.size,
             };
 
-            MEMORY_LISTENER_CALL(as, coalesced_mmio_del, Reverse, &section,
+            MEMORY_LISTENER_CALL(as, coalesced_io_del, Reverse, &section,
                                  int128_get64(fr->addr.start),
                                  int128_get64(fr->addr.size));
             QTAILQ_FOREACH(cmr, &mr->coalesced, link) {
@@ -2139,7 +2140,7 @@
                     continue;
                 }
                 tmp = addrrange_intersection(tmp, fr->addr);
-                MEMORY_LISTENER_CALL(as, coalesced_mmio_add, Forward, &section,
+                MEMORY_LISTENER_CALL(as, coalesced_io_add, Forward, &section,
                                      int128_get64(tmp.start),
                                      int128_get64(tmp.size));
             }
diff --git a/migration/Makefile.objs b/migration/Makefile.objs
index c83ec47..a4f3baf 100644
--- a/migration/Makefile.objs
+++ b/migration/Makefile.objs
@@ -1,6 +1,6 @@
 common-obj-y += migration.o socket.o fd.o exec.o
 common-obj-y += tls.o channel.o savevm.o
-common-obj-y += colo-comm.o colo.o colo-failover.o
+common-obj-y += colo.o colo-failover.o
 common-obj-y += vmstate.o vmstate-types.o page_cache.o
 common-obj-y += qemu-file.o global_state.o
 common-obj-y += qemu-file-channel.o
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 4778263..5e90f44 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -301,14 +301,14 @@
                 goto fail;
             }
 
-            if (bdrv_dirty_bitmap_frozen(bitmap)) {
-                error_report("Can't migrate frozen dirty bitmap: '%s",
+            if (bdrv_dirty_bitmap_user_locked(bitmap)) {
+                error_report("Can't migrate a bitmap that is in use by another operation: '%s'",
                              bdrv_dirty_bitmap_name(bitmap));
                 goto fail;
             }
 
-            if (bdrv_dirty_bitmap_qmp_locked(bitmap)) {
-                error_report("Can't migrate locked dirty bitmap: '%s",
+            if (bdrv_dirty_bitmap_readonly(bitmap)) {
+                error_report("Can't migrate read-only dirty bitmap: '%s'",
                              bdrv_dirty_bitmap_name(bitmap));
                 goto fail;
             }
@@ -335,9 +335,9 @@
         }
     }
 
-    /* unset persistance here, to not roll back it */
+    /* set migration flags here, so that they are not rolled back */
     QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
-        bdrv_dirty_bitmap_set_persistance(dbms->bitmap, false);
+        bdrv_dirty_bitmap_set_migration(dbms->bitmap, true);
     }
 
     if (QSIMPLEQ_EMPTY(&dirty_bitmap_mig_state.dbms_list)) {
diff --git a/migration/colo-comm.c b/migration/colo-comm.c
deleted file mode 100644
index df26e4d..0000000
--- a/migration/colo-comm.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
- * (a.k.a. Fault Tolerance or Continuous Replication)
- *
- * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
- * Copyright (c) 2016 FUJITSU LIMITED
- * Copyright (c) 2016 Intel Corporation
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or
- * later. See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "migration.h"
-#include "migration/colo.h"
-#include "migration/vmstate.h"
-#include "trace.h"
-
-typedef struct {
-     bool colo_requested;
-} COLOInfo;
-
-static COLOInfo colo_info;
-
-COLOMode get_colo_mode(void)
-{
-    if (migration_in_colo_state()) {
-        return COLO_MODE_PRIMARY;
-    } else if (migration_incoming_in_colo_state()) {
-        return COLO_MODE_SECONDARY;
-    } else {
-        return COLO_MODE_UNKNOWN;
-    }
-}
-
-static int colo_info_pre_save(void *opaque)
-{
-    COLOInfo *s = opaque;
-
-    s->colo_requested = migrate_colo_enabled();
-
-    return 0;
-}
-
-static bool colo_info_need(void *opaque)
-{
-   return migrate_colo_enabled();
-}
-
-static const VMStateDescription colo_state = {
-    .name = "COLOState",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .pre_save = colo_info_pre_save,
-    .needed = colo_info_need,
-    .fields = (VMStateField[]) {
-        VMSTATE_BOOL(colo_requested, COLOInfo),
-        VMSTATE_END_OF_LIST()
-    },
-};
-
-void colo_info_init(void)
-{
-    vmstate_register(NULL, 0, &colo_state, &colo_info);
-}
-
-bool migration_incoming_enable_colo(void)
-{
-    return colo_info.colo_requested;
-}
-
-void migration_incoming_exit_colo(void)
-{
-    colo_info.colo_requested = false;
-}
diff --git a/migration/colo-failover.c b/migration/colo-failover.c
index 0ae0c41..4854a96 100644
--- a/migration/colo-failover.c
+++ b/migration/colo-failover.c
@@ -77,7 +77,7 @@
 
 void qmp_x_colo_lost_heartbeat(Error **errp)
 {
-    if (get_colo_mode() == COLO_MODE_UNKNOWN) {
+    if (get_colo_mode() == COLO_MODE_NONE) {
         error_setg(errp, QERR_FEATURE_DISABLED, "colo");
         return;
     }
diff --git a/migration/colo.c b/migration/colo.c
index 88936f5..956ac23 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -25,8 +25,16 @@
 #include "qemu/error-report.h"
 #include "migration/failover.h"
 #include "replication.h"
+#include "net/colo-compare.h"
+#include "net/colo.h"
+#include "block/block.h"
+#include "qapi/qapi-events-migration.h"
+#include "qapi/qmp/qerror.h"
+#include "sysemu/cpus.h"
+#include "net/filter.h"
 
 static bool vmstate_loading;
+static Notifier packets_compare_notifier;
 
 #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
 
@@ -53,6 +61,7 @@
 {
     int old_state;
     MigrationIncomingState *mis = migration_incoming_get_current();
+    Error *local_err = NULL;
 
     /* Can not do failover during the process of VM's loading VMstate, Or
      * it will break the secondary VM.
@@ -70,6 +79,17 @@
     migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
                       MIGRATION_STATUS_COMPLETED);
 
+    replication_stop_all(true, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        local_err = NULL; /* freed above; must be NULL before reuse */
+    }
+    /* Notify all filters of all NICs about the failover */
+    colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    }
+
     if (!autostart) {
         error_report("\"-S\" qemu option will be ignored in secondary side");
         /* recover runstate to normal migration finish state */
@@ -107,9 +127,15 @@
 {
     MigrationState *s = migrate_get_current();
     int old_state;
+    Error *local_err = NULL;
 
     migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
                       MIGRATION_STATUS_COMPLETED);
+    /*
+     * kick COLO thread which might wait at
+     * qemu_sem_wait(&s->colo_checkpoint_sem).
+     */
+    colo_checkpoint_notify(migrate_get_current());
 
     /*
      * Wake up COLO thread which may blocked in recv() or send(),
@@ -130,10 +156,28 @@
                      FailoverStatus_str(old_state));
         return;
     }
+
+    replication_stop_all(true, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        local_err = NULL;
+    }
+
     /* Notify COLO thread that failover work is finished */
     qemu_sem_post(&s->colo_exit_sem);
 }
 
+/* Derive the COLO mode from the migration state; primary is tested first. */
+COLOMode get_colo_mode(void)
+{
+    if (migration_in_colo_state()) {
+        return COLO_MODE_PRIMARY;
+    }
+
+    return migration_incoming_in_colo_state() ? COLO_MODE_SECONDARY
+                                              : COLO_MODE_NONE;
+}
+
 void colo_do_failover(MigrationState *s)
 {
     /* Make sure VM stopped while failover happened. */
@@ -207,6 +251,26 @@
 #endif
 }
 
+/* Allocate and fill a COLOStatus reply; @errp is not used. */
+COLOStatus *qmp_query_colo_status(Error **errp)
+{
+    COLOStatus *status = g_new0(COLOStatus, 1);
+
+    status->mode = get_colo_mode();
+    switch (failover_get_state()) {
+    case FAILOVER_STATUS_NONE:
+        status->reason = COLO_EXIT_REASON_NONE;
+        break;
+    case FAILOVER_STATUS_REQUIRE:
+        status->reason = COLO_EXIT_REASON_REQUEST;
+        break;
+    default:
+        /* any other failover state is reported as an error exit */
+        status->reason = COLO_EXIT_REASON_ERROR;
+    }
+    return status;
+}
+
 static void colo_send_message(QEMUFile *f, COLOMessage msg,
                               Error **errp)
 {
@@ -343,20 +407,41 @@
         goto out;
     }
 
-    /* Disable block migration */
-    migrate_set_block_enabled(false, &local_err);
-    qemu_savevm_state_header(fb);
-    qemu_savevm_state_setup(fb);
-    qemu_mutex_lock_iothread();
-    qemu_savevm_state_complete_precopy(fb, false, false);
-    qemu_mutex_unlock_iothread();
-
-    qemu_fflush(fb);
-
-    colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
+    colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
     if (local_err) {
         goto out;
     }
+
+    /* Disable block migration */
+    migrate_set_block_enabled(false, &local_err);
+    qemu_mutex_lock_iothread();
+    replication_do_checkpoint_all(&local_err);
+    if (local_err) {
+        qemu_mutex_unlock_iothread();
+        goto out;
+    }
+
+    colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
+    if (local_err) {
+        qemu_mutex_unlock_iothread();
+        goto out;
+    }
+    /* Note: device state is saved into buffer */
+    ret = qemu_save_device_state(fb);
+
+    qemu_mutex_unlock_iothread();
+    if (ret < 0) {
+        goto out;
+    }
+    /*
+     * Only save the VM's live state, which does not include device state.
+     * TODO: We may need a timeout mechanism to prevent the COLO process
+     * from being blocked here.
+     */
+    qemu_savevm_live_state(s->to_dst_file);
+
+    qemu_fflush(fb);
+
     /*
      * We need the size of the VMstate data in Secondary side,
      * With which we can decide how much data should be read.
@@ -400,6 +485,11 @@
     return ret;
 }
 
+static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
+{
+    colo_checkpoint_notify(data);
+}
+
 static void colo_process_checkpoint(MigrationState *s)
 {
     QIOChannelBuffer *bioc;
@@ -416,6 +506,9 @@
         goto out;
     }
 
+    packets_compare_notifier.notify = colo_compare_notify_checkpoint;
+    colo_compare_register_notifier(&packets_compare_notifier);
+
     /*
      * Wait for Secondary finish loading VM states and enter COLO
      * restore.
@@ -430,6 +523,12 @@
     object_unref(OBJECT(bioc));
 
     qemu_mutex_lock_iothread();
+    replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
+    if (local_err) {
+        qemu_mutex_unlock_iothread();
+        goto out;
+    }
+
     vm_start();
     qemu_mutex_unlock_iothread();
     trace_colo_vm_state_change("stop", "run");
@@ -445,6 +544,9 @@
 
         qemu_sem_wait(&s->colo_checkpoint_sem);
 
+        if (s->state != MIGRATION_STATUS_COLO) {
+            goto out;
+        }
         ret = colo_do_checkpoint_transaction(s, bioc, fb);
         if (ret < 0) {
             goto out;
@@ -461,11 +563,38 @@
         qemu_fclose(fb);
     }
 
-    timer_del(s->colo_delay_timer);
+    /*
+     * There are only two reasons we can get here, some error happened
+     * or the user triggered failover.
+     */
+    switch (failover_get_state()) {
+    case FAILOVER_STATUS_NONE:
+        qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
+                                  COLO_EXIT_REASON_ERROR);
+        break;
+    case FAILOVER_STATUS_REQUIRE:
+        qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
+                                  COLO_EXIT_REASON_REQUEST);
+        break;
+    default:
+        abort();
+    }
 
     /* Hope this not to be too long to wait here */
     qemu_sem_wait(&s->colo_exit_sem);
     qemu_sem_destroy(&s->colo_exit_sem);
+
+    /*
+     * It is safe to unregister the notifier once failover has finished.
+     * Besides, colo_delay_timer and colo_checkpoint_sem must not be
+     * released before the notifier is unregistered, or there will be a
+     * use-after-free error.
+     */
+    colo_compare_unregister_notifier(&packets_compare_notifier);
+    timer_del(s->colo_delay_timer);
+    timer_free(s->colo_delay_timer);
+    qemu_sem_destroy(&s->colo_checkpoint_sem);
+
     /*
      * Must be called after failover BH is completed,
      * Or the failover BH may shutdown the wrong fd that
@@ -533,6 +662,7 @@
     uint64_t total_size;
     uint64_t value;
     Error *local_err = NULL;
+    int ret;
 
     rcu_register_thread();
     qemu_sem_init(&mis->colo_incoming_sem, 0);
@@ -559,6 +689,16 @@
     fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
     object_unref(OBJECT(bioc));
 
+    qemu_mutex_lock_iothread();
+    replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
+    if (local_err) {
+        qemu_mutex_unlock_iothread();
+        goto out;
+    }
+    vm_start();
+    trace_colo_vm_state_change("stop", "run");
+    qemu_mutex_unlock_iothread();
+
     colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
                       &local_err);
     if (local_err) {
@@ -578,6 +718,11 @@
             goto out;
         }
 
+        qemu_mutex_lock_iothread();
+        vm_stop_force_state(RUN_STATE_COLO);
+        trace_colo_vm_state_change("run", "stop");
+        qemu_mutex_unlock_iothread();
+
         /* FIXME: This is unnecessary for periodic checkpoint mode */
         colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
                      &local_err);
@@ -591,6 +736,16 @@
             goto out;
         }
 
+        qemu_mutex_lock_iothread();
+        cpu_synchronize_all_pre_loadvm();
+        ret = qemu_loadvm_state_main(mis->from_src_file, mis);
+        qemu_mutex_unlock_iothread();
+
+        if (ret < 0) {
+            error_report("Load VM's live state (ram) error");
+            goto out;
+        }
+
         value = colo_receive_message_value(mis->from_src_file,
                                  COLO_MESSAGE_VMSTATE_SIZE, &local_err);
         if (local_err) {
@@ -622,15 +777,37 @@
         }
 
         qemu_mutex_lock_iothread();
-        qemu_system_reset(SHUTDOWN_CAUSE_NONE);
         vmstate_loading = true;
-        if (qemu_loadvm_state(fb) < 0) {
-            error_report("COLO: loadvm failed");
+        ret = qemu_load_device_state(fb);
+        if (ret < 0) {
+            error_report("COLO: load device state failed");
+            qemu_mutex_unlock_iothread();
+            goto out;
+        }
+
+        replication_get_error_all(&local_err);
+        if (local_err) {
+            qemu_mutex_unlock_iothread();
+            goto out;
+        }
+        /* discard colo disk buffer */
+        replication_do_checkpoint_all(&local_err);
+        if (local_err) {
+            qemu_mutex_unlock_iothread();
+            goto out;
+        }
+
+        /* Notify all filters of all NIC to do checkpoint */
+        colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
+
+        if (local_err) {
             qemu_mutex_unlock_iothread();
             goto out;
         }
 
         vmstate_loading = false;
+        vm_start();
+        trace_colo_vm_state_change("stop", "run");
         qemu_mutex_unlock_iothread();
 
         if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
@@ -654,6 +831,19 @@
         error_report_err(local_err);
     }
 
+    switch (failover_get_state()) {
+    case FAILOVER_STATUS_NONE:
+        qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
+                                  COLO_EXIT_REASON_ERROR);
+        break;
+    case FAILOVER_STATUS_REQUIRE:
+        qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
+                                  COLO_EXIT_REASON_REQUEST);
+        break;
+    default:
+        abort();
+    }
+
     if (fb) {
         qemu_fclose(fb);
     }
@@ -665,7 +855,7 @@
     if (mis->to_src_file) {
         qemu_fclose(mis->to_src_file);
     }
-    migration_incoming_exit_colo();
+    migration_incoming_disable_colo();
 
     rcu_unregister_thread();
     return NULL;
diff --git a/migration/migration.c b/migration/migration.c
index d6ae879..8b36e7f 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -76,10 +76,8 @@
 /* Migration XBZRLE default cache size */
 #define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)
 
-/* The delay time (in ms) between two COLO checkpoints
- * Note: Please change this default value to 10000 when we support hybrid mode.
- */
-#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200
+/* The delay time (in ms) between two COLO checkpoints */
+#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
 #define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
 #define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16
 
@@ -298,6 +296,22 @@
     return migrate_send_rp_message(mis, msg_type, msglen, bufc);
 }
 
+static bool migration_colo_enabled; /* false until enabled; see helpers */
+bool migration_incoming_colo_enabled(void)
+{
+    return migration_colo_enabled; /* plain read of the flag */
+}
+
+void migration_incoming_disable_colo(void)
+{
+    migration_colo_enabled = false; /* unconditional; no state check */
+}
+
+void migration_incoming_enable_colo(void)
+{
+    migration_colo_enabled = true; /* unconditional; no state check */
+}
+
 void qemu_start_incoming_migration(const char *uri, Error **errp)
 {
     const char *p;
@@ -388,6 +402,7 @@
     MigrationIncomingState *mis = migration_incoming_get_current();
     PostcopyState ps;
     int ret;
+    Error *local_err = NULL;
 
     assert(mis->from_src_file);
     mis->migration_incoming_co = qemu_coroutine_self();
@@ -419,7 +434,21 @@
     }
 
     /* we get COLO info, and know if we are in COLO mode */
-    if (!ret && migration_incoming_enable_colo()) {
+    if (!ret && migration_incoming_colo_enabled()) {
+        /* Make sure all file formats flush their mutable metadata */
+        bdrv_invalidate_cache_all(&local_err);
+        if (local_err) {
+            migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
+                    MIGRATION_STATUS_FAILED);
+            error_report_err(local_err);
+            exit(EXIT_FAILURE);
+        }
+
+        if (colo_init_ram_cache() < 0) {
+            error_report("Init ram cache failed");
+            exit(EXIT_FAILURE);
+        }
+
         qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
              colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
         mis->have_colo_incoming_thread = true;
@@ -427,6 +456,8 @@
 
         /* Wait checkpoint incoming thread exit before free resource */
         qemu_thread_join(&mis->colo_incoming_thread);
+        /* We hold the global iothread lock, so it is safe here */
+        colo_release_ram_cache();
     }
 
     if (ret < 0) {
@@ -1546,9 +1577,9 @@
 int migrate_add_blocker(Error *reason, Error **errp)
 {
     if (migrate_get_current()->only_migratable) {
-        error_propagate(errp, error_copy(reason));
-        error_prepend(errp, "disallowing migration blocker "
-                          "(--only_migratable) for: ");
+        error_propagate_prepend(errp, error_copy(reason),
+                                "disallowing migration blocker "
+                                "(--only_migratable) for: ");
         return -EACCES;
     }
 
@@ -1557,9 +1588,9 @@
         return 0;
     }
 
-    error_propagate(errp, error_copy(reason));
-    error_prepend(errp, "disallowing migration blocker (migration in "
-                      "progress) for: ");
+    error_propagate_prepend(errp, error_copy(reason),
+                            "disallowing migration blocker "
+                            "(migration in progress) for: ");
     return -EBUSY;
 }
 
@@ -3017,6 +3048,11 @@
         qemu_savevm_send_postcopy_advise(s->to_dst_file);
     }
 
+    if (migrate_colo_enabled()) {
+        /* Notify migration destination that we enable COLO */
+        qemu_savevm_send_colo_enable(s->to_dst_file);
+    }
+
     qemu_savevm_state_setup(s->to_dst_file);
 
     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 853d8b3..e5c02a3 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -533,6 +533,12 @@
     if (mis->have_fault_thread) {
         Error *local_err = NULL;
 
+        /* Let the fault thread quit */
+        atomic_set(&mis->fault_thread_quit, 1);
+        postcopy_fault_thread_notify(mis);
+        trace_postcopy_ram_incoming_cleanup_join();
+        qemu_thread_join(&mis->fault_thread);
+
         if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_END, &local_err)) {
             error_report_err(local_err);
             return -1;
@@ -541,11 +547,6 @@
         if (qemu_ram_foreach_migratable_block(cleanup_range, mis)) {
             return -1;
         }
-        /* Let the fault thread quit */
-        atomic_set(&mis->fault_thread_quit, 1);
-        postcopy_fault_thread_notify(mis);
-        trace_postcopy_ram_incoming_cleanup_join();
-        qemu_thread_join(&mis->fault_thread);
 
         trace_postcopy_ram_incoming_cleanup_closeuf();
         close(mis->userfault_fd);
diff --git a/migration/ram.c b/migration/ram.c
index bc38d98..7e7deec 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3447,6 +3447,29 @@
     return block->host + offset;
 }
 
+static inline void *colo_cache_from_block_offset(RAMBlock *block,
+                                                 ram_addr_t offset)
+{
+    int was_dirty;
+
+    if (!offset_in_ramblock(block, offset)) {
+        return NULL;
+    } else if (block->colo_cache == NULL) {
+        error_report("%s: colo_cache is NULL in block :%s",
+                     __func__, block->idstr);
+        return NULL;
+    }
+    /*
+     * Flag the page in block->bmap so that a later checkpoint can tell which
+     * cached pages to flush into the VM's RAM; count it if newly flagged.
+     */
+    was_dirty = test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap);
+    if (!was_dirty) {
+        ram_state->migration_dirty_pages += 1;
+    }
+    return block->colo_cache + offset;
+}
+
 /**
  * ram_handle_compressed: handle the zero page case
  *
@@ -3651,6 +3674,88 @@
     qemu_mutex_unlock(&decomp_done_lock);
 }
 
+/*
+ * colo cache: this is for the secondary VM; the whole memory of the
+ * secondary VM is cached. The caller must hold the global lock.
+ * Returns 0 on success, a negative errno value on allocation failure.
+ */
+int colo_init_ram_cache(void)
+{
+    RAMBlock *block;
+    int ret = 0;
+
+    rcu_read_lock();
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
+        block->colo_cache = qemu_anon_ram_alloc(block->used_length,
+                                                NULL,
+                                                false);
+        if (!block->colo_cache) {
+            ret = -errno; /* latch before reporting/cleanup can clobber it */
+            error_report("%s: Can't alloc memory for COLO cache of block %s,"
+                         " size 0x" RAM_ADDR_FMT, __func__, block->idstr,
+                         block->used_length);
+            goto out_locked;
+        }
+        memcpy(block->colo_cache, block->host, block->used_length);
+    }
+    /*
+     * Record the dirty pages sent by the PVM; this dirty bitmap is used to
+     * decide which pages in the cache should be flushed into the SVM's RAM.
+     * The block list is walked while the RCU read lock is still held.
+     */
+    if (ram_bytes_total()) {
+        RAMBLOCK_FOREACH_MIGRATABLE(block) {
+            unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
+
+            block->bmap = bitmap_new(pages);
+            bitmap_set(block->bmap, 0, pages);
+        }
+    }
+    rcu_read_unlock();
+
+    ram_state = g_new0(RAMState, 1);
+    ram_state->migration_dirty_pages = 0;
+    memory_global_dirty_log_start();
+
+    return 0;
+
+out_locked:
+    /* Undo the allocations made so far; still inside the RCU section */
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
+        if (block->colo_cache) {
+            qemu_anon_ram_free(block->colo_cache, block->used_length);
+            block->colo_cache = NULL;
+        }
+    }
+    rcu_read_unlock();
+    return ret;
+}
+
+/* The caller must hold the global lock when calling this helper. */
+void colo_release_ram_cache(void)
+{
+    RAMBlock *block;
+
+    memory_global_dirty_log_stop();
+    /*
+     * Walk the RAM block list once, inside an RCU read-side critical
+     * section, dropping each block's dirty bitmap and its cache copy.
+     */
+    rcu_read_lock();
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
+        g_free(block->bmap);
+        block->bmap = NULL;
+        if (block->colo_cache) {
+            qemu_anon_ram_free(block->colo_cache, block->used_length);
+            block->colo_cache = NULL;
+        }
+    }
+    rcu_read_unlock();
+
+    g_free(ram_state);
+    ram_state = NULL;
+}
+
 /**
  * ram_load_setup: Setup RAM for migration incoming side
  *
@@ -3667,6 +3772,7 @@
 
     xbzrle_load_setup();
     ramblock_recv_map_init();
+
     return 0;
 }
 
@@ -3687,6 +3793,7 @@
         g_free(rb->receivedmap);
         rb->receivedmap = NULL;
     }
+
     return 0;
 }
 
@@ -3869,6 +3976,46 @@
     return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
 }
 
+/*
+ * Copy the content of the RAM cache back into the SVM's memory.
+ * Only pages that were dirtied by the PVM, the SVM or both are flushed.
+ */
+static void colo_flush_ram_cache(void)
+{
+    RAMBlock *rb = NULL;
+    unsigned long page = 0;
+    unsigned long byte_off;
+
+    /* Sync the dirty log into the bitmap of every migratable block */
+    memory_global_dirty_log_sync();
+    rcu_read_lock();
+    RAMBLOCK_FOREACH_MIGRATABLE(rb) {
+        migration_bitmap_sync_range(ram_state, rb, 0, rb->used_length);
+    }
+    rcu_read_unlock();
+
+    trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
+    rcu_read_lock();
+
+    for (rb = QLIST_FIRST_RCU(&ram_list.blocks); rb; ) {
+        page = migration_bitmap_find_dirty(ram_state, rb, page);
+        byte_off = page << TARGET_PAGE_BITS;
+        if (byte_off < rb->used_length) {
+            /* Page lies inside the block: clear its bit, then copy it */
+            migration_bitmap_clear_dirty(ram_state, rb, page);
+            memcpy(rb->host + byte_off, rb->colo_cache + byte_off,
+                   TARGET_PAGE_SIZE);
+            continue;
+        }
+        /* Ran past the used length: restart the scan at the next block */
+        page = 0;
+        rb = QLIST_NEXT_RCU(rb, next);
+    }
+
+    rcu_read_unlock();
+    trace_colo_flush_ram_cache_end();
+}
+
 static int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
     int flags = 0, ret = 0, invalid_flags = 0;
@@ -3924,13 +4071,24 @@
                      RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
             RAMBlock *block = ram_block_from_stream(f, flags);
 
-            host = host_from_ram_block_offset(block, addr);
+            /*
+             * After going into COLO, we should load the Page into colo_cache.
+             */
+            if (migration_incoming_in_colo_state()) {
+                host = colo_cache_from_block_offset(block, addr);
+            } else {
+                host = host_from_ram_block_offset(block, addr);
+            }
             if (!host) {
                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                 ret = -EINVAL;
                 break;
             }
-            ramblock_recv_bitmap_set(block, host);
+
+            if (!migration_incoming_in_colo_state()) {
+                ramblock_recv_bitmap_set(block, host);
+            }
+
             trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
         }
 
@@ -4034,6 +4192,10 @@
     ret |= wait_for_decompress_done();
     rcu_read_unlock();
     trace_ram_load_complete(ret, seq_iter);
+
+    if (!ret  && migration_incoming_in_colo_state()) {
+        colo_flush_ram_cache();
+    }
     return ret;
 }
 
diff --git a/migration/ram.h b/migration/ram.h
index a139066..83ff1bc 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -71,4 +71,8 @@
                                   const char *block_name);
 int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb);
 
+/* ram cache */
+int colo_init_ram_cache(void);
+void colo_release_ram_cache(void);
+
 #endif
diff --git a/migration/savevm.c b/migration/savevm.c
index 2d10e45..9992af4 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -56,6 +56,7 @@
 #include "io/channel-file.h"
 #include "sysemu/replay.h"
 #include "qjson.h"
+#include "migration/colo.h"
 
 #ifndef ETH_P_RARP
 #define ETH_P_RARP 0x8035
@@ -82,6 +83,7 @@
                                       were previously sent during
                                       precopy but are dirty. */
     MIG_CMD_PACKAGED,          /* Send a wrapped stream within this stream */
+    MIG_CMD_ENABLE_COLO,       /* Enable COLO */
     MIG_CMD_POSTCOPY_RESUME,   /* resume postcopy on dest */
     MIG_CMD_RECV_BITMAP,       /* Request for recved bitmap on dst */
     MIG_CMD_MAX
@@ -841,6 +843,12 @@
     qemu_fflush(f);
 }
 
+void qemu_savevm_send_colo_enable(QEMUFile *f)
+{
+    trace_savevm_send_colo_enable(); /* dest side: MIG_CMD_ENABLE_COLO case */
+    qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL);
+}
+
 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
 {
     uint32_t buf;
@@ -1370,13 +1378,21 @@
     return ret;
 }
 
-static int qemu_save_device_state(QEMUFile *f)
+void qemu_savevm_live_state(QEMUFile *f)
+{
+    /* Save the QEMU_VM_SECTION_END section, then append the EOF marker */
+    qemu_savevm_state_complete_precopy(f, true, false);
+    qemu_put_byte(f, QEMU_VM_EOF);
+}
+
+int qemu_save_device_state(QEMUFile *f)
 {
     SaveStateEntry *se;
 
-    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
-    qemu_put_be32(f, QEMU_VM_FILE_VERSION);
-
+    if (!migration_in_colo_state()) {
+        qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
+        qemu_put_be32(f, QEMU_VM_FILE_VERSION);
+    }
     cpu_synchronize_all_states();
 
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
@@ -1432,8 +1448,6 @@
     LOADVM_QUIT     =  1,
 };
 
-static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
-
 /* ------ incoming postcopy messages ------ */
 /* 'advise' arrives before any transfers just to tell us that a postcopy
  * *might* happen - it might be skipped if precopy transferred everything
@@ -1922,6 +1936,12 @@
     return 0;
 }
 
+static int loadvm_process_enable_colo(MigrationIncomingState *mis)
+{
+    migration_incoming_enable_colo(); /* only flag COLO; cache comes later */
+    return 0;
+}
+
 /*
  * Process an incoming 'QEMU_VM_COMMAND'
  * 0           just a normal return
@@ -2001,6 +2021,9 @@
 
     case MIG_CMD_RECV_BITMAP:
         return loadvm_handle_recv_bitmap(mis, len);
+
+    case MIG_CMD_ENABLE_COLO:
+        return loadvm_process_enable_colo(mis);
     }
 
     return 0;
@@ -2230,7 +2253,7 @@
     return true;
 }
 
-static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
+int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
 {
     uint8_t section_type;
     int ret = 0;
@@ -2401,6 +2424,22 @@
     return ret;
 }
 
+int qemu_load_device_state(QEMUFile *f)
+{
+    int rc;
+
+    /* Load QEMU_VM_SECTION_FULL section */
+    rc = qemu_loadvm_state_main(f, migration_incoming_get_current());
+    if (rc >= 0) {
+        /* Success: run the post-init CPU synchronization */
+        cpu_synchronize_all_post_init();
+        return 0;
+    }
+
+    error_report("Failed to load device state: %d", rc);
+    return rc;
+}
+
 int save_snapshot(const char *name, Error **errp)
 {
     BlockDriverState *bs, *bs1;
@@ -2414,8 +2453,8 @@
     AioContext *aio_context;
 
     if (!replay_can_snapshot()) {
-        error_report("Record/replay does not allow making snapshot "
-                     "right now. Try once more later.");
+        error_setg(errp, "Record/replay does not allow making snapshot "
+                   "right now. Try once more later.");
         return ret;
     }
 
@@ -2611,8 +2650,8 @@
     MigrationIncomingState *mis = migration_incoming_get_current();
 
     if (!replay_can_snapshot()) {
-        error_report("Record/replay does not allow loading snapshot "
-                     "right now. Try once more later.");
+        error_setg(errp, "Record/replay does not allow loading snapshot "
+                   "right now. Try once more later.");
         return -EINVAL;
     }
 
diff --git a/migration/savevm.h b/migration/savevm.h
index a5e65b8..51a4b9c 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -55,8 +55,13 @@
                                            uint16_t len,
                                            uint64_t *start_list,
                                            uint64_t *length_list);
+void qemu_savevm_send_colo_enable(QEMUFile *f);
+void qemu_savevm_live_state(QEMUFile *f);
+int qemu_save_device_state(QEMUFile *f);
 
 int qemu_loadvm_state(QEMUFile *f);
 void qemu_loadvm_state_cleanup(void);
+int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
+int qemu_load_device_state(QEMUFile *f);
 
 #endif
diff --git a/migration/trace-events b/migration/trace-events
index 9430f3c..bd2d0cd 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -37,6 +37,7 @@
 savevm_send_postcopy_listen(void) ""
 savevm_send_postcopy_run(void) ""
 savevm_send_postcopy_resume(void) ""
+savevm_send_colo_enable(void) ""
 savevm_send_recv_bitmap(char *name) "%s"
 savevm_state_setup(void) ""
 savevm_state_resume_prepare(void) ""
@@ -101,6 +102,8 @@
 ram_dirty_bitmap_sync_wait(void) ""
 ram_dirty_bitmap_sync_complete(void) ""
 ram_state_resume_prepare(uint64_t v) "%" PRId64
+colo_flush_ram_cache_begin(uint64_t dirty_pages) "dirty_pages %" PRIu64
+colo_flush_ram_cache_end(void) ""
 
 # migration/migration.c
 await_return_path_close_on_source_close(void) ""
diff --git a/monitor.c b/monitor.c
index c4677b5..823b5a1 100644
--- a/monitor.c
+++ b/monitor.c
@@ -83,6 +83,7 @@
 #include "sysemu/cpus.h"
 #include "sysemu/iothread.h"
 #include "qemu/cutils.h"
+#include "tcg/tcg.h"
 
 #if defined(TARGET_S390X)
 #include "hw/s390x/storage-keys.h"
@@ -1966,16 +1967,22 @@
 
 #ifdef CONFIG_PROFILER
 
-int64_t tcg_time;
 int64_t dev_time;
 
 static void hmp_info_profile(Monitor *mon, const QDict *qdict)
 {
+    static int64_t last_cpu_exec_time;
+    int64_t cpu_exec_time;
+    int64_t delta;
+
+    cpu_exec_time = tcg_cpu_exec_time();
+    delta = cpu_exec_time - last_cpu_exec_time;
+
     monitor_printf(mon, "async time  %" PRId64 " (%0.3f)\n",
                    dev_time, dev_time / (double)NANOSECONDS_PER_SECOND);
     monitor_printf(mon, "qemu time   %" PRId64 " (%0.3f)\n",
-                   tcg_time, tcg_time / (double)NANOSECONDS_PER_SECOND);
-    tcg_time = 0;
+                   delta, delta / (double)NANOSECONDS_PER_SECOND);
+    last_cpu_exec_time = cpu_exec_time;
     dev_time = 0;
 }
 #else
@@ -4493,17 +4500,27 @@
 }
 
 /*
+ * Print to the current monitor if there is one and it is not a QMP
+ * monitor; otherwise print to stream.  TODO should return int, so callers
+ * can calculate width, but that requires surgery to monitor_vprintf().
+ */
+void monitor_vfprintf(FILE *stream, const char *fmt, va_list ap)
+{
+    if (!cur_mon || monitor_cur_is_qmp()) {
+        vfprintf(stream, fmt, ap);
+        return;
+    }
+    monitor_vprintf(cur_mon, fmt, ap);
+}
+
+/*
  * Print to current monitor if we have one, else to stderr.
  * TODO should return int, so callers can calculate width, but that
  * requires surgery to monitor_vprintf().  Left for another day.
  */
 void error_vprintf(const char *fmt, va_list ap)
 {
-    if (cur_mon && !monitor_cur_is_qmp()) {
-        monitor_vprintf(cur_mon, fmt, ap);
-    } else {
-        vfprintf(stderr, fmt, ap);
-    }
+    monitor_vfprintf(stderr, fmt, ap);
 }
 
 void error_vprintf_unless_qmp(const char *fmt, va_list ap)
diff --git a/nbd/client.c b/nbd/client.c
index 40b74d9..b4d457a 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -117,10 +117,10 @@
         nbd_send_opt_abort(ioc);
         return -1;
     }
-    be64_to_cpus(&reply->magic);
-    be32_to_cpus(&reply->option);
-    be32_to_cpus(&reply->type);
-    be32_to_cpus(&reply->length);
+    reply->magic = be64_to_cpu(reply->magic);
+    reply->option = be32_to_cpu(reply->option);
+    reply->type = be32_to_cpu(reply->type);
+    reply->length = be32_to_cpu(reply->length);
 
     trace_nbd_receive_option_reply(reply->option, nbd_opt_lookup(reply->option),
                                    reply->type, nbd_rep_lookup(reply->type),
@@ -396,7 +396,7 @@
             return -1;
         }
         len -= sizeof(type);
-        be16_to_cpus(&type);
+        type = be16_to_cpu(type);
         switch (type) {
         case NBD_INFO_EXPORT:
             if (len != sizeof(info->size) + sizeof(info->flags)) {
@@ -410,13 +410,13 @@
                 nbd_send_opt_abort(ioc);
                 return -1;
             }
-            be64_to_cpus(&info->size);
+            info->size = be64_to_cpu(info->size);
             if (nbd_read(ioc, &info->flags, sizeof(info->flags), errp) < 0) {
                 error_prepend(errp, "failed to read info flags: ");
                 nbd_send_opt_abort(ioc);
                 return -1;
             }
-            be16_to_cpus(&info->flags);
+            info->flags = be16_to_cpu(info->flags);
             trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
             break;
 
@@ -433,7 +433,7 @@
                 nbd_send_opt_abort(ioc);
                 return -1;
             }
-            be32_to_cpus(&info->min_block);
+            info->min_block = be32_to_cpu(info->min_block);
             if (!is_power_of_2(info->min_block)) {
                 error_setg(errp, "server minimum block size %" PRIu32
                            " is not a power of two", info->min_block);
@@ -447,7 +447,7 @@
                 nbd_send_opt_abort(ioc);
                 return -1;
             }
-            be32_to_cpus(&info->opt_block);
+            info->opt_block = be32_to_cpu(info->opt_block);
             if (!is_power_of_2(info->opt_block) ||
                 info->opt_block < info->min_block) {
                 error_setg(errp, "server preferred block size %" PRIu32
@@ -461,7 +461,7 @@
                 nbd_send_opt_abort(ioc);
                 return -1;
             }
-            be32_to_cpus(&info->max_block);
+            info->max_block = be32_to_cpu(info->max_block);
             if (info->max_block < info->min_block) {
                 error_setg(errp, "server maximum block size %" PRIu32
                            " is not valid", info->max_block);
@@ -668,7 +668,7 @@
         if (nbd_read(ioc, &received_id, sizeof(received_id), errp) < 0) {
             return -1;
         }
-        be32_to_cpus(&received_id);
+        received_id = be32_to_cpu(received_id);
 
         reply.length -= sizeof(received_id);
         name = g_malloc(reply.length + 1);
@@ -872,13 +872,13 @@
             error_prepend(errp, "Failed to read export length: ");
             goto fail;
         }
-        be64_to_cpus(&info->size);
+        info->size = be64_to_cpu(info->size);
 
         if (nbd_read(ioc, &info->flags, sizeof(info->flags), errp) < 0) {
             error_prepend(errp, "Failed to read export flags: ");
             goto fail;
         }
-        be16_to_cpus(&info->flags);
+        info->flags = be16_to_cpu(info->flags);
     } else if (magic == NBD_CLIENT_MAGIC) {
         uint32_t oldflags;
 
@@ -895,13 +895,13 @@
             error_prepend(errp, "Failed to read export length: ");
             goto fail;
         }
-        be64_to_cpus(&info->size);
+        info->size = be64_to_cpu(info->size);
 
         if (nbd_read(ioc, &oldflags, sizeof(oldflags), errp) < 0) {
             error_prepend(errp, "Failed to read export flags: ");
             goto fail;
         }
-        be32_to_cpus(&oldflags);
+        oldflags = be32_to_cpu(oldflags);
         if (oldflags & ~0xffff) {
             error_setg(errp, "Unexpected export flags %0x" PRIx32, oldflags);
             goto fail;
@@ -1080,8 +1080,8 @@
         return ret;
     }
 
-    be32_to_cpus(&reply->error);
-    be64_to_cpus(&reply->handle);
+    reply->error = be32_to_cpu(reply->error);
+    reply->handle = be64_to_cpu(reply->handle);
 
     return 0;
 }
@@ -1105,10 +1105,10 @@
         return ret;
     }
 
-    be16_to_cpus(&chunk->flags);
-    be16_to_cpus(&chunk->type);
-    be64_to_cpus(&chunk->handle);
-    be32_to_cpus(&chunk->length);
+    chunk->flags = be16_to_cpu(chunk->flags);
+    chunk->type = be16_to_cpu(chunk->type);
+    chunk->handle = be64_to_cpu(chunk->handle);
+    chunk->length = be32_to_cpu(chunk->length);
 
     return 0;
 }
@@ -1128,7 +1128,7 @@
         return ret;
     }
 
-    be32_to_cpus(&reply->magic);
+    reply->magic = be32_to_cpu(reply->magic);
 
     switch (reply->magic) {
     case NBD_SIMPLE_REPLY_MAGIC:
diff --git a/nbd/server.c b/nbd/server.c
index c3dd402..4e8f5ae 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -333,7 +333,7 @@
     if (ret <= 0) {
         return ret;
     }
-    cpu_to_be32s(&len);
+    len = cpu_to_be32(len);
 
     if (len > NBD_MAX_NAME_SIZE) {
         return nbd_opt_invalid(client, errp,
@@ -486,7 +486,7 @@
     if (rc < 0) {
         return rc;
     }
-    cpu_to_be16s(&info);
+    info = cpu_to_be16(info);
     if (nbd_write(client->ioc, &info, sizeof(info), errp) < 0) {
         return -EIO;
     }
@@ -551,14 +551,14 @@
     if (rc <= 0) {
         return rc;
     }
-    be16_to_cpus(&requests);
+    requests = be16_to_cpu(requests);
     trace_nbd_negotiate_handle_info_requests(requests);
     while (requests--) {
         rc = nbd_opt_read(client, &request, sizeof(request), errp);
         if (rc <= 0) {
             return rc;
         }
-        be16_to_cpus(&request);
+        request = be16_to_cpu(request);
         trace_nbd_negotiate_handle_info_request(request,
                                                 nbd_info_lookup(request));
         /* We care about NBD_INFO_NAME and NBD_INFO_BLOCK_SIZE;
@@ -618,9 +618,9 @@
     /* maximum - At most 32M, but smaller as appropriate. */
     sizes[2] = MIN(blk_get_max_transfer(exp->blk), NBD_MAX_BUFFER_SIZE);
     trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
-    cpu_to_be32s(&sizes[0]);
-    cpu_to_be32s(&sizes[1]);
-    cpu_to_be32s(&sizes[2]);
+    sizes[0] = cpu_to_be32(sizes[0]);
+    sizes[1] = cpu_to_be32(sizes[1]);
+    sizes[2] = cpu_to_be32(sizes[2]);
     rc = nbd_negotiate_send_info(client, NBD_INFO_BLOCK_SIZE,
                                  sizeof(sizes), sizes, errp);
     if (rc < 0) {
@@ -904,7 +904,7 @@
     if (ret <= 0) {
         return ret;
     }
-    cpu_to_be32s(&len);
+    len = cpu_to_be32(len);
 
     if (len < ns_len) {
         trace_nbd_negotiate_meta_query_skip("length too short");
@@ -971,7 +971,7 @@
     if (ret <= 0) {
         return ret;
     }
-    cpu_to_be32s(&nb_queries);
+    nb_queries = cpu_to_be32(nb_queries);
     trace_nbd_negotiate_meta_context(nbd_opt_lookup(client->opt),
                                      export_name, nb_queries);
 
@@ -1049,7 +1049,7 @@
         error_prepend(errp, "read failed: ");
         return -EIO;
     }
-    be32_to_cpus(&flags);
+    flags = be32_to_cpu(flags);
     trace_nbd_negotiate_options_flags(flags);
     if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
         fixedNewstyle = true;
@@ -1253,7 +1253,6 @@
     const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
                               NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
                               NBD_FLAG_SEND_WRITE_ZEROES | NBD_FLAG_SEND_CACHE);
-    bool oldStyle;
 
     /* Old style negotiation header, no room for options
         [ 0 ..   7]   passwd       ("NBDMAGIC")
@@ -1274,33 +1273,19 @@
     trace_nbd_negotiate_begin();
     memcpy(buf, "NBDMAGIC", 8);
 
-    oldStyle = client->exp != NULL && !client->tlscreds;
-    if (oldStyle) {
-        trace_nbd_negotiate_old_style(client->exp->size,
-                                      client->exp->nbdflags | myflags);
-        stq_be_p(buf + 8, NBD_CLIENT_MAGIC);
-        stq_be_p(buf + 16, client->exp->size);
-        stl_be_p(buf + 24, client->exp->nbdflags | myflags);
+    stq_be_p(buf + 8, NBD_OPTS_MAGIC);
+    stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
 
-        if (nbd_write(client->ioc, buf, sizeof(buf), errp) < 0) {
-            error_prepend(errp, "write failed: ");
-            return -EINVAL;
+    if (nbd_write(client->ioc, buf, 18, errp) < 0) {
+        error_prepend(errp, "write failed: ");
+        return -EINVAL;
+    }
+    ret = nbd_negotiate_options(client, myflags, errp);
+    if (ret != 0) {
+        if (ret < 0) {
+            error_prepend(errp, "option negotiation failed: ");
         }
-    } else {
-        stq_be_p(buf + 8, NBD_OPTS_MAGIC);
-        stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
-
-        if (nbd_write(client->ioc, buf, 18, errp) < 0) {
-            error_prepend(errp, "write failed: ");
-            return -EINVAL;
-        }
-        ret = nbd_negotiate_options(client, myflags, errp);
-        if (ret != 0) {
-            if (ret < 0) {
-                error_prepend(errp, "option negotiation failed: ");
-            }
-            return ret;
-        }
+        return ret;
     }
 
     assert(!client->optlen);
@@ -1900,8 +1885,8 @@
     extents_end = extent + 1;
 
     for (extent = extents; extent < extents_end; extent++) {
-        cpu_to_be32s(&extent->flags);
-        cpu_to_be32s(&extent->length);
+        extent->flags = cpu_to_be32(extent->flags);
+        extent->length = cpu_to_be32(extent->length);
     }
 
     *bytes -= remaining_bytes;
@@ -2177,7 +2162,8 @@
     }
 
     if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) &&
-        request->len) {
+        request->len && request->type != NBD_CMD_CACHE)
+    {
         return nbd_co_send_sparse_read(client, request->handle, request->from,
                                        data, request->len, errp);
     }
@@ -2395,13 +2381,8 @@
 static coroutine_fn void nbd_co_client_start(void *opaque)
 {
     NBDClient *client = opaque;
-    NBDExport *exp = client->exp;
     Error *local_err = NULL;
 
-    if (exp) {
-        nbd_export_get(exp);
-        QTAILQ_INSERT_TAIL(&exp->clients, client, next);
-    }
     qemu_co_mutex_init(&client->send_lock);
 
     if (nbd_negotiate(client, &local_err)) {
@@ -2416,13 +2397,11 @@
 }
 
 /*
- * Create a new client listener on the given export @exp, using the
- * given channel @sioc.  Begin servicing it in a coroutine.  When the
- * connection closes, call @close_fn with an indication of whether the
- * client completed negotiation.
+ * Create a new client listener using the given channel @sioc.
+ * Begin servicing it in a coroutine.  When the connection closes, call
+ * @close_fn with an indication of whether the client completed negotiation.
  */
-void nbd_client_new(NBDExport *exp,
-                    QIOChannelSocket *sioc,
+void nbd_client_new(QIOChannelSocket *sioc,
                     QCryptoTLSCreds *tlscreds,
                     const char *tlsaclname,
                     void (*close_fn)(NBDClient *, bool))
@@ -2432,7 +2411,6 @@
 
     client = g_new0(NBDClient, 1);
     client->refcount = 1;
-    client->exp = exp;
     client->tlscreds = tlscreds;
     if (tlscreds) {
         object_ref(OBJECT(client->tlscreds));
@@ -2478,8 +2456,8 @@
         return;
     }
 
-    if (bdrv_dirty_bitmap_qmp_locked(bm)) {
-        error_setg(errp, "Bitmap '%s' is locked", bitmap);
+    if (bdrv_dirty_bitmap_user_locked(bm)) {
+        error_setg(errp, "Bitmap '%s' is in use", bitmap);
         return;
     }
 
diff --git a/net/colo-compare.c b/net/colo-compare.c
index dd745a4..a39191d 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -27,11 +27,20 @@
 #include "qemu/sockets.h"
 #include "colo.h"
 #include "sysemu/iothread.h"
+#include "net/colo-compare.h"
+#include "migration/colo.h"
+#include "migration/migration.h"
 
 #define TYPE_COLO_COMPARE "colo-compare"
 #define COLO_COMPARE(obj) \
     OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE)
 
+static QTAILQ_HEAD(, CompareState) net_compares =
+       QTAILQ_HEAD_INITIALIZER(net_compares);
+
+static NotifierList colo_compare_notifiers =
+    NOTIFIER_LIST_INITIALIZER(colo_compare_notifiers);
+
 #define COMPARE_READ_LEN_MAX NET_BUFSIZE
 #define MAX_QUEUE_SIZE 1024
 
@@ -41,6 +50,10 @@
 /* TODO: Should be configurable */
 #define REGULAR_PACKET_CHECK_MS 3000
 
+static QemuMutex event_mtx;
+static QemuCond event_complete_cond;
+static int event_unhandled_count;
+
 /*
  *  + CompareState ++
  *  |               |
@@ -87,6 +100,11 @@
     IOThread *iothread;
     GMainContext *worker_context;
     QEMUTimer *packet_check_timer;
+
+    QEMUBH *event_bh;
+    enum colo_event event;
+
+    QTAILQ_ENTRY(CompareState) next;
 } CompareState;
 
 typedef struct CompareClass {
@@ -98,6 +116,12 @@
     SECONDARY_IN,
 };
 
+/* Notify colo_compare_notifiers, passing migrate_get_current() as data. */
+static void colo_compare_inconsistency_notify(void)
+{
+    notifier_list_notify(&colo_compare_notifiers, migrate_get_current());
+}
+
 static int compare_chr_send(CompareState *s,
                             const uint8_t *buf,
                             uint32_t size,
@@ -413,10 +437,7 @@
         qemu_hexdump((char *)spkt->data, stderr,
                      "colo-compare spkt", spkt->size);
 
-        /*
-         * colo_compare_inconsistent_notify();
-         * TODO: notice to checkpoint();
-         */
+        colo_compare_inconsistency_notify();
     }
 }
 
@@ -547,8 +568,20 @@
     }
 }
 
+/* Subscribe @notify to the colo_compare_notifiers list. */
+void colo_compare_register_notifier(Notifier *notify)
+{
+    notifier_list_add(&colo_compare_notifiers, notify);
+}
+
+/* Unsubscribe @notify by handing it to notifier_remove(). */
+void colo_compare_unregister_notifier(Notifier *notify)
+{
+    notifier_remove(notify);
+}
+
 static int colo_old_packet_check_one_conn(Connection *conn,
                                           void *user_data)
 {
     GList *result = NULL;
     int64_t check_time = REGULAR_PACKET_CHECK_MS;
@@ -559,10 +590,7 @@
 
     if (result) {
         /* Do checkpoint will flush old packet */
-        /*
-         * TODO: Notify colo frame to do checkpoint.
-         * colo_compare_inconsistent_notify();
-         */
+        colo_compare_inconsistency_notify();
         return 0;
     }
 
@@ -606,11 +634,12 @@
             /*
              * If one packet arrive late, the secondary_list or
              * primary_list will be empty, so we can't compare it
-             * until next comparison.
+             * until next comparison. If the packets in the list time
+             * out, a checkpoint request will be triggered.
              */
             trace_colo_compare_main("packet different");
             g_queue_push_head(&conn->primary_list, pkt);
-            /* TODO: colo_notify_checkpoint();*/
+            colo_compare_inconsistency_notify();
             break;
         }
     }
@@ -736,6 +765,25 @@
                 REGULAR_PACKET_CHECK_MS);
 }
 
+/* Public API: lets the COLO frame send @event to every colo-compare */
+void colo_notify_compares_event(void *opaque, int event, Error **errp)
+{
+    CompareState *s;
+
+    qemu_mutex_lock(&event_mtx);
+    QTAILQ_FOREACH(s, &net_compares, next) {
+        s->event = event;
+        qemu_bh_schedule(s->event_bh);
+        event_unhandled_count++;
+    }
+    /* Wait until every scheduled bottom half has handled this event */
+    while (event_unhandled_count > 0) {
+        qemu_cond_wait(&event_complete_cond, &event_mtx);
+    }
+
+    qemu_mutex_unlock(&event_mtx);
+}
+
 static void colo_compare_timer_init(CompareState *s)
 {
     AioContext *ctx = iothread_get_aio_context(s->iothread);
@@ -756,6 +804,30 @@
     }
  }
 
+static void colo_flush_packets(void *opaque, void *user_data);
+
+/* Bottom half: act on s->event, then signal completion to the notifier. */
+static void colo_compare_handle_event(void *opaque)
+{
+    CompareState *s = opaque;
+
+    switch (s->event) {
+    case COLO_EVENT_CHECKPOINT:
+        g_queue_foreach(&s->conn_list, colo_flush_packets, s);
+        break;
+    case COLO_EVENT_FAILOVER:
+    default:
+        break;
+    }
+
+    /* The counter is shared with the notifier: check it under event_mtx. */
+    qemu_mutex_lock(&event_mtx);
+    assert(event_unhandled_count > 0);
+    event_unhandled_count--;
+    qemu_cond_broadcast(&event_complete_cond);
+    qemu_mutex_unlock(&event_mtx);
+}
+
 static void colo_compare_iothread(CompareState *s)
 {
     object_ref(OBJECT(s->iothread));
@@ -769,6 +841,7 @@
                              s, s->worker_context, true);
 
     colo_compare_timer_init(s);
+    s->event_bh = qemu_bh_new(colo_compare_handle_event, s);
 }
 
 static char *compare_get_pri_indev(Object *obj, Error **errp)
@@ -926,8 +999,13 @@
     net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
     net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
 
+    QTAILQ_INSERT_TAIL(&net_compares, s, next);
+
     g_queue_init(&s->conn_list);
 
+    qemu_mutex_init(&event_mtx);
+    qemu_cond_init(&event_complete_cond);
+
     s->connection_track_table = g_hash_table_new_full(connection_key_hash,
                                                       connection_key_equal,
                                                       g_free,
@@ -990,6 +1068,7 @@
 static void colo_compare_finalize(Object *obj)
 {
     CompareState *s = COLO_COMPARE(obj);
+    CompareState *tmp = NULL;
 
     qemu_chr_fe_deinit(&s->chr_pri_in, false);
     qemu_chr_fe_deinit(&s->chr_sec_in, false);
@@ -997,6 +1076,16 @@
     if (s->iothread) {
         colo_compare_timer_del(s);
     }
+
+    qemu_bh_delete(s->event_bh);
+
+    QTAILQ_FOREACH(tmp, &net_compares, next) {
+        if (tmp == s) {
+            QTAILQ_REMOVE(&net_compares, s, next);
+            break;
+        }
+    }
+
     /* Release all unhandled packets after compare thead exited */
     g_queue_foreach(&s->conn_list, colo_flush_packets, s);
 
@@ -1009,6 +1098,10 @@
     if (s->iothread) {
         object_unref(OBJECT(s->iothread));
     }
+
+    qemu_mutex_destroy(&event_mtx);
+    qemu_cond_destroy(&event_complete_cond);
+
     g_free(s->pri_indev);
     g_free(s->sec_indev);
     g_free(s->outdev);
diff --git a/net/colo-compare.h b/net/colo-compare.h
new file mode 100644
index 0000000..22ddd51
--- /dev/null
+++ b/net/colo-compare.h
@@ -0,0 +1,29 @@
+/*
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
+ * (a.k.a. Fault Tolerance or Continuous Replication)
+ *
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2017 FUJITSU LIMITED
+ * Copyright (c) 2017 Intel Corporation
+ *
+ * Authors:
+ *    zhanghailiang <zhang.zhanghailiang@huawei.com>
+ *    Zhang Chen <zhangckid@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_COLO_COMPARE_H
+#define QEMU_COLO_COMPARE_H
+
+#include "qemu/notify.h"
+
+/* Send @event to every colo-compare object and wait until all handled it. */
+void colo_notify_compares_event(void *opaque, int event, Error **errp);
+/* Add @notify to the list notified by colo-compare. */
+void colo_compare_register_notifier(Notifier *notify);
+/* Remove a notifier previously passed to colo_compare_register_notifier(). */
+void colo_compare_unregister_notifier(Notifier *notify);
+
+#endif /* QEMU_COLO_COMPARE_H */
diff --git a/net/colo.c b/net/colo.c
index 6dda4ed..49176bf 100644
--- a/net/colo.c
+++ b/net/colo.c
@@ -137,7 +137,7 @@
     conn->ip_proto = key->ip_proto;
     conn->processing = false;
     conn->offset = 0;
-    conn->syn_flag = 0;
+    conn->tcp_state = TCPS_CLOSED;
     conn->pack = 0;
     conn->sack = 0;
     g_queue_init(&conn->primary_list);
@@ -221,3 +221,11 @@
 
     return conn;
 }
+
+/* Report whether @key already has an entry in @connection_track_table. */
+bool connection_has_tracked(GHashTable *connection_track_table,
+                            ConnectionKey *key)
+{
+    gpointer tracked = g_hash_table_lookup(connection_track_table, key);
+    return tracked != NULL;
+}
diff --git a/net/colo.h b/net/colo.h
index da6c36d..11c5226 100644
--- a/net/colo.h
+++ b/net/colo.h
@@ -18,6 +18,7 @@
 #include "slirp/slirp.h"
 #include "qemu/jhash.h"
 #include "qemu/timer.h"
+#include "slirp/tcp.h"
 
 #define HASHTABLE_MAX_SIZE 16384
 
@@ -81,11 +82,9 @@
     uint32_t sack;
     /* offset = secondary_seq - primary_seq */
     tcp_seq  offset;
-    /*
-     * we use this flag update offset func
-     * run once in independent tcp connection
-     */
-    int syn_flag;
+
+    int tcp_state; /* TCP FSM state */
+    tcp_seq fin_ack_seq; /* the seq of 'fin=1,ack=1' */
 } Connection;
 
 uint32_t connection_key_hash(const void *opaque);
@@ -99,6 +98,8 @@
 Connection *connection_get(GHashTable *connection_track_table,
                            ConnectionKey *key,
                            GQueue *conn_list);
+bool connection_has_tracked(GHashTable *connection_track_table,
+                            ConnectionKey *key);
 void connection_hashtable_reset(GHashTable *connection_track_table);
 Packet *packet_new(const void *data, int size, int vnet_hdr_len);
 void packet_destroy(void *opaque, void *user_data);
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
index f584e4e..bb8f4d9 100644
--- a/net/filter-rewriter.c
+++ b/net/filter-rewriter.c
@@ -20,11 +20,15 @@
 #include "qemu/main-loop.h"
 #include "qemu/iov.h"
 #include "net/checksum.h"
+#include "net/colo.h"
+#include "migration/colo.h"
 
 #define FILTER_COLO_REWRITER(obj) \
     OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
 
 #define TYPE_FILTER_REWRITER "filter-rewriter"
+#define FAILOVER_MODE_ON  true
+#define FAILOVER_MODE_OFF false
 
 typedef struct RewriterState {
     NetFilterState parent_obj;
@@ -32,8 +36,14 @@
     /* hashtable to save connection */
     GHashTable *connection_track_table;
     bool vnet_hdr;
+    bool failover_mode;
 } RewriterState;
 
+static void filter_rewriter_failover_mode(RewriterState *s)
+{
+    s->failover_mode = FAILOVER_MODE_ON; /* untracked flows skip rewriting */
+}
+
 static void filter_rewriter_flush(NetFilterState *nf)
 {
     RewriterState *s = FILTER_COLO_REWRITER(nf);
@@ -59,9 +69,9 @@
 }
 
 /* handle tcp packet from primary guest */
-static int handle_primary_tcp_pkt(NetFilterState *nf,
+static int handle_primary_tcp_pkt(RewriterState *rf,
                                   Connection *conn,
-                                  Packet *pkt)
+                                  Packet *pkt, ConnectionKey *key)
 {
     struct tcphdr *tcp_pkt;
 
@@ -74,23 +84,28 @@
         trace_colo_filter_rewriter_conn_offset(conn->offset);
     }
 
+    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN)) &&
+        conn->tcp_state == TCPS_SYN_SENT) {
+        conn->tcp_state = TCPS_ESTABLISHED;
+    }
+
     if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
         /*
          * we use this flag update offset func
          * run once in independent tcp connection
          */
-        conn->syn_flag = 1;
+        conn->tcp_state = TCPS_SYN_RECEIVED;
     }
 
     if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
-        if (conn->syn_flag) {
+        if (conn->tcp_state == TCPS_SYN_RECEIVED) {
             /*
              * offset = secondary_seq - primary seq
              * ack packet sent by guest from primary node,
              * so we use th_ack - 1 get primary_seq
              */
             conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
-            conn->syn_flag = 0;
+            conn->tcp_state = TCPS_ESTABLISHED;
         }
         if (conn->offset) {
             /* handle packets to the secondary from the primary */
@@ -99,15 +114,66 @@
             net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len,
                                    pkt->size - pkt->vnet_hdr_len);
         }
+
+        /*
+         * Passive close step 3
+         */
+        if ((conn->tcp_state == TCPS_LAST_ACK) &&
+            (ntohl(tcp_pkt->th_ack) == (conn->fin_ack_seq + 1))) {
+            conn->tcp_state = TCPS_CLOSED;
+            g_hash_table_remove(rf->connection_track_table, key);
+        }
+    }
+
+    if ((tcp_pkt->th_flags & TH_FIN) == TH_FIN) {
+        /*
+         * Passive close.
+         * Step 1:
+         * The *server* side of this connection is the VM and the *client*
+         * tries to close the connection. We enter the CLOSE_WAIT state.
+         *
+         * Step 2:
+         * In this step we enter the LAST_ACK state.
+         *
+         * We got 'fin=1, ack=1' packet from server side, we need to
+         * record the seq of 'fin=1, ack=1' packet.
+         *
+         * Step 3:
+         * We got 'ack=1' packets from client side, it acks 'fin=1, ack=1'
+         * packet from server side. From this point, we can ensure that there
+         * will be no packets in the connection, except that, some errors
+         * happen between the path of 'filter object' and vNIC, if this rare
+         * case really happen, we can still create a new connection,
+         * So it is safe to remove the connection from connection_track_table.
+         *
+         */
+        if (conn->tcp_state == TCPS_ESTABLISHED) {
+            conn->tcp_state = TCPS_CLOSE_WAIT;
+        }
+
+        /*
+         * Active close step 2.
+         */
+        if (conn->tcp_state == TCPS_FIN_WAIT_1) {
+            conn->tcp_state = TCPS_TIME_WAIT;
+            /*
+             * For simplify implementation, we needn't wait 2MSL time
+             * in filter rewriter. Because guest kernel will track the
+             * TCP status and wait 2MSL time, if client resend the FIN
+             * packet, guest will apply the last ACK too.
+             */
+            conn->tcp_state = TCPS_CLOSED;
+            g_hash_table_remove(rf->connection_track_table, key);
+        }
     }
 
     return 0;
 }
 
 /* handle tcp packet from secondary guest */
-static int handle_secondary_tcp_pkt(NetFilterState *nf,
+static int handle_secondary_tcp_pkt(RewriterState *rf,
                                     Connection *conn,
-                                    Packet *pkt)
+                                    Packet *pkt, ConnectionKey *key)
 {
     struct tcphdr *tcp_pkt;
 
@@ -121,7 +187,8 @@
         trace_colo_filter_rewriter_conn_offset(conn->offset);
     }
 
-    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
+    if (conn->tcp_state == TCPS_SYN_RECEIVED &&
+        ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
         /*
          * save offset = secondary_seq and then
          * in handle_primary_tcp_pkt make offset
@@ -130,6 +197,12 @@
         conn->offset = ntohl(tcp_pkt->th_seq);
     }
 
+    /* VM active connect */
+    if (conn->tcp_state == TCPS_CLOSED &&
+        ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
+        conn->tcp_state = TCPS_SYN_SENT;
+    }
+
     if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
         /* Only need to adjust seq while offset is Non-zero */
         if (conn->offset) {
@@ -141,6 +214,32 @@
         }
     }
 
+    /*
+     * Passive close step 2:
+     */
+    if (conn->tcp_state == TCPS_CLOSE_WAIT &&
+        (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == (TH_ACK | TH_FIN)) {
+        conn->fin_ack_seq = ntohl(tcp_pkt->th_seq);
+        conn->tcp_state = TCPS_LAST_ACK;
+    }
+
+    /*
+     * Active close
+     *
+     * Step 1:
+     * The *server* side of this connection is the VM, and the *server*
+     * tries to close the connection, so we enter FIN_WAIT_1 here.
+     *
+     * Step 2:
+     * Handled in handle_primary_tcp_pkt() when the peer's FIN arrives.
+     * We simplify away the TCPS_FIN_WAIT_2, TCPS_TIME_WAIT and
+     * CLOSING states.
+     */
+    if (conn->tcp_state == TCPS_ESTABLISHED &&
+        (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == TH_FIN) {
+        conn->tcp_state = TCPS_FIN_WAIT_1;
+    }
+
     return 0;
 }
 
@@ -184,13 +283,20 @@
              */
             reverse_connection_key(&key);
         }
+
+        /* After failover we needn't change new TCP packet */
+        if (s->failover_mode &&
+            !connection_has_tracked(s->connection_track_table, &key)) {
+            goto out;
+        }
+
         conn = connection_get(s->connection_track_table,
                               &key,
                               NULL);
 
         if (sender == nf->netdev) {
             /* NET_FILTER_DIRECTION_TX */
-            if (!handle_primary_tcp_pkt(nf, conn, pkt)) {
+            if (!handle_primary_tcp_pkt(s, conn, pkt, &key)) {
                 qemu_net_queue_send(s->incoming_queue, sender, 0,
                 (const uint8_t *)pkt->data, pkt->size, NULL);
                 packet_destroy(pkt, NULL);
@@ -203,7 +309,7 @@
             }
         } else {
             /* NET_FILTER_DIRECTION_RX */
-            if (!handle_secondary_tcp_pkt(nf, conn, pkt)) {
+            if (!handle_secondary_tcp_pkt(s, conn, pkt, &key)) {
                 qemu_net_queue_send(s->incoming_queue, sender, 0,
                 (const uint8_t *)pkt->data, pkt->size, NULL);
                 packet_destroy(pkt, NULL);
@@ -217,11 +323,49 @@
         }
     }
 
+out:
     packet_destroy(pkt, NULL);
     pkt = NULL;
     return 0;
 }
 
+/* GHFunc: clear the seq offset of one tracked connection (@value). */
+static void reset_seq_offset(gpointer key, gpointer value, gpointer user_data)
+{
+    Connection *tracked = value;
+    tracked->offset = 0;
+}
+
+/* GHRFunc: true when the tracked connection (@value) has a nonzero offset. */
+static gboolean offset_is_nonzero(gpointer key,
+                                  gpointer value,
+                                  gpointer user_data)
+{
+    const Connection *tracked = value;
+    return tracked->offset != 0;
+}
+
+/* NetFilterClass handle_event hook for the rewriter's connection table. */
+static void colo_rewriter_handle_event(NetFilterState *nf, int event,
+                                       Error **errp)
+{
+    RewriterState *rs = FILTER_COLO_REWRITER(nf);
+    GHashTable *tracked = rs->connection_track_table;
+
+    if (event == COLO_EVENT_CHECKPOINT) {
+        /* Zero the seq offset of every tracked connection. */
+        g_hash_table_foreach(tracked, reset_seq_offset, NULL);
+        return;
+    }
+
+    /* Failover mode is entered only when no connection has an offset. */
+    if (event == COLO_EVENT_FAILOVER &&
+        !g_hash_table_find(tracked, offset_is_nonzero, NULL)) {
+        filter_rewriter_failover_mode(rs);
+    }
+    /* Every other event is ignored. */
+}
+
 static void colo_rewriter_cleanup(NetFilterState *nf)
 {
     RewriterState *s = FILTER_COLO_REWRITER(nf);
@@ -265,6 +409,7 @@
     RewriterState *s = FILTER_COLO_REWRITER(obj);
 
     s->vnet_hdr = false;
+    s->failover_mode = FAILOVER_MODE_OFF;
     object_property_add_bool(obj, "vnet_hdr_support",
                              filter_rewriter_get_vnet_hdr,
                              filter_rewriter_set_vnet_hdr, NULL);
@@ -277,6 +422,7 @@
     nfc->setup = colo_rewriter_setup;
     nfc->cleanup = colo_rewriter_cleanup;
     nfc->receive_iov = colo_rewriter_receive_iov;
+    nfc->handle_event = colo_rewriter_handle_event;
 }
 
 static const TypeInfo colo_rewriter_info = {
diff --git a/net/filter.c b/net/filter.c
index 2fd7d7d..c9f9e5f 100644
--- a/net/filter.c
+++ b/net/filter.c
@@ -17,6 +17,8 @@
 #include "net/vhost_net.h"
 #include "qom/object_interfaces.h"
 #include "qemu/iov.h"
+#include "net/colo.h"
+#include "migration/colo.h"
 
 static inline bool qemu_can_skip_netfilter(NetFilterState *nf)
 {
@@ -245,11 +247,26 @@
     g_free(nf->netdev_id);
 }
 
+/*
+ * Default NetFilterClass handle_event: only COLO_EVENT_FAILOVER acts, by
+ * calling object_property_set_str() on the filter with "off" and "status".
+ */
+static void default_handle_event(NetFilterState *nf, int event, Error **errp)
+{
+    if (event != COLO_EVENT_FAILOVER) {
+        return;
+    }
+
+    object_property_set_str(OBJECT(nf), "off", "status", errp);
+}
+
 static void netfilter_class_init(ObjectClass *oc, void *data)
 {
     UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+    NetFilterClass *nfc = NETFILTER_CLASS(oc);
 
     ucc->complete = netfilter_complete;
+    nfc->handle_event = default_handle_event;
 }
 
 static const TypeInfo netfilter_info = {
diff --git a/net/l2tpv3.c b/net/l2tpv3.c
index 6745b78..81db24d 100644
--- a/net/l2tpv3.c
+++ b/net/l2tpv3.c
@@ -28,6 +28,7 @@
 #include <netdb.h>
 #include "net/net.h"
 #include "clients.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "qemu/error-report.h"
 #include "qemu/option.h"
@@ -528,7 +529,6 @@
                     const char *name,
                     NetClientState *peer, Error **errp)
 {
-    /* FIXME error_setg(errp, ...) on failure */
     const NetdevL2TPv3Options *l2tpv3;
     NetL2TPV3State *s;
     NetClientState *nc;
@@ -555,7 +555,7 @@
     }
 
     if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
-        error_report("l2tpv3_open : offset must be less than 256 bytes");
+        error_setg(errp, "offset must be less than 256 bytes");
         goto outerr;
     }
 
@@ -563,6 +563,8 @@
         if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) {
             s->cookie = true;
         } else {
+            error_setg(errp,
+                       "require both 'rxcookie' and 'txcookie' or neither");
             goto outerr;
         }
     } else {
@@ -578,7 +580,7 @@
     if (l2tpv3->has_udp && l2tpv3->udp) {
         s->udp = true;
         if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) {
-            error_report("l2tpv3_open : need both src and dst port for udp");
+            error_setg(errp, "need both src and dst port for udp");
             goto outerr;
         } else {
             srcport = l2tpv3->srcport;
@@ -639,20 +641,19 @@
     gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result);
 
     if ((gairet != 0) || (result == NULL)) {
-        error_report(
-            "l2tpv3_open : could not resolve src, errno = %s",
-            gai_strerror(gairet)
-        );
+        error_setg(errp, "could not resolve src, errno = %s",
+                   gai_strerror(gairet));
         goto outerr;
     }
     fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
     if (fd == -1) {
         fd = -errno;
-        error_report("l2tpv3_open : socket creation failed, errno = %d", -fd);
+        error_setg(errp, "socket creation failed, errno = %d",
+                   -fd);
         goto outerr;
     }
     if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) {
-        error_report("l2tpv3_open :  could not bind socket err=%i", errno);
+        error_setg(errp, "could not bind socket err=%i", errno);
         goto outerr;
     }
     if (result) {
@@ -677,10 +678,8 @@
     result = NULL;
     gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result);
     if ((gairet != 0) || (result == NULL)) {
-        error_report(
-            "l2tpv3_open : could not resolve dst, error = %s",
-            gai_strerror(gairet)
-        );
+        error_setg(errp, "could not resolve dst, error = %s",
+                   gai_strerror(gairet));
         goto outerr;
     }
 
diff --git a/net/net.c b/net/net.c
index cdcd5cf..07c194a 100644
--- a/net/net.c
+++ b/net/net.c
@@ -712,10 +712,15 @@
                                 void *opaque)
 {
     NetClientState *nc = opaque;
+    size_t size = iov_size(iov, iovcnt);
     int ret;
 
+    if (size > INT_MAX) {
+        return size;
+    }
+
     if (nc->link_down) {
-        return iov_size(iov, iovcnt);
+        return size;
     }
 
     if (nc->receive_disabled) {
@@ -1335,6 +1340,25 @@
     }
 }
 
+/* Deliver @event to every filter of every net client; stop at first error. */
+void colo_notify_filters_event(int event, Error **errp)
+{
+    NetClientState *client;
+    NetFilterState *filter;
+    Error *err = NULL;
+
+    QTAILQ_FOREACH(client, &net_clients, next) {
+        QTAILQ_FOREACH(filter, &client->filters, next) {
+            NetFilterClass *klass = NETFILTER_GET_CLASS(OBJECT(filter));
+            klass->handle_event(filter, event, &err);
+            if (err) {
+                error_propagate(errp, err);
+                return;
+            }
+        }
+    }
+}
+
 void qmp_set_link(const char *name, bool up, Error **errp)
 {
     NetClientState *ncs[MAX_QUEUE_NUM];
diff --git a/net/slirp.c b/net/slirp.c
index c93b64d..f6dc039 100644
--- a/net/slirp.c
+++ b/net/slirp.c
@@ -150,6 +150,7 @@
                           const char *vnameserver, const char *vnameserver6,
                           const char *smb_export, const char *vsmbserver,
                           const char **dnssearch, const char *vdomainname,
+                          const char *tftp_server_name,
                           Error **errp)
 {
     /* default settings according to historic slirp */
@@ -350,6 +351,20 @@
         return -1;
     }
 
+    if (vdomainname && strlen(vdomainname) > 255) {
+        error_setg(errp, "'domainname' parameter cannot exceed 255 bytes");
+        return -1;
+    }
+
+    if (vhostname && strlen(vhostname) > 255) {
+        error_setg(errp, "'vhostname' parameter cannot exceed 255 bytes");
+        return -1;
+    }
+
+    if (tftp_server_name && strlen(tftp_server_name) > 255) {
+        error_setg(errp, "'tftp-server-name' parameter cannot exceed 255 bytes");
+        return -1;
+    }
 
     nc = qemu_new_net_client(&net_slirp_info, peer, model, name);
 
@@ -361,7 +376,8 @@
 
     s->slirp = slirp_init(restricted, ipv4, net, mask, host,
                           ipv6, ip6_prefix, vprefix6_len, ip6_host,
-                          vhostname, tftp_export, bootfile, dhcp,
+                          vhostname, tftp_server_name,
+                          tftp_export, bootfile, dhcp,
                           dns, ip6_dns, dnssearch, vdomainname, s);
     QTAILQ_INSERT_TAIL(&slirp_stacks, s, entry);
 
@@ -764,7 +780,11 @@
         }
     } else {
         Error *err = NULL;
-        Chardev *chr = qemu_chr_new(buf, p);
+        /*
+         * FIXME: sure we want to support implicit
+         * muxed monitors here?
+         */
+        Chardev *chr = qemu_chr_new_mux_mon(buf, p);
 
         if (!chr) {
             error_setg(errp, "Could not open guest forwarding device '%s'",
@@ -894,7 +914,8 @@
                          user->ipv6_host, user->hostname, user->tftp,
                          user->bootfile, user->dhcpstart,
                          user->dns, user->ipv6_dns, user->smb,
-                         user->smbserver, dnssearch, user->domainname, errp);
+                         user->smbserver, dnssearch, user->domainname,
+                         user->tftp_server_name, errp);
 
     while (slirp_configs) {
         config = slirp_configs;
diff --git a/net/socket.c b/net/socket.c
index 6917fbc..90ef351 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -453,8 +453,8 @@
     case SOCK_STREAM:
         return net_socket_fd_init_stream(peer, model, name, fd, is_connected);
     default:
-        error_report("socket type=%d for fd=%d must be either"
-                     " SOCK_DGRAM or SOCK_STREAM", so_type, fd);
+        error_setg(errp, "socket type=%d for fd=%d must be either"
+                   " SOCK_DGRAM or SOCK_STREAM", so_type, fd);
         closesocket(fd);
     }
     return NULL;
diff --git a/numa.c b/numa.c
index 81542d4..50ec016 100644
--- a/numa.c
+++ b/numa.c
@@ -60,6 +60,7 @@
 static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
                             Error **errp)
 {
+    Error *err = NULL;
     uint16_t nodenr;
     uint16List *cpus = NULL;
     MachineClass *mc = MACHINE_GET_CLASS(ms);
@@ -82,8 +83,8 @@
     }
 
     if (!mc->cpu_index_to_instance_props || !mc->get_default_cpu_node_id) {
-        error_report("NUMA is not supported by this machine-type");
-        exit(1);
+        error_setg(errp, "NUMA is not supported by this machine-type");
+        return;
     }
     for (cpus = node->cpus; cpus; cpus = cpus->next) {
         CpuInstanceProperties props;
@@ -97,7 +98,11 @@
         props = mc->cpu_index_to_instance_props(ms, cpus->value);
         props.node_id = nodenr;
         props.has_node_id = true;
-        machine_set_cpu_numa_node(ms, &props, &error_fatal);
+        machine_set_cpu_numa_node(ms, &props, &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
     }
 
     if (node->has_mem && node->has_memdev) {
@@ -210,7 +215,7 @@
     error_propagate(errp, err);
 }
 
-int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
+static int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
 {
     NumaOptions *object = NULL;
     MachineState *ms = MACHINE(opaque);
@@ -234,7 +239,7 @@
 end:
     qapi_free_NumaOptions(object);
     if (err) {
-        error_report_err(err);
+        error_propagate(errp, err);
         return -1;
     }
 
@@ -367,7 +372,7 @@
     if (ms->ram_slots > 0 && nb_numa_nodes == 0 &&
         mc->auto_enable_numa_with_memhp) {
             NumaNodeOptions node = { };
-            parse_numa_node(ms, &node, NULL);
+            parse_numa_node(ms, &node, &error_abort);
     }
 
     assert(max_numa_nodeid <= MAX_NODES);
@@ -439,9 +444,7 @@
 
 void parse_numa_opts(MachineState *ms)
 {
-    if (qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, ms, NULL)) {
-        exit(1);
-    }
+    qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, ms, &error_fatal);
 }
 
 void qmp_set_numa_node(NumaOptions *cmd, Error **errp)
diff --git a/os-posix.c b/os-posix.c
index 8f39447..4bd80e4 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -344,30 +344,6 @@
     setvbuf(stdout, NULL, _IOLBF, 0);
 }
 
-int qemu_create_pidfile(const char *filename)
-{
-    char buffer[128];
-    int len;
-    int fd;
-
-    fd = qemu_open(filename, O_RDWR | O_CREAT, 0600);
-    if (fd == -1) {
-        return -1;
-    }
-    if (lockf(fd, F_TLOCK, 0) == -1) {
-        close(fd);
-        return -1;
-    }
-    len = snprintf(buffer, sizeof(buffer), FMT_pid "\n", getpid());
-    if (write(fd, buffer, len) != len) {
-        close(fd);
-        return -1;
-    }
-
-    /* keep pidfile open & locked forever */
-    return 0;
-}
-
 bool is_daemonized(void)
 {
     return daemonize;
diff --git a/os-win32.c b/os-win32.c
index 0674f94..0e0d7f5 100644
--- a/os-win32.c
+++ b/os-win32.c
@@ -97,28 +97,3 @@
 {
     return -1;
 }
-
-int qemu_create_pidfile(const char *filename)
-{
-    char buffer[128];
-    int len;
-    HANDLE file;
-    OVERLAPPED overlap;
-    BOOL ret;
-    memset(&overlap, 0, sizeof(overlap));
-
-    file = CreateFile(filename, GENERIC_WRITE, FILE_SHARE_READ, NULL,
-                      OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
-
-    if (file == INVALID_HANDLE_VALUE) {
-        return -1;
-    }
-    len = snprintf(buffer, sizeof(buffer), "%d\n", getpid());
-    ret = WriteFile(file, (LPCVOID)buffer, (DWORD)len,
-                    NULL, &overlap);
-    CloseHandle(file);
-    if (ret == 0) {
-        return -1;
-    }
-    return 0;
-}
diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc
index 8199838..1450da7 100644
--- a/pc-bios/openbios-ppc
+++ b/pc-bios/openbios-ppc
Binary files differ
diff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32
index 4123bc4..b276570 100644
--- a/pc-bios/openbios-sparc32
+++ b/pc-bios/openbios-sparc32
Binary files differ
diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64
index 35c9991..91f8eec 100644
--- a/pc-bios/openbios-sparc64
+++ b/pc-bios/openbios-sparc64
Binary files differ
diff --git a/po/Makefile b/po/Makefile
index e47e262..c041f4c 100644
--- a/po/Makefile
+++ b/po/Makefile
@@ -36,7 +36,7 @@
 
 install: $(OBJS)
 	for obj in $(OBJS); do \
-	    base=`basename $$obj .mo`; \
+	    base=$$(basename $$obj .mo); \
 	    $(INSTALL) -d $(DESTDIR)$(prefix)/share/locale/$$base/LC_MESSAGES; \
 	    $(INSTALL) -m644 $$obj $(DESTDIR)$(prefix)/share/locale/$$base/LC_MESSAGES/qemu.mo; \
 	done
diff --git a/qapi/block-core.json b/qapi/block-core.json
index cfb37f8..0fc1590 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1316,6 +1316,10 @@
 # @speed: the maximum speed, in bytes per second. The default is 0,
 #         for unlimited.
 #
+# @bitmap: the name of dirty bitmap if sync is "incremental".
+#          Must be present if sync is "incremental", must NOT be present
+#          otherwise. (Since 3.1)
+#
 # @compress: true to compress data, if the target format supports it.
 #            (default: false) (since 2.8)
 #
@@ -1348,7 +1352,8 @@
 ##
 { 'struct': 'BlockdevBackup',
   'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
-            'sync': 'MirrorSyncMode', '*speed': 'int', '*compress': 'bool',
+            'sync': 'MirrorSyncMode', '*speed': 'int',
+            '*bitmap': 'str', '*compress': 'bool',
             '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError',
             '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
diff --git a/qapi/migration.json b/qapi/migration.json
index 6e8c212..0928f4b 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -923,18 +923,18 @@
 ##
 # @COLOMode:
 #
-# The colo mode
+# The COLO current mode.
 #
-# @unknown: unknown mode
+# @none: COLO is disabled.
 #
-# @primary: master side
+# @primary: COLO node on the primary side.
 #
-# @secondary: slave side
+# @secondary: COLO node on the secondary side.
 #
 # Since: 2.8
 ##
 { 'enum': 'COLOMode',
-  'data': [ 'unknown', 'primary', 'secondary'] }
+  'data': [ 'none', 'primary', 'secondary'] }
 
 ##
 # @FailoverStatus:
@@ -957,6 +957,44 @@
   'data': [ 'none', 'require', 'active', 'completed', 'relaunch' ] }
 
 ##
+# @COLO_EXIT:
+#
+# Emitted when VM finishes COLO mode due to some errors happening or
+# at the request of users.
+#
+# @mode: report COLO mode when COLO exited.
+#
+# @reason: describes the reason for the COLO exit.
+#
+# Since: 3.1
+#
+# Example:
+#
+# <- { "timestamp": {"seconds": 2032141960, "microseconds": 417172},
+#      "event": "COLO_EXIT", "data": {"mode": "primary", "reason": "request" } }
+#
+##
+{ 'event': 'COLO_EXIT',
+  'data': {'mode': 'COLOMode', 'reason': 'COLOExitReason' } }
+
+##
+# @COLOExitReason:
+#
+# The reason for a COLO exit
+#
+# @none: no failover has ever happened. This can't occur in the
+# COLO_EXIT event, only in the result of query-colo-status.
+#
+# @request: COLO exit is due to an external request
+#
+# @error: COLO exit is due to an internal error
+#
+# Since: 3.1
+##
+{ 'enum': 'COLOExitReason',
+  'data': [ 'none', 'request', 'error' ] }
+
+##
 # @x-colo-lost-heartbeat:
 #
 # Tell qemu that heartbeat is lost, request it to do takeover procedures.
@@ -1270,6 +1308,38 @@
 { 'command': 'xen-colo-do-checkpoint' }
 
 ##
+# @COLOStatus:
+#
+# The result format for 'query-colo-status'.
+#
+# @mode: COLO running mode. If COLO is running, this field will return
+#        'primary' or 'secondary'.
+#
+# @reason: describes the reason for the COLO exit.
+#
+# Since: 3.1
+##
+{ 'struct': 'COLOStatus',
+  'data': { 'mode': 'COLOMode', 'reason': 'COLOExitReason' } }
+
+##
+# @query-colo-status:
+#
+# Query COLO status while the vm is running.
+#
+# Returns: A @COLOStatus object showing the status.
+#
+# Example:
+#
+# -> { "execute": "query-colo-status" }
+# <- { "return": { "mode": "primary", "reason": "request" } }
+#
+# Since: 3.1
+##
+{ 'command': 'query-colo-status',
+  'returns': 'COLOStatus' }
+
+##
 # @migrate-recover:
 #
 # Provide a recovery migration stream URI.
diff --git a/qapi/misc.json b/qapi/misc.json
index ada9af5..6c1c5c0 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -839,8 +839,8 @@
 # Since: 2.4
 ##
 { 'struct': 'PciDeviceId',
-  'data': {'device': 'int', 'vendor': 'int', 'subsystem': 'int',
-            'subsystem-vendor': 'int'} }
+  'data': {'device': 'int', 'vendor': 'int', '*subsystem': 'int',
+            '*subsystem-vendor': 'int'} }
 
 ##
 # @PciDeviceInfo:
@@ -2017,7 +2017,7 @@
 #
 # @migration-safe: whether a CPU definition can be safely used for
 #                  migration in combination with a QEMU compatibility machine
-#                  when migrating between different QMU versions and between
+#                  when migrating between different QEMU versions and between
 #                  hosts with different sets of (hardware or software)
 #                  capabilities. If not provided, information is not available
 #                  and callers should not assume the CPU definition to be
@@ -2066,7 +2066,7 @@
 #
 # @plugged-memory: size of memory that can be hot-unplugged. This field
 #                  is omitted if target doesn't support memory hotplug
-#                  (i.e. CONFIG_MEM_HOTPLUG not defined on build time).
+#                  (i.e. CONFIG_MEM_DEVICE not defined at build time).
 #
 # Since: 2.11.0
 ##
@@ -2126,11 +2126,11 @@
 # @static: Expand to a static CPU model, a combination of a static base
 #          model name and property delta changes. As the static base model will
 #          never change, the expanded CPU model will be the same, independent of
-#          independent of QEMU version, machine type, machine options, and
-#          accelerator options. Therefore, the resulting model can be used by
-#          tooling without having to specify a compatibility machine - e.g. when
-#          displaying the "host" model. static CPU models are migration-safe.
-#
+#          QEMU version, machine type, machine options, and accelerator options.
+#          Therefore, the resulting model can be used by tooling without having
+#          to specify a compatibility machine - e.g. when displaying the "host"
+#          model. The @static CPU models are migration-safe.
+#
 # @full: Expand all properties. The produced model is not guaranteed to be
 #        migration-safe, but allows tooling to get an insight and work with
 #        model details.
@@ -3070,7 +3070,8 @@
 # Emitted when the guest changes the RTC time.
 #
 # @offset: offset between base RTC clock (as specified by -rtc base), and
-#          new RTC clock value
+#          new RTC clock value. Note that the value will differ depending
+#          on the clock chosen to drive the RTC (specified by -rtc clock).
 #
 # Note: This event is rate-limited.
 #
diff --git a/qapi/net.json b/qapi/net.json
index c86f351..8f99fd9 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -174,6 +174,8 @@
 #
 # @guestfwd: forward guest TCP connections
 #
+# @tftp-server-name: RFC2132 "TFTP server name" string (Since 3.1)
+#
 # Since: 1.2
 ##
 { 'struct': 'NetdevUserOptions',
@@ -198,7 +200,8 @@
     '*smb':       'str',
     '*smbserver': 'str',
     '*hostfwd':   ['String'],
-    '*guestfwd':  ['String'] } }
+    '*guestfwd':  ['String'],
+    '*tftp-server-name': 'str' } }
 
 ##
 # @NetdevTapOptions:
diff --git a/qapi/transaction.json b/qapi/transaction.json
index d7e4274..5875cdb 100644
--- a/qapi/transaction.json
+++ b/qapi/transaction.json
@@ -48,6 +48,7 @@
 # - @block-dirty-bitmap-clear: since 2.5
 # - @x-block-dirty-bitmap-enable: since 3.0
 # - @x-block-dirty-bitmap-disable: since 3.0
+# - @x-block-dirty-bitmap-merge: since 3.1
 # - @blockdev-backup: since 2.3
 # - @blockdev-snapshot: since 2.5
 # - @blockdev-snapshot-internal-sync: since 1.7
@@ -63,6 +64,7 @@
        'block-dirty-bitmap-clear': 'BlockDirtyBitmap',
        'x-block-dirty-bitmap-enable': 'BlockDirtyBitmap',
        'x-block-dirty-bitmap-disable': 'BlockDirtyBitmap',
+       'x-block-dirty-bitmap-merge': 'BlockDirtyBitmapMerge',
        'blockdev-backup': 'BlockdevBackup',
        'blockdev-snapshot': 'BlockdevSnapshot',
        'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal',
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 61e0300..802c18a 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -104,22 +104,31 @@
     return (qdev_class_get_alias(dc) != NULL);
 }
 
+static void out_printf(const char *fmt, ...)
+{
+    va_list ap;
+    /* Forward the printf-style arguments to monitor_vfprintf() for stdout. */
+    va_start(ap, fmt);
+    monitor_vfprintf(stdout, fmt, ap);
+    va_end(ap);
+}
+
 static void qdev_print_devinfo(DeviceClass *dc)
 {
-    error_printf("name \"%s\"", object_class_get_name(OBJECT_CLASS(dc)));
+    out_printf("name \"%s\"", object_class_get_name(OBJECT_CLASS(dc)));
     if (dc->bus_type) {
-        error_printf(", bus %s", dc->bus_type);
+        out_printf(", bus %s", dc->bus_type);
     }
     if (qdev_class_has_alias(dc)) {
-        error_printf(", alias \"%s\"", qdev_class_get_alias(dc));
+        out_printf(", alias \"%s\"", qdev_class_get_alias(dc));
     }
     if (dc->desc) {
-        error_printf(", desc \"%s\"", dc->desc);
+        out_printf(", desc \"%s\"", dc->desc);
     }
     if (!dc->user_creatable) {
-        error_printf(", no-user");
+        out_printf(", no-user");
     }
-    error_printf("\n");
+    out_printf("\n");
 }
 
 static void qdev_print_devinfos(bool show_no_user)
@@ -155,8 +164,7 @@
                 continue;
             }
             if (!cat_printed) {
-                error_printf("%s%s devices:\n", i ? "\n" : "",
-                             cat_name[i]);
+                out_printf("%s%s devices:\n", i ? "\n" : "", cat_name[i]);
                 cat_printed = true;
             }
             qdev_print_devinfo(dc);
@@ -278,13 +286,11 @@
     }
 
     for (prop = prop_list; prop; prop = prop->next) {
-        error_printf("%s.%s=%s", driver,
-                     prop->value->name,
-                     prop->value->type);
+        out_printf("%s.%s=%s", driver, prop->value->name, prop->value->type);
         if (prop->value->has_description) {
-            error_printf(" (%s)\n", prop->value->description);
+            out_printf(" (%s)\n", prop->value->description);
         } else {
-            error_printf("\n");
+            out_printf("\n");
         }
     }
 
diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi
index 16ff946..5d2d7a3 100644
--- a/qemu-deprecated.texi
+++ b/qemu-deprecated.texi
@@ -19,13 +19,6 @@
 
 @section Build options
 
-@subsection GTK 2.x
-
-Previously QEMU has supported building against both GTK 2.x
-and 3.x series APIs. Support for the GTK 2.x builds will be
-discontinued, so maintainers should switch to using GTK 3.x,
-which is the default.
-
 @subsection SDL 1.2
 
 Previously QEMU has supported building against both SDL 1.2
@@ -93,6 +86,18 @@
 The @option{name} parameter of the @option{-net} option is a synonym
 for the @option{id} parameter, which should now be used instead.
 
+@subsection -smp (invalid topologies) (since 3.1)
+
+CPU topology properties should describe whole machine topology including
+possible CPUs.
+
+However, historically it was possible to start QEMU with an incorrect topology
+where @math{@var{n} <= @var{sockets} * @var{cores} * @var{threads} < @var{maxcpus}},
+which could lead to an incorrect topology enumeration by the guest.
+Support for invalid topologies will be removed; the user must ensure
+topologies described with -smp include all possible cpus, i.e.
+  @math{@var{sockets} * @var{cores} * @var{threads} = @var{maxcpus}}.
+
 @section QEMU Machine Protocol (QMP) commands
 
 @subsection block-dirty-bitmap-add "autoload" parameter (since 2.12.0)
diff --git a/qemu-io.c b/qemu-io.c
index 13829f5..6df7731 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -620,11 +620,9 @@
         exit(1);
     }
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          NULL, NULL)) {
-        exit(1);
-    }
+    qemu_opts_foreach(&qemu_object_opts,
+                      user_creatable_add_opts_foreach,
+                      NULL, &error_fatal);
 
     if (!trace_init_backends()) {
         exit(1);
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 51b9d38..ca71096 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -56,7 +56,6 @@
 #define MBR_SIZE 512
 
 static NBDExport *exp;
-static bool newproto;
 static int verbose;
 static char *srcpath;
 static SocketAddress *saddr;
@@ -84,8 +83,8 @@
 "  -e, --shared=NUM          device can be shared by NUM clients (default '1')\n"
 "  -t, --persistent          don't exit on the last connection\n"
 "  -v, --verbose             display extra debugging information\n"
-"  -x, --export-name=NAME    expose export by name\n"
-"  -D, --description=TEXT    with -x, also export a human-readable description\n"
+"  -x, --export-name=NAME    expose export by name (default is empty string)\n"
+"  -D, --description=TEXT    export a human-readable description\n"
 "\n"
 "Exposing part of the image:\n"
 "  -o, --offset=OFFSET       offset into the image\n"
@@ -94,6 +93,7 @@
 "General purpose options:\n"
 "  --object type,id=ID,...   define an object such as 'secret' for providing\n"
 "                            passwords and/or encryption keys\n"
+"  --tls-creds=ID            use id of an earlier --object to provide TLS\n"
 "  -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
 "                            specify tracing options\n"
 "  --fork                    fork off the server process and exit the parent\n"
@@ -354,8 +354,7 @@
 
     nb_fds++;
     nbd_update_server_watch();
-    nbd_client_new(newproto ? NULL : exp, cioc,
-                   tlscreds, NULL, nbd_client_closed);
+    nbd_client_new(cioc, tlscreds, NULL, nbd_client_closed);
 }
 
 static void nbd_update_server_watch(void)
@@ -549,7 +548,7 @@
     Error *local_err = NULL;
     BlockdevDetectZeroesOptions detect_zeroes = BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
     QDict *options = NULL;
-    const char *export_name = NULL;
+    const char *export_name = ""; /* Default export name */
     const char *export_description = NULL;
     const char *tlscredsid = NULL;
     bool imageOpts = false;
@@ -767,11 +766,9 @@
         exit(EXIT_FAILURE);
     }
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          NULL, NULL)) {
-        exit(EXIT_FAILURE);
-    }
+    qemu_opts_foreach(&qemu_object_opts,
+                      user_creatable_add_opts_foreach,
+                      NULL, &error_fatal);
 
     if (!trace_init_backends()) {
         exit(1);
@@ -808,11 +805,6 @@
             error_report("TLS is not supported with a host device");
             exit(EXIT_FAILURE);
         }
-        if (!export_name) {
-            /* Set the default NBD protocol export name, since
-             * we *must* use new style protocol for TLS */
-            export_name = "";
-        }
         tlscreds = nbd_get_tls_creds(tlscredsid, &local_err);
         if (local_err) {
             error_report("Failed to get TLS creds %s",
@@ -1008,19 +1000,9 @@
     }
 
     exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags, nbd_export_closed,
-                         writethrough, NULL, &local_err);
-    if (!exp) {
-        error_report_err(local_err);
-        exit(EXIT_FAILURE);
-    }
-    if (export_name) {
-        nbd_export_set_name(exp, export_name);
-        nbd_export_set_description(exp, export_description);
-        newproto = true;
-    } else if (export_description) {
-        error_report("Export description requires an export name");
-        exit(EXIT_FAILURE);
-    }
+                         writethrough, NULL, &error_fatal);
+    nbd_export_set_name(exp, export_name);
+    nbd_export_set_description(exp, export_description);
 
     if (device) {
         int ret;
diff --git a/qemu-options.hx b/qemu-options.hx
index f139459..08f8516 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1823,7 +1823,7 @@
     "         [,ipv6[=on|off]][,ipv6-net=addr[/int]][,ipv6-host=addr]\n"
     "         [,restrict=on|off][,hostname=host][,dhcpstart=addr]\n"
     "         [,dns=addr][,ipv6-dns=addr][,dnssearch=domain][,domainname=domain]\n"
-    "         [,tftp=dir][,bootfile=f][,hostfwd=rule][,guestfwd=rule]"
+    "         [,tftp=dir][,tftp-server-name=name][,bootfile=f][,hostfwd=rule][,guestfwd=rule]"
 #ifndef _WIN32
                                              "[,smb=dir[,smbserver=addr]]\n"
 #endif
@@ -2060,6 +2060,11 @@
 The TFTP client on the guest must be configured in binary mode (use the command
 @code{bin} of the Unix TFTP client).
 
+@item tftp-server-name=@var{name}
+In BOOTP reply, broadcast @var{name} as the "TFTP server name" (RFC2132 option
+66). This can be used to advise the guest to load boot files or configurations
+from a different server than the host address.
+
 @item bootfile=@var{file}
 When using the user mode network stack, broadcast @var{file} as the BOOTP
 filename. In conjunction with @option{tftp}, this can be used to network boot
@@ -2256,7 +2261,7 @@
                  -netdev socket,id=n2,mcast=230.0.0.1:1234
 # launch yet another QEMU instance on same "bus"
 qemu-system-i386 linux.img \
-                 -device e1000,netdev=n3,macaddr=52:54:00:12:34:58 \
+                 -device e1000,netdev=n3,mac=52:54:00:12:34:58 \
                  -netdev socket,id=n3,mcast=230.0.0.1:1234
 @end example
 
@@ -3458,25 +3463,29 @@
 DEF("clock", HAS_ARG, QEMU_OPTION_clock, "", QEMU_ARCH_ALL)
 
 DEF("rtc", HAS_ARG, QEMU_OPTION_rtc, \
-    "-rtc [base=utc|localtime|date][,clock=host|rt|vm][,driftfix=none|slew]\n" \
+    "-rtc [base=utc|localtime|<datetime>][,clock=host|rt|vm][,driftfix=none|slew]\n" \
     "                set the RTC base and clock, enable drift fix for clock ticks (x86 only)\n",
     QEMU_ARCH_ALL)
 
 STEXI
 
-@item -rtc [base=utc|localtime|@var{date}][,clock=host|vm][,driftfix=none|slew]
+@item -rtc [base=utc|localtime|@var{datetime}][,clock=host|rt|vm][,driftfix=none|slew]
 @findex -rtc
 Specify @option{base} as @code{utc} or @code{localtime} to let the RTC start at the current
 UTC or local time, respectively. @code{localtime} is required for correct date in
-MS-DOS or Windows. To start at a specific point in time, provide @var{date} in the
+MS-DOS or Windows. To start at a specific point in time, provide @var{datetime} in the
 format @code{2006-06-17T16:01:21} or @code{2006-06-17}. The default base is UTC.
 
 By default the RTC is driven by the host system time. This allows using of the
 RTC as accurate reference clock inside the guest, specifically if the host
 time is smoothly following an accurate external reference clock, e.g. via NTP.
 If you want to isolate the guest time from the host, you can set @option{clock}
-to @code{rt} instead.  To even prevent it from progressing during suspension,
-you can set it to @code{vm}.
+to @code{rt} instead, which provides a host monotonic clock if the host supports it.
+To even prevent the RTC from progressing during suspension, you can set @option{clock}
+to @code{vm} (virtual clock). @samp{clock=vm} is recommended especially in
+icount mode in order to preserve determinism; however, note that in icount mode
+the speed of the virtual clock is variable and can in general differ from the
+host clock.
 
 Enable @option{driftfix} (i386 targets only) if you experience time drift problems,
 specifically with Windows' ACPI HAL. This option will try to figure out how
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index 1baa5c6..5c73e6a 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -12,11 +12,12 @@
  * Contributions after 2012-01-13 are licensed under the terms of the
  * GNU GPL, version 2 or (at your option) any later version.
  */
+
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu/config-file.h"
 #include "qemu/option.h"
 #include "qemu/module.h"
-#include "qemu/error-report.h"
 #include <sys/prctl.h>
 #include <seccomp.h>
 #include "sysemu/seccomp.h"
@@ -190,7 +191,7 @@
                  * to provide a little bit of consistency for
                  * the command line */
             } else {
-                error_report("invalid argument for obsolete");
+                error_setg(errp, "invalid argument for obsolete");
                 return -1;
             }
         }
@@ -205,14 +206,13 @@
                 /* calling prctl directly because we're
                  * not sure if host has CAP_SYS_ADMIN set*/
                 if (prctl(PR_SET_NO_NEW_PRIVS, 1)) {
-                    error_report("failed to set no_new_privs "
-                                 "aborting");
+                    error_setg(errp, "failed to set no_new_privs aborting");
                     return -1;
                 }
             } else if (g_str_equal(value, "allow")) {
                 /* default value */
             } else {
-                error_report("invalid argument for elevateprivileges");
+                error_setg(errp, "invalid argument for elevateprivileges");
                 return -1;
             }
         }
@@ -224,7 +224,7 @@
             } else if (g_str_equal(value, "allow")) {
                 /* default value */
             } else {
-                error_report("invalid argument for spawn");
+                error_setg(errp, "invalid argument for spawn");
                 return -1;
             }
         }
@@ -236,14 +236,14 @@
             } else if (g_str_equal(value, "allow")) {
                 /* default value */
             } else {
-                error_report("invalid argument for resourcecontrol");
+                error_setg(errp, "invalid argument for resourcecontrol");
                 return -1;
             }
         }
 
         if (seccomp_start(seccomp_opts) < 0) {
-            error_report("failed to install seccomp syscall filter "
-                         "in the kernel");
+            error_setg(errp, "failed to install seccomp syscall filter "
+                       "in the kernel");
             return -1;
         }
     }
diff --git a/qga/main.c b/qga/main.c
index 6d70242..c399320 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -340,46 +340,6 @@
     return f;
 }
 
-#ifndef _WIN32
-static bool ga_open_pidfile(const char *pidfile)
-{
-    int pidfd;
-    char pidstr[32];
-
-    pidfd = qemu_open(pidfile, O_CREAT|O_WRONLY, S_IRUSR|S_IWUSR);
-    if (pidfd == -1 || lockf(pidfd, F_TLOCK, 0)) {
-        g_critical("Cannot lock pid file, %s", strerror(errno));
-        if (pidfd != -1) {
-            close(pidfd);
-        }
-        return false;
-    }
-
-    if (ftruncate(pidfd, 0)) {
-        g_critical("Failed to truncate pid file");
-        goto fail;
-    }
-    snprintf(pidstr, sizeof(pidstr), "%d\n", getpid());
-    if (write(pidfd, pidstr, strlen(pidstr)) != strlen(pidstr)) {
-        g_critical("Failed to write pid file");
-        goto fail;
-    }
-
-    /* keep pidfile open & locked forever */
-    return true;
-
-fail:
-    unlink(pidfile);
-    close(pidfd);
-    return false;
-}
-#else /* _WIN32 */
-static bool ga_open_pidfile(const char *pidfile)
-{
-    return true;
-}
-#endif
-
 static gint ga_strcmp(gconstpointer str1, gconstpointer str2)
 {
     return strcmp(str1, str2);
@@ -479,8 +439,11 @@
     ga_enable_logging(s);
     g_warning("logging re-enabled due to filesystem unfreeze");
     if (s->deferred_options.pid_filepath) {
-        if (!ga_open_pidfile(s->deferred_options.pid_filepath)) {
-            g_warning("failed to create/open pid file");
+        Error *err = NULL;
+
+        if (!qemu_write_pidfile(s->deferred_options.pid_filepath, &err)) {
+            g_warning("%s", error_get_pretty(err));
+            error_free(err);
         }
         s->deferred_options.pid_filepath = NULL;
     }
@@ -515,8 +478,11 @@
     }
 
     if (pidfile) {
-        if (!ga_open_pidfile(pidfile)) {
-            g_critical("failed to create pidfile");
+        Error *err = NULL;
+
+        if (!qemu_write_pidfile(pidfile, &err)) {
+            g_critical("%s", error_get_pretty(err));
+            error_free(err);
             exit(EXIT_FAILURE);
         }
     }
diff --git a/qobject/block-qdict.c b/qobject/block-qdict.c
index 42054cc..1487cc5 100644
--- a/qobject/block-qdict.c
+++ b/qobject/block-qdict.c
@@ -577,7 +577,7 @@
         if (!tmp) {
             tmp = qdict_clone_shallow(src);
         }
-        qdict_put(tmp, ent->key, qstring_from_str(s));
+        qdict_put_str(tmp, ent->key, s);
         g_free(buf);
     }
 
diff --git a/qom/cpu.c b/qom/cpu.c
index 92599f3..9ad1372 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -194,7 +194,6 @@
     return true;
 }
 
-bool target_words_bigendian(void);
 static bool cpu_common_virtio_is_big_endian(CPUState *cpu)
 {
     return target_words_bigendian();
@@ -266,7 +265,7 @@
     cpu->mem_io_pc = 0;
     cpu->mem_io_vaddr = 0;
     cpu->icount_extra = 0;
-    cpu->icount_decr.u32 = 0;
+    atomic_set(&cpu->icount_decr.u32, 0);
     cpu->can_do_io = 1;
     cpu->exception_index = -1;
     cpu->crash_occurred = false;
diff --git a/qom/object.c b/qom/object.c
index 75d1d48..547dcf9 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -286,7 +286,14 @@
     if (ti->instance_size == 0) {
         ti->abstract = true;
     }
-
+    if (type_is_ancestor(ti, type_interface)) {
+        assert(ti->instance_size == 0);
+        assert(ti->abstract);
+        assert(!ti->instance_init);
+        assert(!ti->instance_post_init);
+        assert(!ti->instance_finalize);
+        assert(!ti->num_interfaces);
+    }
     ti->class = g_malloc0(ti->class_size);
 
     parent = type_get_parent(ti);
@@ -1108,7 +1115,7 @@
                                      ObjectClass *klass)
 {
     g_hash_table_iter_init(&iter->iter, klass->properties);
-    iter->nextclass = klass;
+    iter->nextclass = object_class_get_parent(klass);
 }
 
 ObjectProperty *object_class_property_find(ObjectClass *klass, const char *name,
@@ -2423,9 +2430,10 @@
     op->description = g_strdup(description);
 }
 
-static void object_instance_init(Object *obj)
+static void object_class_init(ObjectClass *klass, void *data)
 {
-    object_property_add_str(obj, "type", qdev_get_type, NULL, NULL);
+    object_class_property_add_str(klass, "type", qdev_get_type,
+                                  NULL, &error_abort);
 }
 
 static void register_types(void)
@@ -2439,7 +2447,7 @@
     static TypeInfo object_info = {
         .name = TYPE_OBJECT,
         .instance_size = sizeof(Object),
-        .instance_init = object_instance_init,
+        .class_init = object_class_init,
         .abstract = true,
     };
 
diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c
index 72b97a8..97b79b4 100644
--- a/qom/object_interfaces.c
+++ b/qom/object_interfaces.c
@@ -141,20 +141,18 @@
 
 int user_creatable_add_opts_foreach(void *opaque, QemuOpts *opts, Error **errp)
 {
-    bool (*type_predicate)(const char *) = opaque;
+    bool (*type_opt_predicate)(const char *, QemuOpts *) = opaque;
     Object *obj = NULL;
-    Error *err = NULL;
     const char *type;
 
     type = qemu_opt_get(opts, "qom-type");
-    if (type && type_predicate &&
-        !type_predicate(type)) {
+    if (type && type_opt_predicate &&
+        !type_opt_predicate(type, opts)) {
         return 0;
     }
 
-    obj = user_creatable_add_opts(opts, &err);
+    obj = user_creatable_add_opts(opts, errp);
     if (!obj) {
-        error_report_err(err);
         return -1;
     }
     object_unref(obj);
diff --git a/replay/replay-events.c b/replay/replay-events.c
index 707de38..d9a2d49 100644
--- a/replay/replay-events.c
+++ b/replay/replay-events.c
@@ -94,18 +94,6 @@
     }
 }
 
-void replay_clear_events(void)
-{
-    g_assert(replay_mutex_locked());
-
-    while (!QTAILQ_EMPTY(&events_list)) {
-        Event *event = QTAILQ_FIRST(&events_list);
-        QTAILQ_REMOVE(&events_list, event, events);
-
-        g_free(event);
-    }
-}
-
 /*! Adds specified async event to the queue */
 void replay_add_event(ReplayAsyncEventKind event_kind,
                       void *opaque,
@@ -202,6 +190,7 @@
 {
     g_assert(replay_mutex_locked());
     g_assert(checkpoint != CHECKPOINT_CLOCK_WARP_START);
+    g_assert(checkpoint != CHECKPOINT_CLOCK_VIRTUAL);
     while (!QTAILQ_EMPTY(&events_list)) {
         Event *event = QTAILQ_FIRST(&events_list);
         replay_save_event(event, checkpoint);
@@ -308,7 +297,7 @@
 void replay_finish_events(void)
 {
     events_enabled = false;
-    replay_clear_events();
+    replay_flush_events();
 }
 
 bool replay_events_enabled(void)
diff --git a/replay/replay-internal.c b/replay/replay-internal.c
index b077cb5..1cea1d4 100644
--- a/replay/replay-internal.c
+++ b/replay/replay-internal.c
@@ -217,20 +217,25 @@
     }
 }
 
+void replay_advance_current_step(uint64_t current_step)
+{
+    /* Measure against the step passed in, not a freshly sampled one */
+    int diff = (int)(current_step - replay_state.current_step);
+    /* Time can only go forward */
+    assert(diff >= 0);
+
+    if (diff > 0) {
+        replay_put_event(EVENT_INSTRUCTION);
+        replay_put_dword(diff);
+        replay_state.current_step += diff;
+    }
+}
+
 /*! Saves cached instructions. */
 void replay_save_instructions(void)
 {
     if (replay_file && replay_mode == REPLAY_MODE_RECORD) {
         g_assert(replay_mutex_locked());
-        int diff = (int)(replay_get_current_step() - replay_state.current_step);
-
-        /* Time can only go forward */
-        assert(diff >= 0);
-
-        if (diff > 0) {
-            replay_put_event(EVENT_INSTRUCTION);
-            replay_put_dword(diff);
-            replay_state.current_step += diff;
-        }
+        replay_advance_current_step(replay_get_current_step());
     }
 }
diff --git a/replay/replay-internal.h b/replay/replay-internal.h
index ac4b27b..af6f4d5 100644
--- a/replay/replay-internal.h
+++ b/replay/replay-internal.h
@@ -122,6 +122,8 @@
     data_kind variable. */
 void replay_fetch_data_kind(void);
 
+/*! Advance replay_state.current_step to the specified value. */
+void replay_advance_current_step(uint64_t current_step);
 /*! Saves queued events (like instructions and sound). */
 void replay_save_instructions(void);
 
@@ -142,8 +144,6 @@
 void replay_finish_events(void);
 /*! Flushes events queue */
 void replay_flush_events(void);
-/*! Clears events list before loading new VM state */
-void replay_clear_events(void);
 /*! Returns true if there are any unsaved events in the queue */
 bool replay_has_events(void);
 /*! Saves events from queue into the file */
diff --git a/replay/replay-snapshot.c b/replay/replay-snapshot.c
index 2ab85cf..16bacc9 100644
--- a/replay/replay-snapshot.c
+++ b/replay/replay-snapshot.c
@@ -33,11 +33,18 @@
 static int replay_post_load(void *opaque, int version_id)
 {
     ReplayState *state = opaque;
-    fseek(replay_file, state->file_offset, SEEK_SET);
-    qemu_clock_set_last(QEMU_CLOCK_HOST, state->host_clock_last);
-    /* If this was a vmstate, saved in recording mode,
-       we need to initialize replay data fields. */
-    replay_fetch_data_kind();
+    if (replay_mode == REPLAY_MODE_PLAY) {
+        fseek(replay_file, state->file_offset, SEEK_SET);
+        qemu_clock_set_last(QEMU_CLOCK_HOST, state->host_clock_last);
+        /* If this was a vmstate, saved in recording mode,
+           we need to initialize replay data fields. */
+        replay_fetch_data_kind();
+    } else if (replay_mode == REPLAY_MODE_RECORD) {
+        /* This is only useful for loading the initial state.
+           Therefore reset all the counters. */
+        state->instructions_count = 0;
+        state->block_request_id = 0;
+    }
 
     return 0;
 }
diff --git a/replay/replay-time.c b/replay/replay-time.c
index 6a7565e..0df1693 100644
--- a/replay/replay-time.c
+++ b/replay/replay-time.c
@@ -15,13 +15,15 @@
 #include "replay-internal.h"
 #include "qemu/error-report.h"
 
-int64_t replay_save_clock(ReplayClockKind kind, int64_t clock)
+int64_t replay_save_clock(ReplayClockKind kind, int64_t clock, int64_t raw_icount)
 {
-
     if (replay_file) {
         g_assert(replay_mutex_locked());
 
-        replay_save_instructions();
+        /* Due to the caller's locking requirements we get the icount from it
+         * instead of using replay_save_instructions().
+         */
+        replay_advance_current_step(raw_icount);
         replay_put_event(EVENT_CLOCK + kind);
         replay_put_qword(clock);
     }
diff --git a/replay/replay.c b/replay/replay.c
index 8228261..8b172b2 100644
--- a/replay/replay.c
+++ b/replay/replay.c
@@ -214,7 +214,14 @@
         /* This checkpoint belongs to several threads.
            Processing events from different threads is
            non-deterministic */
-        if (checkpoint != CHECKPOINT_CLOCK_WARP_START) {
+        if (checkpoint != CHECKPOINT_CLOCK_WARP_START
+            /* FIXME: this is temporary fix, other checkpoints
+                      may also be invoked from the different threads someday.
+                      Asynchronous event processing should be refactored
+                      to create additional replay event kind which is
+                      nailed to the one of the threads and which processes
+                      the event queue. */
+            && checkpoint != CHECKPOINT_CLOCK_VIRTUAL) {
             replay_save_events(checkpoint);
         }
         res = true;
@@ -224,6 +231,18 @@
     return res;
 }
 
+bool replay_has_checkpoint(void)
+{
+    bool res = false;
+    if (replay_mode == REPLAY_MODE_PLAY) {
+        g_assert(replay_mutex_locked());
+        replay_account_executed_instructions();
+        res = EVENT_CHECKPOINT <= replay_state.data_kind
+              && replay_state.data_kind <= EVENT_CHECKPOINT_LAST;
+    }
+    return res;
+}
+
 static void replay_enable(const char *fname, int mode)
 {
     const char *fmode = NULL;
diff --git a/roms/openbios b/roms/openbios
index a128080..441a84d 160000
--- a/roms/openbios
+++ b/roms/openbios
@@ -1 +1 @@
-Subproject commit a1280807a335cc93a4fffb6461c6419cb7a42e96
+Subproject commit 441a84d3a642a10b948369c63f32367e8ff6395b
diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh
index 4e63774..62bd225 100755
--- a/scripts/archive-source.sh
+++ b/scripts/archive-source.sh
@@ -18,7 +18,7 @@
     error "Usage: $0 <output tarball>"
 fi
 
-tar_file=`realpath "$1"`
+tar_file=$(realpath "$1")
 list_file="${tar_file}.list"
 vroot_dir="${tar_file}.vroot"
 
@@ -34,7 +34,7 @@
 then
     HEAD=HEAD
 else
-    HEAD=`git stash create`
+    HEAD=$(git stash create)
 fi
 git clone --shared . "$vroot_dir"
 test $? -ne 0 && error "failed to clone into '$vroot_dir'"
diff --git a/scripts/check-qerror.sh b/scripts/check-qerror.sh
deleted file mode 100755
index af7fbd5..0000000
--- a/scripts/check-qerror.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/sh
-# This script verifies that qerror definitions and table entries are
-# alphabetically ordered.
-
-check_order() {
-  errmsg=$1
-  shift
-
-  # sort -C verifies order but does not print a message.  sort -c does print a
-  # message.  These options are both in POSIX.
-  if ! "$@" | sort -C; then
-    echo "$errmsg"
-    "$@" | sort -c
-    exit 1
-  fi
-  return 0
-}
-
-check_order 'Definitions in qerror.h must be in alphabetical order:' \
-            grep '^#define QERR_' qerror.h
-check_order 'Entries in qerror.c:qerror_table must be in alphabetical order:' \
-            sed -n '/^static.*qerror_table\[\]/,/^};/s/QERR_/&/gp' qerror.c
diff --git a/scripts/coccinelle/inplace-byteswaps.cocci b/scripts/coccinelle/inplace-byteswaps.cocci
new file mode 100644
index 0000000..a869a90
--- /dev/null
+++ b/scripts/coccinelle/inplace-byteswaps.cocci
@@ -0,0 +1,65 @@
+// Replace uses of in-place byteswapping functions with calls to the
+// equivalent not-in-place functions.  This is necessary to avoid
+// undefined behaviour if the expression being swapped is a field in a
+// packed struct.
+
+@@
+expression E;
+@@
+-be16_to_cpus(&E);
++E = be16_to_cpu(E);
+@@
+expression E;
+@@
+-be32_to_cpus(&E);
++E = be32_to_cpu(E);
+@@
+expression E;
+@@
+-be64_to_cpus(&E);
++E = be64_to_cpu(E);
+@@
+expression E;
+@@
+-cpu_to_be16s(&E);
++E = cpu_to_be16(E);
+@@
+expression E;
+@@
+-cpu_to_be32s(&E);
++E = cpu_to_be32(E);
+@@
+expression E;
+@@
+-cpu_to_be64s(&E);
++E = cpu_to_be64(E);
+@@
+expression E;
+@@
+-le16_to_cpus(&E);
++E = le16_to_cpu(E);
+@@
+expression E;
+@@
+-le32_to_cpus(&E);
++E = le32_to_cpu(E);
+@@
+expression E;
+@@
+-le64_to_cpus(&E);
++E = le64_to_cpu(E);
+@@
+expression E;
+@@
+-cpu_to_le16s(&E);
++E = cpu_to_le16(E);
+@@
+expression E;
+@@
+-cpu_to_le32s(&E);
++E = cpu_to_le32(E);
+@@
+expression E;
+@@
+-cpu_to_le64s(&E);
++E = cpu_to_le64(E);
diff --git a/scripts/coccinelle/use-error_fatal.cocci b/scripts/coccinelle/use-error_fatal.cocci
new file mode 100644
index 0000000..10fff0a
--- /dev/null
+++ b/scripts/coccinelle/use-error_fatal.cocci
@@ -0,0 +1,20 @@
+@@
+type T;
+identifier FUN, RET;
+expression list ARGS;
+expression ERR, EC, FAIL;
+@@
+(
+-    T RET = FUN(ARGS, &ERR);
++    T RET = FUN(ARGS, &error_fatal);
+|
+-    RET = FUN(ARGS, &ERR);
++    RET = FUN(ARGS, &error_fatal);
+|
+-    FUN(ARGS, &ERR);
++    FUN(ARGS, &error_fatal);
+)
+-    if (FAIL) {
+-        error_report_err(ERR);
+-        exit(EC);
+-    }
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index 277f9a9..457cffe 100755
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -149,12 +149,10 @@
 #   trans_addl_i(ctx, &arg_opi, insn)
 #
 
-import io
 import os
 import re
 import sys
 import getopt
-import pdb
 
 insnwidth = 32
 insnmask = 0xffffffff
diff --git a/scripts/device-crash-test b/scripts/device-crash-test
index 7045594..930200b 100755
--- a/scripts/device-crash-test
+++ b/scripts/device-crash-test
@@ -35,7 +35,6 @@
 import argparse
 from itertools import chain
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'scripts'))
 from qemu import QEMUMachine
 
 logger = logging.getLogger('device-crash-test')
@@ -99,7 +98,6 @@
     {'device':'isa-ipmi-bt', 'expected':True},             # IPMI device requires a bmc attribute to be set
     {'device':'isa-ipmi-kcs', 'expected':True},            # IPMI device requires a bmc attribute to be set
     {'device':'isa-parallel', 'expected':True},            # Can't create serial device, empty char device
-    {'device':'isa-serial', 'expected':True},              # Can't create serial device, empty char device
     {'device':'ivshmem', 'expected':True},                 # You must specify either 'shm' or 'chardev'
     {'device':'ivshmem-doorbell', 'expected':True},        # You must specify a 'chardev'
     {'device':'ivshmem-plain', 'expected':True},           # You must specify a 'memdev'
@@ -110,9 +108,6 @@
     {'device':'pc-dimm', 'expected':True},                 # 'memdev' property is not set
     {'device':'pci-bridge', 'expected':True},              # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
     {'device':'pci-bridge-seat', 'expected':True},         # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
-    {'device':'pci-serial', 'expected':True},              # Can't create serial device, empty char device
-    {'device':'pci-serial-2x', 'expected':True},           # Can't create serial device, empty char device
-    {'device':'pci-serial-4x', 'expected':True},           # Can't create serial device, empty char device
     {'device':'pxa2xx-dma', 'expected':True},              # channels value invalid
     {'device':'pxb', 'expected':True},                     # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
     {'device':'scsi-block', 'expected':True},              # drive property not set
@@ -218,7 +213,6 @@
     {'exitcode':-6, 'log':r"Object .* is not an instance of type generic-pc-machine", 'loglevel':logging.ERROR},
     {'exitcode':-6, 'log':r"Object .* is not an instance of type e500-ccsr", 'loglevel':logging.ERROR},
     {'exitcode':-6, 'log':r"vmstate_register_with_alias_id: Assertion `!se->compat \|\| se->instance_id == 0' failed", 'loglevel':logging.ERROR},
-    {'exitcode':-11, 'device':'isa-serial', 'loglevel':logging.ERROR, 'expected':True},
 
     # everything else (including SIGABRT and SIGSEGV) will be a fatal error:
     {'exitcode':None, 'fatal':True, 'loglevel':logging.FATAL},
diff --git a/scripts/git-submodule.sh b/scripts/git-submodule.sh
index 807ca0b..98ca0f2 100755
--- a/scripts/git-submodule.sh
+++ b/scripts/git-submodule.sh
@@ -59,8 +59,8 @@
     fi
 
     test -f "$substat" || exit 1
-    CURSTATUS=`$GIT submodule status $modules`
-    OLDSTATUS=`cat $substat`
+    CURSTATUS=$($GIT submodule status $modules)
+    OLDSTATUS=$(cat $substat)
     test "$CURSTATUS" = "$OLDSTATUS"
     exit $?
     ;;
diff --git a/scripts/qemu.py b/scripts/qemu.py
index f099ce7..fd4249f 100644
--- a/scripts/qemu.py
+++ b/scripts/qemu.py
@@ -26,6 +26,12 @@
 LOG = logging.getLogger(__name__)
 
 
+def kvm_available(target_arch=None):
+    if target_arch and target_arch != os.uname()[4]:
+        return False
+    return os.access("/dev/kvm", os.R_OK | os.W_OK)
+
+
 #: Maps machine types to the preferred console device types
 CONSOLE_DEV_TYPES = {
     r'^clipper$': 'isa-serial',
@@ -87,7 +93,7 @@
         @param name: prefix for socket and log file names (default: qemu-PID)
         @param test_dir: where to create socket and log file
         @param monitor_address: address for QMP monitor
-        @param socket_scm_helper: helper program, required for send_fd_scm()"
+        @param socket_scm_helper: helper program, required for send_fd_scm()
         @note: Qemu process is not started until launch() is used.
         '''
         if args is None:
diff --git a/scripts/show-fixed-bugs.sh b/scripts/show-fixed-bugs.sh
index 36f3068..a095a4d 100755
--- a/scripts/show-fixed-bugs.sh
+++ b/scripts/show-fixed-bugs.sh
@@ -23,10 +23,10 @@
 done
 
 if [ "x$start" = "x" ]; then
-    start=`git tag -l 'v[0-9]*\.[0-9]*\.0' | tail -n 2 | head -n 1`
+    start=$(git tag -l 'v[0-9]*\.[0-9]*\.0' | tail -n 2 | head -n 1)
 fi
 if [ "x$end" = "x" ]; then
-    end=`git tag -l  'v[0-9]*\.[0-9]*\.0' | tail -n 1`
+    end=$(git tag -l  'v[0-9]*\.[0-9]*\.0' | tail -n 1)
 fi
 
 if [ "x$start" = "x" ] || [ "x$end" = "x" ]; then
@@ -38,9 +38,9 @@
 echo "Searching git log for bugs in the range $start..$end"
 
 urlstr='https://bugs.launchpad.net/\(bugs\|qemu/+bug\)/'
-bug_urls=`git log $start..$end \
+bug_urls=$(git log $start..$end \
   | sed -n '\,'"$urlstr"', s,\(.*\)\('"$urlstr"'\)\([0-9]*\).*,\2\4,p' \
-  | sort -u`
+  | sort -u)
 
 echo Found bug URLs:
 for i in $bug_urls ; do echo " $i" ; done
@@ -68,7 +68,7 @@
         bugbrowser=xdg-open
     elif command -v gnome-open >/dev/null 2>&1; then
         bugbrowser=gnome-open
-    elif [ "`uname`" = "Darwin" ]; then
+    elif [ "$(uname)" = "Darwin" ]; then
         bugbrowser=open
     elif command -v sensible-browser >/dev/null 2>&1; then
         bugbrowser=sensible-browser
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
index ed037aa..ce40008 100644
--- a/scsi/qemu-pr-helper.c
+++ b/scsi/qemu-pr-helper.c
@@ -117,39 +117,6 @@
     , name);
 }
 
-static void write_pidfile(void)
-{
-    int pidfd;
-    char pidstr[32];
-
-    pidfd = qemu_open(pidfile, O_CREAT|O_WRONLY, S_IRUSR|S_IWUSR);
-    if (pidfd == -1) {
-        error_report("Cannot open pid file, %s", strerror(errno));
-        exit(EXIT_FAILURE);
-    }
-
-    if (lockf(pidfd, F_TLOCK, 0)) {
-        error_report("Cannot lock pid file, %s", strerror(errno));
-        goto fail;
-    }
-    if (ftruncate(pidfd, 0)) {
-        error_report("Failed to truncate pid file");
-        goto fail;
-    }
-
-    snprintf(pidstr, sizeof(pidstr), "%d\n", getpid());
-    if (write(pidfd, pidstr, strlen(pidstr)) != strlen(pidstr)) {
-        error_report("Failed to write pid file");
-        goto fail;
-    }
-    return;
-
-fail:
-    unlink(pidfile);
-    close(pidfd);
-    exit(EXIT_FAILURE);
-}
-
 /* SG_IO support */
 
 typedef struct PRHelperSGIOData {
@@ -1080,8 +1047,11 @@
         }
     }
 
-    if (daemonize || pidfile_specified)
-        write_pidfile();
+    if ((daemonize || pidfile_specified) &&
+        !qemu_write_pidfile(pidfile, &local_err)) {
+        error_report_err(local_err);
+        exit(EXIT_FAILURE);
+    }
 
 #ifdef CONFIG_LIBCAP
     if (drop_privileges() < 0) {
diff --git a/slirp/bootp.c b/slirp/bootp.c
index 9e7b53b..7b1af73 100644
--- a/slirp/bootp.c
+++ b/slirp/bootp.c
@@ -159,6 +159,7 @@
     struct in_addr preq_addr;
     int dhcp_msg_type, val;
     uint8_t *q;
+    uint8_t *end;
     uint8_t client_ethaddr[ETH_ALEN];
 
     /* extract exact DHCP msg type */
@@ -240,6 +241,7 @@
     rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */
 
     q = rbp->bp_vend;
+    end = (uint8_t *)&rbp[1];
     memcpy(q, rfc1533_cookie, 4);
     q += 4;
 
@@ -292,24 +294,46 @@
 
         if (*slirp->client_hostname) {
             val = strlen(slirp->client_hostname);
-            *q++ = RFC1533_HOSTNAME;
-            *q++ = val;
-            memcpy(q, slirp->client_hostname, val);
-            q += val;
+            if (q + val + 2 >= end) {
+                g_warning("DHCP packet size exceeded, "
+                    "omitting host name option.");
+            } else {
+                *q++ = RFC1533_HOSTNAME;
+                *q++ = val;
+                memcpy(q, slirp->client_hostname, val);
+                q += val;
+            }
         }
 
         if (slirp->vdomainname) {
             val = strlen(slirp->vdomainname);
-            *q++ = RFC1533_DOMAINNAME;
-            *q++ = val;
-            memcpy(q, slirp->vdomainname, val);
-            q += val;
+            if (q + val + 2 >= end) {
+                g_warning("DHCP packet size exceeded, "
+                    "omitting domain name option.");
+            } else {
+                *q++ = RFC1533_DOMAINNAME;
+                *q++ = val;
+                memcpy(q, slirp->vdomainname, val);
+                q += val;
+            }
+        }
+
+        if (slirp->tftp_server_name) {
+            val = strlen(slirp->tftp_server_name);
+            if (q + val + 2 >= end) {
+                g_warning("DHCP packet size exceeded, "
+                    "omitting tftp-server-name option.");
+            } else {
+                *q++ = RFC2132_TFTP_SERVER_NAME;
+                *q++ = val;
+                memcpy(q, slirp->tftp_server_name, val);
+                q += val;
+            }
         }
 
         if (slirp->vdnssearch) {
-            size_t spaceleft = sizeof(rbp->bp_vend) - (q - rbp->bp_vend);
             val = slirp->vdnssearch_len;
-            if (val + 1 > spaceleft) {
+            if (q + val >= end) {
                 g_warning("DHCP packet size exceeded, "
                     "omitting domain-search option.");
             } else {
@@ -331,6 +355,7 @@
         memcpy(q, nak_msg, sizeof(nak_msg) - 1);
         q += sizeof(nak_msg) - 1;
     }
+    assert(q < end);
     *q = RFC1533_END;
 
     daddr.sin_addr.s_addr = 0xffffffffu;
diff --git a/slirp/bootp.h b/slirp/bootp.h
index 3945257..4043489 100644
--- a/slirp/bootp.h
+++ b/slirp/bootp.h
@@ -70,6 +70,7 @@
 #define RFC2132_MAX_SIZE	57
 #define RFC2132_RENEWAL_TIME    58
 #define RFC2132_REBIND_TIME     59
+#define RFC2132_TFTP_SERVER_NAME 66
 
 #define DHCPDISCOVER		1
 #define DHCPOFFER		2
diff --git a/slirp/ip6_icmp.c b/slirp/ip6_icmp.c
index ee333d0..cd1e0b9 100644
--- a/slirp/ip6_icmp.c
+++ b/slirp/ip6_icmp.c
@@ -27,7 +27,9 @@
         return;
     }
 
-    slirp->ra_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, ra_timer_handler, slirp);
+    slirp->ra_timer = timer_new_full(NULL, QEMU_CLOCK_VIRTUAL,
+                                     SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL,
+                                     ra_timer_handler, slirp);
     timer_mod(slirp->ra_timer,
               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NDP_Interval);
 }
diff --git a/slirp/ip_icmp.c b/slirp/ip_icmp.c
index 0b667a4..da100d1 100644
--- a/slirp/ip_icmp.c
+++ b/slirp/ip_icmp.c
@@ -420,7 +420,32 @@
     icp = mtod(m, struct icmp *);
 
     id = icp->icmp_id;
-    len = qemu_recv(so->s, icp, m->m_len, 0);
+    len = qemu_recv(so->s, icp, M_ROOM(m), 0);
+    /*
+     * The behavior of reading SOCK_DGRAM+IPPROTO_ICMP sockets is inconsistent
+     * between host OSes.  On Linux, only the ICMP header and payload is
+     * included.  On macOS/Darwin, the socket acts like a raw socket and
+     * includes the IP header as well.  On other BSDs, SOCK_DGRAM+IPPROTO_ICMP
+     * sockets aren't supported at all, so we treat them like raw sockets.  It
+     * isn't possible to detect this difference at runtime, so we must use an
+     * #ifdef to determine if we need to remove the IP header.
+     */
+#ifdef CONFIG_BSD
+    if (len >= (int)sizeof(struct ip)) {
+        struct ip *inner_ip = mtod(m, struct ip *);
+        int inner_hlen = inner_ip->ip_hl << 2;
+        if (inner_hlen > len) {
+            len = -1;
+            errno = EINVAL; /* errno codes are positive */
+        } else {
+            len -= inner_hlen;
+            memmove(icp, (unsigned char *)icp + inner_hlen, len);
+        }
+    } else if (len >= 0) { /* keep qemu_recv()'s own errno on failure */
+        len = -1;
+        errno = EINVAL;
+    }
+#endif
     icp->icmp_id = id;
 
     m->m_data -= hlen;
diff --git a/slirp/libslirp.h b/slirp/libslirp.h
index 740408a..42e42e9 100644
--- a/slirp/libslirp.h
+++ b/slirp/libslirp.h
@@ -13,6 +13,7 @@
                   bool in6_enabled,
                   struct in6_addr vprefix_addr6, uint8_t vprefix_len,
                   struct in6_addr vhost6, const char *vhostname,
+                  const char *tftp_server_name,
                   const char *tftp_path, const char *bootfile,
                   struct in_addr vdhcp_start, struct in_addr vnameserver,
                   struct in6_addr vnameserver6, const char **vdnssearch,
diff --git a/slirp/mbuf.c b/slirp/mbuf.c
index 1b78683..aa1f28a 100644
--- a/slirp/mbuf.c
+++ b/slirp/mbuf.c
@@ -151,7 +151,7 @@
 void
 m_inc(struct mbuf *m, int size)
 {
-    int datasize;
+    int gapsize;
 
     /* some compilers throw up on gotos.  This one we can fake. */
     if (M_ROOM(m) > size) {
@@ -159,17 +159,17 @@
     }
 
     if (m->m_flags & M_EXT) {
-        datasize = m->m_data - m->m_ext;
-        m->m_ext = g_realloc(m->m_ext, size + datasize);
+        gapsize = m->m_data - m->m_ext;
+        m->m_ext = g_realloc(m->m_ext, size + gapsize);
     } else {
-        datasize = m->m_data - m->m_dat;
-        m->m_ext = g_malloc(size + datasize);
+        gapsize = m->m_data - m->m_dat;
+        m->m_ext = g_malloc(size + gapsize);
         memcpy(m->m_ext, m->m_dat, m->m_size);
         m->m_flags |= M_EXT;
     }
 
-    m->m_data = m->m_ext + datasize;
-    m->m_size = size + datasize;
+    m->m_data = m->m_ext + gapsize;
+    m->m_size = size + gapsize;
 }
 
 
diff --git a/slirp/mbuf.h b/slirp/mbuf.h
index 33b8448..bfdf8c4 100644
--- a/slirp/mbuf.h
+++ b/slirp/mbuf.h
@@ -48,6 +48,19 @@
  */
 
 /*
+ * mbufs may have a gap between the start of the allocated buffer (m_ext if
+ * M_EXT is set, m_dat otherwise) and the in-use data:
+ *
+ *  |--gapsize----->|---m_len------->
+ *  |----------m_size------------------------------>
+ *                  |----M_ROOM-------------------->
+ *                                   |-M_FREEROOM-->
+ *
+ *  ^               ^                               ^
+ *  m_dat/m_ext     m_data                          end of buffer
+ */
+
+/*
  * How much room is in the mbuf, from m_data to the end of the mbuf
  */
 #define M_ROOM(m) ((m->m_flags & M_EXT)? \
diff --git a/slirp/slirp.c b/slirp/slirp.c
index 5c3bd61..51de41f 100644
--- a/slirp/slirp.c
+++ b/slirp/slirp.c
@@ -283,6 +283,7 @@
                   bool in6_enabled,
                   struct in6_addr vprefix_addr6, uint8_t vprefix_len,
                   struct in6_addr vhost6, const char *vhostname,
+                  const char *tftp_server_name,
                   const char *tftp_path, const char *bootfile,
                   struct in_addr vdhcp_start, struct in_addr vnameserver,
                   struct in6_addr vnameserver6, const char **vdnssearch,
@@ -321,6 +322,7 @@
     slirp->vdhcp_startaddr = vdhcp_start;
     slirp->vnameserver_addr = vnameserver;
     slirp->vnameserver_addr6 = vnameserver6;
+    slirp->tftp_server_name = g_strdup(tftp_server_name);
 
     if (vdnssearch) {
         translate_dnssearch(slirp, vdnssearch);
diff --git a/slirp/slirp.h b/slirp/slirp.h
index 10b4108..b80725a 100644
--- a/slirp/slirp.h
+++ b/slirp/slirp.h
@@ -212,6 +212,7 @@
     /* tftp states */
     char *tftp_prefix;
     struct tftp_session tftp_sessions[TFTP_SESSIONS_MAX];
+    char *tftp_server_name;
 
     ArpTable arp_table;
     NdpTable ndp_table;
diff --git a/slirp/socket.c b/slirp/socket.c
index 08fe989..322383a 100644
--- a/slirp/socket.c
+++ b/slirp/socket.c
@@ -204,12 +204,19 @@
 			return 0;
 		else {
 			int err;
-			socklen_t slen = sizeof err;
+			socklen_t elen = sizeof err;
+			struct sockaddr_storage addr;
+			struct sockaddr *paddr = (struct sockaddr *) &addr;
+			socklen_t alen = sizeof addr;
 
 			err = errno;
 			if (nn == 0) {
-				getsockopt(so->s, SOL_SOCKET, SO_ERROR,
-					   &err, &slen);
+				if (getpeername(so->s, paddr, &alen) < 0) {
+					err = errno;
+				} else {
+					getsockopt(so->s, SOL_SOCKET, SO_ERROR,
+						&err, &elen);
+				}
 			}
 
 			DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n", nn, errno,strerror(errno)));
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index 53d3f32..5dd0aee 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -43,3 +43,4 @@
 stub-obj-y += xen-hvm.o
 stub-obj-y += pci-host-piix.o
 stub-obj-y += ram-block.o
+stub-obj-y += ramfb.o
diff --git a/stubs/cpu-get-icount.c b/stubs/cpu-get-icount.c
index 0b7239d..35f0c1e 100644
--- a/stubs/cpu-get-icount.c
+++ b/stubs/cpu-get-icount.c
@@ -11,6 +11,11 @@
     abort();
 }
 
+int64_t cpu_get_icount_raw(void)
+{
+    abort();
+}
+
 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
 {
     qemu_notify_event();
diff --git a/stubs/ramfb.c b/stubs/ramfb.c
new file mode 100644
index 0000000..48143f3
--- /dev/null
+++ b/stubs/ramfb.c
@@ -0,0 +1,13 @@
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/display/ramfb.h"
+
+void ramfb_display_update(QemuConsole *con, RAMFBState *s)
+{
+}
+
+RAMFBState *ramfb_setup(Error **errp)
+{
+    error_setg(errp, "ramfb support not available");
+    return NULL;
+}
diff --git a/stubs/replay.c b/stubs/replay.c
index 04279ab..4ac6078 100644
--- a/stubs/replay.c
+++ b/stubs/replay.c
@@ -4,7 +4,7 @@
 
 ReplayMode replay_mode;
 
-int64_t replay_save_clock(unsigned int kind, int64_t clock)
+int64_t replay_save_clock(unsigned int kind, int64_t clock, int64_t raw_icount)
 {
     abort();
     return 0;
diff --git a/stubs/tpm.c b/stubs/tpm.c
index 6729bc8..80939cd 100644
--- a/stubs/tpm.c
+++ b/stubs/tpm.c
@@ -9,9 +9,8 @@
 #include "qapi/qapi-commands-tpm.h"
 #include "sysemu/tpm.h"
 
-int tpm_init(void)
+void tpm_init(void)
 {
-    return 0;
 }
 
 void tpm_cleanup(void)
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
index b08078e..a953897 100644
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@@ -201,7 +201,6 @@
     CPUAlphaState *env = &cpu->env;
 
     cs->env_ptr = env;
-    tlb_flush(cs);
 
     env->lock_addr = -1;
 #if defined(CONFIG_USER_ONLY)
diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
index ce55eeb..2b85693 100644
--- a/target/arm/arm-powerctl.c
+++ b/target/arm/arm-powerctl.c
@@ -103,6 +103,16 @@
     } else {
         /* Processor is not in secure mode */
         target_cpu->env.cp15.scr_el3 |= SCR_NS;
+
+        /*
+         * If QEMU is providing the equivalent of EL3 firmware, then we need
+         * to make sure a CPU targeting EL2 comes out of reset with a
+         * functional HVC insn.
+         */
+        if (arm_feature(&target_cpu->env, ARM_FEATURE_EL3)
+            && info->target_el == 2) {
+            target_cpu->env.cp15.scr_el3 |= SCR_HCE;
+        }
     }
 
     /* We check if the started CPU is now at the correct level */
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index b5e61cc..8f16e96 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -144,9 +144,9 @@
     g_hash_table_foreach(cpu->cp_regs, cp_reg_check_reset, cpu);
 
     env->vfp.xregs[ARM_VFP_FPSID] = cpu->reset_fpsid;
-    env->vfp.xregs[ARM_VFP_MVFR0] = cpu->mvfr0;
-    env->vfp.xregs[ARM_VFP_MVFR1] = cpu->mvfr1;
-    env->vfp.xregs[ARM_VFP_MVFR2] = cpu->mvfr2;
+    env->vfp.xregs[ARM_VFP_MVFR0] = cpu->isar.mvfr0;
+    env->vfp.xregs[ARM_VFP_MVFR1] = cpu->isar.mvfr1;
+    env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.mvfr2;
 
     cpu->power_state = cpu->start_powered_off ? PSCI_OFF : PSCI_ON;
     s->halted = cpu->start_powered_off;
@@ -814,7 +814,11 @@
 
     /* Some features automatically imply others: */
     if (arm_feature(env, ARM_FEATURE_V8)) {
-        set_feature(env, ARM_FEATURE_V7VE);
+        if (arm_feature(env, ARM_FEATURE_M)) {
+            set_feature(env, ARM_FEATURE_V7);
+        } else {
+            set_feature(env, ARM_FEATURE_V7VE);
+        }
     }
     if (arm_feature(env, ARM_FEATURE_V7VE)) {
         /* v7 Virtualization Extensions. In real hardware this implies
@@ -825,7 +829,7 @@
          * Presence of EL2 itself is ARM_FEATURE_EL2, and of the
          * Security Extensions is ARM_FEATURE_EL3.
          */
-        set_feature(env, ARM_FEATURE_ARM_DIV);
+        assert(cpu_isar_feature(arm_div, cpu));
         set_feature(env, ARM_FEATURE_LPAE);
         set_feature(env, ARM_FEATURE_V7);
     }
@@ -850,20 +854,14 @@
     }
     if (arm_feature(env, ARM_FEATURE_V6)) {
         set_feature(env, ARM_FEATURE_V5);
-        set_feature(env, ARM_FEATURE_JAZELLE);
         if (!arm_feature(env, ARM_FEATURE_M)) {
+            assert(cpu_isar_feature(jazelle, cpu));
             set_feature(env, ARM_FEATURE_AUXCR);
         }
     }
     if (arm_feature(env, ARM_FEATURE_V5)) {
         set_feature(env, ARM_FEATURE_V4T);
     }
-    if (arm_feature(env, ARM_FEATURE_M)) {
-        set_feature(env, ARM_FEATURE_THUMB_DIV);
-    }
-    if (arm_feature(env, ARM_FEATURE_ARM_DIV)) {
-        set_feature(env, ARM_FEATURE_THUMB_DIV);
-    }
     if (arm_feature(env, ARM_FEATURE_VFP4)) {
         set_feature(env, ARM_FEATURE_VFP3);
         set_feature(env, ARM_FEATURE_VFP_FP16);
@@ -938,7 +936,7 @@
          * registers as well. These are id_pfr1[7:4] and id_aa64pfr0[15:12].
          */
         cpu->id_pfr1 &= ~0xf0;
-        cpu->id_aa64pfr0 &= ~0xf000;
+        cpu->isar.id_aa64pfr0 &= ~0xf000;
     }
 
     if (!cpu->has_el2) {
@@ -955,7 +953,7 @@
          * registers if we don't have EL2. These are id_pfr1[15:12] and
          * id_aa64pfr0_el1[11:8].
          */
-        cpu->id_aa64pfr0 &= ~0xf00;
+        cpu->isar.id_aa64pfr0 &= ~0xf00;
         cpu->id_pfr1 &= ~0xf000;
     }
 
@@ -1084,11 +1082,16 @@
     set_feature(&cpu->env, ARM_FEATURE_VFP);
     set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
     set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN);
-    set_feature(&cpu->env, ARM_FEATURE_JAZELLE);
     cpu->midr = 0x41069265;
     cpu->reset_fpsid = 0x41011090;
     cpu->ctr = 0x1dd20d2;
     cpu->reset_sctlr = 0x00090078;
+
+    /*
+     * ARMv5 does not have the ID_ISAR registers, but we can still
+     * set the field to indicate Jazelle support within QEMU.
+     */
+    cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1);
 }
 
 static void arm946_initfn(Object *obj)
@@ -1114,12 +1117,18 @@
     set_feature(&cpu->env, ARM_FEATURE_AUXCR);
     set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
     set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN);
-    set_feature(&cpu->env, ARM_FEATURE_JAZELLE);
     cpu->midr = 0x4106a262;
     cpu->reset_fpsid = 0x410110a0;
     cpu->ctr = 0x1dd20d2;
     cpu->reset_sctlr = 0x00090078;
     cpu->reset_auxcr = 1;
+
+    /*
+     * ARMv5 does not have the ID_ISAR registers, but we can still
+     * set the field to indicate Jazelle support within QEMU.
+     */
+    cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1);
+
     {
         /* The 1026 had an IFAR at c6,c0,0,1 rather than the ARMv6 c6,c0,0,2 */
         ARMCPRegInfo ifar = {
@@ -1151,8 +1160,8 @@
     set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS);
     cpu->midr = 0x4107b362;
     cpu->reset_fpsid = 0x410120b4;
-    cpu->mvfr0 = 0x11111111;
-    cpu->mvfr1 = 0x00000000;
+    cpu->isar.mvfr0 = 0x11111111;
+    cpu->isar.mvfr1 = 0x00000000;
     cpu->ctr = 0x1dd20d2;
     cpu->reset_sctlr = 0x00050078;
     cpu->id_pfr0 = 0x111;
@@ -1162,11 +1171,11 @@
     cpu->id_mmfr0 = 0x01130003;
     cpu->id_mmfr1 = 0x10030302;
     cpu->id_mmfr2 = 0x01222110;
-    cpu->id_isar0 = 0x00140011;
-    cpu->id_isar1 = 0x12002111;
-    cpu->id_isar2 = 0x11231111;
-    cpu->id_isar3 = 0x01102131;
-    cpu->id_isar4 = 0x141;
+    cpu->isar.id_isar0 = 0x00140011;
+    cpu->isar.id_isar1 = 0x12002111;
+    cpu->isar.id_isar2 = 0x11231111;
+    cpu->isar.id_isar3 = 0x01102131;
+    cpu->isar.id_isar4 = 0x141;
     cpu->reset_auxcr = 7;
 }
 
@@ -1183,8 +1192,8 @@
     set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS);
     cpu->midr = 0x4117b363;
     cpu->reset_fpsid = 0x410120b4;
-    cpu->mvfr0 = 0x11111111;
-    cpu->mvfr1 = 0x00000000;
+    cpu->isar.mvfr0 = 0x11111111;
+    cpu->isar.mvfr1 = 0x00000000;
     cpu->ctr = 0x1dd20d2;
     cpu->reset_sctlr = 0x00050078;
     cpu->id_pfr0 = 0x111;
@@ -1194,11 +1203,11 @@
     cpu->id_mmfr0 = 0x01130003;
     cpu->id_mmfr1 = 0x10030302;
     cpu->id_mmfr2 = 0x01222110;
-    cpu->id_isar0 = 0x00140011;
-    cpu->id_isar1 = 0x12002111;
-    cpu->id_isar2 = 0x11231111;
-    cpu->id_isar3 = 0x01102131;
-    cpu->id_isar4 = 0x141;
+    cpu->isar.id_isar0 = 0x00140011;
+    cpu->isar.id_isar1 = 0x12002111;
+    cpu->isar.id_isar2 = 0x11231111;
+    cpu->isar.id_isar3 = 0x01102131;
+    cpu->isar.id_isar4 = 0x141;
     cpu->reset_auxcr = 7;
 }
 
@@ -1216,8 +1225,8 @@
     set_feature(&cpu->env, ARM_FEATURE_EL3);
     cpu->midr = 0x410fb767;
     cpu->reset_fpsid = 0x410120b5;
-    cpu->mvfr0 = 0x11111111;
-    cpu->mvfr1 = 0x00000000;
+    cpu->isar.mvfr0 = 0x11111111;
+    cpu->isar.mvfr1 = 0x00000000;
     cpu->ctr = 0x1dd20d2;
     cpu->reset_sctlr = 0x00050078;
     cpu->id_pfr0 = 0x111;
@@ -1227,11 +1236,11 @@
     cpu->id_mmfr0 = 0x01130003;
     cpu->id_mmfr1 = 0x10030302;
     cpu->id_mmfr2 = 0x01222100;
-    cpu->id_isar0 = 0x0140011;
-    cpu->id_isar1 = 0x12002111;
-    cpu->id_isar2 = 0x11231121;
-    cpu->id_isar3 = 0x01102131;
-    cpu->id_isar4 = 0x01141;
+    cpu->isar.id_isar0 = 0x0140011;
+    cpu->isar.id_isar1 = 0x12002111;
+    cpu->isar.id_isar2 = 0x11231121;
+    cpu->isar.id_isar3 = 0x01102131;
+    cpu->isar.id_isar4 = 0x01141;
     cpu->reset_auxcr = 7;
 }
 
@@ -1247,8 +1256,8 @@
     set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
     cpu->midr = 0x410fb022;
     cpu->reset_fpsid = 0x410120b4;
-    cpu->mvfr0 = 0x11111111;
-    cpu->mvfr1 = 0x00000000;
+    cpu->isar.mvfr0 = 0x11111111;
+    cpu->isar.mvfr1 = 0x00000000;
     cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */
     cpu->id_pfr0 = 0x111;
     cpu->id_pfr1 = 0x1;
@@ -1257,11 +1266,11 @@
     cpu->id_mmfr0 = 0x01100103;
     cpu->id_mmfr1 = 0x10020302;
     cpu->id_mmfr2 = 0x01222000;
-    cpu->id_isar0 = 0x00100011;
-    cpu->id_isar1 = 0x12002111;
-    cpu->id_isar2 = 0x11221011;
-    cpu->id_isar3 = 0x01102131;
-    cpu->id_isar4 = 0x141;
+    cpu->isar.id_isar0 = 0x00100011;
+    cpu->isar.id_isar1 = 0x12002111;
+    cpu->isar.id_isar2 = 0x11221011;
+    cpu->isar.id_isar3 = 0x01102131;
+    cpu->isar.id_isar4 = 0x141;
     cpu->reset_auxcr = 1;
 }
 
@@ -1290,13 +1299,13 @@
     cpu->id_mmfr1 = 0x00000000;
     cpu->id_mmfr2 = 0x00000000;
     cpu->id_mmfr3 = 0x00000000;
-    cpu->id_isar0 = 0x01141110;
-    cpu->id_isar1 = 0x02111000;
-    cpu->id_isar2 = 0x21112231;
-    cpu->id_isar3 = 0x01111110;
-    cpu->id_isar4 = 0x01310102;
-    cpu->id_isar5 = 0x00000000;
-    cpu->id_isar6 = 0x00000000;
+    cpu->isar.id_isar0 = 0x01141110;
+    cpu->isar.id_isar1 = 0x02111000;
+    cpu->isar.id_isar2 = 0x21112231;
+    cpu->isar.id_isar3 = 0x01111110;
+    cpu->isar.id_isar4 = 0x01310102;
+    cpu->isar.id_isar5 = 0x00000000;
+    cpu->isar.id_isar6 = 0x00000000;
 }
 
 static void cortex_m4_initfn(Object *obj)
@@ -1317,13 +1326,13 @@
     cpu->id_mmfr1 = 0x00000000;
     cpu->id_mmfr2 = 0x00000000;
     cpu->id_mmfr3 = 0x00000000;
-    cpu->id_isar0 = 0x01141110;
-    cpu->id_isar1 = 0x02111000;
-    cpu->id_isar2 = 0x21112231;
-    cpu->id_isar3 = 0x01111110;
-    cpu->id_isar4 = 0x01310102;
-    cpu->id_isar5 = 0x00000000;
-    cpu->id_isar6 = 0x00000000;
+    cpu->isar.id_isar0 = 0x01141110;
+    cpu->isar.id_isar1 = 0x02111000;
+    cpu->isar.id_isar2 = 0x21112231;
+    cpu->isar.id_isar3 = 0x01111110;
+    cpu->isar.id_isar4 = 0x01310102;
+    cpu->isar.id_isar5 = 0x00000000;
+    cpu->isar.id_isar6 = 0x00000000;
 }
 
 static void cortex_m33_initfn(Object *obj)
@@ -1346,13 +1355,13 @@
     cpu->id_mmfr1 = 0x00000000;
     cpu->id_mmfr2 = 0x01000000;
     cpu->id_mmfr3 = 0x00000000;
-    cpu->id_isar0 = 0x01101110;
-    cpu->id_isar1 = 0x02212000;
-    cpu->id_isar2 = 0x20232232;
-    cpu->id_isar3 = 0x01111131;
-    cpu->id_isar4 = 0x01310132;
-    cpu->id_isar5 = 0x00000000;
-    cpu->id_isar6 = 0x00000000;
+    cpu->isar.id_isar0 = 0x01101110;
+    cpu->isar.id_isar1 = 0x02212000;
+    cpu->isar.id_isar2 = 0x20232232;
+    cpu->isar.id_isar3 = 0x01111131;
+    cpu->isar.id_isar4 = 0x01310132;
+    cpu->isar.id_isar5 = 0x00000000;
+    cpu->isar.id_isar6 = 0x00000000;
     cpu->clidr = 0x00000000;
     cpu->ctr = 0x8000c000;
 }
@@ -1384,8 +1393,6 @@
     ARMCPU *cpu = ARM_CPU(obj);
 
     set_feature(&cpu->env, ARM_FEATURE_V7);
-    set_feature(&cpu->env, ARM_FEATURE_THUMB_DIV);
-    set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
     set_feature(&cpu->env, ARM_FEATURE_V7MP);
     set_feature(&cpu->env, ARM_FEATURE_PMSA);
     cpu->midr = 0x411fc153; /* r1p3 */
@@ -1397,13 +1404,13 @@
     cpu->id_mmfr1 = 0x00000000;
     cpu->id_mmfr2 = 0x01200000;
     cpu->id_mmfr3 = 0x0211;
-    cpu->id_isar0 = 0x2101111;
-    cpu->id_isar1 = 0x13112111;
-    cpu->id_isar2 = 0x21232141;
-    cpu->id_isar3 = 0x01112131;
-    cpu->id_isar4 = 0x0010142;
-    cpu->id_isar5 = 0x0;
-    cpu->id_isar6 = 0x0;
+    cpu->isar.id_isar0 = 0x02101111;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232141;
+    cpu->isar.id_isar3 = 0x01112131;
+    cpu->isar.id_isar4 = 0x0010142;
+    cpu->isar.id_isar5 = 0x0;
+    cpu->isar.id_isar6 = 0x0;
     cpu->mp_is_up = true;
     cpu->pmsav7_dregion = 16;
     define_arm_cp_regs(cpu, cortexr5_cp_reginfo);
@@ -1438,8 +1445,8 @@
     set_feature(&cpu->env, ARM_FEATURE_EL3);
     cpu->midr = 0x410fc080;
     cpu->reset_fpsid = 0x410330c0;
-    cpu->mvfr0 = 0x11110222;
-    cpu->mvfr1 = 0x00011111;
+    cpu->isar.mvfr0 = 0x11110222;
+    cpu->isar.mvfr1 = 0x00011111;
     cpu->ctr = 0x82048004;
     cpu->reset_sctlr = 0x00c50078;
     cpu->id_pfr0 = 0x1031;
@@ -1450,11 +1457,11 @@
     cpu->id_mmfr1 = 0x20000000;
     cpu->id_mmfr2 = 0x01202000;
     cpu->id_mmfr3 = 0x11;
-    cpu->id_isar0 = 0x00101111;
-    cpu->id_isar1 = 0x12112111;
-    cpu->id_isar2 = 0x21232031;
-    cpu->id_isar3 = 0x11112131;
-    cpu->id_isar4 = 0x00111142;
+    cpu->isar.id_isar0 = 0x00101111;
+    cpu->isar.id_isar1 = 0x12112111;
+    cpu->isar.id_isar2 = 0x21232031;
+    cpu->isar.id_isar3 = 0x11112131;
+    cpu->isar.id_isar4 = 0x00111142;
     cpu->dbgdidr = 0x15141000;
     cpu->clidr = (1 << 27) | (2 << 24) | 3;
     cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */
@@ -1512,8 +1519,8 @@
     set_feature(&cpu->env, ARM_FEATURE_CBAR);
     cpu->midr = 0x410fc090;
     cpu->reset_fpsid = 0x41033090;
-    cpu->mvfr0 = 0x11110222;
-    cpu->mvfr1 = 0x01111111;
+    cpu->isar.mvfr0 = 0x11110222;
+    cpu->isar.mvfr1 = 0x01111111;
     cpu->ctr = 0x80038003;
     cpu->reset_sctlr = 0x00c50078;
     cpu->id_pfr0 = 0x1031;
@@ -1524,11 +1531,11 @@
     cpu->id_mmfr1 = 0x20000000;
     cpu->id_mmfr2 = 0x01230000;
     cpu->id_mmfr3 = 0x00002111;
-    cpu->id_isar0 = 0x00101111;
-    cpu->id_isar1 = 0x13112111;
-    cpu->id_isar2 = 0x21232041;
-    cpu->id_isar3 = 0x11112131;
-    cpu->id_isar4 = 0x00111142;
+    cpu->isar.id_isar0 = 0x00101111;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232041;
+    cpu->isar.id_isar3 = 0x11112131;
+    cpu->isar.id_isar4 = 0x00111142;
     cpu->dbgdidr = 0x35141000;
     cpu->clidr = (1 << 27) | (1 << 24) | 3;
     cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */
@@ -1573,8 +1580,8 @@
     cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A7;
     cpu->midr = 0x410fc075;
     cpu->reset_fpsid = 0x41023075;
-    cpu->mvfr0 = 0x10110222;
-    cpu->mvfr1 = 0x11111111;
+    cpu->isar.mvfr0 = 0x10110222;
+    cpu->isar.mvfr1 = 0x11111111;
     cpu->ctr = 0x84448003;
     cpu->reset_sctlr = 0x00c50078;
     cpu->id_pfr0 = 0x00001131;
@@ -1587,11 +1594,14 @@
     cpu->id_mmfr1 = 0x40000000;
     cpu->id_mmfr2 = 0x01240000;
     cpu->id_mmfr3 = 0x02102211;
-    cpu->id_isar0 = 0x01101110;
-    cpu->id_isar1 = 0x13112111;
-    cpu->id_isar2 = 0x21232041;
-    cpu->id_isar3 = 0x11112131;
-    cpu->id_isar4 = 0x10011142;
+    /* a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but
+     * table 4-41 gives 0x02101110, which includes the arm div insns.
+     */
+    cpu->isar.id_isar0 = 0x02101110;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232041;
+    cpu->isar.id_isar3 = 0x11112131;
+    cpu->isar.id_isar4 = 0x10011142;
     cpu->dbgdidr = 0x3515f005;
     cpu->clidr = 0x0a200023;
     cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */
@@ -1616,8 +1626,8 @@
     cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A15;
     cpu->midr = 0x412fc0f1;
     cpu->reset_fpsid = 0x410430f0;
-    cpu->mvfr0 = 0x10110222;
-    cpu->mvfr1 = 0x11111111;
+    cpu->isar.mvfr0 = 0x10110222;
+    cpu->isar.mvfr1 = 0x11111111;
     cpu->ctr = 0x8444c004;
     cpu->reset_sctlr = 0x00c50078;
     cpu->id_pfr0 = 0x00001131;
@@ -1630,11 +1640,11 @@
     cpu->id_mmfr1 = 0x20000000;
     cpu->id_mmfr2 = 0x01240000;
     cpu->id_mmfr3 = 0x02102211;
-    cpu->id_isar0 = 0x02101110;
-    cpu->id_isar1 = 0x13112111;
-    cpu->id_isar2 = 0x21232041;
-    cpu->id_isar3 = 0x11112131;
-    cpu->id_isar4 = 0x10011142;
+    cpu->isar.id_isar0 = 0x02101110;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232041;
+    cpu->isar.id_isar3 = 0x11112131;
+    cpu->isar.id_isar4 = 0x10011142;
     cpu->dbgdidr = 0x3515f021;
     cpu->clidr = 0x0a200023;
     cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */
@@ -1827,17 +1837,26 @@
         cortex_a15_initfn(obj);
 #ifdef CONFIG_USER_ONLY
         /* We don't set these in system emulation mode for the moment,
-         * since we don't correctly set the ID registers to advertise them,
+         * since we don't correctly set (all of) the ID registers to
+         * advertise them.
          */
         set_feature(&cpu->env, ARM_FEATURE_V8);
-        set_feature(&cpu->env, ARM_FEATURE_V8_AES);
-        set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
-        set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
-        set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
-        set_feature(&cpu->env, ARM_FEATURE_CRC);
-        set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
-        set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD);
-        set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
+        {
+            uint32_t t;
+
+            t = cpu->isar.id_isar5;
+            t = FIELD_DP32(t, ID_ISAR5, AES, 2);
+            t = FIELD_DP32(t, ID_ISAR5, SHA1, 1);
+            t = FIELD_DP32(t, ID_ISAR5, SHA2, 1);
+            t = FIELD_DP32(t, ID_ISAR5, CRC32, 1);
+            t = FIELD_DP32(t, ID_ISAR5, RDM, 1);
+            t = FIELD_DP32(t, ID_ISAR5, VCMA, 1);
+            cpu->isar.id_isar5 = t;
+
+            t = cpu->isar.id_isar6;
+            t = FIELD_DP32(t, ID_ISAR6, DP, 1);
+            cpu->isar.id_isar6 = t;
+        }
 #endif
     }
 }
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 65c0fa0..8e67799 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -56,6 +56,7 @@
 #define EXCP_SEMIHOST       16   /* semihosting call */
 #define EXCP_NOCP           17   /* v7M NOCP UsageFault */
 #define EXCP_INVSTATE       18   /* v7M INVSTATE UsageFault */
+#define EXCP_STKOF          19   /* v8M STKOF UsageFault */
 /* NB: add new EXCP_ defines to the array in arm_log_exception() too */
 
 #define ARMV7M_EXCP_RESET   1
@@ -530,6 +531,13 @@
          */
     } exception;
 
+    /* Information associated with an SError */
+    struct {
+        uint8_t pending;
+        uint8_t has_esr;
+        uint64_t esr;
+    } serror;
+
     /* Thumb-2 EE state.  */
     uint32_t teecr;
     uint32_t teehbr;
@@ -668,6 +676,8 @@
     PSCI_ON_PENDING = 2
 } ARMPSCIState;
 
+typedef struct ARMISARegisters ARMISARegisters;
+
 /**
  * ARMCPU:
  * @env: #CPUARMState
@@ -787,13 +797,28 @@
      * ARMv7AR ARM Architecture Reference Manual. A reset_ prefix
      * is used for reset values of non-constant registers; no reset_
      * prefix means a constant register.
+     * Some of these registers are split out into a substructure that
+     * is shared with the translators to control the ISA.
      */
+    struct ARMISARegisters {
+        uint32_t id_isar0;
+        uint32_t id_isar1;
+        uint32_t id_isar2;
+        uint32_t id_isar3;
+        uint32_t id_isar4;
+        uint32_t id_isar5;
+        uint32_t id_isar6;
+        uint32_t mvfr0;
+        uint32_t mvfr1;
+        uint32_t mvfr2;
+        uint64_t id_aa64isar0;
+        uint64_t id_aa64isar1;
+        uint64_t id_aa64pfr0;
+        uint64_t id_aa64pfr1;
+    } isar; /* queried by isar_feature_*() and cpu_isar_feature() */
     uint32_t midr;
     uint32_t revidr;
     uint32_t reset_fpsid;
-    uint32_t mvfr0;
-    uint32_t mvfr1;
-    uint32_t mvfr2;
     uint32_t ctr;
     uint32_t reset_sctlr;
     uint32_t id_pfr0;
@@ -807,21 +832,10 @@
     uint32_t id_mmfr2;
     uint32_t id_mmfr3;
     uint32_t id_mmfr4;
-    uint32_t id_isar0;
-    uint32_t id_isar1;
-    uint32_t id_isar2;
-    uint32_t id_isar3;
-    uint32_t id_isar4;
-    uint32_t id_isar5;
-    uint32_t id_isar6;
-    uint64_t id_aa64pfr0;
-    uint64_t id_aa64pfr1;
     uint64_t id_aa64dfr0;
     uint64_t id_aa64dfr1;
     uint64_t id_aa64afr0;
     uint64_t id_aa64afr1;
-    uint64_t id_aa64isar0;
-    uint64_t id_aa64isar1;
     uint64_t id_aa64mmfr0;
     uint64_t id_aa64mmfr1;
     uint32_t dbgdidr;
@@ -910,12 +924,23 @@
 int aarch64_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
 int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
 void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
+void aarch64_sve_change_el(CPUARMState *env, int old_el,
+                           int new_el, bool el0_a64);
+#else
+static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { }
+static inline void aarch64_sve_change_el(CPUARMState *env, int o,
+                                         int n, bool a)
+{ }
 #endif
 
 target_ulong do_arm_semihosting(CPUARMState *env);
 void aarch64_sync_32_to_64(CPUARMState *env);
 void aarch64_sync_64_to_32(CPUARMState *env);
 
+int fp_exception_el(CPUARMState *env, int cur_el);
+int sve_exception_el(CPUARMState *env, int cur_el);
+uint32_t sve_zcr_len_for_el(CPUARMState *env, int el);
+
 static inline bool is_a64(CPUARMState *env)
 {
     return env->aarch64;
@@ -1336,8 +1361,10 @@
 FIELD(V7M_CCR, DIV_0_TRP, 4, 1)
 FIELD(V7M_CCR, BFHFNMIGN, 8, 1)
 FIELD(V7M_CCR, STKALIGN, 9, 1)
+FIELD(V7M_CCR, STKOFHFNMIGN, 10, 1)
 FIELD(V7M_CCR, DC, 16, 1)
 FIELD(V7M_CCR, IC, 17, 1)
+FIELD(V7M_CCR, BP, 18, 1)
 
 /* V7M SCR bits */
 FIELD(V7M_SCR, SLEEPONEXIT, 1, 1)
@@ -1378,6 +1405,7 @@
 FIELD(V7M_CFSR, INVSTATE, 16 + 1, 1)
 FIELD(V7M_CFSR, INVPC, 16 + 2, 1)
 FIELD(V7M_CFSR, NOCP, 16 + 3, 1)
+FIELD(V7M_CFSR, STKOF, 16 + 4, 1)
 FIELD(V7M_CFSR, UNALIGNED, 16 + 8, 1)
 FIELD(V7M_CFSR, DIVBYZERO, 16 + 9, 1)
 
@@ -1428,6 +1456,104 @@
  */
 FIELD(V7M_CSSELR, INDEX, 0, 4)
 
+/*
+ * System register ID fields.
+ */
+FIELD(ID_ISAR0, SWAP, 0, 4)
+FIELD(ID_ISAR0, BITCOUNT, 4, 4)
+FIELD(ID_ISAR0, BITFIELD, 8, 4)
+FIELD(ID_ISAR0, CMPBRANCH, 12, 4)
+FIELD(ID_ISAR0, COPROC, 16, 4)
+FIELD(ID_ISAR0, DEBUG, 20, 4)
+FIELD(ID_ISAR0, DIVIDE, 24, 4)
+
+FIELD(ID_ISAR1, ENDIAN, 0, 4)
+FIELD(ID_ISAR1, EXCEPT, 4, 4)
+FIELD(ID_ISAR1, EXCEPT_AR, 8, 4)
+FIELD(ID_ISAR1, EXTEND, 12, 4)
+FIELD(ID_ISAR1, IFTHEN, 16, 4)
+FIELD(ID_ISAR1, IMMEDIATE, 20, 4)
+FIELD(ID_ISAR1, INTERWORK, 24, 4)
+FIELD(ID_ISAR1, JAZELLE, 28, 4)
+
+FIELD(ID_ISAR2, LOADSTORE, 0, 4)
+FIELD(ID_ISAR2, MEMHINT, 4, 4)
+FIELD(ID_ISAR2, MULTIACCESSINT, 8, 4)
+FIELD(ID_ISAR2, MULT, 12, 4)
+FIELD(ID_ISAR2, MULTS, 16, 4)
+FIELD(ID_ISAR2, MULTU, 20, 4)
+FIELD(ID_ISAR2, PSR_AR, 24, 4)
+FIELD(ID_ISAR2, REVERSAL, 28, 4)
+
+FIELD(ID_ISAR3, SATURATE, 0, 4)
+FIELD(ID_ISAR3, SIMD, 4, 4)
+FIELD(ID_ISAR3, SVC, 8, 4)
+FIELD(ID_ISAR3, SYNCHPRIM, 12, 4)
+FIELD(ID_ISAR3, TABBRANCH, 16, 4)
+FIELD(ID_ISAR3, T32COPY, 20, 4)
+FIELD(ID_ISAR3, TRUENOP, 24, 4)
+FIELD(ID_ISAR3, T32EE, 28, 4)
+
+FIELD(ID_ISAR4, UNPRIV, 0, 4)
+FIELD(ID_ISAR4, WITHSHIFTS, 4, 4)
+FIELD(ID_ISAR4, WRITEBACK, 8, 4)
+FIELD(ID_ISAR4, SMC, 12, 4)
+FIELD(ID_ISAR4, BARRIER, 16, 4)
+FIELD(ID_ISAR4, SYNCHPRIM_FRAC, 20, 4)
+FIELD(ID_ISAR4, PSR_M, 24, 4)
+FIELD(ID_ISAR4, SWP_FRAC, 28, 4)
+
+FIELD(ID_ISAR5, SEVL, 0, 4)
+FIELD(ID_ISAR5, AES, 4, 4)
+FIELD(ID_ISAR5, SHA1, 8, 4)
+FIELD(ID_ISAR5, SHA2, 12, 4)
+FIELD(ID_ISAR5, CRC32, 16, 4)
+FIELD(ID_ISAR5, RDM, 24, 4)
+FIELD(ID_ISAR5, VCMA, 28, 4)
+
+FIELD(ID_ISAR6, JSCVT, 0, 4)
+FIELD(ID_ISAR6, DP, 4, 4)
+FIELD(ID_ISAR6, FHM, 8, 4)
+FIELD(ID_ISAR6, SB, 12, 4)
+FIELD(ID_ISAR6, SPECRES, 16, 4)
+
+FIELD(ID_AA64ISAR0, AES, 4, 4)
+FIELD(ID_AA64ISAR0, SHA1, 8, 4)
+FIELD(ID_AA64ISAR0, SHA2, 12, 4)
+FIELD(ID_AA64ISAR0, CRC32, 16, 4)
+FIELD(ID_AA64ISAR0, ATOMIC, 20, 4)
+FIELD(ID_AA64ISAR0, RDM, 28, 4)
+FIELD(ID_AA64ISAR0, SHA3, 32, 4)
+FIELD(ID_AA64ISAR0, SM3, 36, 4)
+FIELD(ID_AA64ISAR0, SM4, 40, 4)
+FIELD(ID_AA64ISAR0, DP, 44, 4)
+FIELD(ID_AA64ISAR0, FHM, 48, 4)
+FIELD(ID_AA64ISAR0, TS, 52, 4)
+FIELD(ID_AA64ISAR0, TLB, 56, 4)
+FIELD(ID_AA64ISAR0, RNDR, 60, 4)
+
+FIELD(ID_AA64ISAR1, DPB, 0, 4)
+FIELD(ID_AA64ISAR1, APA, 4, 4)
+FIELD(ID_AA64ISAR1, API, 8, 4)
+FIELD(ID_AA64ISAR1, JSCVT, 12, 4)
+FIELD(ID_AA64ISAR1, FCMA, 16, 4)
+FIELD(ID_AA64ISAR1, LRCPC, 20, 4)
+FIELD(ID_AA64ISAR1, GPA, 24, 4)
+FIELD(ID_AA64ISAR1, GPI, 28, 4)
+FIELD(ID_AA64ISAR1, FRINTTS, 32, 4)
+FIELD(ID_AA64ISAR1, SB, 36, 4)
+FIELD(ID_AA64ISAR1, SPECRES, 40, 4)
+
+FIELD(ID_AA64PFR0, EL0, 0, 4)
+FIELD(ID_AA64PFR0, EL1, 4, 4)
+FIELD(ID_AA64PFR0, EL2, 8, 4)
+FIELD(ID_AA64PFR0, EL3, 12, 4)
+FIELD(ID_AA64PFR0, FP, 16, 4)
+FIELD(ID_AA64PFR0, ADVSIMD, 20, 4)
+FIELD(ID_AA64PFR0, GIC, 24, 4)
+FIELD(ID_AA64PFR0, RAS, 28, 4)
+FIELD(ID_AA64PFR0, SVE, 32, 4)
+
 QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK);
 
 /* If adding a feature bit which corresponds to a Linux ELF
@@ -1447,7 +1573,6 @@
     ARM_FEATURE_VFP3,
     ARM_FEATURE_VFP_FP16,
     ARM_FEATURE_NEON,
-    ARM_FEATURE_THUMB_DIV, /* divide supported in Thumb encoding */
     ARM_FEATURE_M, /* Microcontroller profile.  */
     ARM_FEATURE_OMAPCP, /* OMAP specific CP15 ops handling.  */
     ARM_FEATURE_THUMB2EE,
@@ -1457,7 +1582,6 @@
     ARM_FEATURE_V5,
     ARM_FEATURE_STRONGARM,
     ARM_FEATURE_VAPA, /* cp15 VA to PA lookups */
-    ARM_FEATURE_ARM_DIV, /* divide supported in ARM encoding */
     ARM_FEATURE_VFP4, /* VFPv4 (implies that NEON is v2) */
     ARM_FEATURE_GENERIC_TIMER,
     ARM_FEATURE_MVFR, /* Media and VFP Feature Registers 0 and 1 */
@@ -1470,30 +1594,15 @@
     ARM_FEATURE_LPAE, /* has Large Physical Address Extension */
     ARM_FEATURE_V8,
     ARM_FEATURE_AARCH64, /* supports 64 bit mode */
-    ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */
     ARM_FEATURE_CBAR, /* has cp15 CBAR */
     ARM_FEATURE_CRC, /* ARMv8 CRC instructions */
     ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */
     ARM_FEATURE_EL2, /* has EL2 Virtualization support */
     ARM_FEATURE_EL3, /* has EL3 Secure monitor support */
-    ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */
-    ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */
-    ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */
     ARM_FEATURE_THUMB_DSP, /* DSP insns supported in the Thumb encodings */
     ARM_FEATURE_PMU, /* has PMU support */
     ARM_FEATURE_VBAR, /* has cp15 VBAR */
     ARM_FEATURE_M_SECURITY, /* M profile Security Extension */
-    ARM_FEATURE_JAZELLE, /* has (trivial) Jazelle implementation */
-    ARM_FEATURE_SVE, /* has Scalable Vector Extension */
-    ARM_FEATURE_V8_SHA512, /* implements SHA512 part of v8 Crypto Extensions */
-    ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */
-    ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */
-    ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
-    ARM_FEATURE_V8_ATOMICS, /* ARMv8.1-Atomics feature */
-    ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */
-    ARM_FEATURE_V8_DOTPROD, /* implements v8.2 simd dot product */
-    ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */
-    ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions.  */
     ARM_FEATURE_M_MAIN, /* M profile Main Extension */
 };
 
@@ -2842,6 +2951,9 @@
 /* For M profile only, Handler (ie not Thread) mode */
 #define ARM_TBFLAG_HANDLER_SHIFT    21
 #define ARM_TBFLAG_HANDLER_MASK     (1 << ARM_TBFLAG_HANDLER_SHIFT)
+/* For M profile only, whether we should generate stack-limit checks */
+#define ARM_TBFLAG_STACKCHECK_SHIFT 22
+#define ARM_TBFLAG_STACKCHECK_MASK  (1 << ARM_TBFLAG_STACKCHECK_SHIFT)
 
 /* Bit usage when in AArch64 state */
 #define ARM_TBFLAG_TBI0_SHIFT 0        /* TBI0 for EL0/1 or TBI for EL2/3 */
@@ -2884,6 +2996,8 @@
     (((F) & ARM_TBFLAG_BE_DATA_MASK) >> ARM_TBFLAG_BE_DATA_SHIFT)
 #define ARM_TBFLAG_HANDLER(F) \
     (((F) & ARM_TBFLAG_HANDLER_MASK) >> ARM_TBFLAG_HANDLER_SHIFT)
+#define ARM_TBFLAG_STACKCHECK(F) \
+    (((F) & ARM_TBFLAG_STACKCHECK_MASK) >> ARM_TBFLAG_STACKCHECK_SHIFT)
 #define ARM_TBFLAG_TBI0(F) \
     (((F) & ARM_TBFLAG_TBI0_MASK) >> ARM_TBFLAG_TBI0_SHIFT)
 #define ARM_TBFLAG_TBI1(F) \
@@ -3040,4 +3154,157 @@
 /* Shared between translate-sve.c and sve_helper.c.  */
 extern const uint64_t pred_esz_masks[4];
 
+/*
+ * 32-bit feature tests via id registers.
+ */
+static inline bool isar_feature_thumb_div(const ARMISARegisters *id)
+{ /* ID_ISAR0.DIVIDE != 0: divide supported in Thumb encoding. */
+    return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0;
+}
+
+static inline bool isar_feature_arm_div(const ARMISARegisters *id)
+{ /* ID_ISAR0.DIVIDE > 1: divide also supported in ARM encoding. */
+    return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1;
+}
+
+static inline bool isar_feature_jazelle(const ARMISARegisters *id)
+{ /* ID_ISAR1.JAZELLE != 0: has a (trivial) Jazelle implementation. */
+    return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0;
+}
+
+static inline bool isar_feature_aa32_aes(const ARMISARegisters *id)
+{ /* ID_ISAR5.AES != 0: AES part of the v8 Crypto Extensions. */
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) != 0;
+}
+
+static inline bool isar_feature_aa32_pmull(const ARMISARegisters *id)
+{ /* ID_ISAR5.AES > 1: PMULL part of the v8 Crypto Extensions. */
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) > 1;
+}
+
+static inline bool isar_feature_aa32_sha1(const ARMISARegisters *id)
+{ /* ID_ISAR5.SHA1 != 0: SHA1 part of the v8 Crypto Extensions. */
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA1) != 0;
+}
+
+static inline bool isar_feature_aa32_sha2(const ARMISARegisters *id)
+{ /* ID_ISAR5.SHA2 != 0: SHA256 part of the v8 Crypto Extensions. */
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA2) != 0;
+}
+
+static inline bool isar_feature_aa32_crc32(const ARMISARegisters *id)
+{ /* ID_ISAR5.CRC32 != 0: ARMv8 CRC instructions. */
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, CRC32) != 0;
+}
+
+static inline bool isar_feature_aa32_rdm(const ARMISARegisters *id)
+{ /* ID_ISAR5.RDM != 0: v8.1 SIMD round multiply. */
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, RDM) != 0;
+}
+
+static inline bool isar_feature_aa32_vcma(const ARMISARegisters *id)
+{ /* ID_ISAR5.VCMA != 0: complex number part of the v8.3 extensions. */
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0;
+}
+
+static inline bool isar_feature_aa32_dp(const ARMISARegisters *id)
+{ /* ID_ISAR6.DP != 0: v8.2 SIMD dot product. */
+    return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0;
+}
+
+static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id)
+{
+    /*
+     * Placeholder for use by VCMA: it borrows the AArch64 ID_AA64PFR0.FP
+     * field until the rest of the ARMv8.2-FP16 extension is implemented
+     * for aa32 mode, at which point we can set and check MVFR1.FPHP.
+     */
+    return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1;
+}
+
+/*
+ * 64-bit feature tests via id registers.
+ */
+static inline bool isar_feature_aa64_aes(const ARMISARegisters *id)
+{ /* ID_AA64ISAR0.AES != 0: AES part of the v8 Crypto Extensions. */
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) != 0;
+}
+
+static inline bool isar_feature_aa64_pmull(const ARMISARegisters *id)
+{ /* ID_AA64ISAR0.AES > 1: PMULL part of the v8 Crypto Extensions. */
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) > 1;
+}
+
+static inline bool isar_feature_aa64_sha1(const ARMISARegisters *id)
+{ /* ID_AA64ISAR0.SHA1 != 0: SHA1 part of the v8 Crypto Extensions. */
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA1) != 0;
+}
+
+static inline bool isar_feature_aa64_sha256(const ARMISARegisters *id)
+{ /* ID_AA64ISAR0.SHA2 != 0: SHA256 part of the v8 Crypto Extensions. */
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) != 0;
+}
+
+static inline bool isar_feature_aa64_sha512(const ARMISARegisters *id)
+{ /* ID_AA64ISAR0.SHA2 > 1: SHA512 part of the v8 Crypto Extensions. */
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) > 1;
+}
+
+static inline bool isar_feature_aa64_crc32(const ARMISARegisters *id)
+{ /* ID_AA64ISAR0.CRC32 != 0: ARMv8 CRC instructions. */
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, CRC32) != 0;
+}
+
+static inline bool isar_feature_aa64_atomics(const ARMISARegisters *id)
+{ /* ID_AA64ISAR0.ATOMIC != 0: ARMv8.1-Atomics feature. */
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, ATOMIC) != 0;
+}
+
+static inline bool isar_feature_aa64_rdm(const ARMISARegisters *id)
+{ /* ID_AA64ISAR0.RDM != 0: v8.1 SIMD round multiply. */
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RDM) != 0;
+}
+
+static inline bool isar_feature_aa64_sha3(const ARMISARegisters *id)
+{ /* ID_AA64ISAR0.SHA3 != 0: SHA3 part of the v8 Crypto Extensions. */
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA3) != 0;
+}
+
+static inline bool isar_feature_aa64_sm3(const ARMISARegisters *id)
+{ /* ID_AA64ISAR0.SM3 != 0: SM3 part of the v8 Crypto Extensions. */
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM3) != 0;
+}
+
+static inline bool isar_feature_aa64_sm4(const ARMISARegisters *id)
+{ /* ID_AA64ISAR0.SM4 != 0: SM4 part of the v8 Crypto Extensions. */
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM4) != 0;
+}
+
+static inline bool isar_feature_aa64_dp(const ARMISARegisters *id)
+{ /* ID_AA64ISAR0.DP != 0: v8.2 SIMD dot product. */
+    return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0;
+}
+
+static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id)
+{ /* ID_AA64ISAR1.FCMA != 0: complex number part of v8.3 extensions. */
+    return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0;
+}
+
+static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id)
+{ /* ID_AA64PFR0.FP == 1: v8.2 half-precision float. */
+    /* We always set the AdvSIMD and FP fields identically wrt FP16.  */
+    return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1;
+}
+
+static inline bool isar_feature_aa64_sve(const ARMISARegisters *id)
+{ /* ID_AA64PFR0.SVE != 0: has the Scalable Vector Extension. */
+    return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0;
+}
+
+/*
+ * Forward an ARMCPU pointer (evaluated once) to isar_feature_<name>() above.
+ */
+#define cpu_isar_feature(name, cpu) \
+    ({ ARMCPU *cpu_ = (cpu); isar_feature_##name(&cpu_->isar); })
+
 #endif
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 800bff7..873f059 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -51,7 +51,7 @@
 }
 #endif
 
-static const ARMCPRegInfo cortex_a57_a53_cp_reginfo[] = {
+static const ARMCPRegInfo cortex_a72_a57_a53_cp_reginfo[] = {
 #ifndef CONFIG_USER_ONLY
     { .name = "L2CTLR_EL1", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 2,
@@ -109,11 +109,6 @@
     set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
     set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
-    set_feature(&cpu->env, ARM_FEATURE_V8_AES);
-    set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
-    set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
-    set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
-    set_feature(&cpu->env, ARM_FEATURE_CRC);
     set_feature(&cpu->env, ARM_FEATURE_EL2);
     set_feature(&cpu->env, ARM_FEATURE_EL3);
     set_feature(&cpu->env, ARM_FEATURE_PMU);
@@ -121,9 +116,9 @@
     cpu->midr = 0x411fd070;
     cpu->revidr = 0x00000000;
     cpu->reset_fpsid = 0x41034070;
-    cpu->mvfr0 = 0x10110222;
-    cpu->mvfr1 = 0x12111111;
-    cpu->mvfr2 = 0x00000043;
+    cpu->isar.mvfr0 = 0x10110222;
+    cpu->isar.mvfr1 = 0x12111111;
+    cpu->isar.mvfr2 = 0x00000043;
     cpu->ctr = 0x8444c004;
     cpu->reset_sctlr = 0x00c50838;
     cpu->id_pfr0 = 0x00000131;
@@ -134,18 +129,18 @@
     cpu->id_mmfr1 = 0x40000000;
     cpu->id_mmfr2 = 0x01260000;
     cpu->id_mmfr3 = 0x02102211;
-    cpu->id_isar0 = 0x02101110;
-    cpu->id_isar1 = 0x13112111;
-    cpu->id_isar2 = 0x21232042;
-    cpu->id_isar3 = 0x01112131;
-    cpu->id_isar4 = 0x00011142;
-    cpu->id_isar5 = 0x00011121;
-    cpu->id_isar6 = 0;
-    cpu->id_aa64pfr0 = 0x00002222;
+    cpu->isar.id_isar0 = 0x02101110;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232042;
+    cpu->isar.id_isar3 = 0x01112131;
+    cpu->isar.id_isar4 = 0x00011142;
+    cpu->isar.id_isar5 = 0x00011121;
+    cpu->isar.id_isar6 = 0;
+    cpu->isar.id_aa64pfr0 = 0x00002222;
     cpu->id_aa64dfr0 = 0x10305106;
     cpu->pmceid0 = 0x00000000;
     cpu->pmceid1 = 0x00000000;
-    cpu->id_aa64isar0 = 0x00011120;
+    cpu->isar.id_aa64isar0 = 0x00011120;
     cpu->id_aa64mmfr0 = 0x00001124;
     cpu->dbgdidr = 0x3516d000;
     cpu->clidr = 0x0a200023;
@@ -156,7 +151,7 @@
     cpu->gic_num_lrs = 4;
     cpu->gic_vpribits = 5;
     cpu->gic_vprebits = 5;
-    define_arm_cp_regs(cpu, cortex_a57_a53_cp_reginfo);
+    define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
 }
 
 static void aarch64_a53_initfn(Object *obj)
@@ -170,11 +165,6 @@
     set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
     set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
-    set_feature(&cpu->env, ARM_FEATURE_V8_AES);
-    set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
-    set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
-    set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
-    set_feature(&cpu->env, ARM_FEATURE_CRC);
     set_feature(&cpu->env, ARM_FEATURE_EL2);
     set_feature(&cpu->env, ARM_FEATURE_EL3);
     set_feature(&cpu->env, ARM_FEATURE_PMU);
@@ -182,9 +172,9 @@
     cpu->midr = 0x410fd034;
     cpu->revidr = 0x00000000;
     cpu->reset_fpsid = 0x41034070;
-    cpu->mvfr0 = 0x10110222;
-    cpu->mvfr1 = 0x12111111;
-    cpu->mvfr2 = 0x00000043;
+    cpu->isar.mvfr0 = 0x10110222;
+    cpu->isar.mvfr1 = 0x12111111;
+    cpu->isar.mvfr2 = 0x00000043;
     cpu->ctr = 0x84448004; /* L1Ip = VIPT */
     cpu->reset_sctlr = 0x00c50838;
     cpu->id_pfr0 = 0x00000131;
@@ -195,16 +185,16 @@
     cpu->id_mmfr1 = 0x40000000;
     cpu->id_mmfr2 = 0x01260000;
     cpu->id_mmfr3 = 0x02102211;
-    cpu->id_isar0 = 0x02101110;
-    cpu->id_isar1 = 0x13112111;
-    cpu->id_isar2 = 0x21232042;
-    cpu->id_isar3 = 0x01112131;
-    cpu->id_isar4 = 0x00011142;
-    cpu->id_isar5 = 0x00011121;
-    cpu->id_isar6 = 0;
-    cpu->id_aa64pfr0 = 0x00002222;
+    cpu->isar.id_isar0 = 0x02101110;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232042;
+    cpu->isar.id_isar3 = 0x01112131;
+    cpu->isar.id_isar4 = 0x00011142;
+    cpu->isar.id_isar5 = 0x00011121;
+    cpu->isar.id_isar6 = 0;
+    cpu->isar.id_aa64pfr0 = 0x00002222;
     cpu->id_aa64dfr0 = 0x10305106;
-    cpu->id_aa64isar0 = 0x00011120;
+    cpu->isar.id_aa64isar0 = 0x00011120;
     cpu->id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */
     cpu->dbgdidr = 0x3516d000;
     cpu->clidr = 0x0a200023;
@@ -215,7 +205,61 @@
     cpu->gic_num_lrs = 4;
     cpu->gic_vpribits = 5;
     cpu->gic_vprebits = 5;
-    define_arm_cp_regs(cpu, cortex_a57_a53_cp_reginfo);
+    define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
+}
+
+static void aarch64_a72_initfn(Object *obj)
+{
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    cpu->dtb_compatible = "arm,cortex-a72";
+    set_feature(&cpu->env, ARM_FEATURE_V8);
+    set_feature(&cpu->env, ARM_FEATURE_VFP4);
+    set_feature(&cpu->env, ARM_FEATURE_NEON);
+    set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+    set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+    set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+    set_feature(&cpu->env, ARM_FEATURE_EL2);
+    set_feature(&cpu->env, ARM_FEATURE_EL3);
+    set_feature(&cpu->env, ARM_FEATURE_PMU);
+    cpu->midr = 0x410fd083;
+    cpu->revidr = 0x00000000;
+    cpu->reset_fpsid = 0x41034080;
+    cpu->isar.mvfr0 = 0x10110222;
+    cpu->isar.mvfr1 = 0x12111111;
+    cpu->isar.mvfr2 = 0x00000043;
+    cpu->ctr = 0x8444c004;
+    cpu->reset_sctlr = 0x00c50838;
+    cpu->id_pfr0 = 0x00000131;
+    cpu->id_pfr1 = 0x00011011;
+    cpu->id_dfr0 = 0x03010066;
+    cpu->id_afr0 = 0x00000000;
+    cpu->id_mmfr0 = 0x10201105;
+    cpu->id_mmfr1 = 0x40000000;
+    cpu->id_mmfr2 = 0x01260000;
+    cpu->id_mmfr3 = 0x02102211;
+    cpu->isar.id_isar0 = 0x02101110;
+    cpu->isar.id_isar1 = 0x13112111;
+    cpu->isar.id_isar2 = 0x21232042;
+    cpu->isar.id_isar3 = 0x01112131;
+    cpu->isar.id_isar4 = 0x00011142;
+    cpu->isar.id_isar5 = 0x00011121;
+    cpu->isar.id_aa64pfr0 = 0x00002222;
+    cpu->id_aa64dfr0 = 0x10305106;
+    cpu->pmceid0 = 0x00000000;
+    cpu->pmceid1 = 0x00000000;
+    cpu->isar.id_aa64isar0 = 0x00011120;
+    cpu->id_aa64mmfr0 = 0x00001124;
+    cpu->dbgdidr = 0x3516d000;
+    cpu->clidr = 0x0a200023;
+    cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
+    cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
+    cpu->ccsidr[2] = 0x707fe07a; /* 1MB L2 cache */
+    cpu->dcz_blocksize = 4; /* 64 bytes */
+    cpu->gic_num_lrs = 4;
+    cpu->gic_vpribits = 5;
+    cpu->gic_vprebits = 5;
+    define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
 }
 
 static void cpu_max_get_sve_vq(Object *obj, Visitor *v, const char *name,
@@ -253,24 +297,55 @@
     if (kvm_enabled()) {
         kvm_arm_set_cpu_features_from_host(cpu);
     } else {
+        uint64_t t;
+        uint32_t u;
         aarch64_a57_initfn(obj);
-#ifdef CONFIG_USER_ONLY
-        /* We don't set these in system emulation mode for the moment,
-         * since we don't correctly set the ID registers to advertise them,
-         * and in some cases they're only available in AArch64 and not AArch32,
-         * whereas the architecture requires them to be present in both if
-         * present in either.
+
+        t = cpu->isar.id_aa64isar0;
+        t = FIELD_DP64(t, ID_AA64ISAR0, AES, 2); /* AES + PMULL */
+        t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 1);
+        t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 2); /* SHA512 */
+        t = FIELD_DP64(t, ID_AA64ISAR0, CRC32, 1);
+        t = FIELD_DP64(t, ID_AA64ISAR0, ATOMIC, 2);
+        t = FIELD_DP64(t, ID_AA64ISAR0, RDM, 1);
+        t = FIELD_DP64(t, ID_AA64ISAR0, SHA3, 1);
+        t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 1);
+        t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 1);
+        t = FIELD_DP64(t, ID_AA64ISAR0, DP, 1);
+        cpu->isar.id_aa64isar0 = t;
+
+        t = cpu->isar.id_aa64isar1;
+        t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1);
+        cpu->isar.id_aa64isar1 = t;
+
+        t = cpu->isar.id_aa64pfr0;
+        t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1);
+        t = FIELD_DP64(t, ID_AA64PFR0, FP, 1);
+        t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1);
+        cpu->isar.id_aa64pfr0 = t;
+
+        /* Replicate the same data to the 32-bit id registers.  */
+        u = cpu->isar.id_isar5;
+        u = FIELD_DP32(u, ID_ISAR5, AES, 2); /* AES + PMULL */
+        u = FIELD_DP32(u, ID_ISAR5, SHA1, 1);
+        u = FIELD_DP32(u, ID_ISAR5, SHA2, 1);
+        u = FIELD_DP32(u, ID_ISAR5, CRC32, 1);
+        u = FIELD_DP32(u, ID_ISAR5, RDM, 1);
+        u = FIELD_DP32(u, ID_ISAR5, VCMA, 1);
+        cpu->isar.id_isar5 = u;
+
+        u = cpu->isar.id_isar6;
+        u = FIELD_DP32(u, ID_ISAR6, DP, 1);
+        cpu->isar.id_isar6 = u;
+
+        /*
+         * FIXME: We do not yet support ARMv8.2-fp16 for AArch32,
+         * so do not set MVFR1.FPHP.  Strictly speaking this is not legal,
+         * but it is also not legal to enable SVE without support for FP16,
+         * and enabling SVE in system mode is more useful in the short term.
          */
-        set_feature(&cpu->env, ARM_FEATURE_V8_SHA512);
-        set_feature(&cpu->env, ARM_FEATURE_V8_SHA3);
-        set_feature(&cpu->env, ARM_FEATURE_V8_SM3);
-        set_feature(&cpu->env, ARM_FEATURE_V8_SM4);
-        set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS);
-        set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
-        set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD);
-        set_feature(&cpu->env, ARM_FEATURE_V8_FP16);
-        set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
-        set_feature(&cpu->env, ARM_FEATURE_SVE);
+
+#ifdef CONFIG_USER_ONLY
         /* For usermode -cpu max we can use a larger and more efficient DCZ
          * blocksize since we don't have to follow what the hardware does.
          */
@@ -293,6 +368,7 @@
 static const ARMCPUInfo aarch64_cpus[] = {
     { .name = "cortex-a57",         .initfn = aarch64_a57_initfn },
     { .name = "cortex-a53",         .initfn = aarch64_a53_initfn },
+    { .name = "cortex-a72",         .initfn = aarch64_a72_initfn },
     { .name = "max",                .initfn = aarch64_max_initfn },
     { .name = NULL }
 };
@@ -410,45 +486,3 @@
 }
 
 type_init(aarch64_cpu_register_types)
-
-/* The manual says that when SVE is enabled and VQ is widened the
- * implementation is allowed to zero the previously inaccessible
- * portion of the registers.  The corollary to that is that when
- * SVE is enabled and VQ is narrowed we are also allowed to zero
- * the now inaccessible portion of the registers.
- *
- * The intent of this is that no predicate bit beyond VQ is ever set.
- * Which means that some operations on predicate registers themselves
- * may operate on full uint64_t or even unrolled across the maximum
- * uint64_t[4].  Performing 4 bits of host arithmetic unconditionally
- * may well be cheaper than conditionals to restrict the operation
- * to the relevant portion of a uint16_t[16].
- *
- * TODO: Need to call this for changes to the real system registers
- * and EL state changes.
- */
-void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
-{
-    int i, j;
-    uint64_t pmask;
-
-    assert(vq >= 1 && vq <= ARM_MAX_VQ);
-    assert(vq <= arm_env_get_cpu(env)->sve_max_vq);
-
-    /* Zap the high bits of the zregs.  */
-    for (i = 0; i < 32; i++) {
-        memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq));
-    }
-
-    /* Zap the high bits of the pregs and ffr.  */
-    pmask = 0;
-    if (vq & 3) {
-        pmask = ~(-1ULL << (16 * (vq & 3)));
-    }
-    for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) {
-        for (i = 0; i < 17; ++i) {
-            env->vfp.pregs[i].p[j] &= pmask;
-        }
-        pmask = 0;
-    }
-}
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 7f6ad30..61799d2 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -30,6 +30,7 @@
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "qemu/int128.h"
+#include "qemu/atomic128.h"
 #include "tcg.h"
 #include "fpu/softfloat.h"
 #include <zlib.h> /* For crc32 */
@@ -509,189 +510,187 @@
     return crc32c(acc, buf, bytes) ^ 0xffffffff;
 }
 
-/* Returns 0 on success; 1 otherwise.  */
-static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr,
-                                       uint64_t new_lo, uint64_t new_hi,
-                                       bool parallel, uintptr_t ra)
+uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
+                                     uint64_t new_lo, uint64_t new_hi)
 {
-    Int128 oldv, cmpv, newv;
+    Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
+    Int128 newv = int128_make128(new_lo, new_hi);
+    Int128 oldv;
+    uintptr_t ra = GETPC();
+    uint64_t o0, o1;
     bool success;
 
-    cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
-    newv = int128_make128(new_lo, new_hi);
-
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
-        oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
-        success = int128_eq(oldv, cmpv);
-#endif
-    } else {
-        uint64_t o0, o1;
-
 #ifdef CONFIG_USER_ONLY
-        /* ??? Enforce alignment.  */
-        uint64_t *haddr = g2h(addr);
+    /* ??? Enforce alignment.  */
+    uint64_t *haddr = g2h(addr);
 
-        helper_retaddr = ra;
-        o0 = ldq_le_p(haddr + 0);
-        o1 = ldq_le_p(haddr + 1);
-        oldv = int128_make128(o0, o1);
+    helper_retaddr = ra;
+    o0 = ldq_le_p(haddr + 0);
+    o1 = ldq_le_p(haddr + 1);
+    oldv = int128_make128(o0, o1);
 
-        success = int128_eq(oldv, cmpv);
-        if (success) {
-            stq_le_p(haddr + 0, int128_getlo(newv));
-            stq_le_p(haddr + 1, int128_gethi(newv));
-        }
-        helper_retaddr = 0;
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
-        TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
-
-        o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
-        o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
-        oldv = int128_make128(o0, o1);
-
-        success = int128_eq(oldv, cmpv);
-        if (success) {
-            helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
-            helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
-        }
-#endif
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        stq_le_p(haddr + 0, int128_getlo(newv));
+        stq_le_p(haddr + 1, int128_gethi(newv));
     }
+    helper_retaddr = 0;
+#else
+    int mem_idx = cpu_mmu_index(env, false);
+    TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
+    TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
+
+    o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
+    o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
+    oldv = int128_make128(o0, o1);
+
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
+        helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
+    }
+#endif
 
     return !success;
 }
 
-uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
-                                              uint64_t new_lo, uint64_t new_hi)
-{
-    return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false, GETPC());
-}
-
 uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
                                               uint64_t new_lo, uint64_t new_hi)
 {
-    return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true, GETPC());
-}
-
-static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr,
-                                       uint64_t new_lo, uint64_t new_hi,
-                                       bool parallel, uintptr_t ra)
-{
     Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
     bool success;
+    int mem_idx;
+    TCGMemOpIdx oi;
 
-    /* high and low need to be switched here because this is not actually a
-     * 128bit store but two doublewords stored consecutively
-     */
-    cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
-    newv = int128_make128(new_hi, new_lo);
+    assert(HAVE_CMPXCHG128);
 
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
-        oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
-        success = int128_eq(oldv, cmpv);
-#endif
-    } else {
-        uint64_t o0, o1;
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
 
-#ifdef CONFIG_USER_ONLY
-        /* ??? Enforce alignment.  */
-        uint64_t *haddr = g2h(addr);
+    cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
+    newv = int128_make128(new_lo, new_hi);
+    oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
 
-        helper_retaddr = ra;
-        o1 = ldq_be_p(haddr + 0);
-        o0 = ldq_be_p(haddr + 1);
-        oldv = int128_make128(o0, o1);
-
-        success = int128_eq(oldv, cmpv);
-        if (success) {
-            stq_be_p(haddr + 0, int128_gethi(newv));
-            stq_be_p(haddr + 1, int128_getlo(newv));
-        }
-        helper_retaddr = 0;
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
-        TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
-
-        o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
-        o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
-        oldv = int128_make128(o0, o1);
-
-        success = int128_eq(oldv, cmpv);
-        if (success) {
-            helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
-            helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
-        }
-#endif
-    }
-
+    success = int128_eq(oldv, cmpv);
     return !success;
 }
 
 uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
                                      uint64_t new_lo, uint64_t new_hi)
 {
-    return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false, GETPC());
+    /*
+     * High and low are swapped when packing: memory holds two consecutive
+     * doublewords, and addr + 0 maps to the HIGH half of oldv/cmpv/newv.
+     */
+    Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
+    Int128 newv = int128_make128(new_hi, new_lo);
+    Int128 oldv;
+    uintptr_t ra = GETPC();
+    uint64_t o0, o1;
+    bool success;
+
+#ifdef CONFIG_USER_ONLY
+    /* ??? Enforce alignment.  */
+    uint64_t *haddr = g2h(addr);
+
+    helper_retaddr = ra;
+    o1 = ldq_be_p(haddr + 0);
+    o0 = ldq_be_p(haddr + 1);
+    oldv = int128_make128(o0, o1);
+
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        stq_be_p(haddr + 0, int128_gethi(newv));
+        stq_be_p(haddr + 1, int128_getlo(newv));
+    }
+    helper_retaddr = 0;
+#else
+    int mem_idx = cpu_mmu_index(env, false);
+    TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
+    TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
+
+    o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
+    o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
+    oldv = int128_make128(o0, o1);
+
+    success = int128_eq(oldv, cmpv);
+    if (success) {
+        helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
+        helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
+    }
+#endif
+
+    return !success;
 }
 
 uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
-                                     uint64_t new_lo, uint64_t new_hi)
+                                              uint64_t new_lo, uint64_t new_hi)
 {
-    return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC());
+    Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
+    bool success;
+    int mem_idx;
+    TCGMemOpIdx oi;
+
+    assert(HAVE_CMPXCHG128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
+
+    /*
+     * High and low need to be switched here because this is not actually a
+     * 128bit store but two doublewords stored consecutively
+     */
+    cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
+    newv = int128_make128(new_hi, new_lo);
+    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
+
+    success = int128_eq(oldv, cmpv);
+    return !success;
 }
 
 /* Writes back the old data into Rs.  */
 void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                               uint64_t new_lo, uint64_t new_hi)
 {
-    uintptr_t ra = GETPC();
-#ifndef CONFIG_ATOMIC128
-    cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
     Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
+    int mem_idx;
+    TCGMemOpIdx oi;
+
+    assert(HAVE_CMPXCHG128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
 
     cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
     newv = int128_make128(new_lo, new_hi);
-
-    int mem_idx = cpu_mmu_index(env, false);
-    TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
     oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
 
     env->xregs[rs] = int128_getlo(oldv);
     env->xregs[rs + 1] = int128_gethi(oldv);
-#endif
 }
 
 void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                               uint64_t new_hi, uint64_t new_lo)
 {
-    uintptr_t ra = GETPC();
-#ifndef CONFIG_ATOMIC128
-    cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
     Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
+    int mem_idx;
+    TCGMemOpIdx oi;
+
+    assert(HAVE_CMPXCHG128);
+    /* Big-endian memop to match the _be cmpxchg helper called below.  */
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
 
     cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
     newv = int128_make128(new_lo, new_hi);
-
-    int mem_idx = cpu_mmu_index(env, false);
-    TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
     oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
 
     env->xregs[rs + 1] = int128_getlo(oldv);
     env->xregs[rs] = int128_gethi(oldv);
-#endif
 }
 
 /*
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 023952a..9e79182 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -1128,20 +1128,35 @@
 DEF_HELPER_FLAGS_4(sve_ld3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ld4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ld1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld2hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld3hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld4hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ld1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld2ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld3ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld4ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ld1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld2dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld3dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld4dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_4(sve_ld1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ld1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1150,13 +1165,21 @@
 DEF_HELPER_FLAGS_4(sve_ld1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ld1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ld1hsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld1hdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld1hss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld1hds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ld1sdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ld1sds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1166,17 +1189,28 @@
 DEF_HELPER_FLAGS_4(sve_ldff1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldff1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ldff1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1hsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1hdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1hss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1hds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ldff1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1sdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldff1sds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ldff1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_4(sve_ldnf1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1186,218 +1220,357 @@
 DEF_HELPER_FLAGS_4(sve_ldnf1bss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldnf1bds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ldnf1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1hds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ldnf1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1sdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_ldnf1sds_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_ldnf1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_4(sve_st1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_st2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_st3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_st4bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_st1hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4hh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4hh_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_st1ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4ss_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4hh_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_st1dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st2dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st3dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st4dd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4ss_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4ss_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_4(sve_st1bh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_st1bs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_st1bd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_st1hs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
-DEF_HELPER_FLAGS_4(sve_st1hd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hs_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hs_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
-DEF_HELPER_FLAGS_4(sve_st1sd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhsu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldssu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_le_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_be_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_ldbss_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhss_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_6(sve_ldbsu_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhsu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldssu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_le_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_be_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_ldbss_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhss_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhss_le_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_be_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_6(sve_ldbdu_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhdu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldsdu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldddu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_le_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_be_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_ldbds_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhds_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldsds_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_6(sve_ldbdu_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhdu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldsdu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldddu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_le_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_be_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_ldbds_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhds_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhds_le_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldsds_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhds_be_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_le_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_be_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_6(sve_ldbdu_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhdu_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldsdu_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldddu_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_le_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_be_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_ldbds_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldhds_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhds_le_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldsds_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldhds_be_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhsu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffssu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_le_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_be_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_ldffbss_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhss_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhss_le_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_be_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_6(sve_ldffbsu_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhsu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffssu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_le_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_be_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_ldffbss_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhss_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhss_le_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_be_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_6(sve_ldffbdu_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhdu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffsdu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffddu_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_le_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_be_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_ldffbds_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhds_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhds_le_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffsds_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhds_be_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_le_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_be_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_6(sve_ldffbdu_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhdu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffsdu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffddu_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_le_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_be_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_ldffbds_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhds_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhds_le_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffsds_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhds_be_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_le_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_be_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_6(sve_ldffbdu_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhdu_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffsdu_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffddu_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_le_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_be_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_ldffbds_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffhds_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhds_le_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_ldffsds_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_ldffhds_be_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_6(sve_stbs_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_sths_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_sths_le_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stss_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_sths_be_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_le_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_be_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_6(sve_stbs_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_sths_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_sths_le_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stss_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_sths_be_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_le_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_be_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_6(sve_stbd_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_sthd_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_sthd_le_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stsd_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_sthd_be_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stdd_zsu, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_stsd_le_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_be_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_le_zsu, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_be_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_6(sve_stbd_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_sthd_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_sthd_le_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stsd_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_sthd_be_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stdd_zss, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_stsd_le_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_be_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_le_zss, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_be_zss, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
 DEF_HELPER_FLAGS_6(sve_stbd_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_sthd_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_sthd_le_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stsd_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_sthd_be_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
-DEF_HELPER_FLAGS_6(sve_stdd_zd, TCG_CALL_NO_WG,
+DEF_HELPER_FLAGS_6(sve_stsd_le_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_be_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 64b1564..0ea95b0 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -56,6 +56,8 @@
                                 V8M_SAttributes *sattrs);
 #endif
 
+static void switch_mode(CPUARMState *env, int mode);
+
 static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg)
 {
     int nregs;
@@ -552,42 +554,6 @@
     raw_write(env, ri, value);
 }
 
-static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                          uint64_t value)
-{
-    /* Invalidate all (TLBIALL) */
-    ARMCPU *cpu = arm_env_get_cpu(env);
-
-    tlb_flush(CPU(cpu));
-}
-
-static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                          uint64_t value)
-{
-    /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */
-    ARMCPU *cpu = arm_env_get_cpu(env);
-
-    tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK);
-}
-
-static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                           uint64_t value)
-{
-    /* Invalidate by ASID (TLBIASID) */
-    ARMCPU *cpu = arm_env_get_cpu(env);
-
-    tlb_flush(CPU(cpu));
-}
-
-static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                           uint64_t value)
-{
-    /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */
-    ARMCPU *cpu = arm_env_get_cpu(env);
-
-    tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK);
-}
-
 /* IS variants of TLB operations must affect all cores */
 static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                              uint64_t value)
@@ -621,6 +587,73 @@
     tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
 }
 
+/*
+ * Return true when a non-IS TLB operation must be upgraded to its IS
+ * version: HCR_EL2.FB is set and we are at Non-secure EL1.  The
+ * security-state test is negated so that Secure EL1 is not affected.
+ */
+static bool tlb_force_broadcast(CPUARMState *env)
+{
+    return (env->cp15.hcr_el2 & HCR_FB) &&
+        arm_current_el(env) == 1 && !arm_is_secure_below_el3(env);
+}
+
+static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                          uint64_t value)
+{
+    /* Invalidate all (TLBIALL) */
+    ARMCPU *cpu = arm_env_get_cpu(env);
+
+    if (tlb_force_broadcast(env)) {
+        tlbiall_is_write(env, NULL, value);  /* broadcast forced: use IS op */
+        return;
+    }
+
+    tlb_flush(CPU(cpu));
+}
+
+static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                          uint64_t value)
+{
+    /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */
+    ARMCPU *cpu = arm_env_get_cpu(env);
+
+    if (tlb_force_broadcast(env)) {
+        tlbimva_is_write(env, NULL, value);  /* broadcast forced: use IS op */
+        return;
+    }
+
+    tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK);
+}
+
+static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                           uint64_t value)
+{
+    /* Invalidate by ASID (TLBIASID) */
+    ARMCPU *cpu = arm_env_get_cpu(env);
+
+    if (tlb_force_broadcast(env)) {
+        tlbiasid_is_write(env, NULL, value);  /* broadcast forced: use IS op */
+        return;
+    }
+
+    tlb_flush(CPU(cpu));
+}
+
+static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                           uint64_t value)
+{
+    /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */
+    ARMCPU *cpu = arm_env_get_cpu(env);
+
+    if (tlb_force_broadcast(env)) {
+        tlbimvaa_is_write(env, NULL, value);  /* broadcast forced: use IS op */
+        return;
+    }
+
+    tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK);
+}
+
 static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                uint64_t value)
 {
@@ -1179,6 +1212,7 @@
 static void pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                          uint64_t value)
 {
+    value &= pmu_counter_mask(env);
     env->cp15.c9_pmovsr &= ~value;
 }
 
@@ -1295,12 +1329,26 @@
     CPUState *cs = ENV_GET_CPU(env);
     uint64_t ret = 0;
 
-    if (cs->interrupt_request & CPU_INTERRUPT_HARD) {
-        ret |= CPSR_I;
+    if (arm_hcr_el2_imo(env)) {
+        if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) {
+            ret |= CPSR_I;
+        }
+    } else {
+        if (cs->interrupt_request & CPU_INTERRUPT_HARD) {
+            ret |= CPSR_I;
+        }
     }
-    if (cs->interrupt_request & CPU_INTERRUPT_FIQ) {
-        ret |= CPSR_F;
+
+    if (arm_hcr_el2_fmo(env)) {
+        if (cs->interrupt_request & CPU_INTERRUPT_VFIQ) {
+            ret |= CPSR_F;
+        }
+    } else {
+        if (cs->interrupt_request & CPU_INTERRUPT_FIQ) {
+            ret |= CPSR_F;
+        }
     }
+
     /* External aborts are not possible in QEMU so A bit is always clear */
     return ret;
 }
@@ -1423,12 +1471,14 @@
       .writefn = pmintenset_write, .raw_writefn = raw_write,
       .resetvalue = 0x0 },
     { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2,
-      .access = PL1_RW, .accessfn = access_tpm, .type = ARM_CP_ALIAS,
+      .access = PL1_RW, .accessfn = access_tpm,
+      .type = ARM_CP_ALIAS | ARM_CP_IO,
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
       .writefn = pmintenclr_write, },
     { .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2,
-      .access = PL1_RW, .accessfn = access_tpm, .type = ARM_CP_ALIAS,
+      .access = PL1_RW, .accessfn = access_tpm,
+      .type = ARM_CP_ALIAS | ARM_CP_IO,
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
       .writefn = pmintenclr_write },
     { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH,
@@ -2267,13 +2317,15 @@
          * * The Non-secure TTBCR.EAE bit is set to 1
          * * The implementation includes EL2, and the value of HCR.VM is 1
          *
+         * (Note that HCR.DC makes HCR.VM behave as if it is 1.)
+         *
          * ATS1Hx always uses the 64bit format (not supported yet).
          */
         format64 = arm_s1_regime_using_lpae_format(env, mmu_idx);
 
         if (arm_feature(env, ARM_FEATURE_EL2)) {
             if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) {
-                format64 |= env->cp15.hcr_el2 & HCR_VM;
+                format64 |= env->cp15.hcr_el2 & (HCR_VM | HCR_DC);
             } else {
                 format64 |= arm_current_el(env) == 2;
             }
@@ -2706,12 +2758,10 @@
 static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
 {
-    /* 64 bit accesses to the TTBRs can change the ASID and so we
-     * must flush the TLB.
-     */
-    if (cpreg_field_is_64bit(ri)) {
+    /* If the ASID changes (with a 64-bit write), we must flush the TLB.  */
+    if (cpreg_field_is_64bit(ri) &&
+        extract64(raw_read(env, ri) ^ value, 48, 16) != 0) {
         ARMCPU *cpu = arm_env_get_cpu(env);
-
         tlb_flush(CPU(cpu));
     }
     raw_write(env, ri, value);
@@ -3080,22 +3130,6 @@
  * Page D4-1736 (DDI0487A.b)
  */
 
-static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                                    uint64_t value)
-{
-    CPUState *cs = ENV_GET_CPU(env);
-
-    if (arm_is_secure_below_el3(env)) {
-        tlb_flush_by_mmuidx(cs,
-                            ARMMMUIdxBit_S1SE1 |
-                            ARMMMUIdxBit_S1SE0);
-    } else {
-        tlb_flush_by_mmuidx(cs,
-                            ARMMMUIdxBit_S12NSE1 |
-                            ARMMMUIdxBit_S12NSE0);
-    }
-}
-
 static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                       uint64_t value)
 {
@@ -3113,6 +3147,27 @@
     }
 }
 
+/* TLBI VMALLE1: flush the EL1/EL0 MMU indexes of the executing CPU. */
+static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                    uint64_t value)
+{
+    CPUState *cs = ENV_GET_CPU(env);
+
+    if (tlb_force_broadcast(env)) {
+        /*
+         * Use the IS handler; calling ourselves here would never return.
+         */
+        tlbi_aa64_vmalle1is_write(env, NULL, value);
+        return;
+    }
+
+    if (arm_is_secure_below_el3(env)) {
+        tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_S1SE1 | ARMMMUIdxBit_S1SE0);
+    } else {
+        tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_S12NSE1 | ARMMMUIdxBit_S12NSE0);
+    }
+}
+
 static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                   uint64_t value)
 {
@@ -3202,29 +3257,6 @@
     tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_S1E3);
 }
 
-static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                                 uint64_t value)
-{
-    /* Invalidate by VA, EL1&0 (AArch64 version).
-     * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1,
-     * since we don't support flush-for-specific-ASID-only or
-     * flush-last-level-only.
-     */
-    ARMCPU *cpu = arm_env_get_cpu(env);
-    CPUState *cs = CPU(cpu);
-    uint64_t pageaddr = sextract64(value << 12, 0, 56);
-
-    if (arm_is_secure_below_el3(env)) {
-        tlb_flush_page_by_mmuidx(cs, pageaddr,
-                                 ARMMMUIdxBit_S1SE1 |
-                                 ARMMMUIdxBit_S1SE0);
-    } else {
-        tlb_flush_page_by_mmuidx(cs, pageaddr,
-                                 ARMMMUIdxBit_S12NSE1 |
-                                 ARMMMUIdxBit_S12NSE0);
-    }
-}
-
 static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                  uint64_t value)
 {
@@ -3272,6 +3304,34 @@
     }
 }
 
+static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                 uint64_t value)
+{
+    /* Invalidate by VA, EL1&0 (AArch64 version).
+     * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1,
+     * since we don't support flush-for-specific-ASID-only or
+     * flush-last-level-only.
+     */
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    CPUState *cs = CPU(cpu);
+    uint64_t pageaddr = sextract64(value << 12, 0, 56);
+
+    if (tlb_force_broadcast(env)) {
+        tlbi_aa64_vae1is_write(env, NULL, value);
+        return;
+    }
+
+    if (arm_is_secure_below_el3(env)) {
+        tlb_flush_page_by_mmuidx(cs, pageaddr,
+                                 ARMMMUIdxBit_S1SE1 |
+                                 ARMMMUIdxBit_S1SE0);
+    } else {
+        tlb_flush_page_by_mmuidx(cs, pageaddr,
+                                 ARMMMUIdxBit_S12NSE1 |
+                                 ARMMMUIdxBit_S12NSE0);
+    }
+}
+
 static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                    uint64_t value)
 {
@@ -3869,6 +3929,7 @@
 static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
+    CPUState *cs = ENV_GET_CPU(env);
     uint64_t valid_mask = HCR_MASK;
 
     if (arm_feature(env, ARM_FEATURE_EL3)) {
@@ -3887,6 +3948,28 @@
     /* Clear RES0 bits.  */
     value &= valid_mask;
 
+    /*
+     * VI and VF are kept in cs->interrupt_request. Modifying that
+     * requires that we have the iothread lock, which is done by
+     * marking the reginfo structs as ARM_CP_IO.
+     * Note that if a write to HCR pends a VIRQ or VFIQ it is never
+     * possible for it to be taken immediately, because VIRQ and
+     * VFIQ are masked unless running at EL0 or EL1, and HCR
+     * can only be written at EL2.
+     */
+    g_assert(qemu_mutex_iothread_locked());
+    if (value & HCR_VI) {
+        cs->interrupt_request |= CPU_INTERRUPT_VIRQ;
+    } else {
+        cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
+    }
+    if (value & HCR_VF) {
+        cs->interrupt_request |= CPU_INTERRUPT_VFIQ;
+    } else {
+        cs->interrupt_request &= ~CPU_INTERRUPT_VFIQ;
+    }
+    value &= ~(HCR_VI | HCR_VF);
+
     /* These bits change the MMU setup:
      * HCR_VM enables stage 2 translation
      * HCR_PTW forbids certain page-table setups
@@ -3914,16 +3997,32 @@
     hcr_write(env, NULL, value);
 }
 
+static uint64_t hcr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    /* The VI and VF bits live in cs->interrupt_request */
+    uint64_t ret = env->cp15.hcr_el2 & ~(HCR_VI | HCR_VF);
+    CPUState *cs = ENV_GET_CPU(env);
+
+    if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) {
+        ret |= HCR_VI;
+    }
+    if (cs->interrupt_request & CPU_INTERRUPT_VFIQ) {
+        ret |= HCR_VF;
+    }
+    return ret;
+}
+
 static const ARMCPRegInfo el2_cp_reginfo[] = {
     { .name = "HCR_EL2", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_IO,
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
       .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
-      .writefn = hcr_write },
+      .writefn = hcr_write, .readfn = hcr_read },
     { .name = "HCR", .state = ARM_CP_STATE_AA32,
-      .type = ARM_CP_ALIAS,
+      .type = ARM_CP_ALIAS | ARM_CP_IO,
       .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
       .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
-      .writefn = hcr_writelow },
+      .writefn = hcr_writelow, .readfn = hcr_read },
     { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64,
       .type = ARM_CP_ALIAS,
       .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1,
@@ -4160,7 +4259,7 @@
 
 static const ARMCPRegInfo el2_v8_cp_reginfo[] = {
     { .name = "HCR2", .state = ARM_CP_STATE_AA32,
-      .type = ARM_CP_ALIAS,
+      .type = ARM_CP_ALIAS | ARM_CP_IO,
       .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 4,
       .access = PL2_RW,
       .fieldoffset = offsetofhigh32(CPUARMState, cp15.hcr_el2),
@@ -4211,7 +4310,7 @@
       .fieldoffset = offsetof(CPUARMState, cp15.mvbar) },
     { .name = "TTBR0_EL3", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 0,
-      .access = PL3_RW, .writefn = vmsa_ttbr_write, .resetvalue = 0,
+      .access = PL3_RW, .resetvalue = 0,
       .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[3]) },
     { .name = "TCR_EL3", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 2,
@@ -4400,78 +4499,105 @@
     REGINFO_SENTINEL
 };
 
-/* Return the exception level to which SVE-disabled exceptions should
- * be taken, or 0 if SVE is enabled.
+/* Return the exception level to which exceptions should be taken
+ * via SVEAccessTrap.  If an exception should be routed through
+ * AArch64.AdvSIMDFPAccessTrap, return 0; fp_exception_el should
+ * take care of raising that exception.
+ * C.f. the ARM pseudocode function CheckSVEEnabled.
  */
-static int sve_exception_el(CPUARMState *env)
+int sve_exception_el(CPUARMState *env, int el)
 {
 #ifndef CONFIG_USER_ONLY
-    unsigned current_el = arm_current_el(env);
+    if (el <= 1) {
+        bool disabled = false;
 
-    /* The CPACR.ZEN controls traps to EL1:
-     * 0, 2 : trap EL0 and EL1 accesses
-     * 1    : trap only EL0 accesses
-     * 3    : trap no accesses
+        /* The CPACR.ZEN controls traps to EL1:
+         * 0, 2 : trap EL0 and EL1 accesses
+         * 1    : trap only EL0 accesses
+         * 3    : trap no accesses
+         */
+        if (!extract32(env->cp15.cpacr_el1, 16, 1)) {
+            disabled = true;
+        } else if (!extract32(env->cp15.cpacr_el1, 17, 1)) {
+            disabled = el == 0;
+        }
+        if (disabled) {
+            /* route_to_el2 */
+            return (arm_feature(env, ARM_FEATURE_EL2)
+                    && !arm_is_secure(env)
+                    && (env->cp15.hcr_el2 & HCR_TGE) ? 2 : 1);
+        }
+
+        /* Check CPACR.FPEN.  */
+        if (!extract32(env->cp15.cpacr_el1, 20, 1)) {
+            disabled = true;
+        } else if (!extract32(env->cp15.cpacr_el1, 21, 1)) {
+            disabled = el == 0;
+        }
+        if (disabled) {
+            return 0;
+        }
+    }
+
+    /* CPTR_EL2.  Since TZ and TFP are positive,
+     * they will be zero when EL2 is not present.
      */
-    switch (extract32(env->cp15.cpacr_el1, 16, 2)) {
-    default:
-        if (current_el <= 1) {
-            /* Trap to PL1, which might be EL1 or EL3 */
-            if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
-                return 3;
-            }
-            return 1;
+    if (el <= 2 && !arm_is_secure_below_el3(env)) {
+        if (env->cp15.cptr_el[2] & CPTR_TZ) {
+            return 2;
         }
-        break;
-    case 1:
-        if (current_el == 0) {
-            return 1;
+        if (env->cp15.cptr_el[2] & CPTR_TFP) {
+            return 0;
         }
-        break;
-    case 3:
-        break;
     }
 
-    /* Similarly for CPACR.FPEN, after having checked ZEN.  */
-    switch (extract32(env->cp15.cpacr_el1, 20, 2)) {
-    default:
-        if (current_el <= 1) {
-            if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
-                return 3;
-            }
-            return 1;
-        }
-        break;
-    case 1:
-        if (current_el == 0) {
-            return 1;
-        }
-        break;
-    case 3:
-        break;
-    }
-
-    /* CPTR_EL2.  Check both TZ and TFP.  */
-    if (current_el <= 2
-        && (env->cp15.cptr_el[2] & (CPTR_TFP | CPTR_TZ))
-        && !arm_is_secure_below_el3(env)) {
-        return 2;
-    }
-
-    /* CPTR_EL3.  Check both EZ and TFP.  */
-    if (!(env->cp15.cptr_el[3] & CPTR_EZ)
-        || (env->cp15.cptr_el[3] & CPTR_TFP)) {
+    /* CPTR_EL3.  Since EZ is negative we must check for EL3.  */
+    if (arm_feature(env, ARM_FEATURE_EL3)
+        && !(env->cp15.cptr_el[3] & CPTR_EZ)) {
         return 3;
     }
 #endif
     return 0;
 }
 
+/* Given that SVE is enabled, return the vector length for EL, encoded as
+ * VQ - 1: the minimum of the CPU maximum and every ZCR_ELx at or above EL.
+ */
+uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    uint32_t zcr_len = cpu->sve_max_vq - 1;
+
+    if (el <= 1) {
+        zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
+    }
+    if (el <= 2 && arm_feature(env, ARM_FEATURE_EL2)) {
+        zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
+    }
+    if (arm_feature(env, ARM_FEATURE_EL3)) {
+        zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
+    }
+    return zcr_len;
+}
+
 static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                       uint64_t value)
 {
+    int cur_el = arm_current_el(env);
+    int old_len = sve_zcr_len_for_el(env, cur_el);
+    int new_len;
+
     /* Bits other than [3:0] are RAZ/WI.  */
     raw_write(env, ri, value & 0xf);
+
+    /*
+     * Because we arrived here, we know both FP and SVE are enabled;
+     * otherwise we would have trapped access to the ZCR_ELn register.
+     */
+    new_len = sve_zcr_len_for_el(env, cur_el);
+    if (new_len < old_len) {
+        aarch64_sve_narrow_vq(env, new_len + 1);
+    }
 }
 
 static const ARMCPRegInfo zcr_el1_reginfo = {
@@ -4843,7 +4969,7 @@
 static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
-    uint64_t pfr0 = cpu->id_aa64pfr0;
+    uint64_t pfr0 = cpu->isar.id_aa64pfr0;
 
     if (env->gicv3state) {
         pfr0 |= 1 << 24;
@@ -4910,27 +5036,27 @@
             { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0,
               .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_isar0 },
+              .resetvalue = cpu->isar.id_isar0 },
             { .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1,
               .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_isar1 },
+              .resetvalue = cpu->isar.id_isar1 },
             { .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
               .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_isar2 },
+              .resetvalue = cpu->isar.id_isar2 },
             { .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3,
               .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_isar3 },
+              .resetvalue = cpu->isar.id_isar3 },
             { .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 4,
               .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_isar4 },
+              .resetvalue = cpu->isar.id_isar4 },
             { .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5,
               .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_isar5 },
+              .resetvalue = cpu->isar.id_isar5 },
             { .name = "ID_MMFR4", .state = ARM_CP_STATE_BOTH,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6,
               .access = PL1_R, .type = ARM_CP_CONST,
@@ -4938,7 +5064,7 @@
             { .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7,
               .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_isar6 },
+              .resetvalue = cpu->isar.id_isar6 },
             REGINFO_SENTINEL
         };
         define_arm_cp_regs(cpu, v6_idregs);
@@ -5009,7 +5135,7 @@
             { .name = "ID_AA64PFR1_EL1", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1,
               .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_aa64pfr1},
+              .resetvalue = cpu->isar.id_aa64pfr1},
             { .name = "ID_AA64PFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 2,
               .access = PL1_R, .type = ARM_CP_CONST,
@@ -5018,9 +5144,10 @@
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 3,
               .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = 0 },
-            { .name = "ID_AA64PFR4_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
+            { .name = "ID_AA64ZFR0_EL1", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 4,
               .access = PL1_R, .type = ARM_CP_CONST,
+              /* At present, only SVEver == 0 is defined anyway.  */
               .resetvalue = 0 },
             { .name = "ID_AA64PFR5_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 5,
@@ -5069,11 +5196,11 @@
             { .name = "ID_AA64ISAR0_EL1", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0,
               .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_aa64isar0 },
+              .resetvalue = cpu->isar.id_aa64isar0 },
             { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1,
               .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_aa64isar1 },
+              .resetvalue = cpu->isar.id_aa64isar1 },
             { .name = "ID_AA64ISAR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2,
               .access = PL1_R, .type = ARM_CP_CONST,
@@ -5133,15 +5260,15 @@
             { .name = "MVFR0_EL1", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 0,
               .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->mvfr0 },
+              .resetvalue = cpu->isar.mvfr0 },
             { .name = "MVFR1_EL1", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 1,
               .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->mvfr1 },
+              .resetvalue = cpu->isar.mvfr1 },
             { .name = "MVFR2_EL1", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2,
               .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->mvfr2 },
+              .resetvalue = cpu->isar.mvfr2 },
             { .name = "MVFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 3,
               .access = PL1_R, .type = ARM_CP_CONST,
@@ -5587,7 +5714,7 @@
         define_one_arm_cp_reg(cpu, &sctlr);
     }
 
-    if (arm_feature(env, ARM_FEATURE_SVE)) {
+    if (cpu_isar_feature(aa64_sve, cpu)) {
         define_one_arm_cp_reg(cpu, &zcr_el1_reginfo);
         if (arm_feature(env, ARM_FEATURE_EL2)) {
             define_one_arm_cp_reg(cpu, &zcr_el2_reginfo);
@@ -6177,7 +6304,17 @@
                 mask |= CPSR_IL;
                 val |= CPSR_IL;
             }
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "Illegal AArch32 mode switch attempt from %s to %s\n",
+                          aarch32_mode_name(env->uncached_cpsr),
+                          aarch32_mode_name(val));
         } else {
+            qemu_log_mask(CPU_LOG_INT, "%s %s to %s PC 0x%" PRIx32 "\n",
+                          write_type == CPSRWriteExceptionReturn ?
+                          "Exception return from AArch32" :
+                          "AArch32 mode switch from",
+                          aarch32_mode_name(env->uncached_cpsr),
+                          aarch32_mode_name(val), env->regs[15]);
             switch_mode(env, val & CPSR_M);
         }
     }
@@ -6275,7 +6412,7 @@
     return 0;
 }
 
-void switch_mode(CPUARMState *env, int mode)
+static void switch_mode(CPUARMState *env, int mode)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
 
@@ -6297,7 +6434,7 @@
 
 #else
 
-void switch_mode(CPUARMState *env, int mode)
+static void switch_mode(CPUARMState *env, int mode)
 {
     int old_mode;
     int i;
@@ -6441,7 +6578,7 @@
     target_ulong page_size;
     hwaddr physaddr;
     int prot;
-    ARMMMUFaultInfo fi;
+    ARMMMUFaultInfo fi = {};
     bool secure = mmu_idx & ARM_MMU_IDX_M_S;
     int exc;
     bool exc_secure;
@@ -6503,7 +6640,7 @@
     target_ulong page_size;
     hwaddr physaddr;
     int prot;
-    ARMMMUFaultInfo fi;
+    ARMMMUFaultInfo fi = {};
     bool secure = mmu_idx & ARM_MMU_IDX_M_S;
     int exc;
     bool exc_secure;
@@ -6554,18 +6691,6 @@
     return false;
 }
 
-/* Return true if we're using the process stack pointer (not the MSP) */
-static bool v7m_using_psp(CPUARMState *env)
-{
-    /* Handler mode always uses the main stack; for thread mode
-     * the CONTROL.SPSEL bit determines the answer.
-     * Note that in v7M it is not possible to be in Handler mode with
-     * CONTROL.SPSEL non-zero, but in v8M it is, so we must check both.
-     */
-    return !arm_v7m_is_handler_mode(env) &&
-        env->v7m.control[env->v7m.secure] & R_V7M_CONTROL_SPSEL_MASK;
-}
-
 /* Write to v7M CONTROL.SPSEL bit for the specified security bank.
  * This may change the current stack pointer between Main and Process
  * stack pointers if it is done for the CONTROL register for the current
@@ -6722,6 +6847,10 @@
                       "BLXNS with misaligned SP is UNPREDICTABLE\n");
     }
 
+    if (sp < v7m_sp_limit(env)) {
+        raise_exception(env, EXCP_STKOF, 0, 1);
+    }
+
     saved_psr = env->v7m.exception;
     if (env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK) {
         saved_psr |= XPSR_SFPA;
@@ -6851,6 +6980,8 @@
     uint32_t frameptr;
     ARMMMUIdx mmu_idx;
     bool stacked_ok;
+    uint32_t limit;
+    bool want_psp;
 
     if (dotailchain) {
         bool mode = lr & R_V7M_EXCRET_MODE_MASK;
@@ -6860,12 +6991,34 @@
         mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, M_REG_S, priv);
         frame_sp_p = get_v7m_sp_ptr(env, M_REG_S, mode,
                                     lr & R_V7M_EXCRET_SPSEL_MASK);
+        want_psp = mode && (lr & R_V7M_EXCRET_SPSEL_MASK);
+        if (want_psp) {
+            limit = env->v7m.psplim[M_REG_S];
+        } else {
+            limit = env->v7m.msplim[M_REG_S];
+        }
     } else {
         mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false));
         frame_sp_p = &env->regs[13];
+        limit = v7m_sp_limit(env);
     }
 
     frameptr = *frame_sp_p - 0x28;
+    if (frameptr < limit) {
+        /*
+         * Stack limit failure: set SP to the limit value, and generate
+         * STKOF UsageFault. Stack pushes below the limit must not be
+         * performed. It is IMPDEF whether pushes above the limit are
+         * performed; we choose not to.
+         */
+        qemu_log_mask(CPU_LOG_INT,
+                      "...STKOF during callee-saves register stacking\n");
+        env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
+        armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+                                env->v7m.secure);
+        *frame_sp_p = limit;
+        return true;
+    }
 
     /* Write as much of the stack frame as we can. A write failure may
      * cause us to pend a derived exception.
@@ -6889,10 +7042,7 @@
         v7m_stack_write(cpu, frameptr + 0x24, env->regs[11], mmu_idx,
                         ignore_faults);
 
-    /* Update SP regardless of whether any of the stack accesses failed.
-     * When we implement v8M stack limit checking then this attempt to
-     * update SP might also fail and result in a derived exception.
-     */
+    /* Update SP regardless of whether any of the stack accesses failed. */
     *frame_sp_p = frameptr;
 
     return !stacked_ok;
@@ -6938,7 +7088,7 @@
                  * not already saved.
                  */
                 if (lr & R_V7M_EXCRET_DCRS_MASK &&
-                    !(dotailchain && (lr & R_V7M_EXCRET_ES_MASK))) {
+                    !(dotailchain && !(lr & R_V7M_EXCRET_ES_MASK))) {
                     push_failed = v7m_push_callee_stack(cpu, lr, dotailchain,
                                                         ignore_stackfaults);
                 }
@@ -7040,6 +7190,26 @@
 
     frameptr -= 0x20;
 
+    if (arm_feature(env, ARM_FEATURE_V8)) {
+        uint32_t limit = v7m_sp_limit(env);
+
+        if (frameptr < limit) {
+            /*
+             * Stack limit failure: set SP to the limit value, and generate
+             * STKOF UsageFault. Stack pushes below the limit must not be
+             * performed. It is IMPDEF whether pushes above the limit are
+             * performed; we choose not to.
+             */
+            qemu_log_mask(CPU_LOG_INT,
+                          "...STKOF during stacking\n");
+            env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
+            armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+                                    env->v7m.secure);
+            env->regs[13] = limit;
+            return true;
+        }
+    }
+
     /* Write as much of the stack frame as we can. If we fail a stack
      * write this will result in a derived exception being pended
      * (which may be taken in preference to the one we started with
@@ -7055,10 +7225,7 @@
         v7m_stack_write(cpu, frameptr + 24, env->regs[15], mmu_idx, false) &&
         v7m_stack_write(cpu, frameptr + 28, xpsr, mmu_idx, false);
 
-    /* Update SP regardless of whether any of the stack accesses failed.
-     * When we implement v8M stack limit checking then this attempt to
-     * update SP might also fail and result in a derived exception.
-     */
+    /* Update SP regardless of whether any of the stack accesses failed. */
     env->regs[13] = frameptr;
 
     return !stacked_ok;
@@ -7304,7 +7471,6 @@
 
             pop_ok = pop_ok &&
                 v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) &&
-                v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) &&
                 v7m_stack_read(cpu, &env->regs[5], frameptr + 0xc, mmu_idx) &&
                 v7m_stack_read(cpu, &env->regs[6], frameptr + 0x10, mmu_idx) &&
                 v7m_stack_read(cpu, &env->regs[7], frameptr + 0x14, mmu_idx) &&
@@ -7512,6 +7678,7 @@
             [EXCP_SEMIHOST] = "Semihosting call",
             [EXCP_NOCP] = "v7M NOCP UsageFault",
             [EXCP_INVSTATE] = "v7M INVSTATE UsageFault",
+            [EXCP_STKOF] = "v8M STKOF UsageFault",
         };
 
         if (idx >= 0 && idx < ARRAY_SIZE(excnames)) {
@@ -7667,6 +7834,10 @@
         armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
         env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVSTATE_MASK;
         break;
+    case EXCP_STKOF:
+        armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
+        env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
+        break;
     case EXCP_SWI:
         /* The PC already points to the next instruction.  */
         armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SVC, env->v7m.secure);
@@ -8129,6 +8300,19 @@
     }
 
     if (cs->exception_index != EXCP_IRQ && cs->exception_index != EXCP_FIQ) {
+        if (!arm_feature(env, ARM_FEATURE_V8)) {
+            /*
+             * QEMU syndrome values are v8-style. v7 has the IL bit
+             * UNK/SBZP for "field not valid" cases, where v8 uses RES1.
+             * If this is a v7 CPU, squash the IL bit in those cases.
+             */
+            if (cs->exception_index == EXCP_PREFETCH_ABORT ||
+                (cs->exception_index == EXCP_DATA_ABORT &&
+                 !(env->exception.syndrome & ARM_EL_ISV)) ||
+                syn_get_ec(env->exception.syndrome) == EC_UNCATEGORIZED) {
+                env->exception.syndrome &= ~ARM_EL_IL;
+            }
+        }
         env->cp15.esr_el[2] = env->exception.syndrome;
     }
 
@@ -8163,7 +8347,7 @@
     uint32_t moe;
 
     /* If this is a debug exception we must update the DBGDSCR.MOE bits */
-    switch (env->exception.syndrome >> ARM_EL_EC_SHIFT) {
+    switch (syn_get_ec(env->exception.syndrome)) {
     case EC_BREAKPOINT:
     case EC_BREAKPOINT_SAME_EL:
         moe = 1;
@@ -8310,8 +8494,15 @@
     unsigned int new_el = env->exception.target_el;
     target_ulong addr = env->cp15.vbar_el[new_el];
     unsigned int new_mode = aarch64_pstate_mode(new_el, true);
+    unsigned int cur_el = arm_current_el(env);
 
-    if (arm_current_el(env) < new_el) {
+    /*
+     * Note that new_el can never be 0.  If cur_el is 0, then
+     * el0_a64 is is_a64(), else el0_a64 is ignored.
+     */
+    aarch64_sve_change_el(env, cur_el, new_el, is_a64(env));
+
+    if (cur_el < new_el) {
         /* Entry vector offset depends on whether the implemented EL
          * immediately lower than the target level is using AArch32 or AArch64
          */
@@ -8353,6 +8544,15 @@
     case EXCP_HVC:
     case EXCP_HYP_TRAP:
     case EXCP_SMC:
+        if (syn_get_ec(env->exception.syndrome) == EC_ADVSIMDFPACCESSTRAP) {
+            /*
+             * QEMU internal FP/SIMD syndromes from AArch32 include the
+             * TA and coproc fields which are only exposed if the exception
+             * is taken to AArch32 Hyp mode. Mask them out to get a valid
+             * AArch64 format syndrome.
+             */
+            env->exception.syndrome &= ~MAKE_64BIT_MASK(0, 20);
+        }
         env->cp15.esr_el[new_el] = env->exception.syndrome;
         break;
     case EXCP_IRQ:
@@ -8496,7 +8696,7 @@
     if (qemu_loglevel_mask(CPU_LOG_INT)
         && !excp_is_internal(cs->exception_index)) {
         qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%x/0x%" PRIx32 "\n",
-                      env->exception.syndrome >> ARM_EL_EC_SHIFT,
+                      syn_get_ec(env->exception.syndrome),
                       env->exception.syndrome);
     }
 
@@ -8593,7 +8793,8 @@
     }
 
     if (mmu_idx == ARMMMUIdx_S2NS) {
-        return (env->cp15.hcr_el2 & HCR_VM) == 0;
+        /* HCR.DC means HCR.VM behaves as 1 */
+        return (env->cp15.hcr_el2 & (HCR_DC | HCR_VM)) == 0;
     }
 
     if (env->cp15.hcr_el2 & HCR_TGE) {
@@ -8603,6 +8804,12 @@
         }
     }
 
+    if ((env->cp15.hcr_el2 & HCR_DC) &&
+        (mmu_idx == ARMMMUIdx_S1NSE0 || mmu_idx == ARMMMUIdx_S1NSE1)) {
+        /* HCR.DC means SCTLR_EL1.M behaves as 0 */
+        return true;
+    }
+
     return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0;
 }
 
@@ -8954,9 +9161,20 @@
         hwaddr s2pa;
         int s2prot;
         int ret;
+        ARMCacheAttrs cacheattrs = {};
+        ARMCacheAttrs *pcacheattrs = NULL;
+
+        if (env->cp15.hcr_el2 & HCR_PTW) {
+            /*
+             * PTW means we must fault if this S1 walk touches S2 Device
+             * memory; otherwise we don't care about the attributes and can
+             * save the S2 translation the effort of computing them.
+             */
+            pcacheattrs = &cacheattrs;
+        }
 
         ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_S2NS, &s2pa,
-                                 &txattrs, &s2prot, &s2size, fi, NULL);
+                                 &txattrs, &s2prot, &s2size, fi, pcacheattrs);
         if (ret) {
             assert(fi->type != ARMFault_None);
             fi->s2addr = addr;
@@ -8964,6 +9182,14 @@
             fi->s1ptw = true;
             return ~0;
         }
+        if (pcacheattrs && (pcacheattrs->attrs & 0xf0) == 0) {
+            /* Access was to Device memory: generate Permission fault */
+            fi->type = ARMFault_Permission;
+            fi->s2addr = addr;
+            fi->stage2 = true;
+            fi->s1ptw = true;
+            return ~0;
+        }
         addr = s2pa;
     }
     return addr;
@@ -10583,6 +10809,16 @@
 
             /* Combine the S1 and S2 cache attributes, if needed */
             if (!ret && cacheattrs != NULL) {
+                if (env->cp15.hcr_el2 & HCR_DC) {
+                    /*
+                     * HCR.DC forces the first stage attributes to
+                     *  Normal Non-Shareable,
+                     *  Inner Write-Back Read-Allocate Write-Allocate,
+                     *  Outer Write-Back Read-Allocate Write-Allocate.
+                     */
+                    cacheattrs->attrs = 0xff;
+                    cacheattrs->shareability = 0;
+                }
                 *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2);
             }
 
@@ -10929,11 +11165,23 @@
              * currently in handler mode or not, using the NS CONTROL.SPSEL.
              */
             bool spsel = env->v7m.control[M_REG_NS] & R_V7M_CONTROL_SPSEL_MASK;
+            bool is_psp = !arm_v7m_is_handler_mode(env) && spsel;
+            uint32_t limit;
 
             if (!env->v7m.secure) {
                 return;
             }
-            if (!arm_v7m_is_handler_mode(env) && spsel) {
+
+            limit = is_psp ? env->v7m.psplim[false] : env->v7m.msplim[false];
+
+            if (val < limit) {
+                CPUState *cs = CPU(arm_env_get_cpu(env));
+
+                cpu_restore_state(cs, GETPC(), true);
+                raise_exception(env, EXCP_STKOF, 0, 1);
+            }
+
+            if (is_psp) {
                 env->v7m.other_ss_psp = val;
             } else {
                 env->v7m.other_ss_msp = val;
@@ -11528,7 +11776,7 @@
     uint32_t changed;
 
     /* When ARMv8.2-FP16 is not supported, FZ16 is RES0.  */
-    if (!arm_feature(env, ARM_FEATURE_V8_FP16)) {
+    if (!cpu_isar_feature(aa64_fp16, arm_env_get_cpu(env))) {
         val &= ~FPCR_FZ16;
     }
 
@@ -12516,11 +12764,10 @@
 /* Return the exception level to which FP-disabled exceptions should
  * be taken, or 0 if FP is enabled.
  */
-static inline int fp_exception_el(CPUARMState *env)
+int fp_exception_el(CPUARMState *env, int cur_el)
 {
 #ifndef CONFIG_USER_ONLY
     int fpen;
-    int cur_el = arm_current_el(env);
 
     /* CPACR and the CPTR registers don't exist before v6, so FP is
      * always accessible
@@ -12583,18 +12830,21 @@
                           target_ulong *cs_base, uint32_t *pflags)
 {
     ARMMMUIdx mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false));
-    int fp_el = fp_exception_el(env);
+    int current_el = arm_current_el(env);
+    int fp_el = fp_exception_el(env, current_el);
     uint32_t flags;
 
     if (is_a64(env)) {
+        ARMCPU *cpu = arm_env_get_cpu(env);
+
         *pc = env->pc;
         flags = ARM_TBFLAG_AARCH64_STATE_MASK;
         /* Get control bits for tagged addresses */
         flags |= (arm_regime_tbi0(env, mmu_idx) << ARM_TBFLAG_TBI0_SHIFT);
         flags |= (arm_regime_tbi1(env, mmu_idx) << ARM_TBFLAG_TBI1_SHIFT);
 
-        if (arm_feature(env, ARM_FEATURE_SVE)) {
-            int sve_el = sve_exception_el(env);
+        if (cpu_isar_feature(aa64_sve, cpu)) {
+            int sve_el = sve_exception_el(env, current_el);
             uint32_t zcr_len;
 
             /* If SVE is disabled, but FP is enabled,
@@ -12603,19 +12853,7 @@
             if (sve_el != 0 && fp_el == 0) {
                 zcr_len = 0;
             } else {
-                int current_el = arm_current_el(env);
-                ARMCPU *cpu = arm_env_get_cpu(env);
-
-                zcr_len = cpu->sve_max_vq - 1;
-                if (current_el <= 1) {
-                    zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
-                }
-                if (current_el < 2 && arm_feature(env, ARM_FEATURE_EL2)) {
-                    zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
-                }
-                if (current_el < 3 && arm_feature(env, ARM_FEATURE_EL3)) {
-                    zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
-                }
+                zcr_len = sve_zcr_len_for_el(env, current_el);
             }
             flags |= sve_el << ARM_TBFLAG_SVEEXC_EL_SHIFT;
             flags |= zcr_len << ARM_TBFLAG_ZCR_LEN_SHIFT;
@@ -12668,6 +12906,103 @@
         flags |= ARM_TBFLAG_HANDLER_MASK;
     }
 
+    /* v8M always applies stack limit checks unless CCR.STKOFHFNMIGN is
+     * suppressing them because the requested execution priority is less than 0.
+     */
+    if (arm_feature(env, ARM_FEATURE_V8) &&
+        arm_feature(env, ARM_FEATURE_M) &&
+        !((mmu_idx  & ARM_MMU_IDX_M_NEGPRI) &&
+          (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKOFHFNMIGN_MASK))) {
+        flags |= ARM_TBFLAG_STACKCHECK_MASK;
+    }
+
     *pflags = flags;
     *cs_base = 0;
 }
+
+#ifdef TARGET_AARCH64
+/*
+ * The manual says that when SVE is enabled and VQ is widened the
+ * implementation is allowed to zero the previously inaccessible
+ * portion of the registers.  The corollary to that is that when
+ * SVE is enabled and VQ is narrowed we are also allowed to zero
+ * the now inaccessible portion of the registers.
+ *
+ * The intent of this is that no predicate bit beyond VQ is ever set.
+ * Which means that some operations on predicate registers themselves
+ * may operate on full uint64_t or even unrolled across the maximum
+ * uint64_t[4].  Performing 4 words of host arithmetic unconditionally
+ * may well be cheaper than conditionals to restrict the operation
+ * to the relevant portion of a uint16_t[16].
+ */
+void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
+{
+    int i, j;
+    uint64_t pmask;
+
+    assert(vq >= 1 && vq <= ARM_MAX_VQ);
+    assert(vq <= arm_env_get_cpu(env)->sve_max_vq);
+
+    /* Zap the high bits of the zregs.  */
+    for (i = 0; i < 32; i++) {
+        memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq));
+    }
+
+    /* Zap the high bits of the pregs and ffr.  */
+    pmask = 0;
+    if (vq & 3) {
+        pmask = ~(-1ULL << (16 * (vq & 3)));
+    }
+    for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) {
+        for (i = 0; i < 17; ++i) {
+            env->vfp.pregs[i].p[j] &= pmask;
+        }
+        pmask = 0;
+    }
+}
+
+/*
+ * Notice a change in SVE vector size when changing EL.
+ */
+void aarch64_sve_change_el(CPUARMState *env, int old_el,
+                           int new_el, bool el0_a64)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    int old_len, new_len;
+    bool old_a64, new_a64;
+
+    /* Nothing to do if no SVE.  */
+    if (!cpu_isar_feature(aa64_sve, cpu)) {
+        return;
+    }
+
+    /* Nothing to do if FP is disabled in either EL.  */
+    if (fp_exception_el(env, old_el) || fp_exception_el(env, new_el)) {
+        return;
+    }
+
+    /*
+     * DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
+     * at ELx, or not available because the EL is in AArch32 state, then
+     * for all purposes other than a direct read, the ZCR_ELx.LEN field
+     * has an effective value of 0".
+     *
+     * Consider EL2 (aa64, vq=4) -> EL0 (aa32) -> EL1 (aa64, vq=0).
+     * If we ignore aa32 state, we would fail to see the vq4->vq0 transition
+     * from EL2->EL1.  Thus we go ahead and narrow when entering aa32 so that
+     * we already have the correct register contents when encountering the
+     * vq0->vq0 transition between EL0->EL1.
+     */
+    old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
+    old_len = (old_a64 && !sve_exception_el(env, old_el)
+               ? sve_zcr_len_for_el(env, old_el) : 0);
+    new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
+    new_len = (new_a64 && !sve_exception_el(env, new_el)
+               ? sve_zcr_len_for_el(env, new_el) : 0);
+
+    /* When changing vector length, clear inaccessible state.  */
+    if (new_len < old_len) {
+        aarch64_sve_narrow_vq(env, new_len + 1);
+    }
+}
+#endif
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 59e8c3b..8c95900 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -69,6 +69,8 @@
 
 DEF_HELPER_3(v7m_tt, i32, env, i32, i32)
 
+DEF_HELPER_2(v8m_stackcheck, void, env, i32)
+
 DEF_HELPER_4(access_check_cp_reg, void, env, ptr, i32, i32)
 DEF_HELPER_3(set_cp_reg, void, env, ptr, i32)
 DEF_HELPER_2(get_cp_reg, i32, env, ptr)
diff --git a/target/arm/internals.h b/target/arm/internals.h
index dc93577..6c2bb2d 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -94,6 +94,15 @@
 #define M_FAKE_FSR_NSC_EXEC 0xf /* NS executing in S&NSC memory */
 #define M_FAKE_FSR_SFAULT 0xe /* SecureFault INVTRAN, INVEP or AUVIOL */
 
+/**
+ * raise_exception: Raise the specified exception.
+ * Raise a guest exception with the specified value, syndrome register
+ * and target exception level. This should be called from helper functions,
+ * and never returns because we will longjump back up to the CPU main loop.
+ */
+void QEMU_NORETURN raise_exception(CPUARMState *env, uint32_t excp,
+                                   uint32_t syndrome, uint32_t target_el);
+
 /*
  * For AArch64, map a given EL to an index in the banked_spsr array.
  * Note that this mapping and the AArch32 mapping defined in bank_number()
@@ -136,7 +145,6 @@
     g_assert_not_reached();
 }
 
-void switch_mode(CPUARMState *, int);
 void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
 void arm_translate_init(void);
 
@@ -270,14 +278,19 @@
 #define ARM_EL_IL (1 << ARM_EL_IL_SHIFT)
 #define ARM_EL_ISV (1 << ARM_EL_ISV_SHIFT)
 
+static inline uint32_t syn_get_ec(uint32_t syn)
+{
+    return syn >> ARM_EL_EC_SHIFT;
+}
+
 /* Utility functions for constructing various kinds of syndrome value.
  * Note that in general we follow the AArch64 syndrome values; in a
  * few cases the value in HSR for exceptions taken to AArch32 Hyp
- * mode differs slightly, so if we ever implemented Hyp mode then the
- * syndrome value would need some massaging on exception entry.
- * (One example of this is that AArch64 defaults to IL bit set for
- * exceptions which don't specifically indicate information about the
- * trapping instruction, whereas AArch32 defaults to IL bit clear.)
+ * mode differs slightly, and we fix this up when populating HSR in
+ * arm_cpu_do_interrupt_aarch32_hyp().
+ * The exception is FP/SIMD access traps -- these report extra information
+ * when taking an exception to AArch32. For those we include the extra coproc
+ * and TA fields, and mask them out when taking the exception to AArch64.
  */
 static inline uint32_t syn_uncategorized(void)
 {
@@ -377,9 +390,18 @@
 
 static inline uint32_t syn_fp_access_trap(int cv, int cond, bool is_16bit)
 {
+    /* AArch32 FP trap or any AArch64 FP/SIMD trap: TA == 0 coproc == 0xa */
     return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT)
         | (is_16bit ? 0 : ARM_EL_IL)
-        | (cv << 24) | (cond << 20);
+        | (cv << 24) | (cond << 20) | 0xa;
+}
+
+static inline uint32_t syn_simd_access_trap(int cv, int cond, bool is_16bit)
+{
+    /* AArch32 SIMD trap: TA == 1 coproc == 0 */
+    return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT)
+        | (is_16bit ? 0 : ARM_EL_IL)
+        | (cv << 24) | (cond << 20) | (1 << 5);
 }
 
 static inline uint32_t syn_sve_access_trap(void)
@@ -796,4 +818,57 @@
     }
 }
 
+/* Note make_memop_idx reserves 4 bits for mmu_idx, and MO_BSWAP is bit 3.
+ * Thus a TCGMemOpIdx, without any MO_ALIGN bits, fits in 8 bits.
+ */
+#define MEMOPIDX_SHIFT  8
+
+/**
+ * v7m_using_psp: Return true if using process stack pointer
+ * Return true if the CPU is currently using the process stack
+ * pointer, or false if it is using the main stack pointer.
+ */
+static inline bool v7m_using_psp(CPUARMState *env)
+{
+    /* Handler mode always uses the main stack; for thread mode
+     * the CONTROL.SPSEL bit determines the answer.
+     * Note that in v7M it is not possible to be in Handler mode with
+     * CONTROL.SPSEL non-zero, but in v8M it is, so we must check both.
+     */
+    return !arm_v7m_is_handler_mode(env) &&
+        env->v7m.control[env->v7m.secure] & R_V7M_CONTROL_SPSEL_MASK;
+}
+
+/**
+ * v7m_sp_limit: Return SP limit for current CPU state
+ * Return the SP limit value for the current CPU security state
+ * and stack pointer.
+ */
+static inline uint32_t v7m_sp_limit(CPUARMState *env)
+{
+    if (v7m_using_psp(env)) {
+        return env->v7m.psplim[env->v7m.secure];
+    } else {
+        return env->v7m.msplim[env->v7m.secure];
+    }
+}
+
+/**
+ * aarch32_mode_name(): Return name of the AArch32 CPU mode
+ * @psr: Program Status Register indicating CPU mode
+ *
+ * Returns, for debug logging purposes, a printable representation
+ * of the AArch32 CPU mode ("svc", "usr", etc) as indicated by
+ * the low bits of the specified PSR.
+ */
+static inline const char *aarch32_mode_name(uint32_t psr)
+{
+    static const char cpu_mode_names[16][4] = {
+        "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt",
+        "???", "???", "hyp", "und", "???", "???", "???", "sys"
+    };
+
+    return cpu_mode_names[psr & 0xf];
+}
+
 #endif
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 65f867d..09a86e2 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -34,6 +34,7 @@
 };
 
 static bool cap_has_mp_state;
+static bool cap_has_inject_serror_esr;
 
 static ARMHostCPUFeatures arm_host_cpu_features;
 
@@ -48,6 +49,12 @@
     return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
 }
 
+void kvm_arm_init_serror_injection(CPUState *cs)
+{
+    cap_has_inject_serror_esr = kvm_check_extension(cs->kvm_state,
+                                    KVM_CAP_ARM_INJECT_SERROR_ESR);
+}
+
 bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
                                       int *fdarray,
                                       struct kvm_vcpu_init *init)
@@ -310,7 +317,7 @@
     return 0;
 }
 
-/* Initialize the CPUState's cpreg list according to the kernel's
+/* Initialize the ARMCPU cpreg list according to the kernel's
  * definition of what CPU registers it knows about (and throw away
  * the previous TCG-created cpreg list).
  */
@@ -522,6 +529,59 @@
     return 0;
 }
 
+int kvm_put_vcpu_events(ARMCPU *cpu)
+{
+    CPUARMState *env = &cpu->env;
+    struct kvm_vcpu_events events;
+    int ret;
+
+    if (!kvm_has_vcpu_events()) {
+        return 0;
+    }
+
+    memset(&events, 0, sizeof(events));
+    events.exception.serror_pending = env->serror.pending;
+
+    /* Inject SError to guest with specified syndrome if host kernel
+     * supports it, otherwise inject SError without syndrome.
+     */
+    if (cap_has_inject_serror_esr) {
+        events.exception.serror_has_esr = env->serror.has_esr;
+        events.exception.serror_esr = env->serror.esr;
+    }
+
+    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
+    if (ret) {
+        error_report("failed to put vcpu events");
+    }
+
+    return ret;
+}
+
+int kvm_get_vcpu_events(ARMCPU *cpu)
+{
+    CPUARMState *env = &cpu->env;
+    struct kvm_vcpu_events events;
+    int ret;
+
+    if (!kvm_has_vcpu_events()) {
+        return 0;
+    }
+
+    memset(&events, 0, sizeof(events));
+    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events);
+    if (ret) {
+        error_report("failed to get vcpu events");
+        return ret;
+    }
+
+    env->serror.pending = events.exception.serror_pending;
+    env->serror.has_esr = events.exception.serror_has_esr;
+    env->serror.esr = events.exception.serror_esr;
+
+    return 0;
+}
+
 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
 {
 }
diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c
index 4e91c11..0f1e94c 100644
--- a/target/arm/kvm32.c
+++ b/target/arm/kvm32.c
@@ -217,6 +217,9 @@
     }
     cpu->mp_affinity = mpidr & ARM32_AFFINITY_MASK;
 
+    /* Check whether userspace can specify guest syndrome value */
+    kvm_arm_init_serror_injection(cs);
+
     return kvm_arm_init_cpreg_list(cpu);
 }
 
@@ -358,6 +361,11 @@
         return ret;
     }
 
+    ret = kvm_put_vcpu_events(cpu);
+    if (ret) {
+        return ret;
+    }
+
     /* Note that we do not call write_cpustate_to_list()
      * here, so we are only writing the tuple list back to
      * KVM. This is safe because nothing can change the
@@ -445,6 +453,11 @@
     }
     vfp_set_fpscr(env, fpscr);
 
+    ret = kvm_get_vcpu_events(cpu);
+    if (ret) {
+        return ret;
+    }
+
     if (!write_kvmstate_to_list(cpu)) {
         return EINVAL;
     }
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index e0b8246..5de8ff0 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -546,6 +546,9 @@
 
     kvm_arm_init_debug(cs);
 
+    /* Check whether user space can specify guest syndrome value */
+    kvm_arm_init_serror_injection(cs);
+
     return kvm_arm_init_cpreg_list(cpu);
 }
 
@@ -727,6 +730,11 @@
         return ret;
     }
 
+    ret = kvm_put_vcpu_events(cpu);
+    if (ret) {
+        return ret;
+    }
+
     if (!write_list_to_kvmstate(cpu, level)) {
         return EINVAL;
     }
@@ -863,6 +871,11 @@
     }
     vfp_set_fpcr(env, fpr);
 
+    ret = kvm_get_vcpu_events(cpu);
+    if (ret) {
+        return ret;
+    }
+
     if (!write_kvmstate_to_list(cpu)) {
         return EINVAL;
     }
@@ -920,7 +933,7 @@
 
 bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit)
 {
-    int hsr_ec = debug_exit->hsr >> ARM_EL_EC_SHIFT;
+    int hsr_ec = syn_get_ec(debug_exit->hsr);
     ARMCPU *cpu = ARM_CPU(cs);
     CPUClass *cc = CPU_GET_CLASS(cs);
     CPUARMState *env = &cpu->env;
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index 863f205..21c0129 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -50,9 +50,9 @@
 
 /**
  * kvm_arm_init_cpreg_list:
- * @cs: CPUState
+ * @cpu: ARMCPU
  *
- * Initialize the CPUState's cpreg list according to the kernel's
+ * Initialize the ARMCPU cpreg list according to the kernel's
  * definition of what CPU registers it knows about (and throw away
  * the previous TCG-created cpreg list).
  *
@@ -121,6 +121,30 @@
  */
 void kvm_arm_reset_vcpu(ARMCPU *cpu);
 
+/**
+ * kvm_arm_init_serror_injection:
+ * @cs: CPUState
+ *
+ * Check whether KVM can set guest SError syndrome.
+ */
+void kvm_arm_init_serror_injection(CPUState *cs);
+
+/**
+ * kvm_get_vcpu_events:
+ * @cpu: ARMCPU
+ *
+ * Get VCPU event state (pending SError and its syndrome) from KVM.
+ */
+int kvm_get_vcpu_events(ARMCPU *cpu);
+
+/**
+ * kvm_put_vcpu_events:
+ * @cpu: ARMCPU
+ *
+ * Put VCPU event state (pending SError, plus syndrome if supported) to KVM.
+ */
+int kvm_put_vcpu_events(ARMCPU *cpu);
+
 #ifdef CONFIG_KVM
 /**
  * kvm_arm_create_scratch_host_vcpu:
diff --git a/target/arm/machine.c b/target/arm/machine.c
index ff4ec22..239fe4e 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -131,9 +131,8 @@
 static bool sve_needed(void *opaque)
 {
     ARMCPU *cpu = opaque;
-    CPUARMState *env = &cpu->env;
 
-    return arm_feature(env, ARM_FEATURE_SVE);
+    return cpu_isar_feature(aa64_sve, cpu);
 }
 
 /* The first two words of each Zreg is stored in VFP state.  */
@@ -172,6 +171,27 @@
 };
 #endif /* AARCH64 */
 
+static bool serror_needed(void *opaque)
+{
+    ARMCPU *cpu = opaque;
+    CPUARMState *env = &cpu->env;
+
+    return env->serror.pending != 0;
+}
+
+static const VMStateDescription vmstate_serror = {
+    .name = "cpu/serror",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = serror_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(env.serror.pending, ARMCPU),
+        VMSTATE_UINT8(env.serror.has_esr, ARMCPU),
+        VMSTATE_UINT64(env.serror.esr, ARMCPU),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static bool m_needed(void *opaque)
 {
     ARMCPU *cpu = opaque;
@@ -726,6 +746,7 @@
 #ifdef TARGET_AARCH64
         &vmstate_sve,
 #endif
+        &vmstate_serror,
         NULL
     }
 };
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index 952b8d1..90741f6 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -28,8 +28,8 @@
 #define SIGNBIT (uint32_t)0x80000000
 #define SIGNBIT64 ((uint64_t)1 << 63)
 
-static void raise_exception(CPUARMState *env, uint32_t excp,
-                            uint32_t syndrome, uint32_t target_el)
+void raise_exception(CPUARMState *env, uint32_t excp,
+                     uint32_t syndrome, uint32_t target_el)
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
 
@@ -42,7 +42,7 @@
          * (see DDI0478C.a D1.10.4)
          */
         target_el = 2;
-        if (syndrome >> ARM_EL_EC_SHIFT == EC_ADVSIMDFPACCESSTRAP) {
+        if (syn_get_ec(syndrome) == EC_ADVSIMDFPACCESSTRAP) {
             syndrome = syn_uncategorized();
         }
     }
@@ -238,6 +238,25 @@
 
 #endif /* !defined(CONFIG_USER_ONLY) */
 
+void HELPER(v8m_stackcheck)(CPUARMState *env, uint32_t newvalue)
+{
+    /*
+     * Perform the v8M stack limit check for SP updates from translated code,
+     * raising an exception if the limit is breached.
+     */
+    if (newvalue < v7m_sp_limit(env)) {
+        CPUState *cs = CPU(arm_env_get_cpu(env));
+
+        /*
+         * Stack limit exceptions are a rare case, so rather than syncing
+         * PC/condbits before the call, we use cpu_restore_state() to
+         * get them right before raising the exception.
+         */
+        cpu_restore_state(cs, GETPC(), true);
+        raise_exception(env, EXCP_STKOF, 0, 1);
+    }
+}
+
 uint32_t HELPER(add_setq)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a + b;
@@ -1082,6 +1101,11 @@
                       "AArch64 EL%d PC 0x%" PRIx64 "\n",
                       cur_el, new_el, env->pc);
     }
+    /*
+     * Note that cur_el can never be 0.  If new_el is 0, then
+     * el0_a64 is return_to_aa64, else el0_a64 is ignored.
+     */
+    aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);
 
     qemu_mutex_lock_iothread();
     arm_call_el_change_hook(arm_env_get_cpu(env));
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 0f98097..8cbc651 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -19,6 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
+#include "internals.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
@@ -1688,6 +1689,47 @@
     }
 }
 
+/* Similarly for memset of 0.  */
+static void swap_memzero(void *vd, size_t n)
+{
+    uintptr_t d = (uintptr_t)vd;
+    uintptr_t o = (d | n) & 7;
+    size_t i;
+
+    /* Usually, the first bit of a predicate is set, so N is 0.  */
+    if (likely(n == 0)) {
+        return;
+    }
+
+#ifndef HOST_WORDS_BIGENDIAN
+    o = 0;
+#endif
+    switch (o) {
+    case 0:
+        memset(vd, 0, n);
+        break;
+
+    case 4:
+        for (i = 0; i < n; i += 4) {
+            *(uint32_t *)H1_4(d + i) = 0;
+        }
+        break;
+
+    case 2:
+    case 6:
+        for (i = 0; i < n; i += 2) {
+            *(uint16_t *)H1_2(d + i) = 0;
+        }
+        break;
+
+    default:
+        for (i = 0; i < n; i++) {
+            *(uint8_t *)H1(d + i) = 0;
+        }
+        break;
+    }
+}
+
 void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc)
 {
     intptr_t opr_sz = simd_oprsz(desc);
@@ -3927,162 +3969,472 @@
 /*
  * Load contiguous data, protected by a governing predicate.
  */
-#define DO_LD1(NAME, FN, TYPEE, TYPEM, H)                  \
-static void do_##NAME(CPUARMState *env, void *vd, void *vg, \
-                      target_ulong addr, intptr_t oprsz,   \
-                      uintptr_t ra)                        \
-{                                                          \
-    intptr_t i = 0;                                        \
-    do {                                                   \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));    \
-        do {                                               \
-            TYPEM m = 0;                                   \
-            if (pg & 1) {                                  \
-                m = FN(env, addr, ra);                     \
-            }                                              \
-            *(TYPEE *)(vd + H(i)) = m;                     \
-            i += sizeof(TYPEE), pg >>= sizeof(TYPEE);      \
-            addr += sizeof(TYPEM);                         \
-        } while (i & 15);                                  \
-    } while (i < oprsz);                                   \
-}                                                          \
-void HELPER(NAME)(CPUARMState *env, void *vg,              \
-                  target_ulong addr, uint32_t desc)        \
-{                                                          \
-    do_##NAME(env, &env->vfp.zregs[simd_data(desc)], vg,   \
-              addr, simd_oprsz(desc), GETPC());            \
+
+/*
+ * Load elements into @vd, controlled by @vg, from @host + @mem_ofs.
+ * Memory is valid through @host + @mem_max.  The register element
+ * indices are inferred from @mem_ofs, as modified by the types for
+ * which the helper is built.  Return the @mem_ofs of the first element
+ * not loaded (which is @mem_max if they are all loaded).
+ *
+ * For softmmu, we have fully validated the guest page.  For user-only,
+ * we cannot fully validate without taking the mmap lock, but since we
+ * know the access is within one host page, if any access is valid they
+ * all must be valid.  However, when @vg is all false, it may be that
+ * no access is valid.
+ */
+typedef intptr_t sve_ld1_host_fn(void *vd, void *vg, void *host,
+                                 intptr_t mem_ofs, intptr_t mem_max);
+
+/*
+ * Load one element into @vd + @reg_off from (@env, @vaddr, @ra).
+ * The controlling predicate is known to be true.
+ */
+typedef void sve_ld1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off,
+                            target_ulong vaddr, TCGMemOpIdx oi, uintptr_t ra);
+typedef sve_ld1_tlb_fn sve_st1_tlb_fn;
+
+/*
+ * Generate the above primitives.
+ */
+
+#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \
+static intptr_t sve_##NAME##_host(void *vd, void *vg, void *host,           \
+                                  intptr_t mem_off, const intptr_t mem_max) \
+{                                                                           \
+    intptr_t reg_off = mem_off * (sizeof(TYPEE) / sizeof(TYPEM));           \
+    uint64_t *pg = vg;                                                      \
+    while (mem_off + sizeof(TYPEM) <= mem_max) {                            \
+        TYPEM val = 0;                                                      \
+        if (likely((pg[reg_off >> 6] >> (reg_off & 63)) & 1)) {             \
+            val = HOST(host + mem_off);                                     \
+        }                                                                   \
+        *(TYPEE *)(vd + H(reg_off)) = val;                                  \
+        mem_off += sizeof(TYPEM), reg_off += sizeof(TYPEE);                 \
+    }                                                                       \
+    return mem_off;                                                         \
 }
 
-#define DO_LD2(NAME, FN, TYPEE, TYPEM, H)                  \
-void HELPER(NAME)(CPUARMState *env, void *vg,              \
-                  target_ulong addr, uint32_t desc)        \
-{                                                          \
-    intptr_t i, oprsz = simd_oprsz(desc);                  \
-    intptr_t ra = GETPC();                                 \
-    unsigned rd = simd_data(desc);                         \
-    void *d1 = &env->vfp.zregs[rd];                        \
-    void *d2 = &env->vfp.zregs[(rd + 1) & 31];             \
-    for (i = 0; i < oprsz; ) {                             \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));    \
-        do {                                               \
-            TYPEM m1 = 0, m2 = 0;                          \
-            if (pg & 1) {                                  \
-                m1 = FN(env, addr, ra);                    \
-                m2 = FN(env, addr + sizeof(TYPEM), ra);    \
-            }                                              \
-            *(TYPEE *)(d1 + H(i)) = m1;                    \
-            *(TYPEE *)(d2 + H(i)) = m2;                    \
-            i += sizeof(TYPEE), pg >>= sizeof(TYPEE);      \
-            addr += 2 * sizeof(TYPEM);                     \
-        } while (i & 15);                                  \
-    }                                                      \
+#ifdef CONFIG_SOFTMMU
+#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \
+static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off,  \
+                             target_ulong addr, TCGMemOpIdx oi, uintptr_t ra)  \
+{                                                                           \
+    TYPEM val = TLB(env, addr, oi, ra);                                     \
+    *(TYPEE *)(vd + H(reg_off)) = val;                                      \
+}
+#else
+#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB)                  \
+static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off,  \
+                             target_ulong addr, TCGMemOpIdx oi, uintptr_t ra)  \
+{                                                                           \
+    TYPEM val = HOST(g2h(addr));                                            \
+    *(TYPEE *)(vd + H(reg_off)) = val;                                      \
+}
+#endif
+
+#define DO_LD_PRIM_1(NAME, H, TE, TM)                   \
+    DO_LD_HOST(NAME, H, TE, TM, ldub_p)                 \
+    DO_LD_TLB(NAME, H, TE, TM, ldub_p, 0, helper_ret_ldub_mmu)
+
+DO_LD_PRIM_1(ld1bb,  H1,   uint8_t,  uint8_t)
+DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t)
+DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t,  int8_t)
+DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t)
+DO_LD_PRIM_1(ld1bss, H1_4, uint32_t,  int8_t)
+DO_LD_PRIM_1(ld1bdu,     , uint64_t, uint8_t)
+DO_LD_PRIM_1(ld1bds,     , uint64_t,  int8_t)
+
+#define DO_LD_PRIM_2(NAME, end, MOEND, H, TE, TM, PH, PT)  \
+    DO_LD_HOST(NAME##_##end, H, TE, TM, PH##_##end##_p)    \
+    DO_LD_TLB(NAME##_##end, H, TE, TM, PH##_##end##_p,     \
+              MOEND, helper_##end##_##PT##_mmu)
+
+DO_LD_PRIM_2(ld1hh,  le, MO_LE, H1_2, uint16_t, uint16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hsu, le, MO_LE, H1_4, uint32_t, uint16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hss, le, MO_LE, H1_4, uint32_t,  int16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hdu, le, MO_LE,     , uint64_t, uint16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hds, le, MO_LE,     , uint64_t,  int16_t, lduw, lduw)
+
+DO_LD_PRIM_2(ld1ss,  le, MO_LE, H1_4, uint32_t, uint32_t, ldl, ldul)
+DO_LD_PRIM_2(ld1sdu, le, MO_LE,     , uint64_t, uint32_t, ldl, ldul)
+DO_LD_PRIM_2(ld1sds, le, MO_LE,     , uint64_t,  int32_t, ldl, ldul)
+
+DO_LD_PRIM_2(ld1dd,  le, MO_LE,     , uint64_t, uint64_t, ldq, ldq)
+
+DO_LD_PRIM_2(ld1hh,  be, MO_BE, H1_2, uint16_t, uint16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hsu, be, MO_BE, H1_4, uint32_t, uint16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hss, be, MO_BE, H1_4, uint32_t,  int16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hdu, be, MO_BE,     , uint64_t, uint16_t, lduw, lduw)
+DO_LD_PRIM_2(ld1hds, be, MO_BE,     , uint64_t,  int16_t, lduw, lduw)
+
+DO_LD_PRIM_2(ld1ss,  be, MO_BE, H1_4, uint32_t, uint32_t, ldl, ldul)
+DO_LD_PRIM_2(ld1sdu, be, MO_BE,     , uint64_t, uint32_t, ldl, ldul)
+DO_LD_PRIM_2(ld1sds, be, MO_BE,     , uint64_t,  int32_t, ldl, ldul)
+
+DO_LD_PRIM_2(ld1dd,  be, MO_BE,     , uint64_t, uint64_t, ldq, ldq)
+
+#undef DO_LD_TLB
+#undef DO_LD_HOST
+#undef DO_LD_PRIM_1
+#undef DO_LD_PRIM_2
+
+/*
+ * Skip through a sequence of inactive elements in the guarding predicate @vg,
+ * beginning at @reg_off bounded by @reg_max.  Return the offset of the first
+ * active element >= @reg_off, or @reg_max if there were no active elements.
+ */
+static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off,
+                                 intptr_t reg_max, int esz)
+{
+    uint64_t pg_mask = pred_esz_masks[esz];
+    uint64_t pg = (vg[reg_off >> 6] & pg_mask) >> (reg_off & 63);
+
+    /* In normal usage, the first element is active.  */
+    if (likely(pg & 1)) {
+        return reg_off;
+    }
+
+    if (pg == 0) {
+        reg_off &= -64;
+        do {
+            reg_off += 64;
+            if (unlikely(reg_off >= reg_max)) {
+                /* The entire predicate was false.  */
+                return reg_max;
+            }
+            pg = vg[reg_off >> 6] & pg_mask;
+        } while (pg == 0);
+    }
+    reg_off += ctz64(pg);
+
+    /* We should never see an out of range predicate bit set.  */
+    tcg_debug_assert(reg_off < reg_max);
+    return reg_off;
 }
 
-#define DO_LD3(NAME, FN, TYPEE, TYPEM, H)                  \
-void HELPER(NAME)(CPUARMState *env, void *vg,              \
-                  target_ulong addr, uint32_t desc)        \
-{                                                          \
-    intptr_t i, oprsz = simd_oprsz(desc);                  \
-    intptr_t ra = GETPC();                                 \
-    unsigned rd = simd_data(desc);                         \
-    void *d1 = &env->vfp.zregs[rd];                        \
-    void *d2 = &env->vfp.zregs[(rd + 1) & 31];             \
-    void *d3 = &env->vfp.zregs[(rd + 2) & 31];             \
-    for (i = 0; i < oprsz; ) {                             \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));    \
-        do {                                               \
-            TYPEM m1 = 0, m2 = 0, m3 = 0;                  \
-            if (pg & 1) {                                  \
-                m1 = FN(env, addr, ra);                    \
-                m2 = FN(env, addr + sizeof(TYPEM), ra);    \
-                m3 = FN(env, addr + 2 * sizeof(TYPEM), ra); \
-            }                                              \
-            *(TYPEE *)(d1 + H(i)) = m1;                    \
-            *(TYPEE *)(d2 + H(i)) = m2;                    \
-            *(TYPEE *)(d3 + H(i)) = m3;                    \
-            i += sizeof(TYPEE), pg >>= sizeof(TYPEE);      \
-            addr += 3 * sizeof(TYPEM);                     \
-        } while (i & 15);                                  \
-    }                                                      \
+/*
+ * Return the largest offset <= @mem_max that does not run past the end of
+ * the page containing @base + @mem_off; assumes TARGET_PAGE_MASK == -pagesize.
+ */
+static intptr_t max_for_page(target_ulong base, intptr_t mem_off,
+                             intptr_t mem_max)
+{
+    target_ulong addr = base + mem_off;
+    intptr_t split = -(intptr_t)(addr | TARGET_PAGE_MASK); /* to page end */
+    return MIN(split, mem_max - mem_off) + mem_off; /* back to an offset */
 }
 
-#define DO_LD4(NAME, FN, TYPEE, TYPEM, H)                  \
-void HELPER(NAME)(CPUARMState *env, void *vg,              \
-                  target_ulong addr, uint32_t desc)        \
-{                                                          \
-    intptr_t i, oprsz = simd_oprsz(desc);                  \
-    intptr_t ra = GETPC();                                 \
-    unsigned rd = simd_data(desc);                         \
-    void *d1 = &env->vfp.zregs[rd];                        \
-    void *d2 = &env->vfp.zregs[(rd + 1) & 31];             \
-    void *d3 = &env->vfp.zregs[(rd + 2) & 31];             \
-    void *d4 = &env->vfp.zregs[(rd + 3) & 31];             \
-    for (i = 0; i < oprsz; ) {                             \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));    \
-        do {                                               \
-            TYPEM m1 = 0, m2 = 0, m3 = 0, m4 = 0;          \
-            if (pg & 1) {                                  \
-                m1 = FN(env, addr, ra);                    \
-                m2 = FN(env, addr + sizeof(TYPEM), ra);    \
-                m3 = FN(env, addr + 2 * sizeof(TYPEM), ra); \
-                m4 = FN(env, addr + 3 * sizeof(TYPEM), ra); \
-            }                                              \
-            *(TYPEE *)(d1 + H(i)) = m1;                    \
-            *(TYPEE *)(d2 + H(i)) = m2;                    \
-            *(TYPEE *)(d3 + H(i)) = m3;                    \
-            *(TYPEE *)(d4 + H(i)) = m4;                    \
-            i += sizeof(TYPEE), pg >>= sizeof(TYPEE);      \
-            addr += 4 * sizeof(TYPEM);                     \
-        } while (i & 15);                                  \
-    }                                                      \
+static inline void set_helper_retaddr(uintptr_t ra)
+{ /* Publish @ra in helper_retaddr (user-only builds); otherwise a no-op. */
+#ifdef CONFIG_USER_ONLY
+    helper_retaddr = ra;
+#endif /* CONFIG_USER_ONLY */
 }
 
-DO_LD1(sve_ld1bhu_r, cpu_ldub_data_ra, uint16_t, uint8_t, H1_2)
-DO_LD1(sve_ld1bhs_r, cpu_ldsb_data_ra, uint16_t, int8_t, H1_2)
-DO_LD1(sve_ld1bsu_r, cpu_ldub_data_ra, uint32_t, uint8_t, H1_4)
-DO_LD1(sve_ld1bss_r, cpu_ldsb_data_ra, uint32_t, int8_t, H1_4)
-DO_LD1(sve_ld1bdu_r, cpu_ldub_data_ra, uint64_t, uint8_t, )
-DO_LD1(sve_ld1bds_r, cpu_ldsb_data_ra, uint64_t, int8_t, )
+/*
+ * Report whether @host is usable.  For user-only, tlb_vaddr_to_host is just
+ * g2h(x), which is always non-null, so the useless test is elided there.
+ */
+static inline bool test_host_page(void *host)
+{
+#ifdef CONFIG_USER_ONLY
+    return true;
+#else /* !CONFIG_USER_ONLY */
+    return likely(host != NULL);
+#endif /* CONFIG_USER_ONLY */
+}
 
-DO_LD1(sve_ld1hsu_r, cpu_lduw_data_ra, uint32_t, uint16_t, H1_4)
-DO_LD1(sve_ld1hss_r, cpu_ldsw_data_ra, uint32_t, int16_t, H1_4)
-DO_LD1(sve_ld1hdu_r, cpu_lduw_data_ra, uint64_t, uint16_t, )
-DO_LD1(sve_ld1hds_r, cpu_ldsw_data_ra, uint64_t, int16_t, )
+/*
+ * Common helper for all contiguous one-register predicated loads.
+ */
+static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
+                      uint32_t desc, const uintptr_t retaddr,
+                      const int esz, const int msz, /* log2 byte sizes */
+                      sve_ld1_host_fn *host_fn,
+                      sve_ld1_tlb_fn *tlb_fn)
+{
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int mmu_idx = get_mmuidx(oi);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    void *vd = &env->vfp.zregs[rd];
+    const int diffsz = esz - msz;               /* log2(elt / mem size) */
+    const intptr_t reg_max = simd_oprsz(desc);  /* byte length of Vd */
+    const intptr_t mem_max = reg_max >> diffsz; /* byte length in memory */
+    ARMVectorReg scratch;
+    void *host;
+    intptr_t split, reg_off, mem_off;
 
-DO_LD1(sve_ld1sdu_r, cpu_ldl_data_ra, uint64_t, uint32_t, )
-DO_LD1(sve_ld1sds_r, cpu_ldl_data_ra, uint64_t, int32_t, )
+    /* Find the first active element.  */
+    reg_off = find_next_active(vg, 0, reg_max, esz);
+    if (unlikely(reg_off == reg_max)) {
+        /* The entire predicate was false; no load occurs.  */
+        memset(vd, 0, reg_max);
+        return;
+    }
+    mem_off = reg_off >> diffsz;
+    set_helper_retaddr(retaddr);
 
-DO_LD1(sve_ld1bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1)
-DO_LD2(sve_ld2bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1)
-DO_LD3(sve_ld3bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1)
-DO_LD4(sve_ld4bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1)
+    /*
+     * If the (remaining) load is entirely within a single page, then:
+     * For softmmu, if the tlb hits, then no faults will occur;
+     * For user-only, either the first load will fault or none will.
+     * We can thus perform the load directly to the destination and
+     * Vd will be unmodified on any exception path.
+     */
+    split = max_for_page(addr, mem_off, mem_max);
+    if (likely(split == mem_max)) {
+        host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
+        if (test_host_page(host)) {
+            mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max);
+            tcg_debug_assert(mem_off == mem_max);
+            set_helper_retaddr(0);
+            /* After having taken any fault, zero leading inactive elements. */
+            swap_memzero(vd, reg_off);
+            return;
+        }
+    }
 
-DO_LD1(sve_ld1hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2)
-DO_LD2(sve_ld2hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2)
-DO_LD3(sve_ld3hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2)
-DO_LD4(sve_ld4hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2)
+    /*
+     * Perform the predicated read into a temporary, thus ensuring that
+     * if the load of the last element faults, Vd is not modified.
+     */
+#ifdef CONFIG_USER_ONLY
+    swap_memzero(&scratch, reg_off);
+    host_fn(&scratch, vg, g2h(addr), mem_off, mem_max);
+#else /* !CONFIG_USER_ONLY */
+    memset(&scratch, 0, reg_max);
+    goto start; /* reg_off, mem_off and split were computed above */
+    while (1) {
+        reg_off = find_next_active(vg, reg_off, reg_max, esz);
+        if (reg_off >= reg_max) {
+            break;
+        }
+        mem_off = reg_off >> diffsz;
+        split = max_for_page(addr, mem_off, mem_max);
 
-DO_LD1(sve_ld1ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4)
-DO_LD2(sve_ld2ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4)
-DO_LD3(sve_ld3ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4)
-DO_LD4(sve_ld4ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4)
+    start:
+        if (split - mem_off >= (1 << msz)) {
+            /* At least one whole element on this page.  */
+            host = tlb_vaddr_to_host(env, addr + mem_off,
+                                     MMU_DATA_LOAD, mmu_idx);
+            if (host) {
+                mem_off = host_fn(&scratch, vg, host - mem_off,
+                                  mem_off, split);
+                reg_off = mem_off << diffsz;
+                continue;
+            }
+        }
 
-DO_LD1(sve_ld1dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, )
-DO_LD2(sve_ld2dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, )
-DO_LD3(sve_ld3dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, )
-DO_LD4(sve_ld4dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, )
+        /*
+         * Perform one normal read.  This may fault, longjmping out to the
+         * main loop in order to raise an exception.  It may succeed, and
+         * as a side-effect load the TLB entry for the next round.  Finally,
+         * in the extremely unlikely case we're performing this operation
+         * on I/O memory, it may succeed but not bring in the TLB entry.
+         * But even then we have still made forward progress.
+         */
+        tlb_fn(env, &scratch, reg_off, addr + mem_off, oi, retaddr);
+        reg_off += 1 << esz;
+    }
+#endif /* CONFIG_USER_ONLY */
 
-#undef DO_LD1
-#undef DO_LD2
-#undef DO_LD3
-#undef DO_LD4
+    set_helper_retaddr(0);
+    memcpy(vd, &scratch, reg_max); /* no fault occurred: commit the result */
+}
+
+#define DO_LD1_1(NAME, ESZ) /* byte-sized memory (msz 0): no le/be split */ \
+void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg,        \
+                            target_ulong addr, uint32_t desc)  \
+{                                                              \
+    sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, 0,            \
+              sve_##NAME##_host, sve_##NAME##_tlb);            \
+}
+
+#define DO_LD1_2(NAME, ESZ, MSZ) /* emits one _le_r and one _be_r helper */ \
+void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg,        \
+                               target_ulong addr, uint32_t desc)  \
+{                                                                 \
+    sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ,             \
+              sve_##NAME##_le_host, sve_##NAME##_le_tlb);         \
+}                                                                 \
+void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg,        \
+                               target_ulong addr, uint32_t desc)  \
+{                                                                 \
+    sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ,             \
+              sve_##NAME##_be_host, sve_##NAME##_be_tlb);         \
+}
+
+DO_LD1_1(ld1bb,  0)        /* elt 1 byte,  mem 1 byte */
+DO_LD1_1(ld1bhu, 1)        /* elt 2 bytes, mem 1 byte */
+DO_LD1_1(ld1bhs, 1)        /* elt 2 bytes, mem 1 byte */
+DO_LD1_1(ld1bsu, 2)        /* elt 4 bytes, mem 1 byte */
+DO_LD1_1(ld1bss, 2)        /* elt 4 bytes, mem 1 byte */
+DO_LD1_1(ld1bdu, 3)        /* elt 8 bytes, mem 1 byte */
+DO_LD1_1(ld1bds, 3)        /* elt 8 bytes, mem 1 byte */
+
+DO_LD1_2(ld1hh,  1, 1)     /* elt 2 bytes, mem 2 bytes */
+DO_LD1_2(ld1hsu, 2, 1)     /* elt 4 bytes, mem 2 bytes */
+DO_LD1_2(ld1hss, 2, 1)     /* elt 4 bytes, mem 2 bytes */
+DO_LD1_2(ld1hdu, 3, 1)     /* elt 8 bytes, mem 2 bytes */
+DO_LD1_2(ld1hds, 3, 1)     /* elt 8 bytes, mem 2 bytes */
+
+DO_LD1_2(ld1ss,  2, 2)     /* elt 4 bytes, mem 4 bytes */
+DO_LD1_2(ld1sdu, 3, 2)     /* elt 8 bytes, mem 4 bytes */
+DO_LD1_2(ld1sds, 3, 2)     /* elt 8 bytes, mem 4 bytes */
+
+DO_LD1_2(ld1dd,  3, 3)     /* elt 8 bytes, mem 8 bytes */
+
+#undef DO_LD1_1
+#undef DO_LD1_2
+
+/*
+ * Common helpers for all contiguous 2-, 3- and 4-register predicated loads.
+ */
+static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr,
+                      uint32_t desc, int size, uintptr_t ra,
+                      sve_ld1_tlb_fn *tlb_fn)
+{
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    intptr_t i, oprsz = simd_oprsz(desc);
+    ARMVectorReg scratch[2] = { };  /* starts zeroed; only active elts load */
+
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); /* next 16 pred bits */
+        do {
+            if (pg & 1) {           /* element at byte offset i is active */
+                tlb_fn(env, &scratch[0], i, addr, oi, ra);
+                tlb_fn(env, &scratch[1], i, addr + size, oi, ra);
+            }
+            i += size, pg >>= size; /* one predicate bit per vector byte */
+            addr += 2 * size;       /* next group of 2 elements in memory */
+        } while (i & 15);           /* until the 16 bits in pg are used up */
+    }
+    set_helper_retaddr(0);
+
+    /* Wait until all exceptions have been raised to write back.  */
+    memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
+    memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz);
+}
+
+static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr,
+                      uint32_t desc, int size, uintptr_t ra,
+                      sve_ld1_tlb_fn *tlb_fn)
+{
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    intptr_t i, oprsz = simd_oprsz(desc);
+    ARMVectorReg scratch[3] = { };  /* starts zeroed; only active elts load */
+
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); /* next 16 pred bits */
+        do {
+            if (pg & 1) {           /* element at byte offset i is active */
+                tlb_fn(env, &scratch[0], i, addr, oi, ra);
+                tlb_fn(env, &scratch[1], i, addr + size, oi, ra);
+                tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra);
+            }
+            i += size, pg >>= size; /* one predicate bit per vector byte */
+            addr += 3 * size;       /* next group of 3 elements in memory */
+        } while (i & 15);           /* until the 16 bits in pg are used up */
+    }
+    set_helper_retaddr(0);
+
+    /* Wait until all exceptions have been raised to write back.  */
+    memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
+    memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz);
+    memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz);
+}
+
+static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr,
+                      uint32_t desc, int size, uintptr_t ra,
+                      sve_ld1_tlb_fn *tlb_fn)
+{
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    intptr_t i, oprsz = simd_oprsz(desc);
+    ARMVectorReg scratch[4] = { };  /* starts zeroed; only active elts load */
+
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); /* next 16 pred bits */
+        do {
+            if (pg & 1) {           /* element at byte offset i is active */
+                tlb_fn(env, &scratch[0], i, addr, oi, ra);
+                tlb_fn(env, &scratch[1], i, addr + size, oi, ra);
+                tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra);
+                tlb_fn(env, &scratch[3], i, addr + 3 * size, oi, ra);
+            }
+            i += size, pg >>= size; /* one predicate bit per vector byte */
+            addr += 4 * size;       /* next group of 4 elements in memory */
+        } while (i & 15);           /* until the 16 bits in pg are used up */
+    }
+    set_helper_retaddr(0);
+
+    /* Wait until all exceptions have been raised to write back.  */
+    memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
+    memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz);
+    memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz);
+    memcpy(&env->vfp.zregs[(rd + 3) & 31], &scratch[3], oprsz);
+}
+
+#define DO_LDN_1(N) /* N-register byte load: element size 1, no le/be */ \
+void __attribute__((flatten)) HELPER(sve_ld##N##bb_r)               \
+    (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc)  \
+{                                                                   \
+    sve_ld##N##_r(env, vg, addr, desc, 1, GETPC(), sve_ld1bb_tlb);  \
+}
+
+#define DO_LDN_2(N, SUFF, SIZE) /* SIZE: element size in bytes; le + be */ \
+void __attribute__((flatten)) HELPER(sve_ld##N##SUFF##_le_r)          \
+    (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc)    \
+{                                                                     \
+    sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(),                 \
+                  sve_ld1##SUFF##_le_tlb);                            \
+}                                                                     \
+void __attribute__((flatten)) HELPER(sve_ld##N##SUFF##_be_r)          \
+    (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc)    \
+{                                                                     \
+    sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(),                 \
+                  sve_ld1##SUFF##_be_tlb);                            \
+}
+
+DO_LDN_1(2)                /* sve_ld2bb_r: 1-byte elements */
+DO_LDN_1(3)                /* sve_ld3bb_r */
+DO_LDN_1(4)                /* sve_ld4bb_r */
+
+DO_LDN_2(2, hh, 2)         /* 2-byte elements, _le_r and _be_r helpers */
+DO_LDN_2(3, hh, 2)
+DO_LDN_2(4, hh, 2)
+
+DO_LDN_2(2, ss, 4)         /* 4-byte elements */
+DO_LDN_2(3, ss, 4)
+DO_LDN_2(4, ss, 4)
+
+DO_LDN_2(2, dd, 8)         /* 8-byte elements */
+DO_LDN_2(3, dd, 8)
+DO_LDN_2(4, dd, 8)
+
+#undef DO_LDN_1
+#undef DO_LDN_2
 
 /*
  * Load contiguous data, first-fault and no-fault.
+ *
+ * For user-only, one could argue that we should hold the mmap_lock during
+ * the operation so that there is no race between page_check_range and the
+ * load operation.  However, unmapping pages out from under a running thread
+ * is extraordinarily unlikely.  This theoretical race condition also affects
+ * linux-user/ in its get_user/put_user macros.
+ *
+ * TODO: Construct some helpers, written in assembly, that interact with
+ * handle_cpu_signal to produce memory ops which can properly report errors
+ * without racing.
  */
 
-#ifdef CONFIG_USER_ONLY
-
 /* Fault on byte I.  All bits in FFR from I are cleared.  The vector
  * result from I is CONSTRAINED UNPREDICTABLE; we choose the MERGE
  * option, which leaves subsequent data unchanged.
@@ -4100,573 +4452,932 @@
     }
 }
 
-/* Hold the mmap lock during the operation so that there is no race
- * between page_check_range and the load operation.  We expect the
- * usual case to have no faults at all, so we check the whole range
- * first and if successful defer to the normal load operation.
- *
- * TODO: Change mmap_lock to a rwlock so that multiple readers
- * can run simultaneously.  This will probably help other uses
- * within QEMU as well.
+/*
+ * Common helper for all contiguous first-fault loads.
  */
-#define DO_LDFF1(PART, FN, TYPEE, TYPEM, H)                             \
-static void do_sve_ldff1##PART(CPUARMState *env, void *vd, void *vg,    \
-                               target_ulong addr, intptr_t oprsz,       \
-                               bool first, uintptr_t ra)                \
-{                                                                       \
-    intptr_t i = 0;                                                     \
-    do {                                                                \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));                 \
-        do {                                                            \
-            TYPEM m = 0;                                                \
-            if (pg & 1) {                                               \
-                if (!first &&                                           \
-                    unlikely(page_check_range(addr, sizeof(TYPEM),      \
-                                              PAGE_READ))) {            \
-                    record_fault(env, i, oprsz);                        \
-                    return;                                             \
-                }                                                       \
-                m = FN(env, addr, ra);                                  \
-                first = false;                                          \
-            }                                                           \
-            *(TYPEE *)(vd + H(i)) = m;                                  \
-            i += sizeof(TYPEE), pg >>= sizeof(TYPEE);                   \
-            addr += sizeof(TYPEM);                                      \
-        } while (i & 15);                                               \
-    } while (i < oprsz);                                                \
-}                                                                       \
-void HELPER(sve_ldff1##PART)(CPUARMState *env, void *vg,                \
-                             target_ulong addr, uint32_t desc)          \
-{                                                                       \
-    intptr_t oprsz = simd_oprsz(desc);                                  \
-    unsigned rd = simd_data(desc);                                      \
-    void *vd = &env->vfp.zregs[rd];                                     \
-    mmap_lock();                                                        \
-    if (likely(page_check_range(addr, oprsz, PAGE_READ) == 0)) {        \
-        do_sve_ld1##PART(env, vd, vg, addr, oprsz, GETPC());            \
-    } else {                                                            \
-        do_sve_ldff1##PART(env, vd, vg, addr, oprsz, true, GETPC());    \
-    }                                                                   \
-    mmap_unlock();                                                      \
-}
+static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
+                        uint32_t desc, const uintptr_t retaddr,
+                        const int esz, const int msz, /* log2 byte sizes */
+                        sve_ld1_host_fn *host_fn,
+                        sve_ld1_tlb_fn *tlb_fn)
+{
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int mmu_idx = get_mmuidx(oi);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    void *vd = &env->vfp.zregs[rd];
+    const int diffsz = esz - msz;               /* log2(elt / mem size) */
+    const intptr_t reg_max = simd_oprsz(desc);  /* byte length of Vd */
+    const intptr_t mem_max = reg_max >> diffsz; /* byte length in memory */
+    intptr_t split, reg_off, mem_off;
+    void *host;
 
-/* No-fault loads are like first-fault loads without the
- * first faulting special case.
- */
-#define DO_LDNF1(PART)                                                  \
-void HELPER(sve_ldnf1##PART)(CPUARMState *env, void *vg,                \
-                             target_ulong addr, uint32_t desc)          \
-{                                                                       \
-    intptr_t oprsz = simd_oprsz(desc);                                  \
-    unsigned rd = simd_data(desc);                                      \
-    void *vd = &env->vfp.zregs[rd];                                     \
-    mmap_lock();                                                        \
-    if (likely(page_check_range(addr, oprsz, PAGE_READ) == 0)) {        \
-        do_sve_ld1##PART(env, vd, vg, addr, oprsz, GETPC());            \
-    } else {                                                            \
-        do_sve_ldff1##PART(env, vd, vg, addr, oprsz, false, GETPC());   \
-    }                                                                   \
-    mmap_unlock();                                                      \
-}
+    /* Skip to the first active element.  */
+    reg_off = find_next_active(vg, 0, reg_max, esz);
+    if (unlikely(reg_off == reg_max)) {
+        /* The entire predicate was false; no load occurs.  */
+        memset(vd, 0, reg_max);
+        return;
+    }
+    mem_off = reg_off >> diffsz;
+    set_helper_retaddr(retaddr);
 
+    /*
+     * If the (remaining) load is entirely within a single page, then:
+     * For softmmu, if the tlb hits, then no faults will occur;
+     * For user-only, either the first load will fault or none will.
+     * We can thus perform the load directly to the destination and
+     * Vd will be unmodified on any exception path.
+     */
+    split = max_for_page(addr, mem_off, mem_max);
+    if (likely(split == mem_max)) {
+        host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
+        if (test_host_page(host)) {
+            mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max);
+            tcg_debug_assert(mem_off == mem_max);
+            set_helper_retaddr(0);
+            /* After any fault, zero any leading inactive elements.  */
+            swap_memzero(vd, reg_off);
+            return;
+        }
+    }
+
+#ifdef CONFIG_USER_ONLY
+    /*
+     * The page(s) containing this first element at ADDR+MEM_OFF must
+     * be valid.  Considering that this first element may be misaligned
+     * and cross a page boundary itself, take the rest of the page from
+     * the last byte of the element.
+     */
+    split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max);
+    mem_off = host_fn(vd, vg, g2h(addr), mem_off, split);
+
+    /* After any fault, zero any leading inactive elements.  */
+    swap_memzero(vd, reg_off);
+    reg_off = mem_off << diffsz;
 #else
+    /*
+     * Perform one normal read, which will fault or not.
+     * But it is likely to bring the page into the tlb.
+     */
+    tlb_fn(env, vd, reg_off, addr + mem_off, oi, retaddr);
 
-/* TODO: System mode is not yet supported.
- * This would probably use tlb_vaddr_to_host.
- */
-#define DO_LDFF1(PART, FN, TYPEE, TYPEM, H)                     \
-void HELPER(sve_ldff1##PART)(CPUARMState *env, void *vg,        \
-                  target_ulong addr, uint32_t desc)             \
-{                                                               \
-    g_assert_not_reached();                                     \
-}
+    /* After any fault, zero any leading inactive elements.  */
+    swap_memzero(vd, reg_off);
+    mem_off += 1 << msz;    /* step past the element just read */
+    reg_off += 1 << esz;
 
-#define DO_LDNF1(PART)                                          \
-void HELPER(sve_ldnf1##PART)(CPUARMState *env, void *vg,        \
-                  target_ulong addr, uint32_t desc)             \
-{                                                               \
-    g_assert_not_reached();                                     \
-}
-
+    /* Try again to read the balance of the page, without faulting.  */
+    split = max_for_page(addr, mem_off - 1, mem_max);
+    if (split >= (1 << msz)) {
+        host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
+        if (host) {
+            mem_off = host_fn(vd, vg, host - mem_off, mem_off, split);
+            reg_off = mem_off << diffsz;
+        }
+    }
 #endif
 
-DO_LDFF1(bb_r,  cpu_ldub_data_ra, uint8_t, uint8_t, H1)
-DO_LDFF1(bhu_r, cpu_ldub_data_ra, uint16_t, uint8_t, H1_2)
-DO_LDFF1(bhs_r, cpu_ldsb_data_ra, uint16_t, int8_t, H1_2)
-DO_LDFF1(bsu_r, cpu_ldub_data_ra, uint32_t, uint8_t, H1_4)
-DO_LDFF1(bss_r, cpu_ldsb_data_ra, uint32_t, int8_t, H1_4)
-DO_LDFF1(bdu_r, cpu_ldub_data_ra, uint64_t, uint8_t, )
-DO_LDFF1(bds_r, cpu_ldsb_data_ra, uint64_t, int8_t, )
+    set_helper_retaddr(0);
+    record_fault(env, reg_off, reg_max); /* reg_off: first elt not loaded */
+}
 
-DO_LDFF1(hh_r,  cpu_lduw_data_ra, uint16_t, uint16_t, H1_2)
-DO_LDFF1(hsu_r, cpu_lduw_data_ra, uint32_t, uint16_t, H1_4)
-DO_LDFF1(hss_r, cpu_ldsw_data_ra, uint32_t, int8_t, H1_4)
-DO_LDFF1(hdu_r, cpu_lduw_data_ra, uint64_t, uint16_t, )
-DO_LDFF1(hds_r, cpu_ldsw_data_ra, uint64_t, int16_t, )
+/*
+ * Common helper for all contiguous no-fault loads.
+ */
+static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr,
+                        uint32_t desc, const int esz, const int msz,
+                        sve_ld1_host_fn *host_fn)
+{
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    void *vd = &env->vfp.zregs[rd];
+    const int diffsz = esz - msz;               /* esz, msz: log2 byte sizes */
+    const intptr_t reg_max = simd_oprsz(desc);  /* byte length of Vd */
+    const intptr_t mem_max = reg_max >> diffsz; /* byte length in memory */
+    const int mmu_idx = cpu_mmu_index(env, false);
+    intptr_t split, reg_off, mem_off;
+    void *host;
 
-DO_LDFF1(ss_r,  cpu_ldl_data_ra, uint32_t, uint32_t, H1_4)
-DO_LDFF1(sdu_r, cpu_ldl_data_ra, uint64_t, uint32_t, )
-DO_LDFF1(sds_r, cpu_ldl_data_ra, uint64_t, int32_t, )
+#ifdef CONFIG_USER_ONLY
+    host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx);
+    if (likely(page_check_range(addr, mem_max, PAGE_READ) == 0)) {
+        /* The entire operation is valid and will not fault.  */
+        host_fn(vd, vg, host, 0, mem_max);
+        return;
+    }
+#endif /* CONFIG_USER_ONLY */
 
-DO_LDFF1(dd_r,  cpu_ldq_data_ra, uint64_t, uint64_t, )
+    /* There will be no fault, so we may modify Vd in advance.  */
+    memset(vd, 0, reg_max);
 
-#undef DO_LDFF1
+    /* Skip to the first active element.  */
+    reg_off = find_next_active(vg, 0, reg_max, esz);
+    if (unlikely(reg_off == reg_max)) {
+        /* The entire predicate was false; no load occurs.  */
+        return;
+    }
+    mem_off = reg_off >> diffsz;
 
-DO_LDNF1(bb_r)
-DO_LDNF1(bhu_r)
-DO_LDNF1(bhs_r)
-DO_LDNF1(bsu_r)
-DO_LDNF1(bss_r)
-DO_LDNF1(bdu_r)
-DO_LDNF1(bds_r)
+#ifdef CONFIG_USER_ONLY
+    if (page_check_range(addr + mem_off, 1 << msz, PAGE_READ) == 0) {
+        /* At least one load is valid; take the rest of the page.  */
+        split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max);
+        mem_off = host_fn(vd, vg, host, mem_off, split);
+        reg_off = mem_off << diffsz;
+    }
+#else /* !CONFIG_USER_ONLY */
+    /*
+     * If the address is not in the TLB, we have no way to bring the
+     * entry into the TLB without also risking a fault.  Note that
+     * the corollary is that we never load from an address not in RAM.
+     *
+     * This last is out of spec, in a weird corner case.
+     * Per the MemNF/MemSingleNF pseudocode, a NF load from Device memory
+     * must not actually hit the bus -- it returns UNKNOWN data instead.
+     * But if you map non-RAM with Normal memory attributes and do a NF
+     * load then it should access the bus.  (Nobody ought to actually do
+     * this in the real world, obviously.)
+     *
+     * Then there are the annoying special cases with watchpoints...
+     *
+     * TODO: Add a form of tlb_fill that does not raise an exception,
+     * with a form of tlb_vaddr_to_host and a set of loads to match.
+     * The non_fault_vaddr_to_host would handle everything, usually,
+     * and the loads would handle the iomem path for watchpoints.
+     */
+    host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
+    split = max_for_page(addr, mem_off, mem_max);
+    if (host && split >= (1 << msz)) {
+        mem_off = host_fn(vd, vg, host - mem_off, mem_off, split);
+        reg_off = mem_off << diffsz;
+    }
+#endif /* CONFIG_USER_ONLY */
 
-DO_LDNF1(hh_r)
-DO_LDNF1(hsu_r)
-DO_LDNF1(hss_r)
-DO_LDNF1(hdu_r)
-DO_LDNF1(hds_r)
+    record_fault(env, reg_off, reg_max); /* reg_off: first elt not loaded */
+}
 
-DO_LDNF1(ss_r)
-DO_LDNF1(sdu_r)
-DO_LDNF1(sds_r)
+#define DO_LDFF1_LDNF1_1(PART, ESZ) /* ldff1 + ldnf1 pair, msz 0 (bytes) */ \
+void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg,            \
+                                 target_ulong addr, uint32_t desc)      \
+{                                                                       \
+    sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, 0,                   \
+                sve_ld1##PART##_host, sve_ld1##PART##_tlb);             \
+}                                                                       \
+void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg,            \
+                                 target_ulong addr, uint32_t desc)      \
+{                                                                       \
+    sve_ldnf1_r(env, vg, addr, desc, ESZ, 0, sve_ld1##PART##_host);     \
+}
 
-DO_LDNF1(dd_r)
+#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) /* ldff1 + ldnf1, le and be */ \
+void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg,         \
+                                    target_ulong addr, uint32_t desc)   \
+{                                                                       \
+    sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ,                 \
+                sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb);       \
+}                                                                       \
+void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg,         \
+                                    target_ulong addr, uint32_t desc)   \
+{                                                                       \
+    sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_le_host); \
+}                                                                       \
+void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg,         \
+                                    target_ulong addr, uint32_t desc)   \
+{                                                                       \
+    sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ,                 \
+                sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb);       \
+}                                                                       \
+void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg,         \
+                                    target_ulong addr, uint32_t desc)   \
+{                                                                       \
+    sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_be_host); \
+}
 
-#undef DO_LDNF1
+DO_LDFF1_LDNF1_1(bb,  0)       /* elt 1 byte,  mem 1 byte */
+DO_LDFF1_LDNF1_1(bhu, 1)       /* elt 2 bytes, mem 1 byte */
+DO_LDFF1_LDNF1_1(bhs, 1)       /* elt 2 bytes, mem 1 byte */
+DO_LDFF1_LDNF1_1(bsu, 2)       /* elt 4 bytes, mem 1 byte */
+DO_LDFF1_LDNF1_1(bss, 2)       /* elt 4 bytes, mem 1 byte */
+DO_LDFF1_LDNF1_1(bdu, 3)       /* elt 8 bytes, mem 1 byte */
+DO_LDFF1_LDNF1_1(bds, 3)       /* elt 8 bytes, mem 1 byte */
+
+DO_LDFF1_LDNF1_2(hh,  1, 1)    /* elt 2 bytes, mem 2 bytes */
+DO_LDFF1_LDNF1_2(hsu, 2, 1)    /* elt 4 bytes, mem 2 bytes */
+DO_LDFF1_LDNF1_2(hss, 2, 1)    /* elt 4 bytes, mem 2 bytes */
+DO_LDFF1_LDNF1_2(hdu, 3, 1)    /* elt 8 bytes, mem 2 bytes */
+DO_LDFF1_LDNF1_2(hds, 3, 1)    /* elt 8 bytes, mem 2 bytes */
+
+DO_LDFF1_LDNF1_2(ss,  2, 2)    /* elt 4 bytes, mem 4 bytes */
+DO_LDFF1_LDNF1_2(sdu, 3, 2)    /* elt 8 bytes, mem 4 bytes */
+DO_LDFF1_LDNF1_2(sds, 3, 2)    /* elt 8 bytes, mem 4 bytes */
+
+DO_LDFF1_LDNF1_2(dd,  3, 3)    /* elt 8 bytes, mem 8 bytes */
+
+#undef DO_LDFF1_LDNF1_1
+#undef DO_LDFF1_LDNF1_2
 
 /*
  * Store contiguous data, protected by a governing predicate.
  */
-#define DO_ST1(NAME, FN, TYPEE, TYPEM, H)                  \
-void HELPER(NAME)(CPUARMState *env, void *vg,              \
-                  target_ulong addr, uint32_t desc)        \
-{                                                          \
-    intptr_t i, oprsz = simd_oprsz(desc);                  \
-    intptr_t ra = GETPC();                                 \
-    unsigned rd = simd_data(desc);                         \
-    void *vd = &env->vfp.zregs[rd];                        \
-    for (i = 0; i < oprsz; ) {                             \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));    \
-        do {                                               \
-            if (pg & 1) {                                  \
-                TYPEM m = *(TYPEE *)(vd + H(i));           \
-                FN(env, addr, m, ra);                      \
-            }                                              \
-            i += sizeof(TYPEE), pg >>= sizeof(TYPEE);      \
-            addr += sizeof(TYPEM);                         \
-        } while (i & 15);                                  \
-    }                                                      \
+
+#ifdef CONFIG_SOFTMMU
+/*
+ * Softmmu flavour: emit sve_<NAME>_tlb(), which reads one TYPEM-sized
+ * value at byte offset H(reg_off) within VD and hands it to the TLB store
+ * helper together with OI and RA.  HOST and MOEND are not used here.
+ */
+#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \
+static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off,  \
+                             target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
+{                                                                           \
+    const TYPEM elt_val = *(TYPEM *)(vd + H(reg_off));                      \
+                                                                            \
+    TLB(env, addr, elt_val, oi, ra);                                        \
 }
-
-#define DO_ST1_D(NAME, FN, TYPEM)                          \
-void HELPER(NAME)(CPUARMState *env, void *vg,              \
-                  target_ulong addr, uint32_t desc)        \
-{                                                          \
-    intptr_t i, oprsz = simd_oprsz(desc) / 8;              \
-    intptr_t ra = GETPC();                                 \
-    unsigned rd = simd_data(desc);                         \
-    uint64_t *d = &env->vfp.zregs[rd].d[0];                \
-    uint8_t *pg = vg;                                      \
-    for (i = 0; i < oprsz; i += 1) {                       \
-        if (pg[H1(i)] & 1) {                               \
-            FN(env, addr, d[i], ra);                       \
-        }                                                  \
-        addr += sizeof(TYPEM);                             \
-    }                                                      \
+#else
+/*
+ * User-only flavour: emit sve_<NAME>_tlb(), which reads one TYPEM-sized
+ * value at byte offset H(reg_off) within VD and writes it with HOST to
+ * the host address g2h(addr).  MOEND and TLB are not used here, nor are
+ * the env, oi and ra arguments of the generated function.
+ */
+#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \
+static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off,  \
+                             target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
+{                                                                           \
+    const TYPEM elt_val = *(TYPEM *)(vd + H(reg_off));                      \
+                                                                            \
+    HOST(g2h(addr), elt_val);                                               \
 }
+#endif
 
-#define DO_ST2(NAME, FN, TYPEE, TYPEM, H)                  \
-void HELPER(NAME)(CPUARMState *env, void *vg,              \
-                  target_ulong addr, uint32_t desc)        \
-{                                                          \
-    intptr_t i, oprsz = simd_oprsz(desc);                  \
-    intptr_t ra = GETPC();                                 \
-    unsigned rd = simd_data(desc);                         \
-    void *d1 = &env->vfp.zregs[rd];                        \
-    void *d2 = &env->vfp.zregs[(rd + 1) & 31];             \
-    for (i = 0; i < oprsz; ) {                             \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));    \
-        do {                                               \
-            if (pg & 1) {                                  \
-                TYPEM m1 = *(TYPEE *)(d1 + H(i));          \
-                TYPEM m2 = *(TYPEE *)(d2 + H(i));          \
-                FN(env, addr, m1, ra);                     \
-                FN(env, addr + sizeof(TYPEM), m2, ra);     \
-            }                                              \
-            i += sizeof(TYPEE), pg >>= sizeof(TYPEE);      \
-            addr += 2 * sizeof(TYPEM);                     \
-        } while (i & 15);                                  \
-    }                                                      \
-}
+DO_ST_TLB(st1bb,   H1,  uint8_t, stb_p, 0, helper_ret_stb_mmu)
+DO_ST_TLB(st1bh, H1_2, uint16_t, stb_p, 0, helper_ret_stb_mmu)
+DO_ST_TLB(st1bs, H1_4, uint32_t, stb_p, 0, helper_ret_stb_mmu)
+DO_ST_TLB(st1bd,     , uint64_t, stb_p, 0, helper_ret_stb_mmu)
 
-#define DO_ST3(NAME, FN, TYPEE, TYPEM, H)                  \
-void HELPER(NAME)(CPUARMState *env, void *vg,              \
-                  target_ulong addr, uint32_t desc)        \
-{                                                          \
-    intptr_t i, oprsz = simd_oprsz(desc);                  \
-    intptr_t ra = GETPC();                                 \
-    unsigned rd = simd_data(desc);                         \
-    void *d1 = &env->vfp.zregs[rd];                        \
-    void *d2 = &env->vfp.zregs[(rd + 1) & 31];             \
-    void *d3 = &env->vfp.zregs[(rd + 2) & 31];             \
-    for (i = 0; i < oprsz; ) {                             \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));    \
-        do {                                               \
-            if (pg & 1) {                                  \
-                TYPEM m1 = *(TYPEE *)(d1 + H(i));          \
-                TYPEM m2 = *(TYPEE *)(d2 + H(i));          \
-                TYPEM m3 = *(TYPEE *)(d3 + H(i));          \
-                FN(env, addr, m1, ra);                     \
-                FN(env, addr + sizeof(TYPEM), m2, ra);     \
-                FN(env, addr + 2 * sizeof(TYPEM), m3, ra); \
-            }                                              \
-            i += sizeof(TYPEE), pg >>= sizeof(TYPEE);      \
-            addr += 3 * sizeof(TYPEM);                     \
-        } while (i & 15);                                  \
-    }                                                      \
-}
+DO_ST_TLB(st1hh_le, H1_2, uint16_t, stw_le_p, MO_LE, helper_le_stw_mmu)
+DO_ST_TLB(st1hs_le, H1_4, uint32_t, stw_le_p, MO_LE, helper_le_stw_mmu)
+DO_ST_TLB(st1hd_le,     , uint64_t, stw_le_p, MO_LE, helper_le_stw_mmu)
 
-#define DO_ST4(NAME, FN, TYPEE, TYPEM, H)                  \
-void HELPER(NAME)(CPUARMState *env, void *vg,              \
-                  target_ulong addr, uint32_t desc)        \
-{                                                          \
-    intptr_t i, oprsz = simd_oprsz(desc);                  \
-    intptr_t ra = GETPC();                                 \
-    unsigned rd = simd_data(desc);                         \
-    void *d1 = &env->vfp.zregs[rd];                        \
-    void *d2 = &env->vfp.zregs[(rd + 1) & 31];             \
-    void *d3 = &env->vfp.zregs[(rd + 2) & 31];             \
-    void *d4 = &env->vfp.zregs[(rd + 3) & 31];             \
-    for (i = 0; i < oprsz; ) {                             \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));    \
-        do {                                               \
-            if (pg & 1) {                                  \
-                TYPEM m1 = *(TYPEE *)(d1 + H(i));          \
-                TYPEM m2 = *(TYPEE *)(d2 + H(i));          \
-                TYPEM m3 = *(TYPEE *)(d3 + H(i));          \
-                TYPEM m4 = *(TYPEE *)(d4 + H(i));          \
-                FN(env, addr, m1, ra);                     \
-                FN(env, addr + sizeof(TYPEM), m2, ra);     \
-                FN(env, addr + 2 * sizeof(TYPEM), m3, ra); \
-                FN(env, addr + 3 * sizeof(TYPEM), m4, ra); \
-            }                                              \
-            i += sizeof(TYPEE), pg >>= sizeof(TYPEE);      \
-            addr += 4 * sizeof(TYPEM);                     \
-        } while (i & 15);                                  \
-    }                                                      \
-}
+DO_ST_TLB(st1ss_le, H1_4, uint32_t, stl_le_p, MO_LE, helper_le_stl_mmu)
+DO_ST_TLB(st1sd_le,     , uint64_t, stl_le_p, MO_LE, helper_le_stl_mmu)
 
-DO_ST1(sve_st1bh_r, cpu_stb_data_ra, uint16_t, uint8_t, H1_2)
-DO_ST1(sve_st1bs_r, cpu_stb_data_ra, uint32_t, uint8_t, H1_4)
-DO_ST1_D(sve_st1bd_r, cpu_stb_data_ra, uint8_t)
+DO_ST_TLB(st1dd_le,     , uint64_t, stq_le_p, MO_LE, helper_le_stq_mmu)
 
-DO_ST1(sve_st1hs_r, cpu_stw_data_ra, uint32_t, uint16_t, H1_4)
-DO_ST1_D(sve_st1hd_r, cpu_stw_data_ra, uint16_t)
+DO_ST_TLB(st1hh_be, H1_2, uint16_t, stw_be_p, MO_BE, helper_be_stw_mmu)
+DO_ST_TLB(st1hs_be, H1_4, uint32_t, stw_be_p, MO_BE, helper_be_stw_mmu)
+DO_ST_TLB(st1hd_be,     , uint64_t, stw_be_p, MO_BE, helper_be_stw_mmu)
 
-DO_ST1_D(sve_st1sd_r, cpu_stl_data_ra, uint32_t)
+DO_ST_TLB(st1ss_be, H1_4, uint32_t, stl_be_p, MO_BE, helper_be_stl_mmu)
+DO_ST_TLB(st1sd_be,     , uint64_t, stl_be_p, MO_BE, helper_be_stl_mmu)
 
-DO_ST1(sve_st1bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
-DO_ST2(sve_st2bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
-DO_ST3(sve_st3bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
-DO_ST4(sve_st4bb_r, cpu_stb_data_ra, uint8_t, uint8_t, H1)
+DO_ST_TLB(st1dd_be,     , uint64_t, stq_be_p, MO_BE, helper_be_stq_mmu)
 
-DO_ST1(sve_st1hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
-DO_ST2(sve_st2hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
-DO_ST3(sve_st3hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
-DO_ST4(sve_st4hh_r, cpu_stw_data_ra, uint16_t, uint16_t, H1_2)
+#undef DO_ST_TLB
 
-DO_ST1(sve_st1ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
-DO_ST2(sve_st2ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
-DO_ST3(sve_st3ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
-DO_ST4(sve_st4ss_r, cpu_stl_data_ra, uint32_t, uint32_t, H1_4)
-
-DO_ST1_D(sve_st1dd_r, cpu_stq_data_ra, uint64_t)
-
-void HELPER(sve_st2dd_r)(CPUARMState *env, void *vg,
-                         target_ulong addr, uint32_t desc)
+/*
+ * Common helpers for all contiguous 1,2,3,4-register predicated stores.
+ *
+ * @desc carries the TCGMemOpIdx in the low MEMOPIDX_SHIFT bits extracted
+ * at SIMD_DATA_SHIFT, and the number of the first source vector register
+ * in the 5 bits above that.  The vector is walked in 16-byte chunks, with
+ * the 16 predicate bits for each chunk loaded from @vg.  An element is
+ * stored through @tlb_fn only when the low predicate bit for it is set.
+ * Every element, active or not, advances the register offset by @esize
+ * and the memory address by N * @msize, N being the register count.
+ * The return address @ra is installed with set_helper_retaddr() for the
+ * duration of the loop and reset to 0 afterwards.
+ */
+static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr,
+                      uint32_t desc, const uintptr_t ra,
+                      const int esize, const int msize,
+                      sve_st1_tlb_fn *tlb_fn)
 {
-    intptr_t i, oprsz = simd_oprsz(desc) / 8;
-    intptr_t ra = GETPC();
-    unsigned rd = simd_data(desc);
-    uint64_t *d1 = &env->vfp.zregs[rd].d[0];
-    uint64_t *d2 = &env->vfp.zregs[(rd + 1) & 31].d[0];
-    uint8_t *pg = vg;
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    intptr_t i, oprsz = simd_oprsz(desc);
+    void *vd = &env->vfp.zregs[rd];
 
-    for (i = 0; i < oprsz; i += 1) {
-        if (pg[H1(i)] & 1) {
-            cpu_stq_data_ra(env, addr, d1[i], ra);
-            cpu_stq_data_ra(env, addr + 8, d2[i], ra);
-        }
-        addr += 2 * 8;
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); /* bits for 16 bytes */
+        do {
+            if (pg & 1) {
+                tlb_fn(env, vd, i, addr, oi, ra);
+            }
+            i += esize, pg >>= esize; /* one predicate bit per register byte */
+            addr += msize; /* memory advances even for inactive elements */
+        } while (i & 15);
     }
+    set_helper_retaddr(0);
 }
 
-void HELPER(sve_st3dd_r)(CPUARMState *env, void *vg,
-                         target_ulong addr, uint32_t desc)
+static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr,
+                      uint32_t desc, const uintptr_t ra,
+                      const int esize, const int msize,
+                      sve_st1_tlb_fn *tlb_fn)
 {
-    intptr_t i, oprsz = simd_oprsz(desc) / 8;
-    intptr_t ra = GETPC();
-    unsigned rd = simd_data(desc);
-    uint64_t *d1 = &env->vfp.zregs[rd].d[0];
-    uint64_t *d2 = &env->vfp.zregs[(rd + 1) & 31].d[0];
-    uint64_t *d3 = &env->vfp.zregs[(rd + 2) & 31].d[0];
-    uint8_t *pg = vg;
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    intptr_t i, oprsz = simd_oprsz(desc);
+    void *d1 = &env->vfp.zregs[rd];
+    void *d2 = &env->vfp.zregs[(rd + 1) & 31];
 
-    for (i = 0; i < oprsz; i += 1) {
-        if (pg[H1(i)] & 1) {
-            cpu_stq_data_ra(env, addr, d1[i], ra);
-            cpu_stq_data_ra(env, addr + 8, d2[i], ra);
-            cpu_stq_data_ra(env, addr + 16, d3[i], ra);
-        }
-        addr += 3 * 8;
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+        do {
+            if (pg & 1) {
+                tlb_fn(env, d1, i, addr, oi, ra);
+                tlb_fn(env, d2, i, addr + msize, oi, ra);
+            }
+            i += esize, pg >>= esize;
+            addr += 2 * msize;
+        } while (i & 15);
     }
+    set_helper_retaddr(0);
 }
 
-void HELPER(sve_st4dd_r)(CPUARMState *env, void *vg,
-                         target_ulong addr, uint32_t desc)
+static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr,
+                      uint32_t desc, const uintptr_t ra,
+                      const int esize, const int msize,
+                      sve_st1_tlb_fn *tlb_fn)
 {
-    intptr_t i, oprsz = simd_oprsz(desc) / 8;
-    intptr_t ra = GETPC();
-    unsigned rd = simd_data(desc);
-    uint64_t *d1 = &env->vfp.zregs[rd].d[0];
-    uint64_t *d2 = &env->vfp.zregs[(rd + 1) & 31].d[0];
-    uint64_t *d3 = &env->vfp.zregs[(rd + 2) & 31].d[0];
-    uint64_t *d4 = &env->vfp.zregs[(rd + 3) & 31].d[0];
-    uint8_t *pg = vg;
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    intptr_t i, oprsz = simd_oprsz(desc);
+    void *d1 = &env->vfp.zregs[rd];
+    void *d2 = &env->vfp.zregs[(rd + 1) & 31];
+    void *d3 = &env->vfp.zregs[(rd + 2) & 31];
 
-    for (i = 0; i < oprsz; i += 1) {
-        if (pg[H1(i)] & 1) {
-            cpu_stq_data_ra(env, addr, d1[i], ra);
-            cpu_stq_data_ra(env, addr + 8, d2[i], ra);
-            cpu_stq_data_ra(env, addr + 16, d3[i], ra);
-            cpu_stq_data_ra(env, addr + 24, d4[i], ra);
-        }
-        addr += 4 * 8;
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+        do {
+            if (pg & 1) {
+                tlb_fn(env, d1, i, addr, oi, ra);
+                tlb_fn(env, d2, i, addr + msize, oi, ra);
+                tlb_fn(env, d3, i, addr + 2 * msize, oi, ra);
+            }
+            i += esize, pg >>= esize;
+            addr += 3 * msize;
+        } while (i & 15);
     }
+    set_helper_retaddr(0);
 }
 
-/* Loads with a vector index.  */
+static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr,
+                      uint32_t desc, const uintptr_t ra,
+                      const int esize, const int msize,
+                      sve_st1_tlb_fn *tlb_fn)
+{
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    intptr_t i, oprsz = simd_oprsz(desc);
+    void *d1 = &env->vfp.zregs[rd];
+    void *d2 = &env->vfp.zregs[(rd + 1) & 31];
+    void *d3 = &env->vfp.zregs[(rd + 2) & 31];
+    void *d4 = &env->vfp.zregs[(rd + 3) & 31];
 
-#define DO_LD1_ZPZ_S(NAME, TYPEI, TYPEM, FN)                            \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm,       \
-                  target_ulong base, uint32_t desc)                     \
-{                                                                       \
-    intptr_t i, oprsz = simd_oprsz(desc);                               \
-    unsigned scale = simd_data(desc);                                   \
-    uintptr_t ra = GETPC();                                             \
-    for (i = 0; i < oprsz; ) {                                          \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));                 \
-        do {                                                            \
-            TYPEM m = 0;                                                \
-            if (pg & 1) {                                               \
-                target_ulong off = *(TYPEI *)(vm + H1_4(i));            \
-                m = FN(env, base + (off << scale), ra);                 \
-            }                                                           \
-            *(uint32_t *)(vd + H1_4(i)) = m;                            \
-            i += 4, pg >>= 4;                                           \
-        } while (i & 15);                                               \
-    }                                                                   \
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+        do {
+            if (pg & 1) {
+                tlb_fn(env, d1, i, addr, oi, ra);
+                tlb_fn(env, d2, i, addr + msize, oi, ra);
+                tlb_fn(env, d3, i, addr + 2 * msize, oi, ra);
+                tlb_fn(env, d4, i, addr + 3 * msize, oi, ra);
+            }
+            i += esize, pg >>= esize;
+            addr += 4 * msize;
+        } while (i & 15);
+    }
+    set_helper_retaddr(0);
 }
 
-#define DO_LD1_ZPZ_D(NAME, TYPEI, TYPEM, FN)                            \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm,       \
-                  target_ulong base, uint32_t desc)                     \
-{                                                                       \
-    intptr_t i, oprsz = simd_oprsz(desc) / 8;                           \
-    unsigned scale = simd_data(desc);                                   \
-    uintptr_t ra = GETPC();                                             \
-    uint64_t *d = vd, *m = vm; uint8_t *pg = vg;                        \
-    for (i = 0; i < oprsz; i++) {                                       \
-        TYPEM mm = 0;                                                   \
-        if (pg[H1(i)] & 1) {                                            \
-            target_ulong off = (TYPEI)m[i];                             \
-            mm = FN(env, base + (off << scale), ra);                    \
-        }                                                               \
-        d[i] = mm;                                                      \
-    }                                                                   \
+/*
+ * Emit HELPER(sve_st<N><NAME>_r): captures the return address with
+ * GETPC() and forwards to sve_st<N>_r() with a register step of ESIZE,
+ * a memory step of 1 and sve_st1<NAME>_tlb as the element store.
+ */
+#define DO_STN_1(N, NAME, ESIZE) \
+void __attribute__((flatten))                                       \
+HELPER(sve_st##N##NAME##_r)(CPUARMState *env, void *vg,             \
+                            target_ulong addr, uint32_t desc)       \
+{                                                                   \
+    const uintptr_t retaddr = GETPC();                              \
+                                                                    \
+    sve_st##N##_r(env, vg, addr, desc, retaddr, ESIZE, 1,           \
+                  sve_st1##NAME##_tlb);                             \
 }
 
-DO_LD1_ZPZ_S(sve_ldbsu_zsu, uint32_t, uint8_t,  cpu_ldub_data_ra)
-DO_LD1_ZPZ_S(sve_ldhsu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_S(sve_ldssu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra)
-DO_LD1_ZPZ_S(sve_ldbss_zsu, uint32_t, int8_t,   cpu_ldub_data_ra)
-DO_LD1_ZPZ_S(sve_ldhss_zsu, uint32_t, int16_t,  cpu_lduw_data_ra)
+/*
+ * Emit the pair HELPER(sve_st<N><NAME>_le_r) and
+ * HELPER(sve_st<N><NAME>_be_r).  Each captures the return address with
+ * GETPC() and forwards to sve_st<N>_r() with a register step of ESIZE
+ * and a memory step of MSIZE; they differ only in using
+ * sve_st1<NAME>_le_tlb or sve_st1<NAME>_be_tlb as the element store.
+ */
+#define DO_STN_2(N, NAME, ESIZE, MSIZE) \
+void __attribute__((flatten))                                         \
+HELPER(sve_st##N##NAME##_le_r)(CPUARMState *env, void *vg,            \
+                               target_ulong addr, uint32_t desc)      \
+{                                                                     \
+    const uintptr_t retaddr = GETPC();                                \
+                                                                      \
+    sve_st##N##_r(env, vg, addr, desc, retaddr, ESIZE, MSIZE,         \
+                  sve_st1##NAME##_le_tlb);                            \
+}                                                                     \
+void __attribute__((flatten))                                         \
+HELPER(sve_st##N##NAME##_be_r)(CPUARMState *env, void *vg,            \
+                               target_ulong addr, uint32_t desc)      \
+{                                                                     \
+    const uintptr_t retaddr = GETPC();                                \
+                                                                      \
+    sve_st##N##_r(env, vg, addr, desc, retaddr, ESIZE, MSIZE,         \
+                  sve_st1##NAME##_be_tlb);                            \
+}
 
-DO_LD1_ZPZ_S(sve_ldbsu_zss, int32_t, uint8_t,  cpu_ldub_data_ra)
-DO_LD1_ZPZ_S(sve_ldhsu_zss, int32_t, uint16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_S(sve_ldssu_zss, int32_t, uint32_t, cpu_ldl_data_ra)
-DO_LD1_ZPZ_S(sve_ldbss_zss, int32_t, int8_t,   cpu_ldub_data_ra)
-DO_LD1_ZPZ_S(sve_ldhss_zss, int32_t, int16_t,  cpu_lduw_data_ra)
+DO_STN_1(1, bb, 1)
+DO_STN_1(1, bh, 2)
+DO_STN_1(1, bs, 4)
+DO_STN_1(1, bd, 8)
+DO_STN_1(2, bb, 1)
+DO_STN_1(3, bb, 1)
+DO_STN_1(4, bb, 1)
 
-DO_LD1_ZPZ_D(sve_ldbdu_zsu, uint32_t, uint8_t,  cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhdu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsdu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra)
-DO_LD1_ZPZ_D(sve_ldddu_zsu, uint32_t, uint64_t, cpu_ldq_data_ra)
-DO_LD1_ZPZ_D(sve_ldbds_zsu, uint32_t, int8_t,   cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhds_zsu, uint32_t, int16_t,  cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsds_zsu, uint32_t, int32_t,  cpu_ldl_data_ra)
+DO_STN_2(1, hh, 2, 2)
+DO_STN_2(1, hs, 4, 2)
+DO_STN_2(1, hd, 8, 2)
+DO_STN_2(2, hh, 2, 2)
+DO_STN_2(3, hh, 2, 2)
+DO_STN_2(4, hh, 2, 2)
 
-DO_LD1_ZPZ_D(sve_ldbdu_zss, int32_t, uint8_t,  cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhdu_zss, int32_t, uint16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsdu_zss, int32_t, uint32_t, cpu_ldl_data_ra)
-DO_LD1_ZPZ_D(sve_ldddu_zss, int32_t, uint64_t, cpu_ldq_data_ra)
-DO_LD1_ZPZ_D(sve_ldbds_zss, int32_t, int8_t,   cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhds_zss, int32_t, int16_t,  cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsds_zss, int32_t, int32_t,  cpu_ldl_data_ra)
+DO_STN_2(1, ss, 4, 4)
+DO_STN_2(1, sd, 8, 4)
+DO_STN_2(2, ss, 4, 4)
+DO_STN_2(3, ss, 4, 4)
+DO_STN_2(4, ss, 4, 4)
 
-DO_LD1_ZPZ_D(sve_ldbdu_zd, uint64_t, uint8_t,  cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhdu_zd, uint64_t, uint16_t, cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsdu_zd, uint64_t, uint32_t, cpu_ldl_data_ra)
-DO_LD1_ZPZ_D(sve_ldddu_zd, uint64_t, uint64_t, cpu_ldq_data_ra)
-DO_LD1_ZPZ_D(sve_ldbds_zd, uint64_t, int8_t,   cpu_ldub_data_ra)
-DO_LD1_ZPZ_D(sve_ldhds_zd, uint64_t, int16_t,  cpu_lduw_data_ra)
-DO_LD1_ZPZ_D(sve_ldsds_zd, uint64_t, int32_t,  cpu_ldl_data_ra)
+DO_STN_2(1, dd, 8, 8)
+DO_STN_2(2, dd, 8, 8)
+DO_STN_2(3, dd, 8, 8)
+DO_STN_2(4, dd, 8, 8)
+
+#undef DO_STN_1
+#undef DO_STN_2
+
+/*
+ * Loads with a vector index.
+ */
+
+/*
+ * Load the element at @reg + @reg_ofs, sign or zero-extend as needed.
+ */
+typedef target_ulong zreg_off_fn(void *reg, intptr_t reg_ofs);
+
+/* Read the unsigned 32-bit offset stored at byte offset H1_4(@reg_ofs). */
+static target_ulong off_zsu_s(void *reg, intptr_t reg_ofs)
+{
+    const uint32_t *elt = reg + H1_4(reg_ofs);
+
+    return *elt;
+}
+
+/*
+ * Read the signed 32-bit offset stored at byte offset H1_4(@reg_ofs);
+ * the conversion to target_ulong carries the sign along.
+ */
+static target_ulong off_zss_s(void *reg, intptr_t reg_ofs)
+{
+    const int32_t *elt = reg + H1_4(reg_ofs);
+
+    return *elt;
+}
+
+/*
+ * Read the 64-bit element at byte offset @reg_ofs and return its low
+ * 32 bits as an unsigned value.
+ */
+static target_ulong off_zsu_d(void *reg, intptr_t reg_ofs)
+{
+    const uint64_t raw = *(uint64_t *)(reg + reg_ofs);
+
+    return (uint32_t)raw;
+}
+
+/*
+ * Read the 64-bit element at byte offset @reg_ofs and return its low
+ * 32 bits interpreted as a signed value.
+ */
+static target_ulong off_zss_d(void *reg, intptr_t reg_ofs)
+{
+    const uint64_t raw = *(uint64_t *)(reg + reg_ofs);
+
+    return (int32_t)raw;
+}
+
+/* Read the full 64-bit offset stored at byte offset @reg_ofs. */
+static target_ulong off_zd_d(void *reg, intptr_t reg_ofs)
+{
+    const uint64_t *elt = reg + reg_ofs;
+
+    return *elt;
+}
+
+/*
+ * Gather load stepping through the vector four bytes at a time.
+ *
+ * For every element whose low predicate bit in @vg is set, the address
+ * is @base plus the offset that @off_fn reads from @vm, shifted left by
+ * the 2-bit scale taken from @desc; the element is then loaded with
+ * @tlb_fn.  Loads go into a zero-initialised scratch register, which is
+ * copied to @vd only after the whole loop has run.
+ */
+static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
+                       target_ulong base, uint32_t desc, uintptr_t ra,
+                       zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
+{
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+    const intptr_t vec_bytes = simd_oprsz(desc);
+    ARMVectorReg scratch = { };
+    intptr_t reg_off = 0;
+
+    set_helper_retaddr(ra);
+    while (reg_off < vec_bytes) {
+        /* Predicate bits covering the next 16 bytes of the vector. */
+        uint16_t pg = *(uint16_t *)(vg + H1_2(reg_off >> 3));
+
+        do {
+            if (likely(pg & 1)) {
+                target_ulong off = off_fn(vm, reg_off);
+                target_ulong vaddr = base + (off << scale);
+
+                tlb_fn(env, &scratch, reg_off, vaddr, oi, ra);
+            }
+            reg_off += 4;
+            pg >>= 4;
+        } while (reg_off & 15);
+    }
+    set_helper_retaddr(0);
+
+    /* Commit only once no further load can raise an exception. */
+    memcpy(vd, &scratch, vec_bytes);
+}
+
+/*
+ * Gather load stepping through the vector eight bytes at a time.
+ *
+ * Element N is active when the low bit of predicate byte H1(N) in @vg is
+ * set.  For an active element the address is @base plus the offset that
+ * @off_fn reads from @vm, shifted left by the 2-bit scale taken from
+ * @desc; the element is then loaded with @tlb_fn.  Loads go into a
+ * zero-initialised scratch register, which is copied to @vd only after
+ * the whole loop has run.
+ */
+static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
+                       target_ulong base, uint32_t desc, uintptr_t ra,
+                       zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
+{
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+    const intptr_t nelem = simd_oprsz(desc) / 8;
+    ARMVectorReg scratch = { };
+    intptr_t elt;
+
+    set_helper_retaddr(ra);
+    for (elt = 0; elt < nelem; elt++) {
+        const intptr_t reg_off = elt * 8;
+        const uint8_t pg = *(uint8_t *)(vg + H1(elt));
+
+        if (likely(pg & 1)) {
+            target_ulong off = off_fn(vm, reg_off);
+            target_ulong vaddr = base + (off << scale);
+
+            tlb_fn(env, &scratch, reg_off, vaddr, oi, ra);
+        }
+    }
+    set_helper_retaddr(0);
+
+    /* Commit only once no further load can raise an exception. */
+    memcpy(vd, &scratch, nelem * 8);
+}
+
+/*
+ * Emit HELPER(sve_ld<MEM>_<OFS>): captures the return address with
+ * GETPC() and forwards to sve_ld1_zs(), reading offsets with
+ * off_<OFS>_s and loading elements with sve_ld1<MEM>_tlb.
+ */
+#define DO_LD1_ZPZ_S(MEM, OFS) \
+void __attribute__((flatten))                                     \
+HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
+                            void *vm, target_ulong base,          \
+                            uint32_t desc)                        \
+{                                                                 \
+    const uintptr_t retaddr = GETPC();                            \
+                                                                  \
+    sve_ld1_zs(env, vd, vg, vm, base, desc, retaddr,              \
+               off_##OFS##_s, sve_ld1##MEM##_tlb);                \
+}
+
+/*
+ * Emit HELPER(sve_ld<MEM>_<OFS>): captures the return address with
+ * GETPC() and forwards to sve_ld1_zd(), reading offsets with
+ * off_<OFS>_d and loading elements with sve_ld1<MEM>_tlb.
+ */
+#define DO_LD1_ZPZ_D(MEM, OFS) \
+void __attribute__((flatten))                                     \
+HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
+                            void *vm, target_ulong base,          \
+                            uint32_t desc)                        \
+{                                                                 \
+    const uintptr_t retaddr = GETPC();                            \
+                                                                  \
+    sve_ld1_zd(env, vd, vg, vm, base, desc, retaddr,              \
+               off_##OFS##_d, sve_ld1##MEM##_tlb);                \
+}
+
+DO_LD1_ZPZ_S(bsu, zsu)
+DO_LD1_ZPZ_S(bsu, zss)
+DO_LD1_ZPZ_D(bdu, zsu)
+DO_LD1_ZPZ_D(bdu, zss)
+DO_LD1_ZPZ_D(bdu, zd)
+
+DO_LD1_ZPZ_S(bss, zsu)
+DO_LD1_ZPZ_S(bss, zss)
+DO_LD1_ZPZ_D(bds, zsu)
+DO_LD1_ZPZ_D(bds, zss)
+DO_LD1_ZPZ_D(bds, zd)
+
+DO_LD1_ZPZ_S(hsu_le, zsu)
+DO_LD1_ZPZ_S(hsu_le, zss)
+DO_LD1_ZPZ_D(hdu_le, zsu)
+DO_LD1_ZPZ_D(hdu_le, zss)
+DO_LD1_ZPZ_D(hdu_le, zd)
+
+DO_LD1_ZPZ_S(hsu_be, zsu)
+DO_LD1_ZPZ_S(hsu_be, zss)
+DO_LD1_ZPZ_D(hdu_be, zsu)
+DO_LD1_ZPZ_D(hdu_be, zss)
+DO_LD1_ZPZ_D(hdu_be, zd)
+
+DO_LD1_ZPZ_S(hss_le, zsu)
+DO_LD1_ZPZ_S(hss_le, zss)
+DO_LD1_ZPZ_D(hds_le, zsu)
+DO_LD1_ZPZ_D(hds_le, zss)
+DO_LD1_ZPZ_D(hds_le, zd)
+
+DO_LD1_ZPZ_S(hss_be, zsu)
+DO_LD1_ZPZ_S(hss_be, zss)
+DO_LD1_ZPZ_D(hds_be, zsu)
+DO_LD1_ZPZ_D(hds_be, zss)
+DO_LD1_ZPZ_D(hds_be, zd)
+
+DO_LD1_ZPZ_S(ss_le, zsu)
+DO_LD1_ZPZ_S(ss_le, zss)
+DO_LD1_ZPZ_D(sdu_le, zsu)
+DO_LD1_ZPZ_D(sdu_le, zss)
+DO_LD1_ZPZ_D(sdu_le, zd)
+
+DO_LD1_ZPZ_S(ss_be, zsu)
+DO_LD1_ZPZ_S(ss_be, zss)
+DO_LD1_ZPZ_D(sdu_be, zsu)
+DO_LD1_ZPZ_D(sdu_be, zss)
+DO_LD1_ZPZ_D(sdu_be, zd)
+
+DO_LD1_ZPZ_D(sds_le, zsu)
+DO_LD1_ZPZ_D(sds_le, zss)
+DO_LD1_ZPZ_D(sds_le, zd)
+
+DO_LD1_ZPZ_D(sds_be, zsu)
+DO_LD1_ZPZ_D(sds_be, zss)
+DO_LD1_ZPZ_D(sds_be, zd)
+
+DO_LD1_ZPZ_D(dd_le, zsu)
+DO_LD1_ZPZ_D(dd_le, zss)
+DO_LD1_ZPZ_D(dd_le, zd)
+
+DO_LD1_ZPZ_D(dd_be, zsu)
+DO_LD1_ZPZ_D(dd_be, zss)
+DO_LD1_ZPZ_D(dd_be, zd)
+
+#undef DO_LD1_ZPZ_S
+#undef DO_LD1_ZPZ_D
 
 /* First fault loads with a vector index.  */
 
-#ifdef CONFIG_USER_ONLY
+/* Load one element into VD+REG_OFF from (ENV,VADDR) without faulting.
+ * The controlling predicate is known to be true.  Return true if the
+ * load was successful.
+ */
+typedef bool sve_ld1_nf_fn(CPUARMState *env, void *vd, intptr_t reg_off,
+                           target_ulong vaddr, int mmu_idx);
 
-#define DO_LDFF1_ZPZ(NAME, TYPEE, TYPEI, TYPEM, FN, H)                  \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm,       \
-                  target_ulong base, uint32_t desc)                     \
-{                                                                       \
-    intptr_t i, oprsz = simd_oprsz(desc);                               \
-    unsigned scale = simd_data(desc);                                   \
-    uintptr_t ra = GETPC();                                             \
-    bool first = true;                                                  \
-    mmap_lock();                                                        \
-    for (i = 0; i < oprsz; ) {                                          \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));                 \
-        do {                                                            \
-            TYPEM m = 0;                                                \
-            if (pg & 1) {                                               \
-                target_ulong off = *(TYPEI *)(vm + H(i));               \
-                target_ulong addr = base + (off << scale);              \
-                if (!first &&                                           \
-                    page_check_range(addr, sizeof(TYPEM), PAGE_READ)) { \
-                    record_fault(env, i, oprsz);                        \
-                    goto exit;                                          \
-                }                                                       \
-                m = FN(env, addr, ra);                                  \
-                first = false;                                          \
-            }                                                           \
-            *(TYPEE *)(vd + H(i)) = m;                                  \
-            i += sizeof(TYPEE), pg >>= sizeof(TYPEE);                   \
-        } while (i & 15);                                               \
-    }                                                                   \
- exit:                                                                  \
-    mmap_unlock();                                                      \
+#ifdef CONFIG_SOFTMMU
+/*
+ * Softmmu flavour of the non-faulting element load: emit
+ * sve_ld<NAME>_nf().
+ *
+ * The load is attempted only when the whole TYPEM-sized access lies
+ * inside the page containing ADDR and tlb_vaddr_to_host() returns a host
+ * pointer for it.  On success the value read with HOST is converted to
+ * TYPEE, written at byte offset H(reg_off) of VD, and true is returned.
+ * Otherwise VD is left untouched and false is returned.
+ *
+ * -(addr | TARGET_PAGE_MASK) is the number of bytes from ADDR up to the
+ * end of its page (this relies on TARGET_PAGE_MASK having every bit at
+ * and above the page-size bit set).  It is a byte count rather than an
+ * address, so it is compared with the access size directly.
+ */
+#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \
+static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \
+                              target_ulong addr, int mmu_idx)               \
+{                                                                           \
+    target_ulong in_page = -(addr | TARGET_PAGE_MASK);                      \
+    if (likely(in_page >= sizeof(TYPEM))) {                                 \
+        void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx);  \
+        if (likely(host)) {                                                 \
+            TYPEM val = HOST(host);                                         \
+            *(TYPEE *)(vd + H(reg_off)) = val;                              \
+            return true;                                                    \
+        }                                                                   \
+    }                                                                       \
+    return false;                                                           \
 }
-
 #else
-
-#define DO_LDFF1_ZPZ(NAME, TYPEE, TYPEI, TYPEM, FN, H)                  \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm,       \
-                  target_ulong base, uint32_t desc)                     \
-{                                                                       \
-    g_assert_not_reached();                                             \
+/*
+ * User-only flavour of the non-faulting element load: emit
+ * sve_ld<NAME>_nf().
+ *
+ * page_check_range() returns zero when the whole range is accessible
+ * with the requested permission and non-zero otherwise, so the load is
+ * performed only on a zero result.  On success the value read with HOST
+ * from g2h(addr) is converted to TYPEE, written at byte offset
+ * H(reg_off) of VD, and true is returned.  Otherwise VD is left
+ * untouched and false is returned.  mmu_idx is not used here.
+ */
+#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \
+static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \
+                              target_ulong addr, int mmu_idx)               \
+{                                                                           \
+    if (likely(page_check_range(addr, sizeof(TYPEM), PAGE_READ) == 0)) {    \
+        TYPEM val = HOST(g2h(addr));                                        \
+        *(TYPEE *)(vd + H(reg_off)) = val;                                  \
+        return true;                                                        \
+    }                                                                       \
+    return false;                                                           \
 }
-
 #endif
 
-#define DO_LDFF1_ZPZ_S(NAME, TYPEI, TYPEM, FN) \
-    DO_LDFF1_ZPZ(NAME, uint32_t, TYPEI, TYPEM, FN, H1_4)
-#define DO_LDFF1_ZPZ_D(NAME, TYPEI, TYPEM, FN) \
-    DO_LDFF1_ZPZ(NAME, uint64_t, TYPEI, TYPEM, FN, )
+DO_LD_NF(bsu, H1_4, uint32_t, uint8_t, ldub_p)  /* defines sve_ldbsu_nf() */
+DO_LD_NF(bss, H1_4, uint32_t,  int8_t, ldsb_p)
+DO_LD_NF(bdu,     , uint64_t, uint8_t, ldub_p)  /* empty H: reg_off as is */
+DO_LD_NF(bds,     , uint64_t,  int8_t, ldsb_p)
 
-DO_LDFF1_ZPZ_S(sve_ldffbsu_zsu, uint32_t, uint8_t,  cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffhsu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffssu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffbss_zsu, uint32_t, int8_t,   cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffhss_zsu, uint32_t, int16_t,  cpu_lduw_data_ra)
+DO_LD_NF(hsu_le, H1_4, uint32_t, uint16_t, lduw_le_p)  /* 16-bit in memory */
+DO_LD_NF(hss_le, H1_4, uint32_t,  int16_t, ldsw_le_p)
+DO_LD_NF(hsu_be, H1_4, uint32_t, uint16_t, lduw_be_p)
+DO_LD_NF(hss_be, H1_4, uint32_t,  int16_t, ldsw_be_p)
+DO_LD_NF(hdu_le,     , uint64_t, uint16_t, lduw_le_p)
+DO_LD_NF(hds_le,     , uint64_t,  int16_t, ldsw_le_p)
+DO_LD_NF(hdu_be,     , uint64_t, uint16_t, lduw_be_p)
+DO_LD_NF(hds_be,     , uint64_t,  int16_t, ldsw_be_p)
 
-DO_LDFF1_ZPZ_S(sve_ldffbsu_zss, int32_t, uint8_t,  cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffhsu_zss, int32_t, uint16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffssu_zss, int32_t, uint32_t, cpu_ldl_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffbss_zss, int32_t, int8_t,   cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_S(sve_ldffhss_zss, int32_t, int16_t,  cpu_lduw_data_ra)
+DO_LD_NF(ss_le,  H1_4, uint32_t, uint32_t, ldl_le_p)  /* 32-bit in memory */
+DO_LD_NF(ss_be,  H1_4, uint32_t, uint32_t, ldl_be_p)
+DO_LD_NF(sdu_le,     , uint64_t, uint32_t, ldl_le_p)
+DO_LD_NF(sds_le,     , uint64_t,  int32_t, ldl_le_p)  /* int32_t TYPEM */
+DO_LD_NF(sdu_be,     , uint64_t, uint32_t, ldl_be_p)
+DO_LD_NF(sds_be,     , uint64_t,  int32_t, ldl_be_p)
 
-DO_LDFF1_ZPZ_D(sve_ldffbdu_zsu, uint32_t, uint8_t,  cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhdu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsdu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffddu_zsu, uint32_t, uint64_t, cpu_ldq_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffbds_zsu, uint32_t, int8_t,   cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhds_zsu, uint32_t, int16_t,  cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsds_zsu, uint32_t, int32_t,  cpu_ldl_data_ra)
+DO_LD_NF(dd_le,      , uint64_t, uint64_t, ldq_le_p)  /* 64-bit in memory */
+DO_LD_NF(dd_be,      , uint64_t, uint64_t, ldq_be_p)
 
-DO_LDFF1_ZPZ_D(sve_ldffbdu_zss, int32_t, uint8_t,  cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhdu_zss, int32_t, uint16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsdu_zss, int32_t, uint32_t, cpu_ldl_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffddu_zss, int32_t, uint64_t, cpu_ldq_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffbds_zss, int32_t, int8_t,   cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhds_zss, int32_t, int16_t,  cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsds_zss, int32_t, int32_t,  cpu_ldl_data_ra)
+/*
+ * Common helper for all gather first-faulting loads.
+ */
+static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
+                                target_ulong base, uint32_t desc, uintptr_t ra,
+                                zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn,
+                                sve_ld1_nf_fn *nonfault_fn)
+{   /* First-fault gather of 32-bit elements; offsets below are in bytes.  */
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int mmu_idx = get_mmuidx(oi);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+    intptr_t reg_off, reg_max = simd_oprsz(desc);  /* byte offsets in vd */
+    target_ulong addr;
 
-DO_LDFF1_ZPZ_D(sve_ldffbdu_zd, uint64_t, uint8_t,  cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhdu_zd, uint64_t, uint16_t, cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsdu_zd, uint64_t, uint32_t, cpu_ldl_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffddu_zd, uint64_t, uint64_t, cpu_ldq_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffbds_zd, uint64_t, int8_t,   cpu_ldub_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffhds_zd, uint64_t, int16_t,  cpu_lduw_data_ra)
-DO_LDFF1_ZPZ_D(sve_ldffsds_zd, uint64_t, int32_t,  cpu_ldl_data_ra)
+    /* Skip to the first true predicate.  */
+    reg_off = find_next_active(vg, 0, reg_max, MO_32);
+    if (likely(reg_off < reg_max)) {
+        /* Perform one normal read via tlb_fn, which will fault or not.  */
+        set_helper_retaddr(ra);
+        addr = off_fn(vm, reg_off);
+        addr = base + (addr << scale);
+        tlb_fn(env, vd, reg_off, addr, oi, ra);
+
+        /* The rest of the reads go through nonfault_fn and do not fault.  */
+        set_helper_retaddr(0);
+    }
+
+    /* Past the only load that may fault: zero the leading inactive part.  */
+    swap_memzero(vd, reg_off);
+
+    while (likely((reg_off += 4) < reg_max)) {      /* next 32-bit element */
+        uint64_t pg = *(uint64_t *)(vg + (reg_off >> 6) * 8); /* 64 bits */
+        if (likely((pg >> (reg_off & 63)) & 1)) {   /* bit at byte offset */
+            addr = off_fn(vm, reg_off);
+            addr = base + (addr << scale);
+            if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) {
+                record_fault(env, reg_off, reg_max);
+                break;      /* stop at the first element that fails */
+            }
+        } else {
+            *(uint32_t *)(vd + H1_4(reg_off)) = 0;  /* inactive: zero it */
+        }
+    }
+}
+
+static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
+                                target_ulong base, uint32_t desc, uintptr_t ra,
+                                zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn,
+                                sve_ld1_nf_fn *nonfault_fn)
+{   /* First-fault gather of 64-bit elements; offsets below are in bytes.  */
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int mmu_idx = get_mmuidx(oi);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+    intptr_t reg_off, reg_max = simd_oprsz(desc);  /* byte offsets in vd */
+    target_ulong addr;
+
+    /* Skip to the first true predicate.  */
+    reg_off = find_next_active(vg, 0, reg_max, MO_64);
+    if (likely(reg_off < reg_max)) {
+        /* Perform one normal read via tlb_fn, which will fault or not.  */
+        set_helper_retaddr(ra);
+        addr = off_fn(vm, reg_off);
+        addr = base + (addr << scale);
+        tlb_fn(env, vd, reg_off, addr, oi, ra);
+
+        /* The rest of the reads go through nonfault_fn and do not fault.  */
+        set_helper_retaddr(0);
+    }
+
+    /* Past the only load that may fault: zero the leading inactive part.  */
+    swap_memzero(vd, reg_off);
+
+    while (likely((reg_off += 8) < reg_max)) {      /* next 64-bit element */
+        uint8_t pg = *(uint8_t *)(vg + H1(reg_off >> 3)); /* 1 byte/element */
+        if (likely(pg & 1)) {
+            addr = off_fn(vm, reg_off);
+            addr = base + (addr << scale);
+            if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) {
+                record_fault(env, reg_off, reg_max);
+                break;      /* stop at the first element that fails */
+            }
+        } else {
+            *(uint64_t *)(vd + reg_off) = 0;        /* inactive: zero it */
+        }
+    }
+}
+
+#define DO_LDFF1_ZPZ_S(MEM, OFS) /* defines helper sve_ldff<MEM>_<OFS> */ \
+void HELPER(sve_ldff##MEM##_##OFS)                                      \
+    (CPUARMState *env, void *vd, void *vg, void *vm,                    \
+     target_ulong base, uint32_t desc)                                  \
+{   /* Pass this helper's GETPC() down as ra; 32-bit element form.  */  \
+    sve_ldff1_zs(env, vd, vg, vm, base, desc, GETPC(),                  \
+                 off_##OFS##_s, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf);  \
+}
+
+#define DO_LDFF1_ZPZ_D(MEM, OFS) /* defines helper sve_ldff<MEM>_<OFS> */ \
+void HELPER(sve_ldff##MEM##_##OFS)                                      \
+    (CPUARMState *env, void *vd, void *vg, void *vm,                    \
+     target_ulong base, uint32_t desc)                                  \
+{   /* Pass this helper's GETPC() down as ra; 64-bit element form.  */  \
+    sve_ldff1_zd(env, vd, vg, vm, base, desc, GETPC(),                  \
+                 off_##OFS##_d, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf);  \
+}
+
+DO_LDFF1_ZPZ_S(bsu, zsu)    /* sve_ldffbsu_zsu: uint8_t in memory */
+DO_LDFF1_ZPZ_S(bsu, zss)
+DO_LDFF1_ZPZ_D(bdu, zsu)
+DO_LDFF1_ZPZ_D(bdu, zss)
+DO_LDFF1_ZPZ_D(bdu, zd)
+
+DO_LDFF1_ZPZ_S(bss, zsu)    /* int8_t in memory */
+DO_LDFF1_ZPZ_S(bss, zss)
+DO_LDFF1_ZPZ_D(bds, zsu)
+DO_LDFF1_ZPZ_D(bds, zss)
+DO_LDFF1_ZPZ_D(bds, zd)
+
+DO_LDFF1_ZPZ_S(hsu_le, zsu) /* uint16_t in memory, _le accessors */
+DO_LDFF1_ZPZ_S(hsu_le, zss)
+DO_LDFF1_ZPZ_D(hdu_le, zsu)
+DO_LDFF1_ZPZ_D(hdu_le, zss)
+DO_LDFF1_ZPZ_D(hdu_le, zd)
+
+DO_LDFF1_ZPZ_S(hsu_be, zsu) /* uint16_t in memory, _be accessors */
+DO_LDFF1_ZPZ_S(hsu_be, zss)
+DO_LDFF1_ZPZ_D(hdu_be, zsu)
+DO_LDFF1_ZPZ_D(hdu_be, zss)
+DO_LDFF1_ZPZ_D(hdu_be, zd)
+
+DO_LDFF1_ZPZ_S(hss_le, zsu) /* int16_t in memory, _le accessors */
+DO_LDFF1_ZPZ_S(hss_le, zss)
+DO_LDFF1_ZPZ_D(hds_le, zsu)
+DO_LDFF1_ZPZ_D(hds_le, zss)
+DO_LDFF1_ZPZ_D(hds_le, zd)
+
+DO_LDFF1_ZPZ_S(hss_be, zsu) /* int16_t in memory, _be accessors */
+DO_LDFF1_ZPZ_S(hss_be, zss)
+DO_LDFF1_ZPZ_D(hds_be, zsu)
+DO_LDFF1_ZPZ_D(hds_be, zss)
+DO_LDFF1_ZPZ_D(hds_be, zd)
+
+DO_LDFF1_ZPZ_S(ss_le,  zsu) /* uint32_t in memory, _le accessors */
+DO_LDFF1_ZPZ_S(ss_le,  zss)
+DO_LDFF1_ZPZ_D(sdu_le, zsu)
+DO_LDFF1_ZPZ_D(sdu_le, zss)
+DO_LDFF1_ZPZ_D(sdu_le, zd)
+
+DO_LDFF1_ZPZ_S(ss_be,  zsu) /* uint32_t in memory, _be accessors */
+DO_LDFF1_ZPZ_S(ss_be,  zss)
+DO_LDFF1_ZPZ_D(sdu_be, zsu)
+DO_LDFF1_ZPZ_D(sdu_be, zss)
+DO_LDFF1_ZPZ_D(sdu_be, zd)
+
+DO_LDFF1_ZPZ_D(sds_le, zsu) /* int32_t in memory; 64-bit elements only */
+DO_LDFF1_ZPZ_D(sds_le, zss)
+DO_LDFF1_ZPZ_D(sds_le, zd)
+
+DO_LDFF1_ZPZ_D(sds_be, zsu)
+DO_LDFF1_ZPZ_D(sds_be, zss)
+DO_LDFF1_ZPZ_D(sds_be, zd)
+
+DO_LDFF1_ZPZ_D(dd_le, zsu)  /* uint64_t in memory */
+DO_LDFF1_ZPZ_D(dd_le, zss)
+DO_LDFF1_ZPZ_D(dd_le, zd)
+
+DO_LDFF1_ZPZ_D(dd_be, zsu)
+DO_LDFF1_ZPZ_D(dd_be, zss)
+DO_LDFF1_ZPZ_D(dd_be, zd)
 
 /* Stores with a vector index.  */
 
-#define DO_ST1_ZPZ_S(NAME, TYPEI, FN)                                   \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm,       \
-                  target_ulong base, uint32_t desc)                     \
-{                                                                       \
-    intptr_t i, oprsz = simd_oprsz(desc);                               \
-    unsigned scale = simd_data(desc);                                   \
-    uintptr_t ra = GETPC();                                             \
-    for (i = 0; i < oprsz; ) {                                          \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));                 \
-        do {                                                            \
-            if (likely(pg & 1)) {                                       \
-                target_ulong off = *(TYPEI *)(vm + H1_4(i));            \
-                uint32_t d = *(uint32_t *)(vd + H1_4(i));               \
-                FN(env, base + (off << scale), d, ra);                  \
-            }                                                           \
-            i += sizeof(uint32_t), pg >>= sizeof(uint32_t);             \
-        } while (i & 15);                                               \
-    }                                                                   \
+static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
+                       target_ulong base, uint32_t desc, uintptr_t ra,
+                       zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
+{   /* Scatter store of 32-bit elements: one tlb_fn call per active one.  */
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+    intptr_t i, oprsz = simd_oprsz(desc);   /* i is a byte offset */
+
+    set_helper_retaddr(ra);     /* reset to 0 below, after the loop */
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); /* bits for 16 bytes */
+        do {
+            if (likely(pg & 1)) {
+                target_ulong off = off_fn(vm, i);
+                tlb_fn(env, vd, i, base + (off << scale), oi, ra);
+            }
+            i += 4, pg >>= 4;   /* 4 bytes and 4 predicate bits per element */
+        } while (i & 15);       /* until the 16 loaded bits are consumed */
+    }
+    set_helper_retaddr(0);
 }
 
-#define DO_ST1_ZPZ_D(NAME, TYPEI, FN)                                   \
-void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm,       \
-                  target_ulong base, uint32_t desc)                     \
-{                                                                       \
-    intptr_t i, oprsz = simd_oprsz(desc) / 8;                           \
-    unsigned scale = simd_data(desc);                                   \
-    uintptr_t ra = GETPC();                                             \
-    uint64_t *d = vd, *m = vm; uint8_t *pg = vg;                        \
-    for (i = 0; i < oprsz; i++) {                                       \
-        if (likely(pg[H1(i)] & 1)) {                                    \
-            target_ulong off = (target_ulong)(TYPEI)m[i] << scale;      \
-            FN(env, base + off, d[i], ra);                              \
-        }                                                               \
-    }                                                                   \
+static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
+                       target_ulong base, uint32_t desc, uintptr_t ra,
+                       zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
+{   /* Scatter store of 64-bit elements: one tlb_fn call per active one.  */
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
+    intptr_t i, oprsz = simd_oprsz(desc) / 8;   /* i counts 8-byte elements */
+
+    set_helper_retaddr(ra);     /* reset to 0 below, after the loop */
+    for (i = 0; i < oprsz; i++) {
+        uint8_t pg = *(uint8_t *)(vg + H1(i));  /* one predicate byte each */
+        if (likely(pg & 1)) {
+            target_ulong off = off_fn(vm, i * 8);   /* i * 8: byte offset */
+            tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra);
+        }
+    }
+    set_helper_retaddr(0);
 }
 
-DO_ST1_ZPZ_S(sve_stbs_zsu, uint32_t, cpu_stb_data_ra)
-DO_ST1_ZPZ_S(sve_sths_zsu, uint32_t, cpu_stw_data_ra)
-DO_ST1_ZPZ_S(sve_stss_zsu, uint32_t, cpu_stl_data_ra)
+#define DO_ST1_ZPZ_S(MEM, OFS) /* defines helper sve_st<MEM>_<OFS> */ \
+void __attribute__((flatten)) HELPER(sve_st##MEM##_##OFS)    \
+    (CPUARMState *env, void *vd, void *vg, void *vm,         \
+     target_ulong base, uint32_t desc)                       \
+{   /* Pass this helper's GETPC() down as ra; 32-bit form. */ \
+    sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(),         \
+              off_##OFS##_s, sve_st1##MEM##_tlb);            \
+}
 
-DO_ST1_ZPZ_S(sve_stbs_zss, int32_t, cpu_stb_data_ra)
-DO_ST1_ZPZ_S(sve_sths_zss, int32_t, cpu_stw_data_ra)
-DO_ST1_ZPZ_S(sve_stss_zss, int32_t, cpu_stl_data_ra)
+#define DO_ST1_ZPZ_D(MEM, OFS) /* defines helper sve_st<MEM>_<OFS> */ \
+void __attribute__((flatten)) HELPER(sve_st##MEM##_##OFS)    \
+    (CPUARMState *env, void *vd, void *vg, void *vm,         \
+     target_ulong base, uint32_t desc)                       \
+{   /* Pass this helper's GETPC() down as ra; 64-bit form. */ \
+    sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(),         \
+               off_##OFS##_d, sve_st1##MEM##_tlb);           \
+}
 
-DO_ST1_ZPZ_D(sve_stbd_zsu, uint32_t, cpu_stb_data_ra)
-DO_ST1_ZPZ_D(sve_sthd_zsu, uint32_t, cpu_stw_data_ra)
-DO_ST1_ZPZ_D(sve_stsd_zsu, uint32_t, cpu_stl_data_ra)
-DO_ST1_ZPZ_D(sve_stdd_zsu, uint32_t, cpu_stq_data_ra)
+DO_ST1_ZPZ_S(bs, zsu)       /* defines helper sve_stbs_zsu */
+DO_ST1_ZPZ_S(hs_le, zsu)
+DO_ST1_ZPZ_S(hs_be, zsu)
+DO_ST1_ZPZ_S(ss_le, zsu)
+DO_ST1_ZPZ_S(ss_be, zsu)
 
-DO_ST1_ZPZ_D(sve_stbd_zss, int32_t, cpu_stb_data_ra)
-DO_ST1_ZPZ_D(sve_sthd_zss, int32_t, cpu_stw_data_ra)
-DO_ST1_ZPZ_D(sve_stsd_zss, int32_t, cpu_stl_data_ra)
-DO_ST1_ZPZ_D(sve_stdd_zss, int32_t, cpu_stq_data_ra)
+DO_ST1_ZPZ_S(bs, zss)       /* same stores, off_zss_s offset extractor */
+DO_ST1_ZPZ_S(hs_le, zss)
+DO_ST1_ZPZ_S(hs_be, zss)
+DO_ST1_ZPZ_S(ss_le, zss)
+DO_ST1_ZPZ_S(ss_be, zss)
 
-DO_ST1_ZPZ_D(sve_stbd_zd, uint64_t, cpu_stb_data_ra)
-DO_ST1_ZPZ_D(sve_sthd_zd, uint64_t, cpu_stw_data_ra)
-DO_ST1_ZPZ_D(sve_stsd_zd, uint64_t, cpu_stl_data_ra)
-DO_ST1_ZPZ_D(sve_stdd_zd, uint64_t, cpu_stq_data_ra)
+DO_ST1_ZPZ_D(bd, zsu)       /* 64-bit element forms, via sve_st1_zd */
+DO_ST1_ZPZ_D(hd_le, zsu)
+DO_ST1_ZPZ_D(hd_be, zsu)
+DO_ST1_ZPZ_D(sd_le, zsu)
+DO_ST1_ZPZ_D(sd_be, zsu)
+DO_ST1_ZPZ_D(dd_le, zsu)
+DO_ST1_ZPZ_D(dd_be, zsu)
+
+DO_ST1_ZPZ_D(bd, zss)
+DO_ST1_ZPZ_D(hd_le, zss)
+DO_ST1_ZPZ_D(hd_be, zss)
+DO_ST1_ZPZ_D(sd_le, zss)
+DO_ST1_ZPZ_D(sd_be, zss)
+DO_ST1_ZPZ_D(dd_le, zss)
+DO_ST1_ZPZ_D(dd_be, zss)
+
+DO_ST1_ZPZ_D(bd, zd)        /* off_zd_d offset extractor */
+DO_ST1_ZPZ_D(hd_le, zd)
+DO_ST1_ZPZ_D(hd_be, zd)
+DO_ST1_ZPZ_D(sd_le, zd)
+DO_ST1_ZPZ_D(sd_be, zd)
+DO_ST1_ZPZ_D(dd_le, zd)
+DO_ST1_ZPZ_D(dd_be, zd)
+
+#undef DO_ST1_ZPZ_S         /* generator macros are not needed past here */
+#undef DO_ST1_ZPZ_D
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 8ca3876..88195ab 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -37,6 +37,7 @@
 
 #include "trace-tcg.h"
 #include "translate-a64.h"
+#include "qemu/atomic128.h"
 
 static TCGv_i64 cpu_X[32];
 static TCGv_i64 cpu_pc;
@@ -166,11 +167,15 @@
         cpu_fprintf(f, "\n");
         return;
     }
+    if (fp_exception_el(env, el) != 0) {
+        cpu_fprintf(f, "    FPU disabled\n");
+        return;
+    }
     cpu_fprintf(f, "     FPCR=%08x FPSR=%08x\n",
                 vfp_get_fpcr(env), vfp_get_fpsr(env));
 
-    if (arm_feature(env, ARM_FEATURE_SVE)) {
-        int j, zcr_len = env->vfp.zcr_el[1] & 0xf; /* fix for system mode */
+    if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) {
+        int j, zcr_len = sve_zcr_len_for_el(env, el);
 
         for (i = 0; i <= FFR_PRED_NUM; i++) {
             bool eol;
@@ -1196,25 +1201,23 @@
 
 /* Store from vector register to memory */
 static void do_vec_st(DisasContext *s, int srcidx, int element,
-                      TCGv_i64 tcg_addr, int size)
+                      TCGv_i64 tcg_addr, int size, TCGMemOp endian)
 {
-    TCGMemOp memop = s->be_data + size;
     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 
     read_vec_element(s, tcg_tmp, srcidx, element, size);
-    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
+    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
 
     tcg_temp_free_i64(tcg_tmp);
 }
 
 /* Load from memory to vector register */
 static void do_vec_ld(DisasContext *s, int destidx, int element,
-                      TCGv_i64 tcg_addr, int size)
+                      TCGv_i64 tcg_addr, int size, TCGMemOp endian)
 {
-    TCGMemOp memop = s->be_data + size;
     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 
-    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
+    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
     write_vec_element(s, tcg_tmp, destidx, element, size);
 
     tcg_temp_free_i64(tcg_tmp);
@@ -2082,26 +2085,27 @@
                                        get_mem_index(s),
                                        MO_64 | MO_ALIGN | s->be_data);
             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
-        } else if (s->be_data == MO_LE) {
-            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+            if (!HAVE_CMPXCHG128) {
+                gen_helper_exit_atomic(cpu_env);
+                s->base.is_jmp = DISAS_NORETURN;
+            } else if (s->be_data == MO_LE) {
                 gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
                                                         cpu_exclusive_addr,
                                                         cpu_reg(s, rt),
                                                         cpu_reg(s, rt2));
             } else {
-                gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
-                                               cpu_reg(s, rt), cpu_reg(s, rt2));
-            }
-        } else {
-            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
                 gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
                                                         cpu_exclusive_addr,
                                                         cpu_reg(s, rt),
                                                         cpu_reg(s, rt2));
-            } else {
-                gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
-                                               cpu_reg(s, rt), cpu_reg(s, rt2));
             }
+        } else if (s->be_data == MO_LE) {
+            gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
+                                           cpu_reg(s, rt), cpu_reg(s, rt2));
+        } else {
+            gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
+                                           cpu_reg(s, rt), cpu_reg(s, rt2));
         }
     } else {
         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
@@ -2171,14 +2175,18 @@
         }
         tcg_temp_free_i64(cmp);
     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
-        TCGv_i32 tcg_rs = tcg_const_i32(rs);
-
-        if (s->be_data == MO_LE) {
-            gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
+        if (HAVE_CMPXCHG128) {
+            TCGv_i32 tcg_rs = tcg_const_i32(rs);
+            if (s->be_data == MO_LE) {
+                gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
+            } else {
+                gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
+            }
+            tcg_temp_free_i32(tcg_rs);
         } else {
-            gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
+            gen_helper_exit_atomic(cpu_env);
+            s->base.is_jmp = DISAS_NORETURN;
         }
-        tcg_temp_free_i32(tcg_rs);
     } else {
         TCGv_i64 d1 = tcg_temp_new_i64();
         TCGv_i64 d2 = tcg_temp_new_i64();
@@ -2318,7 +2326,7 @@
         }
         if (rt2 == 31
             && ((rt | rs) & 1) == 0
-            && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+            && dc_isar_feature(aa64_atomics, s)) {
             /* CASP / CASPL */
             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
             return;
@@ -2340,7 +2348,7 @@
         }
         if (rt2 == 31
             && ((rt | rs) & 1) == 0
-            && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+            && dc_isar_feature(aa64_atomics, s)) {
             /* CASPA / CASPAL */
             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
             return;
@@ -2351,7 +2359,7 @@
     case 0xb: /* CASL */
     case 0xe: /* CASA */
     case 0xf: /* CASAL */
-        if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+        if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
             gen_compare_and_swap(s, rs, rt, rn, size);
             return;
         }
@@ -2890,11 +2898,10 @@
     int rs = extract32(insn, 16, 5);
     int rn = extract32(insn, 5, 5);
     int o3_opc = extract32(insn, 12, 4);
-    int feature = ARM_FEATURE_V8_ATOMICS;
     TCGv_i64 tcg_rn, tcg_rs;
     AtomicThreeOpFn *fn;
 
-    if (is_vector) {
+    if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
         unallocated_encoding(s);
         return;
     }
@@ -2930,10 +2937,6 @@
         unallocated_encoding(s);
         return;
     }
-    if (!arm_dc_feature(s, feature)) {
-        unallocated_encoding(s);
-        return;
-    }
 
     if (rn == 31) {
         gen_check_sp_alignment(s);
@@ -3013,10 +3016,11 @@
     bool is_store = !extract32(insn, 22, 1);
     bool is_postidx = extract32(insn, 23, 1);
     bool is_q = extract32(insn, 30, 1);
-    TCGv_i64 tcg_addr, tcg_rn;
+    TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes;
+    TCGMemOp endian = s->be_data;
 
-    int ebytes = 1 << size;
-    int elements = (is_q ? 128 : 64) / (8 << size);
+    int ebytes;   /* bytes per element */
+    int elements; /* elements per vector */
     int rpt;    /* num iterations */
     int selem;  /* structure elements */
     int r;
@@ -3075,39 +3079,55 @@
         gen_check_sp_alignment(s);
     }
 
+    /* For our purposes, bytes are always little-endian.  */
+    if (size == 0) {
+        endian = MO_LE;
+    }
+
+    /* Consecutive little-endian elements from a single register
+     * can be promoted to a larger little-endian operation.
+     */
+    if (selem == 1 && endian == MO_LE) {
+        size = 3;
+    }
+    ebytes = 1 << size;
+    elements = (is_q ? 16 : 8) / ebytes;
+
     tcg_rn = cpu_reg_sp(s, rn);
     tcg_addr = tcg_temp_new_i64();
     tcg_gen_mov_i64(tcg_addr, tcg_rn);
+    tcg_ebytes = tcg_const_i64(ebytes);
 
     for (r = 0; r < rpt; r++) {
         int e;
         for (e = 0; e < elements; e++) {
-            int tt = (rt + r) % 32;
             int xs;
             for (xs = 0; xs < selem; xs++) {
+                int tt = (rt + r + xs) % 32;
                 if (is_store) {
-                    do_vec_st(s, tt, e, tcg_addr, size);
+                    do_vec_st(s, tt, e, tcg_addr, size, endian);
                 } else {
-                    do_vec_ld(s, tt, e, tcg_addr, size);
-
-                    /* For non-quad operations, setting a slice of the low
-                     * 64 bits of the register clears the high 64 bits (in
-                     * the ARM ARM pseudocode this is implicit in the fact
-                     * that 'rval' is a 64 bit wide variable).
-                     * For quad operations, we might still need to zero the
-                     * high bits of SVE.  We optimize by noticing that we only
-                     * need to do this the first time we touch a register.
-                     */
-                    if (e == 0 && (r == 0 || xs == selem - 1)) {
-                        clear_vec_high(s, is_q, tt);
-                    }
+                    do_vec_ld(s, tt, e, tcg_addr, size, endian);
                 }
-                tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
-                tt = (tt + 1) % 32;
+                tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes);
             }
         }
     }
 
+    if (!is_store) {
+        /* For non-quad operations, setting a slice of the low
+         * 64 bits of the register clears the high 64 bits (in
+         * the ARM ARM pseudocode this is implicit in the fact
+         * that 'rval' is a 64 bit wide variable).
+         * For quad operations, we might still need to zero the
+         * high bits of SVE.
+         */
+        for (r = 0; r < rpt * selem; r++) {
+            int tt = (rt + r) % 32;
+            clear_vec_high(s, is_q, tt);
+        }
+    }
+
     if (is_postidx) {
         int rm = extract32(insn, 16, 5);
         if (rm == 31) {
@@ -3116,6 +3136,7 @@
             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
         }
     }
+    tcg_temp_free_i64(tcg_ebytes);
     tcg_temp_free_i64(tcg_addr);
 }
 
@@ -3158,7 +3179,7 @@
     bool replicate = false;
     int index = is_q << 3 | S << 2 | size;
     int ebytes, xs;
-    TCGv_i64 tcg_addr, tcg_rn;
+    TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes;
 
     switch (scale) {
     case 3:
@@ -3211,49 +3232,28 @@
     tcg_rn = cpu_reg_sp(s, rn);
     tcg_addr = tcg_temp_new_i64();
     tcg_gen_mov_i64(tcg_addr, tcg_rn);
+    tcg_ebytes = tcg_const_i64(ebytes);
 
     for (xs = 0; xs < selem; xs++) {
         if (replicate) {
             /* Load and replicate to all elements */
-            uint64_t mulconst;
             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 
             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
                                 get_mem_index(s), s->be_data + scale);
-            switch (scale) {
-            case 0:
-                mulconst = 0x0101010101010101ULL;
-                break;
-            case 1:
-                mulconst = 0x0001000100010001ULL;
-                break;
-            case 2:
-                mulconst = 0x0000000100000001ULL;
-                break;
-            case 3:
-                mulconst = 0;
-                break;
-            default:
-                g_assert_not_reached();
-            }
-            if (mulconst) {
-                tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
-            }
-            write_vec_element(s, tcg_tmp, rt, 0, MO_64);
-            if (is_q) {
-                write_vec_element(s, tcg_tmp, rt, 1, MO_64);
-            }
+            tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
+                                 (is_q + 1) * 8, vec_full_reg_size(s),
+                                 tcg_tmp);
             tcg_temp_free_i64(tcg_tmp);
-            clear_vec_high(s, is_q, rt);
         } else {
             /* Load/store one element per register */
             if (is_load) {
-                do_vec_ld(s, rt, index, tcg_addr, scale);
+                do_vec_ld(s, rt, index, tcg_addr, scale, s->be_data);
             } else {
-                do_vec_st(s, rt, index, tcg_addr, scale);
+                do_vec_st(s, rt, index, tcg_addr, scale, s->be_data);
             }
         }
-        tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
+        tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes);
         rt = (rt + 1) % 32;
     }
 
@@ -3265,6 +3265,7 @@
             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
         }
     }
+    tcg_temp_free_i64(tcg_ebytes);
     tcg_temp_free_i64(tcg_addr);
 }
 
@@ -4564,7 +4565,7 @@
     TCGv_i64 tcg_acc, tcg_val;
     TCGv_i32 tcg_bytes;
 
-    if (!arm_dc_feature(s, ARM_FEATURE_CRC)
+    if (!dc_isar_feature(aa64_crc32, s)
         || (sf == 1 && sz != 3)
         || (sf == 0 && sz == 3)) {
         unallocated_encoding(s);
@@ -4806,7 +4807,7 @@
         break;
     case 3:
         size = MO_16;
-        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (dc_isar_feature(aa64_fp16, s)) {
             break;
         }
         /* fallthru */
@@ -4857,7 +4858,7 @@
         break;
     case 3:
         size = MO_16;
-        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (dc_isar_feature(aa64_fp16, s)) {
             break;
         }
         /* fallthru */
@@ -4923,7 +4924,7 @@
         break;
     case 3:
         sz = MO_16;
-        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (dc_isar_feature(aa64_fp16, s)) {
             break;
         }
         /* fallthru */
@@ -5256,7 +5257,7 @@
             handle_fp_1src_double(s, opcode, rd, rn);
             break;
         case 3:
-            if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            if (!dc_isar_feature(aa64_fp16, s)) {
                 unallocated_encoding(s);
                 return;
             }
@@ -5471,7 +5472,7 @@
         handle_fp_2src_double(s, opcode, rd, rn, rm);
         break;
     case 3:
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (!dc_isar_feature(aa64_fp16, s)) {
             unallocated_encoding(s);
             return;
         }
@@ -5629,7 +5630,7 @@
         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
         break;
     case 3:
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (!dc_isar_feature(aa64_fp16, s)) {
             unallocated_encoding(s);
             return;
         }
@@ -5699,7 +5700,7 @@
         break;
     case 3:
         sz = MO_16;
-        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (dc_isar_feature(aa64_fp16, s)) {
             break;
         }
         /* fallthru */
@@ -5924,7 +5925,7 @@
     case 1: /* float64 */
         break;
     case 3: /* float16 */
-        if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (dc_isar_feature(aa64_fp16, s)) {
             break;
         }
         /* fallthru */
@@ -6054,7 +6055,7 @@
             break;
         case 0x6: /* 16-bit float, 32-bit int */
         case 0xe: /* 16-bit float, 64-bit int */
-            if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            if (dc_isar_feature(aa64_fp16, s)) {
                 break;
             }
             /* fallthru */
@@ -6081,7 +6082,7 @@
         case 1: /* float64 */
             break;
         case 3: /* float16 */
-            if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            if (dc_isar_feature(aa64_fp16, s)) {
                 break;
             }
             /* fallthru */
@@ -6518,7 +6519,7 @@
          */
         is_min = extract32(size, 1, 1);
         is_fp = true;
-        if (!is_u && arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (!is_u && dc_isar_feature(aa64_fp16, s)) {
             size = 1;
         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
             unallocated_encoding(s);
@@ -6914,7 +6915,7 @@
 
     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
         /* Check for FMOV (vector, immediate) - half-precision */
-        if (!(arm_dc_feature(s, ARM_FEATURE_V8_FP16) && o2 && cmode == 0xf)) {
+        if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
             unallocated_encoding(s);
             return;
         }
@@ -7081,7 +7082,7 @@
     case 0x2f: /* FMINP */
         /* FP op, size[0] is 32 or 64 bit*/
         if (!u) {
-            if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            if (!dc_isar_feature(aa64_fp16, s)) {
                 unallocated_encoding(s);
                 return;
             } else {
@@ -7726,7 +7727,7 @@
         size = MO_32;
     } else if (immh & 2) {
         size = MO_16;
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (!dc_isar_feature(aa64_fp16, s)) {
             unallocated_encoding(s);
             return;
         }
@@ -7771,7 +7772,7 @@
         size = MO_32;
     } else if (immh & 0x2) {
         size = MO_16;
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+        if (!dc_isar_feature(aa64_fp16, s)) {
             unallocated_encoding(s);
             return;
         }
@@ -8036,28 +8037,6 @@
     }
 }
 
-/* CMTST : test is "if (X & Y != 0)". */
-static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    tcg_gen_and_i32(d, a, b);
-    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
-    tcg_gen_neg_i32(d, d);
-}
-
-static void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    tcg_gen_and_i64(d, a, b);
-    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
-    tcg_gen_neg_i64(d, d);
-}
-
-static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    tcg_gen_and_vec(vece, d, a, b);
-    tcg_gen_dupi_vec(vece, a, 0);
-    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
-}
-
 static void handle_3same_64(DisasContext *s, int opcode, bool u,
                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
 {
@@ -8535,7 +8514,7 @@
         return;
     }
 
-    if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+    if (!dc_isar_feature(aa64_fp16, s)) {
         unallocated_encoding(s);
     }
 
@@ -8608,7 +8587,7 @@
     bool u = extract32(insn, 29, 1);
     TCGv_i32 ele1, ele2, ele3;
     TCGv_i64 res;
-    int feature;
+    bool feature;
 
     switch (u * 16 + opcode) {
     case 0x10: /* SQRDMLAH (vector) */
@@ -8617,13 +8596,13 @@
             unallocated_encoding(s);
             return;
         }
-        feature = ARM_FEATURE_V8_RDM;
+        feature = dc_isar_feature(aa64_rdm, s);
         break;
     default:
         unallocated_encoding(s);
         return;
     }
-    if (!arm_dc_feature(s, feature)) {
+    if (!feature) {
         unallocated_encoding(s);
         return;
     }
@@ -9397,191 +9376,10 @@
     }
 }
 
-static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_sar8i_i64(a, a, shift);
-    tcg_gen_vec_add8_i64(d, d, a);
-}
-
-static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_sar16i_i64(a, a, shift);
-    tcg_gen_vec_add16_i64(d, d, a);
-}
-
-static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_sari_i32(a, a, shift);
-    tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_sari_i64(a, a, shift);
-    tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    tcg_gen_sari_vec(vece, a, a, sh);
-    tcg_gen_add_vec(vece, d, d, a);
-}
-
-static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_shr8i_i64(a, a, shift);
-    tcg_gen_vec_add8_i64(d, d, a);
-}
-
-static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_shr16i_i64(a, a, shift);
-    tcg_gen_vec_add16_i64(d, d, a);
-}
-
-static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_shri_i32(a, a, shift);
-    tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_shri_i64(a, a, shift);
-    tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    tcg_gen_shri_vec(vece, a, a, sh);
-    tcg_gen_add_vec(vece, d, d, a);
-}
-
-static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_8, 0xff >> shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shri_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-    tcg_temp_free_i64(t);
-}
-
-static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shri_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-    tcg_temp_free_i64(t);
-}
-
-static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_shri_i32(a, a, shift);
-    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
-}
-
-static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_shri_i64(a, a, shift);
-    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
-}
-
-static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    uint64_t mask = (2ull << ((8 << vece) - 1)) - 1;
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-    TCGv_vec m = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_dupi_vec(vece, m, mask ^ (mask >> sh));
-    tcg_gen_shri_vec(vece, t, a, sh);
-    tcg_gen_and_vec(vece, d, d, m);
-    tcg_gen_or_vec(vece, d, d, t);
-
-    tcg_temp_free_vec(t);
-    tcg_temp_free_vec(m);
-}
-
 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
                                  int immh, int immb, int opcode, int rn, int rd)
 {
-    static const GVecGen2i ssra_op[4] = {
-        { .fni8 = gen_ssra8_i64,
-          .fniv = gen_ssra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_sari_vec,
-          .vece = MO_8 },
-        { .fni8 = gen_ssra16_i64,
-          .fniv = gen_ssra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_sari_vec,
-          .vece = MO_16 },
-        { .fni4 = gen_ssra32_i32,
-          .fniv = gen_ssra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_sari_vec,
-          .vece = MO_32 },
-        { .fni8 = gen_ssra64_i64,
-          .fniv = gen_ssra_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opc = INDEX_op_sari_vec,
-          .vece = MO_64 },
-    };
-    static const GVecGen2i usra_op[4] = {
-        { .fni8 = gen_usra8_i64,
-          .fniv = gen_usra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_8, },
-        { .fni8 = gen_usra16_i64,
-          .fniv = gen_usra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_16, },
-        { .fni4 = gen_usra32_i32,
-          .fniv = gen_usra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_32, },
-        { .fni8 = gen_usra64_i64,
-          .fniv = gen_usra_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_64, },
-    };
-    static const GVecGen2i sri_op[4] = {
-        { .fni8 = gen_shr8_ins_i64,
-          .fniv = gen_shr_ins_vec,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_8 },
-        { .fni8 = gen_shr16_ins_i64,
-          .fniv = gen_shr_ins_vec,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_16 },
-        { .fni4 = gen_shr32_ins_i32,
-          .fniv = gen_shr_ins_vec,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_32 },
-        { .fni8 = gen_shr64_ins_i64,
-          .fniv = gen_shr_ins_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_64 },
-    };
-
     int size = 32 - clz32(immh) - 1;
     int immhb = immh << 3 | immb;
     int shift = 2 * (8 << size) - immhb;
@@ -9677,85 +9475,10 @@
     clear_vec_high(s, is_q, rd);
 }
 
-static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_8, 0xff << shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shli_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-    tcg_temp_free_i64(t);
-}
-
-static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    uint64_t mask = dup_const(MO_16, 0xffff << shift);
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_shli_i64(t, a, shift);
-    tcg_gen_andi_i64(t, t, mask);
-    tcg_gen_andi_i64(d, d, ~mask);
-    tcg_gen_or_i64(d, d, t);
-    tcg_temp_free_i64(t);
-}
-
-static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
-}
-
-static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
-}
-
-static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    uint64_t mask = (1ull << sh) - 1;
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-    TCGv_vec m = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_dupi_vec(vece, m, mask);
-    tcg_gen_shli_vec(vece, t, a, sh);
-    tcg_gen_and_vec(vece, d, d, m);
-    tcg_gen_or_vec(vece, d, d, t);
-
-    tcg_temp_free_vec(t);
-    tcg_temp_free_vec(m);
-}
-
 /* SHL/SLI - Vector shift left */
 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
                                  int immh, int immb, int opcode, int rn, int rd)
 {
-    static const GVecGen2i shi_op[4] = {
-        { .fni8 = gen_shl8_ins_i64,
-          .fniv = gen_shl_ins_vec,
-          .opc = INDEX_op_shli_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .vece = MO_8 },
-        { .fni8 = gen_shl16_ins_i64,
-          .fniv = gen_shl_ins_vec,
-          .opc = INDEX_op_shli_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .vece = MO_16 },
-        { .fni4 = gen_shl32_ins_i32,
-          .fniv = gen_shl_ins_vec,
-          .opc = INDEX_op_shli_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .vece = MO_32 },
-        { .fni8 = gen_shl64_ins_i64,
-          .fniv = gen_shl_ins_vec,
-          .opc = INDEX_op_shli_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .vece = MO_64 },
-    };
     int size = 32 - clz32(immh) - 1;
     int immhb = immh << 3 | immb;
     int shift = immhb - (8 << size);
@@ -9775,7 +9498,7 @@
     }
 
     if (insert) {
-        gen_gvec_op2i(s, is_q, rd, rn, shift, &shi_op[size]);
+        gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]);
     } else {
         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
     }
@@ -10352,7 +10075,7 @@
             return;
         }
         if (size == 3) {
-            if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
+            if (!dc_isar_feature(aa64_pmull, s)) {
                 unallocated_encoding(s);
                 return;
             }
@@ -10397,70 +10120,9 @@
     }
 }
 
-static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
-{
-    tcg_gen_xor_i64(rn, rn, rm);
-    tcg_gen_and_i64(rn, rn, rd);
-    tcg_gen_xor_i64(rd, rm, rn);
-}
-
-static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
-{
-    tcg_gen_xor_i64(rn, rn, rd);
-    tcg_gen_and_i64(rn, rn, rm);
-    tcg_gen_xor_i64(rd, rd, rn);
-}
-
-static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
-{
-    tcg_gen_xor_i64(rn, rn, rd);
-    tcg_gen_andc_i64(rn, rn, rm);
-    tcg_gen_xor_i64(rd, rd, rn);
-}
-
-static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
-{
-    tcg_gen_xor_vec(vece, rn, rn, rm);
-    tcg_gen_and_vec(vece, rn, rn, rd);
-    tcg_gen_xor_vec(vece, rd, rm, rn);
-}
-
-static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
-{
-    tcg_gen_xor_vec(vece, rn, rn, rd);
-    tcg_gen_and_vec(vece, rn, rn, rm);
-    tcg_gen_xor_vec(vece, rd, rd, rn);
-}
-
-static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
-{
-    tcg_gen_xor_vec(vece, rn, rn, rd);
-    tcg_gen_andc_vec(vece, rn, rn, rm);
-    tcg_gen_xor_vec(vece, rd, rd, rn);
-}
-
 /* Logic op (opcode == 3) subgroup of C3.6.16. */
 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
 {
-    static const GVecGen3 bsl_op = {
-        .fni8 = gen_bsl_i64,
-        .fniv = gen_bsl_vec,
-        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-        .load_dest = true
-    };
-    static const GVecGen3 bit_op = {
-        .fni8 = gen_bit_i64,
-        .fniv = gen_bit_vec,
-        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-        .load_dest = true
-    };
-    static const GVecGen3 bif_op = {
-        .fni8 = gen_bif_i64,
-        .fniv = gen_bif_vec,
-        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-        .load_dest = true
-    };
-
     int rd = extract32(insn, 0, 5);
     int rn = extract32(insn, 5, 5);
     int rm = extract32(insn, 16, 5);
@@ -10732,131 +10394,9 @@
     }
 }
 
-static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u8(a, a, b);
-    gen_helper_neon_add_u8(d, d, a);
-}
-
-static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u16(a, a, b);
-    gen_helper_neon_add_u16(d, d, a);
-}
-
-static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    tcg_gen_mul_i32(a, a, b);
-    tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    tcg_gen_mul_i64(a, a, b);
-    tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    tcg_gen_mul_vec(vece, a, a, b);
-    tcg_gen_add_vec(vece, d, d, a);
-}
-
-static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u8(a, a, b);
-    gen_helper_neon_sub_u8(d, d, a);
-}
-
-static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    gen_helper_neon_mul_u16(a, a, b);
-    gen_helper_neon_sub_u16(d, d, a);
-}
-
-static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    tcg_gen_mul_i32(a, a, b);
-    tcg_gen_sub_i32(d, d, a);
-}
-
-static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    tcg_gen_mul_i64(a, a, b);
-    tcg_gen_sub_i64(d, d, a);
-}
-
-static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    tcg_gen_mul_vec(vece, a, a, b);
-    tcg_gen_sub_vec(vece, d, d, a);
-}
-
 /* Integer op subgroup of C3.6.16. */
 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
 {
-    static const GVecGen3 cmtst_op[4] = {
-        { .fni4 = gen_helper_neon_tst_u8,
-          .fniv = gen_cmtst_vec,
-          .vece = MO_8 },
-        { .fni4 = gen_helper_neon_tst_u16,
-          .fniv = gen_cmtst_vec,
-          .vece = MO_16 },
-        { .fni4 = gen_cmtst_i32,
-          .fniv = gen_cmtst_vec,
-          .vece = MO_32 },
-        { .fni8 = gen_cmtst_i64,
-          .fniv = gen_cmtst_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .vece = MO_64 },
-    };
-    static const GVecGen3 mla_op[4] = {
-        { .fni4 = gen_mla8_i32,
-          .fniv = gen_mla_vec,
-          .opc = INDEX_op_mul_vec,
-          .load_dest = true,
-          .vece = MO_8 },
-        { .fni4 = gen_mla16_i32,
-          .fniv = gen_mla_vec,
-          .opc = INDEX_op_mul_vec,
-          .load_dest = true,
-          .vece = MO_16 },
-        { .fni4 = gen_mla32_i32,
-          .fniv = gen_mla_vec,
-          .opc = INDEX_op_mul_vec,
-          .load_dest = true,
-          .vece = MO_32 },
-        { .fni8 = gen_mla64_i64,
-          .fniv = gen_mla_vec,
-          .opc = INDEX_op_mul_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .vece = MO_64 },
-    };
-    static const GVecGen3 mls_op[4] = {
-        { .fni4 = gen_mls8_i32,
-          .fniv = gen_mls_vec,
-          .opc = INDEX_op_mul_vec,
-          .load_dest = true,
-          .vece = MO_8 },
-        { .fni4 = gen_mls16_i32,
-          .fniv = gen_mls_vec,
-          .opc = INDEX_op_mul_vec,
-          .load_dest = true,
-          .vece = MO_16 },
-        { .fni4 = gen_mls32_i32,
-          .fniv = gen_mls_vec,
-          .opc = INDEX_op_mul_vec,
-          .load_dest = true,
-          .vece = MO_32 },
-        { .fni8 = gen_mls64_i64,
-          .fniv = gen_mls_vec,
-          .opc = INDEX_op_mul_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .vece = MO_64 },
-    };
-
     int is_q = extract32(insn, 30, 1);
     int u = extract32(insn, 29, 1);
     int size = extract32(insn, 22, 2);
@@ -11216,7 +10756,7 @@
     TCGv_ptr fpst;
     bool pairwise = false;
 
-    if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+    if (!dc_isar_feature(aa64_fp16, s)) {
         unallocated_encoding(s);
         return;
     }
@@ -11404,7 +10944,8 @@
     int size = extract32(insn, 22, 2);
     bool u = extract32(insn, 29, 1);
     bool is_q = extract32(insn, 30, 1);
-    int feature, rot;
+    bool feature;
+    int rot;
 
     switch (u * 16 + opcode) {
     case 0x10: /* SQRDMLAH (vector) */
@@ -11413,7 +10954,7 @@
             unallocated_encoding(s);
             return;
         }
-        feature = ARM_FEATURE_V8_RDM;
+        feature = dc_isar_feature(aa64_rdm, s);
         break;
     case 0x02: /* SDOT (vector) */
     case 0x12: /* UDOT (vector) */
@@ -11421,7 +10962,7 @@
             unallocated_encoding(s);
             return;
         }
-        feature = ARM_FEATURE_V8_DOTPROD;
+        feature = dc_isar_feature(aa64_dp, s);
         break;
     case 0x18: /* FCMLA, #0 */
     case 0x19: /* FCMLA, #90 */
@@ -11430,18 +10971,18 @@
     case 0x1c: /* FCADD, #90 */
     case 0x1e: /* FCADD, #270 */
         if (size == 0
-            || (size == 1 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))
+            || (size == 1 && !dc_isar_feature(aa64_fp16, s))
             || (size == 3 && !is_q)) {
             unallocated_encoding(s);
             return;
         }
-        feature = ARM_FEATURE_V8_FCMA;
+        feature = dc_isar_feature(aa64_fcma, s);
         break;
     default:
         unallocated_encoding(s);
         return;
     }
-    if (!arm_dc_feature(s, feature)) {
+    if (!feature) {
         unallocated_encoding(s);
         return;
     }
@@ -12310,7 +11851,7 @@
     bool need_fpst = true;
     int rmode;
 
-    if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+    if (!dc_isar_feature(aa64_fp16, s)) {
         unallocated_encoding(s);
         return;
     }
@@ -12655,14 +12196,14 @@
         break;
     case 0x1d: /* SQRDMLAH */
     case 0x1f: /* SQRDMLSH */
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
+        if (!dc_isar_feature(aa64_rdm, s)) {
             unallocated_encoding(s);
             return;
         }
         break;
     case 0x0e: /* SDOT */
     case 0x1e: /* UDOT */
-        if (size != MO_32 || !arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
+        if (size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
             unallocated_encoding(s);
             return;
         }
@@ -12671,7 +12212,7 @@
     case 0x13: /* FCMLA #90 */
     case 0x15: /* FCMLA #180 */
     case 0x17: /* FCMLA #270 */
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) {
+        if (!dc_isar_feature(aa64_fcma, s)) {
             unallocated_encoding(s);
             return;
         }
@@ -12727,7 +12268,7 @@
         }
         break;
     }
-    if (is_fp16 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+    if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
         unallocated_encoding(s);
         return;
     }
@@ -13198,8 +12739,7 @@
     TCGv_i32 tcg_decrypt;
     CryptoThreeOpIntFn *genfn;
 
-    if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
-        || size != 0) {
+    if (!dc_isar_feature(aa64_aes, s) || size != 0) {
         unallocated_encoding(s);
         return;
     }
@@ -13256,7 +12796,7 @@
     int rd = extract32(insn, 0, 5);
     CryptoThreeOpFn *genfn;
     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
-    int feature = ARM_FEATURE_V8_SHA256;
+    bool feature;
 
     if (size != 0) {
         unallocated_encoding(s);
@@ -13269,23 +12809,26 @@
     case 2: /* SHA1M */
     case 3: /* SHA1SU0 */
         genfn = NULL;
-        feature = ARM_FEATURE_V8_SHA1;
+        feature = dc_isar_feature(aa64_sha1, s);
         break;
     case 4: /* SHA256H */
         genfn = gen_helper_crypto_sha256h;
+        feature = dc_isar_feature(aa64_sha256, s);
         break;
     case 5: /* SHA256H2 */
         genfn = gen_helper_crypto_sha256h2;
+        feature = dc_isar_feature(aa64_sha256, s);
         break;
     case 6: /* SHA256SU1 */
         genfn = gen_helper_crypto_sha256su1;
+        feature = dc_isar_feature(aa64_sha256, s);
         break;
     default:
         unallocated_encoding(s);
         return;
     }
 
-    if (!arm_dc_feature(s, feature)) {
+    if (!feature) {
         unallocated_encoding(s);
         return;
     }
@@ -13326,7 +12869,7 @@
     int rn = extract32(insn, 5, 5);
     int rd = extract32(insn, 0, 5);
     CryptoTwoOpFn *genfn;
-    int feature;
+    bool feature;
     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
 
     if (size != 0) {
@@ -13336,15 +12879,15 @@
 
     switch (opcode) {
     case 0: /* SHA1H */
-        feature = ARM_FEATURE_V8_SHA1;
+        feature = dc_isar_feature(aa64_sha1, s);
         genfn = gen_helper_crypto_sha1h;
         break;
     case 1: /* SHA1SU1 */
-        feature = ARM_FEATURE_V8_SHA1;
+        feature = dc_isar_feature(aa64_sha1, s);
         genfn = gen_helper_crypto_sha1su1;
         break;
     case 2: /* SHA256SU0 */
-        feature = ARM_FEATURE_V8_SHA256;
+        feature = dc_isar_feature(aa64_sha256, s);
         genfn = gen_helper_crypto_sha256su0;
         break;
     default:
@@ -13352,7 +12895,7 @@
         return;
     }
 
-    if (!arm_dc_feature(s, feature)) {
+    if (!feature) {
         unallocated_encoding(s);
         return;
     }
@@ -13383,40 +12926,40 @@
     int rm = extract32(insn, 16, 5);
     int rn = extract32(insn, 5, 5);
     int rd = extract32(insn, 0, 5);
-    int feature;
+    bool feature;
     CryptoThreeOpFn *genfn;
 
     if (o == 0) {
         switch (opcode) {
         case 0: /* SHA512H */
-            feature = ARM_FEATURE_V8_SHA512;
+            feature = dc_isar_feature(aa64_sha512, s);
             genfn = gen_helper_crypto_sha512h;
             break;
         case 1: /* SHA512H2 */
-            feature = ARM_FEATURE_V8_SHA512;
+            feature = dc_isar_feature(aa64_sha512, s);
             genfn = gen_helper_crypto_sha512h2;
             break;
         case 2: /* SHA512SU1 */
-            feature = ARM_FEATURE_V8_SHA512;
+            feature = dc_isar_feature(aa64_sha512, s);
             genfn = gen_helper_crypto_sha512su1;
             break;
         case 3: /* RAX1 */
-            feature = ARM_FEATURE_V8_SHA3;
+            feature = dc_isar_feature(aa64_sha3, s);
             genfn = NULL;
             break;
         }
     } else {
         switch (opcode) {
         case 0: /* SM3PARTW1 */
-            feature = ARM_FEATURE_V8_SM3;
+            feature = dc_isar_feature(aa64_sm3, s);
             genfn = gen_helper_crypto_sm3partw1;
             break;
         case 1: /* SM3PARTW2 */
-            feature = ARM_FEATURE_V8_SM3;
+            feature = dc_isar_feature(aa64_sm3, s);
             genfn = gen_helper_crypto_sm3partw2;
             break;
         case 2: /* SM4EKEY */
-            feature = ARM_FEATURE_V8_SM4;
+            feature = dc_isar_feature(aa64_sm4, s);
             genfn = gen_helper_crypto_sm4ekey;
             break;
         default:
@@ -13425,7 +12968,7 @@
         }
     }
 
-    if (!arm_dc_feature(s, feature)) {
+    if (!feature) {
         unallocated_encoding(s);
         return;
     }
@@ -13484,16 +13027,16 @@
     int rn = extract32(insn, 5, 5);
     int rd = extract32(insn, 0, 5);
     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
-    int feature;
+    bool feature;
     CryptoTwoOpFn *genfn;
 
     switch (opcode) {
     case 0: /* SHA512SU0 */
-        feature = ARM_FEATURE_V8_SHA512;
+        feature = dc_isar_feature(aa64_sha512, s);
         genfn = gen_helper_crypto_sha512su0;
         break;
     case 1: /* SM4E */
-        feature = ARM_FEATURE_V8_SM4;
+        feature = dc_isar_feature(aa64_sm4, s);
         genfn = gen_helper_crypto_sm4e;
         break;
     default:
@@ -13501,7 +13044,7 @@
         return;
     }
 
-    if (!arm_dc_feature(s, feature)) {
+    if (!feature) {
         unallocated_encoding(s);
         return;
     }
@@ -13532,22 +13075,22 @@
     int ra = extract32(insn, 10, 5);
     int rn = extract32(insn, 5, 5);
     int rd = extract32(insn, 0, 5);
-    int feature;
+    bool feature;
 
     switch (op0) {
     case 0: /* EOR3 */
     case 1: /* BCAX */
-        feature = ARM_FEATURE_V8_SHA3;
+        feature = dc_isar_feature(aa64_sha3, s);
         break;
     case 2: /* SM3SS1 */
-        feature = ARM_FEATURE_V8_SM3;
+        feature = dc_isar_feature(aa64_sm3, s);
         break;
     default:
         unallocated_encoding(s);
         return;
     }
 
-    if (!arm_dc_feature(s, feature)) {
+    if (!feature) {
         unallocated_encoding(s);
         return;
     }
@@ -13634,7 +13177,7 @@
     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
     int pass;
 
-    if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA3)) {
+    if (!dc_isar_feature(aa64_sha3, s)) {
         unallocated_encoding(s);
         return;
     }
@@ -13680,7 +13223,7 @@
     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
     TCGv_i32 tcg_imm2, tcg_opcode;
 
-    if (!arm_dc_feature(s, ARM_FEATURE_V8_SM3)) {
+    if (!dc_isar_feature(aa64_sm3, s)) {
         unallocated_encoding(s);
         return;
     }
@@ -13788,7 +13331,7 @@
         unallocated_encoding(s);
         break;
     case 0x2:
-        if (!arm_dc_feature(s, ARM_FEATURE_SVE) || !disas_sve(s, insn)) {
+        if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) {
             unallocated_encoding(s);
         }
         break;
@@ -13829,6 +13372,7 @@
     ARMCPU *arm_cpu = arm_env_get_cpu(env);
     int bound;
 
+    dc->isar = &arm_cpu->isar;
     dc->pc = dc->base.pc_first;
     dc->condjmp = 0;
 
@@ -13892,7 +13436,6 @@
 
 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
 {
-    tcg_clear_temp_count();
 }
 
 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 6678795..fe7aebd 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -4600,62 +4600,97 @@
     3, 2, 1, 3
 };
 
+static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
+{
+    return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
+}
+
 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
-                       gen_helper_gvec_mem *fn)
+                       int dtype, gen_helper_gvec_mem *fn)
 {
     unsigned vsz = vec_full_reg_size(s);
     TCGv_ptr t_pg;
-    TCGv_i32 desc;
+    TCGv_i32 t_desc;
+    int desc;
 
     /* For e.g. LD4, there are not enough arguments to pass all 4
      * registers as pointers, so encode the regno into the data field.
      * For consistency, do this even for LD1.
      */
-    desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
+    desc = sve_memopidx(s, dtype);
+    desc |= zt << MEMOPIDX_SHIFT;
+    desc = simd_desc(vsz, vsz, desc);
+    t_desc = tcg_const_i32(desc);
     t_pg = tcg_temp_new_ptr();
 
     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
-    fn(cpu_env, t_pg, addr, desc);
+    fn(cpu_env, t_pg, addr, t_desc);
 
     tcg_temp_free_ptr(t_pg);
-    tcg_temp_free_i32(desc);
+    tcg_temp_free_i32(t_desc);
 }
 
 static void do_ld_zpa(DisasContext *s, int zt, int pg,
                       TCGv_i64 addr, int dtype, int nreg)
 {
-    static gen_helper_gvec_mem * const fns[16][4] = {
-        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
-          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
-        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
-        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
-        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
+    static gen_helper_gvec_mem * const fns[2][16][4] = {
+        /* Little-endian */
+        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
+            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
+          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
 
-        { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
-        { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
-          gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
-        { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
-        { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
+            gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
+          { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
 
-        { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
-        { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
-        { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
-          gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
-        { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
+            gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
+          { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
 
-        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
-        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
-        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
-        { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
-          gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
+          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
+            gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
+
+        /* Big-endian */
+        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
+            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
+          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
+
+          { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
+            gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
+          { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
+
+          { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
+            gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
+          { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
+
+          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
+          { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
+            gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
     };
-    gen_helper_gvec_mem *fn = fns[dtype][nreg];
+    gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];
 
     /* While there are holes in the table, they are not
      * accessible via the instruction encoding.
      */
     assert(fn != NULL);
-    do_mem_zpa(s, zt, pg, addr, fn);
+    do_mem_zpa(s, zt, pg, addr, dtype, fn);
 }
 
 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
@@ -4689,59 +4724,104 @@
 
 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
 {
-    static gen_helper_gvec_mem * const fns[16] = {
-        gen_helper_sve_ldff1bb_r,
-        gen_helper_sve_ldff1bhu_r,
-        gen_helper_sve_ldff1bsu_r,
-        gen_helper_sve_ldff1bdu_r,
+    static gen_helper_gvec_mem * const fns[2][16] = { /* [be][dtype] */
+        /* Little-endian */
+        { gen_helper_sve_ldff1bb_r,
+          gen_helper_sve_ldff1bhu_r,
+          gen_helper_sve_ldff1bsu_r,
+          gen_helper_sve_ldff1bdu_r,
 
-        gen_helper_sve_ldff1sds_r,
-        gen_helper_sve_ldff1hh_r,
-        gen_helper_sve_ldff1hsu_r,
-        gen_helper_sve_ldff1hdu_r,
+          gen_helper_sve_ldff1sds_le_r,
+          gen_helper_sve_ldff1hh_le_r,
+          gen_helper_sve_ldff1hsu_le_r,
+          gen_helper_sve_ldff1hdu_le_r,
 
-        gen_helper_sve_ldff1hds_r,
-        gen_helper_sve_ldff1hss_r,
-        gen_helper_sve_ldff1ss_r,
-        gen_helper_sve_ldff1sdu_r,
+          gen_helper_sve_ldff1hds_le_r,
+          gen_helper_sve_ldff1hss_le_r,
+          gen_helper_sve_ldff1ss_le_r,
+          gen_helper_sve_ldff1sdu_le_r,
 
-        gen_helper_sve_ldff1bds_r,
-        gen_helper_sve_ldff1bss_r,
-        gen_helper_sve_ldff1bhs_r,
-        gen_helper_sve_ldff1dd_r,
+          gen_helper_sve_ldff1bds_r,
+          gen_helper_sve_ldff1bss_r,
+          gen_helper_sve_ldff1bhs_r,
+          gen_helper_sve_ldff1dd_le_r },
+
+        /* Big-endian */
+        { gen_helper_sve_ldff1bb_r,
+          gen_helper_sve_ldff1bhu_r,
+          gen_helper_sve_ldff1bsu_r,
+          gen_helper_sve_ldff1bdu_r,
+
+          gen_helper_sve_ldff1sds_be_r,
+          gen_helper_sve_ldff1hh_be_r,
+          gen_helper_sve_ldff1hsu_be_r,
+          gen_helper_sve_ldff1hdu_be_r,
+
+          gen_helper_sve_ldff1hds_be_r,
+          gen_helper_sve_ldff1hss_be_r,
+          gen_helper_sve_ldff1ss_be_r,
+          gen_helper_sve_ldff1sdu_be_r,
+
+          gen_helper_sve_ldff1bds_r,
+          gen_helper_sve_ldff1bss_r,
+          gen_helper_sve_ldff1bhs_r,
+          gen_helper_sve_ldff1dd_be_r },
     };
 
     if (sve_access_check(s)) {
         TCGv_i64 addr = new_tmp_a64(s);
         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
-        do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
+        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
+                   fns[s->be_data == MO_BE][a->dtype]);
     }
     return true;
 }
 
 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
 {
-    static gen_helper_gvec_mem * const fns[16] = {
-        gen_helper_sve_ldnf1bb_r,
-        gen_helper_sve_ldnf1bhu_r,
-        gen_helper_sve_ldnf1bsu_r,
-        gen_helper_sve_ldnf1bdu_r,
+    static gen_helper_gvec_mem * const fns[2][16] = {
+        /* Little-endian */
+        { gen_helper_sve_ldnf1bb_r,
+          gen_helper_sve_ldnf1bhu_r,
+          gen_helper_sve_ldnf1bsu_r,
+          gen_helper_sve_ldnf1bdu_r,
 
-        gen_helper_sve_ldnf1sds_r,
-        gen_helper_sve_ldnf1hh_r,
-        gen_helper_sve_ldnf1hsu_r,
-        gen_helper_sve_ldnf1hdu_r,
+          gen_helper_sve_ldnf1sds_le_r,
+          gen_helper_sve_ldnf1hh_le_r,
+          gen_helper_sve_ldnf1hsu_le_r,
+          gen_helper_sve_ldnf1hdu_le_r,
 
-        gen_helper_sve_ldnf1hds_r,
-        gen_helper_sve_ldnf1hss_r,
-        gen_helper_sve_ldnf1ss_r,
-        gen_helper_sve_ldnf1sdu_r,
+          gen_helper_sve_ldnf1hds_le_r,
+          gen_helper_sve_ldnf1hss_le_r,
+          gen_helper_sve_ldnf1ss_le_r,
+          gen_helper_sve_ldnf1sdu_le_r,
 
-        gen_helper_sve_ldnf1bds_r,
-        gen_helper_sve_ldnf1bss_r,
-        gen_helper_sve_ldnf1bhs_r,
-        gen_helper_sve_ldnf1dd_r,
+          gen_helper_sve_ldnf1bds_r,
+          gen_helper_sve_ldnf1bss_r,
+          gen_helper_sve_ldnf1bhs_r,
+          gen_helper_sve_ldnf1dd_le_r },
+
+        /* Big-endian */
+        { gen_helper_sve_ldnf1bb_r,
+          gen_helper_sve_ldnf1bhu_r,
+          gen_helper_sve_ldnf1bsu_r,
+          gen_helper_sve_ldnf1bdu_r,
+
+          gen_helper_sve_ldnf1sds_be_r,
+          gen_helper_sve_ldnf1hh_be_r,
+          gen_helper_sve_ldnf1hsu_be_r,
+          gen_helper_sve_ldnf1hdu_be_r,
+
+          gen_helper_sve_ldnf1hds_be_r,
+          gen_helper_sve_ldnf1hss_be_r,
+          gen_helper_sve_ldnf1ss_be_r,
+          gen_helper_sve_ldnf1sdu_be_r,
+
+          gen_helper_sve_ldnf1bds_r,
+          gen_helper_sve_ldnf1bss_r,
+          gen_helper_sve_ldnf1bhs_r,
+          gen_helper_sve_ldnf1dd_be_r },
     };
 
     if (sve_access_check(s)) {
@@ -4751,30 +4831,57 @@
         TCGv_i64 addr = new_tmp_a64(s);
 
         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
-        do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
+        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
+                   fns[s->be_data == MO_BE][a->dtype]);
     }
     return true;
 }
 
 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
 {
-    static gen_helper_gvec_mem * const fns[4] = {
-        gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
-        gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
+    static gen_helper_gvec_mem * const fns[2][4] = {
+        { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_le_r,
+          gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
+        { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_be_r,
+          gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
     };
     unsigned vsz = vec_full_reg_size(s);
     TCGv_ptr t_pg;
-    TCGv_i32 desc;
+    TCGv_i32 t_desc;
+    int desc, poff;
 
     /* Load the first quadword using the normal predicated load helpers.  */
-    desc = tcg_const_i32(simd_desc(16, 16, zt));
-    t_pg = tcg_temp_new_ptr();
+    desc = sve_memopidx(s, msz_dtype(msz));
+    desc |= zt << MEMOPIDX_SHIFT;
+    desc = simd_desc(16, 16, desc);
+    t_desc = tcg_const_i32(desc);
 
-    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
-    fns[msz](cpu_env, t_pg, addr, desc);
+    poff = pred_full_reg_offset(s, pg);
+    if (vsz > 16) {
+        /*
+         * Zero-extend the first 16 bits of the predicate into a temporary.
+         * This avoids triggering an assert making sure we don't have bits
+         * set within a predicate beyond VQ, but we have lowered VQ to 1
+         * for this load operation.
+         */
+        TCGv_i64 tmp = tcg_temp_new_i64();
+#ifdef HOST_WORDS_BIGENDIAN
+        poff += 6;
+#endif
+        tcg_gen_ld16u_i64(tmp, cpu_env, poff);
+
+        poff = offsetof(CPUARMState, vfp.preg_tmp);
+        tcg_gen_st_i64(tmp, cpu_env, poff);
+        tcg_temp_free_i64(tmp);
+    }
+
+    t_pg = tcg_temp_new_ptr();
+    tcg_gen_addi_ptr(t_pg, cpu_env, poff);
+
+    fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
 
     tcg_temp_free_ptr(t_pg);
-    tcg_temp_free_i32(desc);
+    tcg_temp_free_i32(t_desc);
 
     /* Replicate that first quadword.  */
     if (vsz > 16) {
@@ -4860,35 +4967,73 @@
 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int msz, int esz, int nreg)
 {
-    static gen_helper_gvec_mem * const fn_single[4][4] = {
-        { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
-          gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
-        { NULL,                   gen_helper_sve_st1hh_r,
-          gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
-        { NULL, NULL,
-          gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
-        { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
+    static gen_helper_gvec_mem * const fn_single[2][4][4] = {
+        { { gen_helper_sve_st1bb_r,
+            gen_helper_sve_st1bh_r,
+            gen_helper_sve_st1bs_r,
+            gen_helper_sve_st1bd_r },
+          { NULL,
+            gen_helper_sve_st1hh_le_r,
+            gen_helper_sve_st1hs_le_r,
+            gen_helper_sve_st1hd_le_r },
+          { NULL, NULL,
+            gen_helper_sve_st1ss_le_r,
+            gen_helper_sve_st1sd_le_r },
+          { NULL, NULL, NULL,
+            gen_helper_sve_st1dd_le_r } },
+        { { gen_helper_sve_st1bb_r,
+            gen_helper_sve_st1bh_r,
+            gen_helper_sve_st1bs_r,
+            gen_helper_sve_st1bd_r },
+          { NULL,
+            gen_helper_sve_st1hh_be_r,
+            gen_helper_sve_st1hs_be_r,
+            gen_helper_sve_st1hd_be_r },
+          { NULL, NULL,
+            gen_helper_sve_st1ss_be_r,
+            gen_helper_sve_st1sd_be_r },
+          { NULL, NULL, NULL,
+            gen_helper_sve_st1dd_be_r } },
     };
-    static gen_helper_gvec_mem * const fn_multiple[3][4] = {
-        { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
-          gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
-        { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
-          gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
-        { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
-          gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
+    static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
+        { { gen_helper_sve_st2bb_r,
+            gen_helper_sve_st2hh_le_r,
+            gen_helper_sve_st2ss_le_r,
+            gen_helper_sve_st2dd_le_r },
+          { gen_helper_sve_st3bb_r,
+            gen_helper_sve_st3hh_le_r,
+            gen_helper_sve_st3ss_le_r,
+            gen_helper_sve_st3dd_le_r },
+          { gen_helper_sve_st4bb_r,
+            gen_helper_sve_st4hh_le_r,
+            gen_helper_sve_st4ss_le_r,
+            gen_helper_sve_st4dd_le_r } },
+        { { gen_helper_sve_st2bb_r,
+            gen_helper_sve_st2hh_be_r,
+            gen_helper_sve_st2ss_be_r,
+            gen_helper_sve_st2dd_be_r },
+          { gen_helper_sve_st3bb_r,
+            gen_helper_sve_st3hh_be_r,
+            gen_helper_sve_st3ss_be_r,
+            gen_helper_sve_st3dd_be_r },
+          { gen_helper_sve_st4bb_r,
+            gen_helper_sve_st4hh_be_r,
+            gen_helper_sve_st4ss_be_r,
+            gen_helper_sve_st4dd_be_r } },
     };
     gen_helper_gvec_mem *fn;
+    int be = s->be_data == MO_BE; /* selects the _le_ (0) or _be_ (1) tables */
 
     if (nreg == 0) {
         /* ST1 */
-        fn = fn_single[msz][esz];
+        fn = fn_single[be][msz][esz];
     } else {
         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
         assert(msz == esz);
-        fn = fn_multiple[nreg - 1][msz];
+        fn = fn_multiple[be][nreg - 1][msz];
     }
     assert(fn != NULL);
-    do_mem_zpa(s, zt, pg, addr, fn);
+    do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn);
 }
 
 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
@@ -4926,111 +5071,203 @@
  *** SVE gather loads / scatter stores
  */
 
-static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
-                       TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
+static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
+                       int scale, TCGv_i64 scalar, int msz,
+                       gen_helper_gvec_mem_scatter *fn)
 {
     unsigned vsz = vec_full_reg_size(s);
-    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
     TCGv_ptr t_zm = tcg_temp_new_ptr();
     TCGv_ptr t_pg = tcg_temp_new_ptr();
     TCGv_ptr t_zt = tcg_temp_new_ptr();
+    TCGv_i32 t_desc;                /* TCG constant built from desc below */
+    int desc;
+
+    desc = sve_memopidx(s, msz_dtype(msz));
+    desc |= scale << MEMOPIDX_SHIFT;  /* scale is placed at MEMOPIDX_SHIFT */
+    desc = simd_desc(vsz, vsz, desc);
+    t_desc = tcg_const_i32(desc);
 
     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
     tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
     tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
-    fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);
+    fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
 
     tcg_temp_free_ptr(t_zt);
     tcg_temp_free_ptr(t_zm);
     tcg_temp_free_ptr(t_pg);
-    tcg_temp_free_i32(desc);
+    tcg_temp_free_i32(t_desc);
 }
 
-/* Indexed by [ff][xs][u][msz].  */
-static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
-    { { { gen_helper_sve_ldbss_zsu,
-          gen_helper_sve_ldhss_zsu,
-          NULL, },
-        { gen_helper_sve_ldbsu_zsu,
-          gen_helper_sve_ldhsu_zsu,
-          gen_helper_sve_ldssu_zsu, } },
-      { { gen_helper_sve_ldbss_zss,
-          gen_helper_sve_ldhss_zss,
-          NULL, },
-        { gen_helper_sve_ldbsu_zss,
-          gen_helper_sve_ldhsu_zss,
-          gen_helper_sve_ldssu_zss, } } },
+/* Indexed by [be][ff][xs][u][msz].  */
+static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
+    /* Little-endian */
+    { { { { gen_helper_sve_ldbss_zsu,
+            gen_helper_sve_ldhss_le_zsu,
+            NULL, },
+          { gen_helper_sve_ldbsu_zsu,
+            gen_helper_sve_ldhsu_le_zsu,
+            gen_helper_sve_ldss_le_zsu, } },
+        { { gen_helper_sve_ldbss_zss,
+            gen_helper_sve_ldhss_le_zss,
+            NULL, },
+          { gen_helper_sve_ldbsu_zss,
+            gen_helper_sve_ldhsu_le_zss,
+            gen_helper_sve_ldss_le_zss, } } },
 
-    { { { gen_helper_sve_ldffbss_zsu,
-          gen_helper_sve_ldffhss_zsu,
-          NULL, },
-        { gen_helper_sve_ldffbsu_zsu,
-          gen_helper_sve_ldffhsu_zsu,
-          gen_helper_sve_ldffssu_zsu, } },
-      { { gen_helper_sve_ldffbss_zss,
-          gen_helper_sve_ldffhss_zss,
-          NULL, },
-        { gen_helper_sve_ldffbsu_zss,
-          gen_helper_sve_ldffhsu_zss,
-          gen_helper_sve_ldffssu_zss, } } }
+      /* First-fault */
+      { { { gen_helper_sve_ldffbss_zsu,
+            gen_helper_sve_ldffhss_le_zsu,
+            NULL, },
+          { gen_helper_sve_ldffbsu_zsu,
+            gen_helper_sve_ldffhsu_le_zsu,
+            gen_helper_sve_ldffss_le_zsu, } },
+        { { gen_helper_sve_ldffbss_zss,
+            gen_helper_sve_ldffhss_le_zss,
+            NULL, },
+          { gen_helper_sve_ldffbsu_zss,
+            gen_helper_sve_ldffhsu_le_zss,
+            gen_helper_sve_ldffss_le_zss, } } } },
+
+    /* Big-endian */
+    { { { { gen_helper_sve_ldbss_zsu,
+            gen_helper_sve_ldhss_be_zsu,
+            NULL, },
+          { gen_helper_sve_ldbsu_zsu,
+            gen_helper_sve_ldhsu_be_zsu,
+            gen_helper_sve_ldss_be_zsu, } },
+        { { gen_helper_sve_ldbss_zss,
+            gen_helper_sve_ldhss_be_zss,
+            NULL, },
+          { gen_helper_sve_ldbsu_zss,
+            gen_helper_sve_ldhsu_be_zss,
+            gen_helper_sve_ldss_be_zss, } } },
+
+      /* First-fault */
+      { { { gen_helper_sve_ldffbss_zsu,
+            gen_helper_sve_ldffhss_be_zsu,
+            NULL, },
+          { gen_helper_sve_ldffbsu_zsu,
+            gen_helper_sve_ldffhsu_be_zsu,
+            gen_helper_sve_ldffss_be_zsu, } },
+        { { gen_helper_sve_ldffbss_zss,
+            gen_helper_sve_ldffhss_be_zss,
+            NULL, },
+          { gen_helper_sve_ldffbsu_zss,
+            gen_helper_sve_ldffhsu_be_zss,
+            gen_helper_sve_ldffss_be_zss, } } } },
 };
 
 /* Note that we overload xs=2 to indicate 64-bit offset.  */
-static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
-    { { { gen_helper_sve_ldbds_zsu,
-          gen_helper_sve_ldhds_zsu,
-          gen_helper_sve_ldsds_zsu,
-          NULL, },
-        { gen_helper_sve_ldbdu_zsu,
-          gen_helper_sve_ldhdu_zsu,
-          gen_helper_sve_ldsdu_zsu,
-          gen_helper_sve_ldddu_zsu, } },
-      { { gen_helper_sve_ldbds_zss,
-          gen_helper_sve_ldhds_zss,
-          gen_helper_sve_ldsds_zss,
-          NULL, },
-        { gen_helper_sve_ldbdu_zss,
-          gen_helper_sve_ldhdu_zss,
-          gen_helper_sve_ldsdu_zss,
-          gen_helper_sve_ldddu_zss, } },
-      { { gen_helper_sve_ldbds_zd,
-          gen_helper_sve_ldhds_zd,
-          gen_helper_sve_ldsds_zd,
-          NULL, },
-        { gen_helper_sve_ldbdu_zd,
-          gen_helper_sve_ldhdu_zd,
-          gen_helper_sve_ldsdu_zd,
-          gen_helper_sve_ldddu_zd, } } },
+static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
+    /* Little-endian */
+    { { { { gen_helper_sve_ldbds_zsu,
+            gen_helper_sve_ldhds_le_zsu,
+            gen_helper_sve_ldsds_le_zsu,
+            NULL, },
+          { gen_helper_sve_ldbdu_zsu,
+            gen_helper_sve_ldhdu_le_zsu,
+            gen_helper_sve_ldsdu_le_zsu,
+            gen_helper_sve_lddd_le_zsu, } },
+        { { gen_helper_sve_ldbds_zss,
+            gen_helper_sve_ldhds_le_zss,
+            gen_helper_sve_ldsds_le_zss,
+            NULL, },
+          { gen_helper_sve_ldbdu_zss,
+            gen_helper_sve_ldhdu_le_zss,
+            gen_helper_sve_ldsdu_le_zss,
+            gen_helper_sve_lddd_le_zss, } },
+        { { gen_helper_sve_ldbds_zd,
+            gen_helper_sve_ldhds_le_zd,
+            gen_helper_sve_ldsds_le_zd,
+            NULL, },
+          { gen_helper_sve_ldbdu_zd,
+            gen_helper_sve_ldhdu_le_zd,
+            gen_helper_sve_ldsdu_le_zd,
+            gen_helper_sve_lddd_le_zd, } } },
 
-    { { { gen_helper_sve_ldffbds_zsu,
-          gen_helper_sve_ldffhds_zsu,
-          gen_helper_sve_ldffsds_zsu,
-          NULL, },
-        { gen_helper_sve_ldffbdu_zsu,
-          gen_helper_sve_ldffhdu_zsu,
-          gen_helper_sve_ldffsdu_zsu,
-          gen_helper_sve_ldffddu_zsu, } },
-      { { gen_helper_sve_ldffbds_zss,
-          gen_helper_sve_ldffhds_zss,
-          gen_helper_sve_ldffsds_zss,
-          NULL, },
-        { gen_helper_sve_ldffbdu_zss,
-          gen_helper_sve_ldffhdu_zss,
-          gen_helper_sve_ldffsdu_zss,
-          gen_helper_sve_ldffddu_zss, } },
-      { { gen_helper_sve_ldffbds_zd,
-          gen_helper_sve_ldffhds_zd,
-          gen_helper_sve_ldffsds_zd,
-          NULL, },
-        { gen_helper_sve_ldffbdu_zd,
-          gen_helper_sve_ldffhdu_zd,
-          gen_helper_sve_ldffsdu_zd,
-          gen_helper_sve_ldffddu_zd, } } }
+      /* First-fault */
+      { { { gen_helper_sve_ldffbds_zsu,
+            gen_helper_sve_ldffhds_le_zsu,
+            gen_helper_sve_ldffsds_le_zsu,
+            NULL, },
+          { gen_helper_sve_ldffbdu_zsu,
+            gen_helper_sve_ldffhdu_le_zsu,
+            gen_helper_sve_ldffsdu_le_zsu,
+            gen_helper_sve_ldffdd_le_zsu, } },
+        { { gen_helper_sve_ldffbds_zss,
+            gen_helper_sve_ldffhds_le_zss,
+            gen_helper_sve_ldffsds_le_zss,
+            NULL, },
+          { gen_helper_sve_ldffbdu_zss,
+            gen_helper_sve_ldffhdu_le_zss,
+            gen_helper_sve_ldffsdu_le_zss,
+            gen_helper_sve_ldffdd_le_zss, } },
+        { { gen_helper_sve_ldffbds_zd,
+            gen_helper_sve_ldffhds_le_zd,
+            gen_helper_sve_ldffsds_le_zd,
+            NULL, },
+          { gen_helper_sve_ldffbdu_zd,
+            gen_helper_sve_ldffhdu_le_zd,
+            gen_helper_sve_ldffsdu_le_zd,
+            gen_helper_sve_ldffdd_le_zd, } } } },
+
+    /* Big-endian */
+    { { { { gen_helper_sve_ldbds_zsu,
+            gen_helper_sve_ldhds_be_zsu,
+            gen_helper_sve_ldsds_be_zsu,
+            NULL, },
+          { gen_helper_sve_ldbdu_zsu,
+            gen_helper_sve_ldhdu_be_zsu,
+            gen_helper_sve_ldsdu_be_zsu,
+            gen_helper_sve_lddd_be_zsu, } },
+        { { gen_helper_sve_ldbds_zss,
+            gen_helper_sve_ldhds_be_zss,
+            gen_helper_sve_ldsds_be_zss,
+            NULL, },
+          { gen_helper_sve_ldbdu_zss,
+            gen_helper_sve_ldhdu_be_zss,
+            gen_helper_sve_ldsdu_be_zss,
+            gen_helper_sve_lddd_be_zss, } },
+        { { gen_helper_sve_ldbds_zd,
+            gen_helper_sve_ldhds_be_zd,
+            gen_helper_sve_ldsds_be_zd,
+            NULL, },
+          { gen_helper_sve_ldbdu_zd,
+            gen_helper_sve_ldhdu_be_zd,
+            gen_helper_sve_ldsdu_be_zd,
+            gen_helper_sve_lddd_be_zd, } } },
+
+      /* First-fault */
+      { { { gen_helper_sve_ldffbds_zsu,
+            gen_helper_sve_ldffhds_be_zsu,
+            gen_helper_sve_ldffsds_be_zsu,
+            NULL, },
+          { gen_helper_sve_ldffbdu_zsu,
+            gen_helper_sve_ldffhdu_be_zsu,
+            gen_helper_sve_ldffsdu_be_zsu,
+            gen_helper_sve_ldffdd_be_zsu, } },
+        { { gen_helper_sve_ldffbds_zss,
+            gen_helper_sve_ldffhds_be_zss,
+            gen_helper_sve_ldffsds_be_zss,
+            NULL, },
+          { gen_helper_sve_ldffbdu_zss,
+            gen_helper_sve_ldffhdu_be_zss,
+            gen_helper_sve_ldffsdu_be_zss,
+            gen_helper_sve_ldffdd_be_zss, } },
+        { { gen_helper_sve_ldffbds_zd,
+            gen_helper_sve_ldffhds_be_zd,
+            gen_helper_sve_ldffsds_be_zd,
+            NULL, },
+          { gen_helper_sve_ldffbdu_zd,
+            gen_helper_sve_ldffhdu_be_zd,
+            gen_helper_sve_ldffsdu_be_zd,
+            gen_helper_sve_ldffdd_be_zd, } } } },
 };
 
 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
 {
     gen_helper_gvec_mem_scatter *fn = NULL;
+    int be = s->be_data == MO_BE;
 
     if (!sve_access_check(s)) {
         return true;
@@ -5038,22 +5275,23 @@
 
     switch (a->esz) {
     case MO_32:
-        fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
+        fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
         break;
     case MO_64:
-        fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
+        fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
         break;
     }
     assert(fn != NULL);
 
     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
-               cpu_reg_sp(s, a->rn), fn);
+               cpu_reg_sp(s, a->rn), a->msz, fn);
     return true;
 }
 
 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
 {
     gen_helper_gvec_mem_scatter *fn = NULL;
+    int be = s->be_data == MO_BE;
     TCGv_i64 imm;
 
     if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
@@ -5065,10 +5303,10 @@
 
     switch (a->esz) {
     case MO_32:
-        fn = gather_load_fn32[a->ff][0][a->u][a->msz];
+        fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
         break;
     case MO_64:
-        fn = gather_load_fn64[a->ff][2][a->u][a->msz];
+        fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
         break;
     }
     assert(fn != NULL);
@@ -5077,40 +5315,63 @@
      * by loading the immediate into the scalar parameter.
      */
     imm = tcg_const_i64(a->imm << a->msz);
-    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
+    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
     tcg_temp_free_i64(imm);
     return true;
 }
 
-/* Indexed by [xs][msz].  */
-static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = {
-    { gen_helper_sve_stbs_zsu,
-      gen_helper_sve_sths_zsu,
-      gen_helper_sve_stss_zsu, },
-    { gen_helper_sve_stbs_zss,
-      gen_helper_sve_sths_zss,
-      gen_helper_sve_stss_zss, },
+/* Indexed by [be][xs][msz].  */
+static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
+    /* Little-endian */
+    { { gen_helper_sve_stbs_zsu,
+        gen_helper_sve_sths_le_zsu,
+        gen_helper_sve_stss_le_zsu, },
+      { gen_helper_sve_stbs_zss,
+        gen_helper_sve_sths_le_zss,
+        gen_helper_sve_stss_le_zss, } },
+    /* Big-endian */
+    { { gen_helper_sve_stbs_zsu,
+        gen_helper_sve_sths_be_zsu,
+        gen_helper_sve_stss_be_zsu, },
+      { gen_helper_sve_stbs_zss,
+        gen_helper_sve_sths_be_zss,
+        gen_helper_sve_stss_be_zss, } },
 };
 
 /* Note that we overload xs=2 to indicate 64-bit offset.  */
-static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = {
-    { gen_helper_sve_stbd_zsu,
-      gen_helper_sve_sthd_zsu,
-      gen_helper_sve_stsd_zsu,
-      gen_helper_sve_stdd_zsu, },
-    { gen_helper_sve_stbd_zss,
-      gen_helper_sve_sthd_zss,
-      gen_helper_sve_stsd_zss,
-      gen_helper_sve_stdd_zss, },
-    { gen_helper_sve_stbd_zd,
-      gen_helper_sve_sthd_zd,
-      gen_helper_sve_stsd_zd,
-      gen_helper_sve_stdd_zd, },
+static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
+    /* Little-endian */
+    { { gen_helper_sve_stbd_zsu,
+        gen_helper_sve_sthd_le_zsu,
+        gen_helper_sve_stsd_le_zsu,
+        gen_helper_sve_stdd_le_zsu, },
+      { gen_helper_sve_stbd_zss,
+        gen_helper_sve_sthd_le_zss,
+        gen_helper_sve_stsd_le_zss,
+        gen_helper_sve_stdd_le_zss, },
+      { gen_helper_sve_stbd_zd,
+        gen_helper_sve_sthd_le_zd,
+        gen_helper_sve_stsd_le_zd,
+        gen_helper_sve_stdd_le_zd, } },
+    /* Big-endian */
+    { { gen_helper_sve_stbd_zsu,
+        gen_helper_sve_sthd_be_zsu,
+        gen_helper_sve_stsd_be_zsu,
+        gen_helper_sve_stdd_be_zsu, },
+      { gen_helper_sve_stbd_zss,
+        gen_helper_sve_sthd_be_zss,
+        gen_helper_sve_stsd_be_zss,
+        gen_helper_sve_stdd_be_zss, },
+      { gen_helper_sve_stbd_zd,
+        gen_helper_sve_sthd_be_zd,
+        gen_helper_sve_stsd_be_zd,
+        gen_helper_sve_stdd_be_zd, } },
 };
 
 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
 {
     gen_helper_gvec_mem_scatter *fn;
+    int be = s->be_data == MO_BE;
 
     if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
         return false;
@@ -5120,22 +5381,23 @@
     }
     switch (a->esz) {
     case MO_32:
-        fn = scatter_store_fn32[a->xs][a->msz];
+        fn = scatter_store_fn32[be][a->xs][a->msz];
         break;
     case MO_64:
-        fn = scatter_store_fn64[a->xs][a->msz];
+        fn = scatter_store_fn64[be][a->xs][a->msz];
         break;
     default:
         g_assert_not_reached();
     }
     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
-               cpu_reg_sp(s, a->rn), fn);
+               cpu_reg_sp(s, a->rn), a->msz, fn);
     return true;
 }
 
 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
 {
     gen_helper_gvec_mem_scatter *fn = NULL;
+    int be = s->be_data == MO_BE;
     TCGv_i64 imm;
 
     if (a->esz < a->msz) {
@@ -5147,10 +5409,10 @@
 
     switch (a->esz) {
     case MO_32:
-        fn = scatter_store_fn32[0][a->msz];
+        fn = scatter_store_fn32[be][0][a->msz];
         break;
     case MO_64:
-        fn = scatter_store_fn64[2][a->msz];
+        fn = scatter_store_fn64[be][2][a->msz];
         break;
     }
     assert(fn != NULL);
@@ -5159,7 +5421,7 @@
      * by loading the immediate into the scalar parameter.
      */
     imm = tcg_const_i64(a->imm << a->msz);
-    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
+    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
     tcg_temp_free_i64(imm);
     return true;
 }
diff --git a/target/arm/translate.c b/target/arm/translate.c
index c6a5d2a..7c4675f 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -42,7 +42,7 @@
 #define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
 /* currently all emulated v5 cores are also v5TE, so don't bother */
 #define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
-#define ENABLE_ARCH_5J    arm_dc_feature(s, ARM_FEATURE_JAZELLE)
+#define ENABLE_ARCH_5J    dc_isar_feature(jazelle, s)
 #define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
 #define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
 #define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
@@ -72,7 +72,7 @@
 
 #include "exec/gen-icount.h"
 
-static const char *regnames[] =
+static const char * const regnames[] =
     { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
       "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
 
@@ -239,6 +239,23 @@
     tcg_temp_free_i32(var);
 }
 
+/*
+ * Variant of store_reg which applies v8M stack-limit checks before updating
+ * SP. If the check fails this will result in an exception being taken.
+ * We disable the stack checks for CONFIG_USER_ONLY because we have
+ * no idea what the stack limits should be in that case.
+ * If stack checking is not being done this just acts like store_reg().
+ */
+static void store_sp_checked(DisasContext *s, TCGv_i32 var)
+{
+#ifndef CONFIG_USER_ONLY
+    if (s->v8m_stackcheck) {
+        gen_helper_v8m_stackcheck(cpu_env, var); /* emitted before the write */
+    }
+#endif
+    store_reg(s, 13, var);  /* register 13 is SP */
+}
+
 /* Value extensions.  */
 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
@@ -1568,6 +1585,25 @@
     return vfp_reg_offset(0, sreg);
 }
 
+/* Return the offset from env of a 2**SIZE byte piece of NEON register REG,
+ * at index ELEMENT, where 0 is the least significant end of the register.
+ */
+static inline long
+neon_element_offset(int reg, int element, TCGMemOp size)
+{
+    int element_size = 1 << size;       /* element width in bytes */
+    int ofs = element * element_size;   /* offset for a little-endian layout */
+#ifdef HOST_WORDS_BIGENDIAN
+    /* Calculate the offset assuming fully little-endian,
+     * then XOR to account for the order of the 8-byte units.
+     */
+    if (element_size < 8) {
+        ofs ^= 8 - element_size;
+    }
+#endif
+    return neon_reg_offset(reg, 0) + ofs;
+}
+
 static TCGv_i32 neon_load_reg(int reg, int pass)
 {
     TCGv_i32 tmp = tcg_temp_new_i32();
@@ -1575,12 +1611,94 @@
     return tmp;
 }
 
+static void neon_load_element(TCGv_i32 var, int reg, int ele, TCGMemOp mop)
+{ /* Load element ELE of NEON register REG from env into VAR. */
+    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
+
+    switch (mop) { /* only the unsigned memops below are accepted */
+    case MO_UB:
+        tcg_gen_ld8u_i32(var, cpu_env, offset);
+        break;
+    case MO_UW:
+        tcg_gen_ld16u_i32(var, cpu_env, offset);
+        break;
+    case MO_UL:
+        tcg_gen_ld_i32(var, cpu_env, offset);
+        break;
+    default: /* any other memop is treated as a caller bug */
+        g_assert_not_reached();
+    }
+}
+
+static void neon_load_element64(TCGv_i64 var, int reg, int ele, TCGMemOp mop)
+{ /* As neon_load_element(), but into a 64-bit VAR; also accepts MO_Q. */
+    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
+
+    switch (mop) { /* only the unsigned memops below are accepted */
+    case MO_UB:
+        tcg_gen_ld8u_i64(var, cpu_env, offset);
+        break;
+    case MO_UW:
+        tcg_gen_ld16u_i64(var, cpu_env, offset);
+        break;
+    case MO_UL:
+        tcg_gen_ld32u_i64(var, cpu_env, offset);
+        break;
+    case MO_Q:
+        tcg_gen_ld_i64(var, cpu_env, offset);
+        break;
+    default: /* any other memop is treated as a caller bug */
+        g_assert_not_reached();
+    }
+}
+
 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
 {
     tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
     tcg_temp_free_i32(var);
 }
 
+static void neon_store_element(int reg, int ele, TCGMemOp size, TCGv_i32 var)
+{ /* Store VAR to element ELE of NEON register REG; VAR is not freed. */
+    long offset = neon_element_offset(reg, ele, size);
+
+    switch (size) { /* SIZE is a bare MO_8..MO_32 element size */
+    case MO_8:
+        tcg_gen_st8_i32(var, cpu_env, offset);
+        break;
+    case MO_16:
+        tcg_gen_st16_i32(var, cpu_env, offset);
+        break;
+    case MO_32:
+        tcg_gen_st_i32(var, cpu_env, offset);
+        break;
+    default: /* any other size is treated as a caller bug */
+        g_assert_not_reached();
+    }
+}
+
+static void neon_store_element64(int reg, int ele, TCGMemOp size, TCGv_i64 var)
+{ /* Store VAR to element ELE of NEON register REG; VAR is not freed. */
+    long offset = neon_element_offset(reg, ele, size);
+
+    switch (size) { /* SIZE is a bare MO_8..MO_64 element size */
+    case MO_8:
+        tcg_gen_st8_i64(var, cpu_env, offset);
+        break;
+    case MO_16:
+        tcg_gen_st16_i64(var, cpu_env, offset);
+        break;
+    case MO_32:
+        tcg_gen_st32_i64(var, cpu_env, offset);
+        break;
+    case MO_64:
+        tcg_gen_st_i64(var, cpu_env, offset);
+        break;
+    default: /* any other size is treated as a caller bug */
+        g_assert_not_reached();
+    }
+}
+
 static inline void neon_load_reg64(TCGv_i64 var, int reg)
 {
     tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
@@ -2957,19 +3075,6 @@
     tcg_temp_free_i32(tmp);
 }
 
-static void gen_neon_dup_u8(TCGv_i32 var, int shift)
-{
-    TCGv_i32 tmp = tcg_temp_new_i32();
-    if (shift)
-        tcg_gen_shri_i32(var, var, shift);
-    tcg_gen_ext8u_i32(var, var);
-    tcg_gen_shli_i32(tmp, var, 8);
-    tcg_gen_or_i32(var, var, tmp);
-    tcg_gen_shli_i32(tmp, var, 16);
-    tcg_gen_or_i32(var, var, tmp);
-    tcg_temp_free_i32(tmp);
-}
-
 static void gen_neon_dup_low16(TCGv_i32 var)
 {
     TCGv_i32 tmp = tcg_temp_new_i32();
@@ -2988,28 +3093,6 @@
     tcg_temp_free_i32(tmp);
 }
 
-static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size)
-{
-    /* Load a single Neon element and replicate into a 32 bit TCG reg */
-    TCGv_i32 tmp = tcg_temp_new_i32();
-    switch (size) {
-    case 0:
-        gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
-        gen_neon_dup_u8(tmp, 0);
-        break;
-    case 1:
-        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
-        gen_neon_dup_low16(tmp);
-        break;
-    case 2:
-        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
-        break;
-    default: /* Avoid compiler warnings.  */
-        abort();
-    }
-    return tmp;
-}
-
 static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,
                        uint32_t dp)
 {
@@ -3415,17 +3498,10 @@
                     tmp = load_reg(s, rd);
                     if (insn & (1 << 23)) {
                         /* VDUP */
-                        if (size == 0) {
-                            gen_neon_dup_u8(tmp, 0);
-                        } else if (size == 1) {
-                            gen_neon_dup_low16(tmp);
-                        }
-                        for (n = 0; n <= pass * 2; n++) {
-                            tmp2 = tcg_temp_new_i32();
-                            tcg_gen_mov_i32(tmp2, tmp);
-                            neon_store_reg(rn, n, tmp2);
-                        }
-                        neon_store_reg(rn, n, tmp);
+                        int vec_size = pass ? 16 : 8;
+                        tcg_gen_gvec_dup_i32(size, neon_reg_offset(rn, 0),
+                                             vec_size, vec_size, tmp);
+                        tcg_temp_free_i32(tmp);
                     } else {
                         /* VMOV */
                         switch (size) {
@@ -4212,6 +4288,18 @@
                 if (insn & (1 << 24)) /* pre-decrement */
                     tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
 
+                if (s->v8m_stackcheck && rn == 13 && w) {
+                    /*
+                     * Here 'addr' is the lowest address we will store to,
+                     * and is either the old SP (if post-increment) or
+                     * the new SP (if pre-decrement). For post-increment
+                     * where the old value is below the limit and the new
+                     * value is above, it is UNKNOWN whether the limit check
+                     * triggers; we choose to trigger.
+                     */
+                    gen_helper_v8m_stackcheck(cpu_env, addr);
+                }
+
                 if (dp)
                     offset = 8;
                 else
@@ -4878,17 +4966,17 @@
     int nregs;
     int interleave;
     int spacing;
-} neon_ls_element_type[11] = {
-    {4, 4, 1},
-    {4, 4, 2},
+} const neon_ls_element_type[11] = {
+    {1, 4, 1},
+    {1, 4, 2},
     {4, 1, 1},
-    {4, 2, 1},
-    {3, 3, 1},
-    {3, 3, 2},
+    {2, 2, 2},
+    {1, 3, 1},
+    {1, 3, 2},
     {3, 1, 1},
     {1, 1, 1},
-    {2, 2, 1},
-    {2, 2, 2},
+    {1, 2, 1},
+    {1, 2, 2},
     {2, 1, 1}
 };
 
@@ -4904,10 +4992,11 @@
     int stride;
     int size;
     int reg;
-    int pass;
     int load;
-    int shift;
     int n;
+    int vec_size;
+    int mmu_idx;
+    TCGMemOp endian;
     TCGv_i32 addr;
     TCGv_i32 tmp;
     TCGv_i32 tmp2;
@@ -4919,7 +5008,7 @@
      */
     if (s->fp_excp_el) {
         gen_exception_insn(s, 4, EXCP_UDEF,
-                           syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
+                           syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
         return 0;
     }
 
@@ -4929,6 +5018,8 @@
     rn = (insn >> 16) & 0xf;
     rm = insn & 0xf;
     load = (insn & (1 << 21)) != 0;
+    endian = s->be_data;
+    mmu_idx = get_mem_index(s);
     if ((insn & (1 << 23)) == 0) {
         /* Load store all elements.  */
         op = (insn >> 8) & 0xf;
@@ -4953,104 +5044,44 @@
         nregs = neon_ls_element_type[op].nregs;
         interleave = neon_ls_element_type[op].interleave;
         spacing = neon_ls_element_type[op].spacing;
-        if (size == 3 && (interleave | spacing) != 1)
+        if (size == 3 && (interleave | spacing) != 1) {
             return 1;
+        }
+        /* For our purposes, bytes are always little-endian.  */
+        if (size == 0) {
+            endian = MO_LE;
+        }
+        /* Consecutive little-endian elements from a single register
+         * can be promoted to a larger little-endian operation.
+         */
+        if (interleave == 1 && endian == MO_LE) {
+            size = 3;
+        }
+        tmp64 = tcg_temp_new_i64();
         addr = tcg_temp_new_i32();
+        tmp2 = tcg_const_i32(1 << size);
         load_reg_var(s, addr, rn);
-        stride = (1 << size) * interleave;
         for (reg = 0; reg < nregs; reg++) {
-            if (interleave > 2 || (interleave == 2 && nregs == 2)) {
-                load_reg_var(s, addr, rn);
-                tcg_gen_addi_i32(addr, addr, (1 << size) * reg);
-            } else if (interleave == 2 && nregs == 4 && reg == 2) {
-                load_reg_var(s, addr, rn);
-                tcg_gen_addi_i32(addr, addr, 1 << size);
-            }
-            if (size == 3) {
-                tmp64 = tcg_temp_new_i64();
-                if (load) {
-                    gen_aa32_ld64(s, tmp64, addr, get_mem_index(s));
-                    neon_store_reg64(tmp64, rd);
-                } else {
-                    neon_load_reg64(tmp64, rd);
-                    gen_aa32_st64(s, tmp64, addr, get_mem_index(s));
-                }
-                tcg_temp_free_i64(tmp64);
-                tcg_gen_addi_i32(addr, addr, stride);
-            } else {
-                for (pass = 0; pass < 2; pass++) {
-                    if (size == 2) {
-                        if (load) {
-                            tmp = tcg_temp_new_i32();
-                            gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
-                            neon_store_reg(rd, pass, tmp);
-                        } else {
-                            tmp = neon_load_reg(rd, pass);
-                            gen_aa32_st32(s, tmp, addr, get_mem_index(s));
-                            tcg_temp_free_i32(tmp);
-                        }
-                        tcg_gen_addi_i32(addr, addr, stride);
-                    } else if (size == 1) {
-                        if (load) {
-                            tmp = tcg_temp_new_i32();
-                            gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
-                            tcg_gen_addi_i32(addr, addr, stride);
-                            tmp2 = tcg_temp_new_i32();
-                            gen_aa32_ld16u(s, tmp2, addr, get_mem_index(s));
-                            tcg_gen_addi_i32(addr, addr, stride);
-                            tcg_gen_shli_i32(tmp2, tmp2, 16);
-                            tcg_gen_or_i32(tmp, tmp, tmp2);
-                            tcg_temp_free_i32(tmp2);
-                            neon_store_reg(rd, pass, tmp);
-                        } else {
-                            tmp = neon_load_reg(rd, pass);
-                            tmp2 = tcg_temp_new_i32();
-                            tcg_gen_shri_i32(tmp2, tmp, 16);
-                            gen_aa32_st16(s, tmp, addr, get_mem_index(s));
-                            tcg_temp_free_i32(tmp);
-                            tcg_gen_addi_i32(addr, addr, stride);
-                            gen_aa32_st16(s, tmp2, addr, get_mem_index(s));
-                            tcg_temp_free_i32(tmp2);
-                            tcg_gen_addi_i32(addr, addr, stride);
-                        }
-                    } else /* size == 0 */ {
-                        if (load) {
-                            tmp2 = NULL;
-                            for (n = 0; n < 4; n++) {
-                                tmp = tcg_temp_new_i32();
-                                gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
-                                tcg_gen_addi_i32(addr, addr, stride);
-                                if (n == 0) {
-                                    tmp2 = tmp;
-                                } else {
-                                    tcg_gen_shli_i32(tmp, tmp, n * 8);
-                                    tcg_gen_or_i32(tmp2, tmp2, tmp);
-                                    tcg_temp_free_i32(tmp);
-                                }
-                            }
-                            neon_store_reg(rd, pass, tmp2);
-                        } else {
-                            tmp2 = neon_load_reg(rd, pass);
-                            for (n = 0; n < 4; n++) {
-                                tmp = tcg_temp_new_i32();
-                                if (n == 0) {
-                                    tcg_gen_mov_i32(tmp, tmp2);
-                                } else {
-                                    tcg_gen_shri_i32(tmp, tmp2, n * 8);
-                                }
-                                gen_aa32_st8(s, tmp, addr, get_mem_index(s));
-                                tcg_temp_free_i32(tmp);
-                                tcg_gen_addi_i32(addr, addr, stride);
-                            }
-                            tcg_temp_free_i32(tmp2);
-                        }
+            for (n = 0; n < 8 >> size; n++) {
+                int xs;
+                for (xs = 0; xs < interleave; xs++) {
+                    int tt = rd + reg + spacing * xs;
+
+                    if (load) {
+                        gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
+                        neon_store_element64(tt, n, size, tmp64);
+                    } else {
+                        neon_load_element64(tmp64, tt, n, size);
+                        gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
                     }
+                    tcg_gen_add_i32(addr, addr, tmp2);
                 }
             }
-            rd += spacing;
         }
         tcg_temp_free_i32(addr);
-        stride = nregs * 8;
+        tcg_temp_free_i32(tmp2);
+        tcg_temp_free_i64(tmp64);
+        stride = nregs * interleave * 8;
     } else {
         size = (insn >> 10) & 3;
         if (size == 3) {
@@ -5077,45 +5108,50 @@
             }
             addr = tcg_temp_new_i32();
             load_reg_var(s, addr, rn);
-            if (nregs == 1) {
-                /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */
-                tmp = gen_load_and_replicate(s, addr, size);
-                tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
-                tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
-                if (insn & (1 << 5)) {
-                    tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0));
-                    tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1));
+
+            /* VLD1 to all lanes: bit 5 indicates how many Dregs to write.
+             * VLD2/3/4 to all lanes: bit 5 indicates register stride.
+             */
+            stride = (insn & (1 << 5)) ? 2 : 1;
+            vec_size = nregs == 1 ? stride * 8 : 8;
+
+            tmp = tcg_temp_new_i32();
+            for (reg = 0; reg < nregs; reg++) {
+                gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
+                                s->be_data | size);
+                if ((rd & 1) && vec_size == 16) {
+                    /* We cannot write 16 bytes at once because the
+                     * destination is unaligned.
+                     */
+                    tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
+                                         8, 8, tmp);
+                    tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0),
+                                     neon_reg_offset(rd, 0), 8, 8);
+                } else {
+                    tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
+                                         vec_size, vec_size, tmp);
                 }
-                tcg_temp_free_i32(tmp);
-            } else {
-                /* VLD2/3/4 to all lanes: bit 5 indicates register stride */
-                stride = (insn & (1 << 5)) ? 2 : 1;
-                for (reg = 0; reg < nregs; reg++) {
-                    tmp = gen_load_and_replicate(s, addr, size);
-                    tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
-                    tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
-                    tcg_temp_free_i32(tmp);
-                    tcg_gen_addi_i32(addr, addr, 1 << size);
-                    rd += stride;
-                }
+                tcg_gen_addi_i32(addr, addr, 1 << size);
+                rd += stride;
             }
+            tcg_temp_free_i32(tmp);
             tcg_temp_free_i32(addr);
             stride = (1 << size) * nregs;
         } else {
             /* Single element.  */
             int idx = (insn >> 4) & 0xf;
-            pass = (insn >> 7) & 1;
+            int reg_idx;
             switch (size) {
             case 0:
-                shift = ((insn >> 5) & 3) * 8;
+                reg_idx = (insn >> 5) & 7;
                 stride = 1;
                 break;
             case 1:
-                shift = ((insn >> 6) & 1) * 16;
+                reg_idx = (insn >> 6) & 3;
                 stride = (insn & (1 << 5)) ? 2 : 1;
                 break;
             case 2:
-                shift = 0;
+                reg_idx = (insn >> 7) & 1;
                 stride = (insn & (1 << 6)) ? 2 : 1;
                 break;
             default:
@@ -5155,52 +5191,24 @@
                  */
                 return 1;
             }
+            tmp = tcg_temp_new_i32();
             addr = tcg_temp_new_i32();
             load_reg_var(s, addr, rn);
             for (reg = 0; reg < nregs; reg++) {
                 if (load) {
-                    tmp = tcg_temp_new_i32();
-                    switch (size) {
-                    case 0:
-                        gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
-                        break;
-                    case 1:
-                        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
-                        break;
-                    case 2:
-                        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
-                        break;
-                    default: /* Avoid compiler warnings.  */
-                        abort();
-                    }
-                    if (size != 2) {
-                        tmp2 = neon_load_reg(rd, pass);
-                        tcg_gen_deposit_i32(tmp, tmp2, tmp,
-                                            shift, size ? 16 : 8);
-                        tcg_temp_free_i32(tmp2);
-                    }
-                    neon_store_reg(rd, pass, tmp);
+                    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
+                                    s->be_data | size);
+                    neon_store_element(rd, reg_idx, size, tmp);
                 } else { /* Store */
-                    tmp = neon_load_reg(rd, pass);
-                    if (shift)
-                        tcg_gen_shri_i32(tmp, tmp, shift);
-                    switch (size) {
-                    case 0:
-                        gen_aa32_st8(s, tmp, addr, get_mem_index(s));
-                        break;
-                    case 1:
-                        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
-                        break;
-                    case 2:
-                        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
-                        break;
-                    }
-                    tcg_temp_free_i32(tmp);
+                    neon_load_element(tmp, rd, reg_idx, size);
+                    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
+                                    s->be_data | size);
                 }
                 rd += stride;
                 tcg_gen_addi_i32(addr, addr, 1 << size);
             }
             tcg_temp_free_i32(addr);
+            tcg_temp_free_i32(tmp);
             stride = nregs * (1 << size);
         }
     }
@@ -5221,14 +5229,6 @@
     return 0;
 }
 
-/* Bitwise select.  dest = c ? t : f.  Clobbers T and F.  */
-static void gen_neon_bsl(TCGv_i32 dest, TCGv_i32 t, TCGv_i32 f, TCGv_i32 c)
-{
-    tcg_gen_and_i32(t, t, c);
-    tcg_gen_andc_i32(f, f, c);
-    tcg_gen_or_i32(dest, t, f);
-}
-
 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
 {
     switch (size) {
@@ -5435,7 +5435,7 @@
 #define NEON_3R_VABA 15
 #define NEON_3R_VADD_VSUB 16
 #define NEON_3R_VTST_VCEQ 17
-#define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */
+#define NEON_3R_VML 18 /* VMLA, VMLS */
 #define NEON_3R_VMUL 19
 #define NEON_3R_VPMAX 20
 #define NEON_3R_VPMIN 21
@@ -5660,7 +5660,7 @@
 static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                          int q, int rd, int rn, int rm)
 {
-    if (arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
+    if (dc_isar_feature(aa32_rdm, s)) {
         int opr_sz = (1 + q) * 8;
         tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
                            vfp_reg_offset(1, rn),
@@ -5671,6 +5671,483 @@
     return 1;
 }
 
+/*
+ * Expanders for the bitwise insert/select ops (VBitOps): VBIF, VBIT, VBSL.
+ */
+static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+    tcg_gen_xor_i64(rn, rn, rm);
+    tcg_gen_and_i64(rn, rn, rd);
+    tcg_gen_xor_i64(rd, rm, rn);
+}
+
+static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+    tcg_gen_xor_i64(rn, rn, rd);
+    tcg_gen_and_i64(rn, rn, rm);
+    tcg_gen_xor_i64(rd, rd, rn);
+}
+
+static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+    tcg_gen_xor_i64(rn, rn, rd);
+    tcg_gen_andc_i64(rn, rn, rm);
+    tcg_gen_xor_i64(rd, rd, rn);
+}
+
+static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
+{
+    tcg_gen_xor_vec(vece, rn, rn, rm);
+    tcg_gen_and_vec(vece, rn, rn, rd);
+    tcg_gen_xor_vec(vece, rd, rm, rn);
+}
+
+static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
+{
+    tcg_gen_xor_vec(vece, rn, rn, rd);
+    tcg_gen_and_vec(vece, rn, rn, rm);
+    tcg_gen_xor_vec(vece, rd, rd, rn);
+}
+
+static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
+{
+    tcg_gen_xor_vec(vece, rn, rn, rd);
+    tcg_gen_andc_vec(vece, rn, rn, rm);
+    tcg_gen_xor_vec(vece, rd, rd, rn);
+}
+
+const GVecGen3 bsl_op = {
+    .fni8 = gen_bsl_i64,
+    .fniv = gen_bsl_vec,
+    .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+    .load_dest = true
+};
+
+const GVecGen3 bit_op = {
+    .fni8 = gen_bit_i64,
+    .fniv = gen_bit_vec,
+    .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+    .load_dest = true
+};
+
+const GVecGen3 bif_op = {
+    .fni8 = gen_bif_i64,
+    .fniv = gen_bif_vec,
+    .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+    .load_dest = true
+};
+
+static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    tcg_gen_vec_sar8i_i64(a, a, shift);
+    tcg_gen_vec_add8_i64(d, d, a);
+}
+
+static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    tcg_gen_vec_sar16i_i64(a, a, shift);
+    tcg_gen_vec_add16_i64(d, d, a);
+}
+
+static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+    tcg_gen_sari_i32(a, a, shift);
+    tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    tcg_gen_sari_i64(a, a, shift);
+    tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+    tcg_gen_sari_vec(vece, a, a, sh);
+    tcg_gen_add_vec(vece, d, d, a);
+}
+
+const GVecGen2i ssra_op[4] = {
+    { .fni8 = gen_ssra8_i64,
+      .fniv = gen_ssra_vec,
+      .load_dest = true,
+      .opc = INDEX_op_sari_vec,
+      .vece = MO_8 },
+    { .fni8 = gen_ssra16_i64,
+      .fniv = gen_ssra_vec,
+      .load_dest = true,
+      .opc = INDEX_op_sari_vec,
+      .vece = MO_16 },
+    { .fni4 = gen_ssra32_i32,
+      .fniv = gen_ssra_vec,
+      .load_dest = true,
+      .opc = INDEX_op_sari_vec,
+      .vece = MO_32 },
+    { .fni8 = gen_ssra64_i64,
+      .fniv = gen_ssra_vec,
+      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+      .load_dest = true,
+      .opc = INDEX_op_sari_vec,
+      .vece = MO_64 },
+};
+
+static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    tcg_gen_vec_shr8i_i64(a, a, shift);
+    tcg_gen_vec_add8_i64(d, d, a);
+}
+
+static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    tcg_gen_vec_shr16i_i64(a, a, shift);
+    tcg_gen_vec_add16_i64(d, d, a);
+}
+
+static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+    tcg_gen_shri_i32(a, a, shift);
+    tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    tcg_gen_shri_i64(a, a, shift);
+    tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+    tcg_gen_shri_vec(vece, a, a, sh);
+    tcg_gen_add_vec(vece, d, d, a);
+}
+
+const GVecGen2i usra_op[4] = {
+    { .fni8 = gen_usra8_i64,
+      .fniv = gen_usra_vec,
+      .load_dest = true,
+      .opc = INDEX_op_shri_vec,
+      .vece = MO_8, },
+    { .fni8 = gen_usra16_i64,
+      .fniv = gen_usra_vec,
+      .load_dest = true,
+      .opc = INDEX_op_shri_vec,
+      .vece = MO_16, },
+    { .fni4 = gen_usra32_i32,
+      .fniv = gen_usra_vec,
+      .load_dest = true,
+      .opc = INDEX_op_shri_vec,
+      .vece = MO_32, },
+    { .fni8 = gen_usra64_i64,
+      .fniv = gen_usra_vec,
+      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+      .load_dest = true,
+      .opc = INDEX_op_shri_vec,
+      .vece = MO_64, },
+};
+
+static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    uint64_t mask = dup_const(MO_8, 0xff >> shift);
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    tcg_gen_shri_i64(t, a, shift);
+    tcg_gen_andi_i64(t, t, mask);
+    tcg_gen_andi_i64(d, d, ~mask);
+    tcg_gen_or_i64(d, d, t);
+    tcg_temp_free_i64(t);
+}
+
+static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    tcg_gen_shri_i64(t, a, shift);
+    tcg_gen_andi_i64(t, t, mask);
+    tcg_gen_andi_i64(d, d, ~mask);
+    tcg_gen_or_i64(d, d, t);
+    tcg_temp_free_i64(t);
+}
+
+static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+    tcg_gen_shri_i32(a, a, shift);
+    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
+}
+
+static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    tcg_gen_shri_i64(a, a, shift);
+    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
+}
+
+static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+    if (sh == 0) {
+        tcg_gen_mov_vec(d, a);
+    } else {
+        TCGv_vec t = tcg_temp_new_vec_matching(d);
+        TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+        tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
+        tcg_gen_shri_vec(vece, t, a, sh);
+        tcg_gen_and_vec(vece, d, d, m);
+        tcg_gen_or_vec(vece, d, d, t);
+
+        tcg_temp_free_vec(t);
+        tcg_temp_free_vec(m);
+    }
+}
+
+const GVecGen2i sri_op[4] = {
+    { .fni8 = gen_shr8_ins_i64,
+      .fniv = gen_shr_ins_vec,
+      .load_dest = true,
+      .opc = INDEX_op_shri_vec,
+      .vece = MO_8 },
+    { .fni8 = gen_shr16_ins_i64,
+      .fniv = gen_shr_ins_vec,
+      .load_dest = true,
+      .opc = INDEX_op_shri_vec,
+      .vece = MO_16 },
+    { .fni4 = gen_shr32_ins_i32,
+      .fniv = gen_shr_ins_vec,
+      .load_dest = true,
+      .opc = INDEX_op_shri_vec,
+      .vece = MO_32 },
+    { .fni8 = gen_shr64_ins_i64,
+      .fniv = gen_shr_ins_vec,
+      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+      .load_dest = true,
+      .opc = INDEX_op_shri_vec,
+      .vece = MO_64 },
+};
+
+static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    uint64_t mask = dup_const(MO_8, 0xff << shift);
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    tcg_gen_shli_i64(t, a, shift);
+    tcg_gen_andi_i64(t, t, mask);
+    tcg_gen_andi_i64(d, d, ~mask);
+    tcg_gen_or_i64(d, d, t);
+    tcg_temp_free_i64(t);
+}
+
+static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    uint64_t mask = dup_const(MO_16, 0xffff << shift);
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    tcg_gen_shli_i64(t, a, shift);
+    tcg_gen_andi_i64(t, t, mask);
+    tcg_gen_andi_i64(d, d, ~mask);
+    tcg_gen_or_i64(d, d, t);
+    tcg_temp_free_i64(t);
+}
+
+static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
+}
+
+static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
+}
+
+static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+    if (sh == 0) {
+        tcg_gen_mov_vec(d, a);
+    } else {
+        TCGv_vec t = tcg_temp_new_vec_matching(d);
+        TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+        tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
+        tcg_gen_shli_vec(vece, t, a, sh);
+        tcg_gen_and_vec(vece, d, d, m);
+        tcg_gen_or_vec(vece, d, d, t);
+
+        tcg_temp_free_vec(t);
+        tcg_temp_free_vec(m);
+    }
+}
+
+const GVecGen2i sli_op[4] = {
+    { .fni8 = gen_shl8_ins_i64,
+      .fniv = gen_shl_ins_vec,
+      .load_dest = true,
+      .opc = INDEX_op_shli_vec,
+      .vece = MO_8 },
+    { .fni8 = gen_shl16_ins_i64,
+      .fniv = gen_shl_ins_vec,
+      .load_dest = true,
+      .opc = INDEX_op_shli_vec,
+      .vece = MO_16 },
+    { .fni4 = gen_shl32_ins_i32,
+      .fniv = gen_shl_ins_vec,
+      .load_dest = true,
+      .opc = INDEX_op_shli_vec,
+      .vece = MO_32 },
+    { .fni8 = gen_shl64_ins_i64,
+      .fniv = gen_shl_ins_vec,
+      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+      .load_dest = true,
+      .opc = INDEX_op_shli_vec,
+      .vece = MO_64 },
+};
+
+static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    gen_helper_neon_mul_u8(a, a, b);
+    gen_helper_neon_add_u8(d, d, a);
+}
+
+static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    gen_helper_neon_mul_u8(a, a, b);
+    gen_helper_neon_sub_u8(d, d, a);
+}
+
+static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    gen_helper_neon_mul_u16(a, a, b);
+    gen_helper_neon_add_u16(d, d, a);
+}
+
+static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    gen_helper_neon_mul_u16(a, a, b);
+    gen_helper_neon_sub_u16(d, d, a);
+}
+
+static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    tcg_gen_mul_i32(a, a, b);
+    tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    tcg_gen_mul_i32(a, a, b);
+    tcg_gen_sub_i32(d, d, a);
+}
+
+static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    tcg_gen_mul_i64(a, a, b);
+    tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    tcg_gen_mul_i64(a, a, b);
+    tcg_gen_sub_i64(d, d, a);
+}
+
+static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+    tcg_gen_mul_vec(vece, a, a, b);
+    tcg_gen_add_vec(vece, d, d, a);
+}
+
+static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+    tcg_gen_mul_vec(vece, a, a, b);
+    tcg_gen_sub_vec(vece, d, d, a);
+}
+
+/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
+ * these tables are shared with AArch64 which does support them.
+ */
+const GVecGen3 mla_op[4] = {
+    { .fni4 = gen_mla8_i32,
+      .fniv = gen_mla_vec,
+      .opc = INDEX_op_mul_vec,
+      .load_dest = true,
+      .vece = MO_8 },
+    { .fni4 = gen_mla16_i32,
+      .fniv = gen_mla_vec,
+      .opc = INDEX_op_mul_vec,
+      .load_dest = true,
+      .vece = MO_16 },
+    { .fni4 = gen_mla32_i32,
+      .fniv = gen_mla_vec,
+      .opc = INDEX_op_mul_vec,
+      .load_dest = true,
+      .vece = MO_32 },
+    { .fni8 = gen_mla64_i64,
+      .fniv = gen_mla_vec,
+      .opc = INDEX_op_mul_vec,
+      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+      .load_dest = true,
+      .vece = MO_64 },
+};
+
+const GVecGen3 mls_op[4] = {
+    { .fni4 = gen_mls8_i32,
+      .fniv = gen_mls_vec,
+      .opc = INDEX_op_mul_vec,
+      .load_dest = true,
+      .vece = MO_8 },
+    { .fni4 = gen_mls16_i32,
+      .fniv = gen_mls_vec,
+      .opc = INDEX_op_mul_vec,
+      .load_dest = true,
+      .vece = MO_16 },
+    { .fni4 = gen_mls32_i32,
+      .fniv = gen_mls_vec,
+      .opc = INDEX_op_mul_vec,
+      .load_dest = true,
+      .vece = MO_32 },
+    { .fni8 = gen_mls64_i64,
+      .fniv = gen_mls_vec,
+      .opc = INDEX_op_mul_vec,
+      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+      .load_dest = true,
+      .vece = MO_64 },
+};
+
+/* CMTST : test is "if ((X & Y) != 0)". */
+static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    tcg_gen_and_i32(d, a, b);
+    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
+    tcg_gen_neg_i32(d, d);
+}
+
+void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    tcg_gen_and_i64(d, a, b);
+    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
+    tcg_gen_neg_i64(d, d);
+}
+
+static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+    tcg_gen_and_vec(vece, d, a, b);
+    tcg_gen_dupi_vec(vece, a, 0);
+    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
+}
+
+const GVecGen3 cmtst_op[4] = {
+    { .fni4 = gen_helper_neon_tst_u8,
+      .fniv = gen_cmtst_vec,
+      .vece = MO_8 },
+    { .fni4 = gen_helper_neon_tst_u16,
+      .fniv = gen_cmtst_vec,
+      .vece = MO_16 },
+    { .fni4 = gen_cmtst_i32,
+      .fniv = gen_cmtst_vec,
+      .vece = MO_32 },
+    { .fni8 = gen_cmtst_i64,
+      .fniv = gen_cmtst_vec,
+      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+      .vece = MO_64 },
+};
+
 /* Translate a NEON data processing instruction.  Return nonzero if the
    instruction is invalid.
    We process data in a mixture of 32-bit and 64-bit chunks.
@@ -5680,14 +6157,15 @@
 {
     int op;
     int q;
-    int rd, rn, rm;
+    int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
     int size;
     int shift;
     int pass;
     int count;
     int pairwise;
     int u;
-    uint32_t imm, mask;
+    int vec_size;
+    uint32_t imm;
     TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
     TCGv_ptr ptr1, ptr2, ptr3;
     TCGv_i64 tmp64;
@@ -5698,7 +6176,7 @@
      */
     if (s->fp_excp_el) {
         gen_exception_insn(s, 4, EXCP_UDEF,
-                           syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
+                           syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
         return 0;
     }
 
@@ -5710,6 +6188,11 @@
     VFP_DREG_N(rn, insn);
     VFP_DREG_M(rm, insn);
     size = (insn >> 20) & 3;
+    vec_size = q ? 16 : 8;
+    rd_ofs = neon_reg_offset(rd, 0);
+    rn_ofs = neon_reg_offset(rn, 0);
+    rm_ofs = neon_reg_offset(rm, 0);
+
     if ((insn & (1 << 23)) == 0) {
         /* Three register same length.  */
         op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
@@ -5734,7 +6217,7 @@
                 return 1;
             }
             if (!u) { /* SHA-1 */
-                if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
+                if (!dc_isar_feature(aa32_sha1, s)) {
                     return 1;
                 }
                 ptr1 = vfp_reg_ptr(true, rd);
@@ -5744,7 +6227,7 @@
                 gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
                 tcg_temp_free_i32(tmp4);
             } else { /* SHA-256 */
-                if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256) || size == 3) {
+                if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
                     return 1;
                 }
                 ptr1 = vfp_reg_ptr(true, rd);
@@ -5800,8 +6283,100 @@
                                      q, rd, rn, rm);
             }
             return 1;
+
+        case NEON_3R_LOGIC: /* Logic ops.  */
+            switch ((u << 2) | size) {
+            case 0: /* VAND */
+                tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs,
+                                 vec_size, vec_size);
+                break;
+            case 1: /* VBIC */
+                tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs,
+                                  vec_size, vec_size);
+                break;
+            case 2:
+                if (rn == rm) {
+                    /* VMOV */
+                    tcg_gen_gvec_mov(0, rd_ofs, rn_ofs, vec_size, vec_size);
+                } else {
+                    /* VORR */
+                    tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
+                                    vec_size, vec_size);
+                }
+                break;
+            case 3: /* VORN */
+                tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs,
+                                 vec_size, vec_size);
+                break;
+            case 4: /* VEOR */
+                tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs,
+                                 vec_size, vec_size);
+                break;
+            case 5: /* VBSL */
+                tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
+                               vec_size, vec_size, &bsl_op);
+                break;
+            case 6: /* VBIT */
+                tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
+                               vec_size, vec_size, &bit_op);
+                break;
+            case 7: /* VBIF */
+                tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
+                               vec_size, vec_size, &bif_op);
+                break;
+            }
+            return 0;
+
+        case NEON_3R_VADD_VSUB:
+            if (u) {
+                tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs,
+                                 vec_size, vec_size);
+            } else {
+                tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs,
+                                 vec_size, vec_size);
+            }
+            return 0;
+
+        case NEON_3R_VMUL: /* VMUL */
+            if (u) {
+                /* Polynomial case allows only P8 and is handled below.  */
+                if (size != 0) {
+                    return 1;
+                }
+            } else {
+                tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
+                                 vec_size, vec_size);
+                return 0;
+            }
+            break;
+
+        case NEON_3R_VML: /* VMLA, VMLS */
+            tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
+                           u ? &mls_op[size] : &mla_op[size]);
+            return 0;
+
+        case NEON_3R_VTST_VCEQ:
+            if (u) { /* VCEQ */
+                tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs,
+                                 vec_size, vec_size);
+            } else { /* VTST */
+                tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
+                               vec_size, vec_size, &cmtst_op[size]);
+            }
+            return 0;
+
+        case NEON_3R_VCGT:
+            tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size,
+                             rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
+            return 0;
+
+        case NEON_3R_VCGE:
+            tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
+                             rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
+            return 0;
         }
-        if (size == 3 && op != NEON_3R_LOGIC) {
+
+        if (size == 3) {
             /* 64-bit element instructions. */
             for (pass = 0; pass < (q ? 2 : 1); pass++) {
                 neon_load_reg64(cpu_V0, rn + pass);
@@ -5857,13 +6432,6 @@
                                                   cpu_V1, cpu_V0);
                     }
                     break;
-                case NEON_3R_VADD_VSUB:
-                    if (u) {
-                        tcg_gen_sub_i64(CPU_V001);
-                    } else {
-                        tcg_gen_add_i64(CPU_V001);
-                    }
-                    break;
                 default:
                     abort();
                 }
@@ -5913,12 +6481,6 @@
                 return 1;
             }
             break;
-        case NEON_3R_VMUL:
-            if (u && (size != 0)) {
-                /* UNDEF on invalid size for polynomial subcase */
-                return 1;
-            }
-            break;
         case NEON_3R_VFM_VQRDMLSH:
             if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
                 return 1;
@@ -5959,52 +6521,12 @@
         case NEON_3R_VRHADD:
             GEN_NEON_INTEGER_OP(rhadd);
             break;
-        case NEON_3R_LOGIC: /* Logic ops.  */
-            switch ((u << 2) | size) {
-            case 0: /* VAND */
-                tcg_gen_and_i32(tmp, tmp, tmp2);
-                break;
-            case 1: /* BIC */
-                tcg_gen_andc_i32(tmp, tmp, tmp2);
-                break;
-            case 2: /* VORR */
-                tcg_gen_or_i32(tmp, tmp, tmp2);
-                break;
-            case 3: /* VORN */
-                tcg_gen_orc_i32(tmp, tmp, tmp2);
-                break;
-            case 4: /* VEOR */
-                tcg_gen_xor_i32(tmp, tmp, tmp2);
-                break;
-            case 5: /* VBSL */
-                tmp3 = neon_load_reg(rd, pass);
-                gen_neon_bsl(tmp, tmp, tmp2, tmp3);
-                tcg_temp_free_i32(tmp3);
-                break;
-            case 6: /* VBIT */
-                tmp3 = neon_load_reg(rd, pass);
-                gen_neon_bsl(tmp, tmp, tmp3, tmp2);
-                tcg_temp_free_i32(tmp3);
-                break;
-            case 7: /* VBIF */
-                tmp3 = neon_load_reg(rd, pass);
-                gen_neon_bsl(tmp, tmp3, tmp, tmp2);
-                tcg_temp_free_i32(tmp3);
-                break;
-            }
-            break;
         case NEON_3R_VHSUB:
             GEN_NEON_INTEGER_OP(hsub);
             break;
         case NEON_3R_VQSUB:
             GEN_NEON_INTEGER_OP_ENV(qsub);
             break;
-        case NEON_3R_VCGT:
-            GEN_NEON_INTEGER_OP(cgt);
-            break;
-        case NEON_3R_VCGE:
-            GEN_NEON_INTEGER_OP(cge);
-            break;
         case NEON_3R_VSHL:
             GEN_NEON_INTEGER_OP(shl);
             break;
@@ -6032,61 +6554,9 @@
             tmp2 = neon_load_reg(rd, pass);
             gen_neon_add(size, tmp, tmp2);
             break;
-        case NEON_3R_VADD_VSUB:
-            if (!u) { /* VADD */
-                gen_neon_add(size, tmp, tmp2);
-            } else { /* VSUB */
-                switch (size) {
-                case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
-                case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
-                case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
-                default: abort();
-                }
-            }
-            break;
-        case NEON_3R_VTST_VCEQ:
-            if (!u) { /* VTST */
-                switch (size) {
-                case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
-                case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
-                case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
-                default: abort();
-                }
-            } else { /* VCEQ */
-                switch (size) {
-                case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
-                case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
-                case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
-                default: abort();
-                }
-            }
-            break;
-        case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */
-            switch (size) {
-            case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
-            case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
-            case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
-            default: abort();
-            }
-            tcg_temp_free_i32(tmp2);
-            tmp2 = neon_load_reg(rd, pass);
-            if (u) { /* VMLS */
-                gen_neon_rsb(size, tmp, tmp2);
-            } else { /* VMLA */
-                gen_neon_add(size, tmp, tmp2);
-            }
-            break;
         case NEON_3R_VMUL:
-            if (u) { /* polynomial */
-                gen_helper_neon_mul_p8(tmp, tmp, tmp2);
-            } else { /* Integer */
-                switch (size) {
-                case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
-                case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
-                case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
-                default: abort();
-                }
-            }
+            /* VMUL.P8; other cases already eliminated.  */
+            gen_helper_neon_mul_p8(tmp, tmp, tmp2);
             break;
         case NEON_3R_VPMAX:
             GEN_NEON_INTEGER_OP(pmax);
@@ -6268,8 +6738,6 @@
                     size--;
             }
             shift = (insn >> 16) & ((1 << (3 + size)) - 1);
-            /* To avoid excessive duplication of ops we implement shift
-               by immediate using the variable shift operations.  */
             if (op < 8) {
                 /* Shift by immediate:
                    VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
@@ -6281,43 +6749,99 @@
                 }
                 /* Right shifts are encoded as N - shift, where N is the
                    element size in bits.  */
-                if (op <= 4)
+                if (op <= 4) {
                     shift = shift - (1 << (size + 3));
+                }
+
+                switch (op) {
+                case 0:  /* VSHR */
+                    /* Right shift comes here negative.  */
+                    shift = -shift;
+                    /* Shifts larger than the element size are architecturally
+                     * valid.  Unsigned results in all zeros; signed results
+                     * in all sign bits.
+                     */
+                    if (!u) {
+                        tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
+                                          MIN(shift, (8 << size) - 1),
+                                          vec_size, vec_size);
+                    } else if (shift >= 8 << size) {
+                        tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
+                    } else {
+                        tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
+                                          vec_size, vec_size);
+                    }
+                    return 0;
+
+                case 1:  /* VSRA */
+                    /* Right shift comes here negative.  */
+                    shift = -shift;
+                    /* Shifts larger than the element size are architecturally
+                     * valid.  Unsigned results in all zeros; signed results
+                     * in all sign bits.
+                     */
+                    if (!u) {
+                        tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
+                                        MIN(shift, (8 << size) - 1),
+                                        &ssra_op[size]);
+                    } else if (shift >= 8 << size) {
+                        /* rd += 0 */
+                    } else {
+                        tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
+                                        shift, &usra_op[size]);
+                    }
+                    return 0;
+
+                case 4: /* VSRI */
+                    if (!u) {
+                        return 1;
+                    }
+                    /* Right shift comes here negative.  */
+                    shift = -shift;
+                    /* Shift out of range leaves destination unchanged.  */
+                    if (shift < 8 << size) {
+                        tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
+                                        shift, &sri_op[size]);
+                    }
+                    return 0;
+
+                case 5: /* VSHL, VSLI */
+                    if (u) { /* VSLI */
+                        /* Shift out of range leaves destination unchanged.  */
+                        if (shift < 8 << size) {
+                            tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size,
+                                            vec_size, shift, &sli_op[size]);
+                        }
+                    } else { /* VSHL */
+                        /* Shifts larger than the element size are
+                         * architecturally valid and result in zero.
+                         */
+                        if (shift >= 8 << size) {
+                            tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
+                        } else {
+                            tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
+                                              vec_size, vec_size);
+                        }
+                    }
+                    return 0;
+                }
+
                 if (size == 3) {
                     count = q + 1;
                 } else {
                     count = q ? 4: 2;
                 }
-                switch (size) {
-                case 0:
-                    imm = (uint8_t) shift;
-                    imm |= imm << 8;
-                    imm |= imm << 16;
-                    break;
-                case 1:
-                    imm = (uint16_t) shift;
-                    imm |= imm << 16;
-                    break;
-                case 2:
-                case 3:
-                    imm = shift;
-                    break;
-                default:
-                    abort();
-                }
+
+                /* To avoid excessive duplication of ops we implement shift
+                 * by immediate using the variable shift operations.
+                 */
+                imm = dup_const(size, shift);
 
                 for (pass = 0; pass < count; pass++) {
                     if (size == 3) {
                         neon_load_reg64(cpu_V0, rm + pass);
                         tcg_gen_movi_i64(cpu_V1, imm);
                         switch (op) {
-                        case 0:  /* VSHR */
-                        case 1:  /* VSRA */
-                            if (u)
-                                gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
-                            else
-                                gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1);
-                            break;
                         case 2: /* VRSHR */
                         case 3: /* VRSRA */
                             if (u)
@@ -6325,10 +6849,6 @@
                             else
                                 gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
                             break;
-                        case 4: /* VSRI */
-                        case 5: /* VSHL, VSLI */
-                            gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
-                            break;
                         case 6: /* VQSHLU */
                             gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
                                                       cpu_V0, cpu_V1);
@@ -6342,26 +6862,13 @@
                                                          cpu_V0, cpu_V1);
                             }
                             break;
+                        default:
+                            g_assert_not_reached();
                         }
-                        if (op == 1 || op == 3) {
+                        if (op == 3) {
                             /* Accumulate.  */
                             neon_load_reg64(cpu_V1, rd + pass);
                             tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
-                        } else if (op == 4 || (op == 5 && u)) {
-                            /* Insert */
-                            neon_load_reg64(cpu_V1, rd + pass);
-                            uint64_t mask;
-                            if (shift < -63 || shift > 63) {
-                                mask = 0;
-                            } else {
-                                if (op == 4) {
-                                    mask = 0xffffffffffffffffull >> -shift;
-                                } else {
-                                    mask = 0xffffffffffffffffull << shift;
-                                }
-                            }
-                            tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask);
-                            tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
                         }
                         neon_store_reg64(cpu_V0, rd + pass);
                     } else { /* size < 3 */
@@ -6370,23 +6877,10 @@
                         tmp2 = tcg_temp_new_i32();
                         tcg_gen_movi_i32(tmp2, imm);
                         switch (op) {
-                        case 0:  /* VSHR */
-                        case 1:  /* VSRA */
-                            GEN_NEON_INTEGER_OP(shl);
-                            break;
                         case 2: /* VRSHR */
                         case 3: /* VRSRA */
                             GEN_NEON_INTEGER_OP(rshl);
                             break;
-                        case 4: /* VSRI */
-                        case 5: /* VSHL, VSLI */
-                            switch (size) {
-                            case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
-                            case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
-                            case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
-                            default: abort();
-                            }
-                            break;
                         case 6: /* VQSHLU */
                             switch (size) {
                             case 0:
@@ -6408,50 +6902,16 @@
                         case 7: /* VQSHL */
                             GEN_NEON_INTEGER_OP_ENV(qshl);
                             break;
+                        default:
+                            g_assert_not_reached();
                         }
                         tcg_temp_free_i32(tmp2);
 
-                        if (op == 1 || op == 3) {
+                        if (op == 3) {
                             /* Accumulate.  */
                             tmp2 = neon_load_reg(rd, pass);
                             gen_neon_add(size, tmp, tmp2);
                             tcg_temp_free_i32(tmp2);
-                        } else if (op == 4 || (op == 5 && u)) {
-                            /* Insert */
-                            switch (size) {
-                            case 0:
-                                if (op == 4)
-                                    mask = 0xff >> -shift;
-                                else
-                                    mask = (uint8_t)(0xff << shift);
-                                mask |= mask << 8;
-                                mask |= mask << 16;
-                                break;
-                            case 1:
-                                if (op == 4)
-                                    mask = 0xffff >> -shift;
-                                else
-                                    mask = (uint16_t)(0xffff << shift);
-                                mask |= mask << 16;
-                                break;
-                            case 2:
-                                if (shift < -31 || shift > 31) {
-                                    mask = 0;
-                                } else {
-                                    if (op == 4)
-                                        mask = 0xffffffffu >> -shift;
-                                    else
-                                        mask = 0xffffffffu << shift;
-                                }
-                                break;
-                            default:
-                                abort();
-                            }
-                            tmp2 = neon_load_reg(rd, pass);
-                            tcg_gen_andi_i32(tmp, tmp, mask);
-                            tcg_gen_andi_i32(tmp2, tmp2, ~mask);
-                            tcg_gen_or_i32(tmp, tmp, tmp2);
-                            tcg_temp_free_i32(tmp2);
                         }
                         neon_store_reg(rd, pass, tmp);
                     }
@@ -6600,7 +7060,8 @@
                 return 1;
             }
         } else { /* (insn & 0x00380080) == 0 */
-            int invert;
+            int invert, reg_ofs, vec_size;
+
             if (q && (rd & 1)) {
                 return 1;
             }
@@ -6640,8 +7101,9 @@
                 break;
             case 14:
                 imm |= (imm << 8) | (imm << 16) | (imm << 24);
-                if (invert)
+                if (invert) {
                     imm = ~imm;
+                }
                 break;
             case 15:
                 if (invert) {
@@ -6651,36 +7113,45 @@
                       | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
                 break;
             }
-            if (invert)
+            if (invert) {
                 imm = ~imm;
+            }
 
-            for (pass = 0; pass < (q ? 4 : 2); pass++) {
-                if (op & 1 && op < 12) {
-                    tmp = neon_load_reg(rd, pass);
-                    if (invert) {
-                        /* The immediate value has already been inverted, so
-                           BIC becomes AND.  */
-                        tcg_gen_andi_i32(tmp, tmp, imm);
-                    } else {
-                        tcg_gen_ori_i32(tmp, tmp, imm);
-                    }
+            reg_ofs = neon_reg_offset(rd, 0);
+            vec_size = q ? 16 : 8;
+
+            if (op & 1 && op < 12) {
+                if (invert) {
+                    /* The immediate value has already been inverted,
+                     * so BIC becomes AND.
+                     */
+                    tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
+                                      vec_size, vec_size);
                 } else {
-                    /* VMOV, VMVN.  */
-                    tmp = tcg_temp_new_i32();
-                    if (op == 14 && invert) {
-                        int n;
-                        uint32_t val;
-                        val = 0;
-                        for (n = 0; n < 4; n++) {
-                            if (imm & (1 << (n + (pass & 1) * 4)))
-                                val |= 0xff << (n * 8);
-                        }
-                        tcg_gen_movi_i32(tmp, val);
-                    } else {
-                        tcg_gen_movi_i32(tmp, imm);
-                    }
+                    tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
+                                     vec_size, vec_size);
                 }
-                neon_store_reg(rd, pass, tmp);
+            } else {
+                /* VMOV, VMVN.  */
+                if (op == 14 && invert) {
+                    TCGv_i64 t64 = tcg_temp_new_i64();
+
+                    for (pass = 0; pass <= q; ++pass) {
+                        uint64_t val = 0;
+                        int n;
+
+                        for (n = 0; n < 8; n++) {
+                            if (imm & (1 << (n + pass * 8))) {
+                                val |= 0xffull << (n * 8);
+                            }
+                        }
+                        tcg_gen_movi_i64(t64, val);
+                        neon_store_reg64(t64, rd + pass);
+                    }
+                    tcg_temp_free_i64(t64);
+                } else {
+                    tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm);
+                }
             }
         }
     } else { /* (insn & 0x00800010 == 0x00800000) */
@@ -6739,7 +7210,7 @@
                 if (op == 14 && size == 2) {
                     TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
 
-                    if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
+                    if (!dc_isar_feature(aa32_pmull, s)) {
                         return 1;
                     }
                     tcg_rn = tcg_temp_new_i64();
@@ -7056,7 +7527,7 @@
                     {
                         NeonGenThreeOpEnvFn *fn;
 
-                        if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
+                        if (!dc_isar_feature(aa32_rdm, s)) {
                             return 1;
                         }
                         if (u && ((rd | rn) & 1)) {
@@ -7330,8 +7801,7 @@
                     break;
                 }
                 case NEON_2RM_AESE: case NEON_2RM_AESMC:
-                    if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
-                        || ((rm | rd) & 1)) {
+                    if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
                         return 1;
                     }
                     ptr1 = vfp_reg_ptr(true, rd);
@@ -7352,8 +7822,7 @@
                     tcg_temp_free_i32(tmp3);
                     break;
                 case NEON_2RM_SHA1H:
-                    if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)
-                        || ((rm | rd) & 1)) {
+                    if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
                         return 1;
                     }
                     ptr1 = vfp_reg_ptr(true, rd);
@@ -7370,10 +7839,10 @@
                     }
                     /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
                     if (q) {
-                        if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256)) {
+                        if (!dc_isar_feature(aa32_sha2, s)) {
                             return 1;
                         }
-                    } else if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
+                    } else if (!dc_isar_feature(aa32_sha1, s)) {
                         return 1;
                     }
                     ptr1 = vfp_reg_ptr(true, rd);
@@ -7386,6 +7855,14 @@
                     tcg_temp_free_ptr(ptr1);
                     tcg_temp_free_ptr(ptr2);
                     break;
+
+                case NEON_2RM_VMVN:
+                    tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
+                    break;
+                case NEON_2RM_VNEG:
+                    tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
+                    break;
+
                 default:
                 elementwise:
                     for (pass = 0; pass < (q ? 4 : 2); pass++) {
@@ -7426,9 +7903,6 @@
                         case NEON_2RM_VCNT:
                             gen_helper_neon_cnt_u8(tmp, tmp);
                             break;
-                        case NEON_2RM_VMVN:
-                            tcg_gen_not_i32(tmp, tmp);
-                            break;
                         case NEON_2RM_VQABS:
                             switch (size) {
                             case 0:
@@ -7501,11 +7975,6 @@
                             default: abort();
                             }
                             break;
-                        case NEON_2RM_VNEG:
-                            tmp2 = tcg_const_i32(0);
-                            gen_neon_rsb(size, tmp, tmp2);
-                            tcg_temp_free_i32(tmp2);
-                            break;
                         case NEON_2RM_VCGT0_F:
                         {
                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
@@ -7728,28 +8197,25 @@
                 tcg_temp_free_i32(tmp);
             } else if ((insn & 0x380) == 0) {
                 /* VDUP */
+                int element;
+                TCGMemOp size;
+
                 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
                     return 1;
                 }
-                if (insn & (1 << 19)) {
-                    tmp = neon_load_reg(rm, 1);
-                } else {
-                    tmp = neon_load_reg(rm, 0);
-                }
                 if (insn & (1 << 16)) {
-                    gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8);
+                    size = MO_8;
+                    element = (insn >> 17) & 7;
                 } else if (insn & (1 << 17)) {
-                    if ((insn >> 18) & 1)
-                        gen_neon_dup_high16(tmp);
-                    else
-                        gen_neon_dup_low16(tmp);
+                    size = MO_16;
+                    element = (insn >> 18) & 3;
+                } else {
+                    size = MO_32;
+                    element = (insn >> 19) & 1;
                 }
-                for (pass = 0; pass < (q ? 4 : 2); pass++) {
-                    tmp2 = tcg_temp_new_i32();
-                    tcg_gen_mov_i32(tmp2, tmp);
-                    neon_store_reg(rd, pass, tmp2);
-                }
-                tcg_temp_free_i32(tmp);
+                tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
+                                     neon_element_offset(rm, element, size),
+                                     q ? 16 : 8, q ? 16 : 8);
             } else {
                 return 1;
             }
@@ -7784,8 +8250,8 @@
         /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
         int size = extract32(insn, 20, 1);
         data = extract32(insn, 23, 2); /* rot */
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)
-            || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
+        if (!dc_isar_feature(aa32_vcma, s)
+            || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
             return 1;
         }
         fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
@@ -7793,15 +8259,15 @@
         /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
         int size = extract32(insn, 20, 1);
         data = extract32(insn, 24, 1); /* rot */
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)
-            || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
+        if (!dc_isar_feature(aa32_vcma, s)
+            || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
             return 1;
         }
         fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
     } else if ((insn & 0xfeb00f00) == 0xfc200d00) {
         /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */
         bool u = extract32(insn, 4, 1);
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
+        if (!dc_isar_feature(aa32_dp, s)) {
             return 1;
         }
         fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
@@ -7811,7 +8277,7 @@
 
     if (s->fp_excp_el) {
         gen_exception_insn(s, 4, EXCP_UDEF,
-                           syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
+                           syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
         return 0;
     }
     if (!s->vfp_enabled) {
@@ -7863,11 +8329,11 @@
         int size = extract32(insn, 23, 1);
         int index;
 
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) {
+        if (!dc_isar_feature(aa32_vcma, s)) {
             return 1;
         }
         if (size == 0) {
-            if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            if (!dc_isar_feature(aa32_fp16_arith, s)) {
                 return 1;
             }
             /* For fp16, rm is just Vm, and index is M.  */
@@ -7884,7 +8350,7 @@
     } else if ((insn & 0xffb00f00) == 0xfe200d00) {
         /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
         int u = extract32(insn, 4, 1);
-        if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
+        if (!dc_isar_feature(aa32_dp, s)) {
             return 1;
         }
         fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
@@ -7897,7 +8363,7 @@
 
     if (s->fp_excp_el) {
         gen_exception_insn(s, 4, EXCP_UDEF,
-                           syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
+                           syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
         return 0;
     }
     if (!s->vfp_enabled) {
@@ -8860,8 +9326,7 @@
              * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED.
              * Bits 8, 10 and 11 should be zero.
              */
-            if (!arm_dc_feature(s, ARM_FEATURE_CRC) || op1 == 0x3 ||
-                (c & 0xd) != 0) {
+            if (!dc_isar_feature(aa32_crc32, s) || op1 == 0x3 || (c & 0xd) != 0) {
                 goto illegal_op;
             }
 
@@ -9729,7 +10194,7 @@
                     case 1:
                     case 3:
                         /* SDIV, UDIV */
-                        if (!arm_dc_feature(s, ARM_FEATURE_ARM_DIV)) {
+                        if (!dc_isar_feature(arm_div, s)) {
                             goto illegal_op;
                         }
                         if (((insn >> 5) & 7) || (rd != 15)) {
@@ -10261,6 +10726,8 @@
                  * 0b1111_1001_x11x_xxxx_xxxx_xxxx_xxxx_xxxx
                  *  - load/store dual (pre-indexed)
                  */
+                bool wback = extract32(insn, 21, 1);
+
                 if (rn == 15) {
                     if (insn & (1 << 21)) {
                         /* UNPREDICTABLE */
@@ -10272,8 +10739,29 @@
                     addr = load_reg(s, rn);
                 }
                 offset = (insn & 0xff) * 4;
-                if ((insn & (1 << 23)) == 0)
+                if ((insn & (1 << 23)) == 0) {
                     offset = -offset;
+                }
+
+                if (s->v8m_stackcheck && rn == 13 && wback) {
+                    /*
+                     * Here 'addr' is the current SP; if offset is +ve we're
+                     * moving SP up, else down. It is UNKNOWN whether the limit
+                     * check triggers when SP starts below the limit and ends
+                     * up above it; check whichever of the current and final
+                     * SP is lower, so QEMU will trigger in that situation.
+                     */
+                    if ((int32_t)offset < 0) {
+                        TCGv_i32 newsp = tcg_temp_new_i32();
+
+                        tcg_gen_addi_i32(newsp, addr, offset);
+                        gen_helper_v8m_stackcheck(cpu_env, newsp);
+                        tcg_temp_free_i32(newsp);
+                    } else {
+                        gen_helper_v8m_stackcheck(cpu_env, addr);
+                    }
+                }
+
                 if (insn & (1 << 24)) {
                     tcg_gen_addi_i32(addr, addr, offset);
                     offset = 0;
@@ -10297,7 +10785,7 @@
                     gen_aa32_st32(s, tmp, addr, get_mem_index(s));
                     tcg_temp_free_i32(tmp);
                 }
-                if (insn & (1 << 21)) {
+                if (wback) {
                     /* Base writeback.  */
                     tcg_gen_addi_i32(addr, addr, offset - 4);
                     store_reg(s, rn, addr);
@@ -10484,6 +10972,7 @@
             } else {
                 int i, loaded_base = 0;
                 TCGv_i32 loaded_var;
+                bool wback = extract32(insn, 21, 1);
                 /* Load/store multiple.  */
                 addr = load_reg(s, rn);
                 offset = 0;
@@ -10491,10 +10980,26 @@
                     if (insn & (1 << i))
                         offset += 4;
                 }
+
                 if (insn & (1 << 24)) {
                     tcg_gen_addi_i32(addr, addr, -offset);
                 }
 
+                if (s->v8m_stackcheck && rn == 13 && wback) {
+                    /*
+                     * If the writeback is incrementing SP rather than
+                     * decrementing it, and the initial SP is below the
+                     * stack limit but the final written-back SP would
+                     * be above, then we must not perform any memory
+                     * accesses, but it is IMPDEF whether we generate
+                     * an exception. We choose to do so in this case.
+                     * At this point 'addr' is the lowest address, so
+                     * either the original SP (if incrementing) or our
+                     * final SP (if decrementing), so that's what we check.
+                     */
+                    gen_helper_v8m_stackcheck(cpu_env, addr);
+                }
+
                 loaded_var = NULL;
                 for (i = 0; i < 16; i++) {
                     if ((insn & (1 << i)) == 0)
@@ -10522,7 +11027,7 @@
                 if (loaded_base) {
                     store_reg(s, rn, loaded_var);
                 }
-                if (insn & (1 << 21)) {
+                if (wback) {
                     /* Base register writeback.  */
                     if (insn & (1 << 24)) {
                         tcg_gen_addi_i32(addr, addr, -offset);
@@ -10583,7 +11088,13 @@
             if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2))
                 goto illegal_op;
             tcg_temp_free_i32(tmp2);
-            if (rd != 15) {
+            if (rd == 13 &&
+                ((op == 2 && rn == 15) ||
+                 (op == 8 && rn == 13) ||
+                 (op == 13 && rn == 13))) {
+                /* MOV SP, ... or ADD SP, SP, ... or SUB SP, SP, ... */
+                store_sp_checked(s, tmp);
+            } else if (rd != 15) {
                 store_reg(s, rd, tmp);
             } else {
                 tcg_temp_free_i32(tmp);
@@ -10600,6 +11111,10 @@
             tmp2 = load_reg(s, rm);
             if ((insn & 0x70) != 0)
                 goto illegal_op;
+            /*
+             * 0b1111_1010_0xxx_xxxx_1111_xxxx_0000_xxxx:
+             *  - MOV, MOVS (register-shifted register), flagsetting
+             */
             op = (insn >> 21) & 3;
             logic_cc = (insn & (1 << 20)) != 0;
             gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
@@ -10706,7 +11221,7 @@
                 case 0x28:
                 case 0x29:
                 case 0x2a:
-                    if (!arm_dc_feature(s, ARM_FEATURE_CRC)) {
+                    if (!dc_isar_feature(aa32_crc32, s)) {
                         goto illegal_op;
                     }
                     break;
@@ -10887,7 +11402,7 @@
             tmp2 = load_reg(s, rm);
             if ((op & 0x50) == 0x10) {
                 /* sdiv, udiv */
-                if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DIV)) {
+                if (!dc_isar_feature(thumb_div, s)) {
                     goto illegal_op;
                 }
                 if (op & 0x20)
@@ -11267,8 +11782,15 @@
                 gen_jmp(s, s->pc + offset);
             }
         } else {
-            /* Data processing immediate.  */
+            /*
+             * 0b1111_0xxx_xxxx_0xxx_xxxx_xxxx
+             *  - Data-processing (modified immediate, plain binary immediate)
+             */
             if (insn & (1 << 25)) {
+                /*
+                 * 0b1111_0x1x_xxxx_0xxx_xxxx_xxxx
+                 *  - Data-processing (plain binary immediate)
+                 */
                 if (insn & (1 << 24)) {
                     if (insn & (1 << 20))
                         goto illegal_op;
@@ -11364,6 +11886,7 @@
                             tmp = tcg_temp_new_i32();
                             tcg_gen_movi_i32(tmp, imm);
                         }
+                        store_reg(s, rd, tmp);
                     } else {
                         /* Add/sub 12-bit immediate.  */
                         if (rn == 15) {
@@ -11374,17 +11897,27 @@
                                 offset += imm;
                             tmp = tcg_temp_new_i32();
                             tcg_gen_movi_i32(tmp, offset);
+                            store_reg(s, rd, tmp);
                         } else {
                             tmp = load_reg(s, rn);
                             if (insn & (1 << 23))
                                 tcg_gen_subi_i32(tmp, tmp, imm);
                             else
                                 tcg_gen_addi_i32(tmp, tmp, imm);
+                            if (rn == 13 && rd == 13) {
+                                /* ADD SP, SP, imm or SUB SP, SP, imm */
+                                store_sp_checked(s, tmp);
+                            } else {
+                                store_reg(s, rd, tmp);
+                            }
                         }
                     }
-                    store_reg(s, rd, tmp);
                 }
             } else {
+                /*
+                 * 0b1111_0x0x_xxxx_0xxx_xxxx_xxxx
+                 *  - Data-processing (modified immediate)
+                 */
                 int shifter_out = 0;
                 /* modified 12-bit immediate.  */
                 shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
@@ -11426,7 +11959,11 @@
                     goto illegal_op;
                 tcg_temp_free_i32(tmp2);
                 rd = (insn >> 8) & 0xf;
-                if (rd != 15) {
+                if (rd == 13 && rn == 13
+                    && (op == 8 || op == 13)) {
+                    /* ADD(S) SP, SP, imm or SUB(S) SP, SP, imm */
+                    store_sp_checked(s, tmp);
+                } else if (rd != 15) {
                     store_reg(s, rd, tmp);
                 } else {
                     tcg_temp_free_i32(tmp);
@@ -11535,7 +12072,6 @@
                     imm = -imm;
                     /* Fall through.  */
                 case 0xf: /* Pre-increment.  */
-                    tcg_gen_addi_i32(addr, addr, imm);
                     writeback = 1;
                     break;
                 default:
@@ -11547,6 +12083,28 @@
 
         issinfo = writeback ? ISSInvalid : rs;
 
+        if (s->v8m_stackcheck && rn == 13 && writeback) {
+            /*
+             * Stackcheck. Here we know 'addr' is the current SP;
+             * if imm is +ve we're moving SP up, else down. It is
+             * UNKNOWN whether the limit check triggers when SP starts
+             * below the limit and ends up above it; we choose to trigger.
+             */
+            if ((int32_t)imm < 0) {
+                TCGv_i32 newsp = tcg_temp_new_i32();
+
+                tcg_gen_addi_i32(newsp, addr, imm);
+                gen_helper_v8m_stackcheck(cpu_env, newsp);
+                tcg_temp_free_i32(newsp);
+            } else {
+                gen_helper_v8m_stackcheck(cpu_env, addr);
+            }
+        }
+
+        if (writeback && !postinc) {
+            tcg_gen_addi_i32(addr, addr, imm);
+        }
+
         if (insn & (1 << 20)) {
             /* Load.  */
             tmp = tcg_temp_new_i32();
@@ -11629,7 +12187,11 @@
         rd = insn & 7;
         op = (insn >> 11) & 3;
         if (op == 3) {
-            /* add/subtract */
+            /*
+             * 0b0001_1xxx_xxxx_xxxx
+             *  - Add, subtract (three low registers)
+             *  - Add, subtract (two low registers and immediate)
+             */
             rn = (insn >> 3) & 7;
             tmp = load_reg(s, rn);
             if (insn & (1 << 10)) {
@@ -11666,7 +12228,10 @@
         }
         break;
     case 2: case 3:
-        /* arithmetic large immediate */
+        /*
+         * 0b001x_xxxx_xxxx_xxxx
+         *  - Add, subtract, compare, move (one low register and immediate)
+         */
         op = (insn >> 11) & 3;
         rd = (insn >> 8) & 0x7;
         if (op == 0) { /* mov */
@@ -11732,7 +12297,12 @@
                 tmp2 = load_reg(s, rm);
                 tcg_gen_add_i32(tmp, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
-                store_reg(s, rd, tmp);
+                if (rd == 13) {
+                    /* ADD SP, SP, reg */
+                    store_sp_checked(s, tmp);
+                } else {
+                    store_reg(s, rd, tmp);
+                }
                 break;
             case 1: /* cmp */
                 tmp = load_reg(s, rd);
@@ -11743,7 +12313,12 @@
                 break;
             case 2: /* mov/cpy */
                 tmp = load_reg(s, rm);
-                store_reg(s, rd, tmp);
+                if (rd == 13) {
+                    /* MOV SP, reg */
+                    store_sp_checked(s, tmp);
+                } else {
+                    store_reg(s, rd, tmp);
+                }
                 break;
             case 3:
             {
@@ -11793,7 +12368,10 @@
             break;
         }
 
-        /* data processing register */
+        /*
+         * 0b0100_00xx_xxxx_xxxx
+         *  - Data-processing (two low registers)
+         */
         rd = insn & 7;
         rm = (insn >> 3) & 7;
         op = (insn >> 6) & 0xf;
@@ -12071,7 +12649,10 @@
         break;
 
     case 10:
-        /* add to high reg */
+        /*
+         * 0b1010_xxxx_xxxx_xxxx
+         *  - Add PC/SP (immediate)
+         */
         rd = (insn >> 8) & 7;
         if (insn & (1 << 11)) {
             /* SP */
@@ -12091,13 +12672,17 @@
         op = (insn >> 8) & 0xf;
         switch (op) {
         case 0:
-            /* adjust stack pointer */
+            /*
+             * 0b1011_0000_xxxx_xxxx
+             *  - ADD (SP plus immediate)
+             *  - SUB (SP minus immediate)
+             */
             tmp = load_reg(s, 13);
             val = (insn & 0x7f) * 4;
             if (insn & (1 << 7))
                 val = -(int32_t)val;
             tcg_gen_addi_i32(tmp, tmp, val);
-            store_reg(s, 13, tmp);
+            store_sp_checked(s, tmp);
             break;
 
         case 2: /* sign/zero extend.  */
@@ -12114,7 +12699,10 @@
             store_reg(s, rd, tmp);
             break;
         case 4: case 5: case 0xc: case 0xd:
-            /* push/pop */
+            /*
+             * 0b1011_x10x_xxxx_xxxx
+             *  - push/pop
+             */
             addr = load_reg(s, 13);
             if (insn & (1 << 8))
                 offset = 4;
@@ -12127,6 +12715,17 @@
             if ((insn & (1 << 11)) == 0) {
                 tcg_gen_addi_i32(addr, addr, -offset);
             }
+
+            if (s->v8m_stackcheck) {
+                /*
+                 * Here 'addr' is the lower of "old SP" and "new SP";
+                 * if this is a pop that starts below the limit and ends
+                 * above it, it is UNKNOWN whether the limit check triggers;
+                 * we choose to trigger.
+                 */
+                gen_helper_v8m_stackcheck(cpu_env, addr);
+            }
+
             for (i = 0; i < 8; i++) {
                 if (insn & (1 << i)) {
                     if (insn & (1 << 11)) {
@@ -12423,6 +13022,7 @@
     CPUARMState *env = cs->env_ptr;
     ARMCPU *cpu = arm_env_get_cpu(env);
 
+    dc->isar = &cpu->isar;
     dc->pc = dc->base.pc_first;
     dc->condjmp = 0;
 
@@ -12451,6 +13051,7 @@
     dc->v7m_handler_mode = ARM_TBFLAG_HANDLER(dc->base.tb->flags);
     dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
         regime_is_secure(env, dc->mmu_idx);
+    dc->v8m_stackcheck = ARM_TBFLAG_STACKCHECK(dc->base.tb->flags);
     dc->cp_regs = cpu->cp_regs;
     dc->features = env->features;
 
@@ -12539,7 +13140,6 @@
         tcg_gen_movi_i32(tmp, 0);
         store_cpu_field(tmp, condexec_bits);
     }
-    tcg_clear_temp_count();
 }
 
 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
@@ -12928,11 +13528,6 @@
     translator_loop(ops, &dc.base, cpu, tb);
 }
 
-static const char *cpu_mode_names[16] = {
-  "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt",
-  "???", "???", "hyp", "und", "???", "???", "???", "sys"
-};
-
 void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
                         int flags)
 {
@@ -12998,7 +13593,7 @@
                     psr & CPSR_V ? 'V' : '-',
                     psr & CPSR_T ? 'T' : 'A',
                     ns_status,
-                    cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26);
+                    aarch32_mode_name(psr), (psr & 0x10) ? 32 : 26);
     }
 
     if (flags & CPU_DUMP_FPU) {
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 45f0424..1550aa8 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -7,6 +7,7 @@
 /* internal defines */
 typedef struct DisasContext {
     DisasContextBase base;
+    const ARMISARegisters *isar;
 
     target_ulong pc;
     target_ulong page_start;
@@ -38,6 +39,7 @@
     int vec_stride;
     bool v7m_handler_mode;
     bool v8m_secure; /* true if v8M and we're in Secure mode */
+    bool v8m_stackcheck; /* true if we need to perform v8M stack limit checks */
     /* Immediate value in AArch32 SVC insn; must be set if is_jmp == DISAS_SWI
      * so that top level loop can generate correct syndrome information.
      */
@@ -189,4 +191,24 @@
     return ret;
 }
 
+
+/* Vector operations shared between ARM and AArch64.  */
+extern const GVecGen3 bsl_op;
+extern const GVecGen3 bit_op;
+extern const GVecGen3 bif_op;
+extern const GVecGen3 mla_op[4];
+extern const GVecGen3 mls_op[4];
+extern const GVecGen3 cmtst_op[4];
+extern const GVecGen2i ssra_op[4];
+extern const GVecGen2i usra_op[4];
+extern const GVecGen2i sri_op[4];
+extern const GVecGen2i sli_op[4];
+void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+
+/*
+ * Forward to the isar_feature_* tests given a DisasContext pointer.
+ */
+#define dc_isar_feature(name, ctx) \
+    ({ DisasContext *ctx_ = (ctx); isar_feature_##name(ctx_->isar); })
+
 #endif /* TARGET_ARM_TRANSLATE_H */
diff --git a/target/cris/translate.c b/target/cris/translate.c
index 4ae1c04..11b2c11 100644
--- a/target/cris/translate.c
+++ b/target/cris/translate.c
@@ -137,11 +137,7 @@
 
 static void gen_BUG(DisasContext *dc, const char *file, int line)
 {
-    fprintf(stderr, "BUG: pc=%x %s %d\n", dc->pc, file, line);
-    if (qemu_log_separate()) {
-        qemu_log("BUG: pc=%x %s %d\n", dc->pc, file, line);
-    }
-    cpu_abort(CPU(dc->cpu), "%s:%d\n", file, line);
+    cpu_abort(CPU(dc->cpu), "%s:%d pc=%x\n", file, line, dc->pc);
 }
 
 static const char *regnames_v32[] =
diff --git a/target/hppa/mem_helper.c b/target/hppa/mem_helper.c
index ab160c2..aecf307 100644
--- a/target/hppa/mem_helper.c
+++ b/target/hppa/mem_helper.c
@@ -137,7 +137,8 @@
 
     if (unlikely(!(prot & type))) {
         /* The access isn't allowed -- Inst/Data Memory Protection Fault.  */
-        ret = (type & PAGE_EXEC ? EXCP_IMP : EXCP_DMP);
+        ret = (type & PAGE_EXEC ? EXCP_IMP :
+               prot & PAGE_READ ? EXCP_DMP : EXCP_DMAR);
         goto egress;
     }
 
diff --git a/target/i386/Makefile.objs b/target/i386/Makefile.objs
index 04678f5..32bf966 100644
--- a/target/i386/Makefile.objs
+++ b/target/i386/Makefile.objs
@@ -3,17 +3,20 @@
 obj-$(CONFIG_TCG) += bpt_helper.o cc_helper.o excp_helper.o fpu_helper.o
 obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o mpx_helper.o
 obj-$(CONFIG_TCG) += seg_helper.o smm_helper.o svm_helper.o
-obj-$(CONFIG_SOFTMMU) += machine.o arch_memory_mapping.o arch_dump.o monitor.o
-obj-$(CONFIG_KVM) += kvm.o hyperv.o
-obj-$(CONFIG_SEV) += sev.o
+ifeq ($(CONFIG_SOFTMMU),y)
+obj-y += machine.o arch_memory_mapping.o arch_dump.o monitor.o
+obj-$(CONFIG_KVM) += kvm.o
 obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
-obj-$(call lnot,$(CONFIG_SEV)) += sev-stub.o
-# HAX support
-ifdef CONFIG_WIN32
+obj-$(CONFIG_HYPERV) += hyperv.o
+obj-$(call lnot,$(CONFIG_HYPERV)) += hyperv-stub.o
+ifeq ($(CONFIG_WIN32),y)
 obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-windows.o
 endif
-ifdef CONFIG_DARWIN
+ifeq ($(CONFIG_DARWIN),y)
 obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-darwin.o
 obj-$(CONFIG_HVF) += hvf/
 endif
 obj-$(CONFIG_WHPX) += whpx-all.o
+endif
+obj-$(CONFIG_SEV) += sev.o
+obj-$(call lnot,$(CONFIG_SEV)) += sev-stub.o
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index f24295e..1469a1b 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5123,14 +5123,15 @@
      * NOTE: the following code has to follow qemu_init_vcpu(). Otherwise
      * cs->nr_threads hasn't be populated yet and the checking is incorrect.
      */
-     if (IS_AMD_CPU(env) &&
-         !(env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) &&
-         cs->nr_threads > 1 && !ht_warned) {
-            error_report("This family of AMD CPU doesn't support "
-                         "hyperthreading(%d). Please configure -smp "
-                         "options properly or try enabling topoext feature.",
-                         cs->nr_threads);
-        ht_warned = true;
+    if (IS_AMD_CPU(env) &&
+        !(env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) &&
+        cs->nr_threads > 1 && !ht_warned) {
+            warn_report("This family of AMD CPU doesn't support "
+                        "hyperthreading(%d)",
+                        cs->nr_threads);
+            error_printf("Please configure -smp options properly"
+                         " or try enabling topoext feature.\n");
+            ht_warned = true;
     }
 
     x86_cpu_apic_realize(cpu, &local_err);
@@ -5429,20 +5430,51 @@
     cpu->env.eip = tb->pc - tb->cs_base;
 }
 
-static bool x86_cpu_has_work(CPUState *cs)
+int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request)
 {
     X86CPU *cpu = X86_CPU(cs);
     CPUX86State *env = &cpu->env;
 
-    return ((cs->interrupt_request & (CPU_INTERRUPT_HARD |
-                                      CPU_INTERRUPT_POLL)) &&
-            (env->eflags & IF_MASK)) ||
-           (cs->interrupt_request & (CPU_INTERRUPT_NMI |
-                                     CPU_INTERRUPT_INIT |
-                                     CPU_INTERRUPT_SIPI |
-                                     CPU_INTERRUPT_MCE)) ||
-           ((cs->interrupt_request & CPU_INTERRUPT_SMI) &&
-            !(env->hflags & HF_SMM_MASK));
+#if !defined(CONFIG_USER_ONLY)
+    if (interrupt_request & CPU_INTERRUPT_POLL) {
+        return CPU_INTERRUPT_POLL;
+    }
+#endif
+    if (interrupt_request & CPU_INTERRUPT_SIPI) {
+        return CPU_INTERRUPT_SIPI;
+    }
+
+    if (env->hflags2 & HF2_GIF_MASK) {
+        if ((interrupt_request & CPU_INTERRUPT_SMI) &&
+            !(env->hflags & HF_SMM_MASK)) {
+            return CPU_INTERRUPT_SMI;
+        } else if ((interrupt_request & CPU_INTERRUPT_NMI) &&
+                   !(env->hflags2 & HF2_NMI_MASK)) {
+            return CPU_INTERRUPT_NMI;
+        } else if (interrupt_request & CPU_INTERRUPT_MCE) {
+            return CPU_INTERRUPT_MCE;
+        } else if ((interrupt_request & CPU_INTERRUPT_HARD) &&
+                   (((env->hflags2 & HF2_VINTR_MASK) &&
+                     (env->hflags2 & HF2_HIF_MASK)) ||
+                    (!(env->hflags2 & HF2_VINTR_MASK) &&
+                     (env->eflags & IF_MASK &&
+                      !(env->hflags & HF_INHIBIT_IRQ_MASK))))) {
+            return CPU_INTERRUPT_HARD;
+#if !defined(CONFIG_USER_ONLY)
+        } else if ((interrupt_request & CPU_INTERRUPT_VIRQ) &&
+                   (env->eflags & IF_MASK) &&
+                   !(env->hflags & HF_INHIBIT_IRQ_MASK)) {
+            return CPU_INTERRUPT_VIRQ;
+#endif
+        }
+    }
+
+    return 0;
+}
+
+static bool x86_cpu_has_work(CPUState *cs)
+{
+    return x86_cpu_pending_interrupt(cs, cs->interrupt_request) != 0;
 }
 
 static void x86_disas_set_info(CPUState *cs, disassemble_info *info)
@@ -5533,6 +5565,7 @@
     DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false),
     DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false),
     DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false),
+    DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false),
     DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
     DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
     DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
@@ -5575,6 +5608,8 @@
      * to the specific Windows version being used."
      */
     DEFINE_PROP_INT32("x-hv-max-vps", X86CPU, hv_max_vps, -1),
+    DEFINE_PROP_BOOL("x-hv-synic-kvm-only", X86CPU, hyperv_synic_kvm_only,
+                     false),
     DEFINE_PROP_END_OF_LIST()
 };
 
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index b572a8e..663f3a5 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -171,7 +171,7 @@
 #define HF_AC_SHIFT         18 /* must be same as eflags */
 #define HF_SMM_SHIFT        19 /* CPU in SMM mode */
 #define HF_SVME_SHIFT       20 /* SVME enabled (copy of EFER.SVME) */
-#define HF_SVMI_SHIFT       21 /* SVM intercepts are active */
+#define HF_GUEST_SHIFT      21 /* SVM intercepts are active */
 #define HF_OSFXSR_SHIFT     22 /* CR4.OSFXSR */
 #define HF_SMAP_SHIFT       23 /* CR4.SMAP */
 #define HF_IOBPT_SHIFT      24 /* an io breakpoint enabled */
@@ -196,7 +196,7 @@
 #define HF_AC_MASK           (1 << HF_AC_SHIFT)
 #define HF_SMM_MASK          (1 << HF_SMM_SHIFT)
 #define HF_SVME_MASK         (1 << HF_SVME_SHIFT)
-#define HF_SVMI_MASK         (1 << HF_SVMI_SHIFT)
+#define HF_GUEST_MASK        (1 << HF_GUEST_SHIFT)
 #define HF_OSFXSR_MASK       (1 << HF_OSFXSR_SHIFT)
 #define HF_SMAP_MASK         (1 << HF_SMAP_SHIFT)
 #define HF_IOBPT_MASK        (1 << HF_IOBPT_SHIFT)
@@ -1327,7 +1327,9 @@
     bool tsc_valid;
     int64_t tsc_khz;
     int64_t user_tsc_khz; /* for sanity check only */
-    void *kvm_xsave_buf;
+#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
+    void *xsave_buf;
+#endif
 #if defined(CONFIG_HVF)
     HVFX86EmulatorState *hvf_emul;
 #endif
@@ -1376,10 +1378,12 @@
     bool hyperv_vpindex;
     bool hyperv_runtime;
     bool hyperv_synic;
+    bool hyperv_synic_kvm_only;
     bool hyperv_stimer;
     bool hyperv_frequencies;
     bool hyperv_reenlightenment;
     bool hyperv_tlbflush;
+    bool hyperv_ipi;
     bool check_cpuid;
     bool enforce_cpuid;
     bool expose_kvm;
@@ -1485,6 +1489,7 @@
  */
 void x86_cpu_do_interrupt(CPUState *cpu);
 bool x86_cpu_exec_interrupt(CPUState *cpu, int int_req);
+int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request);
 
 int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu,
                              int cpuid, void *opaque);
diff --git a/target/i386/excp_helper.c b/target/i386/excp_helper.c
index 37a33d5..49231f6 100644
--- a/target/i386/excp_helper.c
+++ b/target/i386/excp_helper.c
@@ -53,7 +53,7 @@
 
 #if !defined(CONFIG_USER_ONLY)
     if (env->old_exception == EXCP08_DBLE) {
-        if (env->hflags & HF_SVMI_MASK) {
+        if (env->hflags & HF_GUEST_MASK) {
             cpu_vmexit(env, SVM_EXIT_SHUTDOWN, 0, retaddr); /* does not return */
         }
 
diff --git a/target/i386/hvf/README.md b/target/i386/hvf/README.md
index 0d27a0d..2d33477 100644
--- a/target/i386/hvf/README.md
+++ b/target/i386/hvf/README.md
@@ -2,6 +2,6 @@
 
 These sources (and ../hvf-all.c) are adapted from Veertu Inc's vdhh (Veertu Desktop Hosted Hypervisor) (last known location: https://github.com/veertuinc/vdhh) with some minor changes, the most significant of which were:
 
-1. Adapt to our current QEMU's `CPUState` structure and `address_space_rw` API; many struct members have been moved around (emulated x86 state, kvm_xsave_buf) due to historical differences + QEMU needing to handle more emulation targets.
+1. Adapt to our current QEMU's `CPUState` structure and `address_space_rw` API; many struct members have been moved around (emulated x86 state, xsave_buf) due to historical differences + QEMU needing to handle more emulation targets.
 2. Removal of `apic_page` and hyperv-related functionality.
 3. More relaxed use of `qemu_mutex_lock_iothread`.
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index df69e6d..e193022 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -72,9 +72,7 @@
 #include "sysemu/sysemu.h"
 #include "target/i386/cpu.h"
 
-pthread_rwlock_t mem_lock = PTHREAD_RWLOCK_INITIALIZER;
 HVFState *hvf_state;
-int hvf_disabled = 1;
 
 static void assert_hvf_ok(hv_return_t ret)
 {
@@ -587,7 +585,7 @@
     hvf_reset_vcpu(cpu);
 
     x86cpu = X86_CPU(cpu);
-    x86cpu->env.kvm_xsave_buf = qemu_memalign(4096, 4096);
+    x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);
 
     hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
     hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
@@ -605,11 +603,6 @@
     return 0;
 }
 
-void hvf_disable(int shouldDisable)
-{
-    hvf_disabled = shouldDisable;
-}
-
 static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
 {
     X86CPU *x86_cpu = X86_CPU(cpu);
@@ -935,7 +928,7 @@
     return ret;
 }
 
-static bool hvf_allowed;
+bool hvf_allowed;
 
 static int hvf_accel_init(MachineState *ms)
 {
@@ -943,7 +936,6 @@
     hv_return_t ret;
     HVFState *s;
 
-    hvf_disable(0);
     ret = hv_vm_create(HV_VM_DEFAULT);
     assert_hvf_ok(ret);
 
diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c
index 2d7540f..2e33b69 100644
--- a/target/i386/hvf/x86_decode.c
+++ b/target/i386/hvf/x86_decode.c
@@ -113,7 +113,8 @@
 {
     op->type = X86_VAR_REG;
     op->reg = decode->modrm.reg;
-    op->ptr = get_reg_ref(env, op->reg, decode->rex.r, decode->operand_size);
+    op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.r,
+                          decode->operand_size);
 }
 
 static void decode_rax(CPUX86State *env, struct x86_decode *decode,
@@ -121,7 +122,8 @@
 {
     op->type = X86_VAR_REG;
     op->reg = R_EAX;
-    op->ptr = get_reg_ref(env, op->reg, 0, decode->operand_size);
+    op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, 0,
+                          decode->operand_size);
 }
 
 static inline void decode_immediate(CPUX86State *env, struct x86_decode *decode,
@@ -263,16 +265,16 @@
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[0] - 0x40;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
 }
 
 static void decode_decgroup(CPUX86State *env, struct x86_decode *decode)
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[0] - 0x48;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
 }
 
 static void decode_incgroup2(CPUX86State *env, struct x86_decode *decode)
@@ -288,16 +290,16 @@
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[0] - 0x50;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
 }
 
 static void decode_popgroup(CPUX86State *env, struct x86_decode *decode)
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[0] - 0x58;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
 }
 
 static void decode_jxx(CPUX86State *env, struct x86_decode *decode)
@@ -378,16 +380,16 @@
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[0] - 0x90;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
 }
 
 static void decode_movgroup(CPUX86State *env, struct x86_decode *decode)
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[0] - 0xb8;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
     decode_immediate(env, decode, &decode->op[1], decode->operand_size);
 }
 
@@ -402,8 +404,8 @@
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[0] - 0xb0;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
     decode_immediate(env, decode, &decode->op[1], decode->operand_size);
 }
 
@@ -412,7 +414,8 @@
 {
     op->type = X86_VAR_REG;
     op->reg = R_ECX;
-    op->ptr = get_reg_ref(env, op->reg, decode->rex.b, decode->operand_size);
+    op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.b,
+                          decode->operand_size);
 }
 
 struct decode_tbl {
@@ -639,8 +642,8 @@
 {
     decode->op[0].type = X86_VAR_REG;
     decode->op[0].reg = decode->opcode[1] - 0xc8;
-    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b,
-                                    decode->operand_size);
+    decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex,
+                                    decode->rex.b, decode->operand_size);
 }
 
 static void decode_d9_4(CPUX86State *env, struct x86_decode *decode)
@@ -1686,7 +1689,8 @@
     }
 }
 
-target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size)
+target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended,
+                         int size)
 {
     target_ulong ptr = 0;
     int which = 0;
@@ -1698,7 +1702,7 @@
 
     switch (size) {
     case 1:
-        if (is_extended || reg < 4) {
+        if (is_extended || reg < 4 || rex) {
             which = 1;
             ptr = (target_ulong)&RL(env, reg);
         } else {
@@ -1714,10 +1718,11 @@
     return ptr;
 }
 
-target_ulong get_reg_val(CPUX86State *env, int reg, int is_extended, int size)
+target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended,
+                         int size)
 {
     target_ulong val = 0;
-    memcpy(&val, (void *)get_reg_ref(env, reg, is_extended, size), size);
+    memcpy(&val, (void *)get_reg_ref(env, reg, rex, is_extended, size), size);
     return val;
 }
 
@@ -1739,7 +1744,8 @@
         if (base_reg == R_ESP || base_reg == R_EBP) {
             *sel = R_SS;
         }
-        base = get_reg_val(env, decode->sib.base, decode->rex.b, addr_size);
+        base = get_reg_val(env, decode->sib.base, decode->rex.rex,
+                           decode->rex.b, addr_size);
     }
 
     if (decode->rex.x) {
@@ -1747,7 +1753,8 @@
     }
 
     if (index_reg != R_ESP) {
-        scaled_index = get_reg_val(env, index_reg, decode->rex.x, addr_size) <<
+        scaled_index = get_reg_val(env, index_reg, decode->rex.rex,
+                                   decode->rex.x, addr_size) <<
                                    decode->sib.scale;
     }
     return base + scaled_index;
@@ -1776,7 +1783,8 @@
         if (decode->modrm.rm == R_EBP || decode->modrm.rm == R_ESP) {
             seg = R_SS;
         }
-        ptr += get_reg_val(env, decode->modrm.rm, decode->rex.b, addr_size);
+        ptr += get_reg_val(env, decode->modrm.rm, decode->rex.rex,
+                           decode->rex.b, addr_size);
     }
 
     if (X86_DECODE_CMD_LEA == decode->cmd) {
@@ -1805,7 +1813,8 @@
     } else if (0 == mod && 5 == rm) {
         ptr = RIP(env) + decode->len + (int32_t) offset;
     } else {
-        ptr = get_reg_val(env, src, decode->rex.b, 8) + (int64_t) offset;
+        ptr = get_reg_val(env, src, decode->rex.rex, decode->rex.b, 8) +
+              (int64_t) offset;
     }
 
     if (X86_DECODE_CMD_LEA == decode->cmd) {
@@ -1822,8 +1831,8 @@
     if (3 == decode->modrm.mod) {
         op->reg = decode->modrm.reg;
         op->type = X86_VAR_REG;
-        op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.b,
-                              decode->operand_size);
+        op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.rex,
+                              decode->rex.b, decode->operand_size);
         return;
     }
 
diff --git a/target/i386/hvf/x86_decode.h b/target/i386/hvf/x86_decode.h
index 5ab6f31..ef4bcab 100644
--- a/target/i386/hvf/x86_decode.h
+++ b/target/i386/hvf/x86_decode.h
@@ -303,8 +303,10 @@
 
 uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode);
 
-target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size);
-target_ulong get_reg_val(CPUX86State *env, int reg, int is_extended, int size);
+target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended,
+                         int size);
+target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended,
+                         int size);
 void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode,
                         struct x86_decode_op *op);
 target_ulong decode_linear_addr(CPUX86State *env, struct x86_decode *decode,
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
index 6c88939..df8e946 100644
--- a/target/i386/hvf/x86hvf.c
+++ b/target/i386/hvf/x86hvf.c
@@ -75,7 +75,7 @@
 
     struct X86XSaveArea *xsave;
 
-    xsave = X86_CPU(cpu_state)->env.kvm_xsave_buf;
+    xsave = X86_CPU(cpu_state)->env.xsave_buf;
 
     x86_cpu_xsave_all_areas(X86_CPU(cpu_state), xsave);
 
@@ -163,7 +163,7 @@
 {
     struct X86XSaveArea *xsave;
 
-    xsave = X86_CPU(cpu_state)->env.kvm_xsave_buf;
+    xsave = X86_CPU(cpu_state)->env.xsave_buf;
 
     if (hv_vcpu_read_fpstate(cpu_state->hvf_fd, (void*)xsave, 4096)) {
         abort();
diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h
index d6d5a79..8c572cd 100644
--- a/target/i386/hyperv-proto.h
+++ b/target/i386/hyperv-proto.h
@@ -1,7 +1,7 @@
 /*
- * Definitions for Hyper-V guest/hypervisor interaction
+ * Definitions for Hyper-V guest/hypervisor interaction - x86-specific part
  *
- * Copyright (C) 2017 Parallels International GmbH
+ * Copyright (c) 2017-2018 Virtuozzo International GmbH.
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -10,7 +10,7 @@
 #ifndef TARGET_I386_HYPERV_PROTO_H
 #define TARGET_I386_HYPERV_PROTO_H
 
-#include "qemu/bitmap.h"
+#include "hw/hyperv/hyperv-proto.h"
 
 #define HV_CPUID_VENDOR_AND_MAX_FUNCTIONS     0x40000000
 #define HV_CPUID_INTERFACE                    0x40000001
@@ -58,6 +58,7 @@
 #define HV_APIC_ACCESS_RECOMMENDED          (1u << 3)
 #define HV_SYSTEM_RESET_RECOMMENDED         (1u << 4)
 #define HV_RELAXED_TIMING_RECOMMENDED       (1u << 5)
+#define HV_CLUSTER_IPI_RECOMMENDED          (1u << 10)
 #define HV_EX_PROCESSOR_MASKS_RECOMMENDED   (1u << 11)
 
 /*
@@ -138,25 +139,6 @@
 #define HV_X64_MSR_TSC_EMULATION_STATUS         0x40000108
 
 /*
- * Hypercall status code
- */
-#define HV_STATUS_SUCCESS                     0
-#define HV_STATUS_INVALID_HYPERCALL_CODE      2
-#define HV_STATUS_INVALID_HYPERCALL_INPUT     3
-#define HV_STATUS_INVALID_ALIGNMENT           4
-#define HV_STATUS_INVALID_PARAMETER           5
-#define HV_STATUS_INSUFFICIENT_MEMORY         11
-#define HV_STATUS_INVALID_CONNECTION_ID       18
-#define HV_STATUS_INSUFFICIENT_BUFFERS        19
-
-/*
- * Hypercall numbers
- */
-#define HV_POST_MESSAGE                       0x005c
-#define HV_SIGNAL_EVENT                       0x005d
-#define HV_HYPERCALL_FAST                     (1u << 16)
-
-/*
  * Hypercall MSR bits
  */
 #define HV_HYPERCALL_ENABLE                   (1u << 0)
@@ -165,7 +147,6 @@
  * Synthetic interrupt controller definitions
  */
 #define HV_SYNIC_VERSION                      1
-#define HV_SINT_COUNT                         16
 #define HV_SYNIC_ENABLE                       (1u << 0)
 #define HV_SIMP_ENABLE                        (1u << 0)
 #define HV_SIEFP_ENABLE                       (1u << 0)
@@ -175,94 +156,5 @@
 
 #define HV_STIMER_COUNT                       4
 
-/*
- * Message size
- */
-#define HV_MESSAGE_PAYLOAD_SIZE               240
-
-/*
- * Message types
- */
-#define HV_MESSAGE_NONE                       0x00000000
-#define HV_MESSAGE_VMBUS                      0x00000001
-#define HV_MESSAGE_UNMAPPED_GPA               0x80000000
-#define HV_MESSAGE_GPA_INTERCEPT              0x80000001
-#define HV_MESSAGE_TIMER_EXPIRED              0x80000010
-#define HV_MESSAGE_INVALID_VP_REGISTER_VALUE  0x80000020
-#define HV_MESSAGE_UNRECOVERABLE_EXCEPTION    0x80000021
-#define HV_MESSAGE_UNSUPPORTED_FEATURE        0x80000022
-#define HV_MESSAGE_EVENTLOG_BUFFERCOMPLETE    0x80000040
-#define HV_MESSAGE_X64_IOPORT_INTERCEPT       0x80010000
-#define HV_MESSAGE_X64_MSR_INTERCEPT          0x80010001
-#define HV_MESSAGE_X64_CPUID_INTERCEPT        0x80010002
-#define HV_MESSAGE_X64_EXCEPTION_INTERCEPT    0x80010003
-#define HV_MESSAGE_X64_APIC_EOI               0x80010004
-#define HV_MESSAGE_X64_LEGACY_FP_ERROR        0x80010005
-
-/*
- * Message flags
- */
-#define HV_MESSAGE_FLAG_PENDING               0x1
-
-/*
- * Event flags number per SINT
- */
-#define HV_EVENT_FLAGS_COUNT                  (256 * 8)
-
-/*
- * Connection id valid bits
- */
-#define HV_CONNECTION_ID_MASK                 0x00ffffff
-
-/*
- * Input structure for POST_MESSAGE hypercall
- */
-struct hyperv_post_message_input {
-    uint32_t connection_id;
-    uint32_t _reserved;
-    uint32_t message_type;
-    uint32_t payload_size;
-    uint8_t  payload[HV_MESSAGE_PAYLOAD_SIZE];
-};
-
-/*
- * Input structure for SIGNAL_EVENT hypercall
- */
-struct hyperv_signal_event_input {
-    uint32_t connection_id;
-    uint16_t flag_number;
-    uint16_t _reserved_zero;
-};
-
-/*
- * SynIC message structures
- */
-struct hyperv_message_header {
-    uint32_t message_type;
-    uint8_t  payload_size;
-    uint8_t  message_flags; /* HV_MESSAGE_FLAG_XX */
-    uint8_t  _reserved[2];
-    uint64_t sender;
-};
-
-struct hyperv_message {
-    struct hyperv_message_header header;
-    uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE];
-};
-
-struct hyperv_message_page {
-    struct hyperv_message slot[HV_SINT_COUNT];
-};
-
-/*
- * SynIC event flags structures
- */
-struct hyperv_event_flags {
-    DECLARE_BITMAP(flags, HV_EVENT_FLAGS_COUNT);
-};
-
-struct hyperv_event_flags_page {
-    struct hyperv_event_flags slot[HV_SINT_COUNT];
-};
 
 #endif
diff --git a/target/i386/hyperv-stub.c b/target/i386/hyperv-stub.c
new file mode 100644
index 0000000..fe548cb
--- /dev/null
+++ b/target/i386/hyperv-stub.c
@@ -0,0 +1,48 @@
+/*
+ * Stubs for CONFIG_HYPERV=n
+ *
+ * Copyright (c) 2015-2018 Virtuozzo International GmbH.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hyperv.h"
+
+#ifdef CONFIG_KVM
+int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit)
+{
+    /* Without CONFIG_HYPERV no hypercall is implemented: refuse them all. */
+    if (exit->type == KVM_EXIT_HYPERV_HCALL) {
+        exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE;
+        return 0;
+    }
+
+    /* Anything that is not a SynIC exit is unknown to this stub. */
+    if (exit->type != KVM_EXIT_HYPERV_SYNIC) {
+        return -1;
+    }
+
+    /*
+     * A SynIC exit is accepted only if cpu->hyperv_synic is set.
+     *
+     * The MSR changes themselves need no tracking: their only users are
+     * save/load, which the generic MSR save/load code already handles.
+     */
+    return cpu->hyperv_synic ? 0 : -1;
+}
+#endif
+
+int hyperv_x86_synic_add(X86CPU *cpu)
+{
+    return -ENOSYS;
+}
+
+void hyperv_x86_synic_reset(X86CPU *cpu)
+{
+}
+
+void hyperv_x86_synic_update(X86CPU *cpu)
+{
+}
diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c
index 3065d76..b264a28 100644
--- a/target/i386/hyperv.c
+++ b/target/i386/hyperv.c
@@ -14,16 +14,36 @@
 #include "qemu/osdep.h"
 #include "qemu/main-loop.h"
 #include "hyperv.h"
+#include "hw/hyperv/hyperv.h"
 #include "hyperv-proto.h"
 
-uint32_t hyperv_vp_index(X86CPU *cpu)
+int hyperv_x86_synic_add(X86CPU *cpu)
 {
-    return CPU(cpu)->cpu_index;
+    hyperv_synic_add(CPU(cpu));
+    return 0;
 }
 
-X86CPU *hyperv_find_vcpu(uint32_t vp_index)
+void hyperv_x86_synic_reset(X86CPU *cpu)
 {
-    return X86_CPU(qemu_get_cpu(vp_index));
+    hyperv_synic_reset(CPU(cpu));
+}
+
+void hyperv_x86_synic_update(X86CPU *cpu)
+{
+    uint64_t simp = cpu->env.msr_hv_synic_msg_page;
+    uint64_t siefp = cpu->env.msr_hv_synic_evt_page;
+    /* A page whose enable bit is clear is handed over as address 0. */
+    hwaddr msg_gpa = (simp & HV_SIMP_ENABLE) ? (simp & TARGET_PAGE_MASK) : 0;
+    hwaddr evt_gpa = (siefp & HV_SIEFP_ENABLE) ? (siefp & TARGET_PAGE_MASK) : 0;
+    bool synic_on = cpu->env.msr_hv_synic_control & HV_SYNIC_ENABLE;
+    hyperv_synic_update(CPU(cpu), synic_on, msg_gpa, evt_gpa);
+}
+
+static void async_synic_update(CPUState *cs, run_on_cpu_data data)
+{
+    qemu_mutex_lock_iothread();
+    hyperv_x86_synic_update(X86_CPU(cs));
+    qemu_mutex_unlock_iothread();
 }
 
 int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit)
@@ -36,11 +56,6 @@
             return -1;
         }
 
-        /*
-         * For now just track changes in SynIC control and msg/evt pages msr's.
-         * When SynIC messaging/events processing will be added in future
-         * here we will do messages queues flushing and pages remapping.
-         */
         switch (exit->u.synic.msr) {
         case HV_X64_MSR_SCONTROL:
             env->msr_hv_synic_control = exit->u.synic.control;
@@ -54,98 +69,33 @@
         default:
             return -1;
         }
+
+        /*
+         * this will run in this cpu thread before it returns to KVM, but in a
+         * safe environment (i.e. when all cpus are quiescent) -- this is
+         * necessary because memory hierarchy is being changed
+         */
+        async_safe_run_on_cpu(CPU(cpu), async_synic_update, RUN_ON_CPU_NULL);
+
         return 0;
     case KVM_EXIT_HYPERV_HCALL: {
-        uint16_t code;
+        uint16_t code = exit->u.hcall.input & 0xffff;
+        bool fast = exit->u.hcall.input & HV_HYPERCALL_FAST;
+        uint64_t param = exit->u.hcall.params[0];
 
-        code  = exit->u.hcall.input & 0xffff;
         switch (code) {
         case HV_POST_MESSAGE:
+            exit->u.hcall.result = hyperv_hcall_post_message(param, fast);
+            break;
         case HV_SIGNAL_EVENT:
+            exit->u.hcall.result = hyperv_hcall_signal_event(param, fast);
+            break;
         default:
             exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE;
-            return 0;
         }
+        return 0;
     }
     default:
         return -1;
     }
 }
-
-static void kvm_hv_sint_ack_handler(EventNotifier *notifier)
-{
-    HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
-                                           sint_ack_notifier);
-    event_notifier_test_and_clear(notifier);
-    if (sint_route->sint_ack_clb) {
-        sint_route->sint_ack_clb(sint_route);
-    }
-}
-
-HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint,
-                                      HvSintAckClb sint_ack_clb)
-{
-    HvSintRoute *sint_route;
-    int r, gsi;
-
-    sint_route = g_malloc0(sizeof(*sint_route));
-    r = event_notifier_init(&sint_route->sint_set_notifier, false);
-    if (r) {
-        goto err;
-    }
-
-    r = event_notifier_init(&sint_route->sint_ack_notifier, false);
-    if (r) {
-        goto err_sint_set_notifier;
-    }
-
-    event_notifier_set_handler(&sint_route->sint_ack_notifier,
-                               kvm_hv_sint_ack_handler);
-
-    gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
-    if (gsi < 0) {
-        goto err_gsi;
-    }
-
-    r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
-                                           &sint_route->sint_set_notifier,
-                                           &sint_route->sint_ack_notifier, gsi);
-    if (r) {
-        goto err_irqfd;
-    }
-    sint_route->gsi = gsi;
-    sint_route->sint_ack_clb = sint_ack_clb;
-    sint_route->vp_index = vp_index;
-    sint_route->sint = sint;
-
-    return sint_route;
-
-err_irqfd:
-    kvm_irqchip_release_virq(kvm_state, gsi);
-err_gsi:
-    event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
-    event_notifier_cleanup(&sint_route->sint_ack_notifier);
-err_sint_set_notifier:
-    event_notifier_cleanup(&sint_route->sint_set_notifier);
-err:
-    g_free(sint_route);
-
-    return NULL;
-}
-
-void kvm_hv_sint_route_destroy(HvSintRoute *sint_route)
-{
-    kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
-                                          &sint_route->sint_set_notifier,
-                                          sint_route->gsi);
-    kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
-    event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
-    event_notifier_cleanup(&sint_route->sint_ack_notifier);
-    event_notifier_cleanup(&sint_route->sint_set_notifier);
-    g_free(sint_route);
-}
-
-int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route)
-{
-    return event_notifier_set(&sint_route->sint_set_notifier);
-}
diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h
index 00c9b45..6754329 100644
--- a/target/i386/hyperv.h
+++ b/target/i386/hyperv.h
@@ -16,30 +16,14 @@
 
 #include "cpu.h"
 #include "sysemu/kvm.h"
-#include "qemu/event_notifier.h"
+#include "hw/hyperv/hyperv.h"
 
-typedef struct HvSintRoute HvSintRoute;
-typedef void (*HvSintAckClb)(HvSintRoute *sint_route);
-
-struct HvSintRoute {
-    uint32_t sint;
-    uint32_t vp_index;
-    int gsi;
-    EventNotifier sint_set_notifier;
-    EventNotifier sint_ack_notifier;
-    HvSintAckClb sint_ack_clb;
-};
-
+#ifdef CONFIG_KVM
 int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit);
+#endif
 
-HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint,
-                                      HvSintAckClb sint_ack_clb);
-
-void kvm_hv_sint_route_destroy(HvSintRoute *sint_route);
-
-int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route);
-
-uint32_t hyperv_vp_index(X86CPU *cpu);
-X86CPU *hyperv_find_vcpu(uint32_t vp_index);
+int hyperv_x86_synic_add(X86CPU *cpu);
+void hyperv_x86_synic_reset(X86CPU *cpu);
+void hyperv_x86_synic_update(X86CPU *cpu);
 
 #endif
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 0b2a07d..115d8b4 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -608,7 +608,8 @@
             cpu->hyperv_synic ||
             cpu->hyperv_stimer ||
             cpu->hyperv_reenlightenment ||
-            cpu->hyperv_tlbflush);
+            cpu->hyperv_tlbflush ||
+            cpu->hyperv_ipi);
 }
 
 static int kvm_arch_set_tsc_khz(CPUState *cs)
@@ -733,9 +734,20 @@
         env->features[FEAT_HYPERV_EAX] |= HV_VP_RUNTIME_AVAILABLE;
     }
     if (cpu->hyperv_synic) {
-        if (!has_msr_hv_synic ||
-            kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_SYNIC, 0)) {
-            fprintf(stderr, "Hyper-V SynIC is not supported by kernel\n");
+        unsigned int cap = KVM_CAP_HYPERV_SYNIC;
+        if (!cpu->hyperv_synic_kvm_only) {
+            if (!cpu->hyperv_vpindex) {
+                fprintf(stderr, "Hyper-V SynIC "
+                        "(requested by 'hv-synic' cpu flag) "
+                        "requires Hyper-V VP_INDEX ('hv-vpindex')\n");
+                return -ENOSYS;
+            }
+            cap = KVM_CAP_HYPERV_SYNIC2;
+        }
+
+        if (!has_msr_hv_synic || !kvm_check_extension(cs->kvm_state, cap)) {
+            fprintf(stderr, "Hyper-V SynIC (requested by 'hv-synic' cpu flag) "
+                    "is not supported by kernel\n");
             return -ENOSYS;
         }
 
@@ -753,12 +765,14 @@
 
 static int hyperv_init_vcpu(X86CPU *cpu)
 {
+    CPUState *cs = CPU(cpu);
+    int ret;
+
     if (cpu->hyperv_vpindex && !hv_vpindex_settable) {
         /*
          * the kernel doesn't support setting vp_index; assert that its value
          * is in sync
          */
-        int ret;
         struct {
             struct kvm_msrs info;
             struct kvm_msr_entry entries[1];
@@ -767,18 +781,38 @@
             .entries[0].index = HV_X64_MSR_VP_INDEX,
         };
 
-        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
+        ret = kvm_vcpu_ioctl(cs, KVM_GET_MSRS, &msr_data);
         if (ret < 0) {
             return ret;
         }
         assert(ret == 1);
 
-        if (msr_data.entries[0].data != hyperv_vp_index(cpu)) {
+        if (msr_data.entries[0].data != hyperv_vp_index(CPU(cpu))) {
             error_report("kernel's vp_index != QEMU's vp_index");
             return -ENXIO;
         }
     }
 
+    if (cpu->hyperv_synic) {
+        uint32_t synic_cap = cpu->hyperv_synic_kvm_only ?
+            KVM_CAP_HYPERV_SYNIC : KVM_CAP_HYPERV_SYNIC2;
+        ret = kvm_vcpu_enable_cap(cs, synic_cap, 0);
+        if (ret < 0) {
+            error_report("failed to turn on HyperV SynIC in KVM: %s",
+                         strerror(-ret));
+            return ret;
+        }
+
+        if (!cpu->hyperv_synic_kvm_only) {
+            ret = hyperv_x86_synic_add(cpu);
+            if (ret < 0) {
+                error_report("failed to create HyperV SynIC: %s",
+                             strerror(-ret));
+                return ret;
+            }
+        }
+    }
+
     return 0;
 }
 
@@ -888,6 +922,17 @@
             c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED;
             c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED;
         }
+        if (cpu->hyperv_ipi) {
+            if (kvm_check_extension(cs->kvm_state,
+                                    KVM_CAP_HYPERV_SEND_IPI) <= 0) {
+                fprintf(stderr, "Hyper-V IPI send support "
+                        "(requested by 'hv-ipi' cpu flag) "
+                        "is not supported by kernel\n");
+                return -ENOSYS;
+            }
+            c->eax |= HV_CLUSTER_IPI_RECOMMENDED;
+            c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED;
+        }
 
         c->ebx = cpu->hyperv_spinlock_attempts;
 
@@ -1153,7 +1198,7 @@
             if (local_err) {
                 error_report_err(local_err);
                 error_free(invtsc_mig_blocker);
-                goto fail;
+                return r;
             }
             /* for savevm */
             vmstate_x86_cpu.unmigratable = 1;
@@ -1189,7 +1234,7 @@
     }
 
     if (has_xsave) {
-        env->kvm_xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
+        env->xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
     }
     cpu->kvm_msr_buf = g_malloc0(MSR_BUF_SIZE);
 
@@ -1226,6 +1271,8 @@
         for (i = 0; i < ARRAY_SIZE(env->msr_hv_synic_sint); i++) {
             env->msr_hv_synic_sint[i] = HV_SINT_MASKED;
         }
+
+        hyperv_x86_synic_reset(cpu);
     }
 }
 
@@ -1639,7 +1686,7 @@
 static int kvm_put_xsave(X86CPU *cpu)
 {
     CPUX86State *env = &cpu->env;
-    X86XSaveArea *xsave = env->kvm_xsave_buf;
+    X86XSaveArea *xsave = env->xsave_buf;
 
     if (!has_xsave) {
         return kvm_put_fpu(cpu);
@@ -1937,7 +1984,8 @@
             kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, env->msr_hv_runtime);
         }
         if (cpu->hyperv_vpindex && hv_vpindex_settable) {
-            kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX, hyperv_vp_index(cpu));
+            kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX,
+                              hyperv_vp_index(CPU(cpu)));
         }
         if (cpu->hyperv_synic) {
             int j;
@@ -2081,7 +2129,7 @@
 static int kvm_get_xsave(X86CPU *cpu)
 {
     CPUX86State *env = &cpu->env;
-    X86XSaveArea *xsave = env->kvm_xsave_buf;
+    X86XSaveArea *xsave = env->xsave_buf;
     int ret;
 
     if (!has_xsave) {
@@ -2686,7 +2734,6 @@
     events.exception.nr = env->exception_injected;
     events.exception.has_error_code = env->has_error_code;
     events.exception.error_code = env->error_code;
-    events.exception.pad = 0;
 
     events.interrupt.injected = (env->interrupt_injected >= 0);
     events.interrupt.nr = env->interrupt_injected;
@@ -2695,7 +2742,6 @@
     events.nmi.injected = env->nmi_injected;
     events.nmi.pending = env->nmi_pending;
     events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK);
-    events.nmi.pad = 0;
 
     events.sipi_vector = env->sipi_vector;
     events.flags = 0;
@@ -3669,6 +3715,10 @@
         MSIMessage src, dst;
         X86IOMMUClass *class = X86_IOMMU_GET_CLASS(iommu);
 
+        if (!class->int_remap) {
+            return 0;
+        }
+
         src.address = route->u.msi.address_hi;
         src.address <<= VTD_MSI_ADDR_HI_SHIFT;
         src.address |= route->u.msi.address_lo;
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 084c2c7..225b5d4 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -7,6 +7,7 @@
 #include "hw/i386/pc.h"
 #include "hw/isa/isa.h"
 #include "migration/cpu.h"
+#include "hyperv.h"
 
 #include "sysemu/kvm.h"
 
@@ -672,11 +673,19 @@
     return false;
 }
 
+static int hyperv_synic_post_load(void *opaque, int version_id)
+{
+    /* opaque is the X86CPU whose SynIC MSR fields this section loads */
+    hyperv_x86_synic_update((X86CPU *)opaque);
+    return 0;
+}
+
 static const VMStateDescription vmstate_msr_hyperv_synic = {
     .name = "cpu/msr_hyperv_synic",
     .version_id = 1,
     .minimum_version_id = 1,
     .needed = hyperv_synic_enable_needed,
+    .post_load = hyperv_synic_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(env.msr_hv_synic_control, X86CPU),
         VMSTATE_UINT64(env.msr_hv_synic_evt_page, X86CPU),
diff --git a/target/i386/mem_helper.c b/target/i386/mem_helper.c
index 30c26b9..6cc53bc 100644
--- a/target/i386/mem_helper.c
+++ b/target/i386/mem_helper.c
@@ -23,6 +23,7 @@
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "qemu/int128.h"
+#include "qemu/atomic128.h"
 #include "tcg.h"
 
 void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0)
@@ -137,10 +138,7 @@
 
     if ((a0 & 0xf) != 0) {
         raise_exception_ra(env, EXCP0D_GPF, ra);
-    } else {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
+    } else if (HAVE_CMPXCHG128) {
         int eflags = cpu_cc_compute_all(env, CC_OP);
 
         Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
@@ -159,7 +157,8 @@
             eflags &= ~CC_Z;
         }
         CC_SRC = eflags;
-#endif
+    } else {
+        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
     }
 }
 #endif
diff --git a/target/i386/seg_helper.c b/target/i386/seg_helper.c
index d1cbc6e..33714bc 100644
--- a/target/i386/seg_helper.c
+++ b/target/i386/seg_helper.c
@@ -1244,7 +1244,7 @@
     }
     if (env->cr[0] & CR0_PE_MASK) {
 #if !defined(CONFIG_USER_ONLY)
-        if (env->hflags & HF_SVMI_MASK) {
+        if (env->hflags & HF_GUEST_MASK) {
             handle_even_inj(env, intno, is_int, error_code, is_hw, 0);
         }
 #endif
@@ -1259,7 +1259,7 @@
         }
     } else {
 #if !defined(CONFIG_USER_ONLY)
-        if (env->hflags & HF_SVMI_MASK) {
+        if (env->hflags & HF_GUEST_MASK) {
             handle_even_inj(env, intno, is_int, error_code, is_hw, 1);
         }
 #endif
@@ -1267,7 +1267,7 @@
     }
 
 #if !defined(CONFIG_USER_ONLY)
-    if (env->hflags & HF_SVMI_MASK) {
+    if (env->hflags & HF_GUEST_MASK) {
         CPUState *cs = CPU(cpu);
         uint32_t event_inj = x86_ldl_phys(cs, env->vm_vmcb +
                                       offsetof(struct vmcb,
@@ -1319,74 +1319,66 @@
 {
     X86CPU *cpu = X86_CPU(cs);
     CPUX86State *env = &cpu->env;
-    bool ret = false;
+    int intno;
 
+    interrupt_request = x86_cpu_pending_interrupt(cs, interrupt_request);
+    if (!interrupt_request) {
+        return false;
+    }
+
+    /* Don't process multiple interrupt requests in a single call.
+     * This is required to make icount-driven execution deterministic.
+     */
+    switch (interrupt_request) {
 #if !defined(CONFIG_USER_ONLY)
-    if (interrupt_request & CPU_INTERRUPT_POLL) {
+    case CPU_INTERRUPT_POLL:
         cs->interrupt_request &= ~CPU_INTERRUPT_POLL;
         apic_poll_irq(cpu->apic_state);
-        /* Don't process multiple interrupt requests in a single call.
-           This is required to make icount-driven execution deterministic. */
-        return true;
-    }
+        break;
 #endif
-    if (interrupt_request & CPU_INTERRUPT_SIPI) {
+    case CPU_INTERRUPT_SIPI:
         do_cpu_sipi(cpu);
-        ret = true;
-    } else if (env->hflags2 & HF2_GIF_MASK) {
-        if ((interrupt_request & CPU_INTERRUPT_SMI) &&
-            !(env->hflags & HF_SMM_MASK)) {
-            cpu_svm_check_intercept_param(env, SVM_EXIT_SMI, 0, 0);
-            cs->interrupt_request &= ~CPU_INTERRUPT_SMI;
-            do_smm_enter(cpu);
-            ret = true;
-        } else if ((interrupt_request & CPU_INTERRUPT_NMI) &&
-                   !(env->hflags2 & HF2_NMI_MASK)) {
-            cpu_svm_check_intercept_param(env, SVM_EXIT_NMI, 0, 0);
-            cs->interrupt_request &= ~CPU_INTERRUPT_NMI;
-            env->hflags2 |= HF2_NMI_MASK;
-            do_interrupt_x86_hardirq(env, EXCP02_NMI, 1);
-            ret = true;
-        } else if (interrupt_request & CPU_INTERRUPT_MCE) {
-            cs->interrupt_request &= ~CPU_INTERRUPT_MCE;
-            do_interrupt_x86_hardirq(env, EXCP12_MCHK, 0);
-            ret = true;
-        } else if ((interrupt_request & CPU_INTERRUPT_HARD) &&
-                   (((env->hflags2 & HF2_VINTR_MASK) &&
-                     (env->hflags2 & HF2_HIF_MASK)) ||
-                    (!(env->hflags2 & HF2_VINTR_MASK) &&
-                     (env->eflags & IF_MASK &&
-                      !(env->hflags & HF_INHIBIT_IRQ_MASK))))) {
-            int intno;
-            cpu_svm_check_intercept_param(env, SVM_EXIT_INTR, 0, 0);
-            cs->interrupt_request &= ~(CPU_INTERRUPT_HARD |
-                                       CPU_INTERRUPT_VIRQ);
-            intno = cpu_get_pic_interrupt(env);
-            qemu_log_mask(CPU_LOG_TB_IN_ASM,
-                          "Servicing hardware INT=0x%02x\n", intno);
-            do_interrupt_x86_hardirq(env, intno, 1);
-            /* ensure that no TB jump will be modified as
-               the program flow was changed */
-            ret = true;
+        break;
+    case CPU_INTERRUPT_SMI:
+        cpu_svm_check_intercept_param(env, SVM_EXIT_SMI, 0, 0);
+        cs->interrupt_request &= ~CPU_INTERRUPT_SMI;
+        do_smm_enter(cpu);
+        break;
+    case CPU_INTERRUPT_NMI:
+        cpu_svm_check_intercept_param(env, SVM_EXIT_NMI, 0, 0);
+        cs->interrupt_request &= ~CPU_INTERRUPT_NMI;
+        env->hflags2 |= HF2_NMI_MASK;
+        do_interrupt_x86_hardirq(env, EXCP02_NMI, 1);
+        break;
+    case CPU_INTERRUPT_MCE:
+        cs->interrupt_request &= ~CPU_INTERRUPT_MCE;
+        do_interrupt_x86_hardirq(env, EXCP12_MCHK, 0);
+        break;
+    case CPU_INTERRUPT_HARD:
+        cpu_svm_check_intercept_param(env, SVM_EXIT_INTR, 0, 0);
+        cs->interrupt_request &= ~(CPU_INTERRUPT_HARD |
+                                   CPU_INTERRUPT_VIRQ);
+        intno = cpu_get_pic_interrupt(env);
+        qemu_log_mask(CPU_LOG_TB_IN_ASM,
+                      "Servicing hardware INT=0x%02x\n", intno);
+        do_interrupt_x86_hardirq(env, intno, 1);
+        break;
 #if !defined(CONFIG_USER_ONLY)
-        } else if ((interrupt_request & CPU_INTERRUPT_VIRQ) &&
-                   (env->eflags & IF_MASK) &&
-                   !(env->hflags & HF_INHIBIT_IRQ_MASK)) {
-            int intno;
-            /* FIXME: this should respect TPR */
-            cpu_svm_check_intercept_param(env, SVM_EXIT_VINTR, 0, 0);
-            intno = x86_ldl_phys(cs, env->vm_vmcb
+    case CPU_INTERRUPT_VIRQ:
+        /* FIXME: this should respect TPR */
+        cpu_svm_check_intercept_param(env, SVM_EXIT_VINTR, 0, 0);
+        intno = x86_ldl_phys(cs, env->vm_vmcb
                              + offsetof(struct vmcb, control.int_vector));
-            qemu_log_mask(CPU_LOG_TB_IN_ASM,
-                          "Servicing virtual hardware INT=0x%02x\n", intno);
-            do_interrupt_x86_hardirq(env, intno, 1);
-            cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
-            ret = true;
+        qemu_log_mask(CPU_LOG_TB_IN_ASM,
+                      "Servicing virtual hardware INT=0x%02x\n", intno);
+        do_interrupt_x86_hardirq(env, intno, 1);
+        cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
+        break;
 #endif
-        }
     }
 
-    return ret;
+    /* Ensure that no TB jump will be modified as the program flow was changed.  */
+    return true;
 }
 
 void helper_lldt(CPUX86State *env, int selector)
diff --git a/target/i386/svm_helper.c b/target/i386/svm_helper.c
index 342ece0..9fd22a8 100644
--- a/target/i386/svm_helper.c
+++ b/target/i386/svm_helper.c
@@ -228,7 +228,7 @@
     }
 
     /* enable intercepts */
-    env->hflags |= HF_SVMI_MASK;
+    env->hflags |= HF_GUEST_MASK;
 
     env->tsc_offset = x86_ldq_phys(cs, env->vm_vmcb +
                                offsetof(struct vmcb, control.tsc_offset));
@@ -503,7 +503,7 @@
 {
     CPUState *cs = CPU(x86_env_get_cpu(env));
 
-    if (likely(!(env->hflags & HF_SVMI_MASK))) {
+    if (likely(!(env->hflags & HF_GUEST_MASK))) {
         return;
     }
     switch (type) {
@@ -697,7 +697,7 @@
 
     /* Reload the host state from vm_hsave */
     env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK);
-    env->hflags &= ~HF_SVMI_MASK;
+    env->hflags &= ~HF_GUEST_MASK;
     env->intercept = 0;
     env->intercept_exceptions = 0;
     cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 1f9d1d9..83c1ebe 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -72,27 +72,15 @@
 //#define MACRO_TEST   1
 
 /* global register indexes */
-static TCGv cpu_A0;
-static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
+static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
 static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
-/* local temps */
-static TCGv cpu_T0, cpu_T1;
-/* local register indexes (only used inside old micro ops) */
-static TCGv cpu_tmp0, cpu_tmp4;
-static TCGv_ptr cpu_ptr0, cpu_ptr1;
-static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
-static TCGv_i64 cpu_tmp1_i64;
 
 #include "exec/gen-icount.h"
 
-#ifdef TARGET_X86_64
-static int x86_64_hregs;
-#endif
-
 typedef struct DisasContext {
     DisasContextBase base;
 
@@ -117,6 +105,9 @@
     int ss32;   /* 32 bit stack segment */
     CCOp cc_op;  /* current CC operation */
     bool cc_op_dirty;
+#ifdef TARGET_X86_64
+    bool x86_64_hregs;
+#endif
     int addseg; /* non zero if either DS/ES/SS have a non zero base */
     int f_st;   /* currently unused */
     int vm86;   /* vm86 mode */
@@ -135,6 +126,22 @@
     int cpuid_ext3_features;
     int cpuid_7_0_ebx_features;
     int cpuid_xsave_features;
+
+    /* TCG local temps */
+    TCGv cc_srcT;
+    TCGv A0;
+    TCGv T0;
+    TCGv T1;
+
+    /* TCG local register indexes (only used inside old micro ops) */
+    TCGv tmp0;
+    TCGv tmp4;
+    TCGv_ptr ptr0;
+    TCGv_ptr ptr1;
+    TCGv_i32 tmp2_i32;
+    TCGv_i32 tmp3_i32;
+    TCGv_i64 tmp1_i64;
+
     sigjmp_buf jmpbuf;
 } DisasContext;
 
@@ -244,7 +251,7 @@
         tcg_gen_discard_tl(cpu_cc_src2);
     }
     if (dead & USES_CC_SRCT) {
-        tcg_gen_discard_tl(cpu_cc_srcT);
+        tcg_gen_discard_tl(s->cc_srcT);
     }
 
     if (op == CC_OP_DYNAMIC) {
@@ -299,13 +306,13 @@
  * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
  * true for this special case, false otherwise.
  */
-static inline bool byte_reg_is_xH(int reg)
+static inline bool byte_reg_is_xH(DisasContext *s, int reg)
 {
     if (reg < 4) {
         return false;
     }
 #ifdef TARGET_X86_64
-    if (reg >= 8 || x86_64_hregs) {
+    if (reg >= 8 || s->x86_64_hregs) {
         return false;
     }
 #endif
@@ -352,11 +359,11 @@
     return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
 }
 
-static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
+static void gen_op_mov_reg_v(DisasContext *s, TCGMemOp ot, int reg, TCGv t0)
 {
     switch(ot) {
     case MO_8:
-        if (!byte_reg_is_xH(reg)) {
+        if (!byte_reg_is_xH(s, reg)) {
             tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
         } else {
             tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
@@ -380,9 +387,10 @@
     }
 }
 
-static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
+static inline
+void gen_op_mov_v_reg(DisasContext *s, TCGMemOp ot, TCGv t0, int reg)
 {
-    if (ot == MO_8 && byte_reg_is_xH(reg)) {
+    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
         tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
     } else {
         tcg_gen_mov_tl(t0, cpu_regs[reg]);
@@ -391,9 +399,9 @@
 
 static void gen_add_A0_im(DisasContext *s, int val)
 {
-    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+    tcg_gen_addi_tl(s->A0, s->A0, val); /* A0 += val */
     if (!CODE64(s)) {
-        tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
+        tcg_gen_ext32u_tl(s->A0, s->A0); /* not CODE64: wrap A0 to 32 bits */
     }
 }
 
@@ -402,16 +410,17 @@
     tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
 }
 
-static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
+static inline /* cpu_regs[reg] += val, written back with width 'size' */
+void gen_op_add_reg_im(DisasContext *s, TCGMemOp size, int reg, int32_t val)
 {
-    tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
-    gen_op_mov_reg_v(size, reg, cpu_tmp0);
+    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val); /* tmp0 = reg + val */
+    gen_op_mov_reg_v(s, size, reg, s->tmp0); /* write tmp0 back into reg */
 }
 
-static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
+static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg)
 {
-    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
-    gen_op_mov_reg_v(size, reg, cpu_tmp0);
+    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0); /* tmp0 = reg + T0 */
+    gen_op_mov_reg_v(s, size, reg, s->tmp0); /* write tmp0 back into reg */
 }
 
 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
@@ -427,16 +436,16 @@
 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
 {
     if (d == OR_TMP0) {
-        gen_op_st_v(s, idx, cpu_T0, cpu_A0);
+        gen_op_st_v(s, idx, s->T0, s->A0); /* d is OR_TMP0: T0 goes to A0 */
     } else {
-        gen_op_mov_reg_v(idx, d, cpu_T0);
+        gen_op_mov_reg_v(s, idx, d, s->T0); /* otherwise: T0 -> register d */
     }
 }
 
-static inline void gen_jmp_im(target_ulong pc)
+static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
 {
-    tcg_gen_movi_tl(cpu_tmp0, pc);
-    gen_op_jmp_v(cpu_tmp0);
+    tcg_gen_movi_tl(s->tmp0, pc); /* tmp0 = pc (immediate) */
+    gen_op_jmp_v(s->tmp0); /* gen_op_jmp_v stores it to env->eip */
 }
 
 /* Compute SEG:REG into A0.  SEG is selected from the override segment
@@ -449,7 +458,7 @@
 #ifdef TARGET_X86_64
     case MO_64:
         if (ovr_seg < 0) {
-            tcg_gen_mov_tl(cpu_A0, a0);
+            tcg_gen_mov_tl(s->A0, a0);
             return;
         }
         break;
@@ -460,14 +469,14 @@
             ovr_seg = def_seg;
         }
         if (ovr_seg < 0) {
-            tcg_gen_ext32u_tl(cpu_A0, a0);
+            tcg_gen_ext32u_tl(s->A0, a0);
             return;
         }
         break;
     case MO_16:
         /* 16 bit address */
-        tcg_gen_ext16u_tl(cpu_A0, a0);
-        a0 = cpu_A0;
+        tcg_gen_ext16u_tl(s->A0, a0);
+        a0 = s->A0;
         if (ovr_seg < 0) {
             if (s->addseg) {
                 ovr_seg = def_seg;
@@ -484,13 +493,13 @@
         TCGv seg = cpu_seg_base[ovr_seg];
 
         if (aflag == MO_64) {
-            tcg_gen_add_tl(cpu_A0, a0, seg);
+            tcg_gen_add_tl(s->A0, a0, seg);
         } else if (CODE64(s)) {
-            tcg_gen_ext32u_tl(cpu_A0, a0);
-            tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
+            tcg_gen_ext32u_tl(s->A0, a0);
+            tcg_gen_add_tl(s->A0, s->A0, seg);
         } else {
-            tcg_gen_add_tl(cpu_A0, a0, seg);
-            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
+            tcg_gen_add_tl(s->A0, a0, seg);
+            tcg_gen_ext32u_tl(s->A0, s->A0);
         }
     }
 }
@@ -505,10 +514,10 @@
     gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
 }
 
-static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
+static inline void gen_op_movl_T0_Dshift(DisasContext *s, TCGMemOp ot)
 {
-    tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
-    tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
+    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df)); /* T0 = df */
+    tcg_gen_shli_tl(s->T0, s->T0, ot); /* T0 <<= ot */
 };
 
 static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
@@ -552,18 +561,20 @@
     gen_ext_tl(reg, reg, ot, true);
 }
 
-static inline void gen_op_jnz_ecx(TCGMemOp size, TCGLabel *label1)
+static inline /* branch to label1 when ECX, extended per 'size', is != 0 */
+void gen_op_jnz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1)
 {
-    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
-    gen_extu(size, cpu_tmp0);
-    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
+    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]); /* test a copy, not ECX */
+    gen_extu(size, s->tmp0); /* presumably zero-extends to 'size' */
+    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1); /* tmp0 != 0 */
 }
 
-static inline void gen_op_jz_ecx(TCGMemOp size, TCGLabel *label1)
+static inline /* branch to label1 when ECX, extended per 'size', is == 0 */
+void gen_op_jz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1)
 {
-    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
-    gen_extu(size, cpu_tmp0);
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
+    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]); /* test a copy, not ECX */
+    gen_extu(size, s->tmp0); /* presumably zero-extends to 'size' */
+    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1); /* tmp0 == 0 */
 }
 
 static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
@@ -606,28 +617,28 @@
     target_ulong next_eip;
 
     if (s->pe && (s->cpl > s->iopl || s->vm86)) {
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
         switch (ot) {
         case MO_8:
-            gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
+            gen_helper_check_iob(cpu_env, s->tmp2_i32);
             break;
         case MO_16:
-            gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
+            gen_helper_check_iow(cpu_env, s->tmp2_i32);
             break;
         case MO_32:
-            gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
+            gen_helper_check_iol(cpu_env, s->tmp2_i32);
             break;
         default:
             tcg_abort();
         }
     }
-    if(s->flags & HF_SVMI_MASK) {
+    if(s->flags & HF_GUEST_MASK) {
         gen_update_cc_op(s);
-        gen_jmp_im(cur_eip);
+        gen_jmp_im(s, cur_eip);
         svm_flags |= (1 << (4 + ot));
         next_eip = s->pc - s->cs_base;
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-        gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+        gen_helper_svm_check_io(cpu_env, s->tmp2_i32,
                                 tcg_const_i32(svm_flags),
                                 tcg_const_i32(next_eip - cur_eip));
     }
@@ -636,42 +647,42 @@
 static inline void gen_movs(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_ld_v(s, ot, s->T0, s->A0); /* T0 = value loaded via A0 */
     gen_string_movl_A0_EDI(s);
-    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_ESI);
-    gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_op_st_v(s, ot, s->T0, s->A0); /* store T0 via A0 */
+    gen_op_movl_T0_Dshift(s, ot); /* T0 = env->df << ot */
+    gen_op_add_reg_T0(s, s->aflag, R_ESI); /* ESI += T0 */
+    gen_op_add_reg_T0(s, s->aflag, R_EDI); /* EDI += T0 */
 }
 
-static void gen_op_update1_cc(void)
+static void gen_op_update1_cc(DisasContext *s)
 {
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+    tcg_gen_mov_tl(cpu_cc_dst, s->T0); /* cc_dst = T0 */
 }
 
-static void gen_op_update2_cc(void)
+static void gen_op_update2_cc(DisasContext *s)
 {
-    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+    tcg_gen_mov_tl(cpu_cc_src, s->T1); /* cc_src = T1 */
+    tcg_gen_mov_tl(cpu_cc_dst, s->T0); /* cc_dst = T0 */
 }
 
-static void gen_op_update3_cc(TCGv reg)
+static void gen_op_update3_cc(DisasContext *s, TCGv reg)
 {
     tcg_gen_mov_tl(cpu_cc_src2, reg);
-    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+    tcg_gen_mov_tl(cpu_cc_src, s->T1); /* cc_src = T1 */
+    tcg_gen_mov_tl(cpu_cc_dst, s->T0); /* cc_dst = T0 */
 }
 
-static inline void gen_op_testl_T0_T1_cc(void)
+static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
 {
-    tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
+    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1); /* cc_dst = T0 & T1 */
 }
 
-static void gen_op_update_neg_cc(void)
+static void gen_op_update_neg_cc(DisasContext *s)
 {
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-    tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
-    tcg_gen_movi_tl(cpu_cc_srcT, 0);
+    tcg_gen_mov_tl(cpu_cc_dst, s->T0); /* cc_dst = T0 */
+    tcg_gen_neg_tl(cpu_cc_src, s->T0); /* cc_src = -T0 */
+    tcg_gen_movi_tl(s->cc_srcT, 0); /* cc_srcT = 0 */
 }
 
 /* compute all eflags to cc_src */
@@ -739,17 +750,17 @@
     case CC_OP_SUBB ... CC_OP_SUBQ:
         /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
         size = s->cc_op - CC_OP_SUBB;
-        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
         /* If no temporary was used, be careful not to alias t1 and t0.  */
-        t0 = t1 == cpu_cc_src ? cpu_tmp0 : reg;
-        tcg_gen_mov_tl(t0, cpu_cc_srcT);
+        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
+        tcg_gen_mov_tl(t0, s->cc_srcT);
         gen_extu(size, t0);
         goto add_sub;
 
     case CC_OP_ADDB ... CC_OP_ADDQ:
         /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
         size = s->cc_op - CC_OP_ADDB;
-        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
         t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
     add_sub:
         return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
@@ -899,10 +910,10 @@
         size = s->cc_op - CC_OP_SUBB;
         switch (jcc_op) {
         case JCC_BE:
-            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
-            gen_extu(size, cpu_tmp4);
-            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
-            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
+            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
+            gen_extu(size, s->tmp4);
+            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
+            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                                .reg2 = t0, .mask = -1, .use_reg2 = true };
             break;
 
@@ -912,10 +923,10 @@
         case JCC_LE:
             cond = TCG_COND_LE;
         fast_jcc_l:
-            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
-            gen_exts(size, cpu_tmp4);
-            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
-            cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
+            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
+            gen_exts(size, s->tmp4);
+            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
+            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                                .reg2 = t0, .mask = -1, .use_reg2 = true };
             break;
 
@@ -951,7 +962,7 @@
         case JCC_L:
             gen_compute_eflags(s);
             if (reg == cpu_cc_src) {
-                reg = cpu_tmp0;
+                reg = s->tmp0;
             }
             tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
             tcg_gen_xor_tl(reg, reg, cpu_cc_src);
@@ -962,7 +973,7 @@
         case JCC_LE:
             gen_compute_eflags(s);
             if (reg == cpu_cc_src) {
-                reg = cpu_tmp0;
+                reg = s->tmp0;
             }
             tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
             tcg_gen_xor_tl(reg, reg, cpu_cc_src);
@@ -1018,11 +1029,11 @@
    value 'b'. In the fast case, T0 is guaranted not to be used. */
 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
 {
-    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
+    CCPrepare cc = gen_prepare_cc(s, b, s->T0);
 
     if (cc.mask != -1) {
-        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
-        cc.reg = cpu_T0;
+        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
+        cc.reg = s->T0;
     }
     if (cc.use_reg2) {
         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
@@ -1036,12 +1047,12 @@
    A translation block must end soon.  */
 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
 {
-    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
+    CCPrepare cc = gen_prepare_cc(s, b, s->T0);
 
     gen_update_cc_op(s);
     if (cc.mask != -1) {
-        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
-        cc.reg = cpu_T0;
+        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
+        cc.reg = s->T0;
     }
     set_cc_op(s, CC_OP_DYNAMIC);
     if (cc.use_reg2) {
@@ -1057,7 +1068,7 @@
 {
     TCGLabel *l1 = gen_new_label();
     TCGLabel *l2 = gen_new_label();
-    gen_op_jnz_ecx(s->aflag, l1);
+    gen_op_jnz_ecx(s, s->aflag, l1);
     gen_set_label(l2);
     gen_jmp_tb(s, next_eip, 1);
     gen_set_label(l1);
@@ -1066,40 +1077,40 @@
 
 static inline void gen_stos(DisasContext *s, TCGMemOp ot)
 {
-    gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
+    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); /* T0 = EAX */
     gen_string_movl_A0_EDI(s);
-    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_op_st_v(s, ot, s->T0, s->A0); /* store T0 via A0, width ot */
+    gen_op_movl_T0_Dshift(s, ot); /* T0 = env->df << ot */
+    gen_op_add_reg_T0(s, s->aflag, R_EDI); /* EDI += T0 */
 }
 
 static inline void gen_lods(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
-    gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_ESI);
+    gen_op_ld_v(s, ot, s->T0, s->A0); /* T0 = value loaded via A0 */
+    gen_op_mov_reg_v(s, ot, R_EAX, s->T0); /* EAX (width ot) = T0 */
+    gen_op_movl_T0_Dshift(s, ot); /* T0 = env->df << ot */
+    gen_op_add_reg_T0(s, s->aflag, R_ESI); /* ESI += T0 */
 }
 
 static inline void gen_scas(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+    gen_op_ld_v(s, ot, s->T1, s->A0); /* T1 = value loaded via A0 */
     gen_op(s, OP_CMPL, ot, R_EAX);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_op_movl_T0_Dshift(s, ot); /* T0 = env->df << ot */
+    gen_op_add_reg_T0(s, s->aflag, R_EDI); /* EDI += T0 */
 }
 
 static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+    gen_op_ld_v(s, ot, s->T1, s->A0); /* T1 = value loaded via A0 */
     gen_string_movl_A0_ESI(s);
     gen_op(s, OP_CMPL, ot, OR_TMP0);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_ESI);
-    gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_op_movl_T0_Dshift(s, ot); /* T0 = env->df << ot */
+    gen_op_add_reg_T0(s, s->aflag, R_ESI); /* ESI += T0 */
+    gen_op_add_reg_T0(s, s->aflag, R_EDI); /* EDI += T0 */
 }
 
 static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
@@ -1123,15 +1134,15 @@
     gen_string_movl_A0_EDI(s);
     /* Note: we must do this dummy write first to be restartable in
        case of page fault. */
-    tcg_gen_movi_tl(cpu_T0, 0);
-    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
-    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
-    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
-    gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
-    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_EDI);
-    gen_bpt_io(s, cpu_tmp2_i32, ot);
+    tcg_gen_movi_tl(s->T0, 0);
+    gen_op_st_v(s, ot, s->T0, s->A0);
+    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
+    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
+    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
+    gen_op_st_v(s, ot, s->T0, s->A0);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_EDI);
+    gen_bpt_io(s, s->tmp2_i32, ot);
     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
         gen_io_end();
     }
@@ -1143,15 +1154,15 @@
         gen_io_start();
     }
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_ld_v(s, ot, s->T0, s->A0);
 
-    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
-    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
-    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
-    gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
-    gen_op_movl_T0_Dshift(ot);
-    gen_op_add_reg_T0(s->aflag, R_ESI);
-    gen_bpt_io(s, cpu_tmp2_i32, ot);
+    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
+    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
+    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
+    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
+    gen_op_movl_T0_Dshift(s, ot);
+    gen_op_add_reg_T0(s, s->aflag, R_ESI);
+    gen_bpt_io(s, s->tmp2_i32, ot);
     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
         gen_io_end();
     }
@@ -1167,11 +1178,11 @@
     gen_update_cc_op(s);                                                      \
     l2 = gen_jz_ecx_string(s, next_eip);                                      \
     gen_ ## op(s, ot);                                                        \
-    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
+    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
     /* a loop would cause two single step exceptions if ECX = 1               \
        before rep string_insn */                                              \
     if (s->repz_opt)                                                          \
-        gen_op_jz_ecx(s->aflag, l2);                                          \
+        gen_op_jz_ecx(s, s->aflag, l2);                                       \
     gen_jmp(s, cur_eip);                                                      \
 }
 
@@ -1185,11 +1196,11 @@
     gen_update_cc_op(s);                                                      \
     l2 = gen_jz_ecx_string(s, next_eip);                                      \
     gen_ ## op(s, ot);                                                        \
-    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
+    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
     gen_update_cc_op(s);                                                      \
     gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
     if (s->repz_opt)                                                          \
-        gen_op_jz_ecx(s->aflag, l2);                                          \
+        gen_op_jz_ecx(s, s->aflag, l2);                                       \
     gen_jmp(s, cur_eip);                                                      \
 }
 
@@ -1261,103 +1272,103 @@
 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
 {
     if (d != OR_TMP0) {
-        gen_op_mov_v_reg(ot, cpu_T0, d);
+        gen_op_mov_v_reg(s1, ot, s1->T0, d);
     } else if (!(s1->prefix & PREFIX_LOCK)) {
-        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
     }
     switch(op) {
     case OP_ADCL:
-        gen_compute_eflags_c(s1, cpu_tmp4);
+        gen_compute_eflags_c(s1, s1->tmp4);
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
-            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
+            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
-            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
+            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
+            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update3_cc(cpu_tmp4);
+        gen_op_update3_cc(s1, s1->tmp4);
         set_cc_op(s1, CC_OP_ADCB + ot);
         break;
     case OP_SBBL:
-        gen_compute_eflags_c(s1, cpu_tmp4);
+        gen_compute_eflags_c(s1, s1->tmp4);
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
-            tcg_gen_neg_tl(cpu_T0, cpu_T0);
-            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
+            tcg_gen_neg_tl(s1->T0, s1->T0);
+            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
-            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
+            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
+            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update3_cc(cpu_tmp4);
+        gen_op_update3_cc(s1, s1->tmp4);
         set_cc_op(s1, CC_OP_SBBB + ot);
         break;
     case OP_ADDL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update2_cc();
+        gen_op_update2_cc(s1);
         set_cc_op(s1, CC_OP_ADDB + ot);
         break;
     case OP_SUBL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_neg_tl(cpu_T0, cpu_T1);
-            tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0,
+            tcg_gen_neg_tl(s1->T0, s1->T1);
+            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                         s1->mem_index, ot | MO_LE);
-            tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1);
+            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
         } else {
-            tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
-            tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
+            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update2_cc();
+        gen_op_update2_cc(s1);
         set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     default:
     case OP_ANDL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update1_cc();
+        gen_op_update1_cc(s1);
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_ORL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update1_cc();
+        gen_op_update1_cc(s1);
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_XORL:
         if (s1->prefix & PREFIX_LOCK) {
-            tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
+            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                         s1->mem_index, ot | MO_LE);
         } else {
-            tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
             gen_op_st_rm_T0_A0(s1, ot, d);
         }
-        gen_op_update1_cc();
+        gen_op_update1_cc(s1);
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_CMPL:
-        tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
-        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
-        tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
+        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
+        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
+        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
         set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     }
@@ -1367,21 +1378,21 @@
 static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
 {
     if (s1->prefix & PREFIX_LOCK) {
-        tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
-        tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
+        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                     s1->mem_index, ot | MO_LE);
     } else {
         if (d != OR_TMP0) {
-            gen_op_mov_v_reg(ot, cpu_T0, d);
+            gen_op_mov_v_reg(s1, ot, s1->T0, d);
         } else {
-            gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
+            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
         }
-        tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
+        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
         gen_op_st_rm_T0_A0(s1, ot, d);
     }
 
     gen_compute_eflags_c(s1, cpu_cc_src);
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
     set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
 }
 
@@ -1410,19 +1421,19 @@
     tcg_temp_free(z_tl);
 
     /* Get the two potential CC_OP values into temporaries.  */
-    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
+    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
     if (s->cc_op == CC_OP_DYNAMIC) {
         oldop = cpu_cc_op;
     } else {
-        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
-        oldop = cpu_tmp3_i32;
+        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
+        oldop = s->tmp3_i32;
     }
 
     /* Conditionally store the CC_OP value.  */
     z32 = tcg_const_i32(0);
     s32 = tcg_temp_new_i32();
     tcg_gen_trunc_tl_i32(s32, count);
-    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
+    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
     tcg_temp_free_i32(z32);
     tcg_temp_free_i32(s32);
 
@@ -1437,33 +1448,33 @@
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
     }
 
-    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
-    tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);
+    tcg_gen_andi_tl(s->T1, s->T1, mask);
+    tcg_gen_subi_tl(s->tmp0, s->T1, 1);
 
     if (is_right) {
         if (is_arith) {
-            gen_exts(ot, cpu_T0);
-            tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
-            tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
+            gen_exts(ot, s->T0);
+            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
+            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
         } else {
-            gen_extu(ot, cpu_T0);
-            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
-            tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
+            gen_extu(ot, s->T0);
+            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
+            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
         }
     } else {
-        tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
-        tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
+        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
+        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
     }
 
     /* store */
     gen_op_st_rm_T0_A0(s, ot, op1);
 
-    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
+    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
 }
 
 static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
@@ -1473,25 +1484,25 @@
 
     /* load */
     if (op1 == OR_TMP0)
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     else
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
 
     op2 &= mask;
     if (op2 != 0) {
         if (is_right) {
             if (is_arith) {
-                gen_exts(ot, cpu_T0);
-                tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
-                tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
+                gen_exts(ot, s->T0);
+                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
+                tcg_gen_sari_tl(s->T0, s->T0, op2);
             } else {
-                gen_extu(ot, cpu_T0);
-                tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
-                tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
+                gen_extu(ot, s->T0);
+                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
+                tcg_gen_shri_tl(s->T0, s->T0, op2);
             }
         } else {
-            tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
-            tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
+            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
+            tcg_gen_shli_tl(s->T0, s->T0, op2);
         }
     }
 
@@ -1500,8 +1511,8 @@
 
     /* update eflags if non zero shift */
     if (op2 != 0) {
-        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
-        tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
+        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
         set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
     }
 }
@@ -1513,41 +1524,41 @@
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
     }
 
-    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
+    tcg_gen_andi_tl(s->T1, s->T1, mask);
 
     switch (ot) {
     case MO_8:
         /* Replicate the 8-bit input so that a 32-bit rotate works.  */
-        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
-        tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
+        tcg_gen_ext8u_tl(s->T0, s->T0);
+        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
         goto do_long;
     case MO_16:
         /* Replicate the 16-bit input so that a 32-bit rotate works.  */
-        tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
+        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
         goto do_long;
     do_long:
 #ifdef TARGET_X86_64
     case MO_32:
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
         if (is_right) {
-            tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
         } else {
-            tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
         }
-        tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
+        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
         break;
 #endif
     default:
         if (is_right) {
-            tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
         } else {
-            tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
         }
         break;
     }
@@ -1563,12 +1574,12 @@
        since we've computed the flags into CC_SRC, these variables are
        currently dead.  */
     if (is_right) {
-        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
-        tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
+        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
+        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
         tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
     } else {
-        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
-        tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
+        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
+        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
     }
     tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
     tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
@@ -1579,11 +1590,11 @@
        exactly as we computed above.  */
     t0 = tcg_const_i32(0);
     t1 = tcg_temp_new_i32();
-    tcg_gen_trunc_tl_i32(t1, cpu_T1);
-    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX); 
-    tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
+    tcg_gen_trunc_tl_i32(t1, s->T1);
+    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
+    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
-                        cpu_tmp2_i32, cpu_tmp3_i32);
+                        s->tmp2_i32, s->tmp3_i32);
     tcg_temp_free_i32(t0);
     tcg_temp_free_i32(t1);
 
@@ -1599,9 +1610,9 @@
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
     }
 
     op2 &= mask;
@@ -1609,20 +1620,20 @@
         switch (ot) {
 #ifdef TARGET_X86_64
         case MO_32:
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
             if (is_right) {
-                tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
+                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
             } else {
-                tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
+                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
             }
-            tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
+            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
             break;
 #endif
         default:
             if (is_right) {
-                tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
+                tcg_gen_rotri_tl(s->T0, s->T0, op2);
             } else {
-                tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
+                tcg_gen_rotli_tl(s->T0, s->T0, op2);
             }
             break;
         case MO_8:
@@ -1635,10 +1646,10 @@
             if (is_right) {
                 shift = mask + 1 - shift;
             }
-            gen_extu(ot, cpu_T0);
-            tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
-            tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
-            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
+            gen_extu(ot, s->T0);
+            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
+            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
+            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
             break;
         }
     }
@@ -1655,12 +1666,12 @@
            since we've computed the flags into CC_SRC, these variables are
            currently dead.  */
         if (is_right) {
-            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
-            tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
+            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
+            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
             tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
         } else {
-            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
-            tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
+            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
+            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
         }
         tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
         tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
@@ -1677,24 +1688,24 @@
 
     /* load */
     if (op1 == OR_TMP0)
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     else
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
     
     if (is_right) {
         switch (ot) {
         case MO_8:
-            gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
             break;
         case MO_16:
-            gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
             break;
         case MO_32:
-            gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
             break;
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
             break;
 #endif
         default:
@@ -1703,17 +1714,17 @@
     } else {
         switch (ot) {
         case MO_8:
-            gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
             break;
         case MO_16:
-            gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
             break;
         case MO_32:
-            gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
             break;
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
+            gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
             break;
 #endif
         default:
@@ -1733,9 +1744,9 @@
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, ot, s->T0, s->A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T0, op1);
+        gen_op_mov_v_reg(s, ot, s->T0, op1);
     }
 
     count = tcg_temp_new();
@@ -1747,69 +1758,69 @@
            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
            portion by constructing it as a 32-bit value.  */
         if (is_right) {
-            tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
-            tcg_gen_mov_tl(cpu_T1, cpu_T0);
-            tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
+            tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
+            tcg_gen_mov_tl(s->T1, s->T0);
+            tcg_gen_mov_tl(s->T0, s->tmp0);
         } else {
-            tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
+            tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
         }
         /* FALLTHRU */
 #ifdef TARGET_X86_64
     case MO_32:
         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
-        tcg_gen_subi_tl(cpu_tmp0, count, 1);
+        tcg_gen_subi_tl(s->tmp0, count, 1);
         if (is_right) {
-            tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
-            tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
-            tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
+            tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
+            tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
+            tcg_gen_shr_i64(s->T0, s->T0, count);
         } else {
-            tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
-            tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
-            tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
-            tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
-            tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
+            tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
+            tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
+            tcg_gen_shl_i64(s->T0, s->T0, count);
+            tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
+            tcg_gen_shri_i64(s->T0, s->T0, 32);
         }
         break;
 #endif
     default:
-        tcg_gen_subi_tl(cpu_tmp0, count, 1);
+        tcg_gen_subi_tl(s->tmp0, count, 1);
         if (is_right) {
-            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
+            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
 
-            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
-            tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
-            tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
+            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
+            tcg_gen_shr_tl(s->T0, s->T0, count);
+            tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
         } else {
-            tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
+            tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
             if (ot == MO_16) {
                 /* Only needed if count > 16, for Intel behaviour.  */
-                tcg_gen_subfi_tl(cpu_tmp4, 33, count);
-                tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
-                tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
+                tcg_gen_subfi_tl(s->tmp4, 33, count);
+                tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
+                tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
             }
 
-            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
-            tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
-            tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
+            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
+            tcg_gen_shl_tl(s->T0, s->T0, count);
+            tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
         }
-        tcg_gen_movi_tl(cpu_tmp4, 0);
-        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
-                           cpu_tmp4, cpu_T1);
-        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
+        tcg_gen_movi_tl(s->tmp4, 0);
+        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
+                           s->tmp4, s->T1);
+        tcg_gen_or_tl(s->T0, s->T0, s->T1);
         break;
     }
 
     /* store */
     gen_op_st_rm_T0_A0(s, ot, op1);
 
-    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
+    gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
     tcg_temp_free(count);
 }
 
 static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
 {
     if (s != OR_TMP1)
-        gen_op_mov_v_reg(ot, cpu_T1, s);
+        gen_op_mov_v_reg(s1, ot, s1->T1, s);
     switch(op) {
     case OP_ROL:
         gen_rot_rm_T1(s1, ot, d, 0);
@@ -1857,7 +1868,7 @@
         break;
     default:
         /* currently not optimized */
-        tcg_gen_movi_tl(cpu_T1, c);
+        tcg_gen_movi_tl(s1->T1, c);
         gen_shift(s1, op, ot, d, OR_TMP1);
         break;
     }
@@ -2048,7 +2059,7 @@
 }
 
 /* Compute the address, with a minimum number of TCG ops.  */
-static TCGv gen_lea_modrm_1(AddressParts a)
+static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
 {
     TCGv ea = NULL;
 
@@ -2056,22 +2067,22 @@
         if (a.scale == 0) {
             ea = cpu_regs[a.index];
         } else {
-            tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale);
-            ea = cpu_A0;
+            tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
+            ea = s->A0;
         }
         if (a.base >= 0) {
-            tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]);
-            ea = cpu_A0;
+            tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
+            ea = s->A0;
         }
     } else if (a.base >= 0) {
         ea = cpu_regs[a.base];
     }
     if (!ea) {
-        tcg_gen_movi_tl(cpu_A0, a.disp);
-        ea = cpu_A0;
+        tcg_gen_movi_tl(s->A0, a.disp);
+        ea = s->A0;
     } else if (a.disp != 0) {
-        tcg_gen_addi_tl(cpu_A0, ea, a.disp);
-        ea = cpu_A0;
+        tcg_gen_addi_tl(s->A0, ea, a.disp);
+        ea = s->A0;
     }
 
     return ea;
@@ -2080,7 +2091,7 @@
 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
 {
     AddressParts a = gen_lea_modrm_0(env, s, modrm);
-    TCGv ea = gen_lea_modrm_1(a);
+    TCGv ea = gen_lea_modrm_1(s, a);
     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
 }
 
@@ -2093,21 +2104,21 @@
 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
                       TCGCond cond, TCGv_i64 bndv)
 {
-    TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm));
+    TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
 
-    tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
+    tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
     if (!CODE64(s)) {
-        tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64);
+        tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
     }
-    tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv);
-    tcg_gen_extrl_i64_i32(cpu_tmp2_i32, cpu_tmp1_i64);
-    gen_helper_bndck(cpu_env, cpu_tmp2_i32);
+    tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
+    tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
+    gen_helper_bndck(cpu_env, s->tmp2_i32);
 }
 
 /* used for LEA and MOV AX, mem */
 static void gen_add_A0_ds_seg(DisasContext *s)
 {
-    gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
+    gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
 }
 
 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
@@ -2122,23 +2133,23 @@
     if (mod == 3) {
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_v_reg(ot, cpu_T0, reg);
-            gen_op_mov_reg_v(ot, rm, cpu_T0);
+                gen_op_mov_v_reg(s, ot, s->T0, reg);
+            gen_op_mov_reg_v(s, ot, rm, s->T0);
         } else {
-            gen_op_mov_v_reg(ot, cpu_T0, rm);
+            gen_op_mov_v_reg(s, ot, s->T0, rm);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
         }
     } else {
         gen_lea_modrm(env, s, modrm);
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_v_reg(ot, cpu_T0, reg);
-            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_mov_v_reg(s, ot, s->T0, reg);
+            gen_op_st_v(s, ot, s->T0, s->A0);
         } else {
-            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, ot, s->T0, s->A0);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
         }
     }
 }
@@ -2192,13 +2203,13 @@
     if (use_goto_tb(s, pc))  {
         /* jump to same page: we can use a direct jump */
         tcg_gen_goto_tb(tb_num);
-        gen_jmp_im(eip);
+        gen_jmp_im(s, eip);
         tcg_gen_exit_tb(s->base.tb, tb_num);
         s->base.is_jmp = DISAS_NORETURN;
     } else {
         /* jump to another page */
-        gen_jmp_im(eip);
-        gen_jr(s, cpu_tmp0);
+        gen_jmp_im(s, eip);
+        gen_jr(s, s->tmp0);
     }
 }
 
@@ -2220,11 +2231,11 @@
         l2 = gen_new_label();
         gen_jcc1(s, b, l1);
 
-        gen_jmp_im(next_eip);
+        gen_jmp_im(s, next_eip);
         tcg_gen_br(l2);
 
         gen_set_label(l1);
-        gen_jmp_im(val);
+        gen_jmp_im(s, val);
         gen_set_label(l2);
         gen_eob(s);
     }
@@ -2237,7 +2248,7 @@
 
     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
 
-    cc = gen_prepare_cc(s, b, cpu_T1);
+    cc = gen_prepare_cc(s, b, s->T1);
     if (cc.mask != -1) {
         TCGv t0 = tcg_temp_new();
         tcg_gen_andi_tl(t0, cc.reg, cc.mask);
@@ -2247,9 +2258,9 @@
         cc.reg2 = tcg_const_tl(cc.imm);
     }
 
-    tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
-                       cpu_T0, cpu_regs[reg]);
-    gen_op_mov_reg_v(ot, reg, cpu_T0);
+    tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
+                       s->T0, cpu_regs[reg]);
+    gen_op_mov_reg_v(s, ot, reg, s->T0);
 
     if (cc.mask != -1) {
         tcg_temp_free(cc.reg);
@@ -2259,18 +2270,18 @@
     }
 }
 
-static inline void gen_op_movl_T0_seg(int seg_reg)
+static inline void gen_op_movl_T0_seg(DisasContext *s, int seg_reg)
 {
-    tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+    tcg_gen_ld32u_tl(s->T0, cpu_env,
                      offsetof(CPUX86State,segs[seg_reg].selector));
 }
 
-static inline void gen_op_movl_seg_T0_vm(int seg_reg)
+static inline void gen_op_movl_seg_T0_vm(DisasContext *s, int seg_reg)
 {
-    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
-    tcg_gen_st32_tl(cpu_T0, cpu_env,
+    tcg_gen_ext16u_tl(s->T0, s->T0);
+    tcg_gen_st32_tl(s->T0, cpu_env,
                     offsetof(CPUX86State,segs[seg_reg].selector));
-    tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
+    tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
 }
 
 /* move T0 to seg_reg and compute if the CPU state may change. Never
@@ -2278,8 +2289,8 @@
 static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
 {
     if (s->pe && !s->vm86) {
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
         /* abort translation because the addseg value may change or
            because ss32 may change. For R_SS, translation must always
            stop as a special handling must be done to disable hardware
@@ -2288,7 +2299,7 @@
             s->base.is_jmp = DISAS_TOO_MANY;
         }
     } else {
-        gen_op_movl_seg_T0_vm(seg_reg);
+        gen_op_movl_seg_T0_vm(s, seg_reg);
         if (seg_reg == R_SS) {
             s->base.is_jmp = DISAS_TOO_MANY;
         }
@@ -2305,10 +2316,10 @@
                               uint32_t type, uint64_t param)
 {
     /* no SVM activated; fast case */
-    if (likely(!(s->flags & HF_SVMI_MASK)))
+    if (likely(!(s->flags & HF_GUEST_MASK)))
         return;
     gen_update_cc_op(s);
-    gen_jmp_im(pc_start - s->cs_base);
+    gen_jmp_im(s, pc_start - s->cs_base);
     gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
                                          tcg_const_i64(param));
 }
@@ -2321,7 +2332,7 @@
 
 static inline void gen_stack_update(DisasContext *s, int addend)
 {
-    gen_op_add_reg_im(mo_stacksize(s), R_ESP, addend);
+    gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
 }
 
 /* Generate a push. It depends on ss32, addseg and dflag.  */
@@ -2330,20 +2341,20 @@
     TCGMemOp d_ot = mo_pushpop(s, s->dflag);
     TCGMemOp a_ot = mo_stacksize(s);
     int size = 1 << d_ot;
-    TCGv new_esp = cpu_A0;
+    TCGv new_esp = s->A0;
 
-    tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
+    tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
 
     if (!CODE64(s)) {
         if (s->addseg) {
-            new_esp = cpu_tmp4;
-            tcg_gen_mov_tl(new_esp, cpu_A0);
+            new_esp = s->tmp4;
+            tcg_gen_mov_tl(new_esp, s->A0);
         }
-        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
+        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
     }
 
-    gen_op_st_v(s, d_ot, val, cpu_A0);
-    gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
+    gen_op_st_v(s, d_ot, val, s->A0);
+    gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
 }
 
 /* two step pop is necessary for precise exceptions */
@@ -2352,7 +2363,7 @@
     TCGMemOp d_ot = mo_pushpop(s, s->dflag);
 
     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
-    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
+    gen_op_ld_v(s, d_ot, s->T0, s->A0);
 
     return d_ot;
 }
@@ -2375,9 +2386,9 @@
     int i;
 
     for (i = 0; i < 8; i++) {
-        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
-        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
-        gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
+        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
+        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
+        gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
     }
 
     gen_stack_update(s, -8 * size);
@@ -2395,10 +2406,10 @@
         if (7 - i == R_ESP) {
             continue;
         }
-        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
-        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
-        gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
-        gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
+        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
+        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
+        gen_op_ld_v(s, d_ot, s->T0, s->A0);
+        gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
     }
 
     gen_stack_update(s, 8 * size);
@@ -2411,9 +2422,9 @@
     int size = 1 << d_ot;
 
     /* Push BP; compute FrameTemp into T1.  */
-    tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
-    gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
-    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);
+    tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
+    gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
+    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
 
     level &= 31;
     if (level != 0) {
@@ -2421,27 +2432,27 @@
 
         /* Copy level-1 pointers from the previous frame.  */
         for (i = 1; i < level; ++i) {
-            tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
-            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
-            gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);
+            tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
+            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
+            gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
 
-            tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
-            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
-            gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
+            tcg_gen_subi_tl(s->A0, s->T1, size * i);
+            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
+            gen_op_st_v(s, d_ot, s->tmp0, s->A0);
         }
 
         /* Push the current FrameTemp as the last level.  */
-        tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
-        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
-        gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
+        tcg_gen_subi_tl(s->A0, s->T1, size * level);
+        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
+        gen_op_st_v(s, d_ot, s->T1, s->A0);
     }
 
     /* Copy the FrameTemp value to EBP.  */
-    gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);
+    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
 
     /* Compute the final value of ESP.  */
-    tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
-    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
+    tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
+    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
 }
 
 static void gen_leave(DisasContext *s)
@@ -2450,18 +2461,18 @@
     TCGMemOp a_ot = mo_stacksize(s);
 
     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
-    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
+    gen_op_ld_v(s, d_ot, s->T0, s->A0);
 
-    tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
+    tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
 
-    gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
-    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
+    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
+    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
 }
 
 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
 {
     gen_update_cc_op(s);
-    gen_jmp_im(cur_eip);
+    gen_jmp_im(s, cur_eip);
     gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
     s->base.is_jmp = DISAS_NORETURN;
 }
@@ -2498,7 +2509,7 @@
                           target_ulong cur_eip, target_ulong next_eip)
 {
     gen_update_cc_op(s);
-    gen_jmp_im(cur_eip);
+    gen_jmp_im(s, cur_eip);
     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
                                tcg_const_i32(next_eip - cur_eip));
     s->base.is_jmp = DISAS_NORETURN;
@@ -2507,7 +2518,7 @@
 static void gen_debug(DisasContext *s, target_ulong cur_eip)
 {
     gen_update_cc_op(s);
-    gen_jmp_im(cur_eip);
+    gen_jmp_im(s, cur_eip);
     gen_helper_debug(cpu_env);
     s->base.is_jmp = DISAS_NORETURN;
 }
@@ -2617,7 +2628,7 @@
     if (s->jmp_opt) {
         gen_goto_tb(s, tb_num, eip);
     } else {
-        gen_jmp_im(eip);
+        gen_jmp_im(s, eip);
         gen_eob(s);
     }
 }
@@ -2629,60 +2640,60 @@
 
 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
 {
-    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
+    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
 }
 
 static inline void gen_stq_env_A0(DisasContext *s, int offset)
 {
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
-    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
+    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
 }
 
 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
 {
     int mem_index = s->mem_index;
-    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
-    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
+    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
+    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
 }
 
 static inline void gen_sto_env_A0(DisasContext *s, int offset)
 {
     int mem_index = s->mem_index;
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
-    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
-    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
+    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
+    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
 }
 
-static inline void gen_op_movo(int d_offset, int s_offset)
+static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
 {
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
 }
 
-static inline void gen_op_movq(int d_offset, int s_offset)
+static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
 {
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
 }
 
-static inline void gen_op_movl(int d_offset, int s_offset)
+static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
 {
-    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
-    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
+    tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
+    tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
 }
 
-static inline void gen_op_movq_env_0(int d_offset)
+static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
 {
-    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+    tcg_gen_movi_i64(s->tmp1_i64, 0);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
 }
 
 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
@@ -3122,41 +3133,42 @@
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_Q(0)));
             } else {
-                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                     xmm_regs[reg].ZMM_L(0)));
-                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
+                gen_op_st_v(s, MO_32, s->T0, s->A0);
             }
             break;
         case 0x6e: /* movd mm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
-                tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
+                tcg_gen_st_tl(s->T0, cpu_env,
+                              offsetof(CPUX86State, fpregs[reg].mmx));
             } else
 #endif
             {
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx));
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-                gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
             }
             break;
         case 0x16e: /* movd xmm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
-                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
+                gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
             } else
 #endif
             {
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-                gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
             }
             break;
         case 0x6f: /* movq mm, ea */
@@ -3165,9 +3177,9 @@
                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
-                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
+                tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
                                offsetof(CPUX86State,fpregs[rm].mmx));
-                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
+                tcg_gen_st_i64(s->tmp1_i64, cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx));
             }
             break;
@@ -3182,22 +3194,26 @@
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
+                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
                             offsetof(CPUX86State,xmm_regs[rm]));
             }
             break;
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
-                tcg_gen_movi_tl(cpu_T0, 0);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
+                gen_op_ld_v(s, MO_32, s->T0, s->A0);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
+                tcg_gen_movi_tl(s->T0, 0);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
             }
             break;
@@ -3206,12 +3222,14 @@
                 gen_lea_modrm(env, s, modrm);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].ZMM_Q(0)));
-                tcg_gen_movi_tl(cpu_T0, 0);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
+                tcg_gen_movi_tl(s->T0, 0);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
             break;
@@ -3224,7 +3242,7 @@
             } else {
                 /* movhlps */
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
             }
             break;
@@ -3234,14 +3252,14 @@
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
             }
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
+            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
+            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
             break;
         case 0x312: /* movddup */
@@ -3251,10 +3269,10 @@
                                            xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
-            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
+            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
             break;
         case 0x016: /* movhps */
@@ -3266,7 +3284,7 @@
             } else {
                 /* movlhps */
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
             break;
@@ -3276,14 +3294,14 @@
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
             }
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
+            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
+            gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
             break;
         case 0x178:
@@ -3295,14 +3313,14 @@
                     goto illegal_op;
                 field_length = x86_ldub_code(env, s) & 0x3F;
                 bit_index = x86_ldub_code(env, s) & 0x3F;
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                     offsetof(CPUX86State,xmm_regs[reg]));
                 if (b1 == 1)
-                    gen_helper_extrq_i(cpu_env, cpu_ptr0,
+                    gen_helper_extrq_i(cpu_env, s->ptr0,
                                        tcg_const_i32(bit_index),
                                        tcg_const_i32(field_length));
                 else
-                    gen_helper_insertq_i(cpu_env, cpu_ptr0,
+                    gen_helper_insertq_i(cpu_env, s->ptr0,
                                          tcg_const_i32(bit_index),
                                          tcg_const_i32(field_length));
             }
@@ -3310,13 +3328,13 @@
         case 0x7e: /* movd ea, mm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T0, cpu_env,
+                tcg_gen_ld_i64(s->T0, cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx));
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+                tcg_gen_ld32u_tl(s->T0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
             }
@@ -3324,13 +3342,13 @@
         case 0x17e: /* movd ea, xmm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T0, cpu_env,
+                tcg_gen_ld_i64(s->T0, cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+                tcg_gen_ld32u_tl(s->T0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
             }
@@ -3342,10 +3360,10 @@
                                            xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
-            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
+            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
             break;
         case 0x7f: /* movq ea, mm */
             if (mod != 3) {
@@ -3353,7 +3371,7 @@
                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
-                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
+                gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
                             offsetof(CPUX86State,fpregs[reg].mmx));
             }
             break;
@@ -3368,18 +3386,19 @@
                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
+                gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
                             offsetof(CPUX86State,xmm_regs[reg]));
             }
             break;
         case 0x211: /* movss ea, xmm */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
-                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
+                tcg_gen_ld32u_tl(s->T0, cpu_env,
+                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
+                gen_op_st_v(s, MO_32, s->T0, s->A0);
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
+                gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
             }
             break;
@@ -3390,7 +3409,7 @@
                                            xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
             }
             break;
@@ -3425,16 +3444,20 @@
             }
             val = x86_ldub_code(env, s);
             if (is_xmm) {
-                tcg_gen_movi_tl(cpu_T0, val);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
-                tcg_gen_movi_tl(cpu_T0, 0);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1)));
+                tcg_gen_movi_tl(s->T0, val);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
+                tcg_gen_movi_tl(s->T0, 0);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
                 op1_offset = offsetof(CPUX86State,xmm_t0);
             } else {
-                tcg_gen_movi_tl(cpu_T0, val);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
-                tcg_gen_movi_tl(cpu_T0, 0);
-                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
+                tcg_gen_movi_tl(s->T0, val);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, mmx_t0.MMX_L(0)));
+                tcg_gen_movi_tl(s->T0, 0);
+                tcg_gen_st32_tl(s->T0, cpu_env,
+                                offsetof(CPUX86State, mmx_t0.MMX_L(1)));
                 op1_offset = offsetof(CPUX86State,mmx_t0);
             }
             sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
@@ -3449,23 +3472,23 @@
                 rm = (modrm & 7);
                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
             }
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
-            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
+            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
             break;
         case 0x050: /* movmskps */
             rm = (modrm & 7) | REX_B(s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+            tcg_gen_addi_ptr(s->ptr0, cpu_env,
                              offsetof(CPUX86State,xmm_regs[rm]));
-            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
-            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
+            gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
             break;
         case 0x150: /* movmskpd */
             rm = (modrm & 7) | REX_B(s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+            tcg_gen_addi_ptr(s->ptr0, cpu_env,
                              offsetof(CPUX86State,xmm_regs[rm]));
-            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
-            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
+            gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
             break;
         case 0x02a: /* cvtpi2ps */
         case 0x12a: /* cvtpi2pd */
@@ -3479,15 +3502,15 @@
                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
             }
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
             switch(b >> 8) {
             case 0x0:
-                gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
                 break;
             default:
             case 0x1:
-                gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
                 break;
             }
             break;
@@ -3496,15 +3519,15 @@
             ot = mo_64_32(s->dflag);
             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             if (ot == MO_32) {
                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-                sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
-                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T0);
+                sse_fn_epl(cpu_env, s->ptr0, s->T0);
 #else
                 goto illegal_op;
 #endif
@@ -3524,20 +3547,20 @@
                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
             }
             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
             switch(b) {
             case 0x02c:
-                gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
                 break;
             case 0x12c:
-                gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
                 break;
             case 0x02d:
-                gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
                 break;
             case 0x12d:
-                gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
                 break;
             }
             break;
@@ -3551,30 +3574,31 @@
                 if ((b >> 8) & 1) {
                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
                 } else {
-                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
-                    tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
+                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
+                    tcg_gen_st32_tl(s->T0, cpu_env,
+                                    offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
                 }
                 op2_offset = offsetof(CPUX86State,xmm_t0);
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
             }
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
             if (ot == MO_32) {
                 SSEFunc_i_ep sse_fn_i_ep =
                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
-                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
-                tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
+                sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
+                tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_l_ep sse_fn_l_ep =
                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
-                sse_fn_l_ep(cpu_T0, cpu_env, cpu_ptr0);
+                sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
 #else
                 goto illegal_op;
 #endif
             }
-            gen_op_mov_reg_v(ot, reg, cpu_T0);
+            gen_op_mov_reg_v(s, ot, reg, s->T0);
             break;
         case 0xc4: /* pinsrw */
         case 0x1c4:
@@ -3583,11 +3607,11 @@
             val = x86_ldub_code(env, s);
             if (b1) {
                 val &= 7;
-                tcg_gen_st16_tl(cpu_T0, cpu_env,
+                tcg_gen_st16_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
             } else {
                 val &= 3;
-                tcg_gen_st16_tl(cpu_T0, cpu_env,
+                tcg_gen_st16_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
             }
             break;
@@ -3600,16 +3624,16 @@
             if (b1) {
                 val &= 7;
                 rm = (modrm & 7) | REX_B(s);
-                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
+                tcg_gen_ld16u_tl(s->T0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
             } else {
                 val &= 3;
                 rm = (modrm & 7);
-                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
+                tcg_gen_ld16u_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
             }
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_op_mov_reg_v(ot, reg, cpu_T0);
+            gen_op_mov_reg_v(s, ot, reg, s->T0);
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
@@ -3618,22 +3642,23 @@
                                            xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
+                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
-                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
+                gen_op_movq_env_0(s,
+                                  offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
             }
             break;
         case 0x2d6: /* movq2dq */
             gen_helper_enter_mmx(cpu_env);
             rm = (modrm & 7);
-            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                         offsetof(CPUX86State,fpregs[rm].mmx));
-            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
+            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
             break;
         case 0x3d6: /* movdq2q */
             gen_helper_enter_mmx(cpu_env);
             rm = (modrm & 7) | REX_B(s);
-            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
+            gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
                         offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             break;
         case 0xd7: /* pmovmskb */
@@ -3642,15 +3667,17 @@
                 goto illegal_op;
             if (b1) {
                 rm = (modrm & 7) | REX_B(s);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
-                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0);
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
+                                 offsetof(CPUX86State, xmm_regs[rm]));
+                gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
             } else {
                 rm = (modrm & 7);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
-                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
+                tcg_gen_addi_ptr(s->ptr0, cpu_env,
+                                 offsetof(CPUX86State, fpregs[rm].mmx));
+                gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
             }
             reg = ((modrm >> 3) & 7) | rex_r;
-            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
             break;
 
         case 0x138:
@@ -3690,15 +3717,15 @@
                         break;
                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
-                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
+                        tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
                                         offsetof(ZMMReg, ZMM_L(0)));
                         break;
                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
-                        tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
+                        tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
                                            s->mem_index, MO_LEUW);
-                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
+                        tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
                                         offsetof(ZMMReg, ZMM_W(0)));
                         break;
                     case 0x2a:            /* movntqda */
@@ -3722,9 +3749,9 @@
                 goto unknown_op;
             }
 
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
 
             if (b == 0x17) {
                 set_cc_op(s, CC_OP_EFLAGS);
@@ -3754,13 +3781,13 @@
                     ot = MO_64;
                 }
 
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                gen_helper_crc32(cpu_T0, cpu_tmp2_i32,
-                                 cpu_T0, tcg_const_i32(8 << ot));
+                gen_helper_crc32(s->T0, s->tmp2_i32,
+                                 s->T0, tcg_const_i32(8 << ot));
 
                 ot = mo_64_32(s->dflag);
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
                 break;
 
             case 0x1f0: /* crc32 or movbe */
@@ -3785,11 +3812,11 @@
 
                 gen_lea_modrm(env, s, modrm);
                 if ((b & 1) == 0) {
-                    tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
+                    tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                        s->mem_index, ot | MO_BE);
-                    gen_op_mov_reg_v(ot, reg, cpu_T0);
+                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                 } else {
-                    tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
+                    tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
                                        s->mem_index, ot | MO_BE);
                 }
                 break;
@@ -3802,9 +3829,9 @@
                 }
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_regs[s->vex_v]);
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
-                gen_op_update1_cc();
+                tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
+                gen_op_update1_cc(s);
                 set_cc_op(s, CC_OP_LOGICB + ot);
                 break;
 
@@ -3821,28 +3848,28 @@
                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                     /* Extract START, and shift the operand.
                        Shifts larger than operand size get zeros.  */
-                    tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
-                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_A0);
+                    tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
+                    tcg_gen_shr_tl(s->T0, s->T0, s->A0);
 
                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                     zero = tcg_const_tl(0);
-                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, cpu_A0, bound,
-                                       cpu_T0, zero);
+                    tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
+                                       s->T0, zero);
                     tcg_temp_free(zero);
 
                     /* Extract the LEN into a mask.  Lengths larger than
                        operand size get all ones.  */
-                    tcg_gen_extract_tl(cpu_A0, cpu_regs[s->vex_v], 8, 8);
-                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
-                                       cpu_A0, bound);
+                    tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
+                    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
+                                       s->A0, bound);
                     tcg_temp_free(bound);
-                    tcg_gen_movi_tl(cpu_T1, 1);
-                    tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_A0);
-                    tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
-                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_movi_tl(s->T1, 1);
+                    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
+                    tcg_gen_subi_tl(s->T1, s->T1, 1);
+                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
 
-                    gen_op_mov_reg_v(ot, reg, cpu_T0);
-                    gen_op_update1_cc();
+                    gen_op_mov_reg_v(s, ot, reg, s->T0);
+                    gen_op_update1_cc(s);
                     set_cc_op(s, CC_OP_LOGICB + ot);
                 }
                 break;
@@ -3855,22 +3882,22 @@
                 }
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                tcg_gen_ext8u_tl(cpu_T1, cpu_regs[s->vex_v]);
+                tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
                 {
                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                     /* Note that since we're using BMILG (in order to get O
                        cleared) we need to store the inverse into C.  */
                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
-                                       cpu_T1, bound);
-                    tcg_gen_movcond_tl(TCG_COND_GT, cpu_T1, cpu_T1,
-                                       bound, bound, cpu_T1);
+                                       s->T1, bound);
+                    tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
+                                       bound, bound, s->T1);
                     tcg_temp_free(bound);
                 }
-                tcg_gen_movi_tl(cpu_A0, -1);
-                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T1);
-                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_A0);
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
-                gen_op_update1_cc();
+                tcg_gen_movi_tl(s->A0, -1);
+                tcg_gen_shl_tl(s->A0, s->A0, s->T1);
+                tcg_gen_andc_tl(s->T0, s->T0, s->A0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
+                gen_op_update1_cc(s);
                 set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
 
@@ -3884,19 +3911,19 @@
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 switch (ot) {
                 default:
-                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-                    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
-                    tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
-                                      cpu_tmp2_i32, cpu_tmp3_i32);
-                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
-                    tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
+                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                    tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
+                    tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
+                                      s->tmp2_i32, s->tmp3_i32);
+                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
+                    tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
                     break;
 #ifdef TARGET_X86_64
                 case MO_64:
-                    tcg_gen_mulu2_i64(cpu_T0, cpu_T1,
-                                      cpu_T0, cpu_regs[R_EDX]);
-                    tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T0);
-                    tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
+                    tcg_gen_mulu2_i64(s->T0, s->T1,
+                                      s->T0, cpu_regs[R_EDX]);
+                    tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
+                    tcg_gen_mov_i64(cpu_regs[reg], s->T1);
                     break;
 #endif
                 }
@@ -3913,11 +3940,11 @@
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
-                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
+                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                 } else {
-                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
+                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                 }
-                gen_helper_pdep(cpu_regs[reg], cpu_T0, cpu_T1);
+                gen_helper_pdep(cpu_regs[reg], s->T0, s->T1);
                 break;
 
             case 0x2f5: /* pext Gy, By, Ey */
@@ -3931,11 +3958,11 @@
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
-                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
+                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                 } else {
-                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
+                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                 }
-                gen_helper_pext(cpu_regs[reg], cpu_T0, cpu_T1);
+                gen_helper_pext(cpu_regs[reg], s->T0, s->T1);
                 break;
 
             case 0x1f6: /* adcx Gy, Ey */
@@ -3982,7 +4009,7 @@
                         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
                             gen_compute_eflags(s);
                         }
-                        carry_in = cpu_tmp0;
+                        carry_in = s->tmp0;
                         tcg_gen_extract_tl(carry_in, cpu_cc_src,
                                            ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
                     }
@@ -3993,22 +4020,22 @@
                         /* If we know TL is 64-bit, and we want a 32-bit
                            result, just do everything in 64-bit arithmetic.  */
                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
-                        tcg_gen_ext32u_i64(cpu_T0, cpu_T0);
-                        tcg_gen_add_i64(cpu_T0, cpu_T0, cpu_regs[reg]);
-                        tcg_gen_add_i64(cpu_T0, cpu_T0, carry_in);
-                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T0);
-                        tcg_gen_shri_i64(carry_out, cpu_T0, 32);
+                        tcg_gen_ext32u_i64(s->T0, s->T0);
+                        tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
+                        tcg_gen_add_i64(s->T0, s->T0, carry_in);
+                        tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
+                        tcg_gen_shri_i64(carry_out, s->T0, 32);
                         break;
 #endif
                     default:
                         /* Otherwise compute the carry-out in two steps.  */
                         zero = tcg_const_tl(0);
-                        tcg_gen_add2_tl(cpu_T0, carry_out,
-                                        cpu_T0, zero,
+                        tcg_gen_add2_tl(s->T0, carry_out,
+                                        s->T0, zero,
                                         carry_in, zero);
                         tcg_gen_add2_tl(cpu_regs[reg], carry_out,
                                         cpu_regs[reg], carry_out,
-                                        cpu_T0, zero);
+                                        s->T0, zero);
                         tcg_temp_free(zero);
                         break;
                     }
@@ -4027,24 +4054,24 @@
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 if (ot == MO_64) {
-                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 63);
+                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
                 } else {
-                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
+                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
                 }
                 if (b == 0x1f7) {
-                    tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_shl_tl(s->T0, s->T0, s->T1);
                 } else if (b == 0x2f7) {
                     if (ot != MO_64) {
-                        tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext32s_tl(s->T0, s->T0);
                     }
-                    tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_sar_tl(s->T0, s->T0, s->T1);
                 } else {
                     if (ot != MO_64) {
-                        tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext32u_tl(s->T0, s->T0);
                     }
-                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_shr_tl(s->T0, s->T0, s->T1);
                 }
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
                 break;
 
             case 0x0f3:
@@ -4059,25 +4086,25 @@
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
 
-                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
+                tcg_gen_mov_tl(cpu_cc_src, s->T0);
                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
-                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
-                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_subi_tl(s->T1, s->T0, 1);
+                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
                     break;
                 case 2: /* blsmsk By,Ey */
-                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
-                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_subi_tl(s->T1, s->T0, 1);
+                    tcg_gen_xor_tl(s->T0, s->T0, s->T1);
                     break;
                 case 3: /* blsi By, Ey */
-                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
-                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+                    tcg_gen_neg_tl(s->T1, s->T0);
+                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
                     break;
                 default:
                     goto unknown_op;
                 }
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
+                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+                gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
                 set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
 
@@ -4115,45 +4142,45 @@
                 val = x86_ldub_code(env, s);
                 switch (b) {
                 case 0x14: /* pextrb */
-                    tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                    tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_B(val & 15)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, cpu_T0);
+                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
+                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                            s->mem_index, MO_UB);
                     }
                     break;
                 case 0x15: /* pextrw */
-                    tcg_gen_ld16u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                    tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_W(val & 7)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, cpu_T0);
+                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
+                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                            s->mem_index, MO_LEUW);
                     }
                     break;
                 case 0x16:
                     if (ot == MO_32) { /* pextrd */
-                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].ZMM_L(val & 3)));
                         if (mod == 3) {
-                            tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
+                            tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
                         } else {
-                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                            tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                                 s->mem_index, MO_LEUL);
                         }
                     } else { /* pextrq */
 #ifdef TARGET_X86_64
-                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
+                        tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].ZMM_Q(val & 1)));
                         if (mod == 3) {
-                            tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
+                            tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
                         } else {
-                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
+                            tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
                                                 s->mem_index, MO_LEQ);
                         }
 #else
@@ -4162,35 +4189,35 @@
                     }
                     break;
                 case 0x17: /* extractps */
-                    tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                    tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_L(val & 3)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, cpu_T0);
+                        gen_op_mov_reg_v(s, ot, rm, s->T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
+                        tcg_gen_qemu_st_tl(s->T0, s->A0,
                                            s->mem_index, MO_LEUL);
                     }
                     break;
                 case 0x20: /* pinsrb */
                     if (mod == 3) {
-                        gen_op_mov_v_reg(MO_32, cpu_T0, rm);
+                        gen_op_mov_v_reg(s, MO_32, s->T0, rm);
                     } else {
-                        tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
+                        tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                            s->mem_index, MO_UB);
                     }
-                    tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                    tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].ZMM_B(val & 15)));
                     break;
                 case 0x21: /* insertps */
                     if (mod == 3) {
-                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+                        tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
                                         offsetof(CPUX86State,xmm_regs[rm]
                                                 .ZMM_L((val >> 6) & 3)));
                     } else {
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                     }
-                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+                    tcg_gen_st_i32(s->tmp2_i32, cpu_env,
                                     offsetof(CPUX86State,xmm_regs[reg]
                                             .ZMM_L((val >> 4) & 3)));
                     if ((val >> 0) & 1)
@@ -4213,23 +4240,23 @@
                 case 0x22:
                     if (ot == MO_32) { /* pinsrd */
                         if (mod == 3) {
-                            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
+                            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
                         } else {
-                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                                 s->mem_index, MO_LEUL);
                         }
-                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+                        tcg_gen_st_i32(s->tmp2_i32, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].ZMM_L(val & 3)));
                     } else { /* pinsrq */
 #ifdef TARGET_X86_64
                         if (mod == 3) {
-                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
+                            gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
                         } else {
-                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
+                            tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
                                                 s->mem_index, MO_LEQ);
                         }
-                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
+                        tcg_gen_st_i64(s->tmp1_i64, cpu_env,
                                         offsetof(CPUX86State,
                                                 xmm_regs[reg].ZMM_Q(val & 1)));
 #else
@@ -4271,9 +4298,9 @@
                 }
             }
 
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+            sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
             break;
 
         case 0x33a:
@@ -4293,13 +4320,13 @@
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 b = x86_ldub_code(env, s);
                 if (ot == MO_64) {
-                    tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63);
+                    tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
                 } else {
-                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-                    tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
-                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
+                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                    tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
+                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
                 }
-                gen_op_mov_reg_v(ot, reg, cpu_T0);
+                gen_op_mov_reg_v(s, ot, reg, s->T0);
                 break;
 
             default:
@@ -4356,8 +4383,8 @@
                 switch (sz) {
                 case 2:
                     /* 32 bit access */
-                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
-                    tcg_gen_st32_tl(cpu_T0, cpu_env,
+                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
+                    tcg_gen_st32_tl(s->T0, cpu_env,
                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
                     break;
                 case 3:
@@ -4394,18 +4421,18 @@
             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
                 goto illegal_op;
             }
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
             break;
         case 0x70: /* pshufx insn */
         case 0xc6: /* pshufx insn */
             val = x86_ldub_code(env, s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
             /* XXX: introduce a new table? */
             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
-            sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+            sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
             break;
         case 0xc2:
             /* compare insns */
@@ -4414,28 +4441,28 @@
                 goto unknown_op;
             sse_fn_epp = sse_op_table4[val][b1];
 
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
             break;
         case 0xf7:
             /* maskmov : we must prepare A0 */
             if (mod != 3)
                 goto illegal_op;
-            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
-            gen_extu(s->aflag, cpu_A0);
+            tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
+            gen_extu(s->aflag, s->A0);
             gen_add_A0_ds_seg(s);
 
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
             /* XXX: introduce a new table? */
             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
-            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
+            sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
             break;
         default:
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
+            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
+            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
             break;
         }
         if (b == 0x2e || b == 0x2f) {
@@ -4462,7 +4489,7 @@
 #ifdef TARGET_X86_64
     s->rex_x = 0;
     s->rex_b = 0;
-    x86_64_hregs = 0;
+    s->x86_64_hregs = false;
 #endif
     s->rip_offset = 0; /* for relative ip address */
     s->vex_l = 0;
@@ -4521,7 +4548,8 @@
             rex_r = (b & 0x4) << 1;
             s->rex_x = (b & 0x2) << 2;
             REX_B(s) = (b & 0x1) << 3;
-            x86_64_hregs = 1; /* select uniform byte register addressing */
+            /* select uniform byte register addressing */
+            s->x86_64_hregs = true;
             goto next_byte;
         }
         break;
@@ -4549,7 +4577,7 @@
                 goto illegal_op;
             }
 #ifdef TARGET_X86_64
-            if (x86_64_hregs) {
+            if (s->x86_64_hregs) {
                 goto illegal_op;
             }
 #endif
@@ -4653,13 +4681,13 @@
                 xor_zero:
                     /* xor reg, reg optimisation */
                     set_cc_op(s, CC_OP_CLR);
-                    tcg_gen_movi_tl(cpu_T0, 0);
-                    gen_op_mov_reg_v(ot, reg, cpu_T0);
+                    tcg_gen_movi_tl(s->T0, 0);
+                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                     break;
                 } else {
                     opreg = rm;
                 }
-                gen_op_mov_v_reg(ot, cpu_T1, reg);
+                gen_op_mov_v_reg(s, ot, s->T1, reg);
                 gen_op(s, op, ot, opreg);
                 break;
             case 1: /* OP Gv, Ev */
@@ -4669,17 +4697,17 @@
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
                     gen_lea_modrm(env, s, modrm);
-                    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+                    gen_op_ld_v(s, ot, s->T1, s->A0);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
                 } else {
-                    gen_op_mov_v_reg(ot, cpu_T1, rm);
+                    gen_op_mov_v_reg(s, ot, s->T1, rm);
                 }
                 gen_op(s, op, ot, reg);
                 break;
             case 2: /* OP A, Iv */
                 val = insn_get(env, s, ot);
-                tcg_gen_movi_tl(cpu_T1, val);
+                tcg_gen_movi_tl(s->T1, val);
                 gen_op(s, op, ot, OR_EAX);
                 break;
             }
@@ -4725,7 +4753,7 @@
                 val = (int8_t)insn_get(env, s, MO_8);
                 break;
             }
-            tcg_gen_movi_tl(cpu_T1, val);
+            tcg_gen_movi_tl(s->T1, val);
             gen_op(s, op, ot, opreg);
         }
         break;
@@ -4756,17 +4784,17 @@
             /* For those below that handle locked memory, don't load here.  */
             if (!(s->prefix & PREFIX_LOCK)
                 || op != 2) {
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
             }
         } else {
-            gen_op_mov_v_reg(ot, cpu_T0, rm);
+            gen_op_mov_v_reg(s, ot, s->T0, rm);
         }
 
         switch(op) {
         case 0: /* test */
             val = insn_get(env, s, ot);
-            tcg_gen_movi_tl(cpu_T1, val);
-            gen_op_testl_T0_T1_cc();
+            tcg_gen_movi_tl(s->T1, val);
+            gen_op_testl_T0_T1_cc(s);
             set_cc_op(s, CC_OP_LOGICB + ot);
             break;
         case 2: /* not */
@@ -4774,15 +4802,15 @@
                 if (mod == 3) {
                     goto illegal_op;
                 }
-                tcg_gen_movi_tl(cpu_T0, ~0);
-                tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
+                tcg_gen_movi_tl(s->T0, ~0);
+                tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
                                             s->mem_index, ot | MO_LE);
             } else {
-                tcg_gen_not_tl(cpu_T0, cpu_T0);
+                tcg_gen_not_tl(s->T0, s->T0);
                 if (mod != 3) {
-                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                    gen_op_st_v(s, ot, s->T0, s->A0);
                 } else {
-                    gen_op_mov_reg_v(ot, rm, cpu_T0);
+                    gen_op_mov_reg_v(s, ot, rm, s->T0);
                 }
             }
             break;
@@ -4798,8 +4826,8 @@
                 t0 = tcg_temp_local_new();
                 label1 = gen_new_label();
 
-                tcg_gen_mov_tl(a0, cpu_A0);
-                tcg_gen_mov_tl(t0, cpu_T0);
+                tcg_gen_mov_tl(a0, s->A0);
+                tcg_gen_mov_tl(t0, s->T0);
 
                 gen_set_label(label1);
                 t1 = tcg_temp_new();
@@ -4813,53 +4841,53 @@
 
                 tcg_temp_free(t2);
                 tcg_temp_free(a0);
-                tcg_gen_mov_tl(cpu_T0, t0);
+                tcg_gen_mov_tl(s->T0, t0);
                 tcg_temp_free(t0);
             } else {
-                tcg_gen_neg_tl(cpu_T0, cpu_T0);
+                tcg_gen_neg_tl(s->T0, s->T0);
                 if (mod != 3) {
-                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                    gen_op_st_v(s, ot, s->T0, s->A0);
                 } else {
-                    gen_op_mov_reg_v(ot, rm, cpu_T0);
+                    gen_op_mov_reg_v(s, ot, rm, s->T0);
                 }
             }
-            gen_op_update_neg_cc();
+            gen_op_update_neg_cc(s);
             set_cc_op(s, CC_OP_SUBB + ot);
             break;
         case 4: /* mul */
             switch(ot) {
             case MO_8:
-                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
-                tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
-                tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
+                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
+                tcg_gen_ext8u_tl(s->T0, s->T0);
+                tcg_gen_ext8u_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                tcg_gen_andi_tl(cpu_cc_src, cpu_T0, 0xff00);
+                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
+                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+                tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
-                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
-                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
-                tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
+                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
+                tcg_gen_ext16u_tl(s->T0, s->T0);
+                tcg_gen_ext16u_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
-                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
-                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
+                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
+                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+                tcg_gen_shri_tl(s->T0, s->T0, 16);
+                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
+                tcg_gen_mov_tl(cpu_cc_src, s->T0);
                 set_cc_op(s, CC_OP_MULW);
                 break;
             default:
             case MO_32:
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
-                tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
-                                  cpu_tmp2_i32, cpu_tmp3_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
+                tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
+                                  s->tmp2_i32, s->tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
                 set_cc_op(s, CC_OP_MULL);
@@ -4867,7 +4895,7 @@
 #ifdef TARGET_X86_64
             case MO_64:
                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
-                                  cpu_T0, cpu_regs[R_EAX]);
+                                  s->T0, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
                 set_cc_op(s, CC_OP_MULQ);
@@ -4878,49 +4906,49 @@
         case 5: /* imul */
             switch(ot) {
             case MO_8:
-                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
-                tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
-                tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
+                gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
+                tcg_gen_ext8s_tl(s->T0, s->T0);
+                tcg_gen_ext8s_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T0);
-                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
+                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
+                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+                tcg_gen_ext8s_tl(s->tmp0, s->T0);
+                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
-                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
-                tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
-                tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
+                gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
+                tcg_gen_ext16s_tl(s->T0, s->T0);
+                tcg_gen_ext16s_tl(s->T1, s->T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
-                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
-                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
-                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
+                tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+                gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
+                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+                tcg_gen_ext16s_tl(s->tmp0, s->T0);
+                tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
+                tcg_gen_shri_tl(s->T0, s->T0, 16);
+                gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
                 set_cc_op(s, CC_OP_MULW);
                 break;
             default:
             case MO_32:
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
-                tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
-                                  cpu_tmp2_i32, cpu_tmp3_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
-                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
-                tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
+                tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
+                                  s->tmp2_i32, s->tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
+                tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
-                tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
-                tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
+                tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
                 set_cc_op(s, CC_OP_MULL);
                 break;
 #ifdef TARGET_X86_64
             case MO_64:
                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
-                                  cpu_T0, cpu_regs[R_EAX]);
+                                  s->T0, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
@@ -4932,18 +4960,18 @@
         case 6: /* div */
             switch(ot) {
             case MO_8:
-                gen_helper_divb_AL(cpu_env, cpu_T0);
+                gen_helper_divb_AL(cpu_env, s->T0);
                 break;
             case MO_16:
-                gen_helper_divw_AX(cpu_env, cpu_T0);
+                gen_helper_divw_AX(cpu_env, s->T0);
                 break;
             default:
             case MO_32:
-                gen_helper_divl_EAX(cpu_env, cpu_T0);
+                gen_helper_divl_EAX(cpu_env, s->T0);
                 break;
 #ifdef TARGET_X86_64
             case MO_64:
-                gen_helper_divq_EAX(cpu_env, cpu_T0);
+                gen_helper_divq_EAX(cpu_env, s->T0);
                 break;
 #endif
             }
@@ -4951,18 +4979,18 @@
         case 7: /* idiv */
             switch(ot) {
             case MO_8:
-                gen_helper_idivb_AL(cpu_env, cpu_T0);
+                gen_helper_idivb_AL(cpu_env, s->T0);
                 break;
             case MO_16:
-                gen_helper_idivw_AX(cpu_env, cpu_T0);
+                gen_helper_idivw_AX(cpu_env, s->T0);
                 break;
             default:
             case MO_32:
-                gen_helper_idivl_EAX(cpu_env, cpu_T0);
+                gen_helper_idivl_EAX(cpu_env, s->T0);
                 break;
 #ifdef TARGET_X86_64
             case MO_64:
-                gen_helper_idivq_EAX(cpu_env, cpu_T0);
+                gen_helper_idivq_EAX(cpu_env, s->T0);
                 break;
 #endif
             }
@@ -4997,9 +5025,9 @@
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             if (op >= 2 && op != 3 && op != 5)
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
         } else {
-            gen_op_mov_v_reg(ot, cpu_T0, rm);
+            gen_op_mov_v_reg(s, ot, s->T0, rm);
         }
 
         switch(op) {
@@ -5020,60 +5048,60 @@
         case 2: /* call Ev */
             /* XXX: optimize if memory (no 'and' is necessary) */
             if (dflag == MO_16) {
-                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext16u_tl(s->T0, s->T0);
             }
             next_eip = s->pc - s->cs_base;
-            tcg_gen_movi_tl(cpu_T1, next_eip);
-            gen_push_v(s, cpu_T1);
-            gen_op_jmp_v(cpu_T0);
+            tcg_gen_movi_tl(s->T1, next_eip);
+            gen_push_v(s, s->T1);
+            gen_op_jmp_v(s->T0);
             gen_bnd_jmp(s);
-            gen_jr(s, cpu_T0);
+            gen_jr(s, s->T0);
             break;
         case 3: /* lcall Ev */
-            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+            gen_op_ld_v(s, ot, s->T1, s->A0);
             gen_add_A0_im(s, 1 << ot);
-            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, MO_16, s->T0, s->A0);
         do_lcall:
             if (s->pe && !s->vm86) {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
                                            tcg_const_i32(dflag - 1),
                                            tcg_const_tl(s->pc - s->cs_base));
             } else {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
                                       tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
             }
-            tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
-            gen_jr(s, cpu_tmp4);
+            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
+            gen_jr(s, s->tmp4);
             break;
         case 4: /* jmp Ev */
             if (dflag == MO_16) {
-                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext16u_tl(s->T0, s->T0);
             }
-            gen_op_jmp_v(cpu_T0);
+            gen_op_jmp_v(s->T0);
             gen_bnd_jmp(s);
-            gen_jr(s, cpu_T0);
+            gen_jr(s, s->T0);
             break;
         case 5: /* ljmp Ev */
-            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+            gen_op_ld_v(s, ot, s->T1, s->A0);
             gen_add_A0_im(s, 1 << ot);
-            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, MO_16, s->T0, s->A0);
         do_ljmp:
             if (s->pe && !s->vm86) {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
                                           tcg_const_tl(s->pc - s->cs_base));
             } else {
-                gen_op_movl_seg_T0_vm(R_CS);
-                gen_op_jmp_v(cpu_T1);
+                gen_op_movl_seg_T0_vm(s, R_CS);
+                gen_op_jmp_v(s->T1);
             }
-            tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip));
-            gen_jr(s, cpu_tmp4);
+            tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
+            gen_jr(s, s->tmp4);
             break;
         case 6: /* push Ev */
-            gen_push_v(s, cpu_T0);
+            gen_push_v(s, s->T0);
             break;
         default:
             goto unknown_op;
@@ -5088,8 +5116,8 @@
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_v_reg(ot, cpu_T1, reg);
-        gen_op_testl_T0_T1_cc();
+        gen_op_mov_v_reg(s, ot, s->T1, reg);
+        gen_op_testl_T0_T1_cc(s);
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
 
@@ -5098,9 +5126,9 @@
         ot = mo_b_d(b, dflag);
         val = insn_get(env, s, ot);
 
-        gen_op_mov_v_reg(ot, cpu_T0, OR_EAX);
-        tcg_gen_movi_tl(cpu_T1, val);
-        gen_op_testl_T0_T1_cc();
+        gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
+        tcg_gen_movi_tl(s->T1, val);
+        gen_op_testl_T0_T1_cc(s);
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
 
@@ -5108,20 +5136,20 @@
         switch (dflag) {
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
-            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
-            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T0);
+            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
+            tcg_gen_ext32s_tl(s->T0, s->T0);
+            gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
             break;
 #endif
         case MO_32:
-            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
-            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
-            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T0);
+            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
+            tcg_gen_ext16s_tl(s->T0, s->T0);
+            gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
             break;
         case MO_16:
-            gen_op_mov_v_reg(MO_8, cpu_T0, R_EAX);
-            tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
-            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
+            gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
+            tcg_gen_ext8s_tl(s->T0, s->T0);
+            gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
             break;
         default:
             tcg_abort();
@@ -5131,22 +5159,22 @@
         switch (dflag) {
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_op_mov_v_reg(MO_64, cpu_T0, R_EAX);
-            tcg_gen_sari_tl(cpu_T0, cpu_T0, 63);
-            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T0);
+            gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
+            tcg_gen_sari_tl(s->T0, s->T0, 63);
+            gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
             break;
 #endif
         case MO_32:
-            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
-            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
-            tcg_gen_sari_tl(cpu_T0, cpu_T0, 31);
-            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T0);
+            gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
+            tcg_gen_ext32s_tl(s->T0, s->T0);
+            tcg_gen_sari_tl(s->T0, s->T0, 31);
+            gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
             break;
         case MO_16:
-            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
-            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
-            tcg_gen_sari_tl(cpu_T0, cpu_T0, 15);
-            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
+            gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
+            tcg_gen_ext16s_tl(s->T0, s->T0);
+            tcg_gen_sari_tl(s->T0, s->T0, 15);
+            gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
             break;
         default:
             tcg_abort();
@@ -5165,42 +5193,42 @@
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         if (b == 0x69) {
             val = insn_get(env, s, ot);
-            tcg_gen_movi_tl(cpu_T1, val);
+            tcg_gen_movi_tl(s->T1, val);
         } else if (b == 0x6b) {
             val = (int8_t)insn_get(env, s, MO_8);
-            tcg_gen_movi_tl(cpu_T1, val);
+            tcg_gen_movi_tl(s->T1, val);
         } else {
-            gen_op_mov_v_reg(ot, cpu_T1, reg);
+            gen_op_mov_v_reg(s, ot, s->T1, reg);
         }
         switch (ot) {
 #ifdef TARGET_X86_64
         case MO_64:
-            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, cpu_T0, cpu_T1);
+            tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
-            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
+            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
             break;
 #endif
         case MO_32:
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
-            tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
-                              cpu_tmp2_i32, cpu_tmp3_i32);
-            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
-            tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+            tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
+            tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
+                              s->tmp2_i32, s->tmp3_i32);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
+            tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
-            tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
-            tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
+            tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
+            tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
             break;
         default:
-            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
-            tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
+            tcg_gen_ext16s_tl(s->T0, s->T0);
+            tcg_gen_ext16s_tl(s->T1, s->T1);
             /* XXX: use 32 bit mul which could be faster */
-            tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
-            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
-            tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
-            gen_op_mov_reg_v(ot, reg, cpu_T0);
+            tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+            tcg_gen_ext16s_tl(s->tmp0, s->T0);
+            tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
+            gen_op_mov_reg_v(s, ot, reg, s->T0);
             break;
         }
         set_cc_op(s, CC_OP_MULB + ot);
@@ -5211,27 +5239,27 @@
         modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
-        gen_op_mov_v_reg(ot, cpu_T0, reg);
+        gen_op_mov_v_reg(s, ot, s->T0, reg);
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_v_reg(ot, cpu_T1, rm);
-            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
-            gen_op_mov_reg_v(ot, reg, cpu_T1);
-            gen_op_mov_reg_v(ot, rm, cpu_T0);
+            gen_op_mov_v_reg(s, ot, s->T1, rm);
+            tcg_gen_add_tl(s->T0, s->T0, s->T1);
+            gen_op_mov_reg_v(s, ot, reg, s->T1);
+            gen_op_mov_reg_v(s, ot, rm, s->T0);
         } else {
             gen_lea_modrm(env, s, modrm);
             if (s->prefix & PREFIX_LOCK) {
-                tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0,
+                tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
                                             s->mem_index, ot | MO_LE);
-                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+                tcg_gen_add_tl(s->T0, s->T0, s->T1);
             } else {
-                gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
-                tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
-                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, s->T1, s->A0);
+                tcg_gen_add_tl(s->T0, s->T0, s->T1);
+                gen_op_st_v(s, ot, s->T0, s->A0);
             }
-            gen_op_mov_reg_v(ot, reg, cpu_T1);
+            gen_op_mov_reg_v(s, ot, reg, s->T1);
         }
-        gen_op_update2_cc();
+        gen_op_update2_cc(s);
         set_cc_op(s, CC_OP_ADDB + ot);
         break;
     case 0x1b0:
@@ -5246,7 +5274,7 @@
             oldv = tcg_temp_new();
             newv = tcg_temp_new();
             cmpv = tcg_temp_new();
-            gen_op_mov_v_reg(ot, newv, reg);
+            gen_op_mov_v_reg(s, ot, newv, reg);
             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
 
             if (s->prefix & PREFIX_LOCK) {
@@ -5254,16 +5282,16 @@
                     goto illegal_op;
                 }
                 gen_lea_modrm(env, s, modrm);
-                tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv,
+                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
                                           s->mem_index, ot | MO_LE);
-                gen_op_mov_reg_v(ot, R_EAX, oldv);
+                gen_op_mov_reg_v(s, ot, R_EAX, oldv);
             } else {
                 if (mod == 3) {
                     rm = (modrm & 7) | REX_B(s);
-                    gen_op_mov_v_reg(ot, oldv, rm);
+                    gen_op_mov_v_reg(s, ot, oldv, rm);
                 } else {
                     gen_lea_modrm(env, s, modrm);
-                    gen_op_ld_v(s, ot, oldv, cpu_A0);
+                    gen_op_ld_v(s, ot, oldv, s->A0);
                     rm = 0; /* avoid warning */
                 }
                 gen_extu(ot, oldv);
@@ -5271,19 +5299,19 @@
                 /* store value = (old == cmp ? new : old);  */
                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
                 if (mod == 3) {
-                    gen_op_mov_reg_v(ot, R_EAX, oldv);
-                    gen_op_mov_reg_v(ot, rm, newv);
+                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
+                    gen_op_mov_reg_v(s, ot, rm, newv);
                 } else {
                     /* Perform an unconditional store cycle like physical cpu;
                        must be before changing accumulator to ensure
                        idempotency if the store faults and the instruction
                        is restarted */
-                    gen_op_st_v(s, ot, newv, cpu_A0);
-                    gen_op_mov_reg_v(ot, R_EAX, oldv);
+                    gen_op_st_v(s, ot, newv, s->A0);
+                    gen_op_mov_reg_v(s, ot, R_EAX, oldv);
                 }
             }
             tcg_gen_mov_tl(cpu_cc_src, oldv);
-            tcg_gen_mov_tl(cpu_cc_srcT, cmpv);
+            tcg_gen_mov_tl(s->cc_srcT, cmpv);
             tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
             set_cc_op(s, CC_OP_SUBB + ot);
             tcg_temp_free(oldv);
@@ -5302,9 +5330,9 @@
                 goto illegal_op;
             gen_lea_modrm(env, s, modrm);
             if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
-                gen_helper_cmpxchg16b(cpu_env, cpu_A0);
+                gen_helper_cmpxchg16b(cpu_env, s->A0);
             } else {
-                gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0);
+                gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
             }
         } else
 #endif        
@@ -5313,9 +5341,9 @@
                 goto illegal_op;
             gen_lea_modrm(env, s, modrm);
             if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) {
-                gen_helper_cmpxchg8b(cpu_env, cpu_A0);
+                gen_helper_cmpxchg8b(cpu_env, s->A0);
             } else {
-                gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0);
+                gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
             }
         }
         set_cc_op(s, CC_OP_EFLAGS);
@@ -5324,14 +5352,14 @@
         /**************************/
         /* push/pop */
     case 0x50 ... 0x57: /* push */
-        gen_op_mov_v_reg(MO_32, cpu_T0, (b & 7) | REX_B(s));
-        gen_push_v(s, cpu_T0);
+        gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
+        gen_push_v(s, s->T0);
         break;
     case 0x58 ... 0x5f: /* pop */
         ot = gen_pop_T0(s);
         /* NOTE: order is important for pop %sp */
         gen_pop_update(s, ot);
-        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T0);
+        gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
         break;
     case 0x60: /* pusha */
         if (CODE64(s))
@@ -5350,8 +5378,8 @@
             val = insn_get(env, s, ot);
         else
             val = (int8_t)insn_get(env, s, MO_8);
-        tcg_gen_movi_tl(cpu_T0, val);
-        gen_push_v(s, cpu_T0);
+        tcg_gen_movi_tl(s->T0, val);
+        gen_push_v(s, s->T0);
         break;
     case 0x8f: /* pop Ev */
         modrm = x86_ldub_code(env, s);
@@ -5361,7 +5389,7 @@
             /* NOTE: order is important for pop %sp */
             gen_pop_update(s, ot);
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_reg_v(ot, rm, cpu_T0);
+            gen_op_mov_reg_v(s, ot, rm, s->T0);
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
@@ -5387,13 +5415,13 @@
     case 0x1e: /* push ds */
         if (CODE64(s))
             goto illegal_op;
-        gen_op_movl_T0_seg(b >> 3);
-        gen_push_v(s, cpu_T0);
+        gen_op_movl_T0_seg(s, b >> 3);
+        gen_push_v(s, s->T0);
         break;
     case 0x1a0: /* push fs */
     case 0x1a8: /* push gs */
-        gen_op_movl_T0_seg((b >> 3) & 7);
-        gen_push_v(s, cpu_T0);
+        gen_op_movl_T0_seg(s, (b >> 3) & 7);
+        gen_push_v(s, s->T0);
         break;
     case 0x07: /* pop es */
     case 0x17: /* pop ss */
@@ -5406,7 +5434,7 @@
         gen_pop_update(s, ot);
         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
         if (s->base.is_jmp) {
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             if (reg == R_SS) {
                 s->tf = 0;
                 gen_eob_inhibit_irq(s, true);
@@ -5421,7 +5449,7 @@
         gen_movl_seg_T0(s, (b >> 3) & 7);
         gen_pop_update(s, ot);
         if (s->base.is_jmp) {
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
         }
         break;
@@ -5447,11 +5475,11 @@
             gen_lea_modrm(env, s, modrm);
         }
         val = insn_get(env, s, ot);
-        tcg_gen_movi_tl(cpu_T0, val);
+        tcg_gen_movi_tl(s->T0, val);
         if (mod != 3) {
-            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+            gen_op_st_v(s, ot, s->T0, s->A0);
         } else {
-            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
+            gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
         }
         break;
     case 0x8a:
@@ -5461,7 +5489,7 @@
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_reg_v(ot, reg, cpu_T0);
+        gen_op_mov_reg_v(s, ot, reg, s->T0);
         break;
     case 0x8e: /* mov seg, Gv */
         modrm = x86_ldub_code(env, s);
@@ -5472,7 +5500,7 @@
         gen_movl_seg_T0(s, reg);
         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
         if (s->base.is_jmp) {
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             if (reg == R_SS) {
                 s->tf = 0;
                 gen_eob_inhibit_irq(s, true);
@@ -5487,7 +5515,7 @@
         mod = (modrm >> 6) & 3;
         if (reg >= 6)
             goto illegal_op;
-        gen_op_movl_T0_seg(reg);
+        gen_op_movl_T0_seg(s, reg);
         ot = mod == 3 ? dflag : MO_16;
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
         break;
@@ -5513,31 +5541,31 @@
             rm = (modrm & 7) | REX_B(s);
 
             if (mod == 3) {
-                if (s_ot == MO_SB && byte_reg_is_xH(rm)) {
-                    tcg_gen_sextract_tl(cpu_T0, cpu_regs[rm - 4], 8, 8);
+                if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
+                    tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
                 } else {
-                    gen_op_mov_v_reg(ot, cpu_T0, rm);
+                    gen_op_mov_v_reg(s, ot, s->T0, rm);
                     switch (s_ot) {
                     case MO_UB:
-                        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext8u_tl(s->T0, s->T0);
                         break;
                     case MO_SB:
-                        tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext8s_tl(s->T0, s->T0);
                         break;
                     case MO_UW:
-                        tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext16u_tl(s->T0, s->T0);
                         break;
                     default:
                     case MO_SW:
-                        tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
+                        tcg_gen_ext16s_tl(s->T0, s->T0);
                         break;
                     }
                 }
-                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
+                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, s_ot, cpu_T0, cpu_A0);
-                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
+                gen_op_ld_v(s, s_ot, s->T0, s->A0);
+                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
             }
         }
         break;
@@ -5550,9 +5578,9 @@
         reg = ((modrm >> 3) & 7) | rex_r;
         {
             AddressParts a = gen_lea_modrm_0(env, s, modrm);
-            TCGv ea = gen_lea_modrm_1(a);
+            TCGv ea = gen_lea_modrm_1(s, a);
             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
-            gen_op_mov_reg_v(dflag, reg, cpu_A0);
+            gen_op_mov_reg_v(s, dflag, reg, s->A0);
         }
         break;
 
@@ -5574,30 +5602,30 @@
                 offset_addr = insn_get(env, s, s->aflag);
                 break;
             }
-            tcg_gen_movi_tl(cpu_A0, offset_addr);
+            tcg_gen_movi_tl(s->A0, offset_addr);
             gen_add_A0_ds_seg(s);
             if ((b & 2) == 0) {
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
-                gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
+                gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
             } else {
-                gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
-                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
+                gen_op_st_v(s, ot, s->T0, s->A0);
             }
         }
         break;
     case 0xd7: /* xlat */
-        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
-        tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
-        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T0);
-        gen_extu(s->aflag, cpu_A0);
+        tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
+        tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
+        tcg_gen_add_tl(s->A0, s->A0, s->T0);
+        gen_extu(s->aflag, s->A0);
         gen_add_A0_ds_seg(s);
-        gen_op_ld_v(s, MO_8, cpu_T0, cpu_A0);
-        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
+        gen_op_ld_v(s, MO_8, s->T0, s->A0);
+        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
         break;
     case 0xb0 ... 0xb7: /* mov R, Ib */
         val = insn_get(env, s, MO_8);
-        tcg_gen_movi_tl(cpu_T0, val);
-        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T0);
+        tcg_gen_movi_tl(s->T0, val);
+        gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
         break;
     case 0xb8 ... 0xbf: /* mov R, Iv */
 #ifdef TARGET_X86_64
@@ -5606,16 +5634,16 @@
             /* 64 bit case */
             tmp = x86_ldq_code(env, s);
             reg = (b & 7) | REX_B(s);
-            tcg_gen_movi_tl(cpu_T0, tmp);
-            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
+            tcg_gen_movi_tl(s->T0, tmp);
+            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
         } else
 #endif
         {
             ot = dflag;
             val = insn_get(env, s, ot);
             reg = (b & 7) | REX_B(s);
-            tcg_gen_movi_tl(cpu_T0, val);
-            gen_op_mov_reg_v(ot, reg, cpu_T0);
+            tcg_gen_movi_tl(s->T0, val);
+            gen_op_mov_reg_v(s, ot, reg, s->T0);
         }
         break;
 
@@ -5634,17 +5662,17 @@
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
         do_xchg_reg:
-            gen_op_mov_v_reg(ot, cpu_T0, reg);
-            gen_op_mov_v_reg(ot, cpu_T1, rm);
-            gen_op_mov_reg_v(ot, rm, cpu_T0);
-            gen_op_mov_reg_v(ot, reg, cpu_T1);
+            gen_op_mov_v_reg(s, ot, s->T0, reg);
+            gen_op_mov_v_reg(s, ot, s->T1, rm);
+            gen_op_mov_reg_v(s, ot, rm, s->T0);
+            gen_op_mov_reg_v(s, ot, reg, s->T1);
         } else {
             gen_lea_modrm(env, s, modrm);
-            gen_op_mov_v_reg(ot, cpu_T0, reg);
+            gen_op_mov_v_reg(s, ot, s->T0, reg);
             /* for xchg, lock is implicit */
-            tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0,
+            tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
                                    s->mem_index, ot | MO_LE);
-            gen_op_mov_reg_v(ot, reg, cpu_T1);
+            gen_op_mov_reg_v(s, ot, reg, s->T1);
         }
         break;
     case 0xc4: /* les Gv */
@@ -5671,15 +5699,15 @@
         if (mod == 3)
             goto illegal_op;
         gen_lea_modrm(env, s, modrm);
-        gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+        gen_op_ld_v(s, ot, s->T1, s->A0);
         gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
-        gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
+        gen_op_ld_v(s, MO_16, s->T0, s->A0);
         gen_movl_seg_T0(s, op);
         /* then put the data */
-        gen_op_mov_reg_v(ot, reg, cpu_T1);
+        gen_op_mov_reg_v(s, ot, reg, s->T1);
         if (s->base.is_jmp) {
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
         }
         break;
@@ -5756,7 +5784,7 @@
         } else {
             opreg = rm;
         }
-        gen_op_mov_v_reg(ot, cpu_T1, reg);
+        gen_op_mov_v_reg(s, ot, s->T1, reg);
 
         if (shift) {
             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
@@ -5794,25 +5822,25 @@
 
                     switch(op >> 4) {
                     case 0:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
-                        gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
                         break;
                     case 1:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
-                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
+                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
-                        gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
+                        gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
                         break;
                     case 3:
                     default:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LESW);
-                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
                         break;
                     }
 
@@ -5833,25 +5861,25 @@
                 case 0:
                     switch(op >> 4) {
                     case 0:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
-                        gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
                         break;
                     case 1:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
-                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
+                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
-                        gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
+                        gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
                         break;
                     case 3:
                     default:
-                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LESW);
-                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
+                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
                         break;
                     }
                     break;
@@ -5859,19 +5887,19 @@
                     /* XXX: the corresponding CPUID bit must be tested ! */
                     switch(op >> 4) {
                     case 1:
-                        gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                        gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
+                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         break;
                     case 2:
-                        gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
-                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
+                        gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
+                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
                         break;
                     case 3:
                     default:
-                        gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                        gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
+                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUW);
                         break;
                     }
@@ -5880,24 +5908,24 @@
                 default:
                     switch(op >> 4) {
                     case 0:
-                        gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                        gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
+                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         break;
                     case 1:
-                        gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                        gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
+                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUL);
                         break;
                     case 2:
-                        gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
-                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
+                        gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
+                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
                                             s->mem_index, MO_LEQ);
                         break;
                     case 3:
                     default:
-                        gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
-                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                        gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
+                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                             s->mem_index, MO_LEUW);
                         break;
                     }
@@ -5907,53 +5935,53 @@
                 }
                 break;
             case 0x0c: /* fldenv mem */
-                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x0d: /* fldcw mem */
-                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
+                tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
-                gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
+                gen_helper_fldcw(cpu_env, s->tmp2_i32);
                 break;
             case 0x0e: /* fnstenv mem */
-                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x0f: /* fnstcw mem */
-                gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
-                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                gen_helper_fnstcw(s->tmp2_i32, cpu_env);
+                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
                 break;
             case 0x1d: /* fldt mem */
-                gen_helper_fldt_ST0(cpu_env, cpu_A0);
+                gen_helper_fldt_ST0(cpu_env, s->A0);
                 break;
             case 0x1f: /* fstpt mem */
-                gen_helper_fstt_ST0(cpu_env, cpu_A0);
+                gen_helper_fstt_ST0(cpu_env, s->A0);
                 gen_helper_fpop(cpu_env);
                 break;
             case 0x2c: /* frstor mem */
-                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x2e: /* fnsave mem */
-                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x2f: /* fnstsw mem */
-                gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
-                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
+                gen_helper_fnstsw(s->tmp2_i32, cpu_env);
+                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                     s->mem_index, MO_LEUW);
                 break;
             case 0x3c: /* fbld */
-                gen_helper_fbld_ST0(cpu_env, cpu_A0);
+                gen_helper_fbld_ST0(cpu_env, s->A0);
                 break;
             case 0x3e: /* fbstp */
-                gen_helper_fbst_ST0(cpu_env, cpu_A0);
+                gen_helper_fbst_ST0(cpu_env, s->A0);
                 gen_helper_fpop(cpu_env);
                 break;
             case 0x3d: /* fildll */
-                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
-                gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
+                tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
+                gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
                 break;
             case 0x3f: /* fistpll */
-                gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
-                tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
+                gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
+                tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
                 gen_helper_fpop(cpu_env);
                 break;
             default:
@@ -6215,9 +6243,9 @@
             case 0x3c: /* df/4 */
                 switch(rm) {
                 case 0:
-                    gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
-                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
-                    gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
+                    gen_helper_fnstsw(s->tmp2_i32, cpu_env);
+                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
+                    gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
                     break;
                 default:
                     goto unknown_op;
@@ -6327,7 +6355,7 @@
     case 0x6c: /* insS */
     case 0x6d:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base, 
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -6342,7 +6370,7 @@
     case 0x6e: /* outsS */
     case 0x6f:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes) | 4);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -6362,16 +6390,16 @@
     case 0xe5:
         ot = mo_b_d32(b, dflag);
         val = x86_ldub_code(env, s);
-        tcg_gen_movi_tl(cpu_T0, val);
+        tcg_gen_movi_tl(s->T0, val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_movi_i32(cpu_tmp2_i32, val);
-        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
-        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
-        gen_bpt_io(s, cpu_tmp2_i32, ot);
+        tcg_gen_movi_i32(s->tmp2_i32, val);
+        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
+        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
+        gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6381,18 +6409,18 @@
     case 0xe7:
         ot = mo_b_d32(b, dflag);
         val = x86_ldub_code(env, s);
-        tcg_gen_movi_tl(cpu_T0, val);
+        tcg_gen_movi_tl(s->T0, val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
-        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
+        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
 
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_movi_i32(cpu_tmp2_i32, val);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
-        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
-        gen_bpt_io(s, cpu_tmp2_i32, ot);
+        tcg_gen_movi_i32(s->tmp2_i32, val);
+        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
+        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
+        gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6401,16 +6429,16 @@
     case 0xec:
     case 0xed:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
-        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
-        gen_bpt_io(s, cpu_tmp2_i32, ot);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
+        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
+        gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6419,18 +6447,18 @@
     case 0xee:
     case 0xef:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
-        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
+        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
 
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
-        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
-        gen_bpt_io(s, cpu_tmp2_i32, ot);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
+        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
+        gen_bpt_io(s, s->tmp2_i32, ot);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6444,37 +6472,37 @@
         ot = gen_pop_T0(s);
         gen_stack_update(s, val + (1 << ot));
         /* Note that gen_pop_T0 uses a zero-extending load.  */
-        gen_op_jmp_v(cpu_T0);
+        gen_op_jmp_v(s->T0);
         gen_bnd_jmp(s);
-        gen_jr(s, cpu_T0);
+        gen_jr(s, s->T0);
         break;
     case 0xc3: /* ret */
         ot = gen_pop_T0(s);
         gen_pop_update(s, ot);
         /* Note that gen_pop_T0 uses a zero-extending load.  */
-        gen_op_jmp_v(cpu_T0);
+        gen_op_jmp_v(s->T0);
         gen_bnd_jmp(s);
-        gen_jr(s, cpu_T0);
+        gen_jr(s, s->T0);
         break;
     case 0xca: /* lret im */
         val = x86_ldsw_code(env, s);
     do_lret:
         if (s->pe && !s->vm86) {
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                       tcg_const_i32(val));
         } else {
             gen_stack_A0(s);
             /* pop offset */
-            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, dflag, s->T0, s->A0);
             /* NOTE: keeping EIP updated is not a problem in case of
                exception */
-            gen_op_jmp_v(cpu_T0);
+            gen_op_jmp_v(s->T0);
             /* pop selector */
             gen_add_A0_im(s, 1 << dflag);
-            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
-            gen_op_movl_seg_T0_vm(R_CS);
+            gen_op_ld_v(s, dflag, s->T0, s->A0);
+            gen_op_movl_seg_T0_vm(s, R_CS);
             /* add stack offset */
             gen_stack_update(s, val + (2 << dflag));
         }
@@ -6517,8 +6545,8 @@
             } else if (!CODE64(s)) {
                 tval &= 0xffffffff;
             }
-            tcg_gen_movi_tl(cpu_T0, next_eip);
-            gen_push_v(s, cpu_T0);
+            tcg_gen_movi_tl(s->T0, next_eip);
+            gen_push_v(s, s->T0);
             gen_bnd_jmp(s);
             gen_jmp(s, tval);
         }
@@ -6533,8 +6561,8 @@
             offset = insn_get(env, s, ot);
             selector = insn_get(env, s, MO_16);
 
-            tcg_gen_movi_tl(cpu_T0, selector);
-            tcg_gen_movi_tl(cpu_T1, offset);
+            tcg_gen_movi_tl(s->T0, selector);
+            tcg_gen_movi_tl(s->T1, offset);
         }
         goto do_lcall;
     case 0xe9: /* jmp im */
@@ -6562,8 +6590,8 @@
             offset = insn_get(env, s, ot);
             selector = insn_get(env, s, MO_16);
 
-            tcg_gen_movi_tl(cpu_T0, selector);
-            tcg_gen_movi_tl(cpu_T1, offset);
+            tcg_gen_movi_tl(s->T0, selector);
+            tcg_gen_movi_tl(s->T1, offset);
         }
         goto do_ljmp;
     case 0xeb: /* jmp Jb */
@@ -6595,7 +6623,7 @@
 
     case 0x190 ... 0x19f: /* setcc Gv */
         modrm = x86_ldub_code(env, s);
-        gen_setcc1(s, b, cpu_T0);
+        gen_setcc1(s, b, s->T0);
         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
@@ -6616,8 +6644,8 @@
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
-            gen_helper_read_eflags(cpu_T0, cpu_env);
-            gen_push_v(s, cpu_T0);
+            gen_helper_read_eflags(s->T0, cpu_env);
+            gen_push_v(s, s->T0);
         }
         break;
     case 0x9d: /* popf */
@@ -6628,13 +6656,13 @@
             ot = gen_pop_T0(s);
             if (s->cpl == 0) {
                 if (dflag != MO_16) {
-                    gen_helper_write_eflags(cpu_env, cpu_T0,
+                    gen_helper_write_eflags(cpu_env, s->T0,
                                             tcg_const_i32((TF_MASK | AC_MASK |
                                                            ID_MASK | NT_MASK |
                                                            IF_MASK |
                                                            IOPL_MASK)));
                 } else {
-                    gen_helper_write_eflags(cpu_env, cpu_T0,
+                    gen_helper_write_eflags(cpu_env, s->T0,
                                             tcg_const_i32((TF_MASK | AC_MASK |
                                                            ID_MASK | NT_MASK |
                                                            IF_MASK | IOPL_MASK)
@@ -6643,14 +6671,14 @@
             } else {
                 if (s->cpl <= s->iopl) {
                     if (dflag != MO_16) {
-                        gen_helper_write_eflags(cpu_env, cpu_T0,
+                        gen_helper_write_eflags(cpu_env, s->T0,
                                                 tcg_const_i32((TF_MASK |
                                                                AC_MASK |
                                                                ID_MASK |
                                                                NT_MASK |
                                                                IF_MASK)));
                     } else {
-                        gen_helper_write_eflags(cpu_env, cpu_T0,
+                        gen_helper_write_eflags(cpu_env, s->T0,
                                                 tcg_const_i32((TF_MASK |
                                                                AC_MASK |
                                                                ID_MASK |
@@ -6660,11 +6688,11 @@
                     }
                 } else {
                     if (dflag != MO_16) {
-                        gen_helper_write_eflags(cpu_env, cpu_T0,
+                        gen_helper_write_eflags(cpu_env, s->T0,
                                            tcg_const_i32((TF_MASK | AC_MASK |
                                                           ID_MASK | NT_MASK)));
                     } else {
-                        gen_helper_write_eflags(cpu_env, cpu_T0,
+                        gen_helper_write_eflags(cpu_env, s->T0,
                                            tcg_const_i32((TF_MASK | AC_MASK |
                                                           ID_MASK | NT_MASK)
                                                          & 0xffff));
@@ -6674,26 +6702,26 @@
             gen_pop_update(s, ot);
             set_cc_op(s, CC_OP_EFLAGS);
             /* abort translation because TF/AC flag may change */
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
         }
         break;
     case 0x9e: /* sahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
-        gen_op_mov_v_reg(MO_8, cpu_T0, R_AH);
+        gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
         gen_compute_eflags(s);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
-        tcg_gen_andi_tl(cpu_T0, cpu_T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
-        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T0);
+        tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
+        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
         break;
     case 0x9f: /* lahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
         gen_compute_eflags(s);
         /* Note: gen_compute_eflags() only gives the condition codes */
-        tcg_gen_ori_tl(cpu_T0, cpu_cc_src, 0x02);
-        gen_op_mov_reg_v(MO_8, R_AH, cpu_T0);
+        tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
+        gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
         break;
     case 0xf5: /* cmc */
         gen_compute_eflags(s);
@@ -6708,12 +6736,12 @@
         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
         break;
     case 0xfc: /* cld */
-        tcg_gen_movi_i32(cpu_tmp2_i32, 1);
-        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
+        tcg_gen_movi_i32(s->tmp2_i32, 1);
+        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
         break;
     case 0xfd: /* std */
-        tcg_gen_movi_i32(cpu_tmp2_i32, -1);
-        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
+        tcg_gen_movi_i32(s->tmp2_i32, -1);
+        tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
         break;
 
         /************************/
@@ -6728,14 +6756,14 @@
             s->rip_offset = 1;
             gen_lea_modrm(env, s, modrm);
             if (!(s->prefix & PREFIX_LOCK)) {
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
             }
         } else {
-            gen_op_mov_v_reg(ot, cpu_T0, rm);
+            gen_op_mov_v_reg(s, ot, s->T0, rm);
         }
         /* load shift */
         val = x86_ldub_code(env, s);
-        tcg_gen_movi_tl(cpu_T1, val);
+        tcg_gen_movi_tl(s->T1, val);
         if (op < 4)
             goto unknown_op;
         op -= 4;
@@ -6757,70 +6785,70 @@
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
-        gen_op_mov_v_reg(MO_32, cpu_T1, reg);
+        gen_op_mov_v_reg(s, MO_32, s->T1, reg);
         if (mod != 3) {
             AddressParts a = gen_lea_modrm_0(env, s, modrm);
             /* specific case: we need to add a displacement */
-            gen_exts(ot, cpu_T1);
-            tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
-            tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
-            tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0);
-            gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
+            gen_exts(ot, s->T1);
+            tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
+            tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
+            tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
+            gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
             if (!(s->prefix & PREFIX_LOCK)) {
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
             }
         } else {
-            gen_op_mov_v_reg(ot, cpu_T0, rm);
+            gen_op_mov_v_reg(s, ot, s->T0, rm);
         }
     bt_op:
-        tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
-        tcg_gen_movi_tl(cpu_tmp0, 1);
-        tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
+        tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
+        tcg_gen_movi_tl(s->tmp0, 1);
+        tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
         if (s->prefix & PREFIX_LOCK) {
             switch (op) {
             case 0: /* bt */
                 /* Needs no atomic ops; we surpressed the normal
                    memory load for LOCK above so do it now.  */
-                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_ld_v(s, ot, s->T0, s->A0);
                 break;
             case 1: /* bts */
-                tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
                                            s->mem_index, ot | MO_LE);
                 break;
             case 2: /* btr */
-                tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
-                tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0,
+                tcg_gen_not_tl(s->tmp0, s->tmp0);
+                tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
                                             s->mem_index, ot | MO_LE);
                 break;
             default:
             case 3: /* btc */
-                tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0,
+                tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
                                             s->mem_index, ot | MO_LE);
                 break;
             }
-            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
+            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
         } else {
-            tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
+            tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
             switch (op) {
             case 0: /* bt */
                 /* Data already loaded; nothing to do.  */
                 break;
             case 1: /* bts */
-                tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
+                tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
                 break;
             case 2: /* btr */
-                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
+                tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
                 break;
             default:
             case 3: /* btc */
-                tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
+                tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
                 break;
             }
             if (op != 0) {
                 if (mod != 3) {
-                    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+                    gen_op_st_v(s, ot, s->T0, s->A0);
                 } else {
-                    gen_op_mov_reg_v(ot, rm, cpu_T0);
+                    gen_op_mov_reg_v(s, ot, rm, s->T0);
                 }
             }
         }
@@ -6844,13 +6872,13 @@
                We can get that same Z value (and the new C value) by leaving
                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
                same width.  */
-            tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
+            tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
             break;
         default:
             /* Otherwise, generate EFLAGS and replace the C bit.  */
             gen_compute_eflags(s);
-            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
+            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
                                ctz32(CC_C), 1);
             break;
         }
@@ -6861,7 +6889,7 @@
         modrm = x86_ldub_code(env, s);
         reg = ((modrm >> 3) & 7) | rex_r;
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_extu(ot, cpu_T0);
+        gen_extu(ot, s->T0);
 
         /* Note that lzcnt and tzcnt are in different extensions.  */
         if ((prefixes & PREFIX_REPZ)
@@ -6870,23 +6898,23 @@
                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
             int size = 8 << ot;
             /* For lzcnt/tzcnt, C bit is defined related to the input. */
-            tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
+            tcg_gen_mov_tl(cpu_cc_src, s->T0);
             if (b & 1) {
                 /* For lzcnt, reduce the target_ulong result by the
                    number of zeros that we expect to find at the top.  */
-                tcg_gen_clzi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS);
-                tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size);
+                tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
+                tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
             } else {
                 /* For tzcnt, a zero input must return the operand size.  */
-                tcg_gen_ctzi_tl(cpu_T0, cpu_T0, size);
+                tcg_gen_ctzi_tl(s->T0, s->T0, size);
             }
             /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
-            gen_op_update1_cc();
+            gen_op_update1_cc(s);
             set_cc_op(s, CC_OP_BMILGB + ot);
         } else {
             /* For bsr/bsf, only the Z bit is defined and it is related
                to the input and not the result.  */
-            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
             set_cc_op(s, CC_OP_LOGICB + ot);
 
             /* ??? The manual says that the output is undefined when the
@@ -6896,14 +6924,14 @@
             if (b & 1) {
                 /* For bsr, return the bit index of the first 1 bit,
                    not the count of leading zeros.  */
-                tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
-                tcg_gen_clz_tl(cpu_T0, cpu_T0, cpu_T1);
-                tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1);
+                tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
+                tcg_gen_clz_tl(s->T0, s->T0, s->T1);
+                tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
             } else {
-                tcg_gen_ctz_tl(cpu_T0, cpu_T0, cpu_regs[reg]);
+                tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
             }
         }
-        gen_op_mov_reg_v(ot, reg, cpu_T0);
+        gen_op_mov_reg_v(s, ot, reg, s->T0);
         break;
         /************************/
         /* bcd */
@@ -6966,7 +6994,7 @@
         }
         if (prefixes & PREFIX_REPZ) {
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
             s->base.is_jmp = DISAS_NORETURN;
         }
@@ -6994,7 +7022,7 @@
         if (CODE64(s))
             goto illegal_op;
         gen_update_cc_op(s);
-        gen_jmp_im(pc_start - s->cs_base);
+        gen_jmp_im(s, pc_start - s->cs_base);
         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
         break;
 #ifdef WANT_ICEBP
@@ -7028,7 +7056,7 @@
         if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
             gen_helper_sti(cpu_env);
             /* interruptions are enabled only the first insn after sti */
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob_inhibit_irq(s, true);
         } else {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
@@ -7043,37 +7071,37 @@
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_op_mov_v_reg(ot, cpu_T0, reg);
+        gen_op_mov_v_reg(s, ot, s->T0, reg);
         gen_lea_modrm(env, s, modrm);
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
         if (ot == MO_16) {
-            gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
+            gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
         } else {
-            gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
+            gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
         }
         break;
     case 0x1c8 ... 0x1cf: /* bswap reg */
         reg = (b & 7) | REX_B(s);
 #ifdef TARGET_X86_64
         if (dflag == MO_64) {
-            gen_op_mov_v_reg(MO_64, cpu_T0, reg);
-            tcg_gen_bswap64_i64(cpu_T0, cpu_T0);
-            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
+            gen_op_mov_v_reg(s, MO_64, s->T0, reg);
+            tcg_gen_bswap64_i64(s->T0, s->T0);
+            gen_op_mov_reg_v(s, MO_64, reg, s->T0);
         } else
 #endif
         {
-            gen_op_mov_v_reg(MO_32, cpu_T0, reg);
-            tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
-            tcg_gen_bswap32_tl(cpu_T0, cpu_T0);
-            gen_op_mov_reg_v(MO_32, reg, cpu_T0);
+            gen_op_mov_v_reg(s, MO_32, s->T0, reg);
+            tcg_gen_ext32u_tl(s->T0, s->T0);
+            tcg_gen_bswap32_tl(s->T0, s->T0);
+            gen_op_mov_reg_v(s, MO_32, reg, s->T0);
         }
         break;
     case 0xd6: /* salc */
         if (CODE64(s))
             goto illegal_op;
-        gen_compute_eflags_c(s, cpu_T0);
-        tcg_gen_neg_tl(cpu_T0, cpu_T0);
-        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
+        gen_compute_eflags_c(s, s->T0);
+        tcg_gen_neg_tl(s->T0, s->T0);
+        gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
         break;
     case 0xe0: /* loopnz */
     case 0xe1: /* loopz */
@@ -7096,26 +7124,26 @@
             switch(b) {
             case 0: /* loopnz */
             case 1: /* loopz */
-                gen_op_add_reg_im(s->aflag, R_ECX, -1);
-                gen_op_jz_ecx(s->aflag, l3);
+                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
+                gen_op_jz_ecx(s, s->aflag, l3);
                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
                 break;
             case 2: /* loop */
-                gen_op_add_reg_im(s->aflag, R_ECX, -1);
-                gen_op_jnz_ecx(s->aflag, l1);
+                gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
+                gen_op_jnz_ecx(s, s->aflag, l1);
                 break;
             default:
             case 3: /* jcxz */
-                gen_op_jz_ecx(s->aflag, l1);
+                gen_op_jz_ecx(s, s->aflag, l1);
                 break;
             }
 
             gen_set_label(l3);
-            gen_jmp_im(next_eip);
+            gen_jmp_im(s, next_eip);
             tcg_gen_br(l2);
 
             gen_set_label(l1);
-            gen_jmp_im(tval);
+            gen_jmp_im(s, tval);
             gen_set_label(l2);
             gen_eob(s);
         }
@@ -7126,7 +7154,7 @@
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             if (b & 2) {
                 gen_helper_rdmsr(cpu_env);
             } else {
@@ -7136,7 +7164,7 @@
         break;
     case 0x131: /* rdtsc */
         gen_update_cc_op(s);
-        gen_jmp_im(pc_start - s->cs_base);
+        gen_jmp_im(s, pc_start - s->cs_base);
         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
 	}
@@ -7148,7 +7176,7 @@
         break;
     case 0x133: /* rdpmc */
         gen_update_cc_op(s);
-        gen_jmp_im(pc_start - s->cs_base);
+        gen_jmp_im(s, pc_start - s->cs_base);
         gen_helper_rdpmc(cpu_env);
         break;
     case 0x134: /* sysenter */
@@ -7177,7 +7205,7 @@
     case 0x105: /* syscall */
         /* XXX: is it usable in real mode ? */
         gen_update_cc_op(s);
-        gen_jmp_im(pc_start - s->cs_base);
+        gen_jmp_im(s, pc_start - s->cs_base);
         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
         /* TF handling for the syscall insn is different. The TF bit is  checked
            after the syscall insn completes. This allows #DB to not be
@@ -7203,7 +7231,7 @@
 #endif
     case 0x1a2: /* cpuid */
         gen_update_cc_op(s);
-        gen_jmp_im(pc_start - s->cs_base);
+        gen_jmp_im(s, pc_start - s->cs_base);
         gen_helper_cpuid(cpu_env);
         break;
     case 0xf4: /* hlt */
@@ -7211,7 +7239,7 @@
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
             s->base.is_jmp = DISAS_NORETURN;
         }
@@ -7225,7 +7253,7 @@
             if (!s->pe || s->vm86)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
-            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+            tcg_gen_ld32u_tl(s->T0, cpu_env,
                              offsetof(CPUX86State, ldt.selector));
             ot = mod == 3 ? dflag : MO_16;
             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
@@ -7238,15 +7266,15 @@
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-                gen_helper_lldt(cpu_env, cpu_tmp2_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_lldt(cpu_env, s->tmp2_i32);
             }
             break;
         case 1: /* str */
             if (!s->pe || s->vm86)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
-            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+            tcg_gen_ld32u_tl(s->T0, cpu_env,
                              offsetof(CPUX86State, tr.selector));
             ot = mod == 3 ? dflag : MO_16;
             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
@@ -7259,8 +7287,8 @@
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
-                gen_helper_ltr(cpu_env, cpu_tmp2_i32);
+                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+                gen_helper_ltr(cpu_env, s->tmp2_i32);
             }
             break;
         case 4: /* verr */
@@ -7270,9 +7298,9 @@
             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
             gen_update_cc_op(s);
             if (op == 4) {
-                gen_helper_verr(cpu_env, cpu_T0);
+                gen_helper_verr(cpu_env, s->T0);
             } else {
-                gen_helper_verw(cpu_env, cpu_T0);
+                gen_helper_verw(cpu_env, s->T0);
             }
             set_cc_op(s, CC_OP_EFLAGS);
             break;
@@ -7287,15 +7315,15 @@
         CASE_MODRM_MEM_OP(0): /* sgdt */
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_ld32u_tl(cpu_T0,
+            tcg_gen_ld32u_tl(s->T0,
                              cpu_env, offsetof(CPUX86State, gdt.limit));
-            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
+            gen_op_st_v(s, MO_16, s->T0, s->A0);
             gen_add_A0_im(s, 2);
-            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
+            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
             if (dflag == MO_16) {
-                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
+                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
-            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
+            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             break;
 
         case 0xc8: /* monitor */
@@ -7303,11 +7331,11 @@
                 goto illegal_op;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
-            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
-            gen_extu(s->aflag, cpu_A0);
+            gen_jmp_im(s, pc_start - s->cs_base);
+            tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
+            gen_extu(s->aflag, s->A0);
             gen_add_A0_ds_seg(s);
-            gen_helper_monitor(cpu_env, cpu_A0);
+            gen_helper_monitor(cpu_env, s->A0);
             break;
 
         case 0xc9: /* mwait */
@@ -7315,7 +7343,7 @@
                 goto illegal_op;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
             gen_eob(s);
             break;
@@ -7326,7 +7354,7 @@
                 goto illegal_op;
             }
             gen_helper_clac(cpu_env);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7336,21 +7364,21 @@
                 goto illegal_op;
             }
             gen_helper_stac(cpu_env);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
         CASE_MODRM_MEM_OP(1): /* sidt */
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
-            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
+            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
+            gen_op_st_v(s, MO_16, s->T0, s->A0);
             gen_add_A0_im(s, 2);
-            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
+            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
             if (dflag == MO_16) {
-                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
+                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
-            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
+            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             break;
 
         case 0xd0: /* xgetbv */
@@ -7359,9 +7387,9 @@
                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
                 goto illegal_op;
             }
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
-            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
+            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
             break;
 
         case 0xd1: /* xsetbv */
@@ -7374,12 +7402,12 @@
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 break;
             }
-            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
             /* End TB because translation flags may change.  */
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7392,7 +7420,7 @@
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
                              tcg_const_i32(s->pc - pc_start));
             tcg_gen_exit_tb(NULL, 0);
@@ -7404,7 +7432,7 @@
                 goto illegal_op;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_vmmcall(cpu_env);
             break;
 
@@ -7417,7 +7445,7 @@
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
             break;
 
@@ -7430,7 +7458,7 @@
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
             break;
 
@@ -7446,7 +7474,7 @@
             }
             gen_update_cc_op(s);
             gen_helper_stgi(cpu_env);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7459,7 +7487,7 @@
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_clgi(cpu_env);
             break;
 
@@ -7470,7 +7498,7 @@
                 goto illegal_op;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_skinit(cpu_env);
             break;
 
@@ -7483,7 +7511,7 @@
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
             break;
 
@@ -7494,14 +7522,14 @@
             }
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
             gen_lea_modrm(env, s, modrm);
-            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
+            gen_op_ld_v(s, MO_16, s->T1, s->A0);
             gen_add_A0_im(s, 2);
-            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             if (dflag == MO_16) {
-                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
+                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
-            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
-            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
+            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
+            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
             break;
 
         CASE_MODRM_MEM_OP(3): /* lidt */
@@ -7511,19 +7539,19 @@
             }
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
             gen_lea_modrm(env, s, modrm);
-            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
+            gen_op_ld_v(s, MO_16, s->T1, s->A0);
             gen_add_A0_im(s, 2);
-            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
+            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
             if (dflag == MO_16) {
-                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
+                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
             }
-            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
-            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
+            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
+            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
             break;
 
         CASE_MODRM_OP(4): /* smsw */
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
-            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
+            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
             if (CODE64(s)) {
                 mod = (modrm >> 6) & 3;
                 ot = (mod != 3 ? MO_16 : s->dflag);
@@ -7536,18 +7564,18 @@
             if (prefixes & PREFIX_LOCK) {
                 goto illegal_op;
             }
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
-            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
+            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
             break;
         case 0xef: /* wrpkru */
             if (prefixes & PREFIX_LOCK) {
                 goto illegal_op;
             }
-            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
-            gen_helper_wrpkru(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
+            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
             break;
         CASE_MODRM_OP(6): /* lmsw */
             if (s->cpl != 0) {
@@ -7556,8 +7584,8 @@
             }
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-            gen_helper_lmsw(cpu_env, cpu_T0);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_helper_lmsw(cpu_env, s->T0);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7567,10 +7595,10 @@
                 break;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             gen_lea_modrm(env, s, modrm);
-            gen_helper_invlpg(cpu_env, cpu_A0);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_helper_invlpg(cpu_env, s->A0);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -7580,10 +7608,10 @@
                 if (s->cpl != 0) {
                     gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 } else {
-                    tcg_gen_mov_tl(cpu_T0, cpu_seg_base[R_GS]);
+                    tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
                     tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
                                   offsetof(CPUX86State, kernelgsbase));
-                    tcg_gen_st_tl(cpu_T0, cpu_env,
+                    tcg_gen_st_tl(s->T0, cpu_env,
                                   offsetof(CPUX86State, kernelgsbase));
                 }
                 break;
@@ -7596,7 +7624,7 @@
                 goto illegal_op;
             }
             gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
+            gen_jmp_im(s, pc_start - s->cs_base);
             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                 gen_io_start();
             }
@@ -7634,16 +7662,16 @@
             rm = (modrm & 7) | REX_B(s);
 
             if (mod == 3) {
-                gen_op_mov_v_reg(MO_32, cpu_T0, rm);
+                gen_op_mov_v_reg(s, MO_32, s->T0, rm);
                 /* sign extend */
                 if (d_ot == MO_64) {
-                    tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
+                    tcg_gen_ext32s_tl(s->T0, s->T0);
                 }
-                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
+                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, cpu_A0);
-                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
+                gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
+                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
             }
         } else
 #endif
@@ -7663,19 +7691,19 @@
             rm = modrm & 7;
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, ot, t0, cpu_A0);
+                gen_op_ld_v(s, ot, t0, s->A0);
                 a0 = tcg_temp_local_new();
-                tcg_gen_mov_tl(a0, cpu_A0);
+                tcg_gen_mov_tl(a0, s->A0);
             } else {
-                gen_op_mov_v_reg(ot, t0, rm);
+                gen_op_mov_v_reg(s, ot, t0, rm);
                 a0 = NULL;
             }
-            gen_op_mov_v_reg(ot, t1, reg);
-            tcg_gen_andi_tl(cpu_tmp0, t0, 3);
+            gen_op_mov_v_reg(s, ot, t1, reg);
+            tcg_gen_andi_tl(s->tmp0, t0, 3);
             tcg_gen_andi_tl(t1, t1, 3);
             tcg_gen_movi_tl(t2, 0);
             label1 = gen_new_label();
-            tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
+            tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
             tcg_gen_andi_tl(t0, t0, ~3);
             tcg_gen_or_tl(t0, t0, t1);
             tcg_gen_movi_tl(t2, CC_Z);
@@ -7684,7 +7712,7 @@
                 gen_op_st_v(s, ot, t0, a0);
                 tcg_temp_free(a0);
            } else {
-                gen_op_mov_reg_v(ot, rm, t0);
+                gen_op_mov_reg_v(s, ot, rm, t0);
             }
             gen_compute_eflags(s);
             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
@@ -7708,14 +7736,14 @@
             t0 = tcg_temp_local_new();
             gen_update_cc_op(s);
             if (b == 0x102) {
-                gen_helper_lar(t0, cpu_env, cpu_T0);
+                gen_helper_lar(t0, cpu_env, s->T0);
             } else {
-                gen_helper_lsl(t0, cpu_env, cpu_T0);
+                gen_helper_lsl(t0, cpu_env, s->T0);
             }
-            tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
+            tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
             label1 = gen_new_label();
-            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
-            gen_op_mov_reg_v(ot, reg, t0);
+            tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
+            gen_op_mov_reg_v(s, ot, reg, t0);
             gen_set_label(label1);
             set_cc_op(s, CC_OP_EFLAGS);
             tcg_temp_free(t0);
@@ -7781,16 +7809,16 @@
                 } else {
                     gen_lea_modrm(env, s, modrm);
                     if (CODE64(s)) {
-                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
+                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
                                             s->mem_index, MO_LEQ);
-                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
-                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
+                        tcg_gen_addi_tl(s->A0, s->A0, 8);
+                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
                                             s->mem_index, MO_LEQ);
                     } else {
-                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
+                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
                                             s->mem_index, MO_LEUL);
-                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
-                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
+                        tcg_gen_addi_tl(s->A0, s->A0, 4);
+                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
                                             s->mem_index, MO_LEUL);
                     }
                     /* bnd registers are now in-use */
@@ -7806,22 +7834,22 @@
                     goto illegal_op;
                 }
                 if (a.base >= 0) {
-                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
+                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
                 } else {
-                    tcg_gen_movi_tl(cpu_A0, 0);
+                    tcg_gen_movi_tl(s->A0, 0);
                 }
-                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
+                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
                 if (a.index >= 0) {
-                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
+                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
                 } else {
-                    tcg_gen_movi_tl(cpu_T0, 0);
+                    tcg_gen_movi_tl(s->T0, 0);
                 }
                 if (CODE64(s)) {
-                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, cpu_A0, cpu_T0);
+                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
                 } else {
-                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, cpu_A0, cpu_T0);
+                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
                 }
@@ -7855,11 +7883,11 @@
                     /* rip-relative generates #ud */
                     goto illegal_op;
                 }
-                tcg_gen_not_tl(cpu_A0, gen_lea_modrm_1(a));
+                tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
                 if (!CODE64(s)) {
-                    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
+                    tcg_gen_ext32u_tl(s->A0, s->A0);
                 }
-                tcg_gen_extu_tl_i64(cpu_bndu[reg], cpu_A0);
+                tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
                 /* bnd registers are now in-use */
                 gen_set_hflag(s, HF_MPX_IU_MASK);
                 break;
@@ -7888,16 +7916,16 @@
                 } else {
                     gen_lea_modrm(env, s, modrm);
                     if (CODE64(s)) {
-                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
+                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
                                             s->mem_index, MO_LEQ);
-                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
-                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
+                        tcg_gen_addi_tl(s->A0, s->A0, 8);
+                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
                                             s->mem_index, MO_LEQ);
                     } else {
-                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
+                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
                                             s->mem_index, MO_LEUL);
-                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
-                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
+                        tcg_gen_addi_tl(s->A0, s->A0, 4);
+                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
                                             s->mem_index, MO_LEUL);
                     }
                 }
@@ -7911,21 +7939,21 @@
                     goto illegal_op;
                 }
                 if (a.base >= 0) {
-                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
+                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
                 } else {
-                    tcg_gen_movi_tl(cpu_A0, 0);
+                    tcg_gen_movi_tl(s->A0, 0);
                 }
-                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
+                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
                 if (a.index >= 0) {
-                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
+                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
                 } else {
-                    tcg_gen_movi_tl(cpu_T0, 0);
+                    tcg_gen_movi_tl(s->T0, 0);
                 }
                 if (CODE64(s)) {
-                    gen_helper_bndstx64(cpu_env, cpu_A0, cpu_T0,
+                    gen_helper_bndstx64(cpu_env, s->A0, s->T0,
                                         cpu_bndl[reg], cpu_bndu[reg]);
                 } else {
-                    gen_helper_bndstx32(cpu_env, cpu_A0, cpu_T0,
+                    gen_helper_bndstx32(cpu_env, s->A0, s->T0,
                                         cpu_bndl[reg], cpu_bndu[reg]);
                 }
             }
@@ -7964,25 +7992,25 @@
             case 4:
             case 8:
                 gen_update_cc_op(s);
-                gen_jmp_im(pc_start - s->cs_base);
+                gen_jmp_im(s, pc_start - s->cs_base);
                 if (b & 2) {
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_start();
                     }
-                    gen_op_mov_v_reg(ot, cpu_T0, rm);
+                    gen_op_mov_v_reg(s, ot, s->T0, rm);
                     gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
-                                         cpu_T0);
+                                         s->T0);
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_end();
                     }
-                    gen_jmp_im(s->pc - s->cs_base);
+                    gen_jmp_im(s, s->pc - s->cs_base);
                     gen_eob(s);
                 } else {
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_start();
                     }
-                    gen_helper_read_crN(cpu_T0, cpu_env, tcg_const_i32(reg));
-                    gen_op_mov_reg_v(ot, rm, cpu_T0);
+                    gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
+                    gen_op_mov_reg_v(s, ot, rm, s->T0);
                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                         gen_io_end();
                     }
@@ -8015,16 +8043,16 @@
             }
             if (b & 2) {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
-                gen_op_mov_v_reg(ot, cpu_T0, rm);
-                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
-                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T0);
-                gen_jmp_im(s->pc - s->cs_base);
+                gen_op_mov_v_reg(s, ot, s->T0, rm);
+                tcg_gen_movi_i32(s->tmp2_i32, reg);
+                gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
+                gen_jmp_im(s, s->pc - s->cs_base);
                 gen_eob(s);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
-                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
-                gen_helper_get_dr(cpu_T0, cpu_env, cpu_tmp2_i32);
-                gen_op_mov_reg_v(ot, rm, cpu_T0);
+                tcg_gen_movi_i32(s->tmp2_i32, reg);
+                gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
+                gen_op_mov_reg_v(s, ot, rm, s->T0);
             }
         }
         break;
@@ -8035,7 +8063,7 @@
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
             gen_helper_clts(cpu_env);
             /* abort block because static cpu state changed */
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
         }
         break;
@@ -8065,7 +8093,7 @@
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            gen_helper_fxsave(cpu_env, cpu_A0);
+            gen_helper_fxsave(cpu_env, s->A0);
             break;
 
         CASE_MODRM_MEM_OP(1): /* fxrstor */
@@ -8078,7 +8106,7 @@
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            gen_helper_fxrstor(cpu_env, cpu_A0);
+            gen_helper_fxrstor(cpu_env, s->A0);
             break;
 
         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
@@ -8090,8 +8118,8 @@
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL);
-            gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
+            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
+            gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
             break;
 
         CASE_MODRM_MEM_OP(3): /* stmxcsr */
@@ -8103,8 +8131,8 @@
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
-            gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
+            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
+            gen_op_st_v(s, MO_32, s->T0, s->A0);
             break;
 
         CASE_MODRM_MEM_OP(4): /* xsave */
@@ -8114,9 +8142,9 @@
                 goto illegal_op;
             }
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
+            gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
             break;
 
         CASE_MODRM_MEM_OP(5): /* xrstor */
@@ -8126,13 +8154,13 @@
                 goto illegal_op;
             }
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                   cpu_regs[R_EDX]);
-            gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
+            gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
             /* XRSTOR is how MPX is enabled, which changes how
                we translate.  Thus we need to end the TB.  */
             gen_update_cc_op(s);
-            gen_jmp_im(s->pc - s->cs_base);
+            gen_jmp_im(s, s->pc - s->cs_base);
             gen_eob(s);
             break;
 
@@ -8154,9 +8182,9 @@
                     goto illegal_op;
                 }
                 gen_lea_modrm(env, s, modrm);
-                tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+                tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                       cpu_regs[R_EDX]);
-                gen_helper_xsaveopt(cpu_env, cpu_A0, cpu_tmp1_i64);
+                gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
             }
             break;
 
@@ -8190,8 +8218,8 @@
                 TCGv base, treg, src, dst;
 
                 /* Preserve hflags bits by testing CR4 at runtime.  */
-                tcg_gen_movi_i32(cpu_tmp2_i32, CR4_FSGSBASE_MASK);
-                gen_helper_cr4_testbit(cpu_env, cpu_tmp2_i32);
+                tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
+                gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
 
                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
@@ -8262,7 +8290,7 @@
         if (!(s->flags & HF_SMM_MASK))
             goto illegal_op;
         gen_update_cc_op(s);
-        gen_jmp_im(s->pc - s->cs_base);
+        gen_jmp_im(s, s->pc - s->cs_base);
         gen_helper_rsm(cpu_env);
         gen_eob(s);
         break;
@@ -8283,10 +8311,10 @@
         }
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_extu(ot, cpu_T0);
-        tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
-        tcg_gen_ctpop_tl(cpu_T0, cpu_T0);
-        gen_op_mov_reg_v(ot, reg, cpu_T0);
+        gen_extu(ot, s->T0);
+        tcg_gen_mov_tl(cpu_cc_src, s->T0);
+        tcg_gen_ctpop_tl(s->T0, s->T0);
+        gen_op_mov_reg_v(s, ot, reg, s->T0);
 
         set_cc_op(s, CC_OP_POPCNT);
         break;
@@ -8452,18 +8480,18 @@
         printf("ERROR addseg\n");
 #endif
 
-    cpu_T0 = tcg_temp_new();
-    cpu_T1 = tcg_temp_new();
-    cpu_A0 = tcg_temp_new();
+    dc->T0 = tcg_temp_new();
+    dc->T1 = tcg_temp_new();
+    dc->A0 = tcg_temp_new();
 
-    cpu_tmp0 = tcg_temp_new();
-    cpu_tmp1_i64 = tcg_temp_new_i64();
-    cpu_tmp2_i32 = tcg_temp_new_i32();
-    cpu_tmp3_i32 = tcg_temp_new_i32();
-    cpu_tmp4 = tcg_temp_new();
-    cpu_ptr0 = tcg_temp_new_ptr();
-    cpu_ptr1 = tcg_temp_new_ptr();
-    cpu_cc_srcT = tcg_temp_local_new();
+    dc->tmp0 = tcg_temp_new();
+    dc->tmp1_i64 = tcg_temp_new_i64();
+    dc->tmp2_i32 = tcg_temp_new_i32();
+    dc->tmp3_i32 = tcg_temp_new_i32();
+    dc->tmp4 = tcg_temp_new();
+    dc->ptr0 = tcg_temp_new_ptr();
+    dc->ptr1 = tcg_temp_new_ptr();
+    dc->cc_srcT = tcg_temp_local_new();
 }
 
 static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
@@ -8510,10 +8538,10 @@
            chance to happen */
         dc->base.is_jmp = DISAS_TOO_MANY;
     } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
-               && ((dc->base.pc_next & TARGET_PAGE_MASK)
-                   != ((dc->base.pc_next + TARGET_MAX_INSN_SIZE - 1)
+               && ((pc_next & TARGET_PAGE_MASK)
+                   != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
                        & TARGET_PAGE_MASK)
-                   || (dc->base.pc_next & ~TARGET_PAGE_MASK) == 0)) {
+                   || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
         /* Do not cross the boundary of the pages in icount mode,
            it can cause an exception. Do it only when boundary is
            crossed by the first instruction in the block.
@@ -8533,7 +8561,7 @@
     DisasContext *dc = container_of(dcbase, DisasContext, base);
 
     if (dc->base.is_jmp == DISAS_TOO_MANY) {
-        gen_jmp_im(dc->base.pc_next - dc->cs_base);
+        gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
         gen_eob(dc);
     }
 }
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index 497706b..e217fb3 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -113,11 +113,20 @@
 }
 
 static void mips_cpu_disas_set_info(CPUState *s, disassemble_info *info) {
+    MIPSCPU *cpu = MIPS_CPU(s);
+    CPUMIPSState *env = &cpu->env;
+
+    if (!(env->insn_flags & ISA_NANOMIPS32)) { /* by target endianness */
 #ifdef TARGET_WORDS_BIGENDIAN
-    info->print_insn = print_insn_big_mips;
+        info->print_insn = print_insn_big_mips;
 #else
-    info->print_insn = print_insn_little_mips;
+        info->print_insn = print_insn_little_mips;
 #endif
+    } else { /* print_insn is left untouched without CONFIG_NANOMIPS_DIS */
+#if defined(CONFIG_NANOMIPS_DIS)
+        info->print_insn = print_insn_nanomips;
+#endif
+    }
 }
 
 static void mips_cpu_realizefn(DeviceState *dev, Error **errp)
diff --git a/target/mips/cpu.h b/target/mips/cpu.h
index 28af4d1..03c03fd 100644
--- a/target/mips/cpu.h
+++ b/target/mips/cpu.h
@@ -170,6 +170,16 @@
         MSACSR_FS_MASK)
 
     float_status msa_fp_status;
+
+#define NUMBER_OF_MXU_REGISTERS 16
+    target_ulong mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1];
+    target_ulong mxu_cr;
+#define MXU_CR_LC       31
+#define MXU_CR_RC       30
+#define MXU_CR_BIAS     2
+#define MXU_CR_RD_EN    1
+#define MXU_CR_MXU_EN   0
+
 };
 
 typedef struct CPUMIPSState CPUMIPSState;
@@ -195,10 +205,125 @@
 #define MSAIR_ProcID    8
 #define MSAIR_Rev       0
 
+/*
+ *     Summary of CP0 registers
+ *     ========================
+ *
+ *
+ *     Register 0        Register 1        Register 2        Register 3
+ *     ----------        ----------        ----------        ----------
+ *
+ * 0   Index             Random            EntryLo0          EntryLo1
+ * 1   MVPControl        VPEControl        TCStatus          GlobalNumber
+ * 2   MVPConf0          VPEConf0          TCBind
+ * 3   MVPConf1          VPEConf1          TCRestart
+ * 4   VPControl         YQMask            TCHalt
+ * 5                     VPESchedule       TCContext
+ * 6                     VPEScheFBack      TCSchedule
+ * 7                     VPEOpt            TCScheFBack       TCOpt
+ *
+ *
+ *     Register 4        Register 5        Register 6        Register 7
+ *     ----------        ----------        ----------        ----------
+ *
+ * 0   Context           PageMask          Wired             HWREna
+ * 1   ContextConfig     PageGrain         SRSConf0
+ * 2   UserLocal         SegCtl0           SRSConf1
+ * 3   XContextConfig    SegCtl1           SRSConf2
+ * 4   DebugContextID    SegCtl2           SRSConf3
+ * 5   MemoryMapID       PWBase            SRSConf4
+ * 6                     PWField           PWCtl
+ * 7                     PWSize
+ *
+ *
+ *     Register 8        Register 9        Register 10       Register 11
+ *     ----------        ----------        -----------       -----------
+ *
+ * 0   BadVAddr          Count             EntryHi           Compare
+ * 1   BadInstr
+ * 2   BadInstrP
+ * 3   BadInstrX
+ * 4                                       GuestCtl1         GuestCtl0Ext
+ * 5                                       GuestCtl2
+ * 6                                       GuestCtl3
+ * 7
+ *
+ *
+ *     Register 12       Register 13       Register 14       Register 15
+ *     -----------       -----------       -----------       -----------
+ *
+ * 0   Status            Cause             EPC               PRId
+ * 1   IntCtl                                                EBase
+ * 2   SRSCtl                              NestedEPC         CDMMBase
+ * 3   SRSMap                                                CMGCRBase
+ * 4   View_IPL          View_RIPL                           BEVVA
+ * 5   SRSMap2           NestedExc
+ * 6   GuestCtl0
+ * 7   GTOffset
+ *
+ *
+ *     Register 16       Register 17       Register 18       Register 19
+ *     -----------       -----------       -----------       -----------
+ *
+ * 0   Config            LLAddr            WatchLo           WatchHi
+ * 1   Config1           MAAR              WatchLo           WatchHi
+ * 2   Config2           MAARI             WatchLo           WatchHi
+ * 3   Config3                             WatchLo           WatchHi
+ * 4   Config4                             WatchLo           WatchHi
+ * 5   Config5                             WatchLo           WatchHi
+ * 6                                       WatchLo           WatchHi
+ * 7                                       WatchLo           WatchHi
+ *
+ *
+ *     Register 20       Register 21       Register 22       Register 23
+ *     -----------       -----------       -----------       -----------
+ *
+ * 0   XContext                                              Debug
+ * 1                                                         TraceControl
+ * 2                                                         TraceControl2
+ * 3                                                         UserTraceData1
+ * 4                                                         TraceIBPC
+ * 5                                                         TraceDBPC
+ * 6                                                         Debug2
+ * 7
+ *
+ *
+ *     Register 24       Register 25       Register 26       Register 27
+ *     -----------       -----------       -----------       -----------
+ *
+ * 0   DEPC              PerfCnt           ErrCtl            CacheErr
+ * 1                     PerfCnt
+ * 2   TraceControl3     PerfCnt
+ * 3   UserTraceData2    PerfCnt
+ * 4                     PerfCnt
+ * 5                     PerfCnt
+ * 6                     PerfCnt
+ * 7                     PerfCnt
+ *
+ *
+ *     Register 28       Register 29       Register 30       Register 31
+ *     -----------       -----------       -----------       -----------
+ *
+ * 0   DataLo            DataHi            ErrorEPC          DESAVE
+ * 1   TagLo             TagHi
+ * 2   DataLo            DataHi                              KScratch<n>
+ * 3   TagLo             TagHi                               KScratch<n>
+ * 4   DataLo            DataHi                              KScratch<n>
+ * 5   TagLo             TagHi                               KScratch<n>
+ * 6   DataLo            DataHi                              KScratch<n>
+ * 7   TagLo             TagHi                               KScratch<n>
+ *
+ */
+/*
+ * CP0 Register 0
+ */
     int32_t CP0_Index;
     /* CP0_MVP* are per MVP registers. */
     int32_t CP0_VPControl;
 #define CP0VPCtl_DIS    0
+/*
+ * CP0 Register 1
+ */
     int32_t CP0_Random;
     int32_t CP0_VPEControl;
 #define CP0VPECo_YSI	21
@@ -239,7 +364,13 @@
 #define CP0VPEOpt_DWX2	2
 #define CP0VPEOpt_DWX1	1
 #define CP0VPEOpt_DWX0	0
+/*
+ * CP0 Register 2
+ */
     uint64_t CP0_EntryLo0;
+/*
+ * CP0 Register 3
+ */
     uint64_t CP0_EntryLo1;
 #if defined(TARGET_MIPS64)
 # define CP0EnLo_RI 63
@@ -250,8 +381,14 @@
 #endif
     int32_t CP0_GlobalNumber;
 #define CP0GN_VPId 0
+/*
+ * CP0 Register 4
+ */
     target_ulong CP0_Context;
     target_ulong CP0_KScratch[MIPS_KSCRATCH_NUM];
+/*
+ * CP0 Register 5
+ */
     int32_t CP0_PageMask;
     int32_t CP0_PageGrain_rw_bitmask;
     int32_t CP0_PageGrain;
@@ -289,7 +426,47 @@
 #define CP0SC2_XR       56
 #define CP0SC2_XR_MASK  (0xFFULL << CP0SC2_XR)
 #define CP0SC2_MASK     (CP0SC_1GMASK | (CP0SC_1GMASK << 16) | CP0SC2_XR_MASK)
+    target_ulong CP0_PWBase;
+    target_ulong CP0_PWField;
+#if defined(TARGET_MIPS64)
+#define CP0PF_BDI  32    /* 37..32 */
+#define CP0PF_GDI  24    /* 29..24 */
+#define CP0PF_UDI  18    /* 23..18 */
+#define CP0PF_MDI  12    /* 17..12 */
+#define CP0PF_PTI  6     /* 11..6  */
+#define CP0PF_PTEI 0     /*  5..0  */
+#else
+#define CP0PF_GDW  24    /* 29..24 */
+#define CP0PF_UDW  18    /* 23..18 */
+#define CP0PF_MDW  12    /* 17..12 */
+#define CP0PF_PTW  6     /* 11..6  */
+#define CP0PF_PTEW 0     /*  5..0  */
+#endif
+    target_ulong CP0_PWSize;
+#if defined(TARGET_MIPS64)
+#define CP0PS_BDW  32    /* 37..32 */
+#endif
+#define CP0PS_PS   30
+#define CP0PS_GDW  24    /* 29..24 */
+#define CP0PS_UDW  18    /* 23..18 */
+#define CP0PS_MDW  12    /* 17..12 */
+#define CP0PS_PTW  6     /* 11..6  */
+#define CP0PS_PTEW 0     /*  5..0  */
+/*
+ * CP0 Register 6
+ */
     int32_t CP0_Wired;
+    int32_t CP0_PWCtl;
+#define CP0PC_PWEN      31
+#if defined(TARGET_MIPS64)
+#define CP0PC_PWDIREXT  30
+#define CP0PC_XK        28
+#define CP0PC_XS        27
+#define CP0PC_XU        26
+#endif
+#define CP0PC_DPH       7
+#define CP0PC_HUGEPG    6
+#define CP0PC_PSN       0     /*  5..0  */
     int32_t CP0_SRSConf0_rw_bitmask;
     int32_t CP0_SRSConf0;
 #define CP0SRSC0_M	31
@@ -319,16 +496,34 @@
 #define CP0SRSC4_SRS15	20
 #define CP0SRSC4_SRS14	10
 #define CP0SRSC4_SRS13	0
+/*
+ * CP0 Register 7
+ */
     int32_t CP0_HWREna;
+/*
+ * CP0 Register 8
+ */
     target_ulong CP0_BadVAddr;
     uint32_t CP0_BadInstr;
     uint32_t CP0_BadInstrP;
     uint32_t CP0_BadInstrX;
+/*
+ * CP0 Register 9
+ */
     int32_t CP0_Count;
+/*
+ * CP0 Register 10
+ */
     target_ulong CP0_EntryHi;
 #define CP0EnHi_EHINV 10
     target_ulong CP0_EntryHi_ASID_mask;
+/*
+ * CP0 Register 11
+ */
     int32_t CP0_Compare;
+/*
+ * CP0 Register 12
+ */
     int32_t CP0_Status;
 #define CP0St_CU3   31
 #define CP0St_CU2   30
@@ -370,6 +565,9 @@
 #define CP0SRSMap_SSV2 8
 #define CP0SRSMap_SSV1 4
 #define CP0SRSMap_SSV0 0
+/*
+ * CP0 Register 13
+ */
     int32_t CP0_Cause;
 #define CP0Ca_BD   31
 #define CP0Ca_TI   30
@@ -381,12 +579,21 @@
 #define CP0Ca_IP    8
 #define CP0Ca_IP_mask 0x0000FF00
 #define CP0Ca_EC    2
+/*
+ * CP0 Register 14
+ */
     target_ulong CP0_EPC;
+/*
+ * CP0 Register 15
+ */
     int32_t CP0_PRid;
     target_ulong CP0_EBase;
     target_ulong CP0_EBaseWG_rw_bitmask;
 #define CP0EBase_WG 11
     target_ulong CP0_CMGCRBase;
+/*
+ * CP0 Register 16
+ */
     int32_t CP0_Config0;
 #define CP0C0_M    31
 #define CP0C0_K23  28    /* 30..28 */
@@ -503,6 +710,9 @@
     uint64_t CP0_MAAR[MIPS_MAAR_MAX];
     int32_t CP0_MAARI;
     /* XXX: Maybe make LLAddr per-TC? */
+/*
+ * CP0 Register 17
+ */
     uint64_t lladdr;
     target_ulong llval;
     target_ulong llnewval;
@@ -511,11 +721,23 @@
     target_ulong llreg;
     uint64_t CP0_LLAddr_rw_bitmask;
     int CP0_LLAddr_shift;
+/*
+ * CP0 Register 18
+ */
     target_ulong CP0_WatchLo[8];
+/*
+ * CP0 Register 19
+ */
     int32_t CP0_WatchHi[8];
 #define CP0WH_ASID 16
+/*
+ * CP0 Register 20
+ */
     target_ulong CP0_XContext;
     int32_t CP0_Framemask;
+/*
+ * CP0 Register 23
+ */
     int32_t CP0_Debug;
 #define CP0DB_DBD  31
 #define CP0DB_DM   30
@@ -535,18 +757,40 @@
 #define CP0DB_DDBL 2
 #define CP0DB_DBp  1
 #define CP0DB_DSS  0
+/*
+ * CP0 Register 24
+ */
     target_ulong CP0_DEPC;
+/*
+ * CP0 Register 25
+ */
     int32_t CP0_Performance0;
+/*
+ * CP0 Register 26
+ */
     int32_t CP0_ErrCtl;
 #define CP0EC_WST 29
 #define CP0EC_SPR 28
 #define CP0EC_ITC 26
+/*
+ * CP0 Register 28
+ */
     uint64_t CP0_TagLo;
     int32_t CP0_DataLo;
+/*
+ * CP0 Register 29
+ */
     int32_t CP0_TagHi;
     int32_t CP0_DataHi;
+/*
+ * CP0 Register 30
+ */
     target_ulong CP0_ErrorEPC;
+/*
+ * CP0 Register 31
+ */
     int32_t CP0_DESAVE;
+
     /* We waste some space so we can handle shadow registers like TCs. */
     TCState tcs[MIPS_SHADOW_SET_MAX];
     CPUMIPSFPUContext fpus[MIPS_FPU_MAX];
@@ -596,8 +840,9 @@
 #define MIPS_HFLAG_BX     0x40000 /* branch exchanges execution mode    */
 #define MIPS_HFLAG_BMASK  (MIPS_HFLAG_BMASK_BASE | MIPS_HFLAG_BMASK_EXT)
     /* MIPS DSP resources access. */
-#define MIPS_HFLAG_DSP   0x080000  /* Enable access to MIPS DSP resources. */
-#define MIPS_HFLAG_DSPR2 0x100000  /* Enable access to MIPS DSPR2 resources. */
+#define MIPS_HFLAG_DSP    0x080000   /* Enable access to DSP resources.    */
+#define MIPS_HFLAG_DSP_R2 0x100000   /* Enable access to DSP R2 resources. */
+#define MIPS_HFLAG_DSP_R3 0x20000000 /* Enable access to DSP R3 resources. */
     /* Extra flag about HWREna register. */
 #define MIPS_HFLAG_HWRENA_ULR 0x200000 /* ULR bit from HWREna is set. */
 #define MIPS_HFLAG_SBRI  0x400000 /* R6 SDBBP causes RI excpt. in user mode */
@@ -614,7 +859,7 @@
     int CCRes; /* Cycle count resolution/divisor */
     uint32_t CP0_Status_rw_bitmask; /* Read/write bits in CP0_Status */
     uint32_t CP0_TCStatus_rw_bitmask; /* Read/write bits in CP0_TCStatus */
-    int insn_flags; /* Supported instruction set */
+    uint64_t insn_flags; /* Supported instruction set */
 
     /* Fields up to this point are cleared by a CPU reset */
     struct {} end_reset_fields;
diff --git a/target/mips/helper.c b/target/mips/helper.c
index f0c268b..8988452 100644
--- a/target/mips/helper.c
+++ b/target/mips/helper.c
@@ -537,6 +537,342 @@
 }
 #endif
 
+#if !defined(CONFIG_USER_ONLY)
+#if !defined(TARGET_MIPS64)
+
+/*
+ * Perform hardware page table walk
+ *
+ * Memory accesses are performed using the KERNEL privilege level.
+ * Synchronous exceptions detected on memory accesses cause a silent exit
+ * from page table walking, resulting in a TLB or XTLB Refill exception.
+ *
+ * Implementations are not required to support page table walk memory
+ * accesses from mapped memory regions. When an unsupported access is
+ * attempted, a silent exit is taken, resulting in a TLB or XTLB Refill
+ * exception.
+ *
+ * Note that if an exception is caused by AddressTranslation or LoadMemory
+ * functions, the exception is not taken, a silent exit is taken,
+ * resulting in a TLB or XTLB Refill exception.
+ */
+
+static bool get_pte(CPUMIPSState *env, uint64_t vaddr, int entry_size,
+        uint64_t *pte)
+{
+    /* entry_size is in bits; reject addresses not aligned to the entry. */
+    uint64_t misalign = vaddr & ((entry_size >> 3) - 1);
+
+    if (misalign) {
+        return false;
+    }
+    *pte = (entry_size == 64) ? cpu_ldq_code(env, vaddr)
+                              : cpu_ldl_code(env, vaddr);
+    return true;
+}
+
+static uint64_t get_tlb_entry_layout(CPUMIPSState *env, uint64_t entry,
+        int entry_size, int ptei)
+{
+    uint64_t rixi, layout;
+    /* An index larger than the entry size is rebased by 32. */
+    if (ptei > entry_size) {
+        ptei -= 32;
+    }
+    /* The two bits just below ptei are re-inserted at CP0EnLo_XI. */
+    layout = entry >> (ptei - 2);
+    rixi = layout & 3;
+    layout = (layout >> 2) | (rixi << CP0EnLo_XI);
+    return layout;
+}
+
+/*
+ * Load the directory entry at *vaddr.  Returns 0 to abandon the walk, 1 when
+ * a huge-page hit filled *pw_entrylo0/1, or 2 with *vaddr set to the entry.
+ */
+static int walk_directory(CPUMIPSState *env, uint64_t *vaddr,
+        int directory_index, bool *huge_page, bool *hgpg_directory_hit,
+        uint64_t *pw_entrylo0, uint64_t *pw_entrylo1)
+{
+    int dph = (env->CP0_PWCtl >> CP0PC_DPH) & 0x1;
+    int psn = (env->CP0_PWCtl >> CP0PC_PSN) & 0x3F;
+    int hugepg = (env->CP0_PWCtl >> CP0PC_HUGEPG) & 0x1;
+    int pf_ptew = (env->CP0_PWField >> CP0PF_PTEW) & 0x3F;
+    int ptew = (env->CP0_PWSize >> CP0PS_PTEW) & 0x3F;
+    int native_shift = (((env->CP0_PWSize >> CP0PS_PS) & 1) == 0) ? 2 : 3;
+    int directory_shift = (ptew > 1) ? -1 :
+            (hugepg && (ptew == 1)) ? native_shift + 1 : native_shift;
+    int leaf_shift = (ptew > 1) ? -1 :
+            (ptew == 1) ? native_shift + 1 : native_shift;
+    uint32_t direntry_size = 1 << (directory_shift + 3);
+    uint32_t leafentry_size = 1 << (leaf_shift + 3);
+    uint64_t entry, paddr;
+    int prot;
+
+    if (get_physical_address(env, &paddr, &prot, *vaddr, MMU_DATA_LOAD,
+                             ACCESS_INT, cpu_mmu_index(env, false)) !=
+                             TLBRET_MATCH) {
+        /* wrong base address */
+        return 0;
+    }
+    if (!get_pte(env, *vaddr, direntry_size, &entry)) {
+        return 0;
+    }
+    /* entry is 64 bits wide and psn is a 6-bit field: shift a 64-bit 1. */
+    if ((entry & (1ULL << psn)) && hugepg) {
+        *huge_page = true;
+        *hgpg_directory_hit = true;
+        entry = get_tlb_entry_layout(env, entry, leafentry_size, pf_ptew);
+        if (directory_index & 0x1) {
+            /* Generate adjacent page from same PTE for odd TLB page */
+            uint64_t lsb = (1ULL << (directory_index - 1)) >> 6;
+            *pw_entrylo0 = entry & ~lsb; /* even page */
+            *pw_entrylo1 = entry | lsb; /* odd page */
+        } else if (dph) {
+            int oddpagebit = 1 << leaf_shift;
+            uint64_t vaddr2 = *vaddr ^ oddpagebit;
+            if (*vaddr & oddpagebit) {
+                *pw_entrylo1 = entry;
+            } else {
+                *pw_entrylo0 = entry;
+            }
+            if (get_physical_address(env, &paddr, &prot, vaddr2, MMU_DATA_LOAD,
+                                     ACCESS_INT, cpu_mmu_index(env, false)) !=
+                                     TLBRET_MATCH) {
+                return 0;
+            }
+            if (!get_pte(env, vaddr2, leafentry_size, &entry)) {
+                return 0;
+            }
+            entry = get_tlb_entry_layout(env, entry, leafentry_size, pf_ptew);
+            if (*vaddr & oddpagebit) {
+                *pw_entrylo0 = entry;
+            } else {
+                *pw_entrylo1 = entry;
+            }
+        } else {
+            return 0;
+        }
+        return 1;
+    } else {
+        *vaddr = entry;
+        return 2;
+    }
+}
+
+static bool page_table_walk_refill(CPUMIPSState *env, vaddr address, int rw,
+        int mmu_idx)
+{
+    int gdw = (env->CP0_PWSize >> CP0PS_GDW) & 0x3F;
+    int udw = (env->CP0_PWSize >> CP0PS_UDW) & 0x3F;
+    int mdw = (env->CP0_PWSize >> CP0PS_MDW) & 0x3F;
+    int ptw = (env->CP0_PWSize >> CP0PS_PTW) & 0x3F;
+    int ptew = (env->CP0_PWSize >> CP0PS_PTEW) & 0x3F;
+
+    /* Initial values */
+    bool huge_page = false;
+    bool hgpg_bdhit = false; /* never set below */
+    bool hgpg_gdhit = false;
+    bool hgpg_udhit = false;
+    bool hgpg_mdhit = false;
+
+    int32_t pw_pagemask = 0;
+    target_ulong pw_entryhi = 0;
+    uint64_t pw_entrylo0 = 0;
+    uint64_t pw_entrylo1 = 0;
+
+    /* Native pointer size */
+    /* For the 32-bit architectures, this bit is fixed to 0. */
+    int native_shift = (((env->CP0_PWSize >> CP0PS_PS) & 1) == 0) ? 2 : 3;
+
+    /* Indices from PWField */
+    int pf_gdw = (env->CP0_PWField >> CP0PF_GDW) & 0x3F;
+    int pf_udw = (env->CP0_PWField >> CP0PF_UDW) & 0x3F;
+    int pf_mdw = (env->CP0_PWField >> CP0PF_MDW) & 0x3F;
+    int pf_ptw = (env->CP0_PWField >> CP0PF_PTW) & 0x3F;
+    int pf_ptew = (env->CP0_PWField >> CP0PF_PTEW) & 0x3F;
+
+    /* Indices computed from faulting address */
+    int gindex = (address >> pf_gdw) & ((1 << gdw) - 1);
+    int uindex = (address >> pf_udw) & ((1 << udw) - 1);
+    int mindex = (address >> pf_mdw) & ((1 << mdw) - 1);
+    int ptindex = (address >> pf_ptw) & ((1 << ptw) - 1);
+
+    /* Other HTW configs */
+    int hugepg = (env->CP0_PWCtl >> CP0PC_HUGEPG) & 0x1;
+
+    /* HTW Shift values (depend on entry size) */
+    int directory_shift = (ptew > 1) ? -1 :
+            (hugepg && (ptew == 1)) ? native_shift + 1 : native_shift;
+    int leaf_shift = (ptew > 1) ? -1 :
+            (ptew == 1) ? native_shift + 1 : native_shift;
+
+    /* Offsets into tables */
+    int goffset = gindex << directory_shift;
+    int uoffset = uindex << directory_shift;
+    int moffset = mindex << directory_shift;
+    int ptoffset0 = (ptindex >> 1) << (leaf_shift + 1);
+    int ptoffset1 = ptoffset0 | (1 << (leaf_shift));
+
+    uint32_t leafentry_size = 1 << (leaf_shift + 3);
+
+    /* Starting address - Page Table Base */
+    uint64_t vaddr = env->CP0_PWBase;
+
+    uint64_t dir_entry;
+    uint64_t paddr;
+    int prot;
+    int m; /* low-bit mask, becomes pw_pagemask after >> 12 */
+    /* All "return false" paths exit before r4k_helper_tlbwr() is called. */
+    if (!(env->CP0_Config3 & (1 << CP0C3_PW))) {
+        /* walker is unimplemented */
+        return false;
+    }
+    if (!(env->CP0_PWCtl & (1 << CP0PC_PWEN))) {
+        /* walker is disabled */
+        return false;
+    }
+    if (!(gdw > 0 || udw > 0 || mdw > 0)) {
+        /* no structure to walk */
+        return false;
+    }
+    if ((directory_shift == -1) || (leaf_shift == -1)) {
+        return false;
+    }
+
+    /* Global Directory */
+    if (gdw > 0) {
+        vaddr |= goffset;
+        switch (walk_directory(env, &vaddr, pf_gdw, &huge_page, &hgpg_gdhit,
+                               &pw_entrylo0, &pw_entrylo1))
+        {
+        case 0:
+            return false;
+        case 1:
+            goto refill;
+        case 2:
+        default:
+            break;
+        }
+    }
+
+    /* Upper directory */
+    if (udw > 0) {
+        vaddr |= uoffset;
+        switch (walk_directory(env, &vaddr, pf_udw, &huge_page, &hgpg_udhit,
+                               &pw_entrylo0, &pw_entrylo1))
+        {
+        case 0:
+            return false;
+        case 1:
+            goto refill;
+        case 2:
+        default:
+            break;
+        }
+    }
+
+    /* Middle directory */
+    if (mdw > 0) {
+        vaddr |= moffset;
+        switch (walk_directory(env, &vaddr, pf_mdw, &huge_page, &hgpg_mdhit,
+                               &pw_entrylo0, &pw_entrylo1))
+        {
+        case 0:
+            return false;
+        case 1:
+            goto refill;
+        case 2:
+        default:
+            break;
+        }
+    }
+
+    /* Leaf Level Page Table - First half of PTE pair */
+    vaddr |= ptoffset0;
+    if (get_physical_address(env, &paddr, &prot, vaddr, MMU_DATA_LOAD,
+                             ACCESS_INT, cpu_mmu_index(env, false)) !=
+                             TLBRET_MATCH) {
+        return false;
+    }
+    if (!get_pte(env, vaddr, leafentry_size, &dir_entry)) {
+        return false;
+    }
+    dir_entry = get_tlb_entry_layout(env, dir_entry, leafentry_size, pf_ptew);
+    pw_entrylo0 = dir_entry;
+
+    /* Leaf Level Page Table - Second half of PTE pair */
+    vaddr |= ptoffset1;
+    if (get_physical_address(env, &paddr, &prot, vaddr, MMU_DATA_LOAD,
+                             ACCESS_INT, cpu_mmu_index(env, false)) !=
+                             TLBRET_MATCH) {
+        return false;
+    }
+    if (!get_pte(env, vaddr, leafentry_size, &dir_entry)) {
+        return false;
+    }
+    dir_entry = get_tlb_entry_layout(env, dir_entry, leafentry_size, pf_ptew);
+    pw_entrylo1 = dir_entry;
+
+refill:
+    /* m covers the address bits below the index of the level that hit. */
+    m = (1 << pf_ptw) - 1;
+
+    if (huge_page) {
+        switch (hgpg_bdhit << 3 | hgpg_gdhit << 2 | hgpg_udhit << 1 |
+                hgpg_mdhit)
+        {
+        case 4:
+            m = (1 << pf_gdw) - 1;
+            if (pf_gdw & 1) {
+                m >>= 1;
+            }
+            break;
+        case 2:
+            m = (1 << pf_udw) - 1;
+            if (pf_udw & 1) {
+                m >>= 1;
+            }
+            break;
+        case 1:
+            m = (1 << pf_mdw) - 1;
+            if (pf_mdw & 1) {
+                m >>= 1;
+            }
+            break;
+        }
+    }
+    pw_pagemask = m >> 12;
+    update_pagemask(env, pw_pagemask << 13, &pw_pagemask);
+    pw_entryhi = (address & ~0x1fff) | (env->CP0_EntryHi & 0xFF);
+    {   /* Borrow the CP0 TLB registers for the write, then restore them. */
+        target_ulong tmp_entryhi = env->CP0_EntryHi;
+        int32_t tmp_pagemask = env->CP0_PageMask;
+        uint64_t tmp_entrylo0 = env->CP0_EntryLo0;
+        uint64_t tmp_entrylo1 = env->CP0_EntryLo1;
+
+        env->CP0_EntryHi = pw_entryhi;
+        env->CP0_PageMask = pw_pagemask;
+        env->CP0_EntryLo0 = pw_entrylo0;
+        env->CP0_EntryLo1 = pw_entrylo1;
+
+        /*
+         * The hardware page walker inserts a page into the TLB in a manner
+         * identical to a TLBWR instruction as executed by the software refill
+         * handler.
+         */
+        r4k_helper_tlbwr(env);
+
+        env->CP0_EntryHi = tmp_entryhi;
+        env->CP0_PageMask = tmp_pagemask;
+        env->CP0_EntryLo0 = tmp_entrylo0;
+        env->CP0_EntryLo1 = tmp_entrylo1;
+    }
+    return true;
+}
+#endif
+#endif
+
 int mips_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size, int rw,
                               int mmu_idx)
 {
@@ -558,8 +894,7 @@
 
     /* data access */
 #if !defined(CONFIG_USER_ONLY)
-    /* XXX: put correct access by using cpu_restore_state()
-       correctly */
+    /* XXX: put correct access by using cpu_restore_state() correctly */
     access_type = ACCESS_INT;
     ret = get_physical_address(env, &physical, &prot,
                                address, rw, access_type, mmu_idx);
@@ -583,6 +918,32 @@
     } else if (ret < 0)
 #endif
     {
+#if !defined(CONFIG_USER_ONLY)
+#if !defined(TARGET_MIPS64)
+        if ((ret == TLBRET_NOMATCH) && (env->tlb->nb_tlb > 1)) {
+            /*
+             * Memory reads during hardware page table walking are performed
+             * as if they were kernel-mode load instructions.
+             */
+            int mode = (env->hflags & MIPS_HFLAG_KSU);
+            bool ret_walker;
+            env->hflags &= ~MIPS_HFLAG_KSU;
+            ret_walker = page_table_walk_refill(env, address, rw, mmu_idx);
+            env->hflags |= mode;
+            if (ret_walker) {
+                ret = get_physical_address(env, &physical, &prot,
+                                           address, rw, access_type, mmu_idx);
+                if (ret == TLBRET_MATCH) {
+                    tlb_set_page(cs, address & TARGET_PAGE_MASK,
+                            physical & TARGET_PAGE_MASK, prot | PAGE_EXEC,
+                            mmu_idx, TARGET_PAGE_SIZE);
+                    ret = 0;
+                    return ret;
+                }
+            }
+        }
+#endif
+#endif
         raise_mmu_exception(env, address, rw, ret);
         ret = 1;
     }
diff --git a/target/mips/helper.h b/target/mips/helper.h
index b2a780a..c23e4e5 100644
--- a/target/mips/helper.h
+++ b/target/mips/helper.h
@@ -120,6 +120,8 @@
 DEF_HELPER_2(mtc0_segctl0, void, env, tl)
 DEF_HELPER_2(mtc0_segctl1, void, env, tl)
 DEF_HELPER_2(mtc0_segctl2, void, env, tl)
+DEF_HELPER_2(mtc0_pwfield, void, env, tl)
+DEF_HELPER_2(mtc0_pwsize, void, env, tl)
 DEF_HELPER_2(mtc0_wired, void, env, tl)
 DEF_HELPER_2(mtc0_srsconf0, void, env, tl)
 DEF_HELPER_2(mtc0_srsconf1, void, env, tl)
@@ -127,6 +129,7 @@
 DEF_HELPER_2(mtc0_srsconf3, void, env, tl)
 DEF_HELPER_2(mtc0_srsconf4, void, env, tl)
 DEF_HELPER_2(mtc0_hwrena, void, env, tl)
+DEF_HELPER_2(mtc0_pwctl, void, env, tl)
 DEF_HELPER_2(mtc0_count, void, env, tl)
 DEF_HELPER_2(mtc0_entryhi, void, env, tl)
 DEF_HELPER_2(mttc0_entryhi, void, env, tl)
diff --git a/target/mips/internal.h b/target/mips/internal.h
index e41051f..8b1b245 100644
--- a/target/mips/internal.h
+++ b/target/mips/internal.h
@@ -59,7 +59,7 @@
     int32_t CP0_PageGrain_rw_bitmask;
     int32_t CP0_PageGrain;
     target_ulong CP0_EBaseWG_rw_bitmask;
-    int insn_flags;
+    uint64_t insn_flags;
     enum mips_mmu_types mmu_type;
 };
 
@@ -211,6 +211,7 @@
 
 extern unsigned int ieee_rm[];
 int ieee_ex_to_mips(int xcpt);
+void update_pagemask(CPUMIPSState *env, target_ulong arg1, int32_t *pagemask);
 
 static inline void restore_rounding_mode(CPUMIPSState *env)
 {
@@ -306,9 +307,9 @@
 {
     env->hflags &= ~(MIPS_HFLAG_COP1X | MIPS_HFLAG_64 | MIPS_HFLAG_CP0 |
                      MIPS_HFLAG_F64 | MIPS_HFLAG_FPU | MIPS_HFLAG_KSU |
-                     MIPS_HFLAG_AWRAP | MIPS_HFLAG_DSP | MIPS_HFLAG_DSPR2 |
-                     MIPS_HFLAG_SBRI | MIPS_HFLAG_MSA | MIPS_HFLAG_FRE |
-                     MIPS_HFLAG_ELPA | MIPS_HFLAG_ERL);
+                     MIPS_HFLAG_AWRAP | MIPS_HFLAG_DSP | MIPS_HFLAG_DSP_R2 |
+                     MIPS_HFLAG_DSP_R3 | MIPS_HFLAG_SBRI | MIPS_HFLAG_MSA |
+                     MIPS_HFLAG_FRE | MIPS_HFLAG_ELPA | MIPS_HFLAG_ERL);
     if (env->CP0_Status & (1 << CP0St_ERL)) {
         env->hflags |= MIPS_HFLAG_ERL;
     }
@@ -355,16 +356,29 @@
         (env->CP0_Config5 & (1 << CP0C5_SBRI))) {
         env->hflags |= MIPS_HFLAG_SBRI;
     }
-    if (env->insn_flags & ASE_DSPR2) {
-        /* Enables access MIPS DSP resources, now our cpu is DSP ASER2,
-           so enable to access DSPR2 resources. */
+    if (env->insn_flags & ASE_DSP_R3) {
+        /*
+         * Our cpu supports DSP R3 ASE, so enable
+         * access to DSP R3 resources.
+         */
         if (env->CP0_Status & (1 << CP0St_MX)) {
-            env->hflags |= MIPS_HFLAG_DSP | MIPS_HFLAG_DSPR2;
+            env->hflags |= MIPS_HFLAG_DSP | MIPS_HFLAG_DSP_R2 |
+                           MIPS_HFLAG_DSP_R3;
+        }
+    } else if (env->insn_flags & ASE_DSP_R2) {
+        /*
+         * Our cpu supports DSP R2 ASE, so enable
+         * access to DSP R2 resources.
+         */
+        if (env->CP0_Status & (1 << CP0St_MX)) {
+            env->hflags |= MIPS_HFLAG_DSP | MIPS_HFLAG_DSP_R2;
         }
 
     } else if (env->insn_flags & ASE_DSP) {
-        /* Enables access MIPS DSP resources, now our cpu is DSP ASE,
-           so enable to access DSP resources. */
+        /*
+         * Our cpu supports DSP ASE, so enable
+         * access to DSP resources.
+         */
         if (env->CP0_Status & (1 << CP0St_MX)) {
             env->hflags |= MIPS_HFLAG_DSP;
         }
diff --git a/target/mips/machine.c b/target/mips/machine.c
index 5ba78ac..70a8909 100644
--- a/target/mips/machine.c
+++ b/target/mips/machine.c
@@ -212,8 +212,8 @@
 
 const VMStateDescription vmstate_mips_cpu = {
     .name = "cpu",
-    .version_id = 11,
-    .minimum_version_id = 11,
+    .version_id = 15,
+    .minimum_version_id = 15,
     .post_load = cpu_post_load,
     .fields = (VMStateField[]) {
         /* Active TC */
@@ -256,7 +256,11 @@
         VMSTATE_UINTTL(env.CP0_SegCtl0, MIPSCPU),
         VMSTATE_UINTTL(env.CP0_SegCtl1, MIPSCPU),
         VMSTATE_UINTTL(env.CP0_SegCtl2, MIPSCPU),
+        VMSTATE_UINTTL(env.CP0_PWBase, MIPSCPU),
+        VMSTATE_UINTTL(env.CP0_PWField, MIPSCPU),
+        VMSTATE_UINTTL(env.CP0_PWSize, MIPSCPU),
         VMSTATE_INT32(env.CP0_Wired, MIPSCPU),
+        VMSTATE_INT32(env.CP0_PWCtl, MIPSCPU),
         VMSTATE_INT32(env.CP0_SRSConf0, MIPSCPU),
         VMSTATE_INT32(env.CP0_SRSConf1, MIPSCPU),
         VMSTATE_INT32(env.CP0_SRSConf2, MIPSCPU),
diff --git a/target/mips/mips-defs.h b/target/mips/mips-defs.h
index c8e9979..dbdb4b2 100644
--- a/target/mips/mips-defs.h
+++ b/target/mips/mips-defs.h
@@ -22,40 +22,54 @@
 #endif
 #endif
 
-/* Masks used to mark instructions to indicate which ISA level they
-   were introduced in. */
-#define		ISA_MIPS1	0x00000001
-#define		ISA_MIPS2	0x00000002
-#define		ISA_MIPS3	0x00000004
-#define		ISA_MIPS4	0x00000008
-#define		ISA_MIPS5	0x00000010
-#define		ISA_MIPS32	0x00000020
-#define		ISA_MIPS32R2	0x00000040
-#define		ISA_MIPS64	0x00000080
-#define		ISA_MIPS64R2	0x00000100
-#define   ISA_MIPS32R3  0x00000200
-#define   ISA_MIPS64R3  0x00000400
-#define   ISA_MIPS32R5  0x00000800
-#define   ISA_MIPS64R5  0x00001000
-#define   ISA_MIPS32R6  0x00002000
-#define   ISA_MIPS64R6  0x00004000
-#define   ISA_NANOMIPS32  0x00008000
-
-/* MIPS ASEs. */
-#define   ASE_MIPS16    0x00010000
-#define   ASE_MIPS3D    0x00020000
-#define   ASE_MDMX      0x00040000
-#define   ASE_DSP       0x00080000
-#define   ASE_DSPR2     0x00100000
-#define   ASE_MT        0x00200000
-#define   ASE_SMARTMIPS 0x00400000
-#define   ASE_MICROMIPS 0x00800000
-#define   ASE_MSA       0x01000000
-
-/* Chip specific instructions. */
-#define		INSN_LOONGSON2E  0x20000000
-#define		INSN_LOONGSON2F  0x40000000
-#define		INSN_VR54XX	0x80000000
+/*
+ * bit definitions for insn_flags (ISAs/ASEs flags)
+ * ------------------------------------------------
+ */
+/*
+ *   bits 0-31: MIPS base instruction sets
+ */
+#define ISA_MIPS1         0x0000000000000001ULL
+#define ISA_MIPS2         0x0000000000000002ULL
+#define ISA_MIPS3         0x0000000000000004ULL
+#define ISA_MIPS4         0x0000000000000008ULL
+#define ISA_MIPS5         0x0000000000000010ULL
+#define ISA_MIPS32        0x0000000000000020ULL
+#define ISA_MIPS32R2      0x0000000000000040ULL
+#define ISA_MIPS64        0x0000000000000080ULL
+#define ISA_MIPS64R2      0x0000000000000100ULL
+#define ISA_MIPS32R3      0x0000000000000200ULL
+#define ISA_MIPS64R3      0x0000000000000400ULL
+#define ISA_MIPS32R5      0x0000000000000800ULL
+#define ISA_MIPS64R5      0x0000000000001000ULL
+#define ISA_MIPS32R6      0x0000000000002000ULL
+#define ISA_MIPS64R6      0x0000000000004000ULL
+#define ISA_NANOMIPS32    0x0000000000008000ULL
+/*
+ *   bits 32-47: MIPS ASEs
+ */
+#define ASE_MIPS16        0x0000000100000000ULL
+#define ASE_MIPS3D        0x0000000200000000ULL
+#define ASE_MDMX          0x0000000400000000ULL
+#define ASE_DSP           0x0000000800000000ULL
+#define ASE_DSP_R2        0x0000001000000000ULL
+#define ASE_DSP_R3        0x0000002000000000ULL
+#define ASE_MT            0x0000004000000000ULL
+#define ASE_SMARTMIPS     0x0000008000000000ULL
+#define ASE_MICROMIPS     0x0000010000000000ULL
+#define ASE_MSA           0x0000020000000000ULL
+/*
+ *   bits 48-55: vendor-specific base instruction sets
+ */
+#define INSN_LOONGSON2E   0x0001000000000000ULL
+#define INSN_LOONGSON2F   0x0002000000000000ULL
+#define INSN_VR54XX       0x0004000000000000ULL
+#define INSN_R5900        0x0008000000000000ULL
+/*
+ *   bits 56-63: vendor-specific ASEs
+ */
+#define ASE_MMI           0x0100000000000000ULL
+#define ASE_MXU           0x0200000000000000ULL
 
 /* MIPS CPU defines. */
 #define		CPU_MIPS1	(ISA_MIPS1)
@@ -63,6 +77,7 @@
 #define		CPU_MIPS3	(CPU_MIPS2 | ISA_MIPS3)
 #define		CPU_MIPS4	(CPU_MIPS3 | ISA_MIPS4)
 #define		CPU_VR54XX	(CPU_MIPS4 | INSN_VR54XX)
+#define         CPU_R5900       (CPU_MIPS3 | INSN_R5900)
 #define		CPU_LOONGSON2E  (CPU_MIPS3 | INSN_LOONGSON2E)
 #define		CPU_LOONGSON2F  (CPU_MIPS3 | INSN_LOONGSON2F)
 
diff --git a/target/mips/op_helper.c b/target/mips/op_helper.c
index c148b31..d1f1d1a 100644
--- a/target/mips/op_helper.c
+++ b/target/mips/op_helper.c
@@ -1400,7 +1400,7 @@
     env->CP0_Context = (env->CP0_Context & 0x007FFFFF) | (arg1 & ~0x007FFFFF);
 }
 
-void helper_mtc0_pagemask(CPUMIPSState *env, target_ulong arg1)
+void update_pagemask(CPUMIPSState *env, target_ulong arg1, int32_t *pagemask)
 {
     uint64_t mask = arg1 >> (TARGET_PAGE_BITS + 1);
     if (!(env->insn_flags & ISA_MIPS32R6) || (arg1 == ~0) ||
@@ -1411,6 +1411,11 @@
     }
 }
 
+void helper_mtc0_pagemask(CPUMIPSState *env, target_ulong arg1)
+{
+    update_pagemask(env, arg1, &env->CP0_PageMask);
+}
+
 void helper_mtc0_pagegrain(CPUMIPSState *env, target_ulong arg1)
 {
     /* SmartMIPS not implemented */
@@ -1445,6 +1450,77 @@
     tlb_flush(cs);
 }
 
+void helper_mtc0_pwfield(CPUMIPSState *env, target_ulong arg1)
+{
+#if defined(TARGET_MIPS64)
+    uint64_t mask = 0x3F3FFFFFFFULL;
+    uint32_t old_ptei = (env->CP0_PWField >> CP0PF_PTEI) & 0x3FULL;
+    uint32_t new_ptei = (arg1 >> CP0PF_PTEI) & 0x3FULL;
+
+    if ((env->insn_flags & ISA_MIPS32R6)) {
+        if (((arg1 >> CP0PF_BDI) & 0x3FULL) < 12) {
+            mask &= ~(0x3FULL << CP0PF_BDI);
+        }
+        if (((arg1 >> CP0PF_GDI) & 0x3FULL) < 12) {
+            mask &= ~(0x3FULL << CP0PF_GDI);
+        }
+        if (((arg1 >> CP0PF_UDI) & 0x3FULL) < 12) {
+            mask &= ~(0x3FULL << CP0PF_UDI);
+        }
+        if (((arg1 >> CP0PF_MDI) & 0x3FULL) < 12) {
+            mask &= ~(0x3FULL << CP0PF_MDI);
+        }
+        if (((arg1 >> CP0PF_PTI) & 0x3FULL) < 12) {
+            mask &= ~(0x3FULL << CP0PF_PTI);
+        }
+    }
+    env->CP0_PWField = arg1 & mask;
+
+    if ((new_ptei >= 32) ||
+            ((env->insn_flags & ISA_MIPS32R6) &&
+                    (new_ptei == 0 || new_ptei == 1))) {
+        env->CP0_PWField = (env->CP0_PWField & ~0x3FULL) |
+                (old_ptei << CP0PF_PTEI);
+    }
+#else
+    uint32_t mask = 0x3FFFFFFF;
+    uint32_t old_ptew = (env->CP0_PWField >> CP0PF_PTEW) & 0x3F;
+    uint32_t new_ptew = (arg1 >> CP0PF_PTEW) & 0x3F;
+
+    if ((env->insn_flags & ISA_MIPS32R6)) {
+        if (((arg1 >> CP0PF_GDW) & 0x3F) < 12) {
+            mask &= ~(0x3F << CP0PF_GDW);
+        }
+        if (((arg1 >> CP0PF_UDW) & 0x3F) < 12) {
+            mask &= ~(0x3F << CP0PF_UDW);
+        }
+        if (((arg1 >> CP0PF_MDW) & 0x3F) < 12) {
+            mask &= ~(0x3F << CP0PF_MDW);
+        }
+        if (((arg1 >> CP0PF_PTW) & 0x3F) < 12) {
+            mask &= ~(0x3F << CP0PF_PTW);
+        }
+    }
+    env->CP0_PWField = arg1 & mask;
+
+    if ((new_ptew >= 32) ||
+            ((env->insn_flags & ISA_MIPS32R6) &&
+                    (new_ptew == 0 || new_ptew == 1))) {
+        env->CP0_PWField = (env->CP0_PWField & ~0x3F) |
+                (old_ptew << CP0PF_PTEW);
+    }
+#endif
+}
+
+void helper_mtc0_pwsize(CPUMIPSState *env, target_ulong arg1)
+{
+#if defined(TARGET_MIPS64)
+    env->CP0_PWSize = arg1 & 0x3F7FFFFFFFULL;
+#else
+    env->CP0_PWSize = arg1 & 0x3FFFFFFF;
+#endif
+}
+
 void helper_mtc0_wired(CPUMIPSState *env, target_ulong arg1)
 {
     if (env->insn_flags & ISA_MIPS32R6) {
@@ -1456,6 +1532,16 @@
     }
 }
 
+void helper_mtc0_pwctl(CPUMIPSState *env, target_ulong arg1)
+{
+#if defined(TARGET_MIPS64)
+    /* PWEn = 0. Hardware page table walking is not implemented. */
+    env->CP0_PWCtl = (env->CP0_PWCtl & 0x000000C0) | (arg1 & 0x5C00003F);
+#else
+    env->CP0_PWCtl = (arg1 & 0x800000FF);
+#endif
+}
+
 void helper_mtc0_srsconf0(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf0 |= arg1 & env->CP0_SRSConf0_rw_bitmask;
diff --git a/target/mips/translate.c b/target/mips/translate.c
index ab16cdb..60320cb 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -1,5 +1,5 @@
 /*
- *  MIPS32 emulation for qemu: main translation routines.
+ *  MIPS emulation for QEMU - main translation routines
  *
  *  Copyright (c) 2004-2005 Jocelyn Mayer
  *  Copyright (c) 2006 Marius Groeger (FPU operations)
@@ -463,8 +463,10 @@
     OPC_WSBH      = (0x02 << 6) | OPC_BSHFL,
     OPC_SEB       = (0x10 << 6) | OPC_BSHFL,
     OPC_SEH       = (0x18 << 6) | OPC_BSHFL,
-    OPC_ALIGN     = (0x08 << 6) | OPC_BSHFL, /* 010.bp */
-    OPC_ALIGN_END = (0x0B << 6) | OPC_BSHFL, /* 010.00 to 010.11 */
+    OPC_ALIGN     = (0x08 << 6) | OPC_BSHFL, /* 010.bp (010.00 to 010.11) */
+    OPC_ALIGN_1   = (0x09 << 6) | OPC_BSHFL,
+    OPC_ALIGN_2   = (0x0A << 6) | OPC_BSHFL,
+    OPC_ALIGN_3   = (0x0B << 6) | OPC_BSHFL,
     OPC_BITSWAP   = (0x00 << 6) | OPC_BSHFL  /* 00000 */
 };
 
@@ -474,8 +476,14 @@
 enum {
     OPC_DSBH       = (0x02 << 6) | OPC_DBSHFL,
     OPC_DSHD       = (0x05 << 6) | OPC_DBSHFL,
-    OPC_DALIGN     = (0x08 << 6) | OPC_DBSHFL, /* 01.bp */
-    OPC_DALIGN_END = (0x0F << 6) | OPC_DBSHFL, /* 01.000 to 01.111 */
+    OPC_DALIGN     = (0x08 << 6) | OPC_DBSHFL, /* 01.bp (01.000 to 01.111) */
+    OPC_DALIGN_1   = (0x09 << 6) | OPC_DBSHFL,
+    OPC_DALIGN_2   = (0x0A << 6) | OPC_DBSHFL,
+    OPC_DALIGN_3   = (0x0B << 6) | OPC_DBSHFL,
+    OPC_DALIGN_4   = (0x0C << 6) | OPC_DBSHFL,
+    OPC_DALIGN_5   = (0x0D << 6) | OPC_DBSHFL,
+    OPC_DALIGN_6   = (0x0E << 6) | OPC_DBSHFL,
+    OPC_DALIGN_7   = (0x0F << 6) | OPC_DBSHFL,
     OPC_DBITSWAP   = (0x00 << 6) | OPC_DBSHFL, /* 00000 */
 };
 
@@ -1389,6 +1397,1021 @@
     OPC_BINSRI_df   = (0x7 << 23) | OPC_MSA_BIT_09,
 };
 
+
+/*
+ *    AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET
+ *    ============================================
+ *
+ * MXU (full name: MIPS eXtension/enhanced Unit) is an SIMD extension of MIPS32
+ * instruction set. It is designed to fit the needs of signal, graphical and
+ * video processing applications. MXU instruction set is used in Xburst family
+ * of microprocessors by Ingenic.
+ *
+ * MXU unit contains 17 registers called X0-X16. X0 is always zero, and X16 is
+ * the control register.
+ *
+ * The notation used in MXU assembler mnemonics
+ * --------------------------------------------
+ *
+ *  Registers:
+ *
+ *   XRa, XRb, XRc, XRd - MXU registers
+ *   Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers
+ *
+ *  Subfields:
+ *
+ *   aptn1              - 1-bit accumulate add/subtract pattern
+ *   aptn2              - 2-bit accumulate add/subtract pattern
+ *   eptn2              - 2-bit execute add/subtract pattern
+ *   optn2              - 2-bit operand pattern
+ *   optn3              - 3-bit operand pattern
+ *   sft4               - 4-bit shift amount
+ *   strd2              - 2-bit stride amount
+ *
+ *  Prefixes:
+ *
+ *   <Operation parallel level><Operand size>
+ *     S                         32
+ *     D                         16
+ *     Q                          8
+ *
+ *  Suffixes:
+ *
+ *   E - Expand results
+ *   F - Fixed point multiplication
+ *   L - Low part result
+ *   R - Doing rounding
+ *   V - Variable instead of immediate
+ *   W - Combine above L and V
+ *
+ *  Operations:
+ *
+ *   ADD   - Add or subtract
+ *   ADDC  - Add with carry-in
+ *   ACC   - Accumulate
+ *   ASUM  - Sum together then accumulate (add or subtract)
+ *   ASUMC - Sum together then accumulate (add or subtract) with carry-in
+ *   AVG   - Average between 2 operands
+ *   ABD   - Absolute difference
+ *   ALN   - Align data
+ *   AND   - Logical bitwise 'and' operation
+ *   CPS   - Copy sign
+ *   EXTR  - Extract bits
+ *   I2M   - Move from GPR register to MXU register
+ *   LDD   - Load data from memory to XRF
+ *   LDI   - Load data from memory to XRF (and increase the address base)
+ *   LUI   - Load unsigned immediate
+ *   MUL   - Multiply
+ *   MULU  - Unsigned multiply
+ *   MADD  - 64-bit operand add 32x32 product
+ *   MSUB  - 64-bit operand subtract 32x32 product
+ *   MAC   - Multiply and accumulate (add or subtract)
+ *   MAD   - Multiply and add or subtract
+ *   MAX   - Maximum between 2 operands
+ *   MIN   - Minimum between 2 operands
+ *   M2I   - Move from MXU register to GPR register
+ *   MOVZ  - Move if zero
+ *   MOVN  - Move if non-zero
+ *   NOR   - Logical bitwise 'nor' operation
+ *   OR    - Logical bitwise 'or' operation
+ *   STD   - Store data from XRF to memory
+ *   SDI   - Store data from XRF to memory (and increase the address base)
+ *   SLT   - Set on less than comparison
+ *   SAD   - Sum of absolute differences
+ *   SLL   - Logical shift left
+ *   SLR   - Logical shift right
+ *   SAR   - Arithmetic shift right
+ *   SAT   - Saturation
+ *   SFL   - Shuffle
+ *   SCOP  - Calculate x’s scope (-1, means x<0; 0, means x==0; 1, means x>0)
+ *   XOR   - Logical bitwise 'exclusive or' operation
+ *
+ * Load/Store instructions           Multiplication instructions
+ * -----------------------           ---------------------------
+ *
+ *  S32LDD XRa, Rb, s12               S32MADD XRa, XRd, Rs, Rt
+ *  S32STD XRa, Rb, s12               S32MADDU XRa, XRd, Rs, Rt
+ *  S32LDDV XRa, Rb, rc, strd2        S32MSUB XRa, XRd, Rs, Rt
+ *  S32STDV XRa, Rb, rc, strd2        S32MSUBU XRa, XRd, Rs, Rt
+ *  S32LDI XRa, Rb, s12               S32MUL XRa, XRd, Rs, Rt
+ *  S32SDI XRa, Rb, s12               S32MULU XRa, XRd, Rs, Rt
+ *  S32LDIV XRa, Rb, rc, strd2        D16MUL XRa, XRb, XRc, XRd, optn2
+ *  S32SDIV XRa, Rb, rc, strd2        D16MULE XRa, XRb, XRc, optn2
+ *  S32LDDR XRa, Rb, s12              D16MULF XRa, XRb, XRc, optn2
+ *  S32STDR XRa, Rb, s12              D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
+ *  S32LDDVR XRa, Rb, rc, strd2       D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
+ *  S32STDVR XRa, Rb, rc, strd2       D16MACF XRa, XRb, XRc, XRd, aptn2, optn2
+ *  S32LDIR XRa, Rb, s12              D16MADL XRa, XRb, XRc, XRd, aptn2, optn2
+ *  S32SDIR XRa, Rb, s12              S16MAD XRa, XRb, XRc, XRd, aptn1, optn2
+ *  S32LDIVR XRa, Rb, rc, strd2       Q8MUL XRa, XRb, XRc, XRd
+ *  S32SDIVR XRa, Rb, rc, strd2       Q8MULSU XRa, XRb, XRc, XRd
+ *  S16LDD XRa, Rb, s10, eptn2        Q8MAC XRa, XRb, XRc, XRd, aptn2
+ *  S16STD XRa, Rb, s10, eptn2        Q8MACSU XRa, XRb, XRc, XRd, aptn2
+ *  S16LDI XRa, Rb, s10, eptn2        Q8MADL XRa, XRb, XRc, XRd, aptn2
+ *  S16SDI XRa, Rb, s10, eptn2
+ *  S8LDD XRa, Rb, s8, eptn3
+ *  S8STD XRa, Rb, s8, eptn3         Addition and subtraction instructions
+ *  S8LDI XRa, Rb, s8, eptn3         -------------------------------------
+ *  S8SDI XRa, Rb, s8, eptn3
+ *  LXW Rd, Rs, Rt, strd2             D32ADD XRa, XRb, XRc, XRd, eptn2
+ *  LXH Rd, Rs, Rt, strd2             D32ADDC XRa, XRb, XRc, XRd
+ *  LXHU Rd, Rs, Rt, strd2            D32ACC XRa, XRb, XRc, XRd, eptn2
+ *  LXB Rd, Rs, Rt, strd2             D32ACCM XRa, XRb, XRc, XRd, eptn2
+ *  LXBU Rd, Rs, Rt, strd2            D32ASUM XRa, XRb, XRc, XRd, eptn2
+ *                                    S32CPS XRa, XRb, XRc
+ *                                    Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2
+ * Comparison instructions            Q16ACC XRa, XRb, XRc, XRd, eptn2
+ * -----------------------            Q16ACCM XRa, XRb, XRc, XRd, eptn2
+ *                                    D16ASUM XRa, XRb, XRc, XRd, eptn2
+ *  S32MAX XRa, XRb, XRc              D16CPS XRa, XRb, XRc
+ *  S32MIN XRa, XRb, XRc              D16AVG XRa, XRb, XRc
+ *  S32SLT XRa, XRb, XRc              D16AVGR XRa, XRb, XRc
+ *  S32MOVZ XRa, XRb, XRc             Q8ADD XRa, XRb, XRc, eptn2
+ *  S32MOVN XRa, XRb, XRc             Q8ADDE XRa, XRb, XRc, XRd, eptn2
+ *  D16MAX XRa, XRb, XRc              Q8ACCE XRa, XRb, XRc, XRd, eptn2
+ *  D16MIN XRa, XRb, XRc              Q8ABD XRa, XRb, XRc
+ *  D16SLT XRa, XRb, XRc              Q8SAD XRa, XRb, XRc, XRd
+ *  D16MOVZ XRa, XRb, XRc             Q8AVG XRa, XRb, XRc
+ *  D16MOVN XRa, XRb, XRc             Q8AVGR XRa, XRb, XRc
+ *  Q8MAX XRa, XRb, XRc               D8SUM XRa, XRb, XRc, XRd
+ *  Q8MIN XRa, XRb, XRc               D8SUMC XRa, XRb, XRc, XRd
+ *  Q8SLT XRa, XRb, XRc
+ *  Q8SLTU XRa, XRb, XRc
+ *  Q8MOVZ XRa, XRb, XRc             Shift instructions
+ *  Q8MOVN XRa, XRb, XRc             ------------------
+ *
+ *                                    D32SLL XRa, XRb, XRc, XRd, sft4
+ * Bitwise instructions               D32SLR XRa, XRb, XRc, XRd, sft4
+ * --------------------               D32SAR XRa, XRb, XRc, XRd, sft4
+ *                                    D32SARL XRa, XRb, XRc, sft4
+ *  S32NOR XRa, XRb, XRc              D32SLLV XRa, XRb, Rb
+ *  S32AND XRa, XRb, XRc              D32SLRV XRa, XRb, Rb
+ *  S32XOR XRa, XRb, XRc              D32SARV XRa, XRb, Rb
+ *  S32OR XRa, XRb, XRc               D32SARW XRa, XRb, XRc, Rb
+ *                                    Q16SLL XRa, XRb, XRc, XRd, sft4
+ *                                    Q16SLR XRa, XRb, XRc, XRd, sft4
+ * Miscellaneous instructions         Q16SAR XRa, XRb, XRc, XRd, sft4
+ * -------------------------          Q16SLLV XRa, XRb, Rb
+ *                                    Q16SLRV XRa, XRb, Rb
+ *  S32SFL XRa, XRb, XRc, XRd, optn2  Q16SARV XRa, XRb, Rb
+ *  S32ALN XRa, XRb, XRc, Rb
+ *  S32ALNI XRa, XRb, XRc, s3
+ *  S32LUI XRa, s8, optn3            Move instructions
+ *  S32EXTR XRa, XRb, Rb, bits5      -----------------
+ *  S32EXTRV XRa, XRb, Rs, Rt
+ *  Q16SCOP XRa, XRb, XRc, XRd        S32M2I XRa, Rb
+ *  Q16SAT XRa, XRb, XRc              S32I2M XRa, Rb
+ *
+ *
+ *              bits
+ *             05..00
+ *
+ *          ┌─ 000000 ─ OPC_MXU_S32MADD
+ *          ├─ 000001 ─ OPC_MXU_S32MADDU
+ *          ├─ 000010 ─ <not assigned>   (non-MXU OPC_MUL)
+ *          │
+ *          │                               20..18
+ *          ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX
+ *          │                            ├─ 001 ─ OPC_MXU_S32MIN
+ *          │                            ├─ 010 ─ OPC_MXU_D16MAX
+ *          │                            ├─ 011 ─ OPC_MXU_D16MIN
+ *          │                            ├─ 100 ─ OPC_MXU_Q8MAX
+ *          │                            ├─ 101 ─ OPC_MXU_Q8MIN
+ *          │                            ├─ 110 ─ OPC_MXU_Q8SLT
+ *          │                            └─ 111 ─ OPC_MXU_Q8SLTU
+ *          ├─ 000100 ─ OPC_MXU_S32MSUB
+ *          ├─ 000101 ─ OPC_MXU_S32MSUBU    20..18
+ *          ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT
+ *          │                            ├─ 001 ─ OPC_MXU_D16SLT
+ *          │                            ├─ 010 ─ OPC_MXU_D16AVG
+ *          │                            ├─ 011 ─ OPC_MXU_D16AVGR
+ *          │                            ├─ 100 ─ OPC_MXU_Q8AVG
+ *          │                            ├─ 101 ─ OPC_MXU_Q8AVGR
+ *          │                            └─ 111 ─ OPC_MXU_Q8ADD
+ *          │
+ *          │                               20..18
+ *          ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS
+ *          │                            ├─ 010 ─ OPC_MXU_D16CPS
+ *          │                            ├─ 100 ─ OPC_MXU_Q8ABD
+ *          │                            └─ 110 ─ OPC_MXU_Q16SAT
+ *          ├─ 001000 ─ OPC_MXU_D16MUL
+ *          │                               25..24
+ *          ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF
+ *          │                            └─ 01 ─ OPC_MXU_D16MULE
+ *          ├─ 001010 ─ OPC_MXU_D16MAC
+ *          ├─ 001011 ─ OPC_MXU_D16MACF
+ *          ├─ 001100 ─ OPC_MXU_D16MADL
+ *          ├─ 001101 ─ OPC_MXU_S16MAD
+ *          ├─ 001110 ─ OPC_MXU_Q16ADD
+ *          ├─ 001111 ─ OPC_MXU_D16MACE     23
+ *          │                            ┌─ 0 ─ OPC_MXU_S32LDD
+ *          ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR
+ *          │
+ *          │                               23
+ *          ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD
+ *          │                            └─ 1 ─ OPC_MXU_S32STDR
+ *          │
+ *          │                               13..10
+ *          ├─ 010010 ─ OPC_MXU__POOL06 ─┬─ 0000 ─ OPC_MXU_S32LDDV
+ *          │                            └─ 0001 ─ OPC_MXU_S32LDDVR
+ *          │
+ *          │                               13..10
+ *          ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV
+ *          │                            └─ 0001 ─ OPC_MXU_S32STDVR
+ *          │
+ *          │                               23
+ *          ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI
+ *          │                            └─ 1 ─ OPC_MXU_S32LDIR
+ *          │
+ *          │                               23
+ *          ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI
+ *          │                            └─ 1 ─ OPC_MXU_S32SDIR
+ *          │
+ *          │                               13..10
+ *          ├─ 010110 ─ OPC_MXU__POOL10 ─┬─ 0000 ─ OPC_MXU_S32LDIV
+ *          │                            └─ 0001 ─ OPC_MXU_S32LDIVR
+ *          │
+ *          │                               13..10
+ *          ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV
+ *          │                            └─ 0001 ─ OPC_MXU_S32SDIVR
+ *          ├─ 011000 ─ OPC_MXU_D32ADD
+ *          │                               23..22
+ *   MXU    ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC
+ * opcodes ─┤                            ├─ 01 ─ OPC_MXU_D32ACCM
+ *          │                            └─ 10 ─ OPC_MXU_D32ASUM
+ *          ├─ 011010 ─ <not assigned>
+ *          │                               23..22
+ *          ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC
+ *          │                            ├─ 01 ─ OPC_MXU_Q16ACCM
+ *          │                            └─ 10 ─ OPC_MXU_Q16ASUM
+ *          │
+ *          │                               23..22
+ *          ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE
+ *          │                            ├─ 01 ─ OPC_MXU_D8SUM
+ *          ├─ 011101 ─ OPC_MXU_Q8ACCE   └─ 10 ─ OPC_MXU_D8SUMC
+ *          ├─ 011110 ─ <not assigned>
+ *          ├─ 011111 ─ <not assigned>
+ *          ├─ 100000 ─ <not assigned>   (overlaps with CLZ)
+ *          ├─ 100001 ─ <not assigned>   (overlaps with CLO)
+ *          ├─ 100010 ─ OPC_MXU_S8LDD
+ *          ├─ 100011 ─ OPC_MXU_S8STD       15..14
+ *          ├─ 100100 ─ OPC_MXU_S8LDI    ┌─ 00 ─ OPC_MXU_S32MUL
+ *          ├─ 100101 ─ OPC_MXU_S8SDI    ├─ 01 ─ OPC_MXU_S32MULU
+ *          │                            ├─ 10 ─ OPC_MXU_S32EXTR
+ *          ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 11 ─ OPC_MXU_S32EXTRV
+ *          │
+ *          │                               20..18
+ *          ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW
+ *          │                            ├─ 001 ─ OPC_MXU_S32ALN
+ *          ├─ 101000 ─ OPC_MXU_LXB      ├─ 010 ─ OPC_MXU_S32ALNI
+ *          ├─ 101001 ─ <not assigned>   ├─ 011 ─ OPC_MXU_S32NOR
+ *          ├─ 101010 ─ OPC_MXU_S16LDD   ├─ 100 ─ OPC_MXU_S32AND
+ *          ├─ 101011 ─ OPC_MXU_S16STD   ├─ 101 ─ OPC_MXU_S32OR
+ *          ├─ 101100 ─ OPC_MXU_S16LDI   ├─ 110 ─ OPC_MXU_S32XOR
+ *          ├─ 101101 ─ OPC_MXU_S16SDI   └─ 111 ─ OPC_MXU_S32LUI
+ *          ├─ 101110 ─ OPC_MXU_S32M2I
+ *          ├─ 101111 ─ OPC_MXU_S32I2M
+ *          ├─ 110000 ─ OPC_MXU_D32SLL
+ *          ├─ 110001 ─ OPC_MXU_D32SLR      20..18
+ *          ├─ 110010 ─ OPC_MXU_D32SARL  ┌─ 000 ─ OPC_MXU_D32SLLV
+ *          ├─ 110011 ─ OPC_MXU_D32SAR   ├─ 001 ─ OPC_MXU_D32SLRV
+ *          ├─ 110100 ─ OPC_MXU_Q16SLL   ├─ 011 ─ OPC_MXU_D32SARV
+ *          ├─ 110101 ─ OPC_MXU_Q16SLR   ├─ 100 ─ OPC_MXU_Q16SLLV
+ *          │                            ├─ 101 ─ OPC_MXU_Q16SLRV
+ *          ├─ 110110 ─ OPC_MXU__POOL17 ─┴─ 111 ─ OPC_MXU_Q16SARV
+ *          │
+ *          ├─ 110111 ─ OPC_MXU_Q16SAR
+ *          │                               23..22
+ *          ├─ 111000 ─ OPC_MXU__POOL18 ─┬─ 00 ─ OPC_MXU_Q8MUL
+ *          │                            └─ 01 ─ OPC_MXU_Q8MULSU
+ *          │
+ *          │                               20..18
+ *          ├─ 111001 ─ OPC_MXU__POOL19 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
+ *          │                            ├─ 001 ─ OPC_MXU_Q8MOVN
+ *          │                            ├─ 010 ─ OPC_MXU_D16MOVZ
+ *          │                            ├─ 011 ─ OPC_MXU_D16MOVN
+ *          │                            ├─ 100 ─ OPC_MXU_S32MOVZ
+ *          │                            └─ 101 ─ OPC_MXU_S32MOVN
+ *          │
+ *          │                               23..22
+ *          ├─ 111010 ─ OPC_MXU__POOL20 ─┬─ 00 ─ OPC_MXU_Q8MAC
+ *          │                            └─ 10 ─ OPC_MXU_Q8MACSU
+ *          ├─ 111011 ─ OPC_MXU_Q16SCOP
+ *          ├─ 111100 ─ OPC_MXU_Q8MADL
+ *          ├─ 111101 ─ OPC_MXU_S32SFL
+ *          ├─ 111110 ─ OPC_MXU_Q8SAD
+ *          └─ 111111 ─ <not assigned>   (overlaps with SDBBP)
+ *
+ *
+ *   Compiled after:
+ *
+ *   "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
+ *   Programming Manual", Ingenic Semiconductor Co, Ltd., 2017
+ */
+
+enum {
+    OPC_MXU_S32MADD  = 0x00,
+    OPC_MXU_S32MADDU = 0x01,
+    OPC__MXU_MUL     = 0x02,
+    OPC_MXU__POOL00  = 0x03,
+    OPC_MXU_S32MSUB  = 0x04,
+    OPC_MXU_S32MSUBU = 0x05,
+    OPC_MXU__POOL01  = 0x06,
+    OPC_MXU__POOL02  = 0x07,
+    OPC_MXU_D16MUL   = 0x08,
+    OPC_MXU__POOL03  = 0x09,
+    OPC_MXU_D16MAC   = 0x0A,
+    OPC_MXU_D16MACF  = 0x0B,
+    OPC_MXU_D16MADL  = 0x0C,
+    OPC_MXU_S16MAD   = 0x0D,
+    OPC_MXU_Q16ADD   = 0x0E,
+    OPC_MXU_D16MACE  = 0x0F,
+    OPC_MXU__POOL04  = 0x10,
+    OPC_MXU__POOL05  = 0x11,
+    OPC_MXU__POOL06  = 0x12,
+    OPC_MXU__POOL07  = 0x13,
+    OPC_MXU__POOL08  = 0x14,
+    OPC_MXU__POOL09  = 0x15,
+    OPC_MXU__POOL10  = 0x16,
+    OPC_MXU__POOL11  = 0x17,
+    OPC_MXU_D32ADD   = 0x18,
+    OPC_MXU__POOL12  = 0x19,
+    /* not assigned 0x1A */
+    OPC_MXU__POOL13  = 0x1B,
+    OPC_MXU__POOL14  = 0x1C,
+    OPC_MXU_Q8ACCE   = 0x1D,
+    /* not assigned 0x1E */
+    /* not assigned 0x1F */
+    /* not assigned 0x20 */
+    /* not assigned 0x21 */
+    OPC_MXU_S8LDD    = 0x22,
+    OPC_MXU_S8STD    = 0x23,
+    OPC_MXU_S8LDI    = 0x24,
+    OPC_MXU_S8SDI    = 0x25,
+    OPC_MXU__POOL15  = 0x26,
+    OPC_MXU__POOL16  = 0x27,
+    OPC_MXU_LXB      = 0x28,
+    /* not assigned 0x29 */
+    OPC_MXU_S16LDD   = 0x2A,
+    OPC_MXU_S16STD   = 0x2B,
+    OPC_MXU_S16LDI   = 0x2C,
+    OPC_MXU_S16SDI   = 0x2D,
+    OPC_MXU_S32M2I   = 0x2E,
+    OPC_MXU_S32I2M   = 0x2F,
+    OPC_MXU_D32SLL   = 0x30,
+    OPC_MXU_D32SLR   = 0x31,
+    OPC_MXU_D32SARL  = 0x32,
+    OPC_MXU_D32SAR   = 0x33,
+    OPC_MXU_Q16SLL   = 0x34,
+    OPC_MXU_Q16SLR   = 0x35,
+    OPC_MXU__POOL17  = 0x36,
+    OPC_MXU_Q16SAR   = 0x37,
+    OPC_MXU__POOL18  = 0x38,
+    OPC_MXU__POOL19  = 0x39,
+    OPC_MXU__POOL20  = 0x3A,
+    OPC_MXU_Q16SCOP  = 0x3B,
+    OPC_MXU_Q8MADL   = 0x3C,
+    OPC_MXU_S32SFL   = 0x3D,
+    OPC_MXU_Q8SAD    = 0x3E,
+    /* not assigned 0x3F */
+};
+
+
+/*
+ * MXU pool 00
+ */
+enum {
+    OPC_MXU_S32MAX   = 0x00,
+    OPC_MXU_S32MIN   = 0x01,
+    OPC_MXU_D16MAX   = 0x02,
+    OPC_MXU_D16MIN   = 0x03,
+    OPC_MXU_Q8MAX    = 0x04,
+    OPC_MXU_Q8MIN    = 0x05,
+    OPC_MXU_Q8SLT    = 0x06,
+    OPC_MXU_Q8SLTU   = 0x07,
+};
+
+/*
+ * MXU pool 01
+ */
+enum {
+    OPC_MXU_S32SLT   = 0x00,
+    OPC_MXU_D16SLT   = 0x01,
+    OPC_MXU_D16AVG   = 0x02,
+    OPC_MXU_D16AVGR  = 0x03,
+    OPC_MXU_Q8AVG    = 0x04,
+    OPC_MXU_Q8AVGR   = 0x05,
+    OPC_MXU_Q8ADD    = 0x07,
+};
+
+/*
+ * MXU pool 02
+ */
+enum {
+    OPC_MXU_S32CPS   = 0x00,
+    OPC_MXU_D16CPS   = 0x02,
+    OPC_MXU_Q8ABD    = 0x04,
+    OPC_MXU_Q16SAT   = 0x06,
+};
+
+/*
+ * MXU pool 03
+ */
+enum {
+    OPC_MXU_D16MULF  = 0x00,
+    OPC_MXU_D16MULE  = 0x01,
+};
+
+/*
+ * MXU pool 04
+ */
+enum {
+    OPC_MXU_S32LDD   = 0x00,
+    OPC_MXU_S32LDDR  = 0x01,
+};
+
+/*
+ * MXU pool 05
+ */
+enum {
+    OPC_MXU_S32STD   = 0x00,
+    OPC_MXU_S32STDR  = 0x01,
+};
+
+/*
+ * MXU pool 06
+ */
+enum {
+    OPC_MXU_S32LDDV  = 0x00,
+    OPC_MXU_S32LDDVR = 0x01,
+};
+
+/*
+ * MXU pool 07
+ */
+enum {
+    OPC_MXU_S32STDV  = 0x00,
+    OPC_MXU_S32STDVR = 0x01,
+};
+
+/*
+ * MXU pool 08
+ */
+enum {
+    OPC_MXU_S32LDI   = 0x00,
+    OPC_MXU_S32LDIR  = 0x01,
+};
+
+/*
+ * MXU pool 09
+ */
+enum {
+    OPC_MXU_S32SDI   = 0x00,
+    OPC_MXU_S32SDIR  = 0x01,
+};
+
+/*
+ * MXU pool 10
+ */
+enum {
+    OPC_MXU_S32LDIV  = 0x00,
+    OPC_MXU_S32LDIVR = 0x01,
+};
+
+/*
+ * MXU pool 11
+ */
+enum {
+    OPC_MXU_S32SDIV  = 0x00,
+    OPC_MXU_S32SDIVR = 0x01,
+};
+
+/*
+ * MXU pool 12
+ */
+enum {
+    OPC_MXU_D32ACC   = 0x00,
+    OPC_MXU_D32ACCM  = 0x01,
+    OPC_MXU_D32ASUM  = 0x02,
+};
+
+/*
+ * MXU pool 13
+ */
+enum {
+    OPC_MXU_Q16ACC   = 0x00,
+    OPC_MXU_Q16ACCM  = 0x01,
+    OPC_MXU_Q16ASUM  = 0x02,
+};
+
+/*
+ * MXU pool 14
+ */
+enum {
+    OPC_MXU_Q8ADDE   = 0x00,
+    OPC_MXU_D8SUM    = 0x01,
+    OPC_MXU_D8SUMC   = 0x02,
+};
+
+/*
+ * MXU pool 15
+ */
+enum {
+    OPC_MXU_S32MUL   = 0x00,
+    OPC_MXU_S32MULU  = 0x01,
+    OPC_MXU_S32EXTR  = 0x02,
+    OPC_MXU_S32EXTRV = 0x03,
+};
+
+/*
+ * MXU pool 16
+ */
+enum {
+    OPC_MXU_D32SARW  = 0x00,
+    OPC_MXU_S32ALN   = 0x01,
+    OPC_MXU_S32ALNI  = 0x02,
+    OPC_MXU_S32NOR   = 0x03,
+    OPC_MXU_S32AND   = 0x04,
+    OPC_MXU_S32OR    = 0x05,
+    OPC_MXU_S32XOR   = 0x06,
+    OPC_MXU_S32LUI   = 0x07,
+};
+
+/*
+ * MXU pool 17
+ */
+enum {
+    OPC_MXU_D32SLLV  = 0x00,
+    OPC_MXU_D32SLRV  = 0x01,
+    OPC_MXU_D32SARV  = 0x03,
+    OPC_MXU_Q16SLLV  = 0x04,
+    OPC_MXU_Q16SLRV  = 0x05,
+    OPC_MXU_Q16SARV  = 0x07,
+};
+
+/*
+ * MXU pool 18
+ */
+enum {
+    OPC_MXU_Q8MUL    = 0x00,
+    OPC_MXU_Q8MULSU  = 0x01,
+};
+
+/*
+ * MXU pool 19
+ */
+enum {
+    OPC_MXU_Q8MOVZ   = 0x00,
+    OPC_MXU_Q8MOVN   = 0x01,
+    OPC_MXU_D16MOVZ  = 0x02,
+    OPC_MXU_D16MOVN  = 0x03,
+    OPC_MXU_S32MOVZ  = 0x04,
+    OPC_MXU_S32MOVN  = 0x05,
+};
+
+/*
+ * MXU pool 20 (minor opcode in bits 23..22; 0x01 and 0x03 are not assigned)
+ */
+enum {
+    OPC_MXU_Q8MAC    = 0x00,
+    OPC_MXU_Q8MACSU  = 0x02,
+};
+
+/*
+ *     Overview of the TX79-specific instruction set
+ *     =============================================
+ *
+ * The R5900 and the C790 have 128-bit wide GPRs, where the upper 64 bits
+ * are only used by the specific quadword (128-bit) LQ/SQ load/store
+ * instructions and certain multimedia instructions (MMIs). These MMIs
+ * configure the 128-bit data path as two 64-bit, four 32-bit, eight 16-bit
+ * or sixteen 8-bit paths.
+ *
+ * Reference:
+ *
+ * The Toshiba TX System RISC TX79 Core Architecture manual,
+ * https://wiki.qemu.org/File:C790.pdf
+ *
+ *     Three-Operand Multiply and Multiply-Add (4 instructions)
+ *     --------------------------------------------------------
+ * MADD    [rd,] rs, rt      Multiply/Add
+ * MADDU   [rd,] rs, rt      Multiply/Add Unsigned
+ * MULT    [rd,] rs, rt      Multiply (3-operand)
+ * MULTU   [rd,] rs, rt      Multiply Unsigned (3-operand)
+ *
+ *     Multiply Instructions for Pipeline 1 (10 instructions)
+ *     ------------------------------------------------------
+ * MULT1   [rd,] rs, rt      Multiply Pipeline 1
+ * MULTU1  [rd,] rs, rt      Multiply Unsigned Pipeline 1
+ * DIV1    rs, rt            Divide Pipeline 1
+ * DIVU1   rs, rt            Divide Unsigned Pipeline 1
+ * MADD1   [rd,] rs, rt      Multiply-Add Pipeline 1
+ * MADDU1  [rd,] rs, rt      Multiply-Add Unsigned Pipeline 1
+ * MFHI1   rd                Move From HI1 Register
+ * MFLO1   rd                Move From LO1 Register
+ * MTHI1   rs                Move To HI1 Register
+ * MTLO1   rs                Move To LO1 Register
+ *
+ *     Arithmetic (19 instructions)
+ *     ----------------------------
+ * PADDB   rd, rs, rt        Parallel Add Byte
+ * PSUBB   rd, rs, rt        Parallel Subtract Byte
+ * PADDH   rd, rs, rt        Parallel Add Halfword
+ * PSUBH   rd, rs, rt        Parallel Subtract Halfword
+ * PADDW   rd, rs, rt        Parallel Add Word
+ * PSUBW   rd, rs, rt        Parallel Subtract Word
+ * PADSBH  rd, rs, rt        Parallel Add/Subtract Halfword
+ * PADDSB  rd, rs, rt        Parallel Add with Signed Saturation Byte
+ * PSUBSB  rd, rs, rt        Parallel Subtract with Signed Saturation Byte
+ * PADDSH  rd, rs, rt        Parallel Add with Signed Saturation Halfword
+ * PSUBSH  rd, rs, rt        Parallel Subtract with Signed Saturation Halfword
+ * PADDSW  rd, rs, rt        Parallel Add with Signed Saturation Word
+ * PSUBSW  rd, rs, rt        Parallel Subtract with Signed Saturation Word
+ * PADDUB  rd, rs, rt        Parallel Add with Unsigned Saturation Byte
+ * PSUBUB  rd, rs, rt        Parallel Subtract with Unsigned Saturation Byte
+ * PADDUH  rd, rs, rt        Parallel Add with Unsigned Saturation Halfword
+ * PSUBUH  rd, rs, rt        Parallel Subtract with Unsigned Saturation Halfword
+ * PADDUW  rd, rs, rt        Parallel Add with Unsigned Saturation Word
+ * PSUBUW  rd, rs, rt        Parallel Subtract with Unsigned Saturation Word
+ *
+ *     Min/Max (4 instructions)
+ *     ------------------------
+ * PMAXH   rd, rs, rt        Parallel Maximum Halfword
+ * PMINH   rd, rs, rt        Parallel Minimum Halfword
+ * PMAXW   rd, rs, rt        Parallel Maximum Word
+ * PMINW   rd, rs, rt        Parallel Minimum Word
+ *
+ *     Absolute (2 instructions)
+ *     -------------------------
+ * PABSH   rd, rt            Parallel Absolute Halfword
+ * PABSW   rd, rt            Parallel Absolute Word
+ *
+ *     Logical (4 instructions)
+ *     ------------------------
+ * PAND    rd, rs, rt        Parallel AND
+ * POR     rd, rs, rt        Parallel OR
+ * PXOR    rd, rs, rt        Parallel XOR
+ * PNOR    rd, rs, rt        Parallel NOR
+ *
+ *     Shift (9 instructions)
+ *     ----------------------
+ * PSLLH   rd, rt, sa        Parallel Shift Left Logical Halfword
+ * PSRLH   rd, rt, sa        Parallel Shift Right Logical Halfword
+ * PSRAH   rd, rt, sa        Parallel Shift Right Arithmetic Halfword
+ * PSLLW   rd, rt, sa        Parallel Shift Left Logical Word
+ * PSRLW   rd, rt, sa        Parallel Shift Right Logical Word
+ * PSRAW   rd, rt, sa        Parallel Shift Right Arithmetic Word
+ * PSLLVW  rd, rt, rs        Parallel Shift Left Logical Variable Word
+ * PSRLVW  rd, rt, rs        Parallel Shift Right Logical Variable Word
+ * PSRAVW  rd, rt, rs        Parallel Shift Right Arithmetic Variable Word
+ *
+ *     Compare (6 instructions)
+ *     ------------------------
+ * PCGTB   rd, rs, rt        Parallel Compare for Greater Than Byte
+ * PCEQB   rd, rs, rt        Parallel Compare for Equal Byte
+ * PCGTH   rd, rs, rt        Parallel Compare for Greater Than Halfword
+ * PCEQH   rd, rs, rt        Parallel Compare for Equal Halfword
+ * PCGTW   rd, rs, rt        Parallel Compare for Greater Than Word
+ * PCEQW   rd, rs, rt        Parallel Compare for Equal Word
+ *
+ *     LZC (1 instruction)
+ *     -------------------
+ * PLZCW   rd, rs            Parallel Leading Zero or One Count Word
+ *
+ *     Quadword Load and Store (2 instructions)
+ *     ----------------------------------------
+ * LQ      rt, offset(base)  Load Quadword
+ * SQ      rt, offset(base)  Store Quadword
+ *
+ *     Multiply and Divide (19 instructions)
+ *     -------------------------------------
+ * PMULTW  rd, rs, rt        Parallel Multiply Word
+ * PMULTUW rd, rs, rt        Parallel Multiply Unsigned Word
+ * PDIVW   rs, rt            Parallel Divide Word
+ * PDIVUW  rs, rt            Parallel Divide Unsigned Word
+ * PMADDW  rd, rs, rt        Parallel Multiply-Add Word
+ * PMADDUW rd, rs, rt        Parallel Multiply-Add Unsigned Word
+ * PMSUBW  rd, rs, rt        Parallel Multiply-Subtract Word
+ * PMULTH  rd, rs, rt        Parallel Multiply Halfword
+ * PMADDH  rd, rs, rt        Parallel Multiply-Add Halfword
+ * PMSUBH  rd, rs, rt        Parallel Multiply-Subtract Halfword
+ * PHMADH  rd, rs, rt        Parallel Horizontal Multiply-Add Halfword
+ * PHMSBH  rd, rs, rt        Parallel Horizontal Multiply-Subtract Halfword
+ * PDIVBW  rs, rt            Parallel Divide Broadcast Word
+ * PMFHI   rd                Parallel Move From HI Register
+ * PMFLO   rd                Parallel Move From LO Register
+ * PMTHI   rs                Parallel Move To HI Register
+ * PMTLO   rs                Parallel Move To LO Register
+ * PMFHL   rd                Parallel Move From HI/LO Register
+ * PMTHL   rs                Parallel Move To HI/LO Register
+ *
+ *     Pack/Extend (11 instructions)
+ *     -----------------------------
+ * PPAC5   rd, rt            Parallel Pack to 5 bits
+ * PPACB   rd, rs, rt        Parallel Pack to Byte
+ * PPACH   rd, rs, rt        Parallel Pack to Halfword
+ * PPACW   rd, rs, rt        Parallel Pack to Word
+ * PEXT5   rd, rt            Parallel Extend Upper from 5 bits
+ * PEXTUB  rd, rs, rt        Parallel Extend Upper from Byte
+ * PEXTLB  rd, rs, rt        Parallel Extend Lower from Byte
+ * PEXTUH  rd, rs, rt        Parallel Extend Upper from Halfword
+ * PEXTLH  rd, rs, rt        Parallel Extend Lower from Halfword
+ * PEXTUW  rd, rs, rt        Parallel Extend Upper from Word
+ * PEXTLW  rd, rs, rt        Parallel Extend Lower from Word
+ *
+ *     Others (16 instructions)
+ *     ------------------------
+ * PCPYH   rd, rt            Parallel Copy Halfword
+ * PCPYLD  rd, rs, rt        Parallel Copy Lower Doubleword
+ * PCPYUD  rd, rs, rt        Parallel Copy Upper Doubleword
+ * PREVH   rd, rt            Parallel Reverse Halfword
+ * PINTH   rd, rs, rt        Parallel Interleave Halfword
+ * PINTEH  rd, rs, rt        Parallel Interleave Even Halfword
+ * PEXEH   rd, rt            Parallel Exchange Even Halfword
+ * PEXCH   rd, rt            Parallel Exchange Center Halfword
+ * PEXEW   rd, rt            Parallel Exchange Even Word
+ * PEXCW   rd, rt            Parallel Exchange Center Word
+ * QFSRV   rd, rs, rt        Quadword Funnel Shift Right Variable
+ * MFSA    rd                Move from Shift Amount Register
+ * MTSA    rs                Move to Shift Amount Register
+ * MTSAB   rs, immediate     Move Byte Count to Shift Amount Register
+ * MTSAH   rs, immediate     Move Halfword Count to Shift Amount Register
+ * PROT3W  rd, rt            Parallel Rotate 3 Words
+ *
+ *     The TX79-specific Multimedia Instruction encodings
+ *     ==================================================
+ *
+ * TX79 Multimedia Instruction encoding table keys:
+ *
+ *     *   This code is reserved for future use. An attempt to execute it
+ *         causes a Reserved Instruction exception.
+ *     %   This code indicates an instruction class. The instruction word
+ *         must be further decoded by examining additional tables that show
+ *         the values for other instruction fields.
+ *     #   This code is reserved for the unsupported instructions DMULT,
+ *         DMULTU, DDIV, DDIVU, LL, LLD, SC, SCD, LWC2 and SWC2. An attempt
+ *         to execute it causes a Reserved Instruction exception.
+ *
+ * TX79 Multimedia Instructions encoded by opcode field (MMI, LQ, SQ):
+ *
+ *  31    26                                        0
+ * +--------+----------------------------------------+
+ * | opcode |                                        |
+ * +--------+----------------------------------------+
+ *
+ *   opcode  bits 28..26
+ *     bits |   0   |   1   |   2   |   3   |   4   |   5   |   6   |   7
+ *   31..29 |  000  |  001  |  010  |  011  |  100  |  101  |  110  |  111
+ *   -------+-------+-------+-------+-------+-------+-------+-------+-------
+ *    0 000 |SPECIAL| REGIMM|   J   |  JAL  |  BEQ  |  BNE  |  BLEZ |  BGTZ
+ *    1 001 |  ADDI | ADDIU |  SLTI | SLTIU |  ANDI |  ORI  |  XORI |  LUI
+ *    2 010 |  COP0 |  COP1 |   *   |   *   |  BEQL |  BNEL | BLEZL | BGTZL
+ *    3 011 | DADDI | DADDIU|  LDL  |  LDR  |  MMI% |   *   |   LQ  |   SQ
+ *    4 100 |   LB  |   LH  |  LWL  |   LW  |  LBU  |  LHU  |  LWR  |  LWU
+ *    5 101 |   SB  |   SH  |  SWL  |   SW  |  SDL  |  SDR  |  SWR  | CACHE
+ *    6 110 |   #   |  LWC1 |   #   |  PREF |   #   |  LDC1 |   #   |   LD
+ *    7 111 |   #   |  SWC1 |   #   |   *   |   #   |  SDC1 |   #   |   SD
+ */
+
+enum {    /* major opcodes: 6-bit value shifted into bits 31..26 */
+    TX79_CLASS_MMI = 0x1C << 26,    /* Same as OPC_SPECIAL2 */
+    TX79_LQ        = 0x1E << 26,    /* Same as OPC_MSA */
+    TX79_SQ        = 0x1F << 26,    /* Same as OPC_SPECIAL3 */
+};
+
+/*
+ * TX79 Multimedia Instructions with opcode field = MMI:
+ *
+ *  31    26                                 5      0
+ * +--------+-------------------------------+--------+
+ * |   MMI  |                               |function|
+ * +--------+-------------------------------+--------+
+ *
+ * function  bits 2..0
+ *     bits |   0   |   1   |   2   |   3   |   4   |   5   |   6   |   7
+ *     5..3 |  000  |  001  |  010  |  011  |  100  |  101  |  110  |  111
+ *   -------+-------+-------+-------+-------+-------+-------+-------+-------
+ *    0 000 |  MADD | MADDU |   *   |   *   | PLZCW |   *   |   *   |   *
+ *    1 001 | MMI0% | MMI2% |   *   |   *   |   *   |   *   |   *   |   *
+ *    2 010 | MFHI1 | MTHI1 | MFLO1 | MTLO1 |   *   |   *   |   *   |   *
+ *    3 011 | MULT1 | MULTU1|  DIV1 | DIVU1 |   *   |   *   |   *   |   *
+ *    4 100 | MADD1 | MADDU1|   *   |   *   |   *   |   *   |   *   |   *
+ *    5 101 | MMI1% | MMI3% |   *   |   *   |   *   |   *   |   *   |   *
+ *    6 110 | PMFHL | PMTHL |   *   |   *   | PSLLH |   *   | PSRLH | PSRAH
+ *    7 111 |   *   |   *   |   *   |   *   | PSLLW |   *   | PSRLW | PSRAW
+ */
+
+#define MASK_TX79_MMI(op) (MASK_OP_MAJOR(op) | ((op) & 0x3F)) /* + bits 5..0 */
+enum {    /* each value: 6-bit function code | TX79_CLASS_MMI */
+    TX79_MMI_MADD       = 0x00 | TX79_CLASS_MMI, /* Same as OPC_MADD */
+    TX79_MMI_MADDU      = 0x01 | TX79_CLASS_MMI, /* Same as OPC_MADDU */
+    TX79_MMI_PLZCW      = 0x04 | TX79_CLASS_MMI,
+    TX79_MMI_CLASS_MMI0 = 0x08 | TX79_CLASS_MMI,
+    TX79_MMI_CLASS_MMI2 = 0x09 | TX79_CLASS_MMI,
+    TX79_MMI_MFHI1      = 0x10 | TX79_CLASS_MMI, /* Same minor as OPC_MFHI */
+    TX79_MMI_MTHI1      = 0x11 | TX79_CLASS_MMI, /* Same minor as OPC_MTHI */
+    TX79_MMI_MFLO1      = 0x12 | TX79_CLASS_MMI, /* Same minor as OPC_MFLO */
+    TX79_MMI_MTLO1      = 0x13 | TX79_CLASS_MMI, /* Same minor as OPC_MTLO */
+    TX79_MMI_MULT1      = 0x18 | TX79_CLASS_MMI, /* Same minor as OPC_MULT */
+    TX79_MMI_MULTU1     = 0x19 | TX79_CLASS_MMI, /* Same minor as OPC_MULTU */
+    TX79_MMI_DIV1       = 0x1A | TX79_CLASS_MMI, /* Same minor as OPC_DIV */
+    TX79_MMI_DIVU1      = 0x1B | TX79_CLASS_MMI, /* Same minor as OPC_DIVU */
+    TX79_MMI_MADD1      = 0x20 | TX79_CLASS_MMI,
+    TX79_MMI_MADDU1     = 0x21 | TX79_CLASS_MMI,
+    TX79_MMI_CLASS_MMI1 = 0x28 | TX79_CLASS_MMI,
+    TX79_MMI_CLASS_MMI3 = 0x29 | TX79_CLASS_MMI,
+    TX79_MMI_PMFHL      = 0x30 | TX79_CLASS_MMI,
+    TX79_MMI_PMTHL      = 0x31 | TX79_CLASS_MMI,
+    TX79_MMI_PSLLH      = 0x34 | TX79_CLASS_MMI,
+    TX79_MMI_PSRLH      = 0x36 | TX79_CLASS_MMI,
+    TX79_MMI_PSRAH      = 0x37 | TX79_CLASS_MMI,
+    TX79_MMI_PSLLW      = 0x3C | TX79_CLASS_MMI,
+    TX79_MMI_PSRLW      = 0x3E | TX79_CLASS_MMI,
+    TX79_MMI_PSRAW      = 0x3F | TX79_CLASS_MMI,
+};
+
+/*
+ * TX79 Multimedia Instructions with opcode field = MMI and bits 5..0 = MMI0:
+ *
+ *  31    26                        10     6 5      0
+ * +--------+----------------------+--------+--------+
+ * |   MMI  |                      |function|  MMI0  |
+ * +--------+----------------------+--------+--------+
+ *
+ * function  bits 7..6
+ *     bits |   0   |   1   |   2   |   3
+ *    10..8 |   00  |   01  |   10  |   11
+ *   -------+-------+-------+-------+-------
+ *    0 000 | PADDW | PSUBW | PCGTW | PMAXW
+ *    1 001 | PADDH | PSUBH | PCGTH | PMAXH
+ *    2 010 | PADDB | PSUBB | PCGTB |   *
+ *    3 011 |   *   |   *   |   *   |   *
+ *    4 100 | PADDSW| PSUBSW| PEXTLW| PPACW
+ *    5 101 | PADDSH| PSUBSH| PEXTLH| PPACH
+ *    6 110 | PADDSB| PSUBSB| PEXTLB| PPACB
+ *    7 111 |   *   |   *   | PEXT5 | PPAC5
+ */
+
+#define MASK_TX79_MMI0(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
+enum {    /* (function << 6) | TX79_MMI_CLASS_MMI0; function = bits 10..6 */
+    TX79_MMI0_PADDW  = (0x00 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PSUBW  = (0x01 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PCGTW  = (0x02 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PMAXW  = (0x03 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PADDH  = (0x04 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PSUBH  = (0x05 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PCGTH  = (0x06 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PMAXH  = (0x07 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PADDB  = (0x08 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PSUBB  = (0x09 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PCGTB  = (0x0A << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PADDSW = (0x10 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PSUBSW = (0x11 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PEXTLW = (0x12 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PPACW  = (0x13 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PADDSH = (0x14 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PSUBSH = (0x15 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PEXTLH = (0x16 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PPACH  = (0x17 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PADDSB = (0x18 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PSUBSB = (0x19 << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PEXTLB = (0x1A << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PPACB  = (0x1B << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PEXT5  = (0x1E << 6) | TX79_MMI_CLASS_MMI0,
+    TX79_MMI0_PPAC5  = (0x1F << 6) | TX79_MMI_CLASS_MMI0,
+};
+
+/*
+ * TX79 Multimedia Instructions with opcode field = MMI and bits 5..0 = MMI1:
+ *
+ *  31    26                        10     6 5      0
+ * +--------+----------------------+--------+--------+
+ * |   MMI  |                      |function|  MMI1  |
+ * +--------+----------------------+--------+--------+
+ *
+ * function  bits 7..6
+ *     bits |   0   |   1   |   2   |   3
+ *    10..8 |   00  |   01  |   10  |   11
+ *   -------+-------+-------+-------+-------
+ *    0 000 |   *   | PABSW | PCEQW | PMINW
+ *    1 001 | PADSBH| PABSH | PCEQH | PMINH
+ *    2 010 |   *   |   *   | PCEQB |   *
+ *    3 011 |   *   |   *   |   *   |   *
+ *    4 100 | PADDUW| PSUBUW| PEXTUW|   *
+ *    5 101 | PADDUH| PSUBUH| PEXTUH|   *
+ *    6 110 | PADDUB| PSUBUB| PEXTUB| QFSRV
+ *    7 111 |   *   |   *   |   *   |   *
+ */
+
+#define MASK_TX79_MMI1(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
+enum {    /* (function << 6) | TX79_MMI_CLASS_MMI1; function = bits 10..6 */
+    TX79_MMI1_PABSW  = (0x01 << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PCEQW  = (0x02 << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PMINW  = (0x03 << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PADSBH = (0x04 << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PABSH  = (0x05 << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PCEQH  = (0x06 << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PMINH  = (0x07 << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PCEQB  = (0x0A << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PADDUW = (0x10 << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PSUBUW = (0x11 << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PEXTUW = (0x12 << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PADDUH = (0x14 << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PSUBUH = (0x15 << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PEXTUH = (0x16 << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PADDUB = (0x18 << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PSUBUB = (0x19 << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_PEXTUB = (0x1A << 6) | TX79_MMI_CLASS_MMI1,
+    TX79_MMI1_QFSRV  = (0x1B << 6) | TX79_MMI_CLASS_MMI1,
+};
+
+/*
+ * TX79 Multimedia Instructions with opcode field = MMI and bits 5..0 = MMI2:
+ *
+ *  31    26                        10     6 5      0
+ * +--------+----------------------+--------+--------+
+ * |   MMI  |                      |function|  MMI2  |
+ * +--------+----------------------+--------+--------+
+ *
+ * function  bits 7..6
+ *     bits |   0   |   1   |   2   |   3
+ *    10..8 |   00  |   01  |   10  |   11
+ *   -------+-------+-------+-------+-------
+ *    0 000 | PMADDW|   *   | PSLLVW| PSRLVW
+ *    1 001 | PMSUBW|   *   |   *   |   *
+ *    2 010 | PMFHI | PMFLO | PINTH |   *
+ *    3 011 | PMULTW| PDIVW | PCPYLD|   *
+ *    4 100 | PMADDH| PHMADH|  PAND |  PXOR
+ *    5 101 | PMSUBH| PHMSBH|   *   |   *
+ *    6 110 |   *   |   *   | PEXEH | PREVH
+ *    7 111 | PMULTH| PDIVBW| PEXEW | PROT3W
+ */
+
+#define MASK_TX79_MMI2(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
+enum {    /* (function << 6) | TX79_MMI_CLASS_MMI2; function = bits 10..6 */
+    TX79_MMI2_PMADDW = (0x00 << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PSLLVW = (0x02 << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PSRLVW = (0x03 << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PMSUBW = (0x04 << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PMFHI  = (0x08 << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PMFLO  = (0x09 << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PINTH  = (0x0A << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PMULTW = (0x0C << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PDIVW  = (0x0D << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PCPYLD = (0x0E << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PMADDH = (0x10 << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PHMADH = (0x11 << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PAND   = (0x12 << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PXOR   = (0x13 << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PMSUBH = (0x14 << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PHMSBH = (0x15 << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PEXEH  = (0x1A << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PREVH  = (0x1B << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PMULTH = (0x1C << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PDIVBW = (0x1D << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PEXEW  = (0x1E << 6) | TX79_MMI_CLASS_MMI2,
+    TX79_MMI2_PROT3W = (0x1F << 6) | TX79_MMI_CLASS_MMI2,
+};
+
+/*
+ * TX79 Multimedia Instructions with opcode field = MMI and bits 5..0 = MMI3:
+ *
+ *  31    26                        10     6 5      0
+ * +--------+----------------------+--------+--------+
+ * |   MMI  |                      |function|  MMI3  |
+ * +--------+----------------------+--------+--------+
+ *
+ * function  bits 7..6
+ *     bits |   0   |   1   |   2   |   3
+ *    10..8 |   00  |   01  |   10  |   11
+ *   -------+-------+-------+-------+-------
+ *    0 000 |PMADDUW|   *   |   *   | PSRAVW
+ *    1 001 |   *   |   *   |   *   |   *
+ *    2 010 | PMTHI | PMTLO | PINTEH|   *
+ *    3 011 |PMULTUW| PDIVUW| PCPYUD|   *
+ *    4 100 |   *   |   *   |  POR  |  PNOR
+ *    5 101 |   *   |   *   |   *   |   *
+ *    6 110 |   *   |   *   | PEXCH | PCPYH
+ *    7 111 |   *   |   *   | PEXCW |   *
+ */
+
+#define MASK_TX79_MMI3(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
+enum {    /* (function << 6) | TX79_MMI_CLASS_MMI3; function = bits 10..6 */
+    TX79_MMI3_PMADDUW = (0x00 << 6) | TX79_MMI_CLASS_MMI3,
+    TX79_MMI3_PSRAVW  = (0x03 << 6) | TX79_MMI_CLASS_MMI3,
+    TX79_MMI3_PMTHI   = (0x08 << 6) | TX79_MMI_CLASS_MMI3,
+    TX79_MMI3_PMTLO   = (0x09 << 6) | TX79_MMI_CLASS_MMI3,
+    TX79_MMI3_PINTEH  = (0x0A << 6) | TX79_MMI_CLASS_MMI3,
+    TX79_MMI3_PMULTUW = (0x0C << 6) | TX79_MMI_CLASS_MMI3,
+    TX79_MMI3_PDIVUW  = (0x0D << 6) | TX79_MMI_CLASS_MMI3,
+    TX79_MMI3_PCPYUD  = (0x0E << 6) | TX79_MMI_CLASS_MMI3,
+    TX79_MMI3_POR     = (0x12 << 6) | TX79_MMI_CLASS_MMI3,
+    TX79_MMI3_PNOR    = (0x13 << 6) | TX79_MMI_CLASS_MMI3,
+    TX79_MMI3_PEXCH   = (0x1A << 6) | TX79_MMI_CLASS_MMI3,
+    TX79_MMI3_PCPYH   = (0x1B << 6) | TX79_MMI_CLASS_MMI3,
+    TX79_MMI3_PEXCW   = (0x1E << 6) | TX79_MMI_CLASS_MMI3,
+};
+
 /* global register indices */
 static TCGv cpu_gpr[32], cpu_PC;
 static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC];
@@ -1398,6 +2421,10 @@
 static TCGv_i64 fpu_f64[32];
 static TCGv_i64 msa_wr_d[64];
 
+/* MXU registers */
+static TCGv mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1];  /* index n - 1 holds XRn */
+static TCGv mxu_CR;                                /* MXU control register */
+
 #include "exec/gen-icount.h"
 
 #define gen_helper_0e0i(name, arg) do {                           \
@@ -1447,8 +2474,9 @@
     target_ulong saved_pc;
     target_ulong page_start;
     uint32_t opcode;
-    int insn_flags;
+    uint64_t insn_flags;
     int32_t CP0_Config1;
+    int32_t CP0_Config2;
     int32_t CP0_Config3;
     int32_t CP0_Config5;
     /* Routine used to access memory */
@@ -1519,6 +2547,11 @@
     "w30.d0", "w30.d1", "w31.d0", "w31.d1",
 };
 
+static const char * const mxuregnames[] = {   /* XR1..XR15, then MXU_CR */
+    "XR1",  "XR2",  "XR3",  "XR4",  "XR5",  "XR6",  "XR7",  "XR8",
+    "XR9",  "XR10", "XR11", "XR12", "XR13", "XR14", "XR15", "MXU_CR",
+};
+
 #define LOG_DISAS(...)                                                        \
     do {                                                                      \
         if (MIPS_DEBUG_DISAS) {                                               \
@@ -1600,6 +2633,36 @@
     }
 }
 
+/* MXU general purpose register moves (reg above 15: nothing is emitted). */
+static inline void gen_load_mxu_gpr(TCGv t, unsigned int reg)
+{
+    if (reg == 0) {
+        tcg_gen_movi_tl(t, 0);               /* register 0 loads constant 0 */
+    } else if (reg <= 15) {
+        tcg_gen_mov_tl(t, mxu_gpr[reg - 1]); /* register n is mxu_gpr[n - 1] */
+    }
+}
+
+static inline void gen_store_mxu_gpr(TCGv t, unsigned int reg)
+{
+    if (reg > 0 && reg <= 15) {     /* stores to 0 or above 15 are dropped */
+        tcg_gen_mov_tl(mxu_gpr[reg - 1], t);
+    }
+}
+
+/* MXU control register moves. */
+static inline void gen_load_mxu_cr(TCGv t)
+{
+    tcg_gen_mov_tl(t, mxu_CR);      /* t <- mxu_CR, copied unmodified */
+}
+
+static inline void gen_store_mxu_cr(TCGv t)
+{
+    /* TODO: Add handling of RW rules for MXU_CR. */
+    tcg_gen_mov_tl(mxu_CR, t);      /* for now the whole of t is written */
+}
+
+
 /* Tests */
 static inline void gen_save_pc(target_ulong pc)
 {
@@ -1857,9 +2920,20 @@
     }
 }
 
-static inline void check_dspr2(DisasContext *ctx)
+static inline void check_dsp_r2(DisasContext *ctx)
 {
-    if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSPR2))) {
+    if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSP_R2))) {
+        if (ctx->insn_flags & ASE_DSP) {    /* ASE_DSP set, DSP_R2 hflag clear */
+            generate_exception_end(ctx, EXCP_DSPDIS);
+        } else {                            /* ASE_DSP not in insn_flags */
+            generate_exception_end(ctx, EXCP_RI);
+        }
+    }
+}
+
+static inline void check_dsp_r3(DisasContext *ctx)
+{
+    if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSP_R3))) {
         if (ctx->insn_flags & ASE_DSP) {
             generate_exception_end(ctx, EXCP_DSPDIS);
         } else {
@@ -1870,7 +2944,7 @@
 
 /* This code generates a "reserved instruction" exception if the
    CPU does not support the instruction set corresponding to flags. */
-static inline void check_insn(DisasContext *ctx, int flags)
+static inline void check_insn(DisasContext *ctx, uint64_t flags)
 {
     if (unlikely(!(ctx->insn_flags & flags))) {
         generate_exception_end(ctx, EXCP_RI);
@@ -1880,13 +2954,28 @@
 /* This code generates a "reserved instruction" exception if the
    CPU has corresponding flag set which indicates that the instruction
    has been removed. */
-static inline void check_insn_opc_removed(DisasContext *ctx, int flags)
+static inline void check_insn_opc_removed(DisasContext *ctx, uint64_t flags)
 {
     if (unlikely(ctx->insn_flags & flags)) {
         generate_exception_end(ctx, EXCP_RI);
     }
 }
 
+/*
+ * The Linux kernel traps certain reserved instruction exceptions to
+ * emulate the corresponding instructions. QEMU acts as the kernel in user
+ * mode, so those traps are emulated by accepting the instructions.
+ *
+ * A reserved instruction exception is generated for flagged CPUs only if
+ * QEMU runs in system mode; with CONFIG_USER_ONLY the body is empty.
+ */
+static inline void check_insn_opc_user_only(DisasContext *ctx, uint64_t flags)
+{
+#ifndef CONFIG_USER_ONLY
+    check_insn_opc_removed(ctx, flags);     /* RI if ctx has any of flags */
+#endif /* !CONFIG_USER_ONLY */
+}
+
 /* This code generates a "reserved instruction" exception if the
    CPU does not support 64-bit paired-single (PS) floating point data type */
 static inline void check_ps(DisasContext *ctx)
@@ -1927,6 +3016,19 @@
     }
 }
 
+#ifndef CONFIG_USER_ONLY
+/*
+ * Generate a "reserved instruction" exception (EXCP_RI) if the
+ * Config3 PW bit is NOT set. Only compiled when !CONFIG_USER_ONLY.
+ */
+static inline void check_pw(DisasContext *ctx)
+{
+    if (unlikely(!(ctx->CP0_Config3 & (1 << CP0C3_PW)))) {
+        generate_exception_end(ctx, EXCP_RI);
+    }
+}
+#endif /* !CONFIG_USER_ONLY */
+
 /*
  * This code generates a "reserved instruction" exception if the
  * Config3 MT bit is NOT set.
@@ -1968,6 +3070,35 @@
     }
 }
 
+/*
+ * Generate a "reserved instruction" exception if the Config5 NMS bit is
+ * set while Config1 DL, Config1 IL, Config2 SL, Config2 TL and Config5
+ * L2C are all unset. The whole compound test is hinted as unlikely.
+ */
+static inline void check_nms_dl_il_sl_tl_l2c(DisasContext *ctx)
+{
+    if (unlikely((ctx->CP0_Config5 & (1 << CP0C5_NMS)) &&
+                 !(ctx->CP0_Config1 & (1 << CP0C1_DL)) &&
+                 !(ctx->CP0_Config1 & (1 << CP0C1_IL)) &&
+                 !(ctx->CP0_Config2 & (1 << CP0C2_SL)) &&
+                 !(ctx->CP0_Config2 & (1 << CP0C2_TL)) &&
+                 !(ctx->CP0_Config5 & (1 << CP0C5_L2C))))
+    {
+        generate_exception_end(ctx, EXCP_RI);
+    }
+}
+
+/*
+ * Generate a "reserved instruction" exception (EXCP_RI) if the
+ * Config5 EVA bit in ctx->CP0_Config5 is NOT set.
+ */
+static inline void check_eva(DisasContext *ctx)
+{
+    if (unlikely(!(ctx->CP0_Config5 & (1 << CP0C5_EVA)))) {
+        generate_exception_end(ctx, EXCP_RI);
+    }
+}
+
 
 /* Define small wrappers for gen_load_fpr* so that we have a uniform
    calling interface for 32 and 64-bit FPRs.  No sense in changing
@@ -3231,17 +4362,21 @@
 /* Arithmetic on HI/LO registers */
 static void gen_HILO(DisasContext *ctx, uint32_t opc, int acc, int reg)
 {
-    if (reg == 0 && (opc == OPC_MFHI || opc == OPC_MFLO)) {
+    if (reg == 0 && (opc == OPC_MFHI || opc == TX79_MMI_MFHI1 ||
+                     opc == OPC_MFLO || opc == TX79_MMI_MFLO1)) {
         /* Treat as NOP. */
         return;
     }
 
     if (acc != 0) {
-        check_dsp(ctx);
+        if (!(ctx->insn_flags & INSN_R5900)) {
+            check_dsp(ctx);
+        }
     }
 
     switch (opc) {
     case OPC_MFHI:
+    case TX79_MMI_MFHI1:
 #if defined(TARGET_MIPS64)
         if (acc != 0) {
             tcg_gen_ext32s_tl(cpu_gpr[reg], cpu_HI[acc]);
@@ -3252,6 +4387,7 @@
         }
         break;
     case OPC_MFLO:
+    case TX79_MMI_MFLO1:
 #if defined(TARGET_MIPS64)
         if (acc != 0) {
             tcg_gen_ext32s_tl(cpu_gpr[reg], cpu_LO[acc]);
@@ -3262,6 +4398,7 @@
         }
         break;
     case OPC_MTHI:
+    case TX79_MMI_MTHI1:
         if (reg != 0) {
 #if defined(TARGET_MIPS64)
             if (acc != 0) {
@@ -3276,6 +4413,7 @@
         }
         break;
     case OPC_MTLO:
+    case TX79_MMI_MTLO1:
         if (reg != 0) {
 #if defined(TARGET_MIPS64)
             if (acc != 0) {
@@ -3588,11 +4726,14 @@
     gen_load_gpr(t1, rt);
 
     if (acc != 0) {
-        check_dsp(ctx);
+        if (!(ctx->insn_flags & INSN_R5900)) {
+            check_dsp(ctx);
+        }
     }
 
     switch (opc) {
     case OPC_DIV:
+    case TX79_MMI_DIV1:
         {
             TCGv t2 = tcg_temp_new();
             TCGv t3 = tcg_temp_new();
@@ -3614,6 +4755,7 @@
         }
         break;
     case OPC_DIVU:
+    case TX79_MMI_DIVU1:
         {
             TCGv t2 = tcg_const_tl(0);
             TCGv t3 = tcg_const_tl(1);
@@ -3768,6 +4910,84 @@
     tcg_temp_free(t1);
 }
 
+/*
+ * These MULT and MULTU instructions implemented in for example the
+ * Toshiba/Sony R5900 and the Toshiba TX19, TX39 and TX79 core
+ * architectures are special three-operand variants with the syntax
+ *
+ *     MULT[U][1] rd, rs, rt
+ *
+ * such that
+ *
+ *     (rd, LO, HI) <- rs * rt
+ *
+ * where the low-order 32 bits of the result are placed into both the
+ * GPR rd and the special register LO. The high-order 32 bits of the
+ * result are placed into the special register HI.
+ *
+ * If the GPR rd is omitted in assembly language, it is taken to be 0,
+ * which is the zero register that always reads as 0.
+ */
+static void gen_mul_txx9(DisasContext *ctx, uint32_t opc,
+                         int rd, int rs, int rt)
+{
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    int acc = 0;    /* HI/LO index: stays 0 unless opc is a ...1 form */
+
+    gen_load_gpr(t0, rs);
+    gen_load_gpr(t1, rt);
+
+    switch (opc) {
+    case TX79_MMI_MULT1:
+        acc = 1;    /* MULT1: results go to cpu_LO[1]/cpu_HI[1] */
+        /* Fall through */
+    case OPC_MULT:
+        {
+            TCGv_i32 t2 = tcg_temp_new_i32();
+            TCGv_i32 t3 = tcg_temp_new_i32();
+            tcg_gen_trunc_tl_i32(t2, t0);
+            tcg_gen_trunc_tl_i32(t3, t1);
+            tcg_gen_muls2_i32(t2, t3, t2, t3);  /* t2 = low, t3 = high */
+            if (rd) {                   /* rd == 0: no GPR write */
+                tcg_gen_ext_i32_tl(cpu_gpr[rd], t2);
+            }
+            tcg_gen_ext_i32_tl(cpu_LO[acc], t2);
+            tcg_gen_ext_i32_tl(cpu_HI[acc], t3);
+            tcg_temp_free_i32(t2);
+            tcg_temp_free_i32(t3);
+        }
+        break;
+    case TX79_MMI_MULTU1:
+        acc = 1;    /* MULTU1: results go to cpu_LO[1]/cpu_HI[1] */
+        /* Fall through */
+    case OPC_MULTU:
+        {
+            TCGv_i32 t2 = tcg_temp_new_i32();
+            TCGv_i32 t3 = tcg_temp_new_i32();
+            tcg_gen_trunc_tl_i32(t2, t0);
+            tcg_gen_trunc_tl_i32(t3, t1);
+            tcg_gen_mulu2_i32(t2, t3, t2, t3);  /* t2 = low, t3 = high */
+            if (rd) {                   /* rd == 0: no GPR write */
+                tcg_gen_ext_i32_tl(cpu_gpr[rd], t2);
+            }
+            tcg_gen_ext_i32_tl(cpu_LO[acc], t2);
+            tcg_gen_ext_i32_tl(cpu_HI[acc], t3);
+            tcg_temp_free_i32(t2);
+            tcg_temp_free_i32(t3);
+        }
+        break;
+    default:
+        MIPS_INVAL("mul TXx9");
+        generate_exception_end(ctx, EXCP_RI);
+        goto out;   /* t0/t1 are still released below */
+    }
+
+ out:
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+
 static void gen_mul_vr54xx (DisasContext *ctx, uint32_t opc,
                             int rd, int rs, int rt)
 {
@@ -5537,6 +6757,21 @@
             tcg_gen_ext32s_tl(arg, arg);
             rn = "SegCtl2";
             break;
+        case 5:
+            check_pw(ctx);
+            gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWBase));
+            rn = "PWBase";
+            break;
+        case 6:
+            check_pw(ctx);
+            gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWField));
+            rn = "PWField";
+            break;
+        case 7:
+            check_pw(ctx);
+            gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWSize));
+            rn = "PWSize";
+            break;
         default:
             goto cp0_unimplemented;
         }
@@ -5572,6 +6807,11 @@
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf4));
             rn = "SRSConf4";
             break;
+        case 6:
+            check_pw(ctx);
+            gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWCtl));
+            rn = "PWCtl";
+            break;
         default:
             goto cp0_unimplemented;
         }
@@ -6238,6 +7478,21 @@
             gen_helper_mtc0_segctl2(cpu_env, arg);
             rn = "SegCtl2";
             break;
+        case 5:
+            check_pw(ctx);
+            gen_mtc0_store32(arg, offsetof(CPUMIPSState, CP0_PWBase));
+            rn = "PWBase";
+            break;
+        case 6:
+            check_pw(ctx);
+            gen_helper_mtc0_pwfield(cpu_env, arg);
+            rn = "PWField";
+            break;
+        case 7:
+            check_pw(ctx);
+            gen_helper_mtc0_pwsize(cpu_env, arg);
+            rn = "PWSize";
+            break;
         default:
             goto cp0_unimplemented;
         }
@@ -6273,6 +7528,11 @@
             gen_helper_mtc0_srsconf4(cpu_env, arg);
             rn = "SRSConf4";
             break;
+        case 6:
+            check_pw(ctx);
+            gen_helper_mtc0_pwctl(cpu_env, arg);
+            rn = "PWCtl";
+            break;
         default:
             goto cp0_unimplemented;
         }
@@ -6948,6 +8208,21 @@
             tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_SegCtl2));
             rn = "SegCtl2";
             break;
+        case 5:
+            check_pw(ctx);
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWBase));
+            rn = "PWBase";
+            break;
+        case 6:
+            check_pw(ctx);
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWField));
+            rn = "PWField";
+            break;
+        case 7:
+            check_pw(ctx);
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWSize));
+            rn = "PWSize";
+            break;
         default:
             goto cp0_unimplemented;
         }
@@ -6983,6 +8258,11 @@
             gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_SRSConf4));
             rn = "SRSConf4";
             break;
+        case 6:
+            check_pw(ctx);
+            gen_mfc0_load32(arg, offsetof(CPUMIPSState, CP0_PWCtl));
+            rn = "PWCtl";
+            break;
         default:
             goto cp0_unimplemented;
         }
@@ -7631,6 +8911,21 @@
             gen_helper_mtc0_segctl2(cpu_env, arg);
             rn = "SegCtl2";
             break;
+        case 5:
+            check_pw(ctx);
+            tcg_gen_st_tl(arg, cpu_env, offsetof(CPUMIPSState, CP0_PWBase));
+            rn = "PWBase";
+            break;
+        case 6:
+            check_pw(ctx);
+            gen_helper_mtc0_pwfield(cpu_env, arg);
+            rn = "PWField";
+            break;
+        case 7:
+            check_pw(ctx);
+            gen_helper_mtc0_pwsize(cpu_env, arg);
+            rn = "PWSize";
+            break;
         default:
             goto cp0_unimplemented;
         }
@@ -7666,6 +8961,11 @@
             gen_helper_mtc0_srsconf4(cpu_env, arg);
             rn = "SRSConf4";
             break;
+        case 6:
+            check_pw(ctx);
+            gen_helper_mtc0_pwctl(cpu_env, arg);
+            rn = "PWCtl";
+            break;
         default:
             goto cp0_unimplemented;
         }
@@ -14999,15 +16299,15 @@
             case 0x38:
                 /* cmovs */
                 switch ((ctx->opcode >> 6) & 0x7) {
-                case MOVN_FMT: /* SELNEZ_FMT */
+                case MOVN_FMT: /* SELEQZ_FMT */
                     if (ctx->insn_flags & ISA_MIPS32R6) {
-                        /* SELNEZ_FMT */
+                        /* SELEQZ_FMT */
                         switch ((ctx->opcode >> 9) & 0x3) {
                         case FMT_SDPS_S:
-                            gen_sel_s(ctx, OPC_SELNEZ_S, rd, rt, rs);
+                            gen_sel_s(ctx, OPC_SELEQZ_S, rd, rt, rs);
                             break;
                         case FMT_SDPS_D:
-                            gen_sel_d(ctx, OPC_SELNEZ_D, rd, rt, rs);
+                            gen_sel_d(ctx, OPC_SELEQZ_D, rd, rt, rs);
                             break;
                         default:
                             goto pool32f_invalid;
@@ -15021,15 +16321,15 @@
                     check_insn_opc_removed(ctx, ISA_MIPS32R6);
                     FINSN_3ARG_SDPS(MOVN);
                     break;
-                case MOVZ_FMT: /* SELEQZ_FMT */
+                case MOVZ_FMT: /* SELNEZ_FMT */
                     if (ctx->insn_flags & ISA_MIPS32R6) {
-                        /* SELEQZ_FMT */
+                        /* SELNEZ_FMT */
                         switch ((ctx->opcode >> 9) & 0x3) {
                         case FMT_SDPS_S:
-                            gen_sel_s(ctx, OPC_SELEQZ_S, rd, rt, rs);
+                            gen_sel_s(ctx, OPC_SELNEZ_S, rd, rt, rs);
                             break;
                         case FMT_SDPS_D:
-                            gen_sel_d(ctx, OPC_SELEQZ_D, rd, rt, rs);
+                            gen_sel_d(ctx, OPC_SELNEZ_D, rd, rt, rs);
                             break;
                         default:
                             goto pool32f_invalid;
@@ -16285,6 +17585,16 @@
     NM_SOV      = 0x7a,
 };
 
+/* CRC32 instruction pool (no entry is defined for 0x03 in this enum) */
+enum {
+    NM_CRC32B   = 0x00,
+    NM_CRC32H   = 0x01,
+    NM_CRC32W   = 0x02,
+    NM_CRC32CB  = 0x04,
+    NM_CRC32CH  = 0x05,
+    NM_CRC32CW  = 0x06,
+};
+
 /* POOL32A5 instruction pool */
 enum {
     NM_CMP_EQ_PH        = 0x00,
@@ -16488,6 +17798,40 @@
     NM_P_SC      = 0x0b,
 };
 
+/* P.LS.E0 instruction pool: matched against opcode bits 14..11 */
+enum {
+    NM_LBE      = 0x00,
+    NM_SBE      = 0x01,
+    NM_LBUE     = 0x02,
+    NM_P_PREFE  = 0x03,  /* sub-pool: see P.PREFE */
+    NM_LHE      = 0x04,
+    NM_SHE      = 0x05,
+    NM_LHUE     = 0x06,
+    NM_CACHEE   = 0x07,
+    NM_LWE      = 0x08,
+    NM_SWE      = 0x09,
+    NM_P_LLE    = 0x0a,  /* sub-pool: see P.LLE */
+    NM_P_SCE    = 0x0b,  /* sub-pool: see P.SCE */
+};
+
+/* P.PREFE instruction pool (the decoder picks SYNCIE via rt == 31) */
+enum {
+    NM_SYNCIE   = 0x00,
+    NM_PREFE    = 0x01,
+};
+
+/* P.LLE instruction pool: matched against opcode bits 3..2 */
+enum {
+    NM_LLE      = 0x00,
+    NM_LLWPE    = 0x01,
+};
+
+/* P.SCE instruction pool: matched against opcode bits 3..2 */
+enum {
+    NM_SCE      = 0x00,
+    NM_SCWPE    = 0x01,
+};
+
 /* P.LS.WM instruction pool */
 enum {
     NM_LWM       = 0x00,
@@ -17444,7 +18788,7 @@
     case NM_POOL32AXF_2_0_7:
         switch (extract32(ctx->opcode, 9, 3)) {
         case NM_DPA_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dpa_w_ph(t0, v1, v0, cpu_env);
             break;
         case NM_DPAQ_S_W_PH:
@@ -17452,7 +18796,7 @@
             gen_helper_dpaq_s_w_ph(t0, v1, v0, cpu_env);
             break;
         case NM_DPS_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dps_w_ph(t0, v1, v0, cpu_env);
             break;
         case NM_DPSQ_S_W_PH:
@@ -17467,7 +18811,7 @@
     case NM_POOL32AXF_2_8_15:
         switch (extract32(ctx->opcode, 9, 3)) {
         case NM_DPAX_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dpax_w_ph(t0, v0, v1, cpu_env);
             break;
         case NM_DPAQ_SA_L_W:
@@ -17475,7 +18819,7 @@
             gen_helper_dpaq_sa_l_w(t0, v0, v1, cpu_env);
             break;
         case NM_DPSX_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dpsx_w_ph(t0, v0, v1, cpu_env);
             break;
         case NM_DPSQ_SA_L_W:
@@ -17494,7 +18838,7 @@
             gen_helper_dpau_h_qbl(t0, v0, v1, cpu_env);
             break;
         case NM_DPAQX_S_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dpaqx_s_w_ph(t0, v0, v1, cpu_env);
             break;
         case NM_DPSU_H_QBL:
@@ -17502,11 +18846,11 @@
             gen_helper_dpsu_h_qbl(t0, v0, v1, cpu_env);
             break;
         case NM_DPSQX_S_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dpsqx_s_w_ph(t0, v0, v1, cpu_env);
             break;
         case NM_MULSA_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_mulsa_w_ph(t0, v0, v1, cpu_env);
             break;
         default:
@@ -17521,7 +18865,7 @@
             gen_helper_dpau_h_qbr(t0, v1, v0, cpu_env);
             break;
         case NM_DPAQX_SA_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dpaqx_sa_w_ph(t0, v1, v0, cpu_env);
             break;
         case NM_DPSU_H_QBR:
@@ -17529,7 +18873,7 @@
             gen_helper_dpsu_h_qbr(t0, v1, v0, cpu_env);
             break;
         case NM_DPSQX_SA_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dpsqx_sa_w_ph(t0, v1, v0, cpu_env);
             break;
         case NM_MULSAQ_S_W_PH:
@@ -17571,7 +18915,7 @@
             gen_pool32axf_2_multiply(ctx, opc, v0_t, v1_t, rd);
             break;
         case NM_BALIGN:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             if (rt != 0) {
                 gen_load_gpr(t0, rs);
                 rd &= 3;
@@ -17801,7 +19145,7 @@
 
     switch (opc) {
     case NM_ABSQ_S_QB:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         gen_helper_absq_s_qb(v0_t, v0_t, cpu_env);
         gen_store_gpr(v0_t, ret);
         break;
@@ -17940,7 +19284,7 @@
 
     switch (opc) {
     case NM_SHRA_R_QB:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         tcg_gen_movi_tl(t0, rd >> 2);
         switch (extract32(ctx->opcode, 12, 1)) {
         case 0:
@@ -17956,7 +19300,7 @@
         }
         break;
     case NM_SHRL_PH:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         tcg_gen_movi_tl(t0, rd >> 1);
         gen_helper_shrl_ph(t0, t0, rs_t);
         gen_store_gpr(t0, rt);
@@ -18881,19 +20225,19 @@
         gen_store_gpr(v1_t, ret);
         break;
     case NM_CMPGDU_EQ_QB:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         gen_helper_cmpgu_eq_qb(v1_t, v1_t, v2_t);
         tcg_gen_deposit_tl(cpu_dspctrl, cpu_dspctrl, v1_t, 24, 4);
         gen_store_gpr(v1_t, ret);
         break;
     case NM_CMPGDU_LT_QB:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         gen_helper_cmpgu_lt_qb(v1_t, v1_t, v2_t);
         tcg_gen_deposit_tl(cpu_dspctrl, cpu_dspctrl, v1_t, 24, 4);
         gen_store_gpr(v1_t, ret);
         break;
     case NM_CMPGDU_LE_QB:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         gen_helper_cmpgu_le_qb(v1_t, v1_t, v2_t);
         tcg_gen_deposit_tl(cpu_dspctrl, cpu_dspctrl, v1_t, 24, 4);
         gen_store_gpr(v1_t, ret);
@@ -18949,7 +20293,7 @@
         }
         break;
     case NM_ADDQH_R_PH:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         switch (extract32(ctx->opcode, 10, 1)) {
         case 0:
             /* ADDQH_PH */
@@ -18964,7 +20308,7 @@
         }
         break;
     case NM_ADDQH_R_W:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         switch (extract32(ctx->opcode, 10, 1)) {
         case 0:
             /* ADDQH_W */
@@ -18994,7 +20338,7 @@
         }
         break;
     case NM_ADDU_S_PH:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         switch (extract32(ctx->opcode, 10, 1)) {
         case 0:
             /* ADDU_PH */
@@ -19009,7 +20353,7 @@
         }
         break;
     case NM_ADDUH_R_QB:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         switch (extract32(ctx->opcode, 10, 1)) {
         case 0:
             /* ADDUH_QB */
@@ -19039,7 +20383,7 @@
         }
         break;
     case NM_SHRAV_R_QB:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         switch (extract32(ctx->opcode, 10, 1)) {
         case 0:
             /* SHRAV_QB */
@@ -19069,7 +20413,7 @@
         }
         break;
     case NM_SUBQH_R_PH:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         switch (extract32(ctx->opcode, 10, 1)) {
         case 0:
             /* SUBQH_PH */
@@ -19084,7 +20428,7 @@
         }
         break;
     case NM_SUBQH_R_W:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         switch (extract32(ctx->opcode, 10, 1)) {
         case 0:
             /* SUBQH_W */
@@ -19114,7 +20458,7 @@
         }
         break;
     case NM_SUBU_S_PH:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         switch (extract32(ctx->opcode, 10, 1)) {
         case 0:
             /* SUBU_PH */
@@ -19129,7 +20473,7 @@
         }
         break;
     case NM_SUBUH_R_QB:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         switch (extract32(ctx->opcode, 10, 1)) {
         case 0:
             /* SUBUH_QB */
@@ -19159,7 +20503,7 @@
         }
         break;
     case NM_PRECR_SRA_R_PH_W:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         switch (extract32(ctx->opcode, 10, 1)) {
         case 0:
             /* PRECR_SRA_PH_W */
@@ -19199,22 +20543,22 @@
         gen_store_gpr(v1_t, ret);
         break;
     case NM_MULQ_S_PH:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         gen_helper_mulq_s_ph(v1_t, v1_t, v2_t, cpu_env);
         gen_store_gpr(v1_t, ret);
         break;
     case NM_MULQ_RS_W:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         gen_helper_mulq_rs_w(v1_t, v1_t, v2_t, cpu_env);
         gen_store_gpr(v1_t, ret);
         break;
     case NM_MULQ_S_W:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         gen_helper_mulq_s_w(v1_t, v1_t, v2_t, cpu_env);
         gen_store_gpr(v1_t, ret);
         break;
     case NM_APPEND:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         gen_load_gpr(t0, rs);
         if (rd != 0) {
             tcg_gen_deposit_tl(cpu_gpr[rt], t0, cpu_gpr[rt], rd, 32 - rd);
@@ -19232,7 +20576,7 @@
         gen_store_gpr(v1_t, ret);
         break;
     case NM_SHRLV_PH:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         gen_helper_shrl_ph(v1_t, v1_t, v2_t);
         gen_store_gpr(v1_t, ret);
         break;
@@ -19274,7 +20618,7 @@
         gen_store_gpr(v1_t, ret);
         break;
     case NM_MUL_S_PH:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         switch (extract32(ctx->opcode, 10, 1)) {
         case 0:
             /* MUL_PH */
@@ -19289,7 +20633,7 @@
         }
         break;
     case NM_PRECR_QB_PH:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         gen_helper_precr_qb_ph(v1_t, v1_t, v2_t);
         gen_store_gpr(v1_t, ret);
         break;
@@ -19326,8 +20670,8 @@
         case 0:
             /* SHRA_PH */
             gen_helper_shra_ph(v1_t, t0, v1_t);
-            break;
             gen_store_gpr(v1_t, rt);
+            break;
         case 1:
             /* SHRA_R_PH */
             gen_helper_shra_r_ph(v1_t, t0, v1_t);
@@ -19984,6 +21328,107 @@
                     break;
                 }
                 break;
+            case NM_P_LS_E0:
+                switch (extract32(ctx->opcode, 11, 4)) {
+                case NM_LBE:
+                    check_eva(ctx);
+                    check_cp0_enabled(ctx);
+                    gen_ld(ctx, OPC_LBE, rt, rs, s);
+                    break;
+                case NM_SBE:
+                    check_eva(ctx);
+                    check_cp0_enabled(ctx);
+                    gen_st(ctx, OPC_SBE, rt, rs, s);
+                    break;
+                case NM_LBUE:
+                    check_eva(ctx);
+                    check_cp0_enabled(ctx);
+                    gen_ld(ctx, OPC_LBUE, rt, rs, s);
+                    break;
+                case NM_P_PREFE:
+                    if (rt == 31) {
+                        /* case NM_SYNCIE */
+                        check_eva(ctx);
+                        check_cp0_enabled(ctx);
+                        /* Break the TB to be able to sync copied instructions
+                           immediately */
+                        ctx->base.is_jmp = DISAS_STOP;
+                    } else {
+                        /* case NM_PREFE */
+                        check_eva(ctx);
+                        check_cp0_enabled(ctx);
+                        /* Treat as NOP. */
+                    }
+                    break;
+                case NM_LHE:
+                    check_eva(ctx);
+                    check_cp0_enabled(ctx);
+                    gen_ld(ctx, OPC_LHE, rt, rs, s);
+                    break;
+                case NM_SHE:
+                    check_eva(ctx);
+                    check_cp0_enabled(ctx);
+                    gen_st(ctx, OPC_SHE, rt, rs, s);
+                    break;
+                case NM_LHUE:
+                    check_eva(ctx);
+                    check_cp0_enabled(ctx);
+                    gen_ld(ctx, OPC_LHUE, rt, rs, s);
+                    break;
+                case NM_CACHEE:
+                    check_nms_dl_il_sl_tl_l2c(ctx);
+                    gen_cache_operation(ctx, rt, rs, s);
+                    break;
+                case NM_LWE:
+                    check_eva(ctx);
+                    check_cp0_enabled(ctx);
+                    gen_ld(ctx, OPC_LWE, rt, rs, s);
+                    break;
+                case NM_SWE:
+                    check_eva(ctx);
+                    check_cp0_enabled(ctx);
+                    gen_st(ctx, OPC_SWE, rt, rs, s);
+                    break;
+                case NM_P_LLE:
+                    switch (extract32(ctx->opcode, 2, 2)) {
+                    case NM_LLE:
+                        check_xnp(ctx);
+                        check_eva(ctx);
+                        check_cp0_enabled(ctx);
+                        gen_ld(ctx, OPC_LLE, rt, rs, s);
+                        break;
+                    case NM_LLWPE:
+                        check_xnp(ctx);
+                        check_eva(ctx);
+                        check_cp0_enabled(ctx);
+                        gen_llwp(ctx, rs, 0, rt, extract32(ctx->opcode, 3, 5));
+                        break;
+                    default:
+                        generate_exception_end(ctx, EXCP_RI);
+                        break;
+                    }
+                    break;
+                case NM_P_SCE:
+                    switch (extract32(ctx->opcode, 2, 2)) {
+                    case NM_SCE:
+                        check_xnp(ctx);
+                        check_eva(ctx);
+                        check_cp0_enabled(ctx);
+                        gen_st_cond(ctx, OPC_SCE, rt, rs, s);
+                        break;
+                    case NM_SCWPE:
+                        check_xnp(ctx);
+                        check_eva(ctx);
+                        check_cp0_enabled(ctx);
+                        gen_scwp(ctx, rs, 0, rt, extract32(ctx->opcode, 3, 5));
+                        break;
+                    default:
+                        generate_exception_end(ctx, EXCP_RI);
+                        break;
+                    }
+                    break;
+                }
+                break;
             case NM_P_LS_WM:
             case NM_P_LS_UAWM:
                 check_nms(ctx);
@@ -20098,7 +21543,7 @@
                     gen_compute_branch_cp1_nm(ctx, OPC_BC1NEZ, rt, s);
                     break;
                 case NM_BPOSGE32C:
-                    check_dspr2(ctx);
+                    check_dsp_r3(ctx);
                     {
                         int32_t imm = extract32(ctx->opcode, 1, 13) |
                                       extract32(ctx->opcode, 0, 1) << 13;
@@ -20607,7 +22052,7 @@
     switch (op1) {
     /* OPC_MULT_G_2E is equal OPC_ADDUH_QB_DSP */
     case OPC_MULT_G_2E:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         switch (op2) {
         case OPC_ADDUH_QB:
             gen_helper_adduh_qb(cpu_gpr[ret], v1_t, v2_t);
@@ -20650,7 +22095,7 @@
     case OPC_ABSQ_S_PH_DSP:
         switch (op2) {
         case OPC_ABSQ_S_QB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_absq_s_qb(cpu_gpr[ret], v2_t, cpu_env);
             break;
         case OPC_ABSQ_S_PH:
@@ -20729,11 +22174,11 @@
             gen_helper_addu_s_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_ADDU_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_addu_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_ADDU_S_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_addu_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_SUBQ_PH:
@@ -20757,11 +22202,11 @@
             gen_helper_subu_s_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_SUBU_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_subu_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_SUBU_S_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_subu_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_ADDSC:
@@ -20785,7 +22230,7 @@
     case OPC_CMPU_EQ_QB_DSP:
         switch (op2) {
         case OPC_PRECR_QB_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_precr_qb_ph(cpu_gpr[ret], v1_t, v2_t);
             break;
         case OPC_PRECRQ_QB_PH:
@@ -20793,7 +22238,7 @@
             gen_helper_precrq_qb_ph(cpu_gpr[ret], v1_t, v2_t);
             break;
         case OPC_PRECR_SRA_PH_W:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             {
                 TCGv_i32 sa_t = tcg_const_i32(v2);
                 gen_helper_precr_sra_ph_w(cpu_gpr[ret], sa_t, v1_t,
@@ -20802,7 +22247,7 @@
                 break;
             }
         case OPC_PRECR_SRA_R_PH_W:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             {
                 TCGv_i32 sa_t = tcg_const_i32(v2);
                 gen_helper_precr_sra_r_ph_w(cpu_gpr[ret], sa_t, v1_t,
@@ -20884,7 +22329,7 @@
             gen_helper_preceu_qh_obra(cpu_gpr[ret], v2_t);
             break;
         case OPC_ABSQ_S_OB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_absq_s_ob(cpu_gpr[ret], v2_t, cpu_env);
             break;
         case OPC_ABSQ_S_PW:
@@ -20928,19 +22373,19 @@
             gen_helper_subu_s_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_SUBU_QH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_subu_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_SUBU_S_QH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_subu_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_SUBUH_OB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_subuh_ob(cpu_gpr[ret], v1_t, v2_t);
             break;
         case OPC_SUBUH_R_OB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_subuh_r_ob(cpu_gpr[ret], v1_t, v2_t);
             break;
         case OPC_ADDQ_PW:
@@ -20968,19 +22413,19 @@
             gen_helper_addu_s_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_ADDU_QH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_addu_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_ADDU_S_QH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_addu_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_ADDUH_OB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_adduh_ob(cpu_gpr[ret], v1_t, v2_t);
             break;
         case OPC_ADDUH_R_OB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_adduh_r_ob(cpu_gpr[ret], v1_t, v2_t);
             break;
         }
@@ -20988,11 +22433,11 @@
     case OPC_CMPU_EQ_OB_DSP:
         switch (op2) {
         case OPC_PRECR_OB_QH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_precr_ob_qh(cpu_gpr[ret], v1_t, v2_t);
             break;
         case OPC_PRECR_SRA_QH_PW:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             {
                 TCGv_i32 ret_t = tcg_const_i32(ret);
                 gen_helper_precr_sra_qh_pw(v2_t, v1_t, v2_t, ret_t);
@@ -21000,7 +22445,7 @@
                 break;
             }
         case OPC_PRECR_SRA_R_QH_PW:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             {
                 TCGv_i32 sa_v = tcg_const_i32(ret);
                 gen_helper_precr_sra_r_qh_pw(v2_t, v1_t, v2_t, sa_v);
@@ -21103,27 +22548,27 @@
                 gen_helper_shrl_qb(cpu_gpr[ret], v1_t, v2_t);
                 break;
             case OPC_SHRL_PH:
-                check_dspr2(ctx);
+                check_dsp_r2(ctx);
                 gen_helper_shrl_ph(cpu_gpr[ret], t0, v2_t);
                 break;
             case OPC_SHRLV_PH:
-                check_dspr2(ctx);
+                check_dsp_r2(ctx);
                 gen_helper_shrl_ph(cpu_gpr[ret], v1_t, v2_t);
                 break;
             case OPC_SHRA_QB:
-                check_dspr2(ctx);
+                check_dsp_r2(ctx);
                 gen_helper_shra_qb(cpu_gpr[ret], t0, v2_t);
                 break;
             case OPC_SHRA_R_QB:
-                check_dspr2(ctx);
+                check_dsp_r2(ctx);
                 gen_helper_shra_r_qb(cpu_gpr[ret], t0, v2_t);
                 break;
             case OPC_SHRAV_QB:
-                check_dspr2(ctx);
+                check_dsp_r2(ctx);
                 gen_helper_shra_qb(cpu_gpr[ret], v1_t, v2_t);
                 break;
             case OPC_SHRAV_R_QB:
-                check_dspr2(ctx);
+                check_dsp_r2(ctx);
                 gen_helper_shra_r_qb(cpu_gpr[ret], v1_t, v2_t);
                 break;
             case OPC_SHRA_PH:
@@ -21202,19 +22647,19 @@
             gen_helper_shll_s_qh(cpu_gpr[ret], v2_t, v1_t, cpu_env);
             break;
         case OPC_SHRA_OB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_shra_ob(cpu_gpr[ret], v2_t, t0);
             break;
         case OPC_SHRAV_OB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_shra_ob(cpu_gpr[ret], v2_t, v1_t);
             break;
         case OPC_SHRA_R_OB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_shra_r_ob(cpu_gpr[ret], v2_t, t0);
             break;
         case OPC_SHRAV_R_OB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_shra_r_ob(cpu_gpr[ret], v2_t, v1_t);
             break;
         case OPC_SHRA_PW:
@@ -21258,11 +22703,11 @@
             gen_helper_shrl_ob(cpu_gpr[ret], v2_t, v1_t);
             break;
         case OPC_SHRL_QH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_shrl_qh(cpu_gpr[ret], v2_t, t0);
             break;
         case OPC_SHRLV_QH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_shrl_qh(cpu_gpr[ret], v2_t, v1_t);
             break;
         default:            /* Invalid */
@@ -21303,7 +22748,7 @@
     /* OPC_MULT_G_2E, OPC_ADDUH_QB_DSP, OPC_MUL_PH_DSP have
      * the same mask and op1. */
     case OPC_MULT_G_2E:
-        check_dspr2(ctx);
+        check_dsp_r2(ctx);
         switch (op2) {
         case  OPC_MUL_PH:
             gen_helper_mul_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
@@ -21338,11 +22783,11 @@
             gen_helper_dpsu_h_qbr(t0, v1_t, v2_t, cpu_env);
             break;
         case OPC_DPA_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dpa_w_ph(t0, v1_t, v2_t, cpu_env);
             break;
         case OPC_DPAX_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dpax_w_ph(t0, v1_t, v2_t, cpu_env);
             break;
         case OPC_DPAQ_S_W_PH:
@@ -21350,19 +22795,19 @@
             gen_helper_dpaq_s_w_ph(t0, v1_t, v2_t, cpu_env);
             break;
         case OPC_DPAQX_S_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dpaqx_s_w_ph(t0, v1_t, v2_t, cpu_env);
             break;
         case OPC_DPAQX_SA_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dpaqx_sa_w_ph(t0, v1_t, v2_t, cpu_env);
             break;
         case OPC_DPS_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dps_w_ph(t0, v1_t, v2_t, cpu_env);
             break;
         case OPC_DPSX_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dpsx_w_ph(t0, v1_t, v2_t, cpu_env);
             break;
         case OPC_DPSQ_S_W_PH:
@@ -21370,11 +22815,11 @@
             gen_helper_dpsq_s_w_ph(t0, v1_t, v2_t, cpu_env);
             break;
         case OPC_DPSQX_S_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dpsqx_s_w_ph(t0, v1_t, v2_t, cpu_env);
             break;
         case OPC_DPSQX_SA_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_dpsqx_sa_w_ph(t0, v1_t, v2_t, cpu_env);
             break;
         case OPC_MULSAQ_S_W_PH:
@@ -21406,7 +22851,7 @@
             gen_helper_maq_sa_w_phr(t0, v1_t, v2_t, cpu_env);
             break;
         case OPC_MULSA_W_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_mulsa_w_ph(t0, v1_t, v2_t, cpu_env);
             break;
         }
@@ -21435,7 +22880,7 @@
                 gen_helper_dmsubu(v1_t, v2_t, t0, cpu_env);
                 break;
             case OPC_DPA_W_QH:
-                check_dspr2(ctx);
+                check_dsp_r2(ctx);
                 gen_helper_dpa_w_qh(v1_t, v2_t, t0, cpu_env);
                 break;
             case OPC_DPAQ_S_W_QH:
@@ -21455,7 +22900,7 @@
                 gen_helper_dpau_h_obr(v1_t, v2_t, t0, cpu_env);
                 break;
             case OPC_DPS_W_QH:
-                check_dspr2(ctx);
+                check_dsp_r2(ctx);
                 gen_helper_dps_w_qh(v1_t, v2_t, t0, cpu_env);
                 break;
             case OPC_DPSQ_S_W_QH:
@@ -21549,7 +22994,7 @@
             gen_helper_muleq_s_w_phr(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_MULQ_S_PH:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_mulq_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         }
@@ -21773,7 +23218,7 @@
             gen_helper_cmpgu_le_qb(cpu_gpr[ret], v1_t, v2_t);
             break;
         case OPC_CMPGDU_EQ_QB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_cmpgu_eq_qb(t1, v1_t, v2_t);
             tcg_gen_mov_tl(cpu_gpr[ret], t1);
             tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF);
@@ -21781,7 +23226,7 @@
             tcg_gen_or_tl(cpu_dspctrl, cpu_dspctrl, t1);
             break;
         case OPC_CMPGDU_LT_QB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_cmpgu_lt_qb(t1, v1_t, v2_t);
             tcg_gen_mov_tl(cpu_gpr[ret], t1);
             tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF);
@@ -21789,7 +23234,7 @@
             tcg_gen_or_tl(cpu_dspctrl, cpu_dspctrl, t1);
             break;
         case OPC_CMPGDU_LE_QB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_cmpgu_le_qb(t1, v1_t, v2_t);
             tcg_gen_mov_tl(cpu_gpr[ret], t1);
             tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF);
@@ -21850,15 +23295,15 @@
             gen_helper_cmp_le_qh(v1_t, v2_t, cpu_env);
             break;
         case OPC_CMPGDU_EQ_OB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_cmpgdu_eq_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_CMPGDU_LT_OB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_cmpgdu_lt_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_CMPGDU_LE_OB:
-            check_dspr2(ctx);
+            check_dsp_r2(ctx);
             gen_helper_cmpgdu_le_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
             break;
         case OPC_CMPGU_EQ_OB:
@@ -21916,7 +23361,7 @@
 {
     TCGv t0;
 
-    check_dspr2(ctx);
+    check_dsp_r2(ctx);
 
     if (rt == 0) {
         /* Treat as NOP. */
@@ -22351,7 +23796,7 @@
     case OPC_MOVN:         /* Conditional move */
     case OPC_MOVZ:
         check_insn(ctx, ISA_MIPS4 | ISA_MIPS32 |
-                   INSN_LOONGSON2E | INSN_LOONGSON2F);
+                   INSN_LOONGSON2E | INSN_LOONGSON2F | INSN_R5900);
         gen_cond_move(ctx, op1, rd, rs, rt);
         break;
     case OPC_MFHI:          /* Move from HI/LO */
@@ -22378,6 +23823,8 @@
             check_insn(ctx, INSN_VR54XX);
             op1 = MASK_MUL_VR54XX(ctx->opcode);
             gen_mul_vr54xx(ctx, op1, rd, rs, rt);
+        } else if (ctx->insn_flags & INSN_R5900) {
+            gen_mul_txx9(ctx, op1, rd, rs, rt);
         } else {
             gen_muldiv(ctx, op1, rd & 3, rs, rt);
         }
@@ -22392,6 +23839,7 @@
     case OPC_DDIV:
     case OPC_DDIVU:
         check_insn(ctx, ISA_MIPS3);
+        check_insn_opc_user_only(ctx, INSN_R5900);
         check_mips_64(ctx);
         gen_muldiv(ctx, op1, 0, rs, rt);
         break;
@@ -22624,6 +24072,1578 @@
     }
 }
 
+
+/* MXU accumulate add/subtract 1-bit pattern 'aptn1' */
+#define MXU_APTN1_A    0
+#define MXU_APTN1_S    1
+
+/* MXU accumulate add/subtract 2-bit pattern 'aptn2' (see gen_mxu_d16mac) */
+#define MXU_APTN2_AA    0
+#define MXU_APTN2_AS    1
+#define MXU_APTN2_SA    2
+#define MXU_APTN2_SS    3
+
+/* MXU execute add/subtract 2-bit pattern 'eptn2' */
+#define MXU_EPTN2_AA    0
+#define MXU_EPTN2_AS    1
+#define MXU_EPTN2_SA    2
+#define MXU_EPTN2_SS    3
+
+/* MXU operand getting pattern 'optn2' (halfword choice, see gen_mxu_d16mul) */
+#define MXU_OPTN2_WW    0
+#define MXU_OPTN2_LW    1
+#define MXU_OPTN2_HW    2
+#define MXU_OPTN2_XW    3
+
+/* MXU operand getting pattern 'optn3' (byte placement, see gen_mxu_s8ldd) */
+#define MXU_OPTN3_PTN0  0
+#define MXU_OPTN3_PTN1  1
+#define MXU_OPTN3_PTN2  2
+#define MXU_OPTN3_PTN3  3
+#define MXU_OPTN3_PTN4  4
+#define MXU_OPTN3_PTN5  5
+#define MXU_OPTN3_PTN6  6
+#define MXU_OPTN3_PTN7  7
+
+
+/*
+ * S32I2M XRa, rb - Register move from GRF to XRF
+ *
+ * XRa is decoded as a 5-bit field: values 0..15 go through
+ * gen_store_mxu_gpr(), 16 goes through gen_store_mxu_cr(), and any other
+ * value emits no store at all.
+ */
+static void gen_mxu_s32i2m(DisasContext *ctx)
+{
+    const uint32_t xra = extract32(ctx->opcode, 6, 5);
+    const uint32_t rb = extract32(ctx->opcode, 16, 5);
+    TCGv val = tcg_temp_new();
+
+    gen_load_gpr(val, rb);
+    if (xra == 16) {
+        gen_store_mxu_cr(val);
+    } else if (xra < 16) {
+        gen_store_mxu_gpr(val, xra);
+    }
+
+    tcg_temp_free(val);
+}
+
+/*
+ * S32M2I XRa, rb - Register move from XRF to GRF
+ *
+ * XRa values 0..15 are read with gen_load_mxu_gpr() and 16 with
+ * gen_load_mxu_cr().  NOTE(review): for any other XRa the temporary is
+ * stored to rb without ever being written - confirm that is intended.
+ */
+static void gen_mxu_s32m2i(DisasContext *ctx)
+{
+    const uint32_t xra = extract32(ctx->opcode, 6, 5);
+    const uint32_t rb = extract32(ctx->opcode, 16, 5);
+    TCGv val = tcg_temp_new();
+
+    if (xra == 16) {
+        gen_load_mxu_cr(val);
+    } else if (xra < 16) {
+        gen_load_mxu_gpr(val, xra);
+    }
+
+    gen_store_gpr(val, rb);
+
+    tcg_temp_free(val);
+}
+
+/*
+ * S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF
+ */
+static void gen_mxu_s8ldd(DisasContext *ctx)
+{
+    TCGv t0, t1;
+    uint32_t XRa, Rb, s8, optn3;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    s8 = extract32(ctx->opcode, 10, 8);
+    optn3 = extract32(ctx->opcode, 18, 3);
+    Rb = extract32(ctx->opcode, 21, 5);
+    /* t0 = address: GPR[Rb] plus the 8-bit offset taken as a signed value */
+    gen_load_gpr(t0, Rb);
+    tcg_gen_addi_tl(t0, t0, (int8_t)s8);
+
+    switch (optn3) {
+    /* XRa[7:0] = tmp8, the other bytes of XRa are kept */
+    case MXU_OPTN3_PTN0:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
+        gen_load_mxu_gpr(t0, XRa);
+        tcg_gen_deposit_tl(t0, t0, t1, 0, 8);
+        break;
+    /* XRa[15:8] = tmp8, the other bytes of XRa are kept */
+    case MXU_OPTN3_PTN1:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
+        gen_load_mxu_gpr(t0, XRa);
+        tcg_gen_deposit_tl(t0, t0, t1, 8, 8);
+        break;
+    /* XRa[23:16] = tmp8, the other bytes of XRa are kept */
+    case MXU_OPTN3_PTN2:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
+        gen_load_mxu_gpr(t0, XRa);
+        tcg_gen_deposit_tl(t0, t0, t1, 16, 8);
+        break;
+    /* XRa[31:24] = tmp8, the other bytes of XRa are kept */
+    case MXU_OPTN3_PTN3:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
+        gen_load_mxu_gpr(t0, XRa);
+        tcg_gen_deposit_tl(t0, t0, t1, 24, 8);
+        break;
+    /* XRa = {8'b0, tmp8, 8'b0, tmp8} */
+    case MXU_OPTN3_PTN4:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
+        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
+        break;
+    /* XRa = {tmp8, 8'b0, tmp8, 8'b0} */
+    case MXU_OPTN3_PTN5:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
+        tcg_gen_shli_tl(t1, t1, 8);
+        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
+        break;
+    /* XRa = {{8{sign of tmp8}}, tmp8, {8{sign of tmp8}}, tmp8} */
+    case MXU_OPTN3_PTN6:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_SB);
+        tcg_gen_mov_tl(t0, t1);
+        tcg_gen_andi_tl(t0, t0, 0xFF00FFFF);
+        tcg_gen_shli_tl(t1, t1, 16);
+        tcg_gen_or_tl(t0, t0, t1);
+        break;
+    /* XRa = {tmp8, tmp8, tmp8, tmp8} */
+    case MXU_OPTN3_PTN7:
+        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
+        tcg_gen_deposit_tl(t1, t1, t1, 8, 8);
+        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
+        break;
+    }
+    /* every pattern above leaves the complete new XRa value in t0 */
+    gen_store_mxu_gpr(t0, XRa);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+
+/*
+ * D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
+ */
+static void gen_mxu_d16mul(DisasContext *ctx)
+{
+    TCGv t0, t1, t2, t3;
+    uint32_t XRa, XRb, XRc, XRd, optn2;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    t2 = tcg_temp_new();
+    t3 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRd = extract32(ctx->opcode, 18, 4);
+    optn2 = extract32(ctx->opcode, 22, 2);
+    /* t0 = XRb.L, t1 = XRb.H, t2 = XRc.L, t3 = XRc.H, all sign-extended */
+    gen_load_mxu_gpr(t1, XRb);
+    tcg_gen_sextract_tl(t0, t1, 0, 16);
+    tcg_gen_sextract_tl(t1, t1, 16, 16);
+    gen_load_mxu_gpr(t3, XRc);
+    tcg_gen_sextract_tl(t2, t3, 0, 16);
+    tcg_gen_sextract_tl(t3, t3, 16, 16);
+    /* lop is produced in t3 and stored to XRa, rop in t2 and stored to XRd */
+    switch (optn2) {
+    case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t1, t3);
+        tcg_gen_mul_tl(t2, t0, t2);
+        break;
+    case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t0, t3);
+        tcg_gen_mul_tl(t2, t0, t2);
+        break;
+    case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t1, t3);
+        tcg_gen_mul_tl(t2, t1, t2);
+        break;
+    case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t0, t3);
+        tcg_gen_mul_tl(t2, t1, t2);
+        break;
+    }
+    gen_store_mxu_gpr(t3, XRa);
+    gen_store_mxu_gpr(t2, XRd);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+    tcg_temp_free(t3);
+}
+
+/*
+ * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 - Signed 16 bit pattern multiply
+ *                                           and accumulate
+ */
+static void gen_mxu_d16mac(DisasContext *ctx)
+{
+    TCGv t0, t1, t2, t3;
+    uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    t2 = tcg_temp_new();
+    t3 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRd = extract32(ctx->opcode, 18, 4);
+    optn2 = extract32(ctx->opcode, 22, 2);
+    aptn2 = extract32(ctx->opcode, 24, 2);
+    /* t0 = XRb.L, t1 = XRb.H, t2 = XRc.L, t3 = XRc.H, all sign-extended */
+    gen_load_mxu_gpr(t1, XRb);
+    tcg_gen_sextract_tl(t0, t1, 0, 16);
+    tcg_gen_sextract_tl(t1, t1, 16, 16);
+
+    gen_load_mxu_gpr(t3, XRc);
+    tcg_gen_sextract_tl(t2, t3, 0, 16);
+    tcg_gen_sextract_tl(t3, t3, 16, 16);
+    /* the products: lop ends up in t3, rop in t2 */
+    switch (optn2) {
+    case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t1, t3);
+        tcg_gen_mul_tl(t2, t0, t2);
+        break;
+    case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t0, t3);
+        tcg_gen_mul_tl(t2, t0, t2);
+        break;
+    case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t1, t3);
+        tcg_gen_mul_tl(t2, t1, t2);
+        break;
+    case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
+        tcg_gen_mul_tl(t3, t0, t3);
+        tcg_gen_mul_tl(t2, t1, t2);
+        break;
+    }
+    gen_load_mxu_gpr(t0, XRa);
+    gen_load_mxu_gpr(t1, XRd);
+    /* first aptn2 letter: old XRa (t0) +/- lop; second: old XRd (t1) +/- rop */
+    switch (aptn2) {
+    case MXU_APTN2_AA:
+        tcg_gen_add_tl(t3, t0, t3);
+        tcg_gen_add_tl(t2, t1, t2);
+        break;
+    case MXU_APTN2_AS:
+        tcg_gen_add_tl(t3, t0, t3);
+        tcg_gen_sub_tl(t2, t1, t2);
+        break;
+    case MXU_APTN2_SA:
+        tcg_gen_sub_tl(t3, t0, t3);
+        tcg_gen_add_tl(t2, t1, t2);
+        break;
+    case MXU_APTN2_SS:
+        tcg_gen_sub_tl(t3, t0, t3);
+        tcg_gen_sub_tl(t2, t1, t2);
+        break;
+    }
+    gen_store_mxu_gpr(t3, XRa);
+    gen_store_mxu_gpr(t2, XRd);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+    tcg_temp_free(t3);
+}
+
+/*
+ * Q8MUL   XRa, XRb, XRc, XRd - Parallel unsigned 8 bit pattern multiply
+ * Q8MULSU XRa, XRb, XRc, XRd - Same, with the bytes of XRb taken as signed
+ */
+static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx)
+{
+    TCGv t0, t1, t2, t3, t4, t5, t6, t7;
+    uint32_t XRa, XRb, XRc, XRd, sel;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    t2 = tcg_temp_new();
+    t3 = tcg_temp_new();
+    t4 = tcg_temp_new();
+    t5 = tcg_temp_new();
+    t6 = tcg_temp_new();
+    t7 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    XRb = extract32(ctx->opcode, 10, 4);
+    XRc = extract32(ctx->opcode, 14, 4);
+    XRd = extract32(ctx->opcode, 18, 4);
+    sel = extract32(ctx->opcode, 22, 2);
+
+    gen_load_mxu_gpr(t3, XRb);
+    gen_load_mxu_gpr(t7, XRc);
+
+    if (sel == 0x2) {
+        /* Q8MULSU: sign-extend each byte of XRb into t0..t3 */
+        tcg_gen_ext8s_tl(t0, t3);
+        tcg_gen_shri_tl(t3, t3, 8);
+        tcg_gen_ext8s_tl(t1, t3);
+        tcg_gen_shri_tl(t3, t3, 8);
+        tcg_gen_ext8s_tl(t2, t3);
+        tcg_gen_shri_tl(t3, t3, 8);
+        tcg_gen_ext8s_tl(t3, t3);
+    } else {
+        /* Q8MUL: zero-extend each byte of XRb into t0..t3 */
+        tcg_gen_ext8u_tl(t0, t3);
+        tcg_gen_shri_tl(t3, t3, 8);
+        tcg_gen_ext8u_tl(t1, t3);
+        tcg_gen_shri_tl(t3, t3, 8);
+        tcg_gen_ext8u_tl(t2, t3);
+        tcg_gen_shri_tl(t3, t3, 8);
+        tcg_gen_ext8u_tl(t3, t3);
+    }
+    /* the bytes of XRc are zero-extended into t4..t7 for both variants */
+    tcg_gen_ext8u_tl(t4, t7);
+    tcg_gen_shri_tl(t7, t7, 8);
+    tcg_gen_ext8u_tl(t5, t7);
+    tcg_gen_shri_tl(t7, t7, 8);
+    tcg_gen_ext8u_tl(t6, t7);
+    tcg_gen_shri_tl(t7, t7, 8);
+    tcg_gen_ext8u_tl(t7, t7);
+
+    tcg_gen_mul_tl(t0, t0, t4);
+    tcg_gen_mul_tl(t1, t1, t5);
+    tcg_gen_mul_tl(t2, t2, t6);
+    tcg_gen_mul_tl(t3, t3, t7);
+    /* keep only the low 16 bits of each byte product */
+    tcg_gen_andi_tl(t0, t0, 0xFFFF);
+    tcg_gen_andi_tl(t1, t1, 0xFFFF);
+    tcg_gen_andi_tl(t2, t2, 0xFFFF);
+    tcg_gen_andi_tl(t3, t3, 0xFFFF);
+
+    tcg_gen_shli_tl(t1, t1, 16);
+    tcg_gen_shli_tl(t3, t3, 16);
+
+    tcg_gen_or_tl(t0, t0, t1);
+    tcg_gen_or_tl(t1, t2, t3);
+    /* XRd = {product 1, product 0}, XRa = {product 3, product 2} */
+    gen_store_mxu_gpr(t0, XRd);
+    gen_store_mxu_gpr(t1, XRa);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+    tcg_temp_free(t3);
+    tcg_temp_free(t4);
+    tcg_temp_free(t5);
+    tcg_temp_free(t6);
+    tcg_temp_free(t7);
+}
+
+/*
+ * S32LDD  XRa, Rb, S12 - Load a word from memory to XRF
+ * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF, reversed byte seq.
+ */
+static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx)
+{
+    TCGv t0, t1;
+    uint32_t XRa, Rb, sel;
+    int32_t offset;
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+
+    XRa = extract32(ctx->opcode, 6, 4);
+    offset = sextract32(ctx->opcode, 10, 10) * 4; /* word offset -> bytes */
+    sel = extract32(ctx->opcode, 20, 1);
+    Rb = extract32(ctx->opcode, 21, 5);
+
+    gen_load_gpr(t0, Rb);
+
+    /*
+     * The offset is a translation-time constant; as a signed immediate it
+     * is sign-extended to the full target_long width.
+     */
+    tcg_gen_addi_tl(t1, t0, offset);
+    tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_SL);
+
+    if (sel == 1) {
+        /* S32LDDR */
+        tcg_gen_bswap32_tl(t1, t1);
+    }
+    gen_store_mxu_gpr(t1, XRa);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+
+
+/*
+ * Decoding engine for MXU
+ * =======================
+ */
+
+/*
+ * Decode MXU pool00: dispatch on opcode bits 20..18 ('x x x' below).
+ * No instruction of this pool is emulated yet; every case raises EXCP_RI.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |0 0 0 0 0|x x x|  XRc  |  XRb  |  XRa  |MXU__POOL00|
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool00(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+    switch (opcode) {
+    case OPC_MXU_S32MAX:
+        /* TODO: Implement emulation of S32MAX instruction. */
+        MIPS_INVAL("OPC_MXU_S32MAX");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32MIN:
+        /* TODO: Implement emulation of S32MIN instruction. */
+        MIPS_INVAL("OPC_MXU_S32MIN");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_D16MAX:
+        /* TODO: Implement emulation of D16MAX instruction. */
+        MIPS_INVAL("OPC_MXU_D16MAX");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_D16MIN:
+        /* TODO: Implement emulation of D16MIN instruction. */
+        MIPS_INVAL("OPC_MXU_D16MIN");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q8MAX:
+        /* TODO: Implement emulation of Q8MAX instruction. */
+        MIPS_INVAL("OPC_MXU_Q8MAX");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q8MIN:
+        /* TODO: Implement emulation of Q8MIN instruction. */
+        MIPS_INVAL("OPC_MXU_Q8MIN");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q8SLT:
+        /* TODO: Implement emulation of Q8SLT instruction. */
+        MIPS_INVAL("OPC_MXU_Q8SLT");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q8SLTU:
+        /* TODO: Implement emulation of Q8SLTU instruction. */
+        MIPS_INVAL("OPC_MXU_Q8SLTU");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool01: dispatch on opcode bits 20..18 ('x x x' below).
+ * No instruction of this pool is emulated yet; every case raises EXCP_RI.
+ *
+ *  S32SLT, D16SLT, D16AVG, D16AVGR, Q8AVG, Q8AVGR:
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |0 0 0 0 0|x x x|  XRc  |  XRb  |  XRa  |MXU__POOL01|
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *
+ *  Q8ADD:
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---+-----+-----+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |en2|0 0 0|x x x|  XRc  |  XRb  |  XRa  |MXU__POOL01|
+ *  +-----------+---+-----+-----+-------+-------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool01(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+    switch (opcode) {
+    case OPC_MXU_S32SLT:
+        /* TODO: Implement emulation of S32SLT instruction. */
+        MIPS_INVAL("OPC_MXU_S32SLT");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_D16SLT:
+        /* TODO: Implement emulation of D16SLT instruction. */
+        MIPS_INVAL("OPC_MXU_D16SLT");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_D16AVG:
+        /* TODO: Implement emulation of D16AVG instruction. */
+        MIPS_INVAL("OPC_MXU_D16AVG");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_D16AVGR:
+        /* TODO: Implement emulation of D16AVGR instruction. */
+        MIPS_INVAL("OPC_MXU_D16AVGR");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q8AVG:
+        /* TODO: Implement emulation of Q8AVG instruction. */
+        MIPS_INVAL("OPC_MXU_Q8AVG");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q8AVGR:
+        /* TODO: Implement emulation of Q8AVGR instruction. */
+        MIPS_INVAL("OPC_MXU_Q8AVGR");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q8ADD:
+        /* TODO: Implement emulation of Q8ADD instruction. */
+        MIPS_INVAL("OPC_MXU_Q8ADD");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool02: dispatch on opcode bits 20..18 ('x x x' below).
+ * No instruction of this pool is emulated yet; every case raises EXCP_RI.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |0 0 0 0 0|x x x|  XRc  |  XRb  |  XRa  |MXU__POOL02|
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool02(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+    switch (opcode) {
+    case OPC_MXU_S32CPS:
+        /* TODO: Implement emulation of S32CPS instruction. */
+        MIPS_INVAL("OPC_MXU_S32CPS");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_D16CPS:
+        /* TODO: Implement emulation of D16CPS instruction. */
+        MIPS_INVAL("OPC_MXU_D16CPS");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q8ABD:
+        /* TODO: Implement emulation of Q8ABD instruction. */
+        MIPS_INVAL("OPC_MXU_Q8ABD");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q16SAT:
+        /* TODO: Implement emulation of Q16SAT instruction. */
+        MIPS_INVAL("OPC_MXU_Q16SAT");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool03: dispatch on opcode bits 25..24 ('x x' below).
+ * No instruction of this pool is emulated yet; every case raises EXCP_RI.
+ *
+ *  D16MULF:
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |x x|on2|0 0 0 0|  XRc  |  XRb  |  XRa  |MXU__POOL03|
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ *
+ *  D16MULE:
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |x x|on2|   Xd  |  XRc  |  XRb  |  XRa  |MXU__POOL03|
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool03(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 24, 2);
+
+    switch (opcode) {
+    case OPC_MXU_D16MULF:
+        /* TODO: Implement emulation of D16MULF instruction. */
+        MIPS_INVAL("OPC_MXU_D16MULF");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_D16MULE:
+        /* TODO: Implement emulation of D16MULE instruction. */
+        MIPS_INVAL("OPC_MXU_D16MULE");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool04: dispatch on opcode bit 20 ('x' below).
+ * Both values are handed to gen_mxu_s32ldd_s32lddr(), which reads bit 20.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+-+-------------------+-------+-----------+
+ *  |  SPECIAL2 |    rb   |x|        s12        |  XRa  |MXU__POOL04|
+ *  +-----------+---------+-+-------------------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool04(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 20, 1);
+
+    switch (opcode) {
+    case OPC_MXU_S32LDD:
+    case OPC_MXU_S32LDDR:
+        gen_mxu_s32ldd_s32lddr(ctx);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool05: dispatch on opcode bit 20 ('x' below).
+ * No instruction of this pool is emulated yet; every case raises EXCP_RI.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+-+-------------------+-------+-----------+
+ *  |  SPECIAL2 |    rb   |x|        s12        |  XRa  |MXU__POOL05|
+ *  +-----------+---------+-+-------------------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool05(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 20, 1);
+
+    switch (opcode) {
+    case OPC_MXU_S32STD:
+        /* TODO: Implement emulation of S32STD instruction. */
+        MIPS_INVAL("OPC_MXU_S32STD");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32STDR:
+        /* TODO: Implement emulation of S32STDR instruction. */
+        MIPS_INVAL("OPC_MXU_S32STDR");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool06: dispatch on opcode bits 13..10 ('x x x x' below).
+ * No instruction of this pool is emulated yet; every case raises EXCP_RI.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+---------+---+-------+-------+-----------+
+ *  |  SPECIAL2 |    rb   |    rc   |st2|x x x x|  XRa  |MXU__POOL06|
+ *  +-----------+---------+---------+---+-------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool06(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 10, 4);
+
+    switch (opcode) {
+    case OPC_MXU_S32LDDV:
+        /* TODO: Implement emulation of S32LDDV instruction. */
+        MIPS_INVAL("OPC_MXU_S32LDDV");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32LDDVR:
+        /* TODO: Implement emulation of S32LDDVR instruction. */
+        MIPS_INVAL("OPC_MXU_S32LDDVR");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool07: dispatch on opcode bits 13..10 ('x x x x' below).
+ * No instruction of this pool is emulated yet; every case raises EXCP_RI.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+---------+---+-------+-------+-----------+
+ *  |  SPECIAL2 |    rb   |    rc   |st2|x x x x|  XRa  |MXU__POOL07|
+ *  +-----------+---------+---------+---+-------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool07(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 10, 4);
+
+    switch (opcode) {
+    case OPC_MXU_S32STDV:
+        /* TODO: Implement emulation of S32STDV instruction. */
+        MIPS_INVAL("OPC_MXU_S32STDV");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32STDVR:
+        /* TODO: Implement emulation of S32STDVR instruction. */
+        MIPS_INVAL("OPC_MXU_S32STDVR");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool08: dispatch on opcode bit 20 ('x' below).
+ * No instruction of this pool is emulated yet; every case raises EXCP_RI.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+-+-------------------+-------+-----------+
+ *  |  SPECIAL2 |    rb   |x|        s12        |  XRa  |MXU__POOL08|
+ *  +-----------+---------+-+-------------------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool08(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 20, 1);
+
+    switch (opcode) {
+    case OPC_MXU_S32LDI:
+        /* TODO: Implement emulation of S32LDI instruction. */
+        MIPS_INVAL("OPC_MXU_S32LDI");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32LDIR:
+        /* TODO: Implement emulation of S32LDIR instruction. */
+        MIPS_INVAL("OPC_MXU_S32LDIR");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool09: dispatch on opcode bit 20 ('x' below).
+ * No instruction of this pool is emulated yet; every case raises EXCP_RI.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+-+-------------------+-------+-----------+
+ *  |  SPECIAL2 |    rb   |x|        s12        |  XRa  |MXU__POOL09|
+ *  +-----------+---------+-+-------------------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool09(CPUMIPSState *env, DisasContext *ctx)
+{
+    /* A zero-length extract32() is invalid; the selector is the 'x' bit. */
+    uint32_t opcode = extract32(ctx->opcode, 20, 1);
+    switch (opcode) {
+    case OPC_MXU_S32SDI:
+        /* TODO: Implement emulation of S32SDI instruction. */
+        MIPS_INVAL("OPC_MXU_S32SDI");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32SDIR:
+        /* TODO: Implement emulation of S32SDIR instruction. */
+        MIPS_INVAL("OPC_MXU_S32SDIR");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool10: dispatch on opcode bits 13..10 ('x x x x' below).
+ * No instruction of this pool is emulated yet; every case raises EXCP_RI.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+---------+---+-------+-------+-----------+
+ *  |  SPECIAL2 |    rb   |    rc   |st2|x x x x|  XRa  |MXU__POOL10|
+ *  +-----------+---------+---------+---+-------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool10(CPUMIPSState *env, DisasContext *ctx)
+{
+    /* A zero-length extract32() is invalid; use the 4-bit 'x' field. */
+    uint32_t opcode = extract32(ctx->opcode, 10, 4);
+    switch (opcode) {
+    case OPC_MXU_S32LDIV:
+        /* TODO: Implement emulation of S32LDIV instruction. */
+        MIPS_INVAL("OPC_MXU_S32LDIV");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32LDIVR:
+        /* TODO: Implement emulation of S32LDIVR instruction. */
+        MIPS_INVAL("OPC_MXU_S32LDIVR");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool11: dispatch on opcode bits 13..10 ('x x x x' below).
+ * No instruction of this pool is emulated yet; every case raises EXCP_RI.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+---------+---+-------+-------+-----------+
+ *  |  SPECIAL2 |    rb   |    rc   |st2|x x x x|  XRa  |MXU__POOL11|
+ *  +-----------+---------+---------+---+-------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool11(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 10, 4);
+
+    switch (opcode) {
+    case OPC_MXU_S32SDIV:
+        /* TODO: Implement emulation of S32SDIV instruction. */
+        MIPS_INVAL("OPC_MXU_S32SDIV");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32SDIVR:
+        /* TODO: Implement emulation of S32SDIVR instruction. */
+        MIPS_INVAL("OPC_MXU_S32SDIVR");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool12: dispatch on opcode bits 23..22 ('x x' below).
+ * No instruction of this pool is emulated yet; every case raises EXCP_RI.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |an2|x x|   Xd  |  XRc  |  XRb  |  XRa  |MXU__POOL12|
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool12(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+    switch (opcode) {
+    case OPC_MXU_D32ACC:
+        /* TODO: Implement emulation of D32ACC instruction. */
+        MIPS_INVAL("OPC_MXU_D32ACC");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_D32ACCM:
+        /* TODO: Implement emulation of D32ACCM instruction. */
+        MIPS_INVAL("OPC_MXU_D32ACCM");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_D32ASUM:
+        /* TODO: Implement emulation of D32ASUM instruction. */
+        MIPS_INVAL("OPC_MXU_D32ASUM");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool13: dispatch on opcode bits 23..22 ('x x' below).
+ * No instruction of this pool is emulated yet; every case raises EXCP_RI.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |en2|x x|0 0 0 0|  XRc  |  XRb  |  XRa  |MXU__POOL13|
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool13(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+    switch (opcode) {
+    case OPC_MXU_Q16ACC:
+        /* TODO: Implement emulation of Q16ACC instruction. */
+        MIPS_INVAL("OPC_MXU_Q16ACC");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q16ACCM:
+        /* TODO: Implement emulation of Q16ACCM instruction. */
+        MIPS_INVAL("OPC_MXU_Q16ACCM");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q16ASUM:
+        /* TODO: Implement emulation of Q16ASUM instruction. */
+        MIPS_INVAL("OPC_MXU_Q16ASUM");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool14: dispatch on opcode bits 23..22 ('x x' below).
+ * No instruction of this pool is emulated yet; every case raises EXCP_RI.
+ *
+ *  Q8ADDE, Q8ACCE:
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |0 0|x x|  XRd  |  XRc  |  XRb  |  XRa  |MXU__POOL14|
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ *
+ *  D8SUM, D8SUMC:
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |en2|x x|0 0 0 0|  XRc  |  XRb  |  XRa  |MXU__POOL14|
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ * NOTE(review): Q8ACCE is named above but has no case in the switch below.
+ */
+static void decode_opc_mxu__pool14(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+    switch (opcode) {
+    case OPC_MXU_Q8ADDE:
+        /* TODO: Implement emulation of Q8ADDE instruction. */
+        MIPS_INVAL("OPC_MXU_Q8ADDE");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_D8SUM:
+        /* TODO: Implement emulation of D8SUM instruction. */
+        MIPS_INVAL("OPC_MXU_D8SUM");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_D8SUMC:
+        /* TODO: Implement emulation of D8SUMC instruction. */
+        MIPS_INVAL("OPC_MXU_D8SUMC");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool15: dispatch on opcode bits 15..14 ('x x' below).
+ * No instruction of this pool is emulated yet; every case raises EXCP_RI.
+ *
+ *  S32MUL, S32MULU, S32EXTRV:
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+---------+---+-------+-------+-----------+
+ *  |  SPECIAL2 |    rs   |    rt   |x x|  XRd  |  XRa  |MXU__POOL15|
+ *  +-----------+---------+---------+---+-------+-------+-----------+
+ *
+ *  S32EXTR:
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+---------+---+-------+-------+-----------+
+ *  |  SPECIAL2 |    rb   |   sft5  |x x|  XRd  |  XRa  |MXU__POOL15|
+ *  +-----------+---------+---------+---+-------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool15(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 14, 2);
+
+    switch (opcode) {
+    case OPC_MXU_S32MUL:
+        /* TODO: Implement emulation of S32MUL instruction. */
+        MIPS_INVAL("OPC_MXU_S32MUL");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32MULU:
+        /* TODO: Implement emulation of S32MULU instruction. */
+        MIPS_INVAL("OPC_MXU_S32MULU");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32EXTR:
+        /* TODO: Implement emulation of S32EXTR instruction. */
+        MIPS_INVAL("OPC_MXU_S32EXTR");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32EXTRV:
+        /* TODO: Implement emulation of S32EXTRV instruction. */
+        MIPS_INVAL("OPC_MXU_S32EXTRV");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool16: dispatch on the 3-bit minor opcode in bits 20..18.
+ * Every instruction of this pool is still a TODO stub raising EXCP_RI.
+ *
+ *  D32SARW:
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |    rb   |x x x|  XRc  |  XRb  |  XRa  |MXU__POOL16|
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *
+ *  S32ALN:
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |    rs   |x x x|  XRc  |  XRb  |  XRa  |MXU__POOL16|
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *
+ *  S32ALNI:
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+-----+---+-----+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |  s3 |0 0|x x x|  XRc  |  XRb  |  XRa  |MXU__POOL16|
+ *  +-----------+-----+---+-----+-------+-------+-------+-----------+
+ *
+ *  S32NOR, S32AND, S32OR, S32XOR:
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |0 0 0 0 0|x x x|  XRc  |  XRb  |  XRa  |MXU__POOL16|
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *
+ *  S32LUI:
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+-----+---+-----+-------+---------------+-----------+
+ *  |  SPECIAL2 |optn3|0 0|x x x|  XRc  |       s8      |MXU__POOL16|
+ *  +-----------+-----+---+-----+-------+---------------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool16(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+    switch (opcode) {
+    case OPC_MXU_D32SARW:
+        /* TODO: Implement emulation of D32SARW instruction. */
+        MIPS_INVAL("OPC_MXU_D32SARW");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32ALN:
+        /* TODO: Implement emulation of S32ALN instruction. */
+        MIPS_INVAL("OPC_MXU_S32ALN");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32ALNI:
+        /* TODO: Implement emulation of S32ALNI instruction. */
+        MIPS_INVAL("OPC_MXU_S32ALNI");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32NOR:
+        /* TODO: Implement emulation of S32NOR instruction. */
+        MIPS_INVAL("OPC_MXU_S32NOR");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32AND:
+        /* TODO: Implement emulation of S32AND instruction. */
+        MIPS_INVAL("OPC_MXU_S32AND");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32OR:
+        /* TODO: Implement emulation of S32OR instruction. */
+        MIPS_INVAL("OPC_MXU_S32OR");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32XOR:
+        /* TODO: Implement emulation of S32XOR instruction. */
+        MIPS_INVAL("OPC_MXU_S32XOR");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32LUI:
+        /* TODO: Implement emulation of S32LUI instruction. */
+        MIPS_INVAL("OPC_MXU_S32LUI");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool17: dispatch on the 3-bit minor opcode in bits 20..18.
+ * Every instruction of this pool is still a TODO stub raising EXCP_RI.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |    rb   |x x x|  XRd  |  XRa  |0 0 0 0|MXU__POOL17|
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool17(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+    switch (opcode) {
+    case OPC_MXU_D32SLLV:
+        /* TODO: Implement emulation of D32SLLV instruction. */
+        MIPS_INVAL("OPC_MXU_D32SLLV");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_D32SLRV:
+        /* TODO: Implement emulation of D32SLRV instruction. */
+        MIPS_INVAL("OPC_MXU_D32SLRV");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_D32SARV:
+        /* TODO: Implement emulation of D32SARV instruction. */
+        MIPS_INVAL("OPC_MXU_D32SARV");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q16SLLV:
+        /* TODO: Implement emulation of Q16SLLV instruction. */
+        MIPS_INVAL("OPC_MXU_Q16SLLV");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q16SLRV:
+        /* TODO: Implement emulation of Q16SLRV instruction. */
+        MIPS_INVAL("OPC_MXU_Q16SLRV");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q16SARV:
+        /* TODO: Implement emulation of Q16SARV instruction. */
+        MIPS_INVAL("OPC_MXU_Q16SARV");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool18: dispatch on the 2-bit minor opcode in bits 23..22.
+ * Q8MUL and Q8MULSU are both emitted by gen_mxu_q8mul_q8mulsu().
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |0 0|x x|  XRd  |  XRc  |  XRb  |  XRa  |MXU__POOL18|
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool18(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+    switch (opcode) {
+    case OPC_MXU_Q8MUL:
+    case OPC_MXU_Q8MULSU:
+        gen_mxu_q8mul_q8mulsu(ctx);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool19: dispatch on the 3-bit minor opcode in bits 20..18.
+ * Every instruction of this pool is still a TODO stub raising EXCP_RI.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |0 0 0 0 0|x x x|  XRc  |  XRb  |  XRa  |MXU__POOL19|
+ *  +-----------+---------+-----+-------+-------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool19(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+    switch (opcode) {
+    case OPC_MXU_Q8MOVZ:
+        /* TODO: Implement emulation of Q8MOVZ instruction. */
+        MIPS_INVAL("OPC_MXU_Q8MOVZ");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q8MOVN:
+        /* TODO: Implement emulation of Q8MOVN instruction. */
+        MIPS_INVAL("OPC_MXU_Q8MOVN");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_D16MOVZ:
+        /* TODO: Implement emulation of D16MOVZ instruction. */
+        MIPS_INVAL("OPC_MXU_D16MOVZ");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_D16MOVN:
+        /* TODO: Implement emulation of D16MOVN instruction. */
+        MIPS_INVAL("OPC_MXU_D16MOVN");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32MOVZ:
+        /* TODO: Implement emulation of S32MOVZ instruction. */
+        MIPS_INVAL("OPC_MXU_S32MOVZ");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_S32MOVN:
+        /* TODO: Implement emulation of S32MOVN instruction. */
+        MIPS_INVAL("OPC_MXU_S32MOVN");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+/*
+ * Decode MXU pool20: dispatch on the 2-bit minor opcode in bits 23..22.
+ * Both instructions of this pool are still TODO stubs raising EXCP_RI.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ *  |  SPECIAL2 |an2|x x|  XRd  |  XRc  |  XRb  |  XRa  |MXU__POOL20|
+ *  +-----------+---+---+-------+-------+-------+-------+-----------+
+ *
+ */
+static void decode_opc_mxu__pool20(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+    switch (opcode) {
+    case OPC_MXU_Q8MAC:
+        /* TODO: Implement emulation of Q8MAC instruction. */
+        MIPS_INVAL("OPC_MXU_Q8MAC");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    case OPC_MXU_Q8MACSU:
+        /* TODO: Implement emulation of Q8MACSU instruction. */
+        MIPS_INVAL("OPC_MXU_Q8MACSU");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    default:
+        MIPS_INVAL("decode_opc_mxu");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+
+/*
+ * Main MXU decoding function: dispatch on the 6-bit field in bits 5..0.
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------------------------------------+-----------+
+ *  |  SPECIAL2 |                                       |x x x x x x|
+ *  +-----------+---------------------------------------+-----------+
+ * MUL, S32M2I and S32I2M are emitted before the MXU_EN check done here.
+ */
+static void decode_opc_mxu(CPUMIPSState *env, DisasContext *ctx)
+{
+    /*
+     * TODO: Investigate necessity of including handling of
+     * CLZ, CLO, SDBB in this function, as they belong to
+     * SPECIAL2 opcode space for regular pre-R6 MIPS ISAs.
+     */
+    uint32_t opcode = extract32(ctx->opcode, 0, 6);
+
+    if (opcode == OPC__MXU_MUL) {
+        uint32_t  rs, rt, rd, op1;
+
+        rs = extract32(ctx->opcode, 21, 5);
+        rt = extract32(ctx->opcode, 16, 5);
+        rd = extract32(ctx->opcode, 11, 5);
+        op1 = MASK_SPECIAL2(ctx->opcode);
+
+        gen_arith(ctx, op1, rd, rs, rt);
+
+        return;
+    }
+
+    if (opcode == OPC_MXU_S32M2I) {
+        gen_mxu_s32m2i(ctx);
+        return;
+    }
+
+    if (opcode == OPC_MXU_S32I2M) {
+        gen_mxu_s32i2m(ctx);
+        return;
+    }
+
+    {
+        TCGv t_mxu_cr = tcg_temp_new();
+        TCGLabel *l_exit = gen_new_label();
+        /* At run time, skip the ops emitted below unless MXU_EN is set. */
+        gen_load_mxu_cr(t_mxu_cr);
+        tcg_gen_andi_tl(t_mxu_cr, t_mxu_cr, MXU_CR_MXU_EN);
+        tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit);
+
+        switch (opcode) {
+        case OPC_MXU_S32MADD:
+            /* TODO: Implement emulation of S32MADD instruction. */
+            MIPS_INVAL("OPC_MXU_S32MADD");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_S32MADDU:
+            /* TODO: Implement emulation of S32MADDU instruction. */
+            MIPS_INVAL("OPC_MXU_S32MADDU");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU__POOL00:
+            decode_opc_mxu__pool00(env, ctx);
+            break;
+        case OPC_MXU_S32MSUB:
+            /* TODO: Implement emulation of S32MSUB instruction. */
+            MIPS_INVAL("OPC_MXU_S32MSUB");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_S32MSUBU:
+            /* TODO: Implement emulation of S32MSUBU instruction. */
+            MIPS_INVAL("OPC_MXU_S32MSUBU");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU__POOL01:
+            decode_opc_mxu__pool01(env, ctx);
+            break;
+        case OPC_MXU__POOL02:
+            decode_opc_mxu__pool02(env, ctx);
+            break;
+        case OPC_MXU_D16MUL:
+            gen_mxu_d16mul(ctx);
+            break;
+        case OPC_MXU__POOL03:
+            decode_opc_mxu__pool03(env, ctx);
+            break;
+        case OPC_MXU_D16MAC:
+            gen_mxu_d16mac(ctx);
+            break;
+        case OPC_MXU_D16MACF:
+            /* TODO: Implement emulation of D16MACF instruction. */
+            MIPS_INVAL("OPC_MXU_D16MACF");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_D16MADL:
+            /* TODO: Implement emulation of D16MADL instruction. */
+            MIPS_INVAL("OPC_MXU_D16MADL");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_S16MAD:
+            /* TODO: Implement emulation of S16MAD instruction. */
+            MIPS_INVAL("OPC_MXU_S16MAD");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_Q16ADD:
+            /* TODO: Implement emulation of Q16ADD instruction. */
+            MIPS_INVAL("OPC_MXU_Q16ADD");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_D16MACE:
+            /* TODO: Implement emulation of D16MACE instruction. */
+            MIPS_INVAL("OPC_MXU_D16MACE");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU__POOL04:
+            decode_opc_mxu__pool04(env, ctx);
+            break;
+        case OPC_MXU__POOL05:
+            decode_opc_mxu__pool05(env, ctx);
+            break;
+        case OPC_MXU__POOL06:
+            decode_opc_mxu__pool06(env, ctx);
+            break;
+        case OPC_MXU__POOL07:
+            decode_opc_mxu__pool07(env, ctx);
+            break;
+        case OPC_MXU__POOL08:
+            decode_opc_mxu__pool08(env, ctx);
+            break;
+        case OPC_MXU__POOL09:
+            decode_opc_mxu__pool09(env, ctx);
+            break;
+        case OPC_MXU__POOL10:
+            decode_opc_mxu__pool10(env, ctx);
+            break;
+        case OPC_MXU__POOL11:
+            decode_opc_mxu__pool11(env, ctx);
+            break;
+        case OPC_MXU_D32ADD:
+            /* TODO: Implement emulation of D32ADD instruction. */
+            MIPS_INVAL("OPC_MXU_D32ADD");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU__POOL12:
+            decode_opc_mxu__pool12(env, ctx);
+            break;
+        case OPC_MXU__POOL13:
+            decode_opc_mxu__pool13(env, ctx);
+            break;
+        case OPC_MXU__POOL14:
+            decode_opc_mxu__pool14(env, ctx);
+            break;
+        case OPC_MXU_Q8ACCE:
+            /* TODO: Implement emulation of Q8ACCE instruction. */
+            MIPS_INVAL("OPC_MXU_Q8ACCE");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_S8LDD:
+            gen_mxu_s8ldd(ctx);
+            break;
+        case OPC_MXU_S8STD:
+            /* TODO: Implement emulation of S8STD instruction. */
+            MIPS_INVAL("OPC_MXU_S8STD");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_S8LDI:
+            /* TODO: Implement emulation of S8LDI instruction. */
+            MIPS_INVAL("OPC_MXU_S8LDI");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_S8SDI:
+            /* TODO: Implement emulation of S8SDI instruction. */
+            MIPS_INVAL("OPC_MXU_S8SDI");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU__POOL15:
+            decode_opc_mxu__pool15(env, ctx);
+            break;
+        case OPC_MXU__POOL16:
+            decode_opc_mxu__pool16(env, ctx);
+            break;
+        case OPC_MXU_LXB:
+            /* TODO: Implement emulation of LXB instruction. */
+            MIPS_INVAL("OPC_MXU_LXB");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_S16LDD:
+            /* TODO: Implement emulation of S16LDD instruction. */
+            MIPS_INVAL("OPC_MXU_S16LDD");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_S16STD:
+            /* TODO: Implement emulation of S16STD instruction. */
+            MIPS_INVAL("OPC_MXU_S16STD");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_S16LDI:
+            /* TODO: Implement emulation of S16LDI instruction. */
+            MIPS_INVAL("OPC_MXU_S16LDI");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_S16SDI:
+            /* TODO: Implement emulation of S16SDI instruction. */
+            MIPS_INVAL("OPC_MXU_S16SDI");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_D32SLL:
+            /* TODO: Implement emulation of D32SLL instruction. */
+            MIPS_INVAL("OPC_MXU_D32SLL");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_D32SLR:
+            /* TODO: Implement emulation of D32SLR instruction. */
+            MIPS_INVAL("OPC_MXU_D32SLR");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_D32SARL:
+            /* TODO: Implement emulation of D32SARL instruction. */
+            MIPS_INVAL("OPC_MXU_D32SARL");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_D32SAR:
+            /* TODO: Implement emulation of D32SAR instruction. */
+            MIPS_INVAL("OPC_MXU_D32SAR");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_Q16SLL:
+            /* TODO: Implement emulation of Q16SLL instruction. */
+            MIPS_INVAL("OPC_MXU_Q16SLL");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_Q16SLR:
+            /* TODO: Implement emulation of Q16SLR instruction. */
+            MIPS_INVAL("OPC_MXU_Q16SLR");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU__POOL17:
+            decode_opc_mxu__pool17(env, ctx);
+            break;
+        case OPC_MXU_Q16SAR:
+            /* TODO: Implement emulation of Q16SAR instruction. */
+            MIPS_INVAL("OPC_MXU_Q16SAR");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU__POOL18:
+            decode_opc_mxu__pool18(env, ctx);
+            break;
+        case OPC_MXU__POOL19:
+            decode_opc_mxu__pool19(env, ctx);
+            break;
+        case OPC_MXU__POOL20:
+            decode_opc_mxu__pool20(env, ctx);
+            break;
+        case OPC_MXU_Q16SCOP:
+            /* TODO: Implement emulation of Q16SCOP instruction. */
+            MIPS_INVAL("OPC_MXU_Q16SCOP");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_Q8MADL:
+            /* TODO: Implement emulation of Q8MADL instruction. */
+            MIPS_INVAL("OPC_MXU_Q8MADL");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_S32SFL:
+            /* TODO: Implement emulation of S32SFL instruction. */
+            MIPS_INVAL("OPC_MXU_S32SFL");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        case OPC_MXU_Q8SAD:
+            /* TODO: Implement emulation of Q8SAD instruction. */
+            MIPS_INVAL("OPC_MXU_Q8SAD");
+            generate_exception_end(ctx, EXCP_RI);
+            break;
+        default:
+            MIPS_INVAL("decode_opc_mxu");
+            generate_exception_end(ctx, EXCP_RI);
+        }
+
+        gen_set_label(l_exit);
+        tcg_temp_free(t_mxu_cr);
+    }
+}
+
+
 static void decode_opc_special2_legacy(CPUMIPSState *env, DisasContext *ctx)
 {
     int rs, rt, rd;
@@ -22738,7 +25758,9 @@
             op2 = MASK_BSHFL(ctx->opcode);
             switch (op2) {
             case OPC_ALIGN:
-            case OPC_ALIGN_END:
+            case OPC_ALIGN_1:
+            case OPC_ALIGN_2:
+            case OPC_ALIGN_3:
                 gen_align(ctx, 32, rd, rs, rt, sa & 3);
                 break;
             case OPC_BITSWAP:
@@ -22764,7 +25786,13 @@
             op2 = MASK_DBSHFL(ctx->opcode);
             switch (op2) {
             case OPC_DALIGN:
-            case OPC_DALIGN_END:
+            case OPC_DALIGN_1:
+            case OPC_DALIGN_2:
+            case OPC_DALIGN_3:
+            case OPC_DALIGN_4:
+            case OPC_DALIGN_5:
+            case OPC_DALIGN_6:
+            case OPC_DALIGN_7:
                 gen_align(ctx, 64, rd, rs, rt, sa & 7);
                 break;
             case OPC_DBITSWAP:
@@ -22801,7 +25829,7 @@
     case OPC_MULTU_G_2E:
         /* OPC_MULT_G_2E, OPC_ADDUH_QB_DSP, OPC_MUL_PH_DSP have
          * the same mask and op1. */
-        if ((ctx->insn_flags & ASE_DSPR2) && (op1 == OPC_MULT_G_2E)) {
+        if ((ctx->insn_flags & ASE_DSP_R2) && (op1 == OPC_MULT_G_2E)) {
             op2 = MASK_ADDUH_QB(ctx->opcode);
             switch (op2) {
             case OPC_ADDUH_QB:
@@ -23308,6 +26336,250 @@
     }
 }
 
+static void decode_tx79_mmi0(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opc = MASK_TX79_MMI0(ctx->opcode);
+    /* No MMI0 instruction is emulated yet; every encoding raises EXCP_RI. */
+    switch (opc) {
+    case TX79_MMI0_PADDW:     /* TODO: TX79_MMI0_PADDW */
+    case TX79_MMI0_PSUBW:     /* TODO: TX79_MMI0_PSUBW */
+    case TX79_MMI0_PCGTW:     /* TODO: TX79_MMI0_PCGTW */
+    case TX79_MMI0_PMAXW:     /* TODO: TX79_MMI0_PMAXW */
+    case TX79_MMI0_PADDH:     /* TODO: TX79_MMI0_PADDH */
+    case TX79_MMI0_PSUBH:     /* TODO: TX79_MMI0_PSUBH */
+    case TX79_MMI0_PCGTH:     /* TODO: TX79_MMI0_PCGTH */
+    case TX79_MMI0_PMAXH:     /* TODO: TX79_MMI0_PMAXH */
+    case TX79_MMI0_PADDB:     /* TODO: TX79_MMI0_PADDB */
+    case TX79_MMI0_PSUBB:     /* TODO: TX79_MMI0_PSUBB */
+    case TX79_MMI0_PCGTB:     /* TODO: TX79_MMI0_PCGTB */
+    case TX79_MMI0_PADDSW:    /* TODO: TX79_MMI0_PADDSW */
+    case TX79_MMI0_PSUBSW:    /* TODO: TX79_MMI0_PSUBSW */
+    case TX79_MMI0_PEXTLW:    /* TODO: TX79_MMI0_PEXTLW */
+    case TX79_MMI0_PPACW:     /* TODO: TX79_MMI0_PPACW */
+    case TX79_MMI0_PADDSH:    /* TODO: TX79_MMI0_PADDSH */
+    case TX79_MMI0_PSUBSH:    /* TODO: TX79_MMI0_PSUBSH */
+    case TX79_MMI0_PEXTLH:    /* TODO: TX79_MMI0_PEXTLH */
+    case TX79_MMI0_PPACH:     /* TODO: TX79_MMI0_PPACH */
+    case TX79_MMI0_PADDSB:    /* TODO: TX79_MMI0_PADDSB */
+    case TX79_MMI0_PSUBSB:    /* TODO: TX79_MMI0_PSUBSB */
+    case TX79_MMI0_PEXTLB:    /* TODO: TX79_MMI0_PEXTLB */
+    case TX79_MMI0_PPACB:     /* TODO: TX79_MMI0_PPACB */
+    case TX79_MMI0_PEXT5:     /* TODO: TX79_MMI0_PEXT5 */
+    case TX79_MMI0_PPAC5:     /* TODO: TX79_MMI0_PPAC5 */
+        generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_MMI_CLASS_MMI0 */
+        break;
+    default:
+        MIPS_INVAL("TX79 MMI class MMI0");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+static void decode_tx79_mmi1(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opc = MASK_TX79_MMI1(ctx->opcode);
+    /* No MMI1 instruction is emulated yet; every encoding raises EXCP_RI. */
+    switch (opc) {
+    case TX79_MMI1_PABSW:     /* TODO: TX79_MMI1_PABSW */
+    case TX79_MMI1_PCEQW:     /* TODO: TX79_MMI1_PCEQW */
+    case TX79_MMI1_PMINW:     /* TODO: TX79_MMI1_PMINW */
+    case TX79_MMI1_PADSBH:    /* TODO: TX79_MMI1_PADSBH */
+    case TX79_MMI1_PABSH:     /* TODO: TX79_MMI1_PABSH */
+    case TX79_MMI1_PCEQH:     /* TODO: TX79_MMI1_PCEQH */
+    case TX79_MMI1_PMINH:     /* TODO: TX79_MMI1_PMINH */
+    case TX79_MMI1_PCEQB:     /* TODO: TX79_MMI1_PCEQB */
+    case TX79_MMI1_PADDUW:    /* TODO: TX79_MMI1_PADDUW */
+    case TX79_MMI1_PSUBUW:    /* TODO: TX79_MMI1_PSUBUW */
+    case TX79_MMI1_PEXTUW:    /* TODO: TX79_MMI1_PEXTUW */
+    case TX79_MMI1_PADDUH:    /* TODO: TX79_MMI1_PADDUH */
+    case TX79_MMI1_PSUBUH:    /* TODO: TX79_MMI1_PSUBUH */
+    case TX79_MMI1_PEXTUH:    /* TODO: TX79_MMI1_PEXTUH */
+    case TX79_MMI1_PADDUB:    /* TODO: TX79_MMI1_PADDUB */
+    case TX79_MMI1_PSUBUB:    /* TODO: TX79_MMI1_PSUBUB */
+    case TX79_MMI1_PEXTUB:    /* TODO: TX79_MMI1_PEXTUB */
+    case TX79_MMI1_QFSRV:     /* TODO: TX79_MMI1_QFSRV */
+        generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_MMI_CLASS_MMI1 */
+        break;
+    default:
+        MIPS_INVAL("TX79 MMI class MMI1");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+static void decode_tx79_mmi2(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opc = MASK_TX79_MMI2(ctx->opcode);
+    /* No MMI2 instruction is emulated yet; every encoding raises EXCP_RI. */
+    switch (opc) {
+    case TX79_MMI2_PMADDW:    /* TODO: TX79_MMI2_PMADDW */
+    case TX79_MMI2_PSLLVW:    /* TODO: TX79_MMI2_PSLLVW */
+    case TX79_MMI2_PSRLVW:    /* TODO: TX79_MMI2_PSRLVW */
+    case TX79_MMI2_PMSUBW:    /* TODO: TX79_MMI2_PMSUBW */
+    case TX79_MMI2_PMFHI:     /* TODO: TX79_MMI2_PMFHI */
+    case TX79_MMI2_PMFLO:     /* TODO: TX79_MMI2_PMFLO */
+    case TX79_MMI2_PINTH:     /* TODO: TX79_MMI2_PINTH */
+    case TX79_MMI2_PMULTW:    /* TODO: TX79_MMI2_PMULTW */
+    case TX79_MMI2_PDIVW:     /* TODO: TX79_MMI2_PDIVW */
+    case TX79_MMI2_PCPYLD:    /* TODO: TX79_MMI2_PCPYLD */
+    case TX79_MMI2_PMADDH:    /* TODO: TX79_MMI2_PMADDH */
+    case TX79_MMI2_PHMADH:    /* TODO: TX79_MMI2_PHMADH */
+    case TX79_MMI2_PAND:      /* TODO: TX79_MMI2_PAND */
+    case TX79_MMI2_PXOR:      /* TODO: TX79_MMI2_PXOR */
+    case TX79_MMI2_PMSUBH:    /* TODO: TX79_MMI2_PMSUBH */
+    case TX79_MMI2_PHMSBH:    /* TODO: TX79_MMI2_PHMSBH */
+    case TX79_MMI2_PEXEH:     /* TODO: TX79_MMI2_PEXEH */
+    case TX79_MMI2_PREVH:     /* TODO: TX79_MMI2_PREVH */
+    case TX79_MMI2_PMULTH:    /* TODO: TX79_MMI2_PMULTH */
+    case TX79_MMI2_PDIVBW:    /* TODO: TX79_MMI2_PDIVBW */
+    case TX79_MMI2_PEXEW:     /* TODO: TX79_MMI2_PEXEW */
+    case TX79_MMI2_PROT3W:    /* TODO: TX79_MMI2_PROT3W */
+        generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_MMI_CLASS_MMI2 */
+        break;
+    default:
+        MIPS_INVAL("TX79 MMI class MMI2");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+static void decode_tx79_mmi3(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opc = MASK_TX79_MMI3(ctx->opcode);
+    /* No MMI3 instruction is emulated yet; every encoding raises EXCP_RI. */
+    switch (opc) {
+    case TX79_MMI3_PMADDUW:    /* TODO: TX79_MMI3_PMADDUW */
+    case TX79_MMI3_PSRAVW:     /* TODO: TX79_MMI3_PSRAVW */
+    case TX79_MMI3_PMTHI:      /* TODO: TX79_MMI3_PMTHI */
+    case TX79_MMI3_PMTLO:      /* TODO: TX79_MMI3_PMTLO */
+    case TX79_MMI3_PINTEH:     /* TODO: TX79_MMI3_PINTEH */
+    case TX79_MMI3_PMULTUW:    /* TODO: TX79_MMI3_PMULTUW */
+    case TX79_MMI3_PDIVUW:     /* TODO: TX79_MMI3_PDIVUW */
+    case TX79_MMI3_PCPYUD:     /* TODO: TX79_MMI3_PCPYUD */
+    case TX79_MMI3_POR:        /* TODO: TX79_MMI3_POR */
+    case TX79_MMI3_PNOR:       /* TODO: TX79_MMI3_PNOR */
+    case TX79_MMI3_PEXCH:      /* TODO: TX79_MMI3_PEXCH */
+    case TX79_MMI3_PCPYH:      /* TODO: TX79_MMI3_PCPYH */
+    case TX79_MMI3_PEXCW:      /* TODO: TX79_MMI3_PEXCW */
+        generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_MMI_CLASS_MMI3 */
+        break;
+    default:
+        MIPS_INVAL("TX79 MMI class MMI3");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+static void decode_tx79_mmi(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opc = MASK_TX79_MMI(ctx->opcode);
+    int rs = extract32(ctx->opcode, 21, 5);
+    int rt = extract32(ctx->opcode, 16, 5);
+    int rd = extract32(ctx->opcode, 11, 5);
+    /* MMI0..MMI3 go to their own decoders; listed TODO cases raise EXCP_RI. */
+    switch (opc) {
+    case TX79_MMI_CLASS_MMI0:
+        decode_tx79_mmi0(env, ctx);
+        break;
+    case TX79_MMI_CLASS_MMI1:
+        decode_tx79_mmi1(env, ctx);
+        break;
+    case TX79_MMI_CLASS_MMI2:
+        decode_tx79_mmi2(env, ctx);
+        break;
+    case TX79_MMI_CLASS_MMI3:
+        decode_tx79_mmi3(env, ctx);
+        break;
+    case TX79_MMI_MULT1:
+    case TX79_MMI_MULTU1:
+        gen_mul_txx9(ctx, opc, rd, rs, rt);
+        break;
+    case TX79_MMI_DIV1:
+    case TX79_MMI_DIVU1:
+        gen_muldiv(ctx, opc, 1, rs, rt);
+        break;
+    case TX79_MMI_MTLO1:
+    case TX79_MMI_MTHI1:
+        gen_HILO(ctx, opc, 1, rs);
+        break;
+    case TX79_MMI_MFLO1:
+    case TX79_MMI_MFHI1:
+        gen_HILO(ctx, opc, 1, rd);
+        break;
+    case TX79_MMI_MADD:          /* TODO: TX79_MMI_MADD */
+    case TX79_MMI_MADDU:         /* TODO: TX79_MMI_MADDU */
+    case TX79_MMI_PLZCW:         /* TODO: TX79_MMI_PLZCW */
+    case TX79_MMI_MADD1:         /* TODO: TX79_MMI_MADD1 */
+    case TX79_MMI_MADDU1:        /* TODO: TX79_MMI_MADDU1 */
+    case TX79_MMI_PMFHL:         /* TODO: TX79_MMI_PMFHL */
+    case TX79_MMI_PMTHL:         /* TODO: TX79_MMI_PMTHL */
+    case TX79_MMI_PSLLH:         /* TODO: TX79_MMI_PSLLH */
+    case TX79_MMI_PSRLH:         /* TODO: TX79_MMI_PSRLH */
+    case TX79_MMI_PSRAH:         /* TODO: TX79_MMI_PSRAH */
+    case TX79_MMI_PSLLW:         /* TODO: TX79_MMI_PSLLW */
+    case TX79_MMI_PSRLW:         /* TODO: TX79_MMI_PSRLW */
+    case TX79_MMI_PSRAW:         /* TODO: TX79_MMI_PSRAW */
+        generate_exception_end(ctx, EXCP_RI);    /* TODO: TX79_CLASS_MMI */
+        break;
+    default:
+        MIPS_INVAL("TX79 MMI class");
+        generate_exception_end(ctx, EXCP_RI);
+        break;
+    }
+}
+
+static void decode_tx79_lq(CPUMIPSState *env, DisasContext *ctx)
+{
+    generate_exception_end(ctx, EXCP_RI);  /* TODO: emulate TX79_LQ */
+}
+
+static void gen_tx79_sq(DisasContext *ctx, int base, int rt, int offset)
+{
+    generate_exception_end(ctx, EXCP_RI); /* TODO: TX79_SQ (args unused) */
+}
+
+/*
+ * The TX79-specific instruction Store Quadword
+ *
+ * +--------+-------+-------+------------------------+
+ * | 011111 |  base |   rt  |           offset       | SQ
+ * +--------+-------+-------+------------------------+
+ *      6       5       5                 16
+ *
+ * has the same opcode as the Read Hardware Register instruction
+ *
+ * +--------+-------+-------+-------+-------+--------+
+ * | 011111 | 00000 |   rt  |   rd  | 00000 | 111011 | RDHWR
+ * +--------+-------+-------+-------+-------+--------+
+ *      6       5       5       5       5        6
+ *
+ * that is required, trapped and emulated by the Linux kernel. However, all
+ * RDHWR encodings yield address error exceptions on the TX79 since the SQ
+ * offset is odd. Therefore all valid SQ instructions can execute normally.
+ * In user mode, QEMU must verify the upper and lower 11 bits to distinguish
+ * between SQ and RDHWR, as the Linux kernel does.
+ */
+static void decode_tx79_sq(CPUMIPSState *env, DisasContext *ctx)
+{
+    int base = extract32(ctx->opcode, 21, 5);
+    int rt = extract32(ctx->opcode, 16, 5);
+    int offset = sextract32(ctx->opcode, 0, 16);  /* signed displacement */
+
+#ifdef CONFIG_USER_ONLY
+    uint32_t op1 = MASK_SPECIAL3(ctx->opcode);
+    uint32_t op2 = extract32(ctx->opcode, 6, 5);
+    /* Zero base and bits 10..6 with an RDHWR opcode: emulate RDHWR, not SQ. */
+    if (base == 0 && op2 == 0 && op1 == OPC_RDHWR) {
+        int rd = extract32(ctx->opcode, 11, 5);
+
+        gen_rdhwr(ctx, rt, rd, 0);
+        return;
+    }
+#endif
+
+    gen_tx79_sq(ctx, base, rt, offset);
+}
+
 static void decode_opc_special3(CPUMIPSState *env, DisasContext *ctx)
 {
     int rs, rt, rd, sa;
@@ -23380,7 +26652,9 @@
         op2 = MASK_BSHFL(ctx->opcode);
         switch (op2) {
         case OPC_ALIGN:
-        case OPC_ALIGN_END:
+        case OPC_ALIGN_1:
+        case OPC_ALIGN_2:
+        case OPC_ALIGN_3:
         case OPC_BITSWAP:
             check_insn(ctx, ISA_MIPS32R6);
             decode_opc_special3_r6(env, ctx);
@@ -23406,7 +26680,13 @@
         op2 = MASK_DBSHFL(ctx->opcode);
         switch (op2) {
         case OPC_DALIGN:
-        case OPC_DALIGN_END:
+        case OPC_DALIGN_1:
+        case OPC_DALIGN_2:
+        case OPC_DALIGN_3:
+        case OPC_DALIGN_4:
+        case OPC_DALIGN_5:
+        case OPC_DALIGN_6:
+        case OPC_DALIGN_7:
         case OPC_DBITSWAP:
             check_insn(ctx, ISA_MIPS32R6);
             decode_opc_special3_r6(env, ctx);
@@ -24605,10 +27885,20 @@
         decode_opc_special(env, ctx);
         break;
     case OPC_SPECIAL2:
-        decode_opc_special2_legacy(env, ctx);
+        if ((ctx->insn_flags & INSN_R5900) && (ctx->insn_flags & ASE_MMI)) {
+            decode_tx79_mmi(env, ctx);
+        } else if (ctx->insn_flags & ASE_MXU) {
+            decode_opc_mxu(env, ctx);
+        } else {
+            decode_opc_special2_legacy(env, ctx);
+        }
         break;
     case OPC_SPECIAL3:
-        decode_opc_special3(env, ctx);
+        if (ctx->insn_flags & INSN_R5900) {
+            decode_tx79_sq(env, ctx);    /* TX79_SQ */
+        } else {
+            decode_opc_special3(env, ctx);
+        }
         break;
     case OPC_REGIMM:
         op1 = MASK_REGIMM(ctx->opcode);
@@ -24895,6 +28185,7 @@
          break;
     case OPC_LL: /* Load and stores */
         check_insn(ctx, ISA_MIPS2);
+        check_insn_opc_user_only(ctx, INSN_R5900);
         /* Fallthrough */
     case OPC_LWL:
     case OPC_LWR:
@@ -24920,6 +28211,7 @@
     case OPC_SC:
         check_insn(ctx, ISA_MIPS2);
          check_insn_opc_removed(ctx, ISA_MIPS32R6);
+        check_insn_opc_user_only(ctx, INSN_R5900);
          gen_st_cond(ctx, op, rt, rs, imm);
          break;
     case OPC_CACHE:
@@ -24933,7 +28225,8 @@
         break;
     case OPC_PREF:
         check_insn_opc_removed(ctx, ISA_MIPS32R6);
-        check_insn(ctx, ISA_MIPS4 | ISA_MIPS32);
+        check_insn(ctx, ISA_MIPS4 | ISA_MIPS32 |
+                   INSN_R5900);
         /* Treat as NOP. */
         break;
 
@@ -25185,9 +28478,11 @@
 
 #if defined(TARGET_MIPS64)
     /* MIPS64 opcodes */
+    case OPC_LLD:
+        check_insn_opc_user_only(ctx, INSN_R5900);
+        /* fall through */
     case OPC_LDL:
     case OPC_LDR:
-    case OPC_LLD:
         check_insn_opc_removed(ctx, ISA_MIPS32R6);
         /* fall through */
     case OPC_LWU:
@@ -25208,6 +28503,7 @@
     case OPC_SCD:
         check_insn_opc_removed(ctx, ISA_MIPS32R6);
         check_insn(ctx, ISA_MIPS3);
+        check_insn_opc_user_only(ctx, INSN_R5900);
         check_mips_64(ctx);
         gen_st_cond(ctx, op, rt, rs, imm);
         break;
@@ -25262,8 +28558,12 @@
         }
         break;
     case OPC_MSA: /* OPC_MDMX */
-        /* MDMX: Not implemented. */
-        gen_msa(env, ctx);
+        if (ctx->insn_flags & INSN_R5900) {
+            decode_tx79_lq(env, ctx);    /* TX79_LQ */
+        } else {
+            /* MDMX: Not implemented. */
+            gen_msa(env, ctx);
+        }
         break;
     case OPC_PCREL:
         check_insn(ctx, ISA_MIPS32R6);
@@ -25285,6 +28585,7 @@
     ctx->saved_pc = -1;
     ctx->insn_flags = env->insn_flags;
     ctx->CP0_Config1 = env->CP0_Config1;
+    ctx->CP0_Config2 = env->CP0_Config2;
     ctx->CP0_Config3 = env->CP0_Config3;
     ctx->CP0_Config5 = env->CP0_Config5;
     ctx->btarget = 0;
@@ -25585,6 +28886,17 @@
     fpu_fcr31 = tcg_global_mem_new_i32(cpu_env,
                                        offsetof(CPUMIPSState, active_fpu.fcr31),
                                        "fcr31");
+
+    for (i = 0; i < NUMBER_OF_MXU_REGISTERS - 1; i++) {
+        mxu_gpr[i] = tcg_global_mem_new(cpu_env,
+                                        offsetof(CPUMIPSState,
+                                                 active_tc.mxu_gpr[i]),
+                                        mxuregnames[i]);
+    }
+
+    mxu_CR = tcg_global_mem_new(cpu_env,
+                                offsetof(CPUMIPSState, active_tc.mxu_cr),
+                                mxuregnames[NUMBER_OF_MXU_REGISTERS - 1]);
 }
 
 #include "translate_init.inc.c"
@@ -25799,6 +29111,24 @@
         env->CP0_Status |= (1 << CP0St_FR);
     }
 
+    if (env->insn_flags & ISA_MIPS32R6) {
+        /* PTW  =  1 */
+        env->CP0_PWSize = 0x40;
+        /* GDI  = 12 */
+        /* UDI  = 12 */
+        /* MDI  = 12 */
+        /* PRI  = 12 */
+        /* PTEI =  2 */
+        env->CP0_PWField = 0x0C30C302;
+    } else {
+        /* GDI  =  0 */
+        /* UDI  =  0 */
+        /* MDI  =  0 */
+        /* PRI  =  0 */
+        /* PTEI =  2 */
+        env->CP0_PWField = 0x02;
+    }
+
     if (env->CP0_Config3 & (1 << CP0C3_ISA) & (1 << (CP0C3_ISA + 1))) {
         /*  microMIPS on reset when Config3.ISA is 3 */
         env->hflags |= MIPS_HFLAG_M16;
diff --git a/target/mips/translate_init.inc.c b/target/mips/translate_init.inc.c
index b3320b9..85da4a2 100644
--- a/target/mips/translate_init.inc.c
+++ b/target/mips/translate_init.inc.c
@@ -320,7 +320,7 @@
         .CP1_fcr31_rw_bitmask = 0xFF83FFFF,
         .SEGBITS = 32,
         .PABITS = 32,
-        .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_DSPR2,
+        .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_DSP_R2,
         .mmu_type = MMU_TYPE_R4000,
     },
     {
@@ -411,6 +411,65 @@
         .mmu_type = MMU_TYPE_R4000,
     },
     {
+        /*
+         * The Toshiba TX System RISC TX79 Core Architecture manual
+         *
+         * https://wiki.qemu.org/File:C790.pdf
+         *
+         * describes the C790 processor that is a follow-up to the R5900.
+         * There are a few notable differences in that the R5900 FPU
+         *
+         * - is not IEEE 754-1985 compliant,
+         * - does not implement double format, and
+         * - its machine code is nonstandard.
+         */
+        .name = "R5900",
+        .CP0_PRid = 0x00002E00,
+        /* No L2 cache, icache size 32k, dcache size 32k, uncached coherency. */
+        .CP0_Config0 = (0x3 << 9) | (0x3 << 6) | (0x2 << CP0C0_K0),
+        .CP0_Status_rw_bitmask = 0xF4C79C1F,
+#ifdef CONFIG_USER_ONLY
+        /*
+         * R5900 hardware traps to the Linux kernel for IEEE 754-1985 and LL/SC
+         * emulation. For user only, QEMU is the kernel, so we emulate the traps
+         * by simply emulating the instructions directly.
+         *
+         * Note: Config1 is only used internally, the R5900 has only Config0.
+         */
+        .CP0_Config1 = (1 << CP0C1_FP) | (47 << CP0C1_MMU),
+        .CP0_LLAddr_rw_bitmask = 0xFFFFFFFF,
+        .CP0_LLAddr_shift = 4,
+        .CP1_fcr0 = (0x38 << FCR0_PRID) | (0x0 << FCR0_REV),
+        .CP1_fcr31 = 0,
+        .CP1_fcr31_rw_bitmask = 0x0183FFFF,
+#else
+        /*
+         * The R5900 COP1 FPU implements single-precision floating-point
+         * operations but is not entirely IEEE 754-1985 compatible. In
+         * particular,
+         *
+         * - NaN (not a number) and +/- infinities are not supported;
+         * - exception mechanisms are not fully supported;
+         * - denormalized numbers are not supported;
+         * - rounding towards nearest and +/- infinities are not supported;
+         * - computed results usually differ in the least significant bit;
+         * - saturations can differ more than the least significant bit.
+         *
+         * Since only rounding towards zero is supported, the two least
+         * significant bits of FCR31 are hardwired to 01.
+         *
+         * FPU emulation is disabled here until it is implemented.
+         *
+         * Note: Config1 is only used internally, the R5900 has only Config0.
+         */
+        .CP0_Config1 = (47 << CP0C1_MMU),
+#endif /* !CONFIG_USER_ONLY */
+        .SEGBITS = 32,
+        .PABITS = 32,
+        .insn_flags = CPU_R5900 | ASE_MMI,
+        .mmu_type = MMU_TYPE_R4000,
+    },
+    {
         /* A generic CPU supporting MIPS32 Release 6 ISA.
            FIXME: Support IEEE 754-2008 FP.
                   Eventually this should be replaced by a real CPU model. */
@@ -485,7 +544,8 @@
         .CP1_fcr31 = (1 << FCR31_ABS2008) | (1 << FCR31_NAN2008),
         .SEGBITS = 32,
         .PABITS = 32,
-        .insn_flags = CPU_NANOMIPS32 | ASE_DSP | ASE_DSPR2 | ASE_MT,
+        .insn_flags = CPU_NANOMIPS32 | ASE_DSP | ASE_DSP_R2 | ASE_DSP_R3 |
+                      ASE_MT,
         .mmu_type = MMU_TYPE_R4000,
     },
 #if defined(TARGET_MIPS64)
@@ -761,7 +821,7 @@
         .mmu_type = MMU_TYPE_R4000,
     },
     {
-        /* A generic CPU providing MIPS64 ASE DSP 2 features.
+        /* A generic CPU providing MIPS64 DSP R2 ASE features.
            FIXME: Eventually this should be replaced by a real CPU model. */
         .name = "mips64dspr2",
         .CP0_PRid = 0x00010000,
@@ -786,7 +846,7 @@
         .CP1_fcr31_rw_bitmask = 0xFF83FFFF,
         .SEGBITS = 42,
         .PABITS = 36,
-        .insn_flags = CPU_MIPS64R2 | ASE_DSP | ASE_DSPR2,
+        .insn_flags = CPU_MIPS64R2 | ASE_DSP | ASE_DSP_R2,
         .mmu_type = MMU_TYPE_R4000,
     },
 
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index ef64248..7a1481f 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -800,7 +800,7 @@
 DEF_HELPER_1(tbegin, void, env)
 DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)
 
-#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
+#ifdef TARGET_PPC64
 DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
 DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
 DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 8f0d86d..a1485fa 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -25,6 +25,7 @@
 #include "exec/cpu_ldst.h"
 #include "tcg.h"
 #include "internal.h"
+#include "qemu/atomic128.h"
 
 //#define DEBUG_OP
 
@@ -215,11 +216,15 @@
     return i;
 }
 
-#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
+#ifdef TARGET_PPC64
 uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,
                                uint32_t opidx)
 {
-    Int128 ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
+    Int128 ret;
+
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_ATOMIC128);
+    ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
     env->retxh = int128_gethi(ret);
     return int128_getlo(ret);
 }
@@ -227,7 +232,11 @@
 uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,
                                uint32_t opidx)
 {
-    Int128 ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
+    Int128 ret;
+
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_ATOMIC128);
+    ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
     env->retxh = int128_gethi(ret);
     return int128_getlo(ret);
 }
@@ -235,14 +244,22 @@
 void helper_stq_le_parallel(CPUPPCState *env, target_ulong addr,
                             uint64_t lo, uint64_t hi, uint32_t opidx)
 {
-    Int128 val = int128_make128(lo, hi);
+    Int128 val;
+
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_ATOMIC128);
+    val = int128_make128(lo, hi);
     helper_atomic_sto_le_mmu(env, addr, val, opidx, GETPC());
 }
 
 void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr,
                             uint64_t lo, uint64_t hi, uint32_t opidx)
 {
-    Int128 val = int128_make128(lo, hi);
+    Int128 val;
+
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_ATOMIC128);
+    val = int128_make128(lo, hi);
     helper_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
 }
 
@@ -252,6 +269,9 @@
 {
     bool success = false;
 
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_CMPXCHG128);
+
     if (likely(addr == env->reserve_addr)) {
         Int128 oldv, cmpv, newv;
 
@@ -271,6 +291,9 @@
 {
     bool success = false;
 
+    /* We will have raised EXCP_ATOMIC from the translator.  */
+    assert(HAVE_CMPXCHG128);
+
     if (likely(addr == env->reserve_addr)) {
         Int128 oldv, cmpv, newv;
 
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 8817435..4e59dd5 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -33,6 +33,7 @@
 #include "trace-tcg.h"
 #include "exec/translator.h"
 #include "exec/log.h"
+#include "qemu/atomic128.h"
 
 
 #define CPU_SINGLE_STEP 0x1
@@ -2654,22 +2655,22 @@
     hi = cpu_gpr[rd];
 
     if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
-#ifdef CONFIG_ATOMIC128
-        TCGv_i32 oi = tcg_temp_new_i32();
-        if (ctx->le_mode) {
-            tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
-            gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+        if (HAVE_ATOMIC128) {
+            TCGv_i32 oi = tcg_temp_new_i32();
+            if (ctx->le_mode) {
+                tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
+                gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+            } else {
+                tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
+                gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+            }
+            tcg_temp_free_i32(oi);
+            tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
         } else {
-            tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
-            gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+            /* Restart with exclusive lock.  */
+            gen_helper_exit_atomic(cpu_env);
+            ctx->base.is_jmp = DISAS_NORETURN;
         }
-        tcg_temp_free_i32(oi);
-        tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
-#else
-        /* Restart with exclusive lock.  */
-        gen_helper_exit_atomic(cpu_env);
-        ctx->base.is_jmp = DISAS_NORETURN;
-#endif
     } else if (ctx->le_mode) {
         tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ);
         gen_addr_add(ctx, EA, EA, 8);
@@ -2805,21 +2806,21 @@
         hi = cpu_gpr[rs];
 
         if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
-#ifdef CONFIG_ATOMIC128
-            TCGv_i32 oi = tcg_temp_new_i32();
-            if (ctx->le_mode) {
-                tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
-                gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi);
+            if (HAVE_ATOMIC128) {
+                TCGv_i32 oi = tcg_temp_new_i32();
+                if (ctx->le_mode) {
+                    tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
+                    gen_helper_stq_le_parallel(cpu_env, EA, lo, hi, oi);
+                } else {
+                    tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
+                    gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi);
+                }
+                tcg_temp_free_i32(oi);
             } else {
-                tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
-                gen_helper_stq_be_parallel(cpu_env, EA, lo, hi, oi);
+                /* Restart with exclusive lock.  */
+                gen_helper_exit_atomic(cpu_env);
+                ctx->base.is_jmp = DISAS_NORETURN;
             }
-            tcg_temp_free_i32(oi);
-#else
-            /* Restart with exclusive lock.  */
-            gen_helper_exit_atomic(cpu_env);
-            ctx->base.is_jmp = DISAS_NORETURN;
-#endif
         } else if (ctx->le_mode) {
             tcg_gen_qemu_st_i64(lo, EA, ctx->mem_idx, MO_LEQ);
             gen_addr_add(ctx, EA, EA, 8);
@@ -3404,26 +3405,26 @@
     hi = cpu_gpr[rd];
 
     if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
-#ifdef CONFIG_ATOMIC128
-        TCGv_i32 oi = tcg_temp_new_i32();
-        if (ctx->le_mode) {
-            tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16,
-                                                ctx->mem_idx));
-            gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+        if (HAVE_ATOMIC128) {
+            TCGv_i32 oi = tcg_temp_new_i32();
+            if (ctx->le_mode) {
+                tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16,
+                                                    ctx->mem_idx));
+                gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
+            } else {
+                tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16,
+                                                    ctx->mem_idx));
+                gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+            }
+            tcg_temp_free_i32(oi);
+            tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
         } else {
-            tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16,
-                                                ctx->mem_idx));
-            gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
+            /* Restart with exclusive lock.  */
+            gen_helper_exit_atomic(cpu_env);
+            ctx->base.is_jmp = DISAS_NORETURN;
+            tcg_temp_free(EA);
+            return;
         }
-        tcg_temp_free_i32(oi);
-        tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
-#else
-        /* Restart with exclusive lock.  */
-        gen_helper_exit_atomic(cpu_env);
-        ctx->base.is_jmp = DISAS_NORETURN;
-        tcg_temp_free(EA);
-        return;
-#endif
     } else if (ctx->le_mode) {
         tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ | MO_ALIGN_16);
         tcg_gen_mov_tl(cpu_reserve, EA);
@@ -3461,20 +3462,22 @@
     hi = cpu_gpr[rs];
 
     if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
-        TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16);
-#ifdef CONFIG_ATOMIC128
-        if (ctx->le_mode) {
-            gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi);
+        if (HAVE_CMPXCHG128) {
+            TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_Q) | MO_ALIGN_16);
+            if (ctx->le_mode) {
+                gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env,
+                                             EA, lo, hi, oi);
+            } else {
+                gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env,
+                                             EA, lo, hi, oi);
+            }
+            tcg_temp_free_i32(oi);
         } else {
-            gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, EA, lo, hi, oi);
+            /* Restart with exclusive lock.  */
+            gen_helper_exit_atomic(cpu_env);
+            ctx->base.is_jmp = DISAS_NORETURN;
         }
-#else
-        /* Restart with exclusive lock.  */
-        gen_helper_exit_atomic(cpu_env);
-        ctx->base.is_jmp = DISAS_NORETURN;
-#endif
         tcg_temp_free(EA);
-        tcg_temp_free_i32(oi);
     } else {
         TCGLabel *lab_fail = gen_new_label();
         TCGLabel *lab_over = gen_new_label();
diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c
index 263e63c..ee9432e 100644
--- a/target/ppc/translate_init.inc.c
+++ b/target/ppc/translate_init.inc.c
@@ -8381,8 +8381,8 @@
     QNull *null = NULL;
 
     if (!qtest_enabled()) {
-        error_report("CPU 'compat' property is deprecated and has no effect; "
-                     "use max-cpu-compat machine property instead");
+        warn_report("CPU 'compat' property is deprecated and has no effect; "
+                    "use max-cpu-compat machine property instead");
     }
     visit_type_null(v, name, &null, NULL);
     qobject_unref(null);
diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs
index abd0a7c..fcc5d34 100644
--- a/target/riscv/Makefile.objs
+++ b/target/riscv/Makefile.objs
@@ -1 +1 @@
-obj-y += translate.o op_helper.o helper.o cpu.o fpu_helper.o gdbstub.o pmp.o
+obj-y += translate.o op_helper.o cpu_helper.o cpu.o fpu_helper.o gdbstub.o pmp.o
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index d630e8f..a025a0a 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -74,8 +74,10 @@
     "s_external",
     "h_external",
     "m_external",
-    "coprocessor",
-    "host"
+    "reserved",
+    "reserved",
+    "reserved",
+    "reserved"
 };
 
 typedef struct RISCVCPUInfo {
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index d4f3629..4ee09b9 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -126,13 +126,18 @@
 
     target_ulong mhartid;
     target_ulong mstatus;
+
     /*
      * CAUTION! Unlike the rest of this struct, mip is accessed asynchonously
-     * by I/O threads and other vCPUs, so hold the iothread mutex before
-     * operating on it.  CPU_INTERRUPT_HARD should be in effect iff this is
-     * non-zero.  Use riscv_cpu_set_local_interrupt.
+     * by I/O threads. It should be read with atomic_read. It should be updated
+     * using riscv_cpu_update_mip with the iothread mutex held. The iothread
+     * mutex must be held because mip must be consistent with the CPU interrupt
+     * state. riscv_cpu_update_mip calls cpu_interrupt or cpu_reset_interrupt
+     * with the invariant that CPU_INTERRUPT_HARD is set iff mip is non-zero.
+     * mip is 32-bits to allow atomic_read on 32-bit hosts.
      */
-    uint32_t mip;        /* allow atomic_read for >= 32-bit hosts */
+    uint32_t mip;
+
     target_ulong mie;
     target_ulong mideleg;
 
@@ -247,7 +252,6 @@
                                     uintptr_t retaddr);
 int riscv_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int size,
                               int rw, int mmu_idx);
-
 char *riscv_isa_string(RISCVCPU *cpu);
 void riscv_cpu_list(FILE *f, fprintf_function cpu_fprintf);
 
@@ -255,6 +259,10 @@
 #define cpu_list riscv_cpu_list
 #define cpu_mmu_index riscv_cpu_mmu_index
 
+#ifndef CONFIG_USER_ONLY
+uint32_t riscv_cpu_update_mip(RISCVCPU *cpu, uint32_t mask, uint32_t value);
+#define BOOL_TO_MASK(x) (-!!(x)) /* helper for riscv_cpu_update_mip value */
+#endif
 void riscv_set_mode(CPURISCVState *env, target_ulong newpriv);
 
 void riscv_translate_init(void);
@@ -285,10 +293,6 @@
         target_ulong csrno);
 target_ulong csr_read_helper(CPURISCVState *env, target_ulong csrno);
 
-#ifndef CONFIG_USER_ONLY
-void riscv_set_local_interrupt(RISCVCPU *cpu, target_ulong mask, int value);
-#endif
-
 #include "exec/cpu-all.h"
 
 #endif /* RISCV_CPU_H */
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 12b4757..5439f47 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -6,242 +6,283 @@
                  (((target_ulong)(val) * ((mask) & ~((mask) << 1))) & \
                  (target_ulong)(mask)))
 
-#define PGSHIFT 12
+/* Floating point round mode */
+#define FSR_RD_SHIFT        5
+#define FSR_RD              (0x7 << FSR_RD_SHIFT)
 
-#define FSR_RD_SHIFT 5
-#define FSR_RD   (0x7 << FSR_RD_SHIFT)
+/* Floating point accrued exception flags */
+#define FPEXC_NX            0x01
+#define FPEXC_UF            0x02
+#define FPEXC_OF            0x04
+#define FPEXC_DZ            0x08
+#define FPEXC_NV            0x10
 
-#define FPEXC_NX 0x01
-#define FPEXC_UF 0x02
-#define FPEXC_OF 0x04
-#define FPEXC_DZ 0x08
-#define FPEXC_NV 0x10
+/* Floating point status register bits */
+#define FSR_AEXC_SHIFT      0
+#define FSR_NVA             (FPEXC_NV << FSR_AEXC_SHIFT)
+#define FSR_OFA             (FPEXC_OF << FSR_AEXC_SHIFT)
+#define FSR_UFA             (FPEXC_UF << FSR_AEXC_SHIFT)
+#define FSR_DZA             (FPEXC_DZ << FSR_AEXC_SHIFT)
+#define FSR_NXA             (FPEXC_NX << FSR_AEXC_SHIFT)
+#define FSR_AEXC            (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA)
 
-#define FSR_AEXC_SHIFT 0
-#define FSR_NVA  (FPEXC_NV << FSR_AEXC_SHIFT)
-#define FSR_OFA  (FPEXC_OF << FSR_AEXC_SHIFT)
-#define FSR_UFA  (FPEXC_UF << FSR_AEXC_SHIFT)
-#define FSR_DZA  (FPEXC_DZ << FSR_AEXC_SHIFT)
-#define FSR_NXA  (FPEXC_NX << FSR_AEXC_SHIFT)
-#define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA)
+/* Control and Status Registers */
 
-/* CSR numbers */
-#define CSR_FFLAGS 0x1
-#define CSR_FRM 0x2
-#define CSR_FCSR 0x3
-#define CSR_CYCLE 0xc00
-#define CSR_TIME 0xc01
-#define CSR_INSTRET 0xc02
-#define CSR_HPMCOUNTER3 0xc03
-#define CSR_HPMCOUNTER4 0xc04
-#define CSR_HPMCOUNTER5 0xc05
-#define CSR_HPMCOUNTER6 0xc06
-#define CSR_HPMCOUNTER7 0xc07
-#define CSR_HPMCOUNTER8 0xc08
-#define CSR_HPMCOUNTER9 0xc09
-#define CSR_HPMCOUNTER10 0xc0a
-#define CSR_HPMCOUNTER11 0xc0b
-#define CSR_HPMCOUNTER12 0xc0c
-#define CSR_HPMCOUNTER13 0xc0d
-#define CSR_HPMCOUNTER14 0xc0e
-#define CSR_HPMCOUNTER15 0xc0f
-#define CSR_HPMCOUNTER16 0xc10
-#define CSR_HPMCOUNTER17 0xc11
-#define CSR_HPMCOUNTER18 0xc12
-#define CSR_HPMCOUNTER19 0xc13
-#define CSR_HPMCOUNTER20 0xc14
-#define CSR_HPMCOUNTER21 0xc15
-#define CSR_HPMCOUNTER22 0xc16
-#define CSR_HPMCOUNTER23 0xc17
-#define CSR_HPMCOUNTER24 0xc18
-#define CSR_HPMCOUNTER25 0xc19
-#define CSR_HPMCOUNTER26 0xc1a
-#define CSR_HPMCOUNTER27 0xc1b
-#define CSR_HPMCOUNTER28 0xc1c
-#define CSR_HPMCOUNTER29 0xc1d
-#define CSR_HPMCOUNTER30 0xc1e
-#define CSR_HPMCOUNTER31 0xc1f
-#define CSR_SSTATUS 0x100
-#define CSR_SIE 0x104
-#define CSR_STVEC 0x105
-#define CSR_SCOUNTEREN 0x106
-#define CSR_SSCRATCH 0x140
-#define CSR_SEPC 0x141
-#define CSR_SCAUSE 0x142
-#define CSR_SBADADDR 0x143
-#define CSR_SIP 0x144
-#define CSR_SPTBR 0x180
-#define CSR_SATP 0x180
-#define CSR_MSTATUS 0x300
-#define CSR_MISA 0x301
-#define CSR_MEDELEG 0x302
-#define CSR_MIDELEG 0x303
-#define CSR_MIE 0x304
-#define CSR_MTVEC 0x305
-#define CSR_MCOUNTEREN 0x306
-#define CSR_MSCRATCH 0x340
-#define CSR_MEPC 0x341
-#define CSR_MCAUSE 0x342
-#define CSR_MBADADDR 0x343
-#define CSR_MIP 0x344
-#define CSR_PMPCFG0 0x3a0
-#define CSR_PMPCFG1 0x3a1
-#define CSR_PMPCFG2 0x3a2
-#define CSR_PMPCFG3 0x3a3
-#define CSR_PMPADDR0 0x3b0
-#define CSR_PMPADDR1 0x3b1
-#define CSR_PMPADDR2 0x3b2
-#define CSR_PMPADDR3 0x3b3
-#define CSR_PMPADDR4 0x3b4
-#define CSR_PMPADDR5 0x3b5
-#define CSR_PMPADDR6 0x3b6
-#define CSR_PMPADDR7 0x3b7
-#define CSR_PMPADDR8 0x3b8
-#define CSR_PMPADDR9 0x3b9
-#define CSR_PMPADDR10 0x3ba
-#define CSR_PMPADDR11 0x3bb
-#define CSR_PMPADDR12 0x3bc
-#define CSR_PMPADDR13 0x3bd
-#define CSR_PMPADDR14 0x3be
-#define CSR_PMPADDR15 0x3bf
-#define CSR_TSELECT 0x7a0
-#define CSR_TDATA1 0x7a1
-#define CSR_TDATA2 0x7a2
-#define CSR_TDATA3 0x7a3
-#define CSR_DCSR 0x7b0
-#define CSR_DPC 0x7b1
-#define CSR_DSCRATCH 0x7b2
-#define CSR_MCYCLE 0xb00
-#define CSR_MINSTRET 0xb02
-#define CSR_MHPMCOUNTER3 0xb03
-#define CSR_MHPMCOUNTER4 0xb04
-#define CSR_MHPMCOUNTER5 0xb05
-#define CSR_MHPMCOUNTER6 0xb06
-#define CSR_MHPMCOUNTER7 0xb07
-#define CSR_MHPMCOUNTER8 0xb08
-#define CSR_MHPMCOUNTER9 0xb09
-#define CSR_MHPMCOUNTER10 0xb0a
-#define CSR_MHPMCOUNTER11 0xb0b
-#define CSR_MHPMCOUNTER12 0xb0c
-#define CSR_MHPMCOUNTER13 0xb0d
-#define CSR_MHPMCOUNTER14 0xb0e
-#define CSR_MHPMCOUNTER15 0xb0f
-#define CSR_MHPMCOUNTER16 0xb10
-#define CSR_MHPMCOUNTER17 0xb11
-#define CSR_MHPMCOUNTER18 0xb12
-#define CSR_MHPMCOUNTER19 0xb13
-#define CSR_MHPMCOUNTER20 0xb14
-#define CSR_MHPMCOUNTER21 0xb15
-#define CSR_MHPMCOUNTER22 0xb16
-#define CSR_MHPMCOUNTER23 0xb17
-#define CSR_MHPMCOUNTER24 0xb18
-#define CSR_MHPMCOUNTER25 0xb19
-#define CSR_MHPMCOUNTER26 0xb1a
-#define CSR_MHPMCOUNTER27 0xb1b
-#define CSR_MHPMCOUNTER28 0xb1c
-#define CSR_MHPMCOUNTER29 0xb1d
-#define CSR_MHPMCOUNTER30 0xb1e
-#define CSR_MHPMCOUNTER31 0xb1f
-#define CSR_MUCOUNTEREN 0x320
-#define CSR_MSCOUNTEREN 0x321
-#define CSR_MHPMEVENT3 0x323
-#define CSR_MHPMEVENT4 0x324
-#define CSR_MHPMEVENT5 0x325
-#define CSR_MHPMEVENT6 0x326
-#define CSR_MHPMEVENT7 0x327
-#define CSR_MHPMEVENT8 0x328
-#define CSR_MHPMEVENT9 0x329
-#define CSR_MHPMEVENT10 0x32a
-#define CSR_MHPMEVENT11 0x32b
-#define CSR_MHPMEVENT12 0x32c
-#define CSR_MHPMEVENT13 0x32d
-#define CSR_MHPMEVENT14 0x32e
-#define CSR_MHPMEVENT15 0x32f
-#define CSR_MHPMEVENT16 0x330
-#define CSR_MHPMEVENT17 0x331
-#define CSR_MHPMEVENT18 0x332
-#define CSR_MHPMEVENT19 0x333
-#define CSR_MHPMEVENT20 0x334
-#define CSR_MHPMEVENT21 0x335
-#define CSR_MHPMEVENT22 0x336
-#define CSR_MHPMEVENT23 0x337
-#define CSR_MHPMEVENT24 0x338
-#define CSR_MHPMEVENT25 0x339
-#define CSR_MHPMEVENT26 0x33a
-#define CSR_MHPMEVENT27 0x33b
-#define CSR_MHPMEVENT28 0x33c
-#define CSR_MHPMEVENT29 0x33d
-#define CSR_MHPMEVENT30 0x33e
-#define CSR_MHPMEVENT31 0x33f
-#define CSR_MVENDORID 0xf11
-#define CSR_MARCHID 0xf12
-#define CSR_MIMPID 0xf13
-#define CSR_MHARTID 0xf14
-#define CSR_CYCLEH 0xc80
-#define CSR_TIMEH 0xc81
-#define CSR_INSTRETH 0xc82
-#define CSR_HPMCOUNTER3H 0xc83
-#define CSR_HPMCOUNTER4H 0xc84
-#define CSR_HPMCOUNTER5H 0xc85
-#define CSR_HPMCOUNTER6H 0xc86
-#define CSR_HPMCOUNTER7H 0xc87
-#define CSR_HPMCOUNTER8H 0xc88
-#define CSR_HPMCOUNTER9H 0xc89
-#define CSR_HPMCOUNTER10H 0xc8a
-#define CSR_HPMCOUNTER11H 0xc8b
-#define CSR_HPMCOUNTER12H 0xc8c
-#define CSR_HPMCOUNTER13H 0xc8d
-#define CSR_HPMCOUNTER14H 0xc8e
-#define CSR_HPMCOUNTER15H 0xc8f
-#define CSR_HPMCOUNTER16H 0xc90
-#define CSR_HPMCOUNTER17H 0xc91
-#define CSR_HPMCOUNTER18H 0xc92
-#define CSR_HPMCOUNTER19H 0xc93
-#define CSR_HPMCOUNTER20H 0xc94
-#define CSR_HPMCOUNTER21H 0xc95
-#define CSR_HPMCOUNTER22H 0xc96
-#define CSR_HPMCOUNTER23H 0xc97
-#define CSR_HPMCOUNTER24H 0xc98
-#define CSR_HPMCOUNTER25H 0xc99
-#define CSR_HPMCOUNTER26H 0xc9a
-#define CSR_HPMCOUNTER27H 0xc9b
-#define CSR_HPMCOUNTER28H 0xc9c
-#define CSR_HPMCOUNTER29H 0xc9d
-#define CSR_HPMCOUNTER30H 0xc9e
-#define CSR_HPMCOUNTER31H 0xc9f
-#define CSR_MCYCLEH 0xb80
-#define CSR_MINSTRETH 0xb82
-#define CSR_MHPMCOUNTER3H 0xb83
-#define CSR_MHPMCOUNTER4H 0xb84
-#define CSR_MHPMCOUNTER5H 0xb85
-#define CSR_MHPMCOUNTER6H 0xb86
-#define CSR_MHPMCOUNTER7H 0xb87
-#define CSR_MHPMCOUNTER8H 0xb88
-#define CSR_MHPMCOUNTER9H 0xb89
-#define CSR_MHPMCOUNTER10H 0xb8a
-#define CSR_MHPMCOUNTER11H 0xb8b
-#define CSR_MHPMCOUNTER12H 0xb8c
-#define CSR_MHPMCOUNTER13H 0xb8d
-#define CSR_MHPMCOUNTER14H 0xb8e
-#define CSR_MHPMCOUNTER15H 0xb8f
-#define CSR_MHPMCOUNTER16H 0xb90
-#define CSR_MHPMCOUNTER17H 0xb91
-#define CSR_MHPMCOUNTER18H 0xb92
-#define CSR_MHPMCOUNTER19H 0xb93
-#define CSR_MHPMCOUNTER20H 0xb94
-#define CSR_MHPMCOUNTER21H 0xb95
-#define CSR_MHPMCOUNTER22H 0xb96
-#define CSR_MHPMCOUNTER23H 0xb97
-#define CSR_MHPMCOUNTER24H 0xb98
-#define CSR_MHPMCOUNTER25H 0xb99
-#define CSR_MHPMCOUNTER26H 0xb9a
-#define CSR_MHPMCOUNTER27H 0xb9b
-#define CSR_MHPMCOUNTER28H 0xb9c
-#define CSR_MHPMCOUNTER29H 0xb9d
-#define CSR_MHPMCOUNTER30H 0xb9e
-#define CSR_MHPMCOUNTER31H 0xb9f
+/* User Trap Setup */
+#define CSR_USTATUS         0x000
+#define CSR_UIE             0x004
+#define CSR_UTVEC           0x005
 
-/* mstatus bits */
+/* User Trap Handling */
+#define CSR_USCRATCH        0x040
+#define CSR_UEPC            0x041
+#define CSR_UCAUSE          0x042
+#define CSR_UTVAL           0x043
+#define CSR_UIP             0x044
+
+/* User Floating-Point CSRs */
+#define CSR_FFLAGS          0x001
+#define CSR_FRM             0x002
+#define CSR_FCSR            0x003
+
+/* User Timers and Counters */
+#define CSR_CYCLE           0xc00
+#define CSR_TIME            0xc01
+#define CSR_INSTRET         0xc02
+#define CSR_HPMCOUNTER3     0xc03
+#define CSR_HPMCOUNTER4     0xc04
+#define CSR_HPMCOUNTER5     0xc05
+#define CSR_HPMCOUNTER6     0xc06
+#define CSR_HPMCOUNTER7     0xc07
+#define CSR_HPMCOUNTER8     0xc08
+#define CSR_HPMCOUNTER9     0xc09
+#define CSR_HPMCOUNTER10    0xc0a
+#define CSR_HPMCOUNTER11    0xc0b
+#define CSR_HPMCOUNTER12    0xc0c
+#define CSR_HPMCOUNTER13    0xc0d
+#define CSR_HPMCOUNTER14    0xc0e
+#define CSR_HPMCOUNTER15    0xc0f
+#define CSR_HPMCOUNTER16    0xc10
+#define CSR_HPMCOUNTER17    0xc11
+#define CSR_HPMCOUNTER18    0xc12
+#define CSR_HPMCOUNTER19    0xc13
+#define CSR_HPMCOUNTER20    0xc14
+#define CSR_HPMCOUNTER21    0xc15
+#define CSR_HPMCOUNTER22    0xc16
+#define CSR_HPMCOUNTER23    0xc17
+#define CSR_HPMCOUNTER24    0xc18
+#define CSR_HPMCOUNTER25    0xc19
+#define CSR_HPMCOUNTER26    0xc1a
+#define CSR_HPMCOUNTER27    0xc1b
+#define CSR_HPMCOUNTER28    0xc1c
+#define CSR_HPMCOUNTER29    0xc1d
+#define CSR_HPMCOUNTER30    0xc1e
+#define CSR_HPMCOUNTER31    0xc1f
+#define CSR_CYCLEH          0xc80
+#define CSR_TIMEH           0xc81
+#define CSR_INSTRETH        0xc82
+#define CSR_HPMCOUNTER3H    0xc83
+#define CSR_HPMCOUNTER4H    0xc84
+#define CSR_HPMCOUNTER5H    0xc85
+#define CSR_HPMCOUNTER6H    0xc86
+#define CSR_HPMCOUNTER7H    0xc87
+#define CSR_HPMCOUNTER8H    0xc88
+#define CSR_HPMCOUNTER9H    0xc89
+#define CSR_HPMCOUNTER10H   0xc8a
+#define CSR_HPMCOUNTER11H   0xc8b
+#define CSR_HPMCOUNTER12H   0xc8c
+#define CSR_HPMCOUNTER13H   0xc8d
+#define CSR_HPMCOUNTER14H   0xc8e
+#define CSR_HPMCOUNTER15H   0xc8f
+#define CSR_HPMCOUNTER16H   0xc90
+#define CSR_HPMCOUNTER17H   0xc91
+#define CSR_HPMCOUNTER18H   0xc92
+#define CSR_HPMCOUNTER19H   0xc93
+#define CSR_HPMCOUNTER20H   0xc94
+#define CSR_HPMCOUNTER21H   0xc95
+#define CSR_HPMCOUNTER22H   0xc96
+#define CSR_HPMCOUNTER23H   0xc97
+#define CSR_HPMCOUNTER24H   0xc98
+#define CSR_HPMCOUNTER25H   0xc99
+#define CSR_HPMCOUNTER26H   0xc9a
+#define CSR_HPMCOUNTER27H   0xc9b
+#define CSR_HPMCOUNTER28H   0xc9c
+#define CSR_HPMCOUNTER29H   0xc9d
+#define CSR_HPMCOUNTER30H   0xc9e
+#define CSR_HPMCOUNTER31H   0xc9f
+
+/* Machine Timers and Counters */
+#define CSR_MCYCLE          0xb00
+#define CSR_MINSTRET        0xb02
+#define CSR_MCYCLEH         0xb80
+#define CSR_MINSTRETH       0xb82
+
+/* Machine Information Registers */
+#define CSR_MVENDORID       0xf11
+#define CSR_MARCHID         0xf12
+#define CSR_MIMPID          0xf13
+#define CSR_MHARTID         0xf14
+
+/* Machine Trap Setup */
+#define CSR_MSTATUS         0x300
+#define CSR_MISA            0x301
+#define CSR_MEDELEG         0x302
+#define CSR_MIDELEG         0x303
+#define CSR_MIE             0x304
+#define CSR_MTVEC           0x305
+#define CSR_MCOUNTEREN      0x306
+
+/* Legacy Counter Setup (priv v1.9.1) */
+#define CSR_MUCOUNTEREN     0x320
+#define CSR_MSCOUNTEREN     0x321
+
+/* Machine Trap Handling */
+#define CSR_MSCRATCH        0x340
+#define CSR_MEPC            0x341
+#define CSR_MCAUSE          0x342
+#define CSR_MBADADDR        0x343
+#define CSR_MIP             0x344
+
+/* Supervisor Trap Setup */
+#define CSR_SSTATUS         0x100
+#define CSR_SIE             0x104
+#define CSR_STVEC           0x105
+#define CSR_SCOUNTEREN      0x106
+
+/* Supervisor Trap Handling */
+#define CSR_SSCRATCH        0x140
+#define CSR_SEPC            0x141
+#define CSR_SCAUSE          0x142
+#define CSR_SBADADDR        0x143
+#define CSR_SIP             0x144
+
+/* Supervisor Protection and Translation */
+#define CSR_SPTBR           0x180
+#define CSR_SATP            0x180
+
+/* Physical Memory Protection */
+#define CSR_PMPCFG0         0x3a0
+#define CSR_PMPCFG1         0x3a1
+#define CSR_PMPCFG2         0x3a2
+#define CSR_PMPCFG3         0x3a3
+#define CSR_PMPADDR0        0x3b0
+#define CSR_PMPADDR1        0x3b1
+#define CSR_PMPADDR2        0x3b2
+#define CSR_PMPADDR3        0x3b3
+#define CSR_PMPADDR4        0x3b4
+#define CSR_PMPADDR5        0x3b5
+#define CSR_PMPADDR6        0x3b6
+#define CSR_PMPADDR7        0x3b7
+#define CSR_PMPADDR8        0x3b8
+#define CSR_PMPADDR9        0x3b9
+#define CSR_PMPADDR10       0x3ba
+#define CSR_PMPADDR11       0x3bb
+#define CSR_PMPADDR12       0x3bc
+#define CSR_PMPADDR13       0x3bd
+#define CSR_PMPADDR14       0x3be
+#define CSR_PMPADDR15       0x3bf
+
+/* Debug/Trace Registers (shared with Debug Mode) */
+#define CSR_TSELECT         0x7a0
+#define CSR_TDATA1          0x7a1
+#define CSR_TDATA2          0x7a2
+#define CSR_TDATA3          0x7a3
+
+/* Debug Mode Registers */
+#define CSR_DCSR            0x7b0
+#define CSR_DPC             0x7b1
+#define CSR_DSCRATCH        0x7b2
+
+/* Performance Counters */
+#define CSR_MHPMCOUNTER3    0xb03
+#define CSR_MHPMCOUNTER4    0xb04
+#define CSR_MHPMCOUNTER5    0xb05
+#define CSR_MHPMCOUNTER6    0xb06
+#define CSR_MHPMCOUNTER7    0xb07
+#define CSR_MHPMCOUNTER8    0xb08
+#define CSR_MHPMCOUNTER9    0xb09
+#define CSR_MHPMCOUNTER10   0xb0a
+#define CSR_MHPMCOUNTER11   0xb0b
+#define CSR_MHPMCOUNTER12   0xb0c
+#define CSR_MHPMCOUNTER13   0xb0d
+#define CSR_MHPMCOUNTER14   0xb0e
+#define CSR_MHPMCOUNTER15   0xb0f
+#define CSR_MHPMCOUNTER16   0xb10
+#define CSR_MHPMCOUNTER17   0xb11
+#define CSR_MHPMCOUNTER18   0xb12
+#define CSR_MHPMCOUNTER19   0xb13
+#define CSR_MHPMCOUNTER20   0xb14
+#define CSR_MHPMCOUNTER21   0xb15
+#define CSR_MHPMCOUNTER22   0xb16
+#define CSR_MHPMCOUNTER23   0xb17
+#define CSR_MHPMCOUNTER24   0xb18
+#define CSR_MHPMCOUNTER25   0xb19
+#define CSR_MHPMCOUNTER26   0xb1a
+#define CSR_MHPMCOUNTER27   0xb1b
+#define CSR_MHPMCOUNTER28   0xb1c
+#define CSR_MHPMCOUNTER29   0xb1d
+#define CSR_MHPMCOUNTER30   0xb1e
+#define CSR_MHPMCOUNTER31   0xb1f
+#define CSR_MHPMEVENT3      0x323
+#define CSR_MHPMEVENT4      0x324
+#define CSR_MHPMEVENT5      0x325
+#define CSR_MHPMEVENT6      0x326
+#define CSR_MHPMEVENT7      0x327
+#define CSR_MHPMEVENT8      0x328
+#define CSR_MHPMEVENT9      0x329
+#define CSR_MHPMEVENT10     0x32a
+#define CSR_MHPMEVENT11     0x32b
+#define CSR_MHPMEVENT12     0x32c
+#define CSR_MHPMEVENT13     0x32d
+#define CSR_MHPMEVENT14     0x32e
+#define CSR_MHPMEVENT15     0x32f
+#define CSR_MHPMEVENT16     0x330
+#define CSR_MHPMEVENT17     0x331
+#define CSR_MHPMEVENT18     0x332
+#define CSR_MHPMEVENT19     0x333
+#define CSR_MHPMEVENT20     0x334
+#define CSR_MHPMEVENT21     0x335
+#define CSR_MHPMEVENT22     0x336
+#define CSR_MHPMEVENT23     0x337
+#define CSR_MHPMEVENT24     0x338
+#define CSR_MHPMEVENT25     0x339
+#define CSR_MHPMEVENT26     0x33a
+#define CSR_MHPMEVENT27     0x33b
+#define CSR_MHPMEVENT28     0x33c
+#define CSR_MHPMEVENT29     0x33d
+#define CSR_MHPMEVENT30     0x33e
+#define CSR_MHPMEVENT31     0x33f
+#define CSR_MHPMCOUNTER3H   0xb83
+#define CSR_MHPMCOUNTER4H   0xb84
+#define CSR_MHPMCOUNTER5H   0xb85
+#define CSR_MHPMCOUNTER6H   0xb86
+#define CSR_MHPMCOUNTER7H   0xb87
+#define CSR_MHPMCOUNTER8H   0xb88
+#define CSR_MHPMCOUNTER9H   0xb89
+#define CSR_MHPMCOUNTER10H  0xb8a
+#define CSR_MHPMCOUNTER11H  0xb8b
+#define CSR_MHPMCOUNTER12H  0xb8c
+#define CSR_MHPMCOUNTER13H  0xb8d
+#define CSR_MHPMCOUNTER14H  0xb8e
+#define CSR_MHPMCOUNTER15H  0xb8f
+#define CSR_MHPMCOUNTER16H  0xb90
+#define CSR_MHPMCOUNTER17H  0xb91
+#define CSR_MHPMCOUNTER18H  0xb92
+#define CSR_MHPMCOUNTER19H  0xb93
+#define CSR_MHPMCOUNTER20H  0xb94
+#define CSR_MHPMCOUNTER21H  0xb95
+#define CSR_MHPMCOUNTER22H  0xb96
+#define CSR_MHPMCOUNTER23H  0xb97
+#define CSR_MHPMCOUNTER24H  0xb98
+#define CSR_MHPMCOUNTER25H  0xb99
+#define CSR_MHPMCOUNTER26H  0xb9a
+#define CSR_MHPMCOUNTER27H  0xb9b
+#define CSR_MHPMCOUNTER28H  0xb9c
+#define CSR_MHPMCOUNTER29H  0xb9d
+#define CSR_MHPMCOUNTER30H  0xb9e
+#define CSR_MHPMCOUNTER31H  0xb9f
+
+/* mstatus CSR bits */
 #define MSTATUS_UIE         0x00000001
 #define MSTATUS_SIE         0x00000002
 #define MSTATUS_HIE         0x00000004
@@ -276,7 +317,7 @@
 #define MSTATUS_SD MSTATUS64_SD
 #endif
 
-/* sstatus bits */
+/* sstatus CSR bits */
 #define SSTATUS_UIE         0x00000001
 #define SSTATUS_SIE         0x00000002
 #define SSTATUS_UPIE        0x00000010
@@ -297,83 +338,71 @@
 #define SSTATUS_SD SSTATUS64_SD
 #endif
 
-/* irqs */
-#define MIP_SSIP            (1 << IRQ_S_SOFT)
-#define MIP_HSIP            (1 << IRQ_H_SOFT)
-#define MIP_MSIP            (1 << IRQ_M_SOFT)
-#define MIP_STIP            (1 << IRQ_S_TIMER)
-#define MIP_HTIP            (1 << IRQ_H_TIMER)
-#define MIP_MTIP            (1 << IRQ_M_TIMER)
-#define MIP_SEIP            (1 << IRQ_S_EXT)
-#define MIP_HEIP            (1 << IRQ_H_EXT)
-#define MIP_MEIP            (1 << IRQ_M_EXT)
-
-#define SIP_SSIP            MIP_SSIP
-#define SIP_STIP            MIP_STIP
-#define SIP_SEIP            MIP_SEIP
-
+/* Privilege modes */
 #define PRV_U 0
 #define PRV_S 1
 #define PRV_H 2
 #define PRV_M 3
 
-/* privileged ISA 1.9.1 VM modes (mstatus.vm) */
-#define VM_1_09_MBARE 0
-#define VM_1_09_MBB   1
-#define VM_1_09_MBBID 2
-#define VM_1_09_SV32  8
-#define VM_1_09_SV39  9
-#define VM_1_09_SV48  10
+/* RV32 satp CSR field masks */
+#define SATP32_MODE         0x80000000
+#define SATP32_ASID         0x7fc00000
+#define SATP32_PPN          0x003fffff
 
-/* privileged ISA 1.10.0 VM modes (satp.mode) */
-#define VM_1_10_MBARE 0
-#define VM_1_10_SV32  1
-#define VM_1_10_SV39  8
-#define VM_1_10_SV48  9
-#define VM_1_10_SV57  10
-#define VM_1_10_SV64  11
-
-/* privileged ISA interrupt causes */
-#define IRQ_U_SOFT      0  /* since: priv-1.10 */
-#define IRQ_S_SOFT      1
-#define IRQ_H_SOFT      2  /* until: priv-1.9.1 */
-#define IRQ_M_SOFT      3  /* until: priv-1.9.1 */
-#define IRQ_U_TIMER     4  /* since: priv-1.10 */
-#define IRQ_S_TIMER     5
-#define IRQ_H_TIMER     6  /* until: priv-1.9.1 */
-#define IRQ_M_TIMER     7  /* until: priv-1.9.1 */
-#define IRQ_U_EXT       8  /* since: priv-1.10 */
-#define IRQ_S_EXT       9
-#define IRQ_H_EXT       10 /* until: priv-1.9.1 */
-#define IRQ_M_EXT       11 /* until: priv-1.9.1 */
-#define IRQ_X_COP       12 /* non-standard */
-
-/* Default addresses */
-#define DEFAULT_RSTVEC     0x00001000
-
-/* RV32 satp field masks */
-#define SATP32_MODE 0x80000000
-#define SATP32_ASID 0x7fc00000
-#define SATP32_PPN  0x003fffff
-
-/* RV64 satp field masks */
-#define SATP64_MODE 0xF000000000000000ULL
-#define SATP64_ASID 0x0FFFF00000000000ULL
-#define SATP64_PPN  0x00000FFFFFFFFFFFULL
+/* RV64 satp CSR field masks */
+#define SATP64_MODE         0xF000000000000000ULL
+#define SATP64_ASID         0x0FFFF00000000000ULL
+#define SATP64_PPN          0x00000FFFFFFFFFFFULL
 
 #if defined(TARGET_RISCV32)
-#define SATP_MODE SATP32_MODE
-#define SATP_ASID SATP32_ASID
-#define SATP_PPN  SATP32_PPN
+#define SATP_MODE           SATP32_MODE
+#define SATP_ASID           SATP32_ASID
+#define SATP_PPN            SATP32_PPN
 #endif
 #if defined(TARGET_RISCV64)
-#define SATP_MODE SATP64_MODE
-#define SATP_ASID SATP64_ASID
-#define SATP_PPN  SATP64_PPN
+#define SATP_MODE           SATP64_MODE
+#define SATP_ASID           SATP64_ASID
+#define SATP_PPN            SATP64_PPN
 #endif
 
-/* RISCV Exception Codes */
-#define EXCP_NONE                       -1 /* not a real RISCV exception code */
+/* VM modes (mstatus.vm) privileged ISA 1.9.1 */
+#define VM_1_09_MBARE       0
+#define VM_1_09_MBB         1
+#define VM_1_09_MBBID       2
+#define VM_1_09_SV32        8
+#define VM_1_09_SV39        9
+#define VM_1_09_SV48        10
+
+/* VM modes (satp.mode) privileged ISA 1.10 */
+#define VM_1_10_MBARE       0
+#define VM_1_10_SV32        1
+#define VM_1_10_SV39        8
+#define VM_1_10_SV48        9
+#define VM_1_10_SV57        10
+#define VM_1_10_SV64        11
+
+/* Page table entry (PTE) fields */
+#define PTE_V               0x001 /* Valid */
+#define PTE_R               0x002 /* Read */
+#define PTE_W               0x004 /* Write */
+#define PTE_X               0x008 /* Execute */
+#define PTE_U               0x010 /* User */
+#define PTE_G               0x020 /* Global */
+#define PTE_A               0x040 /* Accessed */
+#define PTE_D               0x080 /* Dirty */
+#define PTE_SOFT            0x300 /* Reserved for Software */
+
+/* Page table PPN shift amount */
+#define PTE_PPN_SHIFT       10
+
+/* Leaf page shift amount */
+#define PGSHIFT             12
+
+/* Default Reset Vector address */
+#define DEFAULT_RSTVEC      0x1000
+
+/* Exception causes */
+#define EXCP_NONE                          -1 /* sentinel value */
 #define RISCV_EXCP_INST_ADDR_MIS           0x0
 #define RISCV_EXCP_INST_ACCESS_FAULT       0x1
 #define RISCV_EXCP_ILLEGAL_INST            0x2
@@ -382,9 +411,7 @@
 #define RISCV_EXCP_LOAD_ACCESS_FAULT       0x5
 #define RISCV_EXCP_STORE_AMO_ADDR_MIS      0x6
 #define RISCV_EXCP_STORE_AMO_ACCESS_FAULT  0x7
-#define RISCV_EXCP_U_ECALL                 0x8 /* for convenience, report all
-                                                  ECALLs as this, handler
-                                                  fixes */
+#define RISCV_EXCP_U_ECALL                 0x8
 #define RISCV_EXCP_S_ECALL                 0x9
 #define RISCV_EXCP_H_ECALL                 0xa
 #define RISCV_EXCP_M_ECALL                 0xb
@@ -395,15 +422,35 @@
 #define RISCV_EXCP_INT_FLAG                0x80000000
 #define RISCV_EXCP_INT_MASK                0x7fffffff
 
-/* page table entry (PTE) fields */
-#define PTE_V     0x001 /* Valid */
-#define PTE_R     0x002 /* Read */
-#define PTE_W     0x004 /* Write */
-#define PTE_X     0x008 /* Execute */
-#define PTE_U     0x010 /* User */
-#define PTE_G     0x020 /* Global */
-#define PTE_A     0x040 /* Accessed */
-#define PTE_D     0x080 /* Dirty */
-#define PTE_SOFT  0x300 /* Reserved for Software */
+/* Interrupt causes */
+#define IRQ_U_SOFT                         0
+#define IRQ_S_SOFT                         1
+#define IRQ_H_SOFT                         2  /* reserved */
+#define IRQ_M_SOFT                         3
+#define IRQ_U_TIMER                        4
+#define IRQ_S_TIMER                        5
+#define IRQ_H_TIMER                        6  /* reserved */
+#define IRQ_M_TIMER                        7
+#define IRQ_U_EXT                          8
+#define IRQ_S_EXT                          9
+#define IRQ_H_EXT                          10 /* reserved */
+#define IRQ_M_EXT                          11
 
-#define PTE_PPN_SHIFT 10
+/* mip masks */
+#define MIP_USIP                           (1 << IRQ_U_SOFT)
+#define MIP_SSIP                           (1 << IRQ_S_SOFT)
+#define MIP_HSIP                           (1 << IRQ_H_SOFT)
+#define MIP_MSIP                           (1 << IRQ_M_SOFT)
+#define MIP_UTIP                           (1 << IRQ_U_TIMER)
+#define MIP_STIP                           (1 << IRQ_S_TIMER)
+#define MIP_HTIP                           (1 << IRQ_H_TIMER)
+#define MIP_MTIP                           (1 << IRQ_M_TIMER)
+#define MIP_UEIP                           (1 << IRQ_U_EXT)
+#define MIP_SEIP                           (1 << IRQ_S_EXT)
+#define MIP_HEIP                           (1 << IRQ_H_EXT)
+#define MIP_MEIP                           (1 << IRQ_M_EXT)
+
+/* sip masks */
+#define SIP_SSIP                           MIP_SSIP
+#define SIP_STIP                           MIP_STIP
+#define SIP_SEIP                           MIP_SEIP
diff --git a/target/riscv/helper.c b/target/riscv/cpu_helper.c
similarity index 95%
rename from target/riscv/helper.c
rename to target/riscv/cpu_helper.c
index 63b3386..86f9f47 100644
--- a/target/riscv/helper.c
+++ b/target/riscv/cpu_helper.c
@@ -1,5 +1,5 @@
 /*
- * RISC-V emulation helpers for qemu.
+ * RISC-V CPU helpers for qemu.
  *
  * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
  * Copyright (c) 2017-2018 SiFive, Inc.
@@ -72,6 +72,39 @@
 
 #if !defined(CONFIG_USER_ONLY)
 
+/* Update the mip bits selected by mask; iothread_mutex must be held */
+uint32_t riscv_cpu_update_mip(RISCVCPU *cpu, uint32_t mask, uint32_t value)
+{
+    CPURISCVState *env = &cpu->env;
+    uint32_t old, new, cmp = atomic_read(&env->mip);
+
+    do {
+        old = cmp;
+        new = (old & ~mask) | (value & mask); /* touch masked bits only */
+        cmp = atomic_cmpxchg(&env->mip, old, new);
+    } while (old != cmp); /* retry if mip changed in the meantime */
+
+    if (new && !old) { /* mip went from zero to non-zero */
+        cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HARD);
+    } else if (!new && old) { /* mip went from non-zero to zero */
+        cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_HARD);
+    }
+
+    return old; /* mip value this update was applied to */
+}
+
+void riscv_set_mode(CPURISCVState *env, target_ulong newpriv)
+{
+    if (newpriv > PRV_M) {
+        g_assert_not_reached(); /* values above PRV_M are rejected */
+    }
+    if (newpriv == PRV_H) {
+        newpriv = PRV_U; /* a request for PRV_H is stored as PRV_U */
+    }
+    /* tlb_flush is unnecessary as mode is contained in mmu_idx */
+    env->priv = newpriv;
+}
+
 /* get_physical_address - get the physical address for this virtual address
  *
  * Do a page table walk to obtain the physical address corresponding to a
diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
index aec7558..3726299 100644
--- a/target/riscv/op_helper.c
+++ b/target/riscv/op_helper.c
@@ -90,7 +90,7 @@
         target_ulong csrno)
 {
 #ifndef CONFIG_USER_ONLY
-    uint64_t delegable_ints = MIP_SSIP | MIP_STIP | MIP_SEIP | (1 << IRQ_X_COP);
+    uint64_t delegable_ints = MIP_SSIP | MIP_STIP | MIP_SEIP;
     uint64_t all_ints = delegable_ints | MIP_MSIP | MIP_MTIP;
 #endif
 
@@ -171,10 +171,8 @@
          */
         qemu_mutex_lock_iothread();
         RISCVCPU *cpu = riscv_env_get_cpu(env);
-        riscv_set_local_interrupt(cpu, MIP_SSIP,
-                                  (val_to_write & MIP_SSIP) != 0);
-        riscv_set_local_interrupt(cpu, MIP_STIP,
-                                  (val_to_write & MIP_STIP) != 0);
+        riscv_cpu_update_mip(cpu, MIP_SSIP | MIP_STIP,
+                                  (val_to_write & (MIP_SSIP | MIP_STIP)));
         /*
          * csrs, csrc on mip.SEIP is not decomposable into separate read and
          * write steps, so a different implementation is needed
@@ -656,31 +654,6 @@
 
 #ifndef CONFIG_USER_ONLY
 
-/* iothread_mutex must be held */
-void riscv_set_local_interrupt(RISCVCPU *cpu, target_ulong mask, int value)
-{
-    target_ulong old_mip = cpu->env.mip;
-    cpu->env.mip = (old_mip & ~mask) | (value ? mask : 0);
-
-    if (cpu->env.mip && !old_mip) {
-        cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HARD);
-    } else if (!cpu->env.mip && old_mip) {
-        cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_HARD);
-    }
-}
-
-void riscv_set_mode(CPURISCVState *env, target_ulong newpriv)
-{
-    if (newpriv > PRV_M) {
-        g_assert_not_reached();
-    }
-    if (newpriv == PRV_H) {
-        newpriv = PRV_U;
-    }
-    /* tlb_flush is unnecessary as mode is contained in mmu_idx */
-    env->priv = newpriv;
-}
-
 target_ulong helper_sret(CPURISCVState *env, target_ulong cpu_pc_deb)
 {
     if (!(env->priv >= PRV_S)) {
@@ -731,7 +704,6 @@
     return retpc;
 }
 
-
 void helper_wfi(CPURISCVState *env)
 {
     CPUState *cs = CPU(riscv_env_get_cpu(env));
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index 8ed4823..18ba7f8 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -145,6 +145,11 @@
     env->cregs[0] = CR0_RESET;
     env->cregs[14] = CR14_RESET;
 
+#if defined(CONFIG_USER_ONLY)
+    /* user mode should always be allowed to use the full FPU */
+    env->cregs[0] |= CR0_AFP;
+#endif
+
     /* architectured initial value for Breaking-Event-Address register */
     env->gbea = 1;
 
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 6f8861e..8c2320e 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -255,6 +255,7 @@
 
 /* PSW defines */
 #undef PSW_MASK_PER
+#undef PSW_MASK_UNUSED_2
 #undef PSW_MASK_DAT
 #undef PSW_MASK_IO
 #undef PSW_MASK_EXT
@@ -273,6 +274,7 @@
 #undef PSW_MASK_ESA_ADDR
 
 #define PSW_MASK_PER            0x4000000000000000ULL
+#define PSW_MASK_UNUSED_2       0x2000000000000000ULL
 #define PSW_MASK_DAT            0x0400000000000000ULL
 #define PSW_MASK_IO             0x0200000000000000ULL
 #define PSW_MASK_EXT            0x0100000000000000ULL
@@ -318,10 +320,14 @@
 #define FLAG_MASK_PSW           (FLAG_MASK_PER | FLAG_MASK_DAT | FLAG_MASK_PSTATE \
                                 | FLAG_MASK_ASC | FLAG_MASK_64 | FLAG_MASK_32)
 
+/* we'll use some unused PSW positions to store CR flags in tb flags */
+#define FLAG_MASK_AFP           (PSW_MASK_UNUSED_2 >> FLAG_MASK_PSW_SHIFT)
+
 /* Control register 0 bits */
 #define CR0_LOWPROT             0x0000000010000000ULL
 #define CR0_SECONDARY           0x0000000004000000ULL
 #define CR0_EDAT                0x0000000000800000ULL
+#define CR0_AFP                 0x0000000000040000ULL
 #define CR0_EMERGENCY_SIGNAL_SC 0x0000000000004000ULL
 #define CR0_EXTERNAL_CALL_SC    0x0000000000002000ULL
 #define CR0_CKC_SC              0x0000000000000800ULL
@@ -363,6 +369,9 @@
     *pc = env->psw.addr;
     *cs_base = env->ex_value;
     *flags = (env->psw.mask >> FLAG_MASK_PSW_SHIFT) & FLAG_MASK_PSW;
+    if (env->cregs[0] & CR0_AFP) {
+        *flags |= FLAG_MASK_AFP;
+    }
 }
 
 /* PER bits from control register 9 */
diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
index 172fb18..60cfeba 100644
--- a/target/s390x/cpu_features.c
+++ b/target/s390x/cpu_features.c
@@ -39,8 +39,10 @@
     FEAT_INIT("srs", S390_FEAT_TYPE_STFL, 9, "Sense-running-status facility"),
     FEAT_INIT("csske", S390_FEAT_TYPE_STFL, 10, "Conditional-SSKE facility"),
     FEAT_INIT("ctop", S390_FEAT_TYPE_STFL, 11, "Configuration-topology facility"),
+    FEAT_INIT("apqci", S390_FEAT_TYPE_STFL, 12, "Query AP Configuration Information facility"),
     FEAT_INIT("ipter", S390_FEAT_TYPE_STFL, 13, "IPTE-range facility"),
     FEAT_INIT("nonqks", S390_FEAT_TYPE_STFL, 14, "Nonquiescing key-setting facility"),
+    FEAT_INIT("apft", S390_FEAT_TYPE_STFL, 15, "AP Facilities Test facility"),
     FEAT_INIT("etf2", S390_FEAT_TYPE_STFL, 16, "Extended-translation facility 2"),
     FEAT_INIT("msa-base", S390_FEAT_TYPE_STFL, 17, "Message-security-assist facility (excluding subfunctions)"),
     FEAT_INIT("ldisp", S390_FEAT_TYPE_STFL, 18, "Long-displacement facility"),
@@ -129,6 +131,7 @@
 
     FEAT_INIT_MISC("dateh2", "DAT-enhancement facility 2"),
     FEAT_INIT_MISC("cmm", "Collaborative-memory-management facility"),
+    FEAT_INIT_MISC("ap", "AP instructions installed"),
 
     FEAT_INIT("plo-cl", S390_FEAT_TYPE_PLO, 0, "PLO Compare and load (32 bit in general registers)"),
     FEAT_INIT("plo-clg", S390_FEAT_TYPE_PLO, 1, "PLO Compare and load (64 bit in parameter list)"),
diff --git a/target/s390x/cpu_features_def.h b/target/s390x/cpu_features_def.h
index ac2c947..5fc7e7b 100644
--- a/target/s390x/cpu_features_def.h
+++ b/target/s390x/cpu_features_def.h
@@ -27,8 +27,10 @@
     S390_FEAT_SENSE_RUNNING_STATUS,
     S390_FEAT_CONDITIONAL_SSKE,
     S390_FEAT_CONFIGURATION_TOPOLOGY,
+    S390_FEAT_AP_QUERY_CONFIG_INFO,
     S390_FEAT_IPTE_RANGE,
     S390_FEAT_NONQ_KEY_SETTING,
+    S390_FEAT_AP_FACILITIES_TEST,
     S390_FEAT_EXTENDED_TRANSLATION_2,
     S390_FEAT_MSA,
     S390_FEAT_LONG_DISPLACEMENT,
@@ -119,6 +121,7 @@
     /* Misc */
     S390_FEAT_DAT_ENH_2,
     S390_FEAT_CMM,
+    S390_FEAT_AP,
 
     /* PLO */
     S390_FEAT_PLO_CL,
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 265d25c..7c253ff 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -786,6 +786,8 @@
         { S390_FEAT_PRNO_TRNG_QRTCR, S390_FEAT_MSA_EXT_5 },
         { S390_FEAT_PRNO_TRNG, S390_FEAT_MSA_EXT_5 },
         { S390_FEAT_SIE_KSS, S390_FEAT_SIE_F2 },
+        { S390_FEAT_AP_QUERY_CONFIG_INFO, S390_FEAT_AP },
+        { S390_FEAT_AP_FACILITIES_TEST, S390_FEAT_AP },
     };
     int i;
 
diff --git a/target/s390x/excp_helper.c b/target/s390x/excp_helper.c
index f0ce60c..2a33222 100644
--- a/target/s390x/excp_helper.c
+++ b/target/s390x/excp_helper.c
@@ -21,32 +21,51 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "internal.h"
+#include "exec/helper-proto.h"
 #include "qemu/timer.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "hw/s390x/ioinst.h"
 #include "exec/address-spaces.h"
+#include "tcg_s390x.h"
 #ifndef CONFIG_USER_ONLY
 #include "sysemu/sysemu.h"
 #include "hw/s390x/s390_flic.h"
 #endif
 
-/* #define DEBUG_S390 */
-/* #define DEBUG_S390_STDOUT */
+void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, uint32_t code,
+                                              int ilen, uintptr_t ra)
+{
+    CPUState *cs = CPU(s390_env_get_cpu(env));
 
-#ifdef DEBUG_S390
-#ifdef DEBUG_S390_STDOUT
-#define DPRINTF(fmt, ...) \
-    do { fprintf(stderr, fmt, ## __VA_ARGS__); \
-         if (qemu_log_separate()) { qemu_log(fmt, ##__VA_ARGS__); } } while (0)
-#else
-#define DPRINTF(fmt, ...) \
-    do { qemu_log(fmt, ## __VA_ARGS__); } while (0)
+    cpu_restore_state(cs, ra, true);
+    qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n",
+                  env->psw.addr);
+    trigger_pgm_exception(env, code, ilen);
+    cpu_loop_exit(cs);
+}
+
+void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc,
+                                           uintptr_t ra)
+{
+    g_assert(dxc <= 0xff);
+#if !defined(CONFIG_USER_ONLY)
+    /* Store the DXC into the lowcore */
+    stl_phys(CPU(s390_env_get_cpu(env))->as,
+             env->psa + offsetof(LowCore, data_exc_code), dxc);
 #endif
-#else
-#define DPRINTF(fmt, ...) \
-    do { } while (0)
-#endif
+
+    /* Store the DXC into the FPC if AFP is enabled */
+    if (env->cregs[0] & CR0_AFP) {
+        env->fpc = deposit32(env->fpc, 8, 8, dxc);
+    }
+    tcg_s390_program_interrupt(env, PGM_DATA, ILEN_AUTO, ra);
+}
+
+void HELPER(data_exception)(CPUS390XState *env, uint32_t dxc)
+{
+    tcg_s390_data_exception(env, dxc, GETPC()); /* QEMU_NORETURN callee */
+}
 
 #if defined(CONFIG_USER_ONLY)
 
@@ -92,8 +111,8 @@
     uint64_t asc;
     int prot;
 
-    DPRINTF("%s: address 0x%" VADDR_PRIx " rw %d mmu_idx %d\n",
-            __func__, orig_vaddr, rw, mmu_idx);
+    qemu_log_mask(CPU_LOG_MMU, "%s: addr 0x%" VADDR_PRIx " rw %d mmu_idx %d\n",
+                  __func__, orig_vaddr, rw, mmu_idx);
 
     vaddr = orig_vaddr;
 
@@ -122,8 +141,9 @@
     if (!address_space_access_valid(&address_space_memory, raddr,
                                     TARGET_PAGE_SIZE, rw,
                                     MEMTXATTRS_UNSPECIFIED)) {
-        DPRINTF("%s: raddr %" PRIx64 " > ram_size %" PRIx64 "\n", __func__,
-                (uint64_t)raddr, (uint64_t)ram_size);
+        qemu_log_mask(CPU_LOG_MMU,
+                      "%s: raddr %" PRIx64 " > ram_size %" PRIx64 "\n",
+                      __func__, (uint64_t)raddr, (uint64_t)ram_size);
         trigger_pgm_exception(env, PGM_ADDRESSING, ILEN_AUTO);
         return 1;
     }
@@ -181,8 +201,10 @@
         break;
     }
 
-    qemu_log_mask(CPU_LOG_INT, "%s: code=0x%x ilen=%d\n",
-                  __func__, env->int_pgm_code, ilen);
+    qemu_log_mask(CPU_LOG_INT,
+                  "%s: code=0x%x ilen=%d psw: %" PRIx64 " %" PRIx64 "\n",
+                  __func__, env->int_pgm_code, ilen, env->psw.mask,
+                  env->psw.addr);
 
     lowcore = cpu_map_lowcore(env);
 
@@ -204,10 +226,6 @@
 
     cpu_unmap_lowcore(lowcore);
 
-    DPRINTF("%s: %x %x %" PRIx64 " %" PRIx64 "\n", __func__,
-            env->int_pgm_code, ilen, env->psw.mask,
-            env->psw.addr);
-
     load_psw(env, mask, addr);
 }
 
@@ -298,9 +316,6 @@
 
     cpu_unmap_lowcore(lowcore);
 
-    DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__,
-            env->psw.mask, env->psw.addr);
-
     load_psw(env, mask, addr);
 }
 
@@ -329,8 +344,6 @@
     cpu_unmap_lowcore(lowcore);
     g_free(io);
 
-    DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__, env->psw.mask,
-            env->psw.addr);
     load_psw(env, mask, addr);
 }
 
@@ -372,9 +385,6 @@
 
     cpu_unmap_lowcore(lowcore);
 
-    DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__,
-            env->psw.mask, env->psw.addr);
-
     load_psw(env, mask, addr);
 }
 
@@ -385,8 +395,8 @@
     CPUS390XState *env = &cpu->env;
     bool stopped = false;
 
-    qemu_log_mask(CPU_LOG_INT, "%s: %d at pc=%" PRIx64 "\n",
-                  __func__, cs->exception_index, env->psw.addr);
+    qemu_log_mask(CPU_LOG_INT, "%s: %d at psw=%" PRIx64 ":%" PRIx64 "\n",
+                  __func__, cs->exception_index, env->psw.mask, env->psw.addr);
 
 try_deliver:
     /* handle machine checks */
diff --git a/target/s390x/fpu_helper.c b/target/s390x/fpu_helper.c
index 5c5b451..1b662d2 100644
--- a/target/s390x/fpu_helper.c
+++ b/target/s390x/fpu_helper.c
@@ -21,6 +21,7 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "internal.h"
+#include "tcg_s390x.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
@@ -40,14 +41,6 @@
      ? (mask / (from / to)) & to    \
      : (mask & from) * (to / from))
 
-static void ieee_exception(CPUS390XState *env, uint32_t dxc, uintptr_t retaddr)
-{
-    /* Install the DXC code.  */
-    env->fpc = (env->fpc & ~0xff00) | (dxc << 8);
-    /* Trap.  */
-    s390_program_interrupt(env, PGM_DATA, ILEN_AUTO, retaddr);
-}
-
 /* Should be called after any operation that may raise IEEE exceptions.  */
 static void handle_exceptions(CPUS390XState *env, uintptr_t retaddr)
 {
@@ -75,7 +68,7 @@
     /* Send signals for enabled exceptions.  */
     s390_exc &= env->fpc >> 24;
     if (s390_exc) {
-        ieee_exception(env, s390_exc, retaddr);
+        tcg_s390_data_exception(env, s390_exc, retaddr);
     }
 }
 
@@ -773,6 +766,6 @@
        is also 1, a simulated-iee-exception trap occurs.  */
     s390_exc = (signalling >> 16) & (source >> 24);
     if (s390_exc) {
-        ieee_exception(env, s390_exc | 3, GETPC());
+        tcg_s390_data_exception(env, s390_exc | 3, GETPC());
     }
 }
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 384b61c..70015ea 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -447,6 +447,9 @@
     S390_FEAT_ADAPTER_INT_SUPPRESSION,
     S390_FEAT_EDAT_2,
     S390_FEAT_SIDE_EFFECT_ACCESS_ESOP2,
+    S390_FEAT_AP_QUERY_CONFIG_INFO,
+    S390_FEAT_AP_FACILITIES_TEST,
+    S390_FEAT_AP,
 };
 
 static uint16_t full_GEN12_GA2[] = {
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 97c60ca..018e9dd 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -1,4 +1,5 @@
 DEF_HELPER_2(exception, noreturn, env, i32)
+DEF_HELPER_2(data_exception, noreturn, env, i32)
 DEF_HELPER_FLAGS_4(nc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(oc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(xc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 9c7b434..54e39df 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -3,6 +3,8 @@
  *
  *  C(OPC,    NAME,    FMT,   FAC, I1, I2, P, W, OP, CC)
  *  D(OPC,    NAME,    FMT,   FAC, I1, I2, P, W, OP, CC, DATA)
+ *  E(OPC,    NAME,    FMT,   FAC, I1, I2, P, W, OP, CC, DATA, FLAGS)
+ *  F(OPC,    NAME,    FMT,   FAC, I1, I2, P, W, OP, CC, FLAGS)
  *
  *  OPC  = (op << 8) | op2 where op is the major, op2 the minor opcode
  *  NAME = name of the opcode, used internally
@@ -15,6 +17,7 @@
  *  OP   = func op_xx does the bulk of the operation
  *  CC   = func cout_xx defines how cc should get set
  *  DATA = immediate argument to op_xx function
+ *  FLAGS = categorize the type of instruction (e.g. for advanced checks)
  *
  *  The helpers get called in order: I1, I2, P, OP, W, CC
  */
@@ -29,11 +32,11 @@
     C(0xb9e8, AGRK,    RRF_a, DO,  r2, r3, r1, 0, add, adds64)
     C(0xe308, AG,      RXY_a, Z,   r1, m2_64, r1, 0, add, adds64)
     C(0xe318, AGF,     RXY_a, Z,   r1, m2_32s, r1, 0, add, adds64)
-    C(0xb30a, AEBR,    RRE,   Z,   e1, e2, new, e1, aeb, f32)
-    C(0xb31a, ADBR,    RRE,   Z,   f1_o, f2_o, f1, 0, adb, f64)
-    C(0xb34a, AXBR,    RRE,   Z,   0, x2_o, x1, 0, axb, f128)
-    C(0xed0a, AEB,     RXE,   Z,   e1, m2_32u, new, e1, aeb, f32)
-    C(0xed1a, ADB,     RXE,   Z,   f1_o, m2_64, f1, 0, adb, f64)
+    F(0xb30a, AEBR,    RRE,   Z,   e1, e2, new, e1, aeb, f32, IF_BFP)
+    F(0xb31a, ADBR,    RRE,   Z,   f1_o, f2_o, f1, 0, adb, f64, IF_BFP)
+    F(0xb34a, AXBR,    RRE,   Z,   0, x2_o, x1, 0, axb, f128, IF_BFP)
+    F(0xed0a, AEB,     RXE,   Z,   e1, m2_32u, new, e1, aeb, f32, IF_BFP)
+    F(0xed1a, ADB,     RXE,   Z,   f1_o, m2_64, f1, 0, adb, f64, IF_BFP)
 /* ADD HIGH */
     C(0xb9c8, AHHHR,   RRF_a, HW,  r2_sr32, r3_sr32, new, r1_32h, add, adds32)
     C(0xb9d8, AHHLR,   RRF_a, HW,  r2_sr32, r3, new, r1_32h, add, adds32)
@@ -151,7 +154,7 @@
     C(0xb241, CKSM,    RRE,   Z,   r1_o, ra2, new, r1_32, cksm, 0)
 
 /* COPY SIGN */
-    C(0xb372, CPSDR,   RRF_b, FPSSH, f3_o, f2_o, f1, 0, cps, 0)
+    F(0xb372, CPSDR,   RRF_b, FPSSH, f3_o, f2_o, f1, 0, cps, 0, IF_AFP1 | IF_AFP2 | IF_AFP3)
 
 /* COMPARE */
     C(0x1900, CR,      RR_a,  Z,   r1_o, r2_o, 0, 0, 0, cmps32)
@@ -161,17 +164,17 @@
     C(0xb930, CGFR,    RRE,   Z,   r1_o, r2_32s, 0, 0, 0, cmps64)
     C(0xe320, CG,      RXY_a, Z,   r1_o, m2_64, 0, 0, 0, cmps64)
     C(0xe330, CGF,     RXY_a, Z,   r1_o, m2_32s, 0, 0, 0, cmps64)
-    C(0xb309, CEBR,    RRE,   Z,   e1, e2, 0, 0, ceb, 0)
-    C(0xb319, CDBR,    RRE,   Z,   f1_o, f2_o, 0, 0, cdb, 0)
-    C(0xb349, CXBR,    RRE,   Z,   x1_o, x2_o, 0, 0, cxb, 0)
-    C(0xed09, CEB,     RXE,   Z,   e1, m2_32u, 0, 0, ceb, 0)
-    C(0xed19, CDB,     RXE,   Z,   f1_o, m2_64, 0, 0, cdb, 0)
+    F(0xb309, CEBR,    RRE,   Z,   e1, e2, 0, 0, ceb, 0, IF_BFP)
+    F(0xb319, CDBR,    RRE,   Z,   f1_o, f2_o, 0, 0, cdb, 0, IF_BFP)
+    F(0xb349, CXBR,    RRE,   Z,   x1_o, x2_o, 0, 0, cxb, 0, IF_BFP)
+    F(0xed09, CEB,     RXE,   Z,   e1, m2_32u, 0, 0, ceb, 0, IF_BFP)
+    F(0xed19, CDB,     RXE,   Z,   f1_o, m2_64, 0, 0, cdb, 0, IF_BFP)
 /* COMPARE AND SIGNAL */
-    C(0xb308, KEBR,    RRE,   Z,   e1, e2, 0, 0, keb, 0)
-    C(0xb318, KDBR,    RRE,   Z,   f1_o, f2_o, 0, 0, kdb, 0)
-    C(0xb348, KXBR,    RRE,   Z,   x1_o, x2_o, 0, 0, kxb, 0)
-    C(0xed08, KEB,     RXE,   Z,   e1, m2_32u, 0, 0, keb, 0)
-    C(0xed18, KDB,     RXE,   Z,   f1_o, m2_64, 0, 0, kdb, 0)
+    F(0xb308, KEBR,    RRE,   Z,   e1, e2, 0, 0, keb, 0, IF_BFP)
+    F(0xb318, KDBR,    RRE,   Z,   f1_o, f2_o, 0, 0, kdb, 0, IF_BFP)
+    F(0xb348, KXBR,    RRE,   Z,   x1_o, x2_o, 0, 0, kxb, 0, IF_BFP)
+    F(0xed08, KEB,     RXE,   Z,   e1, m2_32u, 0, 0, keb, 0, IF_BFP)
+    F(0xed18, KDB,     RXE,   Z,   f1_o, m2_64, 0, 0, kdb, 0, IF_BFP)
 /* COMPARE IMMEDIATE */
     C(0xc20d, CFI,     RIL_a, EI,  r1, i2, 0, 0, 0, cmps32)
     C(0xc20c, CGFI,    RIL_a, EI,  r1, i2, 0, 0, 0, cmps64)
@@ -288,33 +291,33 @@
     C(0x4e00, CVD,     RX_a,  Z,   r1_o, a2, 0, 0, cvd, 0)
     C(0xe326, CVDY,    RXY_a, LD,  r1_o, a2, 0, 0, cvd, 0)
 /* CONVERT TO FIXED */
-    C(0xb398, CFEBR,   RRF_e, Z,   0, e2, new, r1_32, cfeb, 0)
-    C(0xb399, CFDBR,   RRF_e, Z,   0, f2_o, new, r1_32, cfdb, 0)
-    C(0xb39a, CFXBR,   RRF_e, Z,   0, x2_o, new, r1_32, cfxb, 0)
-    C(0xb3a8, CGEBR,   RRF_e, Z,   0, e2, r1, 0, cgeb, 0)
-    C(0xb3a9, CGDBR,   RRF_e, Z,   0, f2_o, r1, 0, cgdb, 0)
-    C(0xb3aa, CGXBR,   RRF_e, Z,   0, x2_o, r1, 0, cgxb, 0)
+    F(0xb398, CFEBR,   RRF_e, Z,   0, e2, new, r1_32, cfeb, 0, IF_BFP)
+    F(0xb399, CFDBR,   RRF_e, Z,   0, f2_o, new, r1_32, cfdb, 0, IF_BFP)
+    F(0xb39a, CFXBR,   RRF_e, Z,   0, x2_o, new, r1_32, cfxb, 0, IF_BFP)
+    F(0xb3a8, CGEBR,   RRF_e, Z,   0, e2, r1, 0, cgeb, 0, IF_BFP)
+    F(0xb3a9, CGDBR,   RRF_e, Z,   0, f2_o, r1, 0, cgdb, 0, IF_BFP)
+    F(0xb3aa, CGXBR,   RRF_e, Z,   0, x2_o, r1, 0, cgxb, 0, IF_BFP)
 /* CONVERT FROM FIXED */
-    C(0xb394, CEFBR,   RRF_e, Z,   0, r2_32s, new, e1, cegb, 0)
-    C(0xb395, CDFBR,   RRF_e, Z,   0, r2_32s, f1, 0, cdgb, 0)
-    C(0xb396, CXFBR,   RRF_e, Z,   0, r2_32s, x1, 0, cxgb, 0)
-    C(0xb3a4, CEGBR,   RRF_e, Z,   0, r2_o, new, e1, cegb, 0)
-    C(0xb3a5, CDGBR,   RRF_e, Z,   0, r2_o, f1, 0, cdgb, 0)
-    C(0xb3a6, CXGBR,   RRF_e, Z,   0, r2_o, x1, 0, cxgb, 0)
+    F(0xb394, CEFBR,   RRF_e, Z,   0, r2_32s, new, e1, cegb, 0, IF_BFP)
+    F(0xb395, CDFBR,   RRF_e, Z,   0, r2_32s, f1, 0, cdgb, 0, IF_BFP)
+    F(0xb396, CXFBR,   RRF_e, Z,   0, r2_32s, x1, 0, cxgb, 0, IF_BFP)
+    F(0xb3a4, CEGBR,   RRF_e, Z,   0, r2_o, new, e1, cegb, 0, IF_BFP)
+    F(0xb3a5, CDGBR,   RRF_e, Z,   0, r2_o, f1, 0, cdgb, 0, IF_BFP)
+    F(0xb3a6, CXGBR,   RRF_e, Z,   0, r2_o, x1, 0, cxgb, 0, IF_BFP)
 /* CONVERT TO LOGICAL */
-    C(0xb39c, CLFEBR,  RRF_e, FPE, 0, e2, new, r1_32, clfeb, 0)
-    C(0xb39d, CLFDBR,  RRF_e, FPE, 0, f2_o, new, r1_32, clfdb, 0)
-    C(0xb39e, CLFXBR,  RRF_e, FPE, 0, x2_o, new, r1_32, clfxb, 0)
-    C(0xb3ac, CLGEBR,  RRF_e, FPE, 0, e2, r1, 0, clgeb, 0)
-    C(0xb3ad, CLGDBR,  RRF_e, FPE, 0, f2_o, r1, 0, clgdb, 0)
-    C(0xb3ae, CLGXBR,  RRF_e, FPE, 0, x2_o, r1, 0, clgxb, 0)
+    F(0xb39c, CLFEBR,  RRF_e, FPE, 0, e2, new, r1_32, clfeb, 0, IF_BFP)
+    F(0xb39d, CLFDBR,  RRF_e, FPE, 0, f2_o, new, r1_32, clfdb, 0, IF_BFP)
+    F(0xb39e, CLFXBR,  RRF_e, FPE, 0, x2_o, new, r1_32, clfxb, 0, IF_BFP)
+    F(0xb3ac, CLGEBR,  RRF_e, FPE, 0, e2, r1, 0, clgeb, 0, IF_BFP)
+    F(0xb3ad, CLGDBR,  RRF_e, FPE, 0, f2_o, r1, 0, clgdb, 0, IF_BFP)
+    F(0xb3ae, CLGXBR,  RRF_e, FPE, 0, x2_o, r1, 0, clgxb, 0, IF_BFP)
 /* CONVERT FROM LOGICAL */
-    C(0xb390, CELFBR,  RRF_e, FPE, 0, r2_32u, new, e1, celgb, 0)
-    C(0xb391, CDLFBR,  RRF_e, FPE, 0, r2_32u, f1, 0, cdlgb, 0)
-    C(0xb392, CXLFBR,  RRF_e, FPE, 0, r2_32u, x1, 0, cxlgb, 0)
-    C(0xb3a0, CELGBR,  RRF_e, FPE, 0, r2_o, new, e1, celgb, 0)
-    C(0xb3a1, CDLGBR,  RRF_e, FPE, 0, r2_o, f1, 0, cdlgb, 0)
-    C(0xb3a2, CXLGBR,  RRF_e, FPE, 0, r2_o, x1, 0, cxlgb, 0)
+    F(0xb390, CELFBR,  RRF_e, FPE, 0, r2_32u, new, e1, celgb, 0, IF_BFP)
+    F(0xb391, CDLFBR,  RRF_e, FPE, 0, r2_32u, f1, 0, cdlgb, 0, IF_BFP)
+    F(0xb392, CXLFBR,  RRF_e, FPE, 0, r2_32u, x1, 0, cxlgb, 0, IF_BFP)
+    F(0xb3a0, CELGBR,  RRF_e, FPE, 0, r2_o, new, e1, celgb, 0, IF_BFP)
+    F(0xb3a1, CDLGBR,  RRF_e, FPE, 0, r2_o, f1, 0, cdlgb, 0, IF_BFP)
+    F(0xb3a2, CXLGBR,  RRF_e, FPE, 0, r2_o, x1, 0, cxlgb, 0, IF_BFP)
 
 /* CONVERT UTF-8 TO UTF-16 */
     D(0xb2a7, CU12,    RRF_c, Z,   0, 0, 0, 0, cuXX, 0, 12)
@@ -332,11 +335,11 @@
 /* DIVIDE */
     C(0x1d00, DR,      RR_a,  Z,   r1_D32, r2_32s, new_P, r1_P32, divs32, 0)
     C(0x5d00, D,       RX_a,  Z,   r1_D32, m2_32s, new_P, r1_P32, divs32, 0)
-    C(0xb30d, DEBR,    RRE,   Z,   e1, e2, new, e1, deb, 0)
-    C(0xb31d, DDBR,    RRE,   Z,   f1_o, f2_o, f1, 0, ddb, 0)
-    C(0xb34d, DXBR,    RRE,   Z,   0, x2_o, x1, 0, dxb, 0)
-    C(0xed0d, DEB,     RXE,   Z,   e1, m2_32u, new, e1, deb, 0)
-    C(0xed1d, DDB,     RXE,   Z,   f1_o, m2_64, f1, 0, ddb, 0)
+    F(0xb30d, DEBR,    RRE,   Z,   e1, e2, new, e1, deb, 0, IF_BFP)
+    F(0xb31d, DDBR,    RRE,   Z,   f1_o, f2_o, f1, 0, ddb, 0, IF_BFP)
+    F(0xb34d, DXBR,    RRE,   Z,   0, x2_o, x1, 0, dxb, 0, IF_BFP)
+    F(0xed0d, DEB,     RXE,   Z,   e1, m2_32u, new, e1, deb, 0, IF_BFP)
+    F(0xed1d, DDB,     RXE,   Z,   f1_o, m2_64, f1, 0, ddb, 0, IF_BFP)
 /* DIVIDE LOGICAL */
     C(0xb997, DLR,     RRE,   Z,   r1_D32, r2_32u, new_P, r1_P32, divu32, 0)
     C(0xe397, DL,      RXY_a, Z,   r1_D32, m2_32u, new_P, r1_P32, divu32, 0)
@@ -375,7 +378,7 @@
 /* EXTRACT CPU TIME */
     C(0xc801, ECTG,    SSF,   ECT, 0, 0, 0, 0, ectg, 0)
 /* EXTRACT FPC */
-    C(0xb38c, EFPC,    RRE,   Z,   0, 0, new, r1_32, efpc, 0)
+    F(0xb38c, EFPC,    RRE,   Z,   0, 0, new, r1_32, efpc, 0, IF_BFP)
 /* EXTRACT PSW */
     C(0xb98d, EPSW,    RRE,   Z,   0, 0, 0, 0, epsw, 0)
 
@@ -407,13 +410,13 @@
     C(0xb914, LGFR,    RRE,   Z,   0, r2_32s, 0, r1, mov2, 0)
     C(0xe304, LG,      RXY_a, Z,   0, a2, r1, 0, ld64, 0)
     C(0xe314, LGF,     RXY_a, Z,   0, a2, r1, 0, ld32s, 0)
-    C(0x2800, LDR,     RR_a,  Z,   0, f2_o, 0, f1, mov2, 0)
-    C(0x6800, LD,      RX_a,  Z,   0, m2_64, 0, f1, mov2, 0)
-    C(0xed65, LDY,     RXY_a, LD,  0, m2_64, 0, f1, mov2, 0)
-    C(0x3800, LER,     RR_a,  Z,   0, e2, 0, cond_e1e2, mov2, 0)
-    C(0x7800, LE,      RX_a,  Z,   0, m2_32u, 0, e1, mov2, 0)
-    C(0xed64, LEY,     RXY_a, LD,  0, m2_32u, 0, e1, mov2, 0)
-    C(0xb365, LXR,     RRE,   Z,   0, x2_o, 0, x1, movx, 0)
+    F(0x2800, LDR,     RR_a,  Z,   0, f2_o, 0, f1, mov2, 0, IF_AFP1 | IF_AFP2)
+    F(0x6800, LD,      RX_a,  Z,   0, m2_64, 0, f1, mov2, 0, IF_AFP1)
+    F(0xed65, LDY,     RXY_a, LD,  0, m2_64, 0, f1, mov2, 0, IF_AFP1)
+    F(0x3800, LER,     RR_a,  Z,   0, e2, 0, cond_e1e2, mov2, 0, IF_AFP1 | IF_AFP2)
+    F(0x7800, LE,      RX_a,  Z,   0, m2_32u, 0, e1, mov2, 0, IF_AFP1)
+    F(0xed64, LEY,     RXY_a, LD,  0, m2_32u, 0, e1, mov2, 0, IF_AFP1)
+    F(0xb365, LXR,     RRE,   Z,   0, x2_o, 0, x1, movx, 0, IF_AFP1)
 /* LOAD IMMEDIATE */
     C(0xc001, LGFI,    RIL_a, EI,  0, i2, 0, r1, mov2, 0)
 /* LOAD RELATIVE LONG */
@@ -450,9 +453,9 @@
     C(0xe312, LT,      RXY_a, EI,  0, a2, new, r1_32, ld32s, s64)
     C(0xe302, LTG,     RXY_a, EI,  0, a2, r1, 0, ld64, s64)
     C(0xe332, LTGF,    RXY_a, GIE, 0, a2, r1, 0, ld32s, s64)
-    C(0xb302, LTEBR,   RRE,   Z,   0, e2, 0, cond_e1e2, mov2, f32)
-    C(0xb312, LTDBR,   RRE,   Z,   0, f2_o, 0, f1, mov2, f64)
-    C(0xb342, LTXBR,   RRE,   Z,   0, x2_o, 0, x1, movx, f128)
+    F(0xb302, LTEBR,   RRE,   Z,   0, e2, 0, cond_e1e2, mov2, f32, IF_BFP)
+    F(0xb312, LTDBR,   RRE,   Z,   0, f2_o, 0, f1, mov2, f64, IF_BFP)
+    F(0xb342, LTXBR,   RRE,   Z,   0, x2_o, 0, x1, movx, f128, IF_BFP)
 /* LOAD AND TRAP */
     C(0xe39f, LAT,     RXY_a, LAT, 0, m2_32u, r1, 0, lat, 0)
     C(0xe385, LGAT,    RXY_a, LAT, 0, a2, r1, 0, lgat, 0)
@@ -472,10 +475,10 @@
     C(0x1300, LCR,     RR_a,  Z,   0, r2, new, r1_32, neg, neg32)
     C(0xb903, LCGR,    RRE,   Z,   0, r2, r1, 0, neg, neg64)
     C(0xb913, LCGFR,   RRE,   Z,   0, r2_32s, r1, 0, neg, neg64)
-    C(0xb303, LCEBR,   RRE,   Z,   0, e2, new, e1, negf32, f32)
-    C(0xb313, LCDBR,   RRE,   Z,   0, f2_o, f1, 0, negf64, f64)
-    C(0xb343, LCXBR,   RRE,   Z,   0, x2_o, x1, 0, negf128, f128)
-    C(0xb373, LCDFR,   RRE,   FPSSH, 0, f2_o, f1, 0, negf64, 0)
+    F(0xb303, LCEBR,   RRE,   Z,   0, e2, new, e1, negf32, f32, IF_BFP)
+    F(0xb313, LCDBR,   RRE,   Z,   0, f2_o, f1, 0, negf64, f64, IF_BFP)
+    F(0xb343, LCXBR,   RRE,   Z,   0, x2_o, x1, 0, negf128, f128, IF_BFP)
+    F(0xb373, LCDFR,   RRE,   FPSSH, 0, f2_o, f1, 0, negf64, 0, IF_AFP1 | IF_AFP2)
 /* LOAD HALFWORD */
     C(0xb927, LHR,     RRE,   EI,  0, r2_16s, 0, r1_32, mov2, 0)
     C(0xb907, LGHR,    RRE,   EI,  0, r2_16s, 0, r1, mov2, 0)
@@ -532,17 +535,17 @@
     C(0xe39c, LLGTAT,  RXY_a, LAT, 0, m2_32u, r1, 0, llgtat, 0)
 
 /* LOAD FPR FROM GR */
-    C(0xb3c1, LDGR,    RRE,   FPRGR, 0, r2_o, 0, f1, mov2, 0)
+    F(0xb3c1, LDGR,    RRE,   FPRGR, 0, r2_o, 0, f1, mov2, 0, IF_AFP1)
 /* LOAD GR FROM FPR */
-    C(0xb3cd, LGDR,    RRE,   FPRGR, 0, f2_o, 0, r1, mov2, 0)
+    F(0xb3cd, LGDR,    RRE,   FPRGR, 0, f2_o, 0, r1, mov2, 0, IF_AFP2)
 /* LOAD NEGATIVE */
     C(0x1100, LNR,     RR_a,  Z,   0, r2_32s, new, r1_32, nabs, nabs32)
     C(0xb901, LNGR,    RRE,   Z,   0, r2, r1, 0, nabs, nabs64)
     C(0xb911, LNGFR,   RRE,   Z,   0, r2_32s, r1, 0, nabs, nabs64)
-    C(0xb301, LNEBR,   RRE,   Z,   0, e2, new, e1, nabsf32, f32)
-    C(0xb311, LNDBR,   RRE,   Z,   0, f2_o, f1, 0, nabsf64, f64)
-    C(0xb341, LNXBR,   RRE,   Z,   0, x2_o, x1, 0, nabsf128, f128)
-    C(0xb371, LNDFR,   RRE,   FPSSH, 0, f2_o, f1, 0, nabsf64, 0)
+    F(0xb301, LNEBR,   RRE,   Z,   0, e2, new, e1, nabsf32, f32, IF_BFP)
+    F(0xb311, LNDBR,   RRE,   Z,   0, f2_o, f1, 0, nabsf64, f64, IF_BFP)
+    F(0xb341, LNXBR,   RRE,   Z,   0, x2_o, x1, 0, nabsf128, f128, IF_BFP)
+    F(0xb371, LNDFR,   RRE,   FPSSH, 0, f2_o, f1, 0, nabsf64, 0, IF_AFP1 | IF_AFP2)
 /* LOAD ON CONDITION */
     C(0xb9f2, LOCR,    RRF_c, LOC, r1, r2, new, r1_32, loc, 0)
     C(0xb9e2, LOCGR,   RRF_c, LOC, r1, r2, r1, 0, loc, 0)
@@ -564,10 +567,10 @@
     C(0x1000, LPR,     RR_a,  Z,   0, r2_32s, new, r1_32, abs, abs32)
     C(0xb900, LPGR,    RRE,   Z,   0, r2, r1, 0, abs, abs64)
     C(0xb910, LPGFR,   RRE,   Z,   0, r2_32s, r1, 0, abs, abs64)
-    C(0xb300, LPEBR,   RRE,   Z,   0, e2, new, e1, absf32, f32)
-    C(0xb310, LPDBR,   RRE,   Z,   0, f2_o, f1, 0, absf64, f64)
-    C(0xb340, LPXBR,   RRE,   Z,   0, x2_o, x1, 0, absf128, f128)
-    C(0xb370, LPDFR,   RRE,   FPSSH, 0, f2_o, f1, 0, absf64, 0)
+    F(0xb300, LPEBR,   RRE,   Z,   0, e2, new, e1, absf32, f32, IF_BFP)
+    F(0xb310, LPDBR,   RRE,   Z,   0, f2_o, f1, 0, absf64, f64, IF_BFP)
+    F(0xb340, LPXBR,   RRE,   Z,   0, x2_o, x1, 0, absf128, f128, IF_BFP)
+    F(0xb370, LPDFR,   RRE,   FPSSH, 0, f2_o, f1, 0, absf64, 0, IF_AFP1 | IF_AFP2)
 /* LOAD REVERSED */
     C(0xb91f, LRVR,    RRE,   Z,   0, r2_32u, new, r1_32, rev32, 0)
     C(0xb90f, LRVGR,   RRE,   Z,   0, r2_o, r1, 0, rev64, 0)
@@ -575,30 +578,30 @@
     C(0xe31e, LRV,     RXY_a, Z,   0, m2_32u, new, r1_32, rev32, 0)
     C(0xe30f, LRVG,    RXY_a, Z,   0, m2_64, r1, 0, rev64, 0)
 /* LOAD ZERO */
-    C(0xb374, LZER,    RRE,   Z,   0, 0, 0, e1, zero, 0)
-    C(0xb375, LZDR,    RRE,   Z,   0, 0, 0, f1, zero, 0)
-    C(0xb376, LZXR,    RRE,   Z,   0, 0, 0, x1, zero2, 0)
+    F(0xb374, LZER,    RRE,   Z,   0, 0, 0, e1, zero, 0, IF_AFP1)
+    F(0xb375, LZDR,    RRE,   Z,   0, 0, 0, f1, zero, 0, IF_AFP1)
+    F(0xb376, LZXR,    RRE,   Z,   0, 0, 0, x1, zero2, 0, IF_AFP1)
 
 /* LOAD FPC */
-    C(0xb29d, LFPC,    S,     Z,   0, m2_32u, 0, 0, sfpc, 0)
+    F(0xb29d, LFPC,    S,     Z,   0, m2_32u, 0, 0, sfpc, 0, IF_BFP)
 /* LOAD FPC AND SIGNAL */
-    C(0xb2bd, LFAS,    S,     IEEEE_SIM, 0, m2_32u, 0, 0, sfas, 0)
+    F(0xb2bd, LFAS,    S,     IEEEE_SIM, 0, m2_32u, 0, 0, sfas, 0, IF_DFP)
 /* LOAD FP INTEGER */
-    C(0xb357, FIEBR,   RRF_e, Z,   0, e2, new, e1, fieb, 0)
-    C(0xb35f, FIDBR,   RRF_e, Z,   0, f2_o, f1, 0, fidb, 0)
-    C(0xb347, FIXBR,   RRF_e, Z,   0, x2_o, x1, 0, fixb, 0)
+    F(0xb357, FIEBR,   RRF_e, Z,   0, e2, new, e1, fieb, 0, IF_BFP)
+    F(0xb35f, FIDBR,   RRF_e, Z,   0, f2_o, f1, 0, fidb, 0, IF_BFP)
+    F(0xb347, FIXBR,   RRF_e, Z,   0, x2_o, x1, 0, fixb, 0, IF_BFP)
 
 /* LOAD LENGTHENED */
-    C(0xb304, LDEBR,   RRE,   Z,   0, e2, f1, 0, ldeb, 0)
-    C(0xb305, LXDBR,   RRE,   Z,   0, f2_o, x1, 0, lxdb, 0)
-    C(0xb306, LXEBR,   RRE,   Z,   0, e2, x1, 0, lxeb, 0)
-    C(0xed04, LDEB,    RXE,   Z,   0, m2_32u, f1, 0, ldeb, 0)
-    C(0xed05, LXDB,    RXE,   Z,   0, m2_64, x1, 0, lxdb, 0)
-    C(0xed06, LXEB,    RXE,   Z,   0, m2_32u, x1, 0, lxeb, 0)
+    F(0xb304, LDEBR,   RRE,   Z,   0, e2, f1, 0, ldeb, 0, IF_BFP)
+    F(0xb305, LXDBR,   RRE,   Z,   0, f2_o, x1, 0, lxdb, 0, IF_BFP)
+    F(0xb306, LXEBR,   RRE,   Z,   0, e2, x1, 0, lxeb, 0, IF_BFP)
+    F(0xed04, LDEB,    RXE,   Z,   0, m2_32u, f1, 0, ldeb, 0, IF_BFP)
+    F(0xed05, LXDB,    RXE,   Z,   0, m2_64, x1, 0, lxdb, 0, IF_BFP)
+    F(0xed06, LXEB,    RXE,   Z,   0, m2_32u, x1, 0, lxeb, 0, IF_BFP)
 /* LOAD ROUNDED */
-    C(0xb344, LEDBR,   RRE,   Z,   0, f2_o, new, e1, ledb, 0)
-    C(0xb345, LDXBR,   RRE,   Z,   0, x2_o, f1, 0, ldxb, 0)
-    C(0xb346, LEXBR,   RRE,   Z,   0, x2_o, new, e1, lexb, 0)
+    F(0xb344, LEDBR,   RRE,   Z,   0, f2_o, new, e1, ledb, 0, IF_BFP)
+    F(0xb345, LDXBR,   RRE,   Z,   0, x2_o, f1, 0, ldxb, 0, IF_BFP)
+    F(0xb346, LEXBR,   RRE,   Z,   0, x2_o, new, e1, lexb, 0, IF_BFP)
 
 /* LOAD MULTIPLE */
     C(0x9800, LM,      RS_a,  Z,   0, a2, 0, 0, lm32, 0)
@@ -644,15 +647,15 @@
     C(0x1c00, MR,      RR_a,  Z,   r1p1_32s, r2_32s, new, r1_D32, mul, 0)
     C(0x5c00, M,       RX_a,  Z,   r1p1_32s, m2_32s, new, r1_D32, mul, 0)
     C(0xe35c, MFY,     RXY_a, GIE, r1p1_32s, m2_32s, new, r1_D32, mul, 0)
-    C(0xb317, MEEBR,   RRE,   Z,   e1, e2, new, e1, meeb, 0)
-    C(0xb31c, MDBR,    RRE,   Z,   f1_o, f2_o, f1, 0, mdb, 0)
-    C(0xb34c, MXBR,    RRE,   Z,   0, x2_o, x1, 0, mxb, 0)
-    C(0xb30c, MDEBR,   RRE,   Z,   f1_o, e2, f1, 0, mdeb, 0)
-    C(0xb307, MXDBR,   RRE,   Z,   0, f2_o, x1, 0, mxdb, 0)
-    C(0xed17, MEEB,    RXE,   Z,   e1, m2_32u, new, e1, meeb, 0)
-    C(0xed1c, MDB,     RXE,   Z,   f1_o, m2_64, f1, 0, mdb, 0)
-    C(0xed0c, MDEB,    RXE,   Z,   f1_o, m2_32u, f1, 0, mdeb, 0)
-    C(0xed07, MXDB,    RXE,   Z,   0, m2_64, x1, 0, mxdb, 0)
+    F(0xb317, MEEBR,   RRE,   Z,   e1, e2, new, e1, meeb, 0, IF_BFP)
+    F(0xb31c, MDBR,    RRE,   Z,   f1_o, f2_o, f1, 0, mdb, 0, IF_BFP)
+    F(0xb34c, MXBR,    RRE,   Z,   0, x2_o, x1, 0, mxb, 0, IF_BFP)
+    F(0xb30c, MDEBR,   RRE,   Z,   f1_o, e2, f1, 0, mdeb, 0, IF_BFP)
+    F(0xb307, MXDBR,   RRE,   Z,   0, f2_o, x1, 0, mxdb, 0, IF_BFP)
+    F(0xed17, MEEB,    RXE,   Z,   e1, m2_32u, new, e1, meeb, 0, IF_BFP)
+    F(0xed1c, MDB,     RXE,   Z,   f1_o, m2_64, f1, 0, mdb, 0, IF_BFP)
+    F(0xed0c, MDEB,    RXE,   Z,   f1_o, m2_32u, f1, 0, mdeb, 0, IF_BFP)
+    F(0xed07, MXDB,    RXE,   Z,   0, m2_64, x1, 0, mxdb, 0, IF_BFP)
 /* MULTIPLY HALFWORD */
     C(0x4c00, MH,      RX_a,  Z,   r1_o, m2_16s, new, r1_32, mul, 0)
     C(0xe37c, MHY,     RXY_a, GIE, r1_o, m2_16s, new, r1_32, mul, 0)
@@ -677,15 +680,15 @@
     C(0xc200, MSGFI,   RIL_a, GIE, r1_o, i2, r1, 0, mul, 0)
 
 /* MULTIPLY AND ADD */
-    C(0xb30e, MAEBR,   RRD,   Z,   e1, e2, new, e1, maeb, 0)
-    C(0xb31e, MADBR,   RRD,   Z,   f1_o, f2_o, f1, 0, madb, 0)
-    C(0xed0e, MAEB,    RXF,   Z,   e1, m2_32u, new, e1, maeb, 0)
-    C(0xed1e, MADB,    RXF,   Z,   f1_o, m2_64, f1, 0, madb, 0)
+    F(0xb30e, MAEBR,   RRD,   Z,   e1, e2, new, e1, maeb, 0, IF_BFP)
+    F(0xb31e, MADBR,   RRD,   Z,   f1_o, f2_o, f1, 0, madb, 0, IF_BFP)
+    F(0xed0e, MAEB,    RXF,   Z,   e1, m2_32u, new, e1, maeb, 0, IF_BFP)
+    F(0xed1e, MADB,    RXF,   Z,   f1_o, m2_64, f1, 0, madb, 0, IF_BFP)
 /* MULTIPLY AND SUBTRACT */
-    C(0xb30f, MSEBR,   RRD,   Z,   e1, e2, new, e1, mseb, 0)
-    C(0xb31f, MSDBR,   RRD,   Z,   f1_o, f2_o, f1, 0, msdb, 0)
-    C(0xed0f, MSEB,    RXF,   Z,   e1, m2_32u, new, e1, mseb, 0)
-    C(0xed1f, MSDB,    RXF,   Z,   f1_o, m2_64, f1, 0, msdb, 0)
+    F(0xb30f, MSEBR,   RRD,   Z,   e1, e2, new, e1, mseb, 0, IF_BFP)
+    F(0xb31f, MSDBR,   RRD,   Z,   f1_o, f2_o, f1, 0, msdb, 0, IF_BFP)
+    F(0xed0f, MSEB,    RXF,   Z,   e1, m2_32u, new, e1, mseb, 0, IF_BFP)
+    F(0xed1f, MSDB,    RXF,   Z,   f1_o, m2_64, f1, 0, msdb, 0, IF_BFP)
 
 /* OR */
     C(0x1600, OR,      RR_a,  Z,   r1, r2, new, r1_32, or, nz32)
@@ -752,14 +755,14 @@
     D(0x010d, SAM31,   E,     Z,   0, 0, 0, 0, sam, 0, 1)
     D(0x010e, SAM64,   E,     Z,   0, 0, 0, 0, sam, 0, 3)
 /* SET FPC */
-    C(0xb384, SFPC,    RRE,   Z,   0, r1_o, 0, 0, sfpc, 0)
+    F(0xb384, SFPC,    RRE,   Z,   0, r1_o, 0, 0, sfpc, 0, IF_BFP)
 /* SET FPC AND SIGNAL */
-    C(0xb385, SFASR,   RRE,   IEEEE_SIM, 0, r1_o, 0, 0, sfas, 0)
+    F(0xb385, SFASR,   RRE,   IEEEE_SIM, 0, r1_o, 0, 0, sfas, 0, IF_DFP)
 /* SET BFP ROUNDING MODE */
-    C(0xb299, SRNM,    S,     Z,   0, 0, 0, 0, srnm, 0)
-    C(0xb2b8, SRNMB,   S,     FPE, 0, 0, 0, 0, srnm, 0)
+    F(0xb299, SRNM,    S,     Z,   0, 0, 0, 0, srnm, 0, IF_BFP)
+    F(0xb2b8, SRNMB,   S,     FPE, 0, 0, 0, 0, srnm, 0, IF_BFP)
 /* SET DFP ROUNDING MODE */
-    C(0xb2b9, SRNMT,   S,     DFPR, 0, 0, 0, 0, srnm, 0)
+    F(0xb2b9, SRNMT,   S,     DFPR, 0, 0, 0, 0, srnm, 0, IF_DFP)
 /* SET PROGRAM MASK */
     C(0x0400, SPM,     RR_a,  Z,   r1, 0, 0, 0, spm, 0)
 
@@ -789,20 +792,20 @@
     C(0x8c00, SRDL,    RS_a,  Z,   r1_D32, sh64, new, r1_D32, srl, 0)
 
 /* SQUARE ROOT */
-    C(0xb314, SQEBR,   RRE,   Z,   0, e2, new, e1, sqeb, 0)
-    C(0xb315, SQDBR,   RRE,   Z,   0, f2_o, f1, 0, sqdb, 0)
-    C(0xb316, SQXBR,   RRE,   Z,   0, x2_o, x1, 0, sqxb, 0)
-    C(0xed14, SQEB,    RXE,   Z,   0, m2_32u, new, e1, sqeb, 0)
-    C(0xed15, SQDB,    RXE,   Z,   0, m2_64, f1, 0, sqdb, 0)
+    F(0xb314, SQEBR,   RRE,   Z,   0, e2, new, e1, sqeb, 0, IF_BFP)
+    F(0xb315, SQDBR,   RRE,   Z,   0, f2_o, f1, 0, sqdb, 0, IF_BFP)
+    F(0xb316, SQXBR,   RRE,   Z,   0, x2_o, x1, 0, sqxb, 0, IF_BFP)
+    F(0xed14, SQEB,    RXE,   Z,   0, m2_32u, new, e1, sqeb, 0, IF_BFP)
+    F(0xed15, SQDB,    RXE,   Z,   0, m2_64, f1, 0, sqdb, 0, IF_BFP)
 
 /* STORE */
     C(0x5000, ST,      RX_a,  Z,   r1_o, a2, 0, 0, st32, 0)
     C(0xe350, STY,     RXY_a, LD,  r1_o, a2, 0, 0, st32, 0)
     C(0xe324, STG,     RXY_a, Z,   r1_o, a2, 0, 0, st64, 0)
-    C(0x6000, STD,     RX_a,  Z,   f1_o, a2, 0, 0, st64, 0)
-    C(0xed67, STDY,    RXY_a, LD,  f1_o, a2, 0, 0, st64, 0)
-    C(0x7000, STE,     RX_a,  Z,   e1, a2, 0, 0, st32, 0)
-    C(0xed66, STEY,    RXY_a, LD,  e1, a2, 0, 0, st32, 0)
+    F(0x6000, STD,     RX_a,  Z,   f1_o, a2, 0, 0, st64, 0, IF_AFP1)
+    F(0xed67, STDY,    RXY_a, LD,  f1_o, a2, 0, 0, st64, 0, IF_AFP1)
+    F(0x7000, STE,     RX_a,  Z,   e1, a2, 0, 0, st32, 0, IF_AFP1)
+    F(0xed66, STEY,    RXY_a, LD,  e1, a2, 0, 0, st32, 0, IF_AFP1)
 /* STORE RELATIVE LONG */
     C(0xc40f, STRL,    RIL_b, GIE, r1_o, ri2, 0, 0, st32, 0)
     C(0xc40b, STGRL,   RIL_b, GIE, r1_o, ri2, 0, 0, st64, 0)
@@ -837,7 +840,7 @@
 /* STORE FACILITY LIST EXTENDED */
     C(0xb2b0, STFLE,   S,  SFLE,   0, a2, 0, 0, stfle, 0)
 /* STORE FPC */
-    C(0xb29c, STFPC,   S,     Z,   0, a2, new, m2_32, efpc, 0)
+    F(0xb29c, STFPC,   S,     Z,   0, a2, new, m2_32, efpc, 0, IF_BFP)
 
 /* STORE MULTIPLE */
     D(0x9000, STM,     RS_a,  Z,   0, a2, 0, 0, stm, 0, 4)
@@ -861,11 +864,11 @@
     C(0xb9e9, SGRK,    RRF_a, DO,  r2, r3, r1, 0, sub, subs64)
     C(0xe309, SG,      RXY_a, Z,   r1, m2_64, r1, 0, sub, subs64)
     C(0xe319, SGF,     RXY_a, Z,   r1, m2_32s, r1, 0, sub, subs64)
-    C(0xb30b, SEBR,    RRE,   Z,   e1, e2, new, e1, seb, f32)
-    C(0xb31b, SDBR,    RRE,   Z,   f1_o, f2_o, f1, 0, sdb, f64)
-    C(0xb34b, SXBR,    RRE,   Z,   0, x2_o, x1, 0, sxb, f128)
-    C(0xed0b, SEB,     RXE,   Z,   e1, m2_32u, new, e1, seb, f32)
-    C(0xed1b, SDB,     RXE,   Z,   f1_o, m2_64, f1, 0, sdb, f64)
+    F(0xb30b, SEBR,    RRE,   Z,   e1, e2, new, e1, seb, f32, IF_BFP)
+    F(0xb31b, SDBR,    RRE,   Z,   f1_o, f2_o, f1, 0, sdb, f64, IF_BFP)
+    F(0xb34b, SXBR,    RRE,   Z,   0, x2_o, x1, 0, sxb, f128, IF_BFP)
+    F(0xed0b, SEB,     RXE,   Z,   e1, m2_32u, new, e1, seb, f32, IF_BFP)
+    F(0xed1b, SDB,     RXE,   Z,   f1_o, m2_64, f1, 0, sdb, f64, IF_BFP)
 /* SUBTRACT HALFWORD */
     C(0x4b00, SH,      RX_a,  Z,   r1, m2_16s, new, r1_32, sub, subs32)
     C(0xe37b, SHY,     RXY_a, LD,  r1, m2_16s, new, r1_32, sub, subs32)
@@ -904,9 +907,9 @@
     C(0x9300, TS,      S,     Z,   0, a2, 0, 0, ts, 0)
 
 /* TEST DATA CLASS */
-    C(0xed10, TCEB,    RXE,   Z,   e1, a2, 0, 0, tceb, 0)
-    C(0xed11, TCDB,    RXE,   Z,   f1_o, a2, 0, 0, tcdb, 0)
-    C(0xed12, TCXB,    RXE,   Z,   x1_o, a2, 0, 0, tcxb, 0)
+    F(0xed10, TCEB,    RXE,   Z,   e1, a2, 0, 0, tceb, 0, IF_BFP)
+    F(0xed11, TCDB,    RXE,   Z,   f1_o, a2, 0, 0, tcdb, 0, IF_BFP)
+    F(0xed12, TCXB,    RXE,   Z,   x1_o, a2, 0, 0, tcxb, 0, IF_BFP)
 
 /* TEST DECIMAL */
     C(0xebc0, TP,      RSL,   E2,  la1, 0, 0, 0, tp, 0)
@@ -961,126 +964,126 @@
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
-    D(0xb250, CSP,     RRE,   Z,   r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL)
-    D(0xb98a, CSPG,    RRE, DAT_ENH, r1_o, ra2, r1_P, 0, csp, 0, MO_TEQ)
+    E(0xb250, CSP,     RRE,   Z,   r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL, IF_PRIV)
+    E(0xb98a, CSPG,    RRE, DAT_ENH, r1_o, ra2, r1_P, 0, csp, 0, MO_TEQ, IF_PRIV)
 /* DIAGNOSE (KVM hypercall) */
-    C(0x8300, DIAG,    RSI,   Z,   0, 0, 0, 0, diag, 0)
+    F(0x8300, DIAG,    RSI,   Z,   0, 0, 0, 0, diag, 0, IF_PRIV)
 /* INSERT STORAGE KEY EXTENDED */
-    C(0xb229, ISKE,    RRE,   Z,   0, r2_o, new, r1_8, iske, 0)
+    F(0xb229, ISKE,    RRE,   Z,   0, r2_o, new, r1_8, iske, 0, IF_PRIV)
 /* INVALIDATE DAT TABLE ENTRY */
-    C(0xb98e, IPDE,    RRF_b, Z,   r1_o, r2_o, 0, 0, idte, 0)
+    F(0xb98e, IPDE,    RRF_b, Z,   r1_o, r2_o, 0, 0, idte, 0, IF_PRIV)
 /* INVALIDATE PAGE TABLE ENTRY */
-    C(0xb221, IPTE,    RRF_a, Z,   r1_o, r2_o, 0, 0, ipte, 0)
+    F(0xb221, IPTE,    RRF_a, Z,   r1_o, r2_o, 0, 0, ipte, 0, IF_PRIV)
 /* LOAD CONTROL */
-    C(0xb700, LCTL,    RS_a,  Z,   0, a2, 0, 0, lctl, 0)
-    C(0xeb2f, LCTLG,   RSY_a, Z,   0, a2, 0, 0, lctlg, 0)
+    F(0xb700, LCTL,    RS_a,  Z,   0, a2, 0, 0, lctl, 0, IF_PRIV)
+    F(0xeb2f, LCTLG,   RSY_a, Z,   0, a2, 0, 0, lctlg, 0, IF_PRIV)
 /* LOAD PROGRAM PARAMETER */
-    C(0xb280, LPP,     S,   LPP,   0, m2_64, 0, 0, lpp, 0)
+    F(0xb280, LPP,     S,   LPP,   0, m2_64, 0, 0, lpp, 0, IF_PRIV)
 /* LOAD PSW */
-    C(0x8200, LPSW,    S,     Z,   0, a2, 0, 0, lpsw, 0)
+    F(0x8200, LPSW,    S,     Z,   0, a2, 0, 0, lpsw, 0, IF_PRIV)
 /* LOAD PSW EXTENDED */
-    C(0xb2b2, LPSWE,   S,     Z,   0, a2, 0, 0, lpswe, 0)
+    F(0xb2b2, LPSWE,   S,     Z,   0, a2, 0, 0, lpswe, 0, IF_PRIV)
 /* LOAD REAL ADDRESS */
-    C(0xb100, LRA,     RX_a,  Z,   0, a2, r1, 0, lra, 0)
-    C(0xe313, LRAY,    RXY_a, LD,  0, a2, r1, 0, lra, 0)
-    C(0xe303, LRAG,    RXY_a, Z,   0, a2, r1, 0, lra, 0)
+    F(0xb100, LRA,     RX_a,  Z,   0, a2, r1, 0, lra, 0, IF_PRIV)
+    F(0xe313, LRAY,    RXY_a, LD,  0, a2, r1, 0, lra, 0, IF_PRIV)
+    F(0xe303, LRAG,    RXY_a, Z,   0, a2, r1, 0, lra, 0, IF_PRIV)
 /* LOAD USING REAL ADDRESS */
-    C(0xb24b, LURA,    RRE,   Z,   0, r2, new, r1_32, lura, 0)
-    C(0xb905, LURAG,   RRE,   Z,   0, r2, r1, 0, lurag, 0)
+    F(0xb24b, LURA,    RRE,   Z,   0, r2, new, r1_32, lura, 0, IF_PRIV)
+    F(0xb905, LURAG,   RRE,   Z,   0, r2, r1, 0, lurag, 0, IF_PRIV)
 /* MOVE TO PRIMARY */
-    C(0xda00, MVCP,    SS_d,  Z,   la1, a2, 0, 0, mvcp, 0)
+    F(0xda00, MVCP,    SS_d,  Z,   la1, a2, 0, 0, mvcp, 0, IF_PRIV)
 /* MOVE TO SECONDARY */
-    C(0xdb00, MVCS,    SS_d,  Z,   la1, a2, 0, 0, mvcs, 0)
+    F(0xdb00, MVCS,    SS_d,  Z,   la1, a2, 0, 0, mvcs, 0, IF_PRIV)
 /* PURGE TLB */
-    C(0xb20d, PTLB,    S,     Z,   0, 0, 0, 0, ptlb, 0)
+    F(0xb20d, PTLB,    S,     Z,   0, 0, 0, 0, ptlb, 0, IF_PRIV)
 /* RESET REFERENCE BIT EXTENDED */
-    C(0xb22a, RRBE,    RRE,   Z,   0, r2_o, 0, 0, rrbe, 0)
+    F(0xb22a, RRBE,    RRE,   Z,   0, r2_o, 0, 0, rrbe, 0, IF_PRIV)
 /* SERVICE CALL LOGICAL PROCESSOR (PV hypercall) */
-    C(0xb220, SERVC,   RRE,   Z,   r1_o, r2_o, 0, 0, servc, 0)
+    F(0xb220, SERVC,   RRE,   Z,   r1_o, r2_o, 0, 0, servc, 0, IF_PRIV)
 /* SET ADDRESS SPACE CONTROL FAST */
-    C(0xb279, SACF,    S,     Z,   0, a2, 0, 0, sacf, 0)
+    F(0xb279, SACF,    S,     Z,   0, a2, 0, 0, sacf, 0, IF_PRIV)
 /* SET CLOCK */
-    C(0xb204, SCK,     S,     Z,   la2, 0, 0, 0, sck, 0)
+    F(0xb204, SCK,     S,     Z,   la2, 0, 0, 0, sck, 0, IF_PRIV)
 /* SET CLOCK COMPARATOR */
-    C(0xb206, SCKC,    S,     Z,   0, m2_64a, 0, 0, sckc, 0)
+    F(0xb206, SCKC,    S,     Z,   0, m2_64a, 0, 0, sckc, 0, IF_PRIV)
 /* SET CLOCK PROGRAMMABLE FIELD */
-    C(0x0107, SCKPF,   E,     Z,   0, 0, 0, 0, sckpf, 0)
+    F(0x0107, SCKPF,   E,     Z,   0, 0, 0, 0, sckpf, 0, IF_PRIV)
 /* SET CPU TIMER */
-    C(0xb208, SPT,     S,     Z,   0, m2_64a, 0, 0, spt, 0)
+    F(0xb208, SPT,     S,     Z,   0, m2_64a, 0, 0, spt, 0, IF_PRIV)
 /* SET PREFIX */
-    C(0xb210, SPX,     S,     Z,   0, m2_32ua, 0, 0, spx, 0)
+    F(0xb210, SPX,     S,     Z,   0, m2_32ua, 0, 0, spx, 0, IF_PRIV)
 /* SET PSW KEY FROM ADDRESS */
-    C(0xb20a, SPKA,    S,     Z,   0, a2, 0, 0, spka, 0)
+    F(0xb20a, SPKA,    S,     Z,   0, a2, 0, 0, spka, 0, IF_PRIV)
 /* SET STORAGE KEY EXTENDED */
-    C(0xb22b, SSKE,    RRF_c, Z,   r1_o, r2_o, 0, 0, sske, 0)
+    F(0xb22b, SSKE,    RRF_c, Z,   r1_o, r2_o, 0, 0, sske, 0, IF_PRIV)
 /* SET SYSTEM MASK */
-    C(0x8000, SSM,     S,     Z,   0, m2_8u, 0, 0, ssm, 0)
+    F(0x8000, SSM,     S,     Z,   0, m2_8u, 0, 0, ssm, 0, IF_PRIV)
 /* SIGNAL PROCESSOR */
-    C(0xae00, SIGP,    RS_a,  Z,   0, a2, 0, 0, sigp, 0)
+    F(0xae00, SIGP,    RS_a,  Z,   0, a2, 0, 0, sigp, 0, IF_PRIV)
 /* STORE CLOCK */
     C(0xb205, STCK,    S,     Z,   la2, 0, new, m1_64, stck, 0)
     C(0xb27c, STCKF,   S,     SCF, la2, 0, new, m1_64, stck, 0)
 /* STORE CLOCK EXTENDED */
     C(0xb278, STCKE,   S,     Z,   0, a2, 0, 0, stcke, 0)
 /* STORE CLOCK COMPARATOR */
-    C(0xb207, STCKC,   S,     Z,   la2, 0, new, m1_64a, stckc, 0)
+    F(0xb207, STCKC,   S,     Z,   la2, 0, new, m1_64a, stckc, 0, IF_PRIV)
 /* STORE CONTROL */
-    C(0xb600, STCTL,   RS_a,  Z,   0, a2, 0, 0, stctl, 0)
-    C(0xeb25, STCTG,   RSY_a, Z,   0, a2, 0, 0, stctg, 0)
+    F(0xb600, STCTL,   RS_a,  Z,   0, a2, 0, 0, stctl, 0, IF_PRIV)
+    F(0xeb25, STCTG,   RSY_a, Z,   0, a2, 0, 0, stctg, 0, IF_PRIV)
 /* STORE CPU ADDRESS */
-    C(0xb212, STAP,    S,     Z,   la2, 0, new, m1_16a, stap, 0)
+    F(0xb212, STAP,    S,     Z,   la2, 0, new, m1_16a, stap, 0, IF_PRIV)
 /* STORE CPU ID */
-    C(0xb202, STIDP,   S,     Z,   la2, 0, new, m1_64a, stidp, 0)
+    F(0xb202, STIDP,   S,     Z,   la2, 0, new, m1_64a, stidp, 0, IF_PRIV)
 /* STORE CPU TIMER */
-    C(0xb209, STPT,    S,     Z,   la2, 0, new, m1_64a, stpt, 0)
+    F(0xb209, STPT,    S,     Z,   la2, 0, new, m1_64a, stpt, 0, IF_PRIV)
 /* STORE FACILITY LIST */
-    C(0xb2b1, STFL,    S,     Z,   0, 0, 0, 0, stfl, 0)
+    F(0xb2b1, STFL,    S,     Z,   0, 0, 0, 0, stfl, 0, IF_PRIV)
 /* STORE PREFIX */
-    C(0xb211, STPX,    S,     Z,   la2, 0, new, m1_32a, stpx, 0)
+    F(0xb211, STPX,    S,     Z,   la2, 0, new, m1_32a, stpx, 0, IF_PRIV)
 /* STORE SYSTEM INFORMATION */
-    C(0xb27d, STSI,    S,     Z,   0, a2, 0, 0, stsi, 0)
+    F(0xb27d, STSI,    S,     Z,   0, a2, 0, 0, stsi, 0, IF_PRIV)
 /* STORE THEN AND SYSTEM MASK */
-    C(0xac00, STNSM,   SI,    Z,   la1, 0, 0, 0, stnosm, 0)
+    F(0xac00, STNSM,   SI,    Z,   la1, 0, 0, 0, stnosm, 0, IF_PRIV)
 /* STORE THEN OR SYSTEM MASK */
-    C(0xad00, STOSM,   SI,    Z,   la1, 0, 0, 0, stnosm, 0)
+    F(0xad00, STOSM,   SI,    Z,   la1, 0, 0, 0, stnosm, 0, IF_PRIV)
 /* STORE USING REAL ADDRESS */
-    C(0xb246, STURA,   RRE,   Z,   r1_o, r2_o, 0, 0, stura, 0)
-    C(0xb925, STURG,   RRE,   Z,   r1_o, r2_o, 0, 0, sturg, 0)
+    F(0xb246, STURA,   RRE,   Z,   r1_o, r2_o, 0, 0, stura, 0, IF_PRIV)
+    F(0xb925, STURG,   RRE,   Z,   r1_o, r2_o, 0, 0, sturg, 0, IF_PRIV)
 /* TEST BLOCK */
-    C(0xb22c, TB,      RRE,   Z,   0, r2_o, 0, 0, testblock, 0)
+    F(0xb22c, TB,      RRE,   Z,   0, r2_o, 0, 0, testblock, 0, IF_PRIV)
 /* TEST PROTECTION */
     C(0xe501, TPROT,   SSE,   Z,   la1, a2, 0, 0, tprot, 0)
 
 /* CCW I/O Instructions */
-    C(0xb276, XSCH,    S,     Z,   0, 0, 0, 0, xsch, 0)
-    C(0xb230, CSCH,    S,     Z,   0, 0, 0, 0, csch, 0)
-    C(0xb231, HSCH,    S,     Z,   0, 0, 0, 0, hsch, 0)
-    C(0xb232, MSCH,    S,     Z,   0, insn, 0, 0, msch, 0)
-    C(0xb23b, RCHP,    S,     Z,   0, 0, 0, 0, rchp, 0)
-    C(0xb238, RSCH,    S,     Z,   0, 0, 0, 0, rsch, 0)
-    C(0xb237, SAL,     S,     Z,   0, 0, 0, 0, sal, 0)
-    C(0xb23c, SCHM,    S,     Z,   0, insn, 0, 0, schm, 0)
-    C(0xb274, SIGA,    S,     Z,   0, 0, 0, 0, siga, 0)
-    C(0xb23a, STCPS,   S,     Z,   0, 0, 0, 0, stcps, 0)
-    C(0xb233, SSCH,    S,     Z,   0, insn, 0, 0, ssch, 0)
-    C(0xb239, STCRW,   S,     Z,   0, insn, 0, 0, stcrw, 0)
-    C(0xb234, STSCH,   S,     Z,   0, insn, 0, 0, stsch, 0)
-    C(0xb236, TPI ,    S,     Z,   la2, 0, 0, 0, tpi, 0)
-    C(0xb235, TSCH,    S,     Z,   0, insn, 0, 0, tsch, 0)
+    F(0xb276, XSCH,    S,     Z,   0, 0, 0, 0, xsch, 0, IF_PRIV)
+    F(0xb230, CSCH,    S,     Z,   0, 0, 0, 0, csch, 0, IF_PRIV)
+    F(0xb231, HSCH,    S,     Z,   0, 0, 0, 0, hsch, 0, IF_PRIV)
+    F(0xb232, MSCH,    S,     Z,   0, insn, 0, 0, msch, 0, IF_PRIV)
+    F(0xb23b, RCHP,    S,     Z,   0, 0, 0, 0, rchp, 0, IF_PRIV)
+    F(0xb238, RSCH,    S,     Z,   0, 0, 0, 0, rsch, 0, IF_PRIV)
+    F(0xb237, SAL,     S,     Z,   0, 0, 0, 0, sal, 0, IF_PRIV)
+    F(0xb23c, SCHM,    S,     Z,   0, insn, 0, 0, schm, 0, IF_PRIV)
+    F(0xb274, SIGA,    S,     Z,   0, 0, 0, 0, siga, 0, IF_PRIV)
+    F(0xb23a, STCPS,   S,     Z,   0, 0, 0, 0, stcps, 0, IF_PRIV)
+    F(0xb233, SSCH,    S,     Z,   0, insn, 0, 0, ssch, 0, IF_PRIV)
+    F(0xb239, STCRW,   S,     Z,   0, insn, 0, 0, stcrw, 0, IF_PRIV)
+    F(0xb234, STSCH,   S,     Z,   0, insn, 0, 0, stsch, 0, IF_PRIV)
+    F(0xb236, TPI ,    S,     Z,   la2, 0, 0, 0, tpi, 0, IF_PRIV)
+    F(0xb235, TSCH,    S,     Z,   0, insn, 0, 0, tsch, 0, IF_PRIV)
     /* ??? Not listed in PoO ninth edition, but there's a linux driver that
        uses it: "A CHSC subchannel is usually present on LPAR only."  */
-    C(0xb25f, CHSC,  RRE,     Z,   0, insn, 0, 0, chsc, 0)
+    F(0xb25f, CHSC,  RRE,     Z,   0, insn, 0, 0, chsc, 0, IF_PRIV)
 
 /* zPCI Instructions */
     /* None of these instructions are documented in the PoP, so this is all
        based upon target/s390x/kvm.c and Linux code and likely incomplete */
-    C(0xebd0, PCISTB, RSY_a, PCI, la2, 0, 0, 0, pcistb, 0)
-    C(0xebd1, SIC, RSY_a, AIS, r1, r3, 0, 0, sic, 0)
-    C(0xb9a0, CLP, RRF_c, PCI, 0, 0, 0, 0, clp, 0)
-    C(0xb9d0, PCISTG, RRE, PCI, 0, 0, 0, 0, pcistg, 0)
-    C(0xb9d2, PCILG, RRE, PCI, 0, 0, 0, 0, pcilg, 0)
-    C(0xb9d3, RPCIT, RRE, PCI, 0, 0, 0, 0, rpcit, 0)
-    C(0xe3d0, MPCIFC, RXY_a, PCI, la2, 0, 0, 0, mpcifc, 0)
-    C(0xe3d4, STPCIFC, RXY_a, PCI, la2, 0, 0, 0, stpcifc, 0)
+    F(0xebd0, PCISTB, RSY_a, PCI, la2, 0, 0, 0, pcistb, 0, IF_PRIV)
+    F(0xebd1, SIC, RSY_a, AIS, r1, r3, 0, 0, sic, 0, IF_PRIV)
+    F(0xb9a0, CLP, RRF_c, PCI, 0, 0, 0, 0, clp, 0, IF_PRIV)
+    F(0xb9d0, PCISTG, RRE, PCI, 0, 0, 0, 0, pcistg, 0, IF_PRIV)
+    F(0xb9d2, PCILG, RRE, PCI, 0, 0, 0, 0, pcilg, 0, IF_PRIV)
+    F(0xb9d3, RPCIT, RRE, PCI, 0, 0, 0, 0, rpcit, 0, IF_PRIV)
+    F(0xe3d0, MPCIFC, RXY_a, PCI, la2, 0, 0, 0, mpcifc, 0, IF_PRIV)
+    F(0xe3d4, STPCIFC, RXY_a, PCI, la2, 0, 0, 0, stpcifc, 0, IF_PRIV)
 
 #endif /* CONFIG_USER_ONLY */
diff --git a/target/s390x/interrupt.c b/target/s390x/interrupt.c
index 25cfb3e..a17eff5 100644
--- a/target/s390x/interrupt.c
+++ b/target/s390x/interrupt.c
@@ -15,6 +15,7 @@
 #include "exec/exec-all.h"
 #include "sysemu/kvm.h"
 #include "hw/s390x/ioinst.h"
+#include "tcg_s390x.h"
 #if !defined(CONFIG_USER_ONLY)
 #include "hw/s390x/s390_flic.h"
 #endif
@@ -29,25 +30,11 @@
     env->int_pgm_ilen = ilen;
 }
 
-static void tcg_s390_program_interrupt(CPUS390XState *env, uint32_t code,
-                                       int ilen, uintptr_t ra)
-{
-#ifdef CONFIG_TCG
-    trigger_pgm_exception(env, code, ilen);
-    cpu_loop_exit_restore(CPU(s390_env_get_cpu(env)), ra);
-#else
-    g_assert_not_reached();
-#endif
-}
-
 void s390_program_interrupt(CPUS390XState *env, uint32_t code, int ilen,
                             uintptr_t ra)
 {
     S390CPU *cpu = s390_env_get_cpu(env);
 
-    qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n",
-                  env->psw.addr);
-
     if (kvm_enabled()) {
         kvm_s390_program_interrupt(cpu, code);
     } else if (tcg_enabled()) {
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 348e8cc..2ebf26a 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -36,6 +36,7 @@
 #include "qemu/timer.h"
 #include "qemu/units.h"
 #include "qemu/mmap-alloc.h"
+#include "qemu/log.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/hw_accel.h"
 #include "hw/hw.h"
@@ -292,6 +293,12 @@
         return 0;
     }
 
+    if (!hpage_1m_allowed()) {
+        error_report("This QEMU machine does not support huge page "
+                     "mappings");
+        return -EINVAL;
+    }
+
     if (path_psize != 1 * MiB) {
         error_report("Memory backing with 2G pages was specified, "
                      "but KVM does not support this memory backing");
@@ -1109,7 +1116,8 @@
         .type = KVM_S390_PROGRAM_INT,
         .u.pgm.code = code,
     };
-
+    qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n",
+                  cpu->env.psw.addr);
     kvm_s390_vcpu_interrupt(cpu, &irq);
 }
 
@@ -2291,11 +2299,31 @@
         error_setg(errp, "KVM: host CPU model could not be identified");
         return;
     }
+    /* for now, we can only provide the AP feature with HW support */
+    if (kvm_vm_check_attr(kvm_state, KVM_S390_VM_CRYPTO,
+        KVM_S390_VM_CRYPTO_ENABLE_APIE)) {
+        set_bit(S390_FEAT_AP, model->features);
+    }
     /* strip of features that are not part of the maximum model */
     bitmap_and(model->features, model->features, model->def->full_feat,
                S390_FEAT_MAX);
 }
 
+/*
+ * Pick KVM_S390_VM_CRYPTO_ENABLE_APIE or _DISABLE_APIE according to
+ * @interpret and set it, but only if kvm_vm_check_attr() returns non-zero
+ * for that attribute; otherwise do nothing.
+ */
+static void kvm_s390_configure_apie(bool interpret)
+{
+    uint64_t attr = interpret ? KVM_S390_VM_CRYPTO_ENABLE_APIE :
+                                KVM_S390_VM_CRYPTO_DISABLE_APIE;
+
+    if (kvm_vm_check_attr(kvm_state, KVM_S390_VM_CRYPTO, attr)) {
+        kvm_s390_set_attr(attr);
+    }
+}
+
 void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp)
 {
     struct kvm_s390_vm_cpu_processor prop  = {
@@ -2345,6 +2368,10 @@
     if (test_bit(S390_FEAT_CMM, model->features)) {
         kvm_s390_enable_cmma();
     }
+
+    if (test_bit(S390_FEAT_AP, model->features)) {
+        kvm_s390_configure_apie(true);
+    }
 }
 
 void kvm_s390_restart_interrupt(S390CPU *cpu)
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index bacae4f..490c43e 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -25,6 +25,7 @@
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "qemu/int128.h"
+#include "qemu/atomic128.h"
 
 #if !defined(CONFIG_USER_ONLY)
 #include "hw/s390x/storage-keys.h"
@@ -1379,65 +1380,73 @@
     return cc;
 }
 
-static void do_cdsg(CPUS390XState *env, uint64_t addr,
-                    uint32_t r1, uint32_t r3, bool parallel)
+/*
+ * Non-atomic 128-bit compare-and-swap at @addr: two 8-byte loads are
+ * compared with r1:r1+1; r3:r3+1 is stored on a match, the old value is
+ * written back otherwise.  cc_op is set to 1 on mismatch and r1:r1+1
+ * always receive the old memory value.
+ */
+void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
+                  uint32_t r1, uint32_t r3)
 {
     uintptr_t ra = GETPC();
     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
     Int128 oldv;
+    uint64_t oldh, oldl;
     bool fail;
 
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
-        oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
-        fail = !int128_eq(oldv, cmpv);
-#endif
-    } else {
-        uint64_t oldh, oldl;
+    check_alignment(env, addr, 16, ra);
 
-        check_alignment(env, addr, 16, ra);
+    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
+    oldl = cpu_ldq_data_ra(env, addr + 8, ra);
 
-        oldh = cpu_ldq_data_ra(env, addr + 0, ra);
-        oldl = cpu_ldq_data_ra(env, addr + 8, ra);
-
-        oldv = int128_make128(oldl, oldh);
-        fail = !int128_eq(oldv, cmpv);
-        if (fail) {
-            newv = oldv;
-        }
-
-        cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
-        cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
+    oldv = int128_make128(oldl, oldh);
+    fail = !int128_eq(oldv, cmpv);
+    if (fail) {
+        newv = oldv;
     }
 
+    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
+    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
+
     env->cc_op = fail;
     env->regs[r1] = int128_gethi(oldv);
     env->regs[r1 + 1] = int128_getlo(oldv);
 }
 
-void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
-                  uint32_t r1, uint32_t r3)
-{
-    do_cdsg(env, addr, r1, r3, false);
-}
-
+/*
+ * Atomic counterpart of cdsg using helper_atomic_cmpxchgo_be_mmu() with
+ * MO_ALIGN_16.  Asserts HAVE_CMPXCHG128; the translator only emits this
+ * helper when that holds.
+ */
 void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
                            uint32_t r1, uint32_t r3)
 {
-    do_cdsg(env, addr, r1, r3, true);
+    uintptr_t ra = GETPC();
+    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
+    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
+    int mem_idx;
+    TCGMemOpIdx oi;
+    Int128 oldv;
+    bool fail;
+
+    assert(HAVE_CMPXCHG128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
+    fail = !int128_eq(oldv, cmpv);
+
+    env->cc_op = fail;
+    env->regs[r1] = int128_gethi(oldv);
+    env->regs[r1 + 1] = int128_getlo(oldv);
 }
 
 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
                         uint64_t a2, bool parallel)
 {
-#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128)
     uint32_t mem_idx = cpu_mmu_index(env, false);
-#endif
     uintptr_t ra = GETPC();
     uint32_t fc = extract32(env->regs[0], 0, 8);
     uint32_t sc = extract32(env->regs[0], 8, 8);
@@ -1465,18 +1463,20 @@
     probe_write(env, a2, 0, mem_idx, ra);
 #endif
 
-    /* Note that the compare-and-swap is atomic, and the store is atomic, but
-       the complete operation is not.  Therefore we do not need to assert serial
-       context in order to implement this.  That said, restart early if we can't
-       support either operation that is supposed to be atomic.  */
+    /*
+     * Note that the compare-and-swap is atomic, and the store is atomic,
+     * but the complete operation is not.  Therefore we do not need to
+     * assert serial context in order to implement this.  That said,
+     * restart early if we can't support either operation that is supposed
+     * to be atomic.
+     */
     if (parallel) {
-        int mask = 0;
-#if !defined(CONFIG_ATOMIC64)
-        mask = -8;
-#elif !defined(CONFIG_ATOMIC128)
-        mask = -16;
+        uint32_t max = 2;
+#ifdef CONFIG_ATOMIC64
+        max = 3;
 #endif
-        if (((4 << fc) | (1 << sc)) & mask) {
+        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
+            (HAVE_ATOMIC128  ? 0 : sc > max)) {
             cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
         }
     }
@@ -1546,16 +1546,7 @@
             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
             Int128 ov;
 
-            if (parallel) {
-#ifdef CONFIG_ATOMIC128
-                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
-                ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
-                cc = !int128_eq(ov, cv);
-#else
-                /* Note that we asserted !parallel above.  */
-                g_assert_not_reached();
-#endif
-            } else {
+            if (!parallel) {
                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
 
@@ -1567,6 +1558,13 @@
 
                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
+            } else if (HAVE_CMPXCHG128) {
+                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+                ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
+                cc = !int128_eq(ov, cv);
+            } else {
+                /* Note that we asserted !parallel above.  */
+                g_assert_not_reached();
             }
 
             env->regs[r3 + 0] = int128_gethi(ov);
@@ -1596,18 +1594,16 @@
             cpu_stq_data_ra(env, a2, svh, ra);
             break;
         case 4:
-            if (parallel) {
-#ifdef CONFIG_ATOMIC128
+            if (!parallel) {
+                cpu_stq_data_ra(env, a2 + 0, svh, ra);
+                cpu_stq_data_ra(env, a2 + 8, svl, ra);
+            } else if (HAVE_ATOMIC128) {
                 TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
                 Int128 sv = int128_make128(svl, svh);
                 helper_atomic_sto_be_mmu(env, a2, sv, oi, ra);
-#else
+            } else {
                 /* Note that we asserted !parallel above.  */
                 g_assert_not_reached();
-#endif
-            } else {
-                cpu_stq_data_ra(env, a2 + 0, svh, ra);
-                cpu_stq_data_ra(env, a2 + 8, svl, ra);
             }
             break;
         default:
@@ -2100,76 +2096,74 @@
 #endif
 
 /* load pair from quadword */
-static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel)
+uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
 {
     uintptr_t ra = GETPC();
     uint64_t hi, lo;
 
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
-        Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
-        hi = int128_gethi(v);
-        lo = int128_getlo(v);
-#endif
-    } else {
-        check_alignment(env, addr, 16, ra);
-
-        hi = cpu_ldq_data_ra(env, addr + 0, ra);
-        lo = cpu_ldq_data_ra(env, addr + 8, ra);
-    }
+    check_alignment(env, addr, 16, ra);
+    hi = cpu_ldq_data_ra(env, addr + 0, ra);
+    lo = cpu_ldq_data_ra(env, addr + 8, ra);
 
     env->retxl = lo;
     return hi;
 }
 
-uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
-{
-    return do_lpq(env, addr, false);
-}
-
+/*
+ * Atomic counterpart of lpq: the value is read as one Int128 through
+ * helper_atomic_ldo_be_mmu() with MO_ALIGN_16.  Asserts HAVE_ATOMIC128;
+ * the translator only emits this helper when that holds.
+ */
 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
 {
-    return do_lpq(env, addr, true);
+    uintptr_t ra = GETPC();
+    uint64_t hi, lo;
+    int mem_idx;
+    TCGMemOpIdx oi;
+    Int128 v;
+
+    assert(HAVE_ATOMIC128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+    v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
+    hi = int128_gethi(v);
+    lo = int128_getlo(v);
+
+    env->retxl = lo;
+    return hi;
 }
 
 /* store pair to quadword */
-static void do_stpq(CPUS390XState *env, uint64_t addr,
-                    uint64_t low, uint64_t high, bool parallel)
-{
-    uintptr_t ra = GETPC();
-
-    if (parallel) {
-#ifndef CONFIG_ATOMIC128
-        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
-        int mem_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
-
-        Int128 v = int128_make128(low, high);
-        helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
-#endif
-    } else {
-        check_alignment(env, addr, 16, ra);
-
-        cpu_stq_data_ra(env, addr + 0, high, ra);
-        cpu_stq_data_ra(env, addr + 8, low, ra);
-    }
-}
-
 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
                   uint64_t low, uint64_t high)
 {
-    do_stpq(env, addr, low, high, false);
+    uintptr_t ra = GETPC();
+
+    check_alignment(env, addr, 16, ra);
+    cpu_stq_data_ra(env, addr + 0, high, ra);
+    cpu_stq_data_ra(env, addr + 8, low, ra);
 }
 
+/*
+ * Atomic counterpart of stpq: @high:@low is written as one Int128 through
+ * helper_atomic_sto_be_mmu() with MO_ALIGN_16.  Asserts HAVE_ATOMIC128;
+ * the translator only emits this helper when that holds.
+ */
 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
                            uint64_t low, uint64_t high)
 {
-    do_stpq(env, addr, low, high, true);
+    uintptr_t ra = GETPC();
+    int mem_idx;
+    TCGMemOpIdx oi;
+    Int128 v;
+
+    assert(HAVE_ATOMIC128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+    v = int128_make128(low, high);
+    helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
 }
 
 /* Execute instruction.  This instruction executes an insn modified with
diff --git a/target/s390x/tcg-stub.c b/target/s390x/tcg-stub.c
index c93501d..32adb72 100644
--- a/target/s390x/tcg-stub.c
+++ b/target/s390x/tcg-stub.c
@@ -18,3 +18,17 @@
 void tcg_s390_tod_updated(CPUState *cs, run_on_cpu_data opaque)
 {
 }
+
+/* Stub (tcg-stub.c): never expected to be called; asserts unreachable. */
+void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, uint32_t code,
+                                              int ilen, uintptr_t ra)
+{
+    g_assert_not_reached();
+}
+
+/* Stub (tcg-stub.c): never expected to be called; asserts unreachable. */
+void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc,
+                                           uintptr_t ra)
+{
+    g_assert_not_reached();
+}
diff --git a/target/s390x/tcg_s390x.h b/target/s390x/tcg_s390x.h
index 4e308aa..ab2c4ba 100644
--- a/target/s390x/tcg_s390x.h
+++ b/target/s390x/tcg_s390x.h
@@ -14,5 +14,14 @@
 #define TCG_S390X_H
 
 void tcg_s390_tod_updated(CPUState *cs, run_on_cpu_data opaque);
+/*
+ * Declarations for the program-interrupt and data-exception entry points.
+ * Both are QEMU_NORETURN.  @ra is presumably the host return address of
+ * the calling helper -- confirm against the definitions.
+ */
+void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, uint32_t code,
+                                              int ilen, uintptr_t ra);
+void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc,
+                                           uintptr_t ra);
 
 #endif /* TCG_S390X_H */
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 7363aab..b5bd56b 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -44,6 +44,7 @@
 #include "trace-tcg.h"
 #include "exec/translator.h"
 #include "exec/log.h"
+#include "qemu/atomic128.h"
 
 
 /* Information that (most) every instruction needs to manipulate.  */
@@ -314,29 +315,20 @@
     gen_program_exception(s, PGM_OPERATION);
 }
 
+/* Emit a call to the data_exception helper, passing @dxc as a constant.  */
+static inline void gen_data_exception(uint8_t dxc)
+{
+    TCGv_i32 tmp = tcg_const_i32(dxc);
+    gen_helper_data_exception(cpu_env, tmp);
+    tcg_temp_free_i32(tmp);
+}
+
 static inline void gen_trap(DisasContext *s)
 {
-    TCGv_i32 t;
-
-    /* Set DXC to 0xff.  */
-    t = tcg_temp_new_i32();
-    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUS390XState, fpc));
-    tcg_gen_ori_i32(t, t, 0xff00);
-    tcg_gen_st_i32(t, cpu_env, offsetof(CPUS390XState, fpc));
-    tcg_temp_free_i32(t);
-
-    gen_program_exception(s, PGM_DATA);
+    /* Set DXC to 0xff */
+    gen_data_exception(0xff);
 }
 
-#ifndef CONFIG_USER_ONLY
-static void check_privileged(DisasContext *s)
-{
-    if (s->base.tb->flags & FLAG_MASK_PSTATE) {
-        gen_program_exception(s, PGM_PRIVILEGED);
-    }
-}
-#endif
-
 static TCGv_i64 get_address(DisasContext *s, int x2, int b2, int d2)
 {
     TCGv_i64 tmp = tcg_temp_new_i64();
@@ -1120,19 +1111,37 @@
 /* We are exiting the TB to the main loop.  */
 #define DISAS_PC_STALE_NOCHAIN  DISAS_TARGET_4
 
+
+/* Instruction flags */
+#define IF_AFP1     0x0001      /* r1 is a fp reg for HFP/FPS instructions */
+#define IF_AFP2     0x0002      /* r2 is a fp reg for HFP/FPS instructions */
+#define IF_AFP3     0x0004      /* r3 is a fp reg for HFP/FPS instructions */
+#define IF_BFP      0x0008      /* binary floating point instruction */
+#define IF_DFP      0x0010      /* decimal floating point instruction */
+#define IF_PRIV     0x0020      /* privileged instruction */
+
 struct DisasInsn {
     unsigned opc:16;
+    unsigned flags:16;
     DisasFormat fmt:8;
     unsigned fac:8;
     unsigned spec:8;
 
     const char *name;
 
+    /* Pre-process arguments before HELP_OP.  */
     void (*help_in1)(DisasContext *, DisasFields *, DisasOps *);
     void (*help_in2)(DisasContext *, DisasFields *, DisasOps *);
     void (*help_prep)(DisasContext *, DisasFields *, DisasOps *);
+
+    /*
+     * Post-process output after HELP_OP.
+     * Note that these are not called if HELP_OP returns DISAS_NORETURN.
+     */
     void (*help_wout)(DisasContext *, DisasFields *, DisasOps *);
     void (*help_cout)(DisasContext *, DisasOps *);
+
+    /* Implement the operation itself.  */
     DisasJumpType (*help_op)(DisasContext *, DisasOps *);
 
     uint64_t data;
@@ -2032,6 +2041,7 @@
     int r3 = get_field(s->fields, r3);
     int d2 = get_field(s->fields, d2);
     int b2 = get_field(s->fields, b2);
+    DisasJumpType ret = DISAS_NEXT;
     TCGv_i64 addr;
     TCGv_i32 t_r1, t_r3;
 
@@ -2039,17 +2049,20 @@
     addr = get_address(s, 0, b2, d2);
     t_r1 = tcg_const_i32(r1);
     t_r3 = tcg_const_i32(r3);
-    if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
+        gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
+    } else if (HAVE_CMPXCHG128) {
         gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3);
     } else {
-        gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
+        gen_helper_exit_atomic(cpu_env);
+        ret = DISAS_NORETURN;
     }
     tcg_temp_free_i64(addr);
     tcg_temp_free_i32(t_r1);
     tcg_temp_free_i32(t_r3);
 
     set_cc_static(s);
-    return DISAS_NEXT;
+    return ret;
 }
 
 static DisasJumpType op_csst(DisasContext *s, DisasOps *o)
@@ -2078,7 +2091,6 @@
     /* Note that in1 = R1 (zero-extended expected value),
        out = R1 (original reg), out2 = R1+1 (new value).  */
 
-    check_privileged(s);
     addr = tcg_temp_new_i64();
     old = tcg_temp_new_i64();
     tcg_gen_andi_i64(addr, o->in2, -1ULL << (mop & MO_SIZE));
@@ -2202,7 +2214,6 @@
     TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3));
     TCGv_i32 func_code = tcg_const_i32(get_field(s->fields, i2));
 
-    check_privileged(s);
     gen_helper_diag(cpu_env, r1, r3, func_code);
 
     tcg_temp_free_i32(func_code);
@@ -2463,7 +2474,6 @@
 {
     TCGv_i32 m4;
 
-    check_privileged(s);
     if (s390_has_feat(S390_FEAT_LOCAL_TLB_CLEARING)) {
         m4 = tcg_const_i32(get_field(s->fields, m4));
     } else {
@@ -2478,7 +2488,6 @@
 {
     TCGv_i32 m4;
 
-    check_privileged(s);
     if (s390_has_feat(S390_FEAT_LOCAL_TLB_CLEARING)) {
         m4 = tcg_const_i32(get_field(s->fields, m4));
     } else {
@@ -2491,7 +2500,6 @@
 
 static DisasJumpType op_iske(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_iske(o->out, cpu_env, o->in2);
     return DISAS_NEXT;
 }
@@ -2790,7 +2798,6 @@
 {
     TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
     TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3));
-    check_privileged(s);
     gen_helper_lctl(cpu_env, r1, o->in2, r3);
     tcg_temp_free_i32(r1);
     tcg_temp_free_i32(r3);
@@ -2802,7 +2809,6 @@
 {
     TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
     TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3));
-    check_privileged(s);
     gen_helper_lctlg(cpu_env, r1, o->in2, r3);
     tcg_temp_free_i32(r1);
     tcg_temp_free_i32(r3);
@@ -2812,7 +2818,6 @@
 
 static DisasJumpType op_lra(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_lra(o->out, cpu_env, o->in2);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -2820,8 +2825,6 @@
 
 static DisasJumpType op_lpp(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
-
     tcg_gen_st_i64(o->in2, cpu_env, offsetof(CPUS390XState, pp));
     return DISAS_NEXT;
 }
@@ -2830,12 +2833,12 @@
 {
     TCGv_i64 t1, t2;
 
-    check_privileged(s);
     per_breaking_event(s);
 
     t1 = tcg_temp_new_i64();
     t2 = tcg_temp_new_i64();
-    tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s));
+    tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s),
+                        MO_TEUL | MO_ALIGN_8);
     tcg_gen_addi_i64(o->in2, o->in2, 4);
     tcg_gen_qemu_ld32u(t2, o->in2, get_mem_index(s));
     /* Convert the 32-bit PSW_MASK into the 64-bit PSW_MASK.  */
@@ -2850,12 +2853,12 @@
 {
     TCGv_i64 t1, t2;
 
-    check_privileged(s);
     per_breaking_event(s);
 
     t1 = tcg_temp_new_i64();
     t2 = tcg_temp_new_i64();
-    tcg_gen_qemu_ld64(t1, o->in2, get_mem_index(s));
+    tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s),
+                        MO_TEQ | MO_ALIGN_8);
     tcg_gen_addi_i64(o->in2, o->in2, 8);
     tcg_gen_qemu_ld64(t2, o->in2, get_mem_index(s));
     gen_helper_load_psw(cpu_env, t1, t2);
@@ -3036,10 +3039,13 @@
 
 static DisasJumpType op_lpq(DisasContext *s, DisasOps *o)
 {
-    if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
+        gen_helper_lpq(o->out, cpu_env, o->in2);
+    } else if (HAVE_ATOMIC128) {
         gen_helper_lpq_parallel(o->out, cpu_env, o->in2);
     } else {
-        gen_helper_lpq(o->out, cpu_env, o->in2);
+        gen_helper_exit_atomic(cpu_env);
+        return DISAS_NORETURN;
     }
     return_low128(o->out2);
     return DISAS_NEXT;
@@ -3048,14 +3054,12 @@
 #ifndef CONFIG_USER_ONLY
 static DisasJumpType op_lura(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_lura(o->out, cpu_env, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_lurag(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_lurag(o->out, cpu_env, o->in2);
     return DISAS_NEXT;
 }
@@ -3214,7 +3218,6 @@
 static DisasJumpType op_mvcp(DisasContext *s, DisasOps *o)
 {
     int r1 = get_field(s->fields, l1);
-    check_privileged(s);
     gen_helper_mvcp(cc_op, cpu_env, regs[r1], o->addr1, o->in2);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -3223,7 +3226,6 @@
 static DisasJumpType op_mvcs(DisasContext *s, DisasOps *o)
 {
     int r1 = get_field(s->fields, l1);
-    check_privileged(s);
     gen_helper_mvcs(cc_op, cpu_env, regs[r1], o->addr1, o->in2);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -3509,7 +3511,6 @@
 #ifndef CONFIG_USER_ONLY
 static DisasJumpType op_ptlb(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_ptlb(cpu_env);
     return DISAS_NEXT;
 }
@@ -3700,7 +3701,6 @@
 #ifndef CONFIG_USER_ONLY
 static DisasJumpType op_rrbe(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_rrbe(cc_op, cpu_env, o->in2);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -3708,7 +3708,6 @@
 
 static DisasJumpType op_sacf(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_sacf(cpu_env, o->in2);
     /* Addressing mode has changed, so end the block.  */
     return DISAS_PC_STALE;
@@ -3798,7 +3797,6 @@
 #ifndef CONFIG_USER_ONLY
 static DisasJumpType op_servc(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_servc(cc_op, cpu_env, o->in2, o->in1);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -3808,7 +3806,6 @@
 {
     TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
     TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3));
-    check_privileged(s);
     gen_helper_sigp(cc_op, cpu_env, o->in2, r1, r3);
     set_cc_static(s);
     tcg_temp_free_i32(r1);
@@ -3990,7 +3987,6 @@
 #ifndef CONFIG_USER_ONLY
 static DisasJumpType op_spka(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     tcg_gen_shri_i64(o->in2, o->in2, 4);
     tcg_gen_deposit_i64(psw_mask, psw_mask, o->in2, PSW_SHIFT_KEY, 4);
     return DISAS_NEXT;
@@ -3998,14 +3994,12 @@
 
 static DisasJumpType op_sske(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_sske(cpu_env, o->in1, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_ssm(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     tcg_gen_deposit_i64(psw_mask, psw_mask, o->in2, 56, 8);
     /* Exit to main loop to reevaluate s390_cpu_exec_interrupt.  */
     return DISAS_PC_STALE_NOCHAIN;
@@ -4013,7 +4007,6 @@
 
 static DisasJumpType op_stap(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     tcg_gen_ld32u_i64(o->out, cpu_env, offsetof(CPUS390XState, core_id));
     return DISAS_NEXT;
 }
@@ -4055,7 +4048,6 @@
 
 static DisasJumpType op_sck(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     tcg_gen_qemu_ld_i64(o->in1, o->addr1, get_mem_index(s), MO_TEQ | MO_ALIGN);
     gen_helper_sck(cc_op, cpu_env, o->in1);
     set_cc_static(s);
@@ -4064,21 +4056,18 @@
 
 static DisasJumpType op_sckc(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_sckc(cpu_env, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_sckpf(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_sckpf(cpu_env, regs[0]);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_stckc(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_stckc(o->out, cpu_env);
     return DISAS_NEXT;
 }
@@ -4087,7 +4076,6 @@
 {
     TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
     TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3));
-    check_privileged(s);
     gen_helper_stctg(cpu_env, r1, o->in2, r3);
     tcg_temp_free_i32(r1);
     tcg_temp_free_i32(r3);
@@ -4098,7 +4086,6 @@
 {
     TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
     TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3));
-    check_privileged(s);
     gen_helper_stctl(cpu_env, r1, o->in2, r3);
     tcg_temp_free_i32(r1);
     tcg_temp_free_i32(r3);
@@ -4107,35 +4094,30 @@
 
 static DisasJumpType op_stidp(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     tcg_gen_ld_i64(o->out, cpu_env, offsetof(CPUS390XState, cpuid));
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_spt(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_spt(cpu_env, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_stfl(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_stfl(cpu_env);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_stpt(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_stpt(o->out, cpu_env);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_stsi(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_stsi(cc_op, cpu_env, o->in2, regs[0], regs[1]);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -4143,14 +4125,12 @@
 
 static DisasJumpType op_spx(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_spx(cpu_env, o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_xsch(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_xsch(cpu_env, regs[1]);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -4158,7 +4138,6 @@
 
 static DisasJumpType op_csch(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_csch(cpu_env, regs[1]);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -4166,7 +4145,6 @@
 
 static DisasJumpType op_hsch(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_hsch(cpu_env, regs[1]);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -4174,7 +4152,6 @@
 
 static DisasJumpType op_msch(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_msch(cpu_env, regs[1], o->in2);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -4182,7 +4159,6 @@
 
 static DisasJumpType op_rchp(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_rchp(cpu_env, regs[1]);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -4190,7 +4166,6 @@
 
 static DisasJumpType op_rsch(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_rsch(cpu_env, regs[1]);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -4198,21 +4173,18 @@
 
 static DisasJumpType op_sal(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_sal(cpu_env, regs[1]);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_schm(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_schm(cpu_env, regs[1], regs[2], o->in2);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_siga(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     /* From KVM code: Not provided, set CC = 3 for subchannel not operational */
     gen_op_movi_cc(s, 3);
     return DISAS_NEXT;
@@ -4220,14 +4192,12 @@
 
 static DisasJumpType op_stcps(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     /* The instruction is suppressed if not provided. */
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_ssch(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_ssch(cpu_env, regs[1], o->in2);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -4235,7 +4205,6 @@
 
 static DisasJumpType op_stsch(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_stsch(cpu_env, regs[1], o->in2);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -4243,7 +4212,6 @@
 
 static DisasJumpType op_stcrw(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_stcrw(cpu_env, o->in2);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -4251,7 +4219,6 @@
 
 static DisasJumpType op_tpi(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_tpi(cc_op, cpu_env, o->addr1);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -4259,7 +4226,6 @@
 
 static DisasJumpType op_tsch(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_tsch(cpu_env, regs[1], o->in2);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -4267,7 +4233,6 @@
 
 static DisasJumpType op_chsc(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_chsc(cpu_env, o->in2);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -4275,7 +4240,6 @@
 
 static DisasJumpType op_stpx(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     tcg_gen_ld_i64(o->out, cpu_env, offsetof(CPUS390XState, psa));
     tcg_gen_andi_i64(o->out, o->out, 0x7fffe000);
     return DISAS_NEXT;
@@ -4286,8 +4250,6 @@
     uint64_t i2 = get_field(s->fields, i2);
     TCGv_i64 t;
 
-    check_privileged(s);
-
     /* It is important to do what the instruction name says: STORE THEN.
        If we let the output hook perform the store then if we fault and
        restart, we'll have the wrong SYSTEM MASK in place.  */
@@ -4309,14 +4271,12 @@
 
 static DisasJumpType op_stura(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_stura(cpu_env, o->in2, o->in1);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_sturg(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_sturg(cpu_env, o->in2, o->in1);
     return DISAS_NEXT;
 }
@@ -4462,10 +4422,18 @@
 
+/*
+ * Emit the store-pair-to-quadword helper: the plain helper when the TB is
+ * not CF_PARALLEL, the _parallel helper when HAVE_ATOMIC128, and otherwise
+ * the exit_atomic helper, returning DISAS_NORETURN.
+ */
 static DisasJumpType op_stpq(DisasContext *s, DisasOps *o)
 {
-    if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
+        gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
+    } else if (HAVE_ATOMIC128) {
         gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out);
     } else {
-        gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
+        gen_helper_exit_atomic(cpu_env);
+        return DISAS_NORETURN;
     }
     return DISAS_NEXT;
 }
@@ -4582,7 +4545,6 @@
 
 static DisasJumpType op_testblock(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_testblock(cc_op, cpu_env, o->in2);
     set_cc_static(s);
     return DISAS_NEXT;
@@ -4840,7 +4802,6 @@
 {
     TCGv_i32 r2 = tcg_const_i32(get_field(s->fields, r2));
 
-    check_privileged(s);
     gen_helper_clp(cpu_env, r2);
     tcg_temp_free_i32(r2);
     set_cc_static(s);
@@ -4852,7 +4813,6 @@
     TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
     TCGv_i32 r2 = tcg_const_i32(get_field(s->fields, r2));
 
-    check_privileged(s);
     gen_helper_pcilg(cpu_env, r1, r2);
     tcg_temp_free_i32(r1);
     tcg_temp_free_i32(r2);
@@ -4865,7 +4825,6 @@
     TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
     TCGv_i32 r2 = tcg_const_i32(get_field(s->fields, r2));
 
-    check_privileged(s);
     gen_helper_pcistg(cpu_env, r1, r2);
     tcg_temp_free_i32(r1);
     tcg_temp_free_i32(r2);
@@ -4878,7 +4837,6 @@
     TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
     TCGv_i32 ar = tcg_const_i32(get_field(s->fields, b2));
 
-    check_privileged(s);
     gen_helper_stpcifc(cpu_env, r1, o->addr1, ar);
     tcg_temp_free_i32(ar);
     tcg_temp_free_i32(r1);
@@ -4888,7 +4846,6 @@
 
 static DisasJumpType op_sic(DisasContext *s, DisasOps *o)
 {
-    check_privileged(s);
     gen_helper_sic(cpu_env, o->in1, o->in2);
     return DISAS_NEXT;
 }
@@ -4898,7 +4855,6 @@
     TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
     TCGv_i32 r2 = tcg_const_i32(get_field(s->fields, r2));
 
-    check_privileged(s);
     gen_helper_rpcit(cpu_env, r1, r2);
     tcg_temp_free_i32(r1);
     tcg_temp_free_i32(r2);
@@ -4912,7 +4868,6 @@
     TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3));
     TCGv_i32 ar = tcg_const_i32(get_field(s->fields, b2));
 
-    check_privileged(s);
     gen_helper_pcistb(cpu_env, r1, r3, o->addr1, ar);
     tcg_temp_free_i32(ar);
     tcg_temp_free_i32(r1);
@@ -4926,7 +4881,6 @@
     TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
     TCGv_i32 ar = tcg_const_i32(get_field(s->fields, b2));
 
-    check_privileged(s);
     gen_helper_mpcifc(cpu_env, r1, o->addr1, ar);
     tcg_temp_free_i32(ar);
     tcg_temp_free_i32(r1);
@@ -5834,17 +5788,24 @@
    search tree, rather than us having to post-process the table.  */
 
 #define C(OPC, NM, FT, FC, I1, I2, P, W, OP, CC) \
-    D(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, 0)
+    E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, 0, 0)
 
-#define D(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D) insn_ ## NM,
+#define D(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D) \
+    E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, 0)
+
+#define F(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, FL) \
+    E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, 0, FL)
+
+#define E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, FL) insn_ ## NM,
 
 enum DisasInsnEnum {
 #include "insn-data.def"
 };
 
-#undef D
-#define D(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D) {                       \
+#undef E
+#define E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, FL) {                   \
     .opc = OPC,                                                             \
+    .flags = FL,                                                            \
     .fmt = FMT_##FT,                                                        \
     .fac = FAC_##FC,                                                        \
     .spec = SPEC_in1_##I1 | SPEC_in2_##I2 | SPEC_prep_##P | SPEC_wout_##W,  \
@@ -5915,8 +5876,8 @@
 #include "insn-data.def"
 };
 
-#undef D
-#define D(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D) \
+#undef E
+#define E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, FL) \
     case OPC: return &insn_info[insn_ ## NM];
 
 static const DisasInsn *lookup_opc(uint16_t opc)
@@ -5928,6 +5889,8 @@
     }
 }
 
+#undef F
+#undef E
 #undef D
 #undef C
 
@@ -6075,6 +6038,17 @@
     return info;
 }
 
+static bool is_afp_reg(int reg)
+{
+    return reg % 2 || reg > 6;
+}
+
+static bool is_fp_pair(int reg)
+{
+    /* 0,1,4,5,8,9,12,13: to exclude the others, check for single bit */
+    return !(reg & 0x2);
+}
+
 static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s)
 {
     const DisasInsn *insn;
@@ -6101,42 +6075,48 @@
     }
 #endif
 
+    /* process flags */
+    if (insn->flags) {
+        /* privileged instruction */
+        if ((s->base.tb->flags & FLAG_MASK_PSTATE) && (insn->flags & IF_PRIV)) {
+            gen_program_exception(s, PGM_PRIVILEGED);
+            return DISAS_NORETURN;
+        }
+
+        /* if AFP is not enabled, instructions and registers are forbidden */
+        if (!(s->base.tb->flags & FLAG_MASK_AFP)) {
+            uint8_t dxc = 0;
+
+            if ((insn->flags & IF_AFP1) && is_afp_reg(get_field(&f, r1))) {
+                dxc = 1;
+            }
+            if ((insn->flags & IF_AFP2) && is_afp_reg(get_field(&f, r2))) {
+                dxc = 1;
+            }
+            if ((insn->flags & IF_AFP3) && is_afp_reg(get_field(&f, r3))) {
+                dxc = 1;
+            }
+            if (insn->flags & IF_BFP) {
+                dxc = 2;
+            }
+            if (insn->flags & IF_DFP) {
+                dxc = 3;
+            }
+            if (dxc) {
+                gen_data_exception(dxc);
+                return DISAS_NORETURN;
+            }
+        }
+    }
+
     /* Check for insn specification exceptions.  */
     if (insn->spec) {
-        int spec = insn->spec, excp = 0, r;
-
-        if (spec & SPEC_r1_even) {
-            r = get_field(&f, r1);
-            if (r & 1) {
-                excp = PGM_SPECIFICATION;
-            }
-        }
-        if (spec & SPEC_r2_even) {
-            r = get_field(&f, r2);
-            if (r & 1) {
-                excp = PGM_SPECIFICATION;
-            }
-        }
-        if (spec & SPEC_r3_even) {
-            r = get_field(&f, r3);
-            if (r & 1) {
-                excp = PGM_SPECIFICATION;
-            }
-        }
-        if (spec & SPEC_r1_f128) {
-            r = get_field(&f, r1);
-            if (r > 13) {
-                excp = PGM_SPECIFICATION;
-            }
-        }
-        if (spec & SPEC_r2_f128) {
-            r = get_field(&f, r2);
-            if (r > 13) {
-                excp = PGM_SPECIFICATION;
-            }
-        }
-        if (excp) {
-            gen_program_exception(s, excp);
+        if ((insn->spec & SPEC_r1_even && get_field(&f, r1) & 1) ||
+            (insn->spec & SPEC_r2_even && get_field(&f, r2) & 1) ||
+            (insn->spec & SPEC_r3_even && get_field(&f, r3) & 1) ||
+            (insn->spec & SPEC_r1_f128 && !is_fp_pair(get_field(&f, r1))) ||
+            (insn->spec & SPEC_r2_f128 && !is_fp_pair(get_field(&f, r2)))) {
+            gen_program_exception(s, PGM_SPECIFICATION);
             return DISAS_NORETURN;
         }
     }
@@ -6164,11 +6144,13 @@
     if (insn->help_op) {
         ret = insn->help_op(s, &o);
     }
-    if (insn->help_wout) {
-        insn->help_wout(s, &f, &o);
-    }
-    if (insn->help_cout) {
-        insn->help_cout(s, &o);
+    if (ret != DISAS_NORETURN) {
+        if (insn->help_wout) {
+            insn->help_wout(s, &f, &o);
+        }
+        if (insn->help_cout) {
+            insn->help_cout(s, &o);
+        }
     }
 
     /* Free any temporaries created by the helpers.  */
diff --git a/target/unicore32/cpu.c b/target/unicore32/cpu.c
index 68f978d..2b49d1c 100644
--- a/target/unicore32/cpu.c
+++ b/target/unicore32/cpu.c
@@ -116,8 +116,6 @@
     env->uncached_asr = ASR_MODE_PRIV;
     env->regs[31] = 0x03000000;
 #endif
-
-    tlb_flush(cs);
 }
 
 static const VMStateDescription vmstate_uc32_cpu = {
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index daa416a..7a8015c 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2586,6 +2586,10 @@
            seen this numbered exit before, via tcg_gen_goto_tb.  */
         tcg_debug_assert(tcg_ctx->goto_tb_issue_mask & (1 << idx));
 #endif
+        /* When not chaining, exit without indicating a link.  */
+        if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
+            val = 0;
+        }
     } else {
         /* This is an exit via the exitreq label.  */
         tcg_debug_assert(idx == TB_EXIT_REQUESTED);
@@ -2603,7 +2607,10 @@
     tcg_debug_assert((tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0);
     tcg_ctx->goto_tb_issue_mask |= 1 << idx;
 #endif
-    tcg_gen_op1i(INDEX_op_goto_tb, idx);
+    /* When not chaining, we simply fall through to the "fallback" exit.  */
+    if (!qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
+        tcg_gen_op1i(INDEX_op_goto_tb, idx);
+    }
 }
 
 void tcg_gen_lookup_and_goto_ptr(void)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index f27b22b..e85133e 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -30,6 +30,7 @@
 /* Define to jump the ELF file used to communicate with GDB.  */
 #undef DEBUG_JIT
 
+#include "qemu/error-report.h"
 #include "qemu/cutils.h"
 #include "qemu/host-utils.h"
 #include "qemu/timer.h"
@@ -3361,6 +3362,7 @@
         const TCGProfile *orig = &s->prof;
 
         if (counters) {
+            PROF_ADD(prof, orig, cpu_exec_time);
             PROF_ADD(prof, orig, tb_count1);
             PROF_ADD(prof, orig, tb_count);
             PROF_ADD(prof, orig, op_count);
@@ -3412,11 +3414,32 @@
                     prof.table_op_count[i]);
     }
 }
+
+int64_t tcg_cpu_exec_time(void)
+{
+    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
+    unsigned int i;
+    int64_t ret = 0;
+
+    for (i = 0; i < n_ctxs; i++) {
+        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
+        const TCGProfile *prof = &s->prof;
+
+        ret += atomic_read(&prof->cpu_exec_time);
+    }
+    return ret;
+}
 #else
 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
 {
     cpu_fprintf(f, "[TCG profiler not compiled]\n");
 }
+
+int64_t tcg_cpu_exec_time(void)
+{
+    error_report("%s: TCG profiler not compiled", __func__);
+    exit(EXIT_FAILURE);
+}
 #endif
 
 
@@ -3430,7 +3453,7 @@
 
 #ifdef CONFIG_PROFILER
     {
-        int n;
+        int n = 0;
 
         QTAILQ_FOREACH(op, &s->ops, link) {
             n++;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index f9f1237..f4efbaa 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -32,6 +32,7 @@
 #include "qemu/queue.h"
 #include "tcg-mo.h"
 #include "tcg-target.h"
+#include "qemu/int128.h"
 
 /* XXX: make safe guess about sizes */
 #define MAX_OP_PER_INSTR 266
@@ -629,12 +630,13 @@
 QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8));
 
 typedef struct TCGProfile {
+    int64_t cpu_exec_time;
     int64_t tb_count1;
     int64_t tb_count;
     int64_t op_count; /* total insn count */
     int op_count_max; /* max insn per TB */
-    int64_t temp_count;
     int temp_count_max;
+    int64_t temp_count;
     int64_t del_op_count;
     int64_t code_in_len;
     int64_t code_out_len;
@@ -1002,6 +1004,7 @@
 #define tcg_check_temp_count() 0
 #endif
 
+int64_t tcg_cpu_exec_time(void);
 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf);
 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf);
 
@@ -1454,11 +1457,14 @@
 #undef GEN_ATOMIC_HELPER
 #endif /* CONFIG_SOFTMMU */
 
-#ifdef CONFIG_ATOMIC128
-#include "qemu/int128.h"
-
-/* These aren't really a "proper" helpers because TCG cannot manage Int128.
-   However, use the same format as the others, for use by the backends. */
+/*
+ * These aren't really a "proper" helpers because TCG cannot manage Int128.
+ * However, use the same format as the others, for use by the backends.
+ *
+ * The cmpxchg functions are only defined if HAVE_CMPXCHG128;
+ * the ld/st functions are only defined if HAVE_ATOMIC128,
+ * as defined by <qemu/atomic128.h>.
+ */
 Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr,
                                      Int128 cmpv, Int128 newv,
                                      TCGMemOpIdx oi, uintptr_t retaddr);
@@ -1475,6 +1481,4 @@
 void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
                               TCGMemOpIdx oi, uintptr_t retaddr);
 
-#endif /* CONFIG_ATOMIC128 */
-
 #endif /* TCG_H */
diff --git a/tests/Makefile.include b/tests/Makefile.include
index d0c0a92..f77a495 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -38,107 +38,62 @@
 SYSEMU_TARGET_LIST := $(subst -softmmu.mak,,$(notdir \
    $(wildcard $(SRC_PATH)/default-configs/*-softmmu.mak)))
 
-check-unit-y = tests/check-qdict$(EXESUF)
-gcov-files-check-qdict-y = qobject/qdict.c
-check-unit-y = tests/check-block-qdict$(EXESUF)
-gcov-files-check-block-qdict-y = qobject/block-qdict.c
+check-unit-y += tests/check-qdict$(EXESUF)
+check-unit-y += tests/check-block-qdict$(EXESUF)
 check-unit-y += tests/test-char$(EXESUF)
-gcov-files-check-qdict-y = chardev/char.c
 check-unit-y += tests/check-qnum$(EXESUF)
-gcov-files-check-qnum-y = qobject/qnum.c
 check-unit-y += tests/check-qstring$(EXESUF)
-gcov-files-check-qstring-y = qobject/qstring.c
 check-unit-y += tests/check-qlist$(EXESUF)
-gcov-files-check-qlist-y = qobject/qlist.c
 check-unit-y += tests/check-qnull$(EXESUF)
-gcov-files-check-qnull-y = qobject/qnull.c
 check-unit-y += tests/check-qobject$(EXESUF)
 check-unit-y += tests/check-qjson$(EXESUF)
-gcov-files-check-qjson-y = qobject/qjson.c
 check-unit-y += tests/check-qlit$(EXESUF)
-gcov-files-check-qlit-y = qobject/qlit.c
 check-unit-y += tests/test-qobject-output-visitor$(EXESUF)
-gcov-files-test-qobject-output-visitor-y = qapi/qobject-output-visitor.c
 check-unit-y += tests/test-clone-visitor$(EXESUF)
-gcov-files-test-clone-visitor-y = qapi/qapi-clone-visitor.c
 check-unit-y += tests/test-qobject-input-visitor$(EXESUF)
-gcov-files-test-qobject-input-visitor-y = qapi/qobject-input-visitor.c
 check-unit-y += tests/test-qmp-cmds$(EXESUF)
-gcov-files-test-qmp-cmds-y = qapi/qmp-dispatch.c
 check-unit-y += tests/test-string-input-visitor$(EXESUF)
-gcov-files-test-string-input-visitor-y = qapi/string-input-visitor.c
 check-unit-y += tests/test-string-output-visitor$(EXESUF)
-gcov-files-test-string-output-visitor-y = qapi/string-output-visitor.c
 check-unit-y += tests/test-qmp-event$(EXESUF)
-gcov-files-test-qmp-event-y += qapi/qmp-event.c
 check-unit-y += tests/test-opts-visitor$(EXESUF)
-gcov-files-test-opts-visitor-y = qapi/opts-visitor.c
 check-unit-y += tests/test-coroutine$(EXESUF)
-gcov-files-test-coroutine-y = coroutine-$(CONFIG_COROUTINE_BACKEND).c
 check-unit-y += tests/test-visitor-serialization$(EXESUF)
 check-unit-y += tests/test-iov$(EXESUF)
-gcov-files-test-iov-y = util/iov.c
 check-unit-y += tests/test-aio$(EXESUF)
-gcov-files-test-aio-y = util/async.c util/qemu-timer.o
-gcov-files-test-aio-$(CONFIG_WIN32) += util/aio-win32.c
-gcov-files-test-aio-$(CONFIG_POSIX) += util/aio-posix.c
 check-unit-y += tests/test-aio-multithread$(EXESUF)
-gcov-files-test-aio-multithread-y = $(gcov-files-test-aio-y)
-gcov-files-test-aio-multithread-y += util/qemu-coroutine.c tests/iothread.c
 check-unit-y += tests/test-throttle$(EXESUF)
 check-unit-y += tests/test-thread-pool$(EXESUF)
-gcov-files-test-thread-pool-y = thread-pool.c
-gcov-files-test-hbitmap-y = util/hbitmap.c
 check-unit-y += tests/test-hbitmap$(EXESUF)
-gcov-files-test-hbitmap-y = blockjob.c
 check-unit-y += tests/test-bdrv-drain$(EXESUF)
 check-unit-y += tests/test-blockjob$(EXESUF)
 check-unit-y += tests/test-blockjob-txn$(EXESUF)
 check-unit-y += tests/test-block-backend$(EXESUF)
 check-unit-y += tests/test-x86-cpuid$(EXESUF)
 # all code tested by test-x86-cpuid is inside topology.h
-gcov-files-test-x86-cpuid-y =
 ifeq ($(CONFIG_SOFTMMU),y)
 check-unit-y += tests/test-xbzrle$(EXESUF)
-gcov-files-test-xbzrle-y = migration/xbzrle.c
 check-unit-$(CONFIG_POSIX) += tests/test-vmstate$(EXESUF)
 endif
 check-unit-y += tests/test-cutils$(EXESUF)
-gcov-files-test-cutils-y += util/cutils.c
 check-unit-y += tests/test-shift128$(EXESUF)
-gcov-files-test-shift128-y = util/host-utils.c
 check-unit-y += tests/test-mul64$(EXESUF)
-gcov-files-test-mul64-y = util/host-utils.c
 check-unit-y += tests/test-int128$(EXESUF)
 # all code tested by test-int128 is inside int128.h
-gcov-files-test-int128-y =
 check-unit-y += tests/rcutorture$(EXESUF)
-gcov-files-rcutorture-y = util/rcu.c
 check-unit-y += tests/test-rcu-list$(EXESUF)
-gcov-files-test-rcu-list-y = util/rcu.c
 check-unit-y += tests/test-rcu-simpleq$(EXESUF)
-gcov-files-test-rcu-simpleq-y = util/rcu.c
 check-unit-y += tests/test-rcu-tailq$(EXESUF)
-gcov-files-test-rcu-tailq-y = util/rcu.c
 check-unit-y += tests/test-qdist$(EXESUF)
-gcov-files-test-qdist-y = util/qdist.c
 check-unit-y += tests/test-qht$(EXESUF)
-gcov-files-test-qht-y = util/qht.c
 check-unit-y += tests/test-qht-par$(EXESUF)
-gcov-files-test-qht-par-y = util/qht.c
 check-unit-y += tests/test-bitops$(EXESUF)
 check-unit-y += tests/test-bitcnt$(EXESUF)
 check-unit-y += tests/test-qdev-global-props$(EXESUF)
 check-unit-y += tests/check-qom-interface$(EXESUF)
-gcov-files-check-qom-interface-y = qom/object.c
 check-unit-y += tests/check-qom-proplist$(EXESUF)
-gcov-files-check-qom-proplist-y = qom/object.c
 check-unit-y += tests/test-qemu-opts$(EXESUF)
-gcov-files-test-qemu-opts-y = util/qemu-option.c
 check-unit-y += tests/test-keyval$(EXESUF)
-gcov-files-test-keyval-y = util/keyval.c
 check-unit-y += tests/test-write-threshold$(EXESUF)
-gcov-files-test-write-threshold-y = block/write-threshold.c
 check-unit-y += tests/test-crypto-hash$(EXESUF)
 check-speed-y += tests/benchmark-crypto-hash$(EXESUF)
 check-unit-y += tests/test-crypto-hmac$(EXESUF)
@@ -160,109 +115,66 @@
 check-unit-y += tests/test-io-channel-command$(EXESUF)
 check-unit-y += tests/test-io-channel-buffer$(EXESUF)
 check-unit-y += tests/test-base64$(EXESUF)
-check-unit-$(if $(CONFIG_NETTLE_KDF),y,$(CONFIG_GCRYPT_KDF)) += tests/test-crypto-pbkdf$(EXESUF)
+check-unit-$(if $(CONFIG_NETTLE),y,$(CONFIG_GCRYPT)) += tests/test-crypto-pbkdf$(EXESUF)
 check-unit-y += tests/test-crypto-ivgen$(EXESUF)
 check-unit-y += tests/test-crypto-afsplit$(EXESUF)
 check-unit-y += tests/test-crypto-xts$(EXESUF)
 check-unit-y += tests/test-crypto-block$(EXESUF)
 check-unit-y += tests/test-logging$(EXESUF)
-gcov-files-test-logging-y = util/log.c
 check-unit-$(CONFIG_REPLICATION) += tests/test-replication$(EXESUF)
 check-unit-y += tests/test-bufferiszero$(EXESUF)
-gcov-files-check-bufferiszero-y = util/bufferiszero.c
 check-unit-y += tests/test-uuid$(EXESUF)
 check-unit-y += tests/ptimer-test$(EXESUF)
-gcov-files-ptimer-test-y = hw/core/ptimer.c
 check-unit-y += tests/test-qapi-util$(EXESUF)
-gcov-files-test-qapi-util-y = qapi/qapi-util.c
 
 check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
 
 # All QTests for now are POSIX-only, but the dependencies are
 # really in libqtest, not in the testcases themselves.
 
-check-qtest-generic-y = tests/qmp-test$(EXESUF)
-gcov-files-generic-y = monitor.c qapi/qmp-dispatch.c
+check-qtest-generic-y += tests/qmp-test$(EXESUF)
 check-qtest-generic-y += tests/qmp-cmd-test$(EXESUF)
 
 check-qtest-generic-y += tests/device-introspect-test$(EXESUF)
-gcov-files-generic-y = qdev-monitor.c qmp.c
 check-qtest-generic-y += tests/cdrom-test$(EXESUF)
 
-gcov-files-ipack-y += hw/ipack/ipack.c
 check-qtest-ipack-y += tests/ipoctal232-test$(EXESUF)
-gcov-files-ipack-y += hw/char/ipoctal232.c
 
 check-qtest-virtioserial-y += tests/virtio-console-test$(EXESUF)
-gcov-files-virtioserial-y += hw/char/virtio-console.c
 
-gcov-files-virtio-y += i386-softmmu/hw/virtio/virtio.c
 check-qtest-virtio-y += tests/virtio-net-test$(EXESUF)
-gcov-files-virtio-y += i386-softmmu/hw/net/virtio-net.c
 check-qtest-virtio-y += tests/virtio-balloon-test$(EXESUF)
-gcov-files-virtio-y += i386-softmmu/hw/virtio/virtio-balloon.c
 check-qtest-virtio-y += tests/virtio-blk-test$(EXESUF)
-gcov-files-virtio-y += i386-softmmu/hw/block/virtio-blk.c
 check-qtest-virtio-y += tests/virtio-rng-test$(EXESUF)
-gcov-files-virtio-y += hw/virtio/virtio-rng.c
 check-qtest-virtio-y += tests/virtio-scsi-test$(EXESUF)
-gcov-files-virtio-y += i386-softmmu/hw/scsi/virtio-scsi.c
 ifeq ($(CONFIG_VIRTIO)$(CONFIG_VIRTFS)$(CONFIG_PCI),yyy)
 check-qtest-virtio-y += tests/virtio-9p-test$(EXESUF)
-gcov-files-virtio-y += hw/9pfs/virtio-9p.c
-gcov-files-virtio-y += i386-softmmu/hw/9pfs/virtio-9p-device.c
 endif
 check-qtest-virtio-y += tests/virtio-serial-test$(EXESUF)
-gcov-files-virtio-y += i386-softmmu/hw/char/virtio-serial-bus.c
 check-qtest-virtio-y += $(check-qtest-virtioserial-y)
-gcov-files-virtio-y += $(gcov-files-virtioserial-y)
 
 check-qtest-pci-y += tests/e1000-test$(EXESUF)
-gcov-files-pci-y += hw/net/e1000.c
 check-qtest-pci-y += tests/e1000e-test$(EXESUF)
-gcov-files-pci-y += hw/net/e1000e.c hw/net/e1000e_core.c
 check-qtest-pci-$(CONFIG_RTL8139_PCI) += tests/rtl8139-test$(EXESUF)
-gcov-files-pci-$(CONFIG_RTL8139_PCI) += hw/net/rtl8139.c
 check-qtest-pci-$(CONFIG_PCNET_PCI) += tests/pcnet-test$(EXESUF)
-gcov-files-pci-$(CONFIG_PCNET_PCI) += hw/net/pcnet.c
-gcov-files-pci-$(CONFIG_PCNET_PCI) += hw/net/pcnet-pci.c
 check-qtest-pci-$(CONFIG_EEPRO100_PCI) += tests/eepro100-test$(EXESUF)
-gcov-files-pci-$(CONFIG_EEPRO100_PCI) += hw/net/eepro100.c
 check-qtest-pci-$(CONFIG_NE2000_PCI) += tests/ne2000-test$(EXESUF)
-gcov-files-pci-$(CONFIG_NE2000_PCI) += hw/net/ne2000.c
 check-qtest-pci-$(CONFIG_NVME_PCI) += tests/nvme-test$(EXESUF)
-gcov-files-pci-$(CONFIG_NVME_PCI) += hw/block/nvme.c
 check-qtest-pci-$(CONFIG_AC97) += tests/ac97-test$(EXESUF)
-gcov-files-pci-$(CONFIG_AC97) += hw/audio/ac97.c
 check-qtest-pci-$(CONFIG_ES1370) += tests/es1370-test$(EXESUF)
-gcov-files-pci-$(CONFIG_ES1370) += hw/audio/es1370.c
 check-qtest-pci-y += $(check-qtest-virtio-y)
-gcov-files-pci-y += $(gcov-files-virtio-y) hw/virtio/virtio-pci.c
 check-qtest-pci-$(CONFIG_IPACK) += tests/tpci200-test$(EXESUF)
-gcov-files-pci-$(CONFIG_IPACK) += hw/ipack/tpci200.c
 check-qtest-pci-$(CONFIG_IPACK) += $(check-qtest-ipack-y)
-gcov-files-pci-$(CONFIG_IPACK) += $(gcov-files-ipack-y)
 check-qtest-pci-y += tests/display-vga-test$(EXESUF)
-gcov-files-pci-y += hw/display/vga.c
-gcov-files-pci-y += hw/display/cirrus_vga.c
-gcov-files-pci-y += hw/display/vga-pci.c
-gcov-files-pci-y += hw/display/virtio-gpu.c
-gcov-files-pci-y += hw/display/virtio-gpu-pci.c
-gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c
 check-qtest-pci-$(CONFIG_HDA) += tests/intel-hda-test$(EXESUF)
-gcov-files-pci-$(CONFIG_HDA) += hw/audio/intel-hda.c hw/audio/hda-codec.c
 check-qtest-pci-$(CONFIG_IVSHMEM_DEVICE) += tests/ivshmem-test$(EXESUF)
-gcov-files-pci-$(CONFIG_IVSHMEM_DEVICE) += hw/misc/ivshmem.c
 check-qtest-pci-y += tests/megasas-test$(EXESUF)
-gcov-files-pci-y += hw/scsi/megasas.c
 
 check-qtest-i386-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF)
 check-qtest-i386-y += tests/fdc-test$(EXESUF)
-gcov-files-i386-y = hw/block/fdc.c
 check-qtest-i386-y += tests/ide-test$(EXESUF)
 check-qtest-i386-y += tests/ahci-test$(EXESUF)
 check-qtest-i386-y += tests/hd-geo-test$(EXESUF)
-gcov-files-i386-y += hw/block/hd-geometry.c
 check-qtest-i386-y += tests/boot-order-test$(EXESUF)
 check-qtest-i386-y += tests/bios-tables-test$(EXESUF)
 check-qtest-i386-$(CONFIG_SGA) += tests/boot-serial-test$(EXESUF)
@@ -274,37 +186,21 @@
 check-qtest-i386-y += tests/fw_cfg-test$(EXESUF)
 check-qtest-i386-y += tests/drive_del-test$(EXESUF)
 check-qtest-i386-$(CONFIG_WDT_IB700) += tests/wdt_ib700-test$(EXESUF)
-gcov-files-i386-$(CONFIG_WDT_IB700) += hw/watchdog/watchdog.c hw/watchdog/wdt_ib700.c
 check-qtest-i386-y += tests/tco-test$(EXESUF)
 check-qtest-i386-y += $(check-qtest-pci-y)
-gcov-files-i386-y += $(gcov-files-pci-y)
 check-qtest-i386-$(CONFIG_VMXNET3_PCI) += tests/vmxnet3-test$(EXESUF)
-gcov-files-i386-$(CONFIG_VMXNET3_PCI) += hw/net/vmxnet3.c
-gcov-files-i386-y += hw/net/net_rx_pkt.c
-gcov-files-i386-y += hw/net/net_tx_pkt.c
 check-qtest-i386-$(CONFIG_PVPANIC) += tests/pvpanic-test$(EXESUF)
-gcov-files-i386-$(CONFIG_PVPANIC) += i386-softmmu/hw/misc/pvpanic.c
 check-qtest-i386-$(CONFIG_I82801B11) += tests/i82801b11-test$(EXESUF)
-gcov-files-i386-$(CONFIG_I82801B11) += hw/pci-bridge/i82801b11.c
 check-qtest-i386-$(CONFIG_IOH3420) += tests/ioh3420-test$(EXESUF)
-gcov-files-i386-$(CONFIG_IOH3420) += hw/pci-bridge/ioh3420.c
 check-qtest-i386-$(CONFIG_USB_OHCI) += tests/usb-hcd-ohci-test$(EXESUF)
-gcov-files-i386-$(CONFIG_USB_OHCI) += hw/usb/hcd-ohci.c
 check-qtest-i386-$(CONFIG_USB_UHCI) += tests/usb-hcd-uhci-test$(EXESUF)
-gcov-files-i386-$(CONFIG_USB_UHCI) += hw/usb/hcd-uhci.c
 ifeq ($(CONFIG_USB_ECHI)$(CONFIG_USB_UHCI),yy)
 check-qtest-i386-y += tests/usb-hcd-ehci-test$(EXESUF)
 endif
-gcov-files-i386-$(CONFIG_USB_EHCI) += hw/usb/hcd-ehci.c
-gcov-files-i386-y += hw/usb/dev-hid.c
-gcov-files-i386-y += hw/usb/dev-storage.c
 check-qtest-i386-$(CONFIG_USB_XHCI_NEC) += tests/usb-hcd-xhci-test$(EXESUF)
-gcov-files-i386-$(CONFIG_USB_XHCI) += hw/usb/hcd-xhci.c
-gcov-files-i386-$(CONFIG_USB_XHCI) += hw/usb/hcd-xhci-nec.c
 check-qtest-i386-y += tests/cpu-plug-test$(EXESUF)
 check-qtest-i386-y += tests/q35-test$(EXESUF)
 check-qtest-i386-y += tests/vmgenid-test$(EXESUF)
-gcov-files-i386-y += hw/pci-host/q35.c
 check-qtest-i386-$(CONFIG_VHOST_USER_NET_TEST_i386) += tests/vhost-user-test$(EXESUF)
 ifeq ($(CONFIG_VHOST_USER_NET_TEST_i386),)
 check-qtest-x86_64-$(CONFIG_VHOST_USER_NET_TEST_x86_64) += tests/vhost-user-test$(EXESUF)
@@ -321,16 +217,14 @@
 check-qtest-i386-y += tests/numa-test$(EXESUF)
 check-qtest-x86_64-y += $(check-qtest-i386-y)
 check-qtest-x86_64-$(CONFIG_SDHCI) += tests/sdhci-test$(EXESUF)
-gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c
-gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
 
-check-qtest-alpha-y = tests/boot-serial-test$(EXESUF)
+check-qtest-alpha-y += tests/boot-serial-test$(EXESUF)
 
-check-qtest-hppa-y = tests/boot-serial-test$(EXESUF)
+check-qtest-hppa-y += tests/boot-serial-test$(EXESUF)
 
 check-qtest-m68k-y = tests/boot-serial-test$(EXESUF)
 
-check-qtest-microblaze-y = tests/boot-serial-test$(EXESUF)
+check-qtest-microblaze-y += tests/boot-serial-test$(EXESUF)
 
 check-qtest-mips-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF)
 
@@ -338,7 +232,7 @@
 
 check-qtest-mips64el-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF)
 
-check-qtest-moxie-y = tests/boot-serial-test$(EXESUF)
+check-qtest-moxie-y += tests/boot-serial-test$(EXESUF)
 
 check-qtest-ppc-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF)
 check-qtest-ppc-y += tests/boot-order-test$(EXESUF)
@@ -346,23 +240,16 @@
 check-qtest-ppc-y += tests/drive_del-test$(EXESUF)
 check-qtest-ppc-y += tests/boot-serial-test$(EXESUF)
 check-qtest-ppc-y += tests/m48t59-test$(EXESUF)
-gcov-files-ppc-y += hw/timer/m48t59.c
 
-check-qtest-ppc64-y = $(check-qtest-ppc-y)
-gcov-files-ppc64-y = $(subst ppc-softmmu/,ppc64-softmmu/,$(gcov-files-ppc-y))
+check-qtest-ppc64-y += $(check-qtest-ppc-y)
 check-qtest-ppc64-y += tests/spapr-phb-test$(EXESUF)
-gcov-files-ppc64-y += ppc64-softmmu/hw/ppc/spapr_pci.c
 check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF)
 check-qtest-ppc64-y += tests/migration-test$(EXESUF)
 check-qtest-ppc64-y += tests/rtas-test$(EXESUF)
 check-qtest-ppc64-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF)
 check-qtest-ppc64-$(CONFIG_USB_OHCI) += tests/usb-hcd-ohci-test$(EXESUF)
-gcov-files-ppc64-$(CONFIG_USB_OHCI) += hw/usb/hcd-ohci.c
 check-qtest-ppc64-$(CONFIG_USB_UHCI) += tests/usb-hcd-uhci-test$(EXESUF)
-gcov-files-ppc64-$(CONFIG_USB_UHCI) += hw/usb/hcd-uhci.c
 check-qtest-ppc64-$(CONFIG_USB_XHCI_NEC) += tests/usb-hcd-xhci-test$(EXESUF)
-gcov-files-ppc64-$(CONFIG_USB_XHCI) += hw/usb/hcd-xhci.c
-gcov-files-ppc64-$(CONFIG_USB_XHCI) += hw/usb/hcd-xhci-nec.c
 check-qtest-ppc64-y += $(check-qtest-virtio-y)
 check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF)
 check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF)
@@ -370,31 +257,26 @@
 check-qtest-ppc64-y += tests/display-vga-test$(EXESUF)
 check-qtest-ppc64-y += tests/numa-test$(EXESUF)
 check-qtest-ppc64-$(CONFIG_IVSHMEM_DEVICE) += tests/ivshmem-test$(EXESUF)
-gcov-files-ppc64-$(CONFIG_IVSHMEM_DEVICE) += hw/misc/ivshmem.c
 check-qtest-ppc64-y += tests/cpu-plug-test$(EXESUF)
 
 check-qtest-sh4-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF)
 
 check-qtest-sh4eb-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF)
 
-check-qtest-sparc-y = tests/prom-env-test$(EXESUF)
+check-qtest-sparc-y += tests/prom-env-test$(EXESUF)
 check-qtest-sparc-y += tests/m48t59-test$(EXESUF)
-gcov-files-sparc-y = hw/timer/m48t59.c
 check-qtest-sparc-y += tests/boot-serial-test$(EXESUF)
 
 check-qtest-sparc64-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF)
 check-qtest-sparc64-y += tests/prom-env-test$(EXESUF)
 check-qtest-sparc64-y += tests/boot-serial-test$(EXESUF)
 
-check-qtest-arm-y = tests/tmp105-test$(EXESUF)
+check-qtest-arm-y += tests/tmp105-test$(EXESUF)
 check-qtest-arm-y += tests/pca9552-test$(EXESUF)
 check-qtest-arm-y += tests/ds1338-test$(EXESUF)
 check-qtest-arm-y += tests/m25p80-test$(EXESUF)
-gcov-files-arm-y += hw/misc/tmp105.c
 check-qtest-arm-y += tests/virtio-blk-test$(EXESUF)
-gcov-files-arm-y += arm-softmmu/hw/block/virtio-blk.c
 check-qtest-arm-y += tests/test-arm-mptimer$(EXESUF)
-gcov-files-arm-y += hw/timer/arm_mptimer.c
 check-qtest-arm-y += tests/boot-serial-test$(EXESUF)
 check-qtest-arm-$(CONFIG_SDHCI) += tests/sdhci-test$(EXESUF)
 check-qtest-arm-y += tests/hexloader-test$(EXESUF)
@@ -402,10 +284,11 @@
 check-qtest-aarch64-y = tests/numa-test$(EXESUF)
 check-qtest-aarch64-$(CONFIG_SDHCI) += tests/sdhci-test$(EXESUF)
 check-qtest-aarch64-y += tests/boot-serial-test$(EXESUF)
+check-qtest-aarch64-y += tests/migration-test$(EXESUF)
 
-check-qtest-microblazeel-y = $(check-qtest-microblaze-y)
+check-qtest-microblazeel-y += $(check-qtest-microblaze-y)
 
-check-qtest-xtensaeb-y = $(check-qtest-xtensa-y)
+check-qtest-xtensaeb-y += $(check-qtest-xtensa-y)
 
 check-qtest-s390x-y = tests/boot-serial-test$(EXESUF)
 check-qtest-s390x-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF)
@@ -415,6 +298,7 @@
 check-qtest-s390x-y += tests/drive_del-test$(EXESUF)
 check-qtest-s390x-y += tests/virtio-ccw-test$(EXESUF)
 check-qtest-s390x-y += tests/cpu-plug-test$(EXESUF)
+check-qtest-s390x-y += tests/migration-test$(EXESUF)
 
 check-qtest-generic-y += tests/machine-none-test$(EXESUF)
 check-qtest-generic-y += tests/qom-test$(EXESUF)
@@ -613,7 +497,7 @@
 	tests/test-rcu-tailq.o \
 	tests/test-qdist.o tests/test-shift128.o \
 	tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o \
-	tests/atomic_add-bench.o
+	tests/atomic_add-bench.o tests/atomic64-bench.o
 
 $(test-obj-y): QEMU_INCLUDES += -Itests
 QEMU_CFLAGS += -I$(SRC_PATH)/tests
@@ -668,6 +552,10 @@
 tests/qht-bench$(EXESUF): tests/qht-bench.o $(test-util-obj-y)
 tests/test-bufferiszero$(EXESUF): tests/test-bufferiszero.o $(test-util-obj-y)
 tests/atomic_add-bench$(EXESUF): tests/atomic_add-bench.o $(test-util-obj-y)
+tests/atomic64-bench$(EXESUF): tests/atomic64-bench.o $(test-util-obj-y)
+
+tests/fp/%:
+	$(MAKE) -C $(dir $@) $(notdir $@)
 
 tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \
 	hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\
diff --git a/tests/atomic64-bench.c b/tests/atomic64-bench.c
new file mode 100644
index 0000000..7169256
--- /dev/null
+++ b/tests/atomic64-bench.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/host-utils.h"
+#include "qemu/processor.h"
+
+/* Per-thread state; 64-byte aligned, presumably to avoid false sharing. */
+struct thread_info {
+    uint64_t r;        /* xorshift64* generator state */
+    uint64_t accesses; /* atomic reads performed so far */
+} QEMU_ALIGNED(64);
+
+/* One 64-bit counter per 64-byte-aligned slot; only read by the test. */
+struct count {
+    int64_t i64;
+} QEMU_ALIGNED(64);
+
+static QemuThread *threads;
+static struct thread_info *th_info;
+static unsigned int n_threads = 1;
+static unsigned int n_ready_threads;
+static struct count *counts;
+static unsigned int duration = 1;
+static unsigned int range = 1024;
+static bool test_start;
+static bool test_stop;
+
+static const char commands_string[] =
+    " -d = duration in seconds\n"
+    " -n = number of threads\n"
+    " -r = range (will be rounded up to pow2)";
+
+/* Print the command-line help to stderr. */
+static void usage_complete(char *argv[])
+{
+    fprintf(stderr, "Usage: %s [options]\n", argv[0]);
+    fprintf(stderr, "options:\n%s\n", commands_string);
+}
+
+/*
+ * From: https://en.wikipedia.org/wiki/Xorshift
+ * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
+ * guaranteed to be >= 32767).
+ */
+static uint64_t xorshift64star(uint64_t x)
+{
+    x ^= x >> 12; /* a */
+    x ^= x << 25; /* b */
+    x ^= x >> 27; /* c */
+    return x * UINT64_C(2685821657736338717);
+}
+
+/*
+ * Worker thread: announce readiness, spin until the test starts, then issue
+ * atomic 64-bit reads of pseudo-randomly chosen counters until told to stop.
+ */
+static void *thread_func(void *arg)
+{
+    struct thread_info *info = arg;
+
+    atomic_inc(&n_ready_threads);
+    while (!atomic_read(&test_start)) {
+        cpu_relax();
+    }
+
+    while (!atomic_read(&test_stop)) {
+        unsigned int index;
+
+        info->r = xorshift64star(info->r);
+        index = info->r & (range - 1); /* range is a power of two */
+        atomic_read_i64(&counts[index].i64);
+        info->accesses++;
+    }
+    return NULL;
+}
+
+/*
+ * Wait until every worker is ready, let them run for @duration seconds,
+ * then tell them to stop and join them.
+ */
+static void run_test(void)
+{
+    unsigned int remaining = duration;
+    unsigned int i;
+
+    while (atomic_read(&n_ready_threads) != n_threads) {
+        cpu_relax();
+    }
+    atomic_set(&test_start, true);
+    /* If a signal cuts sleep() short, resume with only the time left. */
+    do {
+        remaining = sleep(remaining);
+    } while (remaining);
+    atomic_set(&test_stop, true);
+
+    for (i = 0; i < n_threads; i++) {
+        qemu_thread_join(&threads[i]);
+    }
+}
+
+/*
+ * Allocate the shared counters and per-thread state, then start the workers.
+ * NOTE(review): g_new()/g_malloc0_n() may not honour the 64-byte alignment
+ * requested by QEMU_ALIGNED(64) - confirm whether that matters here.
+ */
+static void create_threads(void)
+{
+    unsigned int i;
+
+    threads = g_new(QemuThread, n_threads);
+    th_info = g_new(struct thread_info, n_threads);
+    counts = g_malloc0_n(range, sizeof(*counts));
+
+    for (i = 0; i < n_threads; i++) {
+        struct thread_info *info = &th_info[i];
+
+        info->r = (i + 1) ^ time(NULL); /* per-thread seed */
+        info->accesses = 0;
+        qemu_thread_create(&threads[i], NULL, thread_func, info,
+                           QEMU_THREAD_JOINABLE);
+    }
+}
+
+/* Print the effective test parameters. */
+static void pr_params(void)
+{
+    printf("Parameters:\n");
+    printf(" # of threads:      %u\n", n_threads);
+    printf(" duration:          %u\n", duration);
+    printf(" ops' range:        %u\n", range);
+}
+
+/* Sum the per-thread access counts and print throughput in Mops/s. */
+static void pr_stats(void)
+{
+    unsigned long long val = 0;
+    double tx;
+    unsigned int i;
+
+    for (i = 0; i < n_threads; i++) {
+        val += th_info[i].accesses;
+    }
+    /* Divide in floating point: integer division would truncate ops/s. */
+    tx = (double)val / duration / 1e6;
+
+    printf("Results:\n");
+    printf("Duration:            %u s\n", duration);
+    printf(" Throughput:         %.2f Mops/s\n", tx);
+    printf(" Throughput/thread:  %.2f Mops/s/thread\n", tx / n_threads);
+}
+
+/*
+ * Convert the argument of option @opt to a strictly positive integer.
+ * Zero, negative or non-numeric input would later cause a division by zero
+ * or an empty/invalid allocation, so report it and exit instead.
+ */
+static unsigned int parse_positive(char opt, const char *str)
+{
+    int val = atoi(str);
+
+    if (val <= 0) {
+        fprintf(stderr, "-%c: invalid value '%s' (must be > 0)\n", opt, str);
+        exit(1);
+    }
+    return val;
+}
+
+/* Parse the command line into n_threads, duration and range. */
+static void parse_args(int argc, char *argv[])
+{
+    int c;
+
+    for (;;) {
+        c = getopt(argc, argv, "hd:n:r:");
+        if (c < 0) {
+            break;
+        }
+        switch (c) {
+        case 'h':
+            usage_complete(argv);
+            exit(0);
+        case 'd':
+            duration = parse_positive(c, optarg);
+            break;
+        case 'n':
+            n_threads = parse_positive(c, optarg);
+            break;
+        case 'r':
+            range = pow2ceil(parse_positive(c, optarg));
+            break;
+        default:
+            /* getopt() has already reported the unknown/incomplete option */
+            usage_complete(argv);
+            exit(1);
+        }
+    }
+}
+
+int main(int argc, char *argv[])
+{
+    parse_args(argc, argv);
+    pr_params();
+    create_threads();
+    run_test();
+    pr_stats();
+    return 0;
+}
diff --git a/tests/benchmark-crypto-cipher.c b/tests/benchmark-crypto-cipher.c
index f5a0d0b..67fdf8c 100644
--- a/tests/benchmark-crypto-cipher.c
+++ b/tests/benchmark-crypto-cipher.c
@@ -15,17 +15,27 @@
 #include "crypto/init.h"
 #include "crypto/cipher.h"
 
-static void test_cipher_speed(const void *opaque)
+static void test_cipher_speed(size_t chunk_size,
+                              QCryptoCipherMode mode,
+                              QCryptoCipherAlgorithm alg)
 {
     QCryptoCipher *cipher;
     Error *err = NULL;
     double total = 0.0;
-    size_t chunk_size = (size_t)opaque;
     uint8_t *key = NULL, *iv = NULL;
     uint8_t *plaintext = NULL, *ciphertext = NULL;
-    size_t nkey = qcrypto_cipher_get_key_len(QCRYPTO_CIPHER_ALG_AES_128);
-    size_t niv = qcrypto_cipher_get_iv_len(QCRYPTO_CIPHER_ALG_AES_128,
-                                           QCRYPTO_CIPHER_MODE_CBC);
+    size_t nkey;
+    size_t niv;
+
+    if (!qcrypto_cipher_supports(alg, mode)) {
+        return;
+    }
+
+    nkey = qcrypto_cipher_get_key_len(alg);
+    niv = qcrypto_cipher_get_iv_len(alg, mode);
+    if (mode == QCRYPTO_CIPHER_MODE_XTS) {
+        nkey *= 2;
+    }
 
     key = g_new0(uint8_t, nkey);
     memset(key, g_test_rand_int(), nkey);
@@ -38,14 +48,14 @@
     plaintext = g_new0(uint8_t, chunk_size);
     memset(plaintext, g_test_rand_int(), chunk_size);
 
-    cipher = qcrypto_cipher_new(QCRYPTO_CIPHER_ALG_AES_128,
-                                QCRYPTO_CIPHER_MODE_CBC,
+    cipher = qcrypto_cipher_new(alg, mode,
                                 key, nkey, &err);
     g_assert(cipher != NULL);
 
-    g_assert(qcrypto_cipher_setiv(cipher,
-                                  iv, niv,
-                                  &err) == 0);
+    if (mode != QCRYPTO_CIPHER_MODE_ECB)
+        g_assert(qcrypto_cipher_setiv(cipher,
+                                      iv, niv,
+                                      &err) == 0);
 
     g_test_timer_start();
     do {
@@ -55,13 +65,26 @@
                                         chunk_size,
                                         &err) == 0);
         total += chunk_size;
-    } while (g_test_timer_elapsed() < 5.0);
+    } while (g_test_timer_elapsed() < 1.0);
 
     total /= MiB;
-    g_print("cbc(aes128): ");
-    g_print("Testing chunk_size %zu bytes ", chunk_size);
-    g_print("done: %.2f MB in %.2f secs: ", total, g_test_timer_last());
-    g_print("%.2f MB/sec\n", total / g_test_timer_last());
+    g_print("Enc chunk %zu bytes ", chunk_size);
+    g_print("%.2f MB/sec ", total / g_test_timer_last());
+
+    total = 0.0;
+    g_test_timer_start();
+    do {
+        g_assert(qcrypto_cipher_decrypt(cipher,
+                                        plaintext,
+                                        ciphertext,
+                                        chunk_size,
+                                        &err) == 0);
+        total += chunk_size;
+    } while (g_test_timer_elapsed() < 1.0);
+
+    total /= MiB;
+    g_print("Dec chunk %zu bytes ", chunk_size);
+    g_print("%.2f MB/sec ", total / g_test_timer_last());
 
     qcrypto_cipher_free(cipher);
     g_free(plaintext);
@@ -70,19 +93,99 @@
     g_free(key);
 }
 
+
+static void test_cipher_speed_ecb_aes_128(const void *opaque)
+{
+    size_t chunk_size = (size_t)opaque; /* size passed as the data pointer */
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_ECB,
+                      QCRYPTO_CIPHER_ALG_AES_128);
+}
+
+static void test_cipher_speed_ecb_aes_256(const void *opaque)
+{
+    size_t chunk_size = (size_t)opaque; /* size passed as the data pointer */
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_ECB,
+                      QCRYPTO_CIPHER_ALG_AES_256);
+}
+
+static void test_cipher_speed_cbc_aes_128(const void *opaque)
+{
+    size_t chunk_size = (size_t)opaque; /* size passed as the data pointer */
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_CBC,
+                      QCRYPTO_CIPHER_ALG_AES_128);
+}
+
+static void test_cipher_speed_cbc_aes_256(const void *opaque)
+{
+    size_t chunk_size = (size_t)opaque; /* size passed as the data pointer */
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_CBC,
+                      QCRYPTO_CIPHER_ALG_AES_256);
+}
+
+static void test_cipher_speed_ctr_aes_128(const void *opaque)
+{
+    size_t chunk_size = (size_t)opaque; /* size passed as the data pointer */
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_CTR,
+                      QCRYPTO_CIPHER_ALG_AES_128);
+}
+
+static void test_cipher_speed_ctr_aes_256(const void *opaque)
+{
+    size_t chunk_size = (size_t)opaque; /* size passed as the data pointer */
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_CTR,
+                      QCRYPTO_CIPHER_ALG_AES_256);
+}
+
+static void test_cipher_speed_xts_aes_128(const void *opaque)
+{
+    size_t chunk_size = (size_t)opaque; /* size passed as the data pointer */
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_XTS,
+                      QCRYPTO_CIPHER_ALG_AES_128);
+}
+
+static void test_cipher_speed_xts_aes_256(const void *opaque)
+{
+    size_t chunk_size = (size_t)opaque; /* size passed as the data pointer */
+    test_cipher_speed(chunk_size,
+                      QCRYPTO_CIPHER_MODE_XTS,
+                      QCRYPTO_CIPHER_ALG_AES_256);
+}
+
+
 int main(int argc, char **argv)
 {
-    size_t i;
-    char name[64];
-
     g_test_init(&argc, &argv, NULL);
     g_assert(qcrypto_init(NULL) == 0);
 
-    for (i = 512; i <= 64 * KiB; i *= 2) {
-        memset(name, 0 , sizeof(name));
-        snprintf(name, sizeof(name), "/crypto/cipher/speed-%zu", i);
-        g_test_add_data_func(name, (void *)i, test_cipher_speed);
-    }
+#define ADD_TEST(mode, cipher, keysize, chunk)                          \
+    g_test_add_data_func(                                               \
+        "/crypto/cipher/" #mode "-" #cipher "-" #keysize "/chunk-" #chunk, \
+        (void *)chunk,                                                  \
+        test_cipher_speed_ ## mode ## _ ## cipher ## _ ## keysize)
+
+#define ADD_TESTS(chunk)                        \
+    do {                                        \
+        ADD_TEST(ecb, aes, 128, chunk);         \
+        ADD_TEST(ecb, aes, 256, chunk);         \
+        ADD_TEST(cbc, aes, 128, chunk);         \
+        ADD_TEST(cbc, aes, 256, chunk);         \
+        ADD_TEST(ctr, aes, 128, chunk);         \
+        ADD_TEST(ctr, aes, 256, chunk);         \
+        ADD_TEST(xts, aes, 128, chunk);         \
+        ADD_TEST(xts, aes, 256, chunk);         \
+    } while (0)
+
+    ADD_TESTS(512);
+    ADD_TESTS(4096);
+    ADD_TESTS(16384);
+    ADD_TESTS(65536);
 
     return g_test_run();
 }
diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index 4e24930..02e77ec 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -319,7 +319,7 @@
     ret = g_spawn_command_line_sync(command_line->str, &out, &out_err, NULL, &error);
     g_assert_no_error(error);
     if (ret) {
-        ret = g_file_get_contents(sdt->asl_file, (gchar **)&sdt->asl,
+        ret = g_file_get_contents(sdt->asl_file, &sdt->asl,
                                   &sdt->asl_len, &error);
         g_assert(ret);
         g_assert_no_error(error);
@@ -390,7 +390,7 @@
         if (g_file_test(aml_file, G_FILE_TEST_EXISTS)) {
             exp_sdt.aml_file = aml_file;
         } else if (*ext != '\0') {
-            /* try fallback to generic (extention less) expected file */
+            /* try fallback to generic (extension less) expected file */
             ext = "";
             g_free(aml_file);
             goto try_again;
diff --git a/tests/check-qdict.c b/tests/check-qdict.c
index 86e9fe7..a1e8305 100644
--- a/tests/check-qdict.c
+++ b/tests/check-qdict.c
@@ -12,6 +12,8 @@
 
 #include "qemu/osdep.h"
 #include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/qmp/qstring.h"
 
 /*
  * Public Interface test-cases
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index cc13f3d..d876a7a 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -780,6 +780,7 @@
             if (!strstr(json_out, "\\uFFFD")) {
                 str = from_json_str(json_out, j, &error_abort);
                 g_assert_cmpstr(qstring_get_try_str(str), ==, utf8_in);
+                qobject_unref(str);
             }
         }
     }
diff --git a/tests/check-qom-proplist.c b/tests/check-qom-proplist.c
index 92898e1..a8b2958 100644
--- a/tests/check-qom-proplist.c
+++ b/tests/check-qom-proplist.c
@@ -125,10 +125,13 @@
 
 static void dummy_init(Object *obj)
 {
+    Error *err = NULL;
+
     object_property_add_bool(obj, "bv",
                              dummy_get_bv,
                              dummy_set_bv,
-                             NULL);
+                             &err);
+    error_free_or_abort(&err);
 }
 
 
@@ -517,6 +520,32 @@
 }
 
 
+static void test_dummy_prop_iterator(ObjectPropertyIterator *iter)
+{
+    bool seenbv = false, seensv = false, seenav = false, seentype = false;
+    ObjectProperty *prop;
+
+    while ((prop = object_property_iter_next(iter))) { /* until exhausted */
+        if (!seenbv && g_str_equal(prop->name, "bv")) {
+            seenbv = true;
+        } else if (!seensv && g_str_equal(prop->name, "sv")) {
+            seensv = true;
+        } else if (!seenav && g_str_equal(prop->name, "av")) {
+            seenav = true;
+        } else if (!seentype && g_str_equal(prop->name, "type")) {
+            /* This prop comes from the base Object class */
+            seentype = true;
+        } else { /* unknown name, or one that was already seen: fail */
+            g_printerr("Found prop '%s'\n", prop->name);
+            g_assert_not_reached();
+        }
+    }
+    g_assert(seenbv); /* each expected property must have been visited */
+    g_assert(seenav);
+    g_assert(seensv);
+    g_assert(seentype);
+}
+
 static void test_dummy_iterator(void)
 {
     Object *parent = object_get_objects_root();
@@ -529,35 +558,21 @@
                               "sv", "Hiss hiss hiss",
                               "av", "platypus",
                               NULL));
-
-    ObjectProperty *prop;
     ObjectPropertyIterator iter;
-    bool seenbv = false, seensv = false, seenav = false, seentype;
 
     object_property_iter_init(&iter, OBJECT(dobj));
-    while ((prop = object_property_iter_next(&iter))) {
-        if (g_str_equal(prop->name, "bv")) {
-            seenbv = true;
-        } else if (g_str_equal(prop->name, "sv")) {
-            seensv = true;
-        } else if (g_str_equal(prop->name, "av")) {
-            seenav = true;
-        } else if (g_str_equal(prop->name, "type")) {
-            /* This prop comes from the base Object class */
-            seentype = true;
-        } else {
-            g_printerr("Found prop '%s'\n", prop->name);
-            g_assert_not_reached();
-        }
-    }
-    g_assert(seenbv);
-    g_assert(seenav);
-    g_assert(seensv);
-    g_assert(seentype);
-
+    test_dummy_prop_iterator(&iter);
     object_unparent(OBJECT(dobj));
 }
 
+static void test_dummy_class_iterator(void)
+{
+    ObjectPropertyIterator iter;
+    ObjectClass *klass = object_class_by_name(TYPE_DUMMY);
+
+    object_class_property_iter_init(&iter, klass);
+    test_dummy_prop_iterator(&iter);
+}
 
 static void test_dummy_delchild(void)
 {
@@ -629,6 +644,7 @@
     g_test_add_func("/qom/proplist/badenum", test_dummy_badenum);
     g_test_add_func("/qom/proplist/getenum", test_dummy_getenum);
     g_test_add_func("/qom/proplist/iterator", test_dummy_iterator);
+    g_test_add_func("/qom/proplist/class_iterator", test_dummy_class_iterator);
     g_test_add_func("/qom/proplist/delchild", test_dummy_delchild);
     g_test_add_func("/qom/resolve/partial", test_qom_partial_path);
 
diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c
index 3e93c8e..f4a677d 100644
--- a/tests/cpu-plug-test.c
+++ b/tests/cpu-plug-test.c
@@ -32,12 +32,12 @@
     unsigned int i;
 
     args = g_strdup_printf("-machine %s -cpu %s "
-                           "-smp sockets=%u,cores=%u,threads=%u,maxcpus=%u",
+                           "-smp 1,sockets=%u,cores=%u,threads=%u,maxcpus=%u",
                            s->machine, s->cpu_model,
                            s->sockets, s->cores, s->threads, s->maxcpus);
     qtest_start(args);
 
-    for (i = s->sockets * s->cores * s->threads; i < s->maxcpus; i++) {
+    for (i = 1; i < s->maxcpus; i++) {
         response = qmp("{ 'execute': 'cpu-add',"
                        "  'arguments': { 'id': %d } }", i);
         g_assert(response);
@@ -56,7 +56,7 @@
     QDict *response;
 
     args = g_strdup_printf("-machine %s -cpu %s "
-                           "-smp sockets=%u,cores=%u,threads=%u,maxcpus=%u",
+                           "-smp 1,sockets=%u,cores=%u,threads=%u,maxcpus=%u",
                            s->machine, s->cpu_model,
                            s->sockets, s->cores, s->threads, s->maxcpus);
     qtest_start(args);
@@ -79,12 +79,12 @@
     unsigned int s, c, t;
 
     args = g_strdup_printf("-machine %s -cpu %s "
-                           "-smp sockets=%u,cores=%u,threads=%u,maxcpus=%u",
+                           "-smp 1,sockets=%u,cores=%u,threads=%u,maxcpus=%u",
                            td->machine, td->cpu_model,
                            td->sockets, td->cores, td->threads, td->maxcpus);
     qtest_start(args);
 
-    for (s = td->sockets; s < td->maxcpus / td->cores / td->threads; s++) {
+    for (s = 1; s < td->sockets; s++) {
         for (c = 0; c < td->cores; c++) {
             for (t = 0; t < td->threads; t++) {
                 char *id = g_strdup_printf("id-%i-%i-%i", s, c, t);
@@ -113,7 +113,7 @@
                            td->sockets, td->cores, td->threads, td->maxcpus);
     qtest_start(args);
 
-    for (c = td->cores; c < td->maxcpus / td->sockets / td->threads; c++) {
+    for (c = 1; c < td->cores; c++) {
         char *id = g_strdup_printf("id-%i", c);
         qtest_qmp_device_add(td->device_model, id, "{'core-id':%u}", c);
         g_free(id);
@@ -148,7 +148,7 @@
     data->sockets = 1;
     data->cores = 3;
     data->threads = 2;
-    data->maxcpus = data->sockets * data->cores * data->threads * 2;
+    data->maxcpus = data->sockets * data->cores * data->threads;
     if (g_str_has_suffix(mname, "-1.4") ||
         (strcmp(mname, "pc-1.3") == 0) ||
         (strcmp(mname, "pc-1.2") == 0) ||
@@ -203,7 +203,7 @@
     data->sockets = 2;
     data->cores = 3;
     data->threads = 1;
-    data->maxcpus = data->sockets * data->cores * data->threads * 2;
+    data->maxcpus = data->sockets * data->cores * data->threads;
 
     path = g_strdup_printf("cpu-plug/%s/device-add/%ux%ux%u&maxcpus=%u",
                            mname, data->sockets, data->cores,
@@ -229,7 +229,7 @@
     data->sockets = 1;
     data->cores = 3;
     data->threads = 1;
-    data->maxcpus = data->sockets * data->cores * data->threads * 2;
+    data->maxcpus = data->sockets * data->cores * data->threads;
 
     data2 = g_memdup(data, sizeof(PlugTestData));
     data2->machine = g_strdup(data->machine);
diff --git a/tests/crypto-tls-x509-helpers.h b/tests/crypto-tls-x509-helpers.h
index 921341c..88c30d7 100644
--- a/tests/crypto-tls-x509-helpers.h
+++ b/tests/crypto-tls-x509-helpers.h
@@ -22,8 +22,7 @@
 #include <gnutls/x509.h>
 
 #if !(defined WIN32) && \
-    defined(CONFIG_TASN1) && \
-    (LIBGNUTLS_VERSION_NUMBER >= 0x020600)
+    defined(CONFIG_TASN1)
 # define QCRYPTO_HAVE_TLS_TEST_SUPPORT
 #endif
 
diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include
index 6e03235..9467e9d 100644
--- a/tests/docker/Makefile.include
+++ b/tests/docker/Makefile.include
@@ -41,7 +41,7 @@
 docker-image: ${DOCKER_TARGETS}
 
 # General rule for building docker images. If we are a sub-make
-# invoked with SKIP_DOCKER_BUILD we still check the image is upto date
+# invoked with SKIP_DOCKER_BUILD we still check the image is up to date
 # though
 ifdef SKIP_DOCKER_BUILD
 docker-image-%: $(DOCKER_FILES_DIR)/%.docker
diff --git a/tests/docker/docker.py b/tests/docker/docker.py
index d3006d4..02d8a83 100755
--- a/tests/docker/docker.py
+++ b/tests/docker/docker.py
@@ -14,14 +14,12 @@
 from __future__ import print_function
 import os
 import sys
-sys.path.append(os.path.join(os.path.dirname(__file__),
-                             '..', '..', 'scripts'))
-import argparse
 import subprocess
 import json
 import hashlib
 import atexit
 import uuid
+import argparse
 import tempfile
 import re
 import signal
@@ -99,7 +97,7 @@
     return libs
 
 def _copy_binary_with_libs(src, dest_dir):
-    """Copy a binary executable and all its dependant libraries.
+    """Copy a binary executable and all its dependent libraries.
 
     This does rely on the host file-system being fairly multi-arch
     aware so the file don't clash with the guests layout."""
@@ -286,7 +284,7 @@
     name = None # Subcommand name
     def shared_args(self, parser):
         parser.add_argument("--quiet", action="store_true",
-                            help="Run quietly unless an error occured")
+                            help="Run quietly unless an error occurred")
 
     def args(self, parser):
         """Setup argument parser"""
diff --git a/tests/docker/dockerfiles/debian-bootstrap.pre b/tests/docker/dockerfiles/debian-bootstrap.pre
index 3b0ef95..c164778 100755
--- a/tests/docker/dockerfiles/debian-bootstrap.pre
+++ b/tests/docker/dockerfiles/debian-bootstrap.pre
@@ -2,7 +2,7 @@
 #
 # Simple wrapper for debootstrap, run in the docker build context
 #
-FAKEROOT=`which fakeroot 2> /dev/null`
+FAKEROOT=$(which fakeroot 2> /dev/null)
 # debootstrap < 1.0.67 generates empty sources.list, see Debian#732255
 MIN_DEBOOTSTRAP_VERSION=1.0.67
 
@@ -52,7 +52,7 @@
 
 if [ -z $DEBOOTSTRAP_DIR ]; then
     NEED_DEBOOTSTRAP=false
-    DEBOOTSTRAP=`which debootstrap 2> /dev/null`
+    DEBOOTSTRAP=$(which debootstrap 2> /dev/null)
     if [ -z $DEBOOTSTRAP ]; then
         echo "No debootstrap installed, attempting to install from SCM"
         NEED_DEBOOTSTRAP=true
diff --git a/tests/docker/test-mingw b/tests/docker/test-mingw
index 7cca7e1..b078f22 100755
--- a/tests/docker/test-mingw
+++ b/tests/docker/test-mingw
@@ -28,8 +28,7 @@
         --enable-vnc \
         --enable-bzip2 \
         --enable-guest-agent \
-        --with-sdlabi=2.0 \
-        --with-gtkabi=3.0
+        --with-sdlabi=2.0
     install_qemu
     make clean
 
diff --git a/tests/fp/.gitignore b/tests/fp/.gitignore
new file mode 100644
index 0000000..8d45d18
--- /dev/null
+++ b/tests/fp/.gitignore
@@ -0,0 +1 @@
+fp-test
diff --git a/tests/fp/Makefile b/tests/fp/Makefile
new file mode 100644
index 0000000..d649a5a
--- /dev/null
+++ b/tests/fp/Makefile
@@ -0,0 +1,597 @@
+BUILD_DIR := $(CURDIR)/../..
+
+include $(BUILD_DIR)/config-host.mak
+include $(SRC_PATH)/rules.mak
+
+SOFTFLOAT_DIR := $(SRC_PATH)/tests/fp/berkeley-softfloat-3
+TESTFLOAT_DIR := $(SRC_PATH)/tests/fp/berkeley-testfloat-3
+
+SF_SOURCE_DIR  := $(SOFTFLOAT_DIR)/source
+SF_INCLUDE_DIR := $(SOFTFLOAT_DIR)/source/include
+# any specialization would do here; the choice does not matter
+SF_SPECIALIZE := 8086-SSE
+SF_SPECIALIZE_DIR := $(SF_SOURCE_DIR)/$(SF_SPECIALIZE)
+
+TF_SOURCE_DIR := $(TESTFLOAT_DIR)/source
+
+$(call set-vpath, $(SRC_PATH)/fpu $(SRC_PATH)/tests/fp)
+
+LIBQEMUUTIL := $(BUILD_DIR)/libqemuutil.a
+
+# Use this variable to be clear when we pull in our own implementation
+# We build the object with a default rule thanks to the vpath above
+QEMU_SOFTFLOAT_OBJ := softfloat.o
+
+QEMU_INCLUDES += -I$(SRC_PATH)/tests/fp
+QEMU_INCLUDES += -I$(SF_INCLUDE_DIR)
+QEMU_INCLUDES += -I$(SF_SPECIALIZE_DIR)
+QEMU_INCLUDES += -I$(TF_SOURCE_DIR)
+
+# work around TARGET_* poisoning
+QEMU_CFLAGS += -DHW_POISON_H
+
+# capstone has a platform.h file that clashes with softfloat's
+QEMU_CFLAGS := $(filter-out %capstone, $(QEMU_CFLAGS))
+
+# softfloat defines
+SF_OPTS :=
+SF_OPTS += -DSOFTFLOAT_ROUND_ODD
+SF_OPTS += -DINLINE_LEVEL=5
+SF_OPTS += -DSOFTFLOAT_FAST_DIV32TO16
+SF_OPTS += -DSOFTFLOAT_FAST_DIV64TO32
+SF_OPTS += -DSOFTFLOAT_FAST_INT64
+QEMU_CFLAGS += $(SF_OPTS)
+
+# silence the build of softfloat objects; := discards any inherited value
+SF_CFLAGS := -Wno-missing-prototypes
+SF_CFLAGS += -Wno-redundant-decls
+SF_CFLAGS += -Wno-return-type
+SF_CFLAGS += -Wno-error
+
+# testfloat defines
+TF_OPTS :=
+TF_OPTS += -DFLOAT16
+TF_OPTS += -DFLOAT64
+TF_OPTS += -DEXTFLOAT80
+TF_OPTS += -DFLOAT128
+TF_OPTS += -DFLOAT_ROUND_ODD
+TF_OPTS += -DLONG_DOUBLE_IS_EXTFLOAT80
+QEMU_CFLAGS += $(TF_OPTS)
+
+# silence the build of testfloat objects
+TF_CFLAGS :=
+TF_CFLAGS += -Wno-strict-prototypes
+TF_CFLAGS += -Wno-unknown-pragmas
+TF_CFLAGS += -Wno-discarded-qualifiers
+TF_CFLAGS += -Wno-maybe-uninitialized
+TF_CFLAGS += -Wno-missing-prototypes
+TF_CFLAGS += -Wno-return-type
+TF_CFLAGS += -Wno-unused-function
+TF_CFLAGS += -Wno-error
+
+# softfloat objects
+SF_OBJS_PRIMITIVES :=
+SF_OBJS_PRIMITIVES += s_eq128.o
+SF_OBJS_PRIMITIVES += s_le128.o
+SF_OBJS_PRIMITIVES += s_lt128.o
+SF_OBJS_PRIMITIVES += s_shortShiftLeft128.o
+SF_OBJS_PRIMITIVES += s_shortShiftRight128.o
+SF_OBJS_PRIMITIVES += s_shortShiftRightJam64.o
+SF_OBJS_PRIMITIVES += s_shortShiftRightJam64Extra.o
+SF_OBJS_PRIMITIVES += s_shortShiftRightJam128.o
+SF_OBJS_PRIMITIVES += s_shortShiftRightJam128Extra.o
+SF_OBJS_PRIMITIVES += s_shiftRightJam32.o
+SF_OBJS_PRIMITIVES += s_shiftRightJam64.o
+SF_OBJS_PRIMITIVES += s_shiftRightJam64Extra.o
+SF_OBJS_PRIMITIVES += s_shiftRightJam128.o
+SF_OBJS_PRIMITIVES += s_shiftRightJam128Extra.o
+SF_OBJS_PRIMITIVES += s_shiftRightJam256M.o
+SF_OBJS_PRIMITIVES += s_countLeadingZeros8.o
+SF_OBJS_PRIMITIVES += s_countLeadingZeros16.o
+SF_OBJS_PRIMITIVES += s_countLeadingZeros32.o
+SF_OBJS_PRIMITIVES += s_countLeadingZeros64.o
+SF_OBJS_PRIMITIVES += s_add128.o
+SF_OBJS_PRIMITIVES += s_add256M.o
+SF_OBJS_PRIMITIVES += s_sub128.o
+SF_OBJS_PRIMITIVES += s_sub256M.o
+SF_OBJS_PRIMITIVES += s_mul64ByShifted32To128.o
+SF_OBJS_PRIMITIVES += s_mul64To128.o
+SF_OBJS_PRIMITIVES += s_mul128By32.o
+SF_OBJS_PRIMITIVES += s_mul128To256M.o
+SF_OBJS_PRIMITIVES += s_approxRecip_1Ks.o
+SF_OBJS_PRIMITIVES += s_approxRecip32_1.o
+SF_OBJS_PRIMITIVES += s_approxRecipSqrt_1Ks.o
+SF_OBJS_PRIMITIVES += s_approxRecipSqrt32_1.o
+
+SF_OBJS_SPECIALIZE :=
+SF_OBJS_SPECIALIZE += softfloat_raiseFlags.o
+SF_OBJS_SPECIALIZE += s_f16UIToCommonNaN.o
+SF_OBJS_SPECIALIZE += s_commonNaNToF16UI.o
+SF_OBJS_SPECIALIZE += s_propagateNaNF16UI.o
+SF_OBJS_SPECIALIZE += s_f32UIToCommonNaN.o
+SF_OBJS_SPECIALIZE += s_commonNaNToF32UI.o
+SF_OBJS_SPECIALIZE += s_propagateNaNF32UI.o
+SF_OBJS_SPECIALIZE += s_f64UIToCommonNaN.o
+SF_OBJS_SPECIALIZE += s_commonNaNToF64UI.o
+SF_OBJS_SPECIALIZE += s_propagateNaNF64UI.o
+SF_OBJS_SPECIALIZE += extF80M_isSignalingNaN.o
+SF_OBJS_SPECIALIZE += s_extF80UIToCommonNaN.o
+SF_OBJS_SPECIALIZE += s_commonNaNToExtF80UI.o
+SF_OBJS_SPECIALIZE += s_propagateNaNExtF80UI.o
+SF_OBJS_SPECIALIZE += f128M_isSignalingNaN.o
+SF_OBJS_SPECIALIZE += s_f128UIToCommonNaN.o
+SF_OBJS_SPECIALIZE += s_commonNaNToF128UI.o
+SF_OBJS_SPECIALIZE += s_propagateNaNF128UI.o
+
+SF_OBJS_OTHERS :=
+SF_OBJS_OTHERS += s_roundToUI32.o
+SF_OBJS_OTHERS += s_roundToUI64.o
+SF_OBJS_OTHERS += s_roundToI32.o
+SF_OBJS_OTHERS += s_roundToI64.o
+SF_OBJS_OTHERS += s_normSubnormalF16Sig.o
+SF_OBJS_OTHERS += s_roundPackToF16.o
+SF_OBJS_OTHERS += s_normRoundPackToF16.o
+SF_OBJS_OTHERS += s_addMagsF16.o
+SF_OBJS_OTHERS += s_subMagsF16.o
+SF_OBJS_OTHERS += s_mulAddF16.o
+SF_OBJS_OTHERS += s_normSubnormalF32Sig.o
+SF_OBJS_OTHERS += s_roundPackToF32.o
+SF_OBJS_OTHERS += s_normRoundPackToF32.o
+SF_OBJS_OTHERS += s_addMagsF32.o
+SF_OBJS_OTHERS += s_subMagsF32.o
+SF_OBJS_OTHERS += s_mulAddF32.o
+SF_OBJS_OTHERS += s_normSubnormalF64Sig.o
+SF_OBJS_OTHERS += s_roundPackToF64.o
+SF_OBJS_OTHERS += s_normRoundPackToF64.o
+SF_OBJS_OTHERS += s_addMagsF64.o
+SF_OBJS_OTHERS += s_subMagsF64.o
+SF_OBJS_OTHERS += s_mulAddF64.o
+SF_OBJS_OTHERS += s_normSubnormalExtF80Sig.o
+SF_OBJS_OTHERS += s_roundPackToExtF80.o
+SF_OBJS_OTHERS += s_normRoundPackToExtF80.o
+SF_OBJS_OTHERS += s_addMagsExtF80.o
+SF_OBJS_OTHERS += s_subMagsExtF80.o
+SF_OBJS_OTHERS += s_normSubnormalF128Sig.o
+SF_OBJS_OTHERS += s_roundPackToF128.o
+SF_OBJS_OTHERS += s_normRoundPackToF128.o
+SF_OBJS_OTHERS += s_addMagsF128.o
+SF_OBJS_OTHERS += s_subMagsF128.o
+SF_OBJS_OTHERS += s_mulAddF128.o
+SF_OBJS_OTHERS += softfloat_state.o
+SF_OBJS_OTHERS += ui32_to_f16.o
+SF_OBJS_OTHERS += ui32_to_f32.o
+SF_OBJS_OTHERS += ui32_to_f64.o
+SF_OBJS_OTHERS += ui32_to_extF80.o
+SF_OBJS_OTHERS += ui32_to_extF80M.o
+SF_OBJS_OTHERS += ui32_to_f128.o
+SF_OBJS_OTHERS += ui32_to_f128M.o
+SF_OBJS_OTHERS += ui64_to_f16.o
+SF_OBJS_OTHERS += ui64_to_f32.o
+SF_OBJS_OTHERS += ui64_to_f64.o
+SF_OBJS_OTHERS += ui64_to_extF80.o
+SF_OBJS_OTHERS += ui64_to_extF80M.o
+SF_OBJS_OTHERS += ui64_to_f128.o
+SF_OBJS_OTHERS += ui64_to_f128M.o
+SF_OBJS_OTHERS += i32_to_f16.o
+SF_OBJS_OTHERS += i32_to_f32.o
+SF_OBJS_OTHERS += i32_to_f64.o
+SF_OBJS_OTHERS += i32_to_extF80.o
+SF_OBJS_OTHERS += i32_to_extF80M.o
+SF_OBJS_OTHERS += i32_to_f128.o
+SF_OBJS_OTHERS += i32_to_f128M.o
+SF_OBJS_OTHERS += i64_to_f16.o
+SF_OBJS_OTHERS += i64_to_f32.o
+SF_OBJS_OTHERS += i64_to_f64.o
+SF_OBJS_OTHERS += i64_to_extF80.o
+SF_OBJS_OTHERS += i64_to_extF80M.o
+SF_OBJS_OTHERS += i64_to_f128.o
+SF_OBJS_OTHERS += i64_to_f128M.o
+SF_OBJS_OTHERS += f16_to_ui32.o
+SF_OBJS_OTHERS += f16_to_ui64.o
+SF_OBJS_OTHERS += f16_to_i32.o
+SF_OBJS_OTHERS += f16_to_i64.o
+SF_OBJS_OTHERS += f16_to_ui32_r_minMag.o
+SF_OBJS_OTHERS += f16_to_ui64_r_minMag.o
+SF_OBJS_OTHERS += f16_to_i32_r_minMag.o
+SF_OBJS_OTHERS += f16_to_i64_r_minMag.o
+SF_OBJS_OTHERS += f16_to_f32.o
+SF_OBJS_OTHERS += f16_to_f64.o
+SF_OBJS_OTHERS += f16_to_extF80.o
+SF_OBJS_OTHERS += f16_to_extF80M.o
+SF_OBJS_OTHERS += f16_to_f128.o
+SF_OBJS_OTHERS += f16_to_f128M.o
+SF_OBJS_OTHERS += f16_roundToInt.o
+SF_OBJS_OTHERS += f16_add.o
+SF_OBJS_OTHERS += f16_sub.o
+SF_OBJS_OTHERS += f16_mul.o
+SF_OBJS_OTHERS += f16_mulAdd.o
+SF_OBJS_OTHERS += f16_div.o
+SF_OBJS_OTHERS += f16_rem.o
+SF_OBJS_OTHERS += f16_sqrt.o
+SF_OBJS_OTHERS += f16_eq.o
+SF_OBJS_OTHERS += f16_le.o
+SF_OBJS_OTHERS += f16_lt.o
+SF_OBJS_OTHERS += f16_eq_signaling.o
+SF_OBJS_OTHERS += f16_le_quiet.o
+SF_OBJS_OTHERS += f16_lt_quiet.o
+SF_OBJS_OTHERS += f16_isSignalingNaN.o
+SF_OBJS_OTHERS += f32_to_ui32.o
+SF_OBJS_OTHERS += f32_to_ui64.o
+SF_OBJS_OTHERS += f32_to_i32.o
+SF_OBJS_OTHERS += f32_to_i64.o
+SF_OBJS_OTHERS += f32_to_ui32_r_minMag.o
+SF_OBJS_OTHERS += f32_to_ui64_r_minMag.o
+SF_OBJS_OTHERS += f32_to_i32_r_minMag.o
+SF_OBJS_OTHERS += f32_to_i64_r_minMag.o
+SF_OBJS_OTHERS += f32_to_f16.o
+SF_OBJS_OTHERS += f32_to_f64.o
+SF_OBJS_OTHERS += f32_to_extF80.o
+SF_OBJS_OTHERS += f32_to_extF80M.o
+SF_OBJS_OTHERS += f32_to_f128.o
+SF_OBJS_OTHERS += f32_to_f128M.o
+SF_OBJS_OTHERS += f32_roundToInt.o
+SF_OBJS_OTHERS += f32_add.o
+SF_OBJS_OTHERS += f32_sub.o
+SF_OBJS_OTHERS += f32_mul.o
+SF_OBJS_OTHERS += f32_mulAdd.o
+SF_OBJS_OTHERS += f32_div.o
+SF_OBJS_OTHERS += f32_rem.o
+SF_OBJS_OTHERS += f32_sqrt.o
+SF_OBJS_OTHERS += f32_eq.o
+SF_OBJS_OTHERS += f32_le.o
+SF_OBJS_OTHERS += f32_lt.o
+SF_OBJS_OTHERS += f32_eq_signaling.o
+SF_OBJS_OTHERS += f32_le_quiet.o
+SF_OBJS_OTHERS += f32_lt_quiet.o
+SF_OBJS_OTHERS += f32_isSignalingNaN.o
+SF_OBJS_OTHERS += f64_to_ui32.o
+SF_OBJS_OTHERS += f64_to_ui64.o
+SF_OBJS_OTHERS += f64_to_i32.o
+SF_OBJS_OTHERS += f64_to_i64.o
+SF_OBJS_OTHERS += f64_to_ui32_r_minMag.o
+SF_OBJS_OTHERS += f64_to_ui64_r_minMag.o
+SF_OBJS_OTHERS += f64_to_i32_r_minMag.o
+SF_OBJS_OTHERS += f64_to_i64_r_minMag.o
+SF_OBJS_OTHERS += f64_to_f16.o
+SF_OBJS_OTHERS += f64_to_f32.o
+SF_OBJS_OTHERS += f64_to_extF80.o
+SF_OBJS_OTHERS += f64_to_extF80M.o
+SF_OBJS_OTHERS += f64_to_f128.o
+SF_OBJS_OTHERS += f64_to_f128M.o
+SF_OBJS_OTHERS += f64_roundToInt.o
+SF_OBJS_OTHERS += f64_add.o
+SF_OBJS_OTHERS += f64_sub.o
+SF_OBJS_OTHERS += f64_mul.o
+SF_OBJS_OTHERS += f64_mulAdd.o
+SF_OBJS_OTHERS += f64_div.o
+SF_OBJS_OTHERS += f64_rem.o
+SF_OBJS_OTHERS += f64_sqrt.o
+SF_OBJS_OTHERS += f64_eq.o
+SF_OBJS_OTHERS += f64_le.o
+SF_OBJS_OTHERS += f64_lt.o
+SF_OBJS_OTHERS += f64_eq_signaling.o
+SF_OBJS_OTHERS += f64_le_quiet.o
+SF_OBJS_OTHERS += f64_lt_quiet.o
+SF_OBJS_OTHERS += f64_isSignalingNaN.o
+SF_OBJS_OTHERS += extF80_to_ui32.o
+SF_OBJS_OTHERS += extF80_to_ui64.o
+SF_OBJS_OTHERS += extF80_to_i32.o
+SF_OBJS_OTHERS += extF80_to_i64.o
+SF_OBJS_OTHERS += extF80_to_ui32_r_minMag.o
+SF_OBJS_OTHERS += extF80_to_ui64_r_minMag.o
+SF_OBJS_OTHERS += extF80_to_i32_r_minMag.o
+SF_OBJS_OTHERS += extF80_to_i64_r_minMag.o
+SF_OBJS_OTHERS += extF80_to_f16.o
+SF_OBJS_OTHERS += extF80_to_f32.o
+SF_OBJS_OTHERS += extF80_to_f64.o
+SF_OBJS_OTHERS += extF80_to_f128.o
+SF_OBJS_OTHERS += extF80_roundToInt.o
+SF_OBJS_OTHERS += extF80_add.o
+SF_OBJS_OTHERS += extF80_sub.o
+SF_OBJS_OTHERS += extF80_mul.o
+SF_OBJS_OTHERS += extF80_div.o
+SF_OBJS_OTHERS += extF80_rem.o
+SF_OBJS_OTHERS += extF80_sqrt.o
+SF_OBJS_OTHERS += extF80_eq.o
+SF_OBJS_OTHERS += extF80_le.o
+SF_OBJS_OTHERS += extF80_lt.o
+SF_OBJS_OTHERS += extF80_eq_signaling.o
+SF_OBJS_OTHERS += extF80_le_quiet.o
+SF_OBJS_OTHERS += extF80_lt_quiet.o
+SF_OBJS_OTHERS += extF80_isSignalingNaN.o
+SF_OBJS_OTHERS += extF80M_to_ui32.o
+SF_OBJS_OTHERS += extF80M_to_ui64.o
+SF_OBJS_OTHERS += extF80M_to_i32.o
+SF_OBJS_OTHERS += extF80M_to_i64.o
+SF_OBJS_OTHERS += extF80M_to_ui32_r_minMag.o
+SF_OBJS_OTHERS += extF80M_to_ui64_r_minMag.o
+SF_OBJS_OTHERS += extF80M_to_i32_r_minMag.o
+SF_OBJS_OTHERS += extF80M_to_i64_r_minMag.o
+SF_OBJS_OTHERS += extF80M_to_f16.o
+SF_OBJS_OTHERS += extF80M_to_f32.o
+SF_OBJS_OTHERS += extF80M_to_f64.o
+SF_OBJS_OTHERS += extF80M_to_f128M.o
+SF_OBJS_OTHERS += extF80M_roundToInt.o
+SF_OBJS_OTHERS += extF80M_add.o
+SF_OBJS_OTHERS += extF80M_sub.o
+SF_OBJS_OTHERS += extF80M_mul.o
+SF_OBJS_OTHERS += extF80M_div.o
+SF_OBJS_OTHERS += extF80M_rem.o
+SF_OBJS_OTHERS += extF80M_sqrt.o
+SF_OBJS_OTHERS += extF80M_eq.o
+SF_OBJS_OTHERS += extF80M_le.o
+SF_OBJS_OTHERS += extF80M_lt.o
+SF_OBJS_OTHERS += extF80M_eq_signaling.o
+SF_OBJS_OTHERS += extF80M_le_quiet.o
+SF_OBJS_OTHERS += extF80M_lt_quiet.o
+SF_OBJS_OTHERS += f128_to_ui32.o
+SF_OBJS_OTHERS += f128_to_ui64.o
+SF_OBJS_OTHERS += f128_to_i32.o
+SF_OBJS_OTHERS += f128_to_i64.o
+SF_OBJS_OTHERS += f128_to_ui32_r_minMag.o
+SF_OBJS_OTHERS += f128_to_ui64_r_minMag.o
+SF_OBJS_OTHERS += f128_to_i32_r_minMag.o
+SF_OBJS_OTHERS += f128_to_i64_r_minMag.o
+SF_OBJS_OTHERS += f128_to_f16.o
+SF_OBJS_OTHERS += f128_to_f32.o
+SF_OBJS_OTHERS += f128_to_extF80.o
+SF_OBJS_OTHERS += f128_to_f64.o
+SF_OBJS_OTHERS += f128_roundToInt.o
+SF_OBJS_OTHERS += f128_add.o
+SF_OBJS_OTHERS += f128_sub.o
+SF_OBJS_OTHERS += f128_mul.o
+SF_OBJS_OTHERS += f128_mulAdd.o
+SF_OBJS_OTHERS += f128_div.o
+SF_OBJS_OTHERS += f128_rem.o
+SF_OBJS_OTHERS += f128_sqrt.o
+SF_OBJS_OTHERS += f128_eq.o
+SF_OBJS_OTHERS += f128_le.o
+SF_OBJS_OTHERS += f128_lt.o
+SF_OBJS_OTHERS += f128_eq_signaling.o
+SF_OBJS_OTHERS += f128_le_quiet.o
+SF_OBJS_OTHERS += f128_lt_quiet.o
+SF_OBJS_OTHERS += f128_isSignalingNaN.o
+SF_OBJS_OTHERS += f128M_to_ui32.o
+SF_OBJS_OTHERS += f128M_to_ui64.o
+SF_OBJS_OTHERS += f128M_to_i32.o
+SF_OBJS_OTHERS += f128M_to_i64.o
+SF_OBJS_OTHERS += f128M_to_ui32_r_minMag.o
+SF_OBJS_OTHERS += f128M_to_ui64_r_minMag.o
+SF_OBJS_OTHERS += f128M_to_i32_r_minMag.o
+SF_OBJS_OTHERS += f128M_to_i64_r_minMag.o
+SF_OBJS_OTHERS += f128M_to_f16.o
+SF_OBJS_OTHERS += f128M_to_f32.o
+SF_OBJS_OTHERS += f128M_to_extF80M.o
+SF_OBJS_OTHERS += f128M_to_f64.o
+SF_OBJS_OTHERS += f128M_roundToInt.o
+SF_OBJS_OTHERS += f128M_add.o
+SF_OBJS_OTHERS += f128M_sub.o
+SF_OBJS_OTHERS += f128M_mul.o
+SF_OBJS_OTHERS += f128M_mulAdd.o
+SF_OBJS_OTHERS += f128M_div.o
+SF_OBJS_OTHERS += f128M_rem.o
+SF_OBJS_OTHERS += f128M_sqrt.o
+SF_OBJS_OTHERS += f128M_eq.o
+SF_OBJS_OTHERS += f128M_le.o
+SF_OBJS_OTHERS += f128M_lt.o
+SF_OBJS_OTHERS += f128M_eq_signaling.o
+SF_OBJS_OTHERS += f128M_le_quiet.o
+SF_OBJS_OTHERS += f128M_lt_quiet.o
+
+# objects compiled from $(SF_SOURCE_DIR): the primitives plus the others
+SF_OBJS_ALL_NOSPEC := $(SF_OBJS_PRIMITIVES)
+SF_OBJS_ALL_NOSPEC += $(SF_OBJS_OTHERS)
+
+# everything that goes into libsoftfloat.a, specialized objects included
+SF_OBJS_ALL := $(SF_OBJS_ALL_NOSPEC)
+SF_OBJS_ALL += $(SF_OBJS_SPECIALIZE)
+
+# testfloat objects: built from $(TF_SOURCE_DIR) into libtestfloat.a
+TF_OBJS_GENCASES :=
+TF_OBJS_GENCASES += genCases_ui32.o
+TF_OBJS_GENCASES += genCases_ui64.o
+TF_OBJS_GENCASES += genCases_i32.o
+TF_OBJS_GENCASES += genCases_i64.o
+TF_OBJS_GENCASES += genCases_f16.o
+TF_OBJS_GENCASES += genCases_f32.o
+TF_OBJS_GENCASES += genCases_f64.o
+TF_OBJS_GENCASES += genCases_extF80.o
+TF_OBJS_GENCASES += genCases_f128.o
+
+TF_OBJS_WRITECASE :=
+TF_OBJS_WRITECASE += writeCase_a_ui32.o
+TF_OBJS_WRITECASE += writeCase_a_ui64.o
+TF_OBJS_WRITECASE += writeCase_a_f16.o
+TF_OBJS_WRITECASE += writeCase_ab_f16.o
+TF_OBJS_WRITECASE += writeCase_abc_f16.o
+TF_OBJS_WRITECASE += writeCase_a_f32.o
+TF_OBJS_WRITECASE += writeCase_ab_f32.o
+TF_OBJS_WRITECASE += writeCase_abc_f32.o
+TF_OBJS_WRITECASE += writeCase_a_f64.o
+TF_OBJS_WRITECASE += writeCase_ab_f64.o
+TF_OBJS_WRITECASE += writeCase_abc_f64.o
+TF_OBJS_WRITECASE += writeCase_a_extF80M.o
+TF_OBJS_WRITECASE += writeCase_ab_extF80M.o
+TF_OBJS_WRITECASE += writeCase_a_f128M.o
+TF_OBJS_WRITECASE += writeCase_ab_f128M.o
+TF_OBJS_WRITECASE += writeCase_abc_f128M.o
+TF_OBJS_WRITECASE += writeCase_z_bool.o
+TF_OBJS_WRITECASE += writeCase_z_ui32.o
+TF_OBJS_WRITECASE += writeCase_z_ui64.o
+TF_OBJS_WRITECASE += writeCase_z_f16.o
+TF_OBJS_WRITECASE += writeCase_z_f32.o
+TF_OBJS_WRITECASE += writeCase_z_f64.o
+TF_OBJS_WRITECASE += writeCase_z_extF80M.o
+TF_OBJS_WRITECASE += writeCase_z_f128M.o
+
+TF_OBJS_TEST :=
+TF_OBJS_TEST += test_a_ui32_z_f16.o
+TF_OBJS_TEST += test_a_ui32_z_f32.o
+TF_OBJS_TEST += test_a_ui32_z_f64.o
+TF_OBJS_TEST += test_a_ui32_z_extF80.o
+TF_OBJS_TEST += test_a_ui32_z_f128.o
+TF_OBJS_TEST += test_a_ui64_z_f16.o
+TF_OBJS_TEST += test_a_ui64_z_f32.o
+TF_OBJS_TEST += test_a_ui64_z_f64.o
+TF_OBJS_TEST += test_a_ui64_z_extF80.o
+TF_OBJS_TEST += test_a_ui64_z_f128.o
+TF_OBJS_TEST += test_a_i32_z_f16.o
+TF_OBJS_TEST += test_a_i32_z_f32.o
+TF_OBJS_TEST += test_a_i32_z_f64.o
+TF_OBJS_TEST += test_a_i32_z_extF80.o
+TF_OBJS_TEST += test_a_i32_z_f128.o
+TF_OBJS_TEST += test_a_i64_z_f16.o
+TF_OBJS_TEST += test_a_i64_z_f32.o
+TF_OBJS_TEST += test_a_i64_z_f64.o
+TF_OBJS_TEST += test_a_i64_z_extF80.o
+TF_OBJS_TEST += test_a_i64_z_f128.o
+TF_OBJS_TEST += test_a_f16_z_ui32_rx.o
+TF_OBJS_TEST += test_a_f16_z_ui64_rx.o
+TF_OBJS_TEST += test_a_f16_z_i32_rx.o
+TF_OBJS_TEST += test_a_f16_z_i64_rx.o
+TF_OBJS_TEST += test_a_f16_z_ui32_x.o
+TF_OBJS_TEST += test_a_f16_z_ui64_x.o
+TF_OBJS_TEST += test_a_f16_z_i32_x.o
+TF_OBJS_TEST += test_a_f16_z_i64_x.o
+TF_OBJS_TEST += test_a_f16_z_f32.o
+TF_OBJS_TEST += test_a_f16_z_f64.o
+TF_OBJS_TEST += test_a_f16_z_extF80.o
+TF_OBJS_TEST += test_a_f16_z_f128.o
+TF_OBJS_TEST += test_az_f16.o
+TF_OBJS_TEST += test_az_f16_rx.o
+TF_OBJS_TEST += test_abz_f16.o
+TF_OBJS_TEST += test_abcz_f16.o
+TF_OBJS_TEST += test_ab_f16_z_bool.o
+TF_OBJS_TEST += test_a_f32_z_ui32_rx.o
+TF_OBJS_TEST += test_a_f32_z_ui64_rx.o
+TF_OBJS_TEST += test_a_f32_z_i32_rx.o
+TF_OBJS_TEST += test_a_f32_z_i64_rx.o
+TF_OBJS_TEST += test_a_f32_z_ui32_x.o
+TF_OBJS_TEST += test_a_f32_z_ui64_x.o
+TF_OBJS_TEST += test_a_f32_z_i32_x.o
+TF_OBJS_TEST += test_a_f32_z_i64_x.o
+TF_OBJS_TEST += test_a_f32_z_f16.o
+TF_OBJS_TEST += test_a_f32_z_f64.o
+TF_OBJS_TEST += test_a_f32_z_extF80.o
+TF_OBJS_TEST += test_a_f32_z_f128.o
+TF_OBJS_TEST += test_az_f32.o
+TF_OBJS_TEST += test_az_f32_rx.o
+TF_OBJS_TEST += test_abz_f32.o
+TF_OBJS_TEST += test_abcz_f32.o
+TF_OBJS_TEST += test_ab_f32_z_bool.o
+TF_OBJS_TEST += test_a_f64_z_ui32_rx.o
+TF_OBJS_TEST += test_a_f64_z_ui64_rx.o
+TF_OBJS_TEST += test_a_f64_z_i32_rx.o
+TF_OBJS_TEST += test_a_f64_z_i64_rx.o
+TF_OBJS_TEST += test_a_f64_z_ui32_x.o
+TF_OBJS_TEST += test_a_f64_z_ui64_x.o
+TF_OBJS_TEST += test_a_f64_z_i32_x.o
+TF_OBJS_TEST += test_a_f64_z_i64_x.o
+TF_OBJS_TEST += test_a_f64_z_f16.o
+TF_OBJS_TEST += test_a_f64_z_f32.o
+TF_OBJS_TEST += test_a_f64_z_extF80.o
+TF_OBJS_TEST += test_a_f64_z_f128.o
+TF_OBJS_TEST += test_az_f64.o
+TF_OBJS_TEST += test_az_f64_rx.o
+TF_OBJS_TEST += test_abz_f64.o
+TF_OBJS_TEST += test_abcz_f64.o
+TF_OBJS_TEST += test_ab_f64_z_bool.o
+TF_OBJS_TEST += test_a_extF80_z_ui32_rx.o
+TF_OBJS_TEST += test_a_extF80_z_ui64_rx.o
+TF_OBJS_TEST += test_a_extF80_z_i32_rx.o
+TF_OBJS_TEST += test_a_extF80_z_i64_rx.o
+TF_OBJS_TEST += test_a_extF80_z_ui32_x.o
+TF_OBJS_TEST += test_a_extF80_z_ui64_x.o
+TF_OBJS_TEST += test_a_extF80_z_i32_x.o
+TF_OBJS_TEST += test_a_extF80_z_i64_x.o
+TF_OBJS_TEST += test_a_extF80_z_f16.o
+TF_OBJS_TEST += test_a_extF80_z_f32.o
+TF_OBJS_TEST += test_a_extF80_z_f64.o
+TF_OBJS_TEST += test_a_extF80_z_f128.o
+TF_OBJS_TEST += test_az_extF80.o
+TF_OBJS_TEST += test_az_extF80_rx.o
+TF_OBJS_TEST += test_abz_extF80.o
+TF_OBJS_TEST += test_ab_extF80_z_bool.o
+TF_OBJS_TEST += test_a_f128_z_ui32_rx.o
+TF_OBJS_TEST += test_a_f128_z_ui64_rx.o
+TF_OBJS_TEST += test_a_f128_z_i32_rx.o
+TF_OBJS_TEST += test_a_f128_z_i64_rx.o
+TF_OBJS_TEST += test_a_f128_z_ui32_x.o
+TF_OBJS_TEST += test_a_f128_z_ui64_x.o
+TF_OBJS_TEST += test_a_f128_z_i32_x.o
+TF_OBJS_TEST += test_a_f128_z_i64_x.o
+TF_OBJS_TEST += test_a_f128_z_f16.o
+TF_OBJS_TEST += test_a_f128_z_f32.o
+TF_OBJS_TEST += test_a_f128_z_f64.o
+TF_OBJS_TEST += test_a_f128_z_extF80.o
+TF_OBJS_TEST += test_az_f128.o
+TF_OBJS_TEST += test_az_f128_rx.o
+TF_OBJS_TEST += test_abz_f128.o
+TF_OBJS_TEST += test_abcz_f128.o
+TF_OBJS_TEST += test_ab_f128_z_bool.o
+
+TF_OBJS_LIB :=
+TF_OBJS_LIB += uint128_inline.o
+TF_OBJS_LIB += uint128.o
+TF_OBJS_LIB += fail.o
+TF_OBJS_LIB += functions_common.o
+TF_OBJS_LIB += functionInfos.o
+TF_OBJS_LIB += standardFunctionInfos.o
+TF_OBJS_LIB += random.o
+TF_OBJS_LIB += genCases_common.o
+TF_OBJS_LIB += $(TF_OBJS_GENCASES)
+TF_OBJS_LIB += genCases_writeTestsTotal.o
+TF_OBJS_LIB += verCases_inline.o
+TF_OBJS_LIB += verCases_common.o
+TF_OBJS_LIB += verCases_writeFunctionName.o
+TF_OBJS_LIB += readHex.o
+TF_OBJS_LIB += writeHex.o
+TF_OBJS_LIB += $(TF_OBJS_WRITECASE)
+TF_OBJS_LIB += testLoops_common.o
+TF_OBJS_LIB += $(TF_OBJS_TEST)
+
+BINARIES := fp-test$(EXESUF)
+
+# everything depends on config-host.h because platform.h includes it
+all: $(BUILD_DIR)/config-host.h
+	$(MAKE) $(BINARIES)
+
+$(LIBQEMUUTIL):
+	$(MAKE) -C $(BUILD_DIR) libqemuutil.a
+
+$(BUILD_DIR)/config-host.h:
+	$(MAKE) -C $(BUILD_DIR) config-host.h
+
+# libtestfloat.a depends on libsoftfloat.a, so specify it first
+FP_TEST_LIBS := libtestfloat.a libsoftfloat.a $(LIBQEMUUTIL)
+
+fp-test$(EXESUF): fp-test.o slowfloat.o $(QEMU_SOFTFLOAT_OBJ) $(FP_TEST_LIBS)
+
+# Custom rule to build with SF_CFLAGS
+SF_BUILD = $(call quiet-command,$(CC) $(QEMU_LOCAL_INCLUDES) $(QEMU_INCLUDES) \
+		$(QEMU_CFLAGS) $(SF_CFLAGS) $(QEMU_DGFLAGS) $(CFLAGS) \
+		$($@-cflags) -c -o $@ $<,"CC","$(TARGET_DIR)$@")
+
+$(SF_OBJS_ALL_NOSPEC): %.o: $(SF_SOURCE_DIR)/%.c
+	$(SF_BUILD)
+$(SF_OBJS_SPECIALIZE): %.o: $(SF_SPECIALIZE_DIR)/%.c
+	$(SF_BUILD)
+
+libsoftfloat.a: $(SF_OBJS_ALL)
+
+# Custom rule to build with TF_CFLAGS
+$(TF_OBJS_LIB) slowfloat.o: %.o: $(TF_SOURCE_DIR)/%.c
+	$(call quiet-command,$(CC) $(QEMU_LOCAL_INCLUDES) $(QEMU_INCLUDES) \
+		$(QEMU_CFLAGS) $(TF_CFLAGS) $(QEMU_DGFLAGS) $(CFLAGS) \
+		$($@-cflags) -c -o $@ $<,"CC","$(TARGET_DIR)$@")
+
+libtestfloat.a: $(TF_OBJS_LIB)
+
+# 'all' and 'clean' name actions, not files: never let a stray file mask them
+.PHONY: all clean
+clean:
+	rm -f *.o *.d $(BINARIES)
+	rm -f *.gcno *.gcda *.gcov
+	rm -f libsoftfloat.a libtestfloat.a
+
+-include $(wildcard *.d)
diff --git a/tests/fp/berkeley-softfloat-3 b/tests/fp/berkeley-softfloat-3
new file mode 160000
index 0000000..b64af41
--- /dev/null
+++ b/tests/fp/berkeley-softfloat-3
@@ -0,0 +1 @@
+Subproject commit b64af41c3276f97f0e181920400ee056b9c88037
diff --git a/tests/fp/berkeley-testfloat-3 b/tests/fp/berkeley-testfloat-3
new file mode 160000
index 0000000..ca9fa2b
--- /dev/null
+++ b/tests/fp/berkeley-testfloat-3
@@ -0,0 +1 @@
+Subproject commit ca9fa2ba05625ba929958f163b01747e07dd39cc
diff --git a/tests/fp/fp-test.c b/tests/fp/fp-test.c
new file mode 100644
index 0000000..fca5763
--- /dev/null
+++ b/tests/fp/fp-test.c
@@ -0,0 +1,992 @@
+/*
+ * fp-test.c - test QEMU's softfloat implementation using Berkeley's Testfloat
+ *
+ * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ *
+ * This file is derived from testfloat/source/testsoftfloat.c. Its copyright
+ * info follows:
+ *
+ * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+ * University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions, and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright notice,
+ *     this list of conditions, and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *  3. Neither the name of the University nor the names of its contributors may
+ *     be used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef HW_POISON_H
+#error Must define HW_POISON_H to work around TARGET_* poisoning
+#endif
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include <math.h>
+#include "fpu/softfloat.h"
+#include "platform.h"
+
+#include "fail.h"
+#include "slowfloat.h"
+#include "functions.h"
+#include "genCases.h"
+#include "verCases.h"
+#include "writeCase.h"
+#include "testLoops.h"
+/* Two-operand callbacks; do_testfloat() pairs a slow_* with a qemu_* one */
+typedef float16_t (*abz_f16)(float16_t, float16_t);
+typedef bool (*ab_f16_z_bool)(float16_t, float16_t);
+typedef float32_t (*abz_f32)(float32_t, float32_t);
+typedef bool (*ab_f32_z_bool)(float32_t, float32_t);
+typedef float64_t (*abz_f64)(float64_t, float64_t);
+typedef bool (*ab_f64_z_bool)(float64_t, float64_t);
+typedef void (*abz_extF80M)(const extFloat80_t *, const extFloat80_t *,
+                            extFloat80_t *);
+typedef bool (*ab_extF80M_z_bool)(const extFloat80_t *, const extFloat80_t *);
+typedef void (*abz_f128M)(const float128_t *, const float128_t *, float128_t *);
+typedef bool (*ab_f128M_z_bool)(const float128_t *, const float128_t *);
+
+static const char * const round_mode_names[] = {
+    [ROUND_NEAR_EVEN] = "even",
+    [ROUND_MINMAG] = "zero",
+    [ROUND_MIN] = "down",
+    [ROUND_MAX] = "up",
+    [ROUND_NEAR_MAXMAG] = "tieaway",
+    [ROUND_ODD] = "odd",
+};
+static unsigned int *test_ops;
+static unsigned int n_test_ops;
+static unsigned int n_max_errors = 20;
+static unsigned int test_round_mode = ROUND_NEAR_EVEN;
+static unsigned int *round_modes;
+static unsigned int n_round_modes;
+static int test_level = 1;
+static uint8_t slow_init_flags;
+static uint8_t qemu_init_flags;
+
+/* qemu softfloat status */
+static float_status qsf;
+
+static const char commands_string[] =
+    "operations:\n"
+    "    <int>_to_<float>            <float>_add      <float>_eq\n"
+    "    <float>_to_<int>            <float>_sub      <float>_le\n"
+    "    <float>_to_<int>_r_minMag   <float>_mul      <float>_lt\n"
+    "    <float>_to_<float>          <float>_mulAdd   <float>_eq_signaling\n"
+    "    <float>_roundToInt          <float>_div      <float>_le_quiet\n"
+    "                                <float>_rem      <float>_lt_quiet\n"
+    "                                <float>_sqrt\n"
+    "    Where <int>: ui32, ui64, i32, i64\n"
+    "          <float>: f16, f32, f64, extF80, f128\n"
+    "    If no operation is provided, all the above are tested\n"
+    "options:\n"
+    " -e = max error count per test. Default: 20. Set no limit with 0\n"
+    " -f = initial FP exception flags (vioux). Default: none\n"
+    " -l = thoroughness level (1 (default), 2)\n"
+    " -r = rounding mode (even (default), zero, down, up, tieaway, odd)\n"
+    "      Set to 'all' to test all rounding modes, if applicable\n"
+    " -s = stop when a test fails";
+/* Print usage plus commands_string to stderr; exits with EXIT_FAILURE. */
+static void usage_complete(int argc, char *argv[])
+{
+    fprintf(stderr, "Usage: %s [options] [operation1 ...]\n", argv[0]);
+    fprintf(stderr, "%s\n", commands_string);
+    exit(EXIT_FAILURE);
+}
+
+/* keep wrappers separate but do not bother defining headers for all of them */
+#include "wrap.inc.c"
+/* Tell the user on stderr that the selected test is not implemented. */
+static void not_implemented(void)
+{
+    fputs("Not implemented.\n", stderr);
+}
+
+static bool blacklisted(unsigned op, int rmode)
+{
+    /* only round-to-odd has gaps: it is implemented for a few f128 ops */
+    if (rmode != softfloat_round_odd) {
+        return false;
+    }
+    switch (op) {
+    case F128_ADD:
+    case F128_SUB:
+    case F128_MUL:
+    case F128_DIV:
+    case F128_TO_F64:
+    case F128_SQRT:
+        return false;
+    default:
+        return true;
+    }
+}
+
+static void do_testfloat(int op, int rmode, bool exact)
+{
+    abz_f16 true_abz_f16;
+    abz_f16 subj_abz_f16;
+    ab_f16_z_bool true_f16_z_bool;
+    ab_f16_z_bool subj_f16_z_bool;
+    abz_f32 true_abz_f32;
+    abz_f32 subj_abz_f32;
+    ab_f32_z_bool true_ab_f32_z_bool;
+    ab_f32_z_bool subj_ab_f32_z_bool;
+    abz_f64 true_abz_f64;
+    abz_f64 subj_abz_f64;
+    ab_f64_z_bool true_ab_f64_z_bool;
+    ab_f64_z_bool subj_ab_f64_z_bool;
+    abz_extF80M true_abz_extF80M;
+    abz_extF80M subj_abz_extF80M;
+    ab_extF80M_z_bool true_ab_extF80M_z_bool;
+    ab_extF80M_z_bool subj_ab_extF80M_z_bool;
+    abz_f128M true_abz_f128M;
+    abz_f128M subj_abz_f128M;
+    ab_f128M_z_bool true_ab_f128M_z_bool;
+    ab_f128M_z_bool subj_ab_f128M_z_bool;
+
+    fputs(">> Testing ", stderr);
+    verCases_writeFunctionName(stderr);
+    fputs("\n", stderr);
+
+    if (blacklisted(op, rmode)) {
+        not_implemented();
+        return;
+    }
+
+    switch (op) {
+    case UI32_TO_F16:
+        test_a_ui32_z_f16(slow_ui32_to_f16, qemu_ui32_to_f16);
+        break;
+    case UI32_TO_F32:
+        test_a_ui32_z_f32(slow_ui32_to_f32, qemu_ui32_to_f32);
+        break;
+    case UI32_TO_F64:
+        test_a_ui32_z_f64(slow_ui32_to_f64, qemu_ui32_to_f64);
+        break;
+    case UI32_TO_EXTF80:
+        not_implemented();
+        break;
+    case UI32_TO_F128:
+        not_implemented();
+        break;
+    case UI64_TO_F16:
+        test_a_ui64_z_f16(slow_ui64_to_f16, qemu_ui64_to_f16);
+        break;
+    case UI64_TO_F32:
+        test_a_ui64_z_f32(slow_ui64_to_f32, qemu_ui64_to_f32);
+        break;
+    case UI64_TO_F64:
+        test_a_ui64_z_f64(slow_ui64_to_f64, qemu_ui64_to_f64);
+        break;
+    case UI64_TO_EXTF80:
+        not_implemented();
+        break;
+    case UI64_TO_F128:
+        test_a_ui64_z_f128(slow_ui64_to_f128M, qemu_ui64_to_f128M);
+        break;
+    case I32_TO_F16:
+        test_a_i32_z_f16(slow_i32_to_f16, qemu_i32_to_f16);
+        break;
+    case I32_TO_F32:
+        test_a_i32_z_f32(slow_i32_to_f32, qemu_i32_to_f32);
+        break;
+    case I32_TO_F64:
+        test_a_i32_z_f64(slow_i32_to_f64, qemu_i32_to_f64);
+        break;
+    case I32_TO_EXTF80:
+        test_a_i32_z_extF80(slow_i32_to_extF80M, qemu_i32_to_extF80M);
+        break;
+    case I32_TO_F128:
+        test_a_i32_z_f128(slow_i32_to_f128M, qemu_i32_to_f128M);
+        break;
+    case I64_TO_F16:
+        test_a_i64_z_f16(slow_i64_to_f16, qemu_i64_to_f16);
+        break;
+    case I64_TO_F32:
+        test_a_i64_z_f32(slow_i64_to_f32, qemu_i64_to_f32);
+        break;
+    case I64_TO_F64:
+        test_a_i64_z_f64(slow_i64_to_f64, qemu_i64_to_f64);
+        break;
+    case I64_TO_EXTF80:
+        test_a_i64_z_extF80(slow_i64_to_extF80M, qemu_i64_to_extF80M);
+        break;
+    case I64_TO_F128:
+        test_a_i64_z_f128(slow_i64_to_f128M, qemu_i64_to_f128M);
+        break;
+    case F16_TO_UI32:
+        test_a_f16_z_ui32_rx(slow_f16_to_ui32, qemu_f16_to_ui32, rmode, exact);
+        break;
+    case F16_TO_UI64:
+        test_a_f16_z_ui64_rx(slow_f16_to_ui64, qemu_f16_to_ui64, rmode, exact);
+        break;
+    case F16_TO_I32:
+        test_a_f16_z_i32_rx(slow_f16_to_i32, qemu_f16_to_i32, rmode, exact);
+        break;
+    case F16_TO_I64:
+        test_a_f16_z_i64_rx(slow_f16_to_i64, qemu_f16_to_i64, rmode, exact);
+        break;
+    case F16_TO_UI32_R_MINMAG:
+        test_a_f16_z_ui32_x(slow_f16_to_ui32_r_minMag,
+                            qemu_f16_to_ui32_r_minMag, exact);
+        break;
+    case F16_TO_UI64_R_MINMAG:
+        test_a_f16_z_ui64_x(slow_f16_to_ui64_r_minMag,
+                            qemu_f16_to_ui64_r_minMag, exact);
+        break;
+    case F16_TO_I32_R_MINMAG:
+        test_a_f16_z_i32_x(slow_f16_to_i32_r_minMag, qemu_f16_to_i32_r_minMag,
+                           exact);
+        break;
+    case F16_TO_I64_R_MINMAG:
+        test_a_f16_z_i64_x(slow_f16_to_i64_r_minMag, qemu_f16_to_i64_r_minMag,
+                           exact);
+        break;
+    case F16_TO_F32:
+        test_a_f16_z_f32(slow_f16_to_f32, qemu_f16_to_f32);
+        break;
+    case F16_TO_F64:
+        test_a_f16_z_f64(slow_f16_to_f64, qemu_f16_to_f64);
+        break;
+    case F16_TO_EXTF80:
+        not_implemented();
+        break;
+    case F16_TO_F128:
+        not_implemented();
+        break;
+    case F16_ROUNDTOINT:
+        test_az_f16_rx(slow_f16_roundToInt, qemu_f16_roundToInt, rmode, exact);
+        break;
+    case F16_ADD:
+        true_abz_f16 = slow_f16_add;
+        subj_abz_f16 = qemu_f16_add;
+        goto test_abz_f16;
+    case F16_SUB:
+        true_abz_f16 = slow_f16_sub;
+        subj_abz_f16 = qemu_f16_sub;
+        goto test_abz_f16;
+    case F16_MUL:
+        true_abz_f16 = slow_f16_mul;
+        subj_abz_f16 = qemu_f16_mul;
+        goto test_abz_f16;
+    case F16_DIV:
+        true_abz_f16 = slow_f16_div;
+        subj_abz_f16 = qemu_f16_div;
+        goto test_abz_f16;
+    case F16_REM:
+        not_implemented();
+        break;
+    test_abz_f16:
+        test_abz_f16(true_abz_f16, subj_abz_f16);
+        break;
+    case F16_MULADD:
+        test_abcz_f16(slow_f16_mulAdd, qemu_f16_mulAdd);
+        break;
+    case F16_SQRT:
+        test_az_f16(slow_f16_sqrt, qemu_f16_sqrt);
+        break;
+    case F16_EQ:
+        true_f16_z_bool = slow_f16_eq;
+        subj_f16_z_bool = qemu_f16_eq;
+        goto test_ab_f16_z_bool;
+    case F16_LE:
+        true_f16_z_bool = slow_f16_le;
+        subj_f16_z_bool = qemu_f16_le;
+        goto test_ab_f16_z_bool;
+    case F16_LT:
+        true_f16_z_bool = slow_f16_lt;
+        subj_f16_z_bool = qemu_f16_lt;
+        goto test_ab_f16_z_bool;
+    case F16_EQ_SIGNALING:
+        true_f16_z_bool = slow_f16_eq_signaling;
+        subj_f16_z_bool = qemu_f16_eq_signaling;
+        goto test_ab_f16_z_bool;
+    case F16_LE_QUIET:
+        true_f16_z_bool = slow_f16_le_quiet;
+        subj_f16_z_bool = qemu_f16_le_quiet;
+        goto test_ab_f16_z_bool;
+    case F16_LT_QUIET:
+        true_f16_z_bool = slow_f16_lt_quiet;
+        subj_f16_z_bool = qemu_f16_lt_quiet;
+    test_ab_f16_z_bool:
+        test_ab_f16_z_bool(true_f16_z_bool, subj_f16_z_bool);
+        break;
+    case F32_TO_UI32:
+        test_a_f32_z_ui32_rx(slow_f32_to_ui32, qemu_f32_to_ui32, rmode, exact);
+        break;
+    case F32_TO_UI64:
+        test_a_f32_z_ui64_rx(slow_f32_to_ui64, qemu_f32_to_ui64, rmode, exact);
+        break;
+    case F32_TO_I32:
+        test_a_f32_z_i32_rx(slow_f32_to_i32, qemu_f32_to_i32, rmode, exact);
+        break;
+    case F32_TO_I64:
+        test_a_f32_z_i64_rx(slow_f32_to_i64, qemu_f32_to_i64, rmode, exact);
+        break;
+    case F32_TO_UI32_R_MINMAG:
+        test_a_f32_z_ui32_x(slow_f32_to_ui32_r_minMag,
+                            qemu_f32_to_ui32_r_minMag, exact);
+        break;
+    case F32_TO_UI64_R_MINMAG:
+        test_a_f32_z_ui64_x(slow_f32_to_ui64_r_minMag,
+                            qemu_f32_to_ui64_r_minMag, exact);
+        break;
+    case F32_TO_I32_R_MINMAG:
+        test_a_f32_z_i32_x(slow_f32_to_i32_r_minMag, qemu_f32_to_i32_r_minMag,
+                           exact);
+        break;
+    case F32_TO_I64_R_MINMAG:
+        test_a_f32_z_i64_x(slow_f32_to_i64_r_minMag, qemu_f32_to_i64_r_minMag,
+                           exact);
+        break;
+    case F32_TO_F16:
+        test_a_f32_z_f16(slow_f32_to_f16, qemu_f32_to_f16);
+        break;
+    case F32_TO_F64:
+        test_a_f32_z_f64(slow_f32_to_f64, qemu_f32_to_f64);
+        break;
+    case F32_TO_EXTF80:
+        test_a_f32_z_extF80(slow_f32_to_extF80M, qemu_f32_to_extF80M);
+        break;
+    case F32_TO_F128:
+        test_a_f32_z_f128(slow_f32_to_f128M, qemu_f32_to_f128M);
+        break;
+    case F32_ROUNDTOINT:
+        test_az_f32_rx(slow_f32_roundToInt, qemu_f32_roundToInt, rmode, exact);
+        break;
+    case F32_ADD:
+        true_abz_f32 = slow_f32_add;
+        subj_abz_f32 = qemu_f32_add;
+        goto test_abz_f32;
+    case F32_SUB:
+        true_abz_f32 = slow_f32_sub;
+        subj_abz_f32 = qemu_f32_sub;
+        goto test_abz_f32;
+    case F32_MUL:
+        true_abz_f32 = slow_f32_mul;
+        subj_abz_f32 = qemu_f32_mul;
+        goto test_abz_f32;
+    case F32_DIV:
+        true_abz_f32 = slow_f32_div;
+        subj_abz_f32 = qemu_f32_div;
+        goto test_abz_f32;
+    case F32_REM:
+        true_abz_f32 = slow_f32_rem;
+        subj_abz_f32 = qemu_f32_rem;
+    test_abz_f32:
+        test_abz_f32(true_abz_f32, subj_abz_f32);
+        break;
+    case F32_MULADD:
+        test_abcz_f32(slow_f32_mulAdd, qemu_f32_mulAdd);
+        break;
+    case F32_SQRT:
+        test_az_f32(slow_f32_sqrt, qemu_f32_sqrt);
+        break;
+    case F32_EQ:
+        true_ab_f32_z_bool = slow_f32_eq;
+        subj_ab_f32_z_bool = qemu_f32_eq;
+        goto test_ab_f32_z_bool;
+    case F32_LE:
+        true_ab_f32_z_bool = slow_f32_le;
+        subj_ab_f32_z_bool = qemu_f32_le;
+        goto test_ab_f32_z_bool;
+    case F32_LT:
+        true_ab_f32_z_bool = slow_f32_lt;
+        subj_ab_f32_z_bool = qemu_f32_lt;
+        goto test_ab_f32_z_bool;
+    case F32_EQ_SIGNALING:
+        true_ab_f32_z_bool = slow_f32_eq_signaling;
+        subj_ab_f32_z_bool = qemu_f32_eq_signaling;
+        goto test_ab_f32_z_bool;
+    case F32_LE_QUIET:
+        true_ab_f32_z_bool = slow_f32_le_quiet;
+        subj_ab_f32_z_bool = qemu_f32_le_quiet;
+        goto test_ab_f32_z_bool;
+    case F32_LT_QUIET:
+        true_ab_f32_z_bool = slow_f32_lt_quiet;
+        subj_ab_f32_z_bool = qemu_f32_lt_quiet;
+    test_ab_f32_z_bool:
+        test_ab_f32_z_bool(true_ab_f32_z_bool, subj_ab_f32_z_bool);
+        break;
+    case F64_TO_UI32:
+        test_a_f64_z_ui32_rx(slow_f64_to_ui32, qemu_f64_to_ui32, rmode, exact);
+        break;
+    case F64_TO_UI64:
+        test_a_f64_z_ui64_rx(slow_f64_to_ui64, qemu_f64_to_ui64, rmode, exact);
+        break;
+    case F64_TO_I32:
+        test_a_f64_z_i32_rx(slow_f64_to_i32, qemu_f64_to_i32, rmode, exact);
+        break;
+    case F64_TO_I64:
+        test_a_f64_z_i64_rx(slow_f64_to_i64, qemu_f64_to_i64, rmode, exact);
+        break;
+    case F64_TO_UI32_R_MINMAG:
+        test_a_f64_z_ui32_x(slow_f64_to_ui32_r_minMag,
+                            qemu_f64_to_ui32_r_minMag, exact);
+        break;
+    case F64_TO_UI64_R_MINMAG:
+        test_a_f64_z_ui64_x(slow_f64_to_ui64_r_minMag,
+                            qemu_f64_to_ui64_r_minMag, exact);
+        break;
+    case F64_TO_I32_R_MINMAG:
+        test_a_f64_z_i32_x(slow_f64_to_i32_r_minMag, qemu_f64_to_i32_r_minMag,
+                           exact);
+        break;
+    case F64_TO_I64_R_MINMAG:
+        test_a_f64_z_i64_x(slow_f64_to_i64_r_minMag, qemu_f64_to_i64_r_minMag,
+                           exact);
+        break;
+    case F64_TO_F16:
+        test_a_f64_z_f16(slow_f64_to_f16, qemu_f64_to_f16);
+        break;
+    case F64_TO_F32:
+        test_a_f64_z_f32(slow_f64_to_f32, qemu_f64_to_f32);
+        break;
+    case F64_TO_EXTF80:
+        test_a_f64_z_extF80(slow_f64_to_extF80M, qemu_f64_to_extF80M);
+        break;
+    case F64_TO_F128:
+        test_a_f64_z_f128(slow_f64_to_f128M, qemu_f64_to_f128M);
+        break;
+    case F64_ROUNDTOINT:
+        test_az_f64_rx(slow_f64_roundToInt, qemu_f64_roundToInt, rmode, exact);
+        break;
+    case F64_ADD:
+        true_abz_f64 = slow_f64_add;
+        subj_abz_f64 = qemu_f64_add;
+        goto test_abz_f64;
+    case F64_SUB:
+        true_abz_f64 = slow_f64_sub;
+        subj_abz_f64 = qemu_f64_sub;
+        goto test_abz_f64;
+    case F64_MUL:
+        true_abz_f64 = slow_f64_mul;
+        subj_abz_f64 = qemu_f64_mul;
+        goto test_abz_f64;
+    case F64_DIV:
+        true_abz_f64 = slow_f64_div;
+        subj_abz_f64 = qemu_f64_div;
+        goto test_abz_f64;
+    case F64_REM:
+        true_abz_f64 = slow_f64_rem;
+        subj_abz_f64 = qemu_f64_rem;
+    test_abz_f64:
+        test_abz_f64(true_abz_f64, subj_abz_f64);
+        break;
+    case F64_MULADD:
+        test_abcz_f64(slow_f64_mulAdd, qemu_f64_mulAdd);
+        break;
+    case F64_SQRT:
+        test_az_f64(slow_f64_sqrt, qemu_f64_sqrt);
+        break;
+    case F64_EQ:
+        true_ab_f64_z_bool = slow_f64_eq;
+        subj_ab_f64_z_bool = qemu_f64_eq;
+        goto test_ab_f64_z_bool;
+    case F64_LE:
+        true_ab_f64_z_bool = slow_f64_le;
+        subj_ab_f64_z_bool = qemu_f64_le;
+        goto test_ab_f64_z_bool;
+    case F64_LT:
+        true_ab_f64_z_bool = slow_f64_lt;
+        subj_ab_f64_z_bool = qemu_f64_lt;
+        goto test_ab_f64_z_bool;
+    case F64_EQ_SIGNALING:
+        true_ab_f64_z_bool = slow_f64_eq_signaling;
+        subj_ab_f64_z_bool = qemu_f64_eq_signaling;
+        goto test_ab_f64_z_bool;
+    case F64_LE_QUIET:
+        true_ab_f64_z_bool = slow_f64_le_quiet;
+        subj_ab_f64_z_bool = qemu_f64_le_quiet;
+        goto test_ab_f64_z_bool;
+    case F64_LT_QUIET:
+        true_ab_f64_z_bool = slow_f64_lt_quiet;
+        subj_ab_f64_z_bool = qemu_f64_lt_quiet;
+    test_ab_f64_z_bool:
+        test_ab_f64_z_bool(true_ab_f64_z_bool, subj_ab_f64_z_bool);
+        break;
+    case EXTF80_TO_UI32:
+        not_implemented();
+        break;
+    case EXTF80_TO_UI64:
+        not_implemented();
+        break;
+    case EXTF80_TO_I32:
+        test_a_extF80_z_i32_rx(slow_extF80M_to_i32, qemu_extF80M_to_i32, rmode,
+                               exact);
+        break;
+    case EXTF80_TO_I64:
+        test_a_extF80_z_i64_rx(slow_extF80M_to_i64, qemu_extF80M_to_i64, rmode,
+                               exact);
+        break;
+    case EXTF80_TO_UI32_R_MINMAG:
+        not_implemented();
+        break;
+    case EXTF80_TO_UI64_R_MINMAG:
+        not_implemented();
+        break;
+    case EXTF80_TO_I32_R_MINMAG:
+        test_a_extF80_z_i32_x(slow_extF80M_to_i32_r_minMag,
+                              qemu_extF80M_to_i32_r_minMag, exact);
+        break;
+    case EXTF80_TO_I64_R_MINMAG:
+        test_a_extF80_z_i64_x(slow_extF80M_to_i64_r_minMag,
+                              qemu_extF80M_to_i64_r_minMag, exact);
+        break;
+    case EXTF80_TO_F16:
+        not_implemented();
+        break;
+    case EXTF80_TO_F32:
+        test_a_extF80_z_f32(slow_extF80M_to_f32, qemu_extF80M_to_f32);
+        break;
+    case EXTF80_TO_F64:
+        test_a_extF80_z_f64(slow_extF80M_to_f64, qemu_extF80M_to_f64);
+        break;
+    case EXTF80_TO_F128:
+        test_a_extF80_z_f128(slow_extF80M_to_f128M, qemu_extF80M_to_f128M);
+        break;
+    case EXTF80_ROUNDTOINT:
+        test_az_extF80_rx(slow_extF80M_roundToInt, qemu_extF80M_roundToInt,
+                          rmode, exact);
+        break;
+    case EXTF80_ADD:
+        true_abz_extF80M = slow_extF80M_add;
+        subj_abz_extF80M = qemu_extF80M_add;
+        goto test_abz_extF80;
+    case EXTF80_SUB:
+        true_abz_extF80M = slow_extF80M_sub;
+        subj_abz_extF80M = qemu_extF80M_sub;
+        goto test_abz_extF80;
+    case EXTF80_MUL:
+        true_abz_extF80M = slow_extF80M_mul;
+        subj_abz_extF80M = qemu_extF80M_mul;
+        goto test_abz_extF80;
+    case EXTF80_DIV:
+        true_abz_extF80M = slow_extF80M_div;
+        subj_abz_extF80M = qemu_extF80M_div;
+        goto test_abz_extF80;
+    case EXTF80_REM:
+        true_abz_extF80M = slow_extF80M_rem;
+        subj_abz_extF80M = qemu_extF80M_rem;
+    test_abz_extF80:
+        test_abz_extF80(true_abz_extF80M, subj_abz_extF80M);
+        break;
+    case EXTF80_SQRT:
+        test_az_extF80(slow_extF80M_sqrt, qemu_extF80M_sqrt);
+        break;
+    case EXTF80_EQ:
+        true_ab_extF80M_z_bool = slow_extF80M_eq;
+        subj_ab_extF80M_z_bool = qemu_extF80M_eq;
+        goto test_ab_extF80_z_bool;
+    case EXTF80_LE:
+        true_ab_extF80M_z_bool = slow_extF80M_le;
+        subj_ab_extF80M_z_bool = qemu_extF80M_le;
+        goto test_ab_extF80_z_bool;
+    case EXTF80_LT:
+        true_ab_extF80M_z_bool = slow_extF80M_lt;
+        subj_ab_extF80M_z_bool = qemu_extF80M_lt;
+        goto test_ab_extF80_z_bool;
+    case EXTF80_EQ_SIGNALING:
+        true_ab_extF80M_z_bool = slow_extF80M_eq_signaling;
+        subj_ab_extF80M_z_bool = qemu_extF80M_eq_signaling;
+        goto test_ab_extF80_z_bool;
+    case EXTF80_LE_QUIET:
+        true_ab_extF80M_z_bool = slow_extF80M_le_quiet;
+        subj_ab_extF80M_z_bool = qemu_extF80M_le_quiet;
+        goto test_ab_extF80_z_bool;
+    case EXTF80_LT_QUIET:
+        true_ab_extF80M_z_bool = slow_extF80M_lt_quiet;
+        subj_ab_extF80M_z_bool = qemu_extF80M_lt_quiet;
+    test_ab_extF80_z_bool:
+        test_ab_extF80_z_bool(true_ab_extF80M_z_bool, subj_ab_extF80M_z_bool);
+        break;
+    case F128_TO_UI32:
+        not_implemented();
+        break;
+    case F128_TO_UI64:
+        test_a_f128_z_ui64_rx(slow_f128M_to_ui64, qemu_f128M_to_ui64, rmode,
+                              exact);
+        break;
+    case F128_TO_I32:
+        test_a_f128_z_i32_rx(slow_f128M_to_i32, qemu_f128M_to_i32, rmode,
+                             exact);
+        break;
+    case F128_TO_I64:
+        test_a_f128_z_i64_rx(slow_f128M_to_i64, qemu_f128M_to_i64, rmode,
+                             exact);
+        break;
+    case F128_TO_UI32_R_MINMAG:
+        test_a_f128_z_ui32_x(slow_f128M_to_ui32_r_minMag,
+                             qemu_f128M_to_ui32_r_minMag, exact);
+        break;
+    case F128_TO_UI64_R_MINMAG:
+        test_a_f128_z_ui64_x(slow_f128M_to_ui64_r_minMag,
+                             qemu_f128M_to_ui64_r_minMag, exact);
+        break;
+    case F128_TO_I32_R_MINMAG:
+        test_a_f128_z_i32_x(slow_f128M_to_i32_r_minMag,
+                            qemu_f128M_to_i32_r_minMag, exact);
+        break;
+    case F128_TO_I64_R_MINMAG:
+        test_a_f128_z_i64_x(slow_f128M_to_i64_r_minMag,
+                            qemu_f128M_to_i64_r_minMag, exact);
+        break;
+    case F128_TO_F16:
+        not_implemented();
+        break;
+    case F128_TO_F32:
+        test_a_f128_z_f32(slow_f128M_to_f32, qemu_f128M_to_f32);
+        break;
+    case F128_TO_F64:
+        test_a_f128_z_f64(slow_f128M_to_f64, qemu_f128M_to_f64);
+        break;
+    case F128_TO_EXTF80:
+        test_a_f128_z_extF80(slow_f128M_to_extF80M, qemu_f128M_to_extF80M);
+        break;
+    case F128_ROUNDTOINT:
+        test_az_f128_rx(slow_f128M_roundToInt, qemu_f128M_roundToInt, rmode,
+                        exact);
+        break;
+    case F128_ADD:
+        true_abz_f128M = slow_f128M_add;
+        subj_abz_f128M = qemu_f128M_add;
+        goto test_abz_f128;
+    case F128_SUB:
+        true_abz_f128M = slow_f128M_sub;
+        subj_abz_f128M = qemu_f128M_sub;
+        goto test_abz_f128;
+    case F128_MUL:
+        true_abz_f128M = slow_f128M_mul;
+        subj_abz_f128M = qemu_f128M_mul;
+        goto test_abz_f128;
+    case F128_DIV:
+        true_abz_f128M = slow_f128M_div;
+        subj_abz_f128M = qemu_f128M_div;
+        goto test_abz_f128;
+    case F128_REM:
+        true_abz_f128M = slow_f128M_rem;
+        subj_abz_f128M = qemu_f128M_rem;
+    test_abz_f128:
+        test_abz_f128(true_abz_f128M, subj_abz_f128M);
+        break;
+    case F128_MULADD:
+        not_implemented();
+        break;
+    case F128_SQRT:
+        test_az_f128(slow_f128M_sqrt, qemu_f128M_sqrt);
+        break;
+    case F128_EQ:
+        true_ab_f128M_z_bool = slow_f128M_eq;
+        subj_ab_f128M_z_bool = qemu_f128M_eq;
+        goto test_ab_f128_z_bool;
+    case F128_LE:
+        true_ab_f128M_z_bool = slow_f128M_le;
+        subj_ab_f128M_z_bool = qemu_f128M_le;
+        goto test_ab_f128_z_bool;
+    case F128_LT:
+        true_ab_f128M_z_bool = slow_f128M_lt;
+        subj_ab_f128M_z_bool = qemu_f128M_lt;
+        goto test_ab_f128_z_bool;
+    case F128_EQ_SIGNALING:
+        true_ab_f128M_z_bool = slow_f128M_eq_signaling;
+        subj_ab_f128M_z_bool = qemu_f128M_eq_signaling;
+        goto test_ab_f128_z_bool;
+    case F128_LE_QUIET:
+        true_ab_f128M_z_bool = slow_f128M_le_quiet;
+        subj_ab_f128M_z_bool = qemu_f128M_le_quiet;
+        goto test_ab_f128_z_bool;
+    case F128_LT_QUIET:
+        true_ab_f128M_z_bool = slow_f128M_lt_quiet;
+        subj_ab_f128M_z_bool = qemu_f128M_lt_quiet;
+    test_ab_f128_z_bool:
+        test_ab_f128_z_bool(true_ab_f128M_z_bool, subj_ab_f128M_z_bool);
+        break;
+    }
+    if ((verCases_errorStop && verCases_anyErrors)) {
+        verCases_exitWithStatus();
+    }
+}
+
+/* Return the functionInfos[] index (>= 1) whose name is @arg, or 0 if none. */
+static unsigned int test_name_to_op(const char *arg)
+{
+    unsigned int op;
+
+    for (op = 1; op < NUM_FUNCTIONS; op++) {
+        const char *candidate = functionInfos[op].namePtr;
+
+        if (candidate != NULL && strcmp(candidate, arg) == 0) {
+            return op;
+        }
+    }
+    return 0;
+}
+
+/* Map @name to its rounding-mode index (>= 1), or 0 if no entry matches. */
+static unsigned int round_name_to_mode(const char *name)
+{
+    unsigned int i;
+    /* counting begins at 1; NULL table slots are skipped, not strcmp()ed */
+    for (i = 1; i < NUM_ROUNDINGMODES; i++) {
+        if (round_mode_names[i] && !strcmp(round_mode_names[i], name)) {
+            return i;
+        }
+    }
+    return 0;
+}
+
+/* OR each exception-flag letter of @flags ('v', 'i', 'o', 'u', 'x') into
+ * slow_init_flags and qemu_init_flags; return 1 on any other letter, else 0 */
+static int set_init_flags(const char *flags)
+{
+    for (; *flags != '\0'; flags++) {
+        switch (*flags) {
+        case 'v':
+            qemu_init_flags |= float_flag_invalid;
+            slow_init_flags |= softfloat_flag_invalid;
+            break;
+        case 'i':
+            qemu_init_flags |= float_flag_divbyzero;
+            slow_init_flags |= softfloat_flag_infinite;
+            break;
+        case 'o':
+            qemu_init_flags |= float_flag_overflow;
+            slow_init_flags |= softfloat_flag_overflow;
+            break;
+        case 'u':
+            qemu_init_flags |= float_flag_underflow;
+            slow_init_flags |= softfloat_flag_underflow;
+            break;
+        case 'x':
+            qemu_init_flags |= float_flag_inexact;
+            slow_init_flags |= softfloat_flag_inexact;
+            break;
+        default:
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* Reset slowfloat's flags to slow_init_flags; return the flags seen before. */
+static uint8_t slow_clear_flags(void)
+{
+    const uint8_t seen = slowfloat_exceptionFlags;
+    slowfloat_exceptionFlags = slow_init_flags;
+    return seen;
+}
+
+/* Reset qsf's flags to qemu_init_flags; return the prior ones as sf flags. */
+static uint8_t qemu_clear_flags(void)
+{
+    const uint8_t seen = qemu_flags_to_sf(qsf.float_exception_flags);
+    qsf.float_exception_flags = qemu_init_flags;
+    return seen;
+}
+
+/*
+ * Parse the command line: options -h, -e, -f, -l, -r, -s, then optional op
+ * names.  Fills round_modes[] and test_ops[]; exits on -h or invalid input.
+ */
+static void parse_args(int argc, char *argv[])
+{
+    unsigned int n;
+    int opt;
+
+    while ((opt = getopt(argc, argv, "he:f:l:r:s")) >= 0) {
+        switch (opt) {
+        case 'h':
+            usage_complete(argc, argv);
+            exit(EXIT_SUCCESS);
+        case 'e':
+            if (qemu_strtoui(optarg, NULL, 0, &n_max_errors)) {
+                fprintf(stderr, "fatal: invalid max error count\n");
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case 'f':
+            if (set_init_flags(optarg)) {
+                fprintf(stderr, "fatal: flags must be a subset of 'vioux'\n");
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case 'l':
+            if (qemu_strtoi(optarg, NULL, 0, &test_level)) {
+                fprintf(stderr, "fatal: invalid test level\n");
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case 'r':
+            if (strcmp(optarg, "all") != 0) {
+                test_round_mode = round_name_to_mode(optarg);
+                if (test_round_mode == 0) {
+                    fprintf(stderr, "fatal: invalid rounding mode\n");
+                    exit(EXIT_FAILURE);
+                }
+            } else {
+                test_round_mode = 0;
+            }
+            break;
+        case 's':
+            verCases_errorStop = true;
+            break;
+        case '?':
+            /* getopt already reported the bad option or missing argument */
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    /* rounding modes: the single one requested, or all of them */
+    if (test_round_mode != 0) {
+        n_round_modes = 1;
+        round_modes = g_malloc(sizeof(*round_modes));
+        round_modes[0] = test_round_mode;
+    } else {
+        /* test all rounding modes; note that counting begins at 1 */
+        n_round_modes = NUM_ROUNDINGMODES - 1;
+        round_modes = g_malloc_n(n_round_modes, sizeof(*round_modes));
+        for (n = 0; n < n_round_modes; n++) {
+            round_modes[n] = n + 1;
+        }
+    }
+
+    /* test ops: those named after the options, or all of them */
+    if (optind != argc) {
+        n_test_ops = argc - optind;
+        test_ops = g_malloc_n(n_test_ops, sizeof(*test_ops));
+        for (n = 0; n < n_test_ops; n++) {
+            const char *name = argv[n + optind];
+            unsigned int op = test_name_to_op(name);
+
+            if (op == 0) {
+                fprintf(stderr, "fatal: invalid op '%s'\n", name);
+                exit(EXIT_FAILURE);
+            }
+            test_ops[n] = op;
+        }
+    } else {
+        /* test all ops; note that counting begins at 1 */
+        n_test_ops = NUM_FUNCTIONS - 1;
+        test_ops = g_malloc_n(n_test_ops, sizeof(*test_ops));
+        for (n = 0; n < n_test_ops; n++) {
+            test_ops[n] = n + 1;
+        }
+    }
+}
+
+static void QEMU_NORETURN run_test(void)
+{
+    unsigned int i; /* index into test_ops[] */
+    /* Run each selected op over rounding mode x prec80 x tininess mode. */
+    genCases_setLevel(test_level);
+    verCases_maxErrorCount = n_max_errors;
+    /* hooks that fetch-and-reset the slowfloat and QEMU exception flags */
+    testLoops_trueFlagsFunction = slow_clear_flags;
+    testLoops_subjFlagsFunction = qemu_clear_flags;
+
+    for (i = 0; i < n_test_ops; i++) {
+        unsigned int op = test_ops[i];
+        int j;
+
+        if (functionInfos[op].namePtr == NULL) {
+            continue; /* table slot without a name: skip it */
+        }
+        verCases_functionNamePtr = functionInfos[op].namePtr;
+
+        for (j = 0; j < n_round_modes; j++) {
+            int attrs = functionInfos[op].attribs;
+            int round = round_modes[j];
+            int rmode = roundingModes[round];
+            int k;
+
+            verCases_roundingCode = 0;
+            slowfloat_roundingMode = rmode;
+            qsf.float_rounding_mode = sf_rounding_to_qemu(rmode);
+
+            if (attrs & (FUNC_ARG_ROUNDINGMODE | FUNC_EFF_ROUNDINGMODE)) {
+                /* print rounding mode if the op is affected by it */
+                verCases_roundingCode = round;
+            } else if (j > 0) {
+                /* rounding-insensitive op: the first mode was enough */
+                break;
+            }
+
+            /* QEMU doesn't have !exact, so only exact == true is run */
+            verCases_exact = true;
+            verCases_usesExact = !!(attrs & FUNC_ARG_EXACT);
+
+            for (k = 0; k < 3; k++) { /* prec80 = 32, 64, 80 in turn */
+                int prec80 = 32;
+                int l;
+
+                if (k == 1) {
+                    prec80 = 64;
+                } else if (k == 2) {
+                    prec80 = 80;
+                }
+
+                verCases_roundingPrecision = 0;
+                slow_extF80_roundingPrecision = prec80;
+                qsf.floatx80_rounding_precision = prec80;
+
+                if (attrs & FUNC_EFF_ROUNDINGPRECISION) {
+                    verCases_roundingPrecision = prec80;
+                } else if (k > 0) {
+                    /* prec80-insensitive op: the first pass was enough */
+                    break;
+                }
+
+                /* note: the count begins at 1 */
+                for (l = 1; l < NUM_TININESSMODES; l++) {
+                    int tmode = tininessModes[l];
+
+                    verCases_tininessCode = 0;
+                    slowfloat_detectTininess = tmode;
+                    qsf.float_detect_tininess = sf_tininess_to_qemu(tmode);
+
+                    if (attrs & FUNC_EFF_TININESSMODE ||
+                        ((attrs & FUNC_EFF_TININESSMODE_REDUCEDPREC) &&
+                         prec80 && prec80 < 80)) {
+                        verCases_tininessCode = l;
+                    } else if (l > 1) {
+                        /* tininess-insensitive op: first mode was enough */
+                        break;
+                    }
+
+                    do_testfloat(op, rmode, true);
+                }
+            }
+        }
+    }
+    verCases_exitWithStatus();
+    /* old compilers might miss that we exited */
+    g_assert_not_reached();
+}
+
+int main(int argc, char *argv[])
+{
+    parse_args(argc, argv); /* exits on -h, bad options or unknown ops */
+    fail_programName = argv[0];
+    run_test(); /* does not return */
+}
diff --git a/tests/fp/platform.h b/tests/fp/platform.h
new file mode 100644
index 0000000..c20ba70
--- /dev/null
+++ b/tests/fp/platform.h
@@ -0,0 +1,41 @@
+#ifndef QEMU_TESTFLOAT_PLATFORM_H
+#define QEMU_TESTFLOAT_PLATFORM_H
+/*
+ * Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+ * California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions, and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright notice,
+ *     this list of conditions, and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *  3. Neither the name of the University nor the names of its contributors may
+ *     be used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "config-host.h"
+
+#ifndef HOST_WORDS_BIGENDIAN
+#define LITTLEENDIAN 1
+/* on big-endian hosts LITTLEENDIAN is deliberately left undefined */
+#endif
+
+#define INLINE static inline
+
+#endif /* QEMU_TESTFLOAT_PLATFORM_H */
diff --git a/tests/fp/wrap.inc.c b/tests/fp/wrap.inc.c
new file mode 100644
index 0000000..d3bf600
--- /dev/null
+++ b/tests/fp/wrap.inc.c
@@ -0,0 +1,653 @@
+/*
+ * In this file we wrap QEMU FP functions to look like softfloat/testfloat's,
+ * so that we can use the testfloat infrastructure as-is.
+ *
+ * This file must be included directly from fp-test.c. We could compile it
+ * separately, but it would be tedious to add declarations for all the wrappers.
+ */
+
+/* Translate a softfloat tininess-detection mode into QEMU's equivalent. */
+static signed char sf_tininess_to_qemu(uint_fast8_t mode)
+{
+    if (mode == softfloat_tininess_beforeRounding) {
+        return float_tininess_before_rounding;
+    }
+    if (mode == softfloat_tininess_afterRounding) {
+        return float_tininess_after_rounding;
+    }
+    g_assert_not_reached();
+}
+
+static signed char sf_rounding_to_qemu(uint_fast8_t mode)
+{
+    switch (mode) { /* softfloat_round_* value -> QEMU float_round_* value */
+    case softfloat_round_near_even:
+        return float_round_nearest_even;
+    case softfloat_round_minMag: /* minimum magnitude, i.e. toward zero */
+        return float_round_to_zero;
+    case softfloat_round_min:
+        return float_round_down;
+    case softfloat_round_max:
+        return float_round_up;
+    case softfloat_round_near_maxMag: /* nearest, ties to larger magnitude */
+        return float_round_ties_away;
+    case softfloat_round_odd:
+        return float_round_to_odd;
+    default:
+        g_assert_not_reached(); /* no QEMU mode is mapped for other values */
+    }
+}
+
+/*
+ * Convert QEMU exception flags to softfloat's flag encoding.
+ *
+ * Only the five flags tested below are translated; any other bit set in
+ * @qflags is dropped.
+ * QEMU's divbyzero corresponds to softfloat's "infinite" flag.
+ *
+ * Returns the OR of the matching softfloat_flag_* values (0 if none).
+ */
+static uint_fast8_t qemu_flags_to_sf(uint8_t qflags)
+{
+    uint_fast8_t sf = 0;
+
+    sf |= (qflags & float_flag_invalid) ? softfloat_flag_invalid : 0;
+    sf |= (qflags & float_flag_divbyzero) ? softfloat_flag_infinite : 0;
+    sf |= (qflags & float_flag_overflow) ? softfloat_flag_overflow : 0;
+    sf |= (qflags & float_flag_underflow) ? softfloat_flag_underflow : 0;
+    sf |= (qflags & float_flag_inexact) ? softfloat_flag_inexact : 0;
+
+    return sf;
+}
+
+/*
+ * floatx80 and float128 cannot be cast between qemu and softfloat, because
+ * in softfloat the order of the fields depends on the host's endianness.
+ */
+/* Build a softfloat extFloat80_t from a QEMU floatx80 by copying each field
+ * by name rather than casting between the two struct types. */
+static extFloat80_t qemu_to_soft80(floatx80 a)
+{
+    extFloat80_t sf = { .signif = a.low, .signExp = a.high };
+
+    return sf;
+}
+
+/* Build a QEMU floatx80 from a softfloat extFloat80_t by copying each field
+ * by name rather than casting between the two struct types. */
+static floatx80 soft_to_qemu80(extFloat80_t a)
+{
+    floatx80 q = { .low = a.signif, .high = a.signExp };
+
+    return q;
+}
+
+/* Store a QEMU float128 into a float128_t through a struct uint128 view. */
+static float128_t qemu_to_soft128(float128 a)
+{
+    float128_t sf;
+    struct uint128 *halves = (struct uint128 *)&sf;
+    halves->v64 = a.high;
+    halves->v0 = a.low;
+    return sf;
+}
+
+/* Read a softfloat float128_t through a struct uint128 view and return the
+ * same two 64-bit halves as a QEMU float128. */
+static float128 soft_to_qemu128(float128_t a)
+{
+    const struct uint128 *halves = (const struct uint128 *)&a;
+    float128 q = { .low = halves->v0, .high = halves->v64 };
+
+    return q;
+}
+
+/* conversions */
+/* name(): return func(a, true, &qsf), reinterpreting the softfloat and QEMU
+ * scalar types through pointer casts (true: presumably func's IEEE flag) */
+#define WRAP_SF_TO_SF_IEEE(name, func, a_type, b_type)  \
+    static b_type##_t name(a_type##_t a)                \
+    {                                                   \
+        b_type q = func(*(a_type *)&a, true, &qsf);     \
+                                                        \
+        return *(b_type##_t *)&q;                       \
+    }
+
+WRAP_SF_TO_SF_IEEE(qemu_f16_to_f32, float16_to_float32, float16, float32)
+WRAP_SF_TO_SF_IEEE(qemu_f16_to_f64, float16_to_float64, float16, float64)
+
+WRAP_SF_TO_SF_IEEE(qemu_f32_to_f16, float32_to_float16, float32, float16)
+WRAP_SF_TO_SF_IEEE(qemu_f64_to_f16, float64_to_float16, float64, float16)
+#undef WRAP_SF_TO_SF_IEEE
+
+/* name(): return func(a, &qsf), reinterpreting the softfloat and QEMU
+ * scalar types through pointer casts */
+#define WRAP_SF_TO_SF(name, func, a_type, b_type)       \
+    static b_type##_t name(a_type##_t a)                \
+    {                                                   \
+        b_type q = func(*(a_type *)&a, &qsf);           \
+                                                        \
+        return *(b_type##_t *)&q;                       \
+    }
+
+WRAP_SF_TO_SF(qemu_f32_to_f64, float32_to_float64, float32, float64)
+WRAP_SF_TO_SF(qemu_f64_to_f32, float64_to_float32, float64, float32)
+#undef WRAP_SF_TO_SF
+
+/* name(): compute func(a, &qsf) and store it in *res, converted from
+ * QEMU's floatx80 to softfloat's extFloat80_t by qemu_to_soft80() */
+#define WRAP_SF_TO_80(name, func, type)                 \
+    static void name(type##_t a, extFloat80_t *res)     \
+    {                                                   \
+        floatx80 q = func(*(type *)&a, &qsf);           \
+                                                        \
+        *res = qemu_to_soft80(q);                       \
+    }
+
+WRAP_SF_TO_80(qemu_f32_to_extF80M, float32_to_floatx80, float32)
+WRAP_SF_TO_80(qemu_f64_to_extF80M, float64_to_floatx80, float64)
+#undef WRAP_SF_TO_80
+
+/* name(): compute func(a, &qsf) and store it in *res, converted from
+ * QEMU's float128 to softfloat's float128_t by qemu_to_soft128() */
+#define WRAP_SF_TO_128(name, func, type)                \
+    static void name(type##_t a, float128_t *res)       \
+    {                                                   \
+        float128 q = func(*(type *)&a, &qsf);           \
+                                                        \
+        *res = qemu_to_soft128(q);                      \
+    }
+
+WRAP_SF_TO_128(qemu_f32_to_f128M, float32_to_float128, float32)
+WRAP_SF_TO_128(qemu_f64_to_f128M, float64_to_float128, float64)
+#undef WRAP_SF_TO_128
+
+/* Note: exact is ignored since qemu's softfloat assumes it is set.
+ * name() first installs @round as qsf's rounding mode (it stays set after
+ * the call), then returns func(a, &qsf). */
+#define WRAP_SF_TO_INT(name, func, type, fast_type)                     \
+    static fast_type name(type##_t a, uint_fast8_t round, bool exact)   \
+    {                                                                   \
+        qsf.float_rounding_mode = sf_rounding_to_qemu(round);           \
+        return func(*(type *)&a, &qsf);                                 \
+    }
+
+WRAP_SF_TO_INT(qemu_f16_to_ui32, float16_to_uint32, float16, uint_fast32_t)
+WRAP_SF_TO_INT(qemu_f16_to_ui64, float16_to_uint64, float16, uint_fast64_t)
+
+WRAP_SF_TO_INT(qemu_f32_to_ui32, float32_to_uint32, float32, uint_fast32_t)
+WRAP_SF_TO_INT(qemu_f32_to_ui64, float32_to_uint64, float32, uint_fast64_t)
+
+WRAP_SF_TO_INT(qemu_f64_to_ui32, float64_to_uint32, float64, uint_fast32_t)
+WRAP_SF_TO_INT(qemu_f64_to_ui64, float64_to_uint64, float64, uint_fast64_t)
+
+WRAP_SF_TO_INT(qemu_f16_to_i32, float16_to_int32, float16, int_fast32_t)
+WRAP_SF_TO_INT(qemu_f16_to_i64, float16_to_int64, float16, int_fast64_t)
+
+WRAP_SF_TO_INT(qemu_f32_to_i32, float32_to_int32, float32, int_fast32_t)
+WRAP_SF_TO_INT(qemu_f32_to_i64, float32_to_int64, float32, int_fast64_t)
+
+WRAP_SF_TO_INT(qemu_f64_to_i32, float64_to_int32, float64, int_fast32_t)
+WRAP_SF_TO_INT(qemu_f64_to_i64, float64_to_int64, float64, int_fast64_t)
+#undef WRAP_SF_TO_INT
+
+/* Note: exact is ignored since qemu's softfloat assumes it is set.
+ * name() just returns func(a, &qsf); func is a *_round_to_zero routine
+ * in every instantiation below. */
+#define WRAP_SF_TO_INT_MINMAG(name, func, type, fast_type)      \
+    static fast_type name(type##_t a, bool exact)               \
+    {                                                           \
+        return func(*(type *)&a, &qsf);                         \
+    }
+
+WRAP_SF_TO_INT_MINMAG(qemu_f16_to_ui32_r_minMag,
+                      float16_to_uint32_round_to_zero, float16, uint_fast32_t)
+WRAP_SF_TO_INT_MINMAG(qemu_f16_to_ui64_r_minMag,
+                      float16_to_uint64_round_to_zero, float16, uint_fast64_t)
+
+WRAP_SF_TO_INT_MINMAG(qemu_f16_to_i32_r_minMag,
+                      float16_to_int32_round_to_zero, float16, int_fast32_t)
+WRAP_SF_TO_INT_MINMAG(qemu_f16_to_i64_r_minMag,
+                      float16_to_int64_round_to_zero, float16, int_fast64_t)
+
+WRAP_SF_TO_INT_MINMAG(qemu_f32_to_ui32_r_minMag,
+                      float32_to_uint32_round_to_zero, float32, uint_fast32_t)
+WRAP_SF_TO_INT_MINMAG(qemu_f32_to_ui64_r_minMag,
+                      float32_to_uint64_round_to_zero, float32, uint_fast64_t)
+
+WRAP_SF_TO_INT_MINMAG(qemu_f32_to_i32_r_minMag,
+                      float32_to_int32_round_to_zero, float32, int_fast32_t)
+WRAP_SF_TO_INT_MINMAG(qemu_f32_to_i64_r_minMag,
+                      float32_to_int64_round_to_zero, float32, int_fast64_t)
+
+WRAP_SF_TO_INT_MINMAG(qemu_f64_to_ui32_r_minMag,
+                      float64_to_uint32_round_to_zero, float64, uint_fast32_t)
+WRAP_SF_TO_INT_MINMAG(qemu_f64_to_ui64_r_minMag,
+                      float64_to_uint64_round_to_zero, float64, uint_fast64_t)
+
+WRAP_SF_TO_INT_MINMAG(qemu_f64_to_i32_r_minMag,
+                      float64_to_int32_round_to_zero, float64, int_fast32_t)
+WRAP_SF_TO_INT_MINMAG(qemu_f64_to_i64_r_minMag,
+                      float64_to_int64_round_to_zero, float64, int_fast64_t)
+#undef WRAP_SF_TO_INT_MINMAG
+
+/* name(): convert *ap to QEMU's floatx80 with soft_to_qemu80(), call
+ * func(), and return its result reinterpreted as the softfloat type */
+#define WRAP_80_TO_SF(name, func, type)                 \
+    static type##_t name(const extFloat80_t *ap)        \
+    {                                                   \
+        floatx80 q_in = soft_to_qemu80(*ap);            \
+        type q_out = func(q_in, &qsf);                  \
+                                                        \
+        return *(type##_t *)&q_out;                     \
+    }
+
+WRAP_80_TO_SF(qemu_extF80M_to_f32, floatx80_to_float32, float32)
+WRAP_80_TO_SF(qemu_extF80M_to_f64, floatx80_to_float64, float64)
+#undef WRAP_80_TO_SF
+
+/* name(): convert *ap to QEMU's float128 with soft_to_qemu128(), call
+ * func(), and return its result reinterpreted as the softfloat type */
+#define WRAP_128_TO_SF(name, func, type)        \
+    static type##_t name(const float128_t *ap)  \
+    {                                           \
+        float128 q_in = soft_to_qemu128(*ap);   \
+        type q_out = func(q_in, &qsf);          \
+                                                \
+        return *(type##_t *)&q_out;             \
+    }
+
+WRAP_128_TO_SF(qemu_f128M_to_f32, float128_to_float32, float32)
+WRAP_128_TO_SF(qemu_f128M_to_f64, float128_to_float64, float64)
+#undef WRAP_128_TO_SF
+
+/* Convert *from to float128 with QEMU's floatx80_to_float128() and store
+ * the result in *to; both pointers use the softfloat struct layouts. */
+static void qemu_extF80M_to_f128M(const extFloat80_t *from, float128_t *to)
+{
+    const floatx80 src = soft_to_qemu80(*from);
+    const float128 dst = floatx80_to_float128(src, &qsf);
+
+    *to = qemu_to_soft128(dst);
+}
+
+/* Convert *from to floatx80 with QEMU's float128_to_floatx80() and store
+ * the result in *to; both pointers use the softfloat struct layouts. */
+static void qemu_f128M_to_extF80M(const float128_t *from, extFloat80_t *to)
+{
+    const float128 src = soft_to_qemu128(*from);
+    const floatx80 dst = float128_to_floatx80(src, &qsf);
+
+    *to = qemu_to_soft80(dst);
+}
+
+/* name(): return func(a, &qsf) reinterpreted as the softfloat type */
+#define WRAP_INT_TO_SF(name, func, int_type, type)      \
+    static type##_t name(int_type a)                    \
+    {                                                   \
+        type q = func(a, &qsf);                         \
+                                                        \
+        return *(type##_t *)&q;                         \
+    }
+
+WRAP_INT_TO_SF(qemu_ui32_to_f16, uint32_to_float16, uint32_t, float16)
+WRAP_INT_TO_SF(qemu_ui32_to_f32, uint32_to_float32, uint32_t, float32)
+WRAP_INT_TO_SF(qemu_ui32_to_f64, uint32_to_float64, uint32_t, float64)
+
+WRAP_INT_TO_SF(qemu_ui64_to_f16, uint64_to_float16, uint64_t, float16)
+WRAP_INT_TO_SF(qemu_ui64_to_f32, uint64_to_float32, uint64_t, float32)
+WRAP_INT_TO_SF(qemu_ui64_to_f64, uint64_to_float64, uint64_t, float64)
+
+WRAP_INT_TO_SF(qemu_i32_to_f16, int32_to_float16, int32_t, float16)
+WRAP_INT_TO_SF(qemu_i32_to_f32, int32_to_float32, int32_t, float32)
+WRAP_INT_TO_SF(qemu_i32_to_f64, int32_to_float64, int32_t, float64)
+
+WRAP_INT_TO_SF(qemu_i64_to_f16, int64_to_float16, int64_t, float16)
+WRAP_INT_TO_SF(qemu_i64_to_f32, int64_to_float32, int64_t, float32)
+WRAP_INT_TO_SF(qemu_i64_to_f64, int64_to_float64, int64_t, float64)
+#undef WRAP_INT_TO_SF
+
+/* name(): store func(a, &qsf) in *res as a softfloat extFloat80_t */
+#define WRAP_INT_TO_80(name, func, int_type)            \
+    static void name(int_type a, extFloat80_t *res)     \
+    {                                                   \
+        floatx80 q = func(a, &qsf);                     \
+                                                        \
+        *res = qemu_to_soft80(q);                       \
+    }
+
+WRAP_INT_TO_80(qemu_i32_to_extF80M, int32_to_floatx80, int32_t)
+WRAP_INT_TO_80(qemu_i64_to_extF80M, int64_to_floatx80, int64_t)
+#undef WRAP_INT_TO_80
+
+/* Note: exact is ignored since qemu's softfloat assumes it is set; @round
+ * is installed as qsf's rounding mode (and stays set) before func() runs. */
+#define WRAP_80_TO_INT(name, func, fast_type)                           \
+    static fast_type name(const extFloat80_t *ap, uint_fast8_t round,   \
+                          bool exact)                                   \
+    {                                                                   \
+        floatx80 q = soft_to_qemu80(*ap);                               \
+                                                                        \
+        qsf.float_rounding_mode = sf_rounding_to_qemu(round);           \
+        return func(q, &qsf);                                           \
+    }
+
+WRAP_80_TO_INT(qemu_extF80M_to_i32, floatx80_to_int32, int_fast32_t)
+WRAP_80_TO_INT(qemu_extF80M_to_i64, floatx80_to_int64, int_fast64_t)
+#undef WRAP_80_TO_INT
+
+/* Note: exact is ignored since qemu's softfloat assumes it is set.
+ * name() converts *ap with soft_to_qemu80() and returns func() of it. */
+#define WRAP_80_TO_INT_MINMAG(name, func, fast_type)            \
+    static fast_type name(const extFloat80_t *ap, bool exact)   \
+    {                                                           \
+        floatx80 q = soft_to_qemu80(*ap);                       \
+                                                                \
+        return func(q, &qsf);                                   \
+    }
+
+WRAP_80_TO_INT_MINMAG(qemu_extF80M_to_i32_r_minMag,
+                      floatx80_to_int32_round_to_zero, int_fast32_t)
+WRAP_80_TO_INT_MINMAG(qemu_extF80M_to_i64_r_minMag,
+                      floatx80_to_int64_round_to_zero, int_fast64_t)
+#undef WRAP_80_TO_INT_MINMAG
+
+/* Note: exact is ignored since qemu's softfloat assumes it is set; @round
+ * is installed as qsf's rounding mode (and stays set) before func() runs. */
+#define WRAP_128_TO_INT(name, func, fast_type)                          \
+    static fast_type name(const float128_t *ap, uint_fast8_t round,     \
+                          bool exact)                                   \
+    {                                                                   \
+        float128 q = soft_to_qemu128(*ap);                              \
+                                                                        \
+        qsf.float_rounding_mode = sf_rounding_to_qemu(round);           \
+        return func(q, &qsf);                                           \
+    }
+
+WRAP_128_TO_INT(qemu_f128M_to_i32, float128_to_int32, int_fast32_t)
+WRAP_128_TO_INT(qemu_f128M_to_i64, float128_to_int64, int_fast64_t)
+
+WRAP_128_TO_INT(qemu_f128M_to_ui64, float128_to_uint64, uint_fast64_t)
+#undef WRAP_128_TO_INT
+
+/* Note: exact is ignored since qemu's softfloat assumes it is set.
+ * name() converts *ap with soft_to_qemu128() and returns func() of it. */
+#define WRAP_128_TO_INT_MINMAG(name, func, fast_type)           \
+    static fast_type name(const float128_t *ap, bool exact)     \
+    {                                                           \
+        float128 q = soft_to_qemu128(*ap);                      \
+                                                                \
+        return func(q, &qsf);                                   \
+    }
+
+WRAP_128_TO_INT_MINMAG(qemu_f128M_to_i32_r_minMag,
+                       float128_to_int32_round_to_zero, int_fast32_t)
+WRAP_128_TO_INT_MINMAG(qemu_f128M_to_i64_r_minMag,
+                       float128_to_int64_round_to_zero, int_fast64_t)
+
+WRAP_128_TO_INT_MINMAG(qemu_f128M_to_ui32_r_minMag,
+                       float128_to_uint32_round_to_zero, uint_fast32_t)
+WRAP_128_TO_INT_MINMAG(qemu_f128M_to_ui64_r_minMag,
+                       float128_to_uint64_round_to_zero, uint_fast64_t)
+#undef WRAP_128_TO_INT_MINMAG
+
+#define WRAP_INT_TO_128(name, func, int_type)           \
+    static void name(int_type a, float128_t *res)       \
+    {                                                   \
+        float128 ret;                                   \
+                                                        \
+        ret = func(a, &qsf);                            \
+        *res = qemu_to_soft128(ret);                    \
+    }
+
+WRAP_INT_TO_128(qemu_ui64_to_f128M, uint64_to_float128, uint64_t)
+
+WRAP_INT_TO_128(qemu_i32_to_f128M, int32_to_float128, int32_t)
+WRAP_INT_TO_128(qemu_i64_to_f128M, int64_to_float128, int64_t)
+#undef WRAP_INT_TO_128
+
+/* Note: exact is ignored since qemu's softfloat assumes it is set */
+#define WRAP_ROUND_TO_INT(name, func, type)                             \
+    static type##_t name(type##_t a, uint_fast8_t round, bool exact)    \
+    {                                                                   \
+        type *ap = (type *)&a;                                          \
+        type ret;                                                       \
+                                                                        \
+        qsf.float_rounding_mode = sf_rounding_to_qemu(round);           \
+        ret = func(*ap, &qsf);                                          \
+        return *(type##_t *)&ret;                                       \
+    }
+
+WRAP_ROUND_TO_INT(qemu_f16_roundToInt, float16_round_to_int, float16)
+WRAP_ROUND_TO_INT(qemu_f32_roundToInt, float32_round_to_int, float32)
+WRAP_ROUND_TO_INT(qemu_f64_roundToInt, float64_round_to_int, float64)
+#undef WRAP_ROUND_TO_INT
+
+static void qemu_extF80M_roundToInt(const extFloat80_t *ap, uint_fast8_t round,
+                                    bool exact, extFloat80_t *res)
+{
+    floatx80 a;
+    floatx80 ret;
+
+    a = soft_to_qemu80(*ap);
+    qsf.float_rounding_mode = sf_rounding_to_qemu(round);
+    ret = floatx80_round_to_int(a, &qsf);
+    *res = qemu_to_soft80(ret);
+}
+
+static void qemu_f128M_roundToInt(const float128_t *ap, uint_fast8_t round,
+                                  bool exact, float128_t *res)
+{
+    float128 a;
+    float128 ret;
+
+    a = soft_to_qemu128(*ap);
+    qsf.float_rounding_mode = sf_rounding_to_qemu(round);
+    ret = float128_round_to_int(a, &qsf);
+    *res = qemu_to_soft128(ret);
+}
+
+/* operations */
+#define WRAP1(name, func, type)                 \
+    static type##_t name(type##_t a)            \
+    {                                           \
+        type *ap = (type *)&a;                  \
+        type ret;                               \
+                                                \
+        ret = func(*ap, &qsf);                  \
+        return *(type##_t *)&ret;               \
+    }
+
+#define WRAP2(name, func, type)                         \
+    static type##_t name(type##_t a, type##_t b)        \
+    {                                                   \
+        type *ap = (type *)&a;                          \
+        type *bp = (type *)&b;                          \
+        type ret;                                       \
+                                                        \
+        ret = func(*ap, *bp, &qsf);                     \
+        return *(type##_t *)&ret;                       \
+    }
+
+#define WRAP_COMMON_OPS(b) /* sqrt/add/sub/mul/div */   \
+    WRAP1(qemu_f##b##_sqrt, float##b##_sqrt, float##b)  \
+    WRAP2(qemu_f##b##_add, float##b##_add, float##b)    \
+    WRAP2(qemu_f##b##_sub, float##b##_sub, float##b)    \
+    WRAP2(qemu_f##b##_mul, float##b##_mul, float##b)    \
+    WRAP2(qemu_f##b##_div, float##b##_div, float##b)
+
+WRAP_COMMON_OPS(16)
+WRAP_COMMON_OPS(32)
+WRAP_COMMON_OPS(64)
+#undef WRAP_COMMON_OPS
+
+WRAP2(qemu_f32_rem, float32_rem, float32)
+WRAP2(qemu_f64_rem, float64_rem, float64)
+#undef WRAP2
+#undef WRAP1
+
+#define WRAP1_80(name, func)                                    \
+    static void name(const extFloat80_t *ap, extFloat80_t *res) \
+    {                                                           \
+        floatx80 a;                                             \
+        floatx80 ret;                                           \
+                                                                \
+        a = soft_to_qemu80(*ap);                                \
+        ret = func(a, &qsf);                                    \
+        *res = qemu_to_soft80(ret);                             \
+    }
+
+WRAP1_80(qemu_extF80M_sqrt, floatx80_sqrt)
+#undef WRAP1_80
+
+#define WRAP1_128(name, func)                                   \
+    static void name(const float128_t *ap, float128_t *res)     \
+    {                                                           \
+        float128 a;                                             \
+        float128 ret;                                           \
+                                                                \
+        a = soft_to_qemu128(*ap);                               \
+        ret = func(a, &qsf);                                    \
+        *res = qemu_to_soft128(ret);                            \
+    }
+
+WRAP1_128(qemu_f128M_sqrt, float128_sqrt)
+#undef WRAP1_128
+
+#define WRAP2_80(name, func)                                            \
+    static void name(const extFloat80_t *ap, const extFloat80_t *bp,    \
+                     extFloat80_t *res)                                 \
+    {                                                                   \
+        floatx80 a;                                                     \
+        floatx80 b;                                                     \
+        floatx80 ret;                                                   \
+                                                                        \
+        a = soft_to_qemu80(*ap);                                        \
+        b = soft_to_qemu80(*bp);                                        \
+        ret = func(a, b, &qsf);                                         \
+        *res = qemu_to_soft80(ret);                                     \
+    }
+
+WRAP2_80(qemu_extF80M_add, floatx80_add)
+WRAP2_80(qemu_extF80M_sub, floatx80_sub)
+WRAP2_80(qemu_extF80M_mul, floatx80_mul)
+WRAP2_80(qemu_extF80M_div, floatx80_div)
+WRAP2_80(qemu_extF80M_rem, floatx80_rem)
+#undef WRAP2_80
+
+#define WRAP2_128(name, func)                                           \
+    static void name(const float128_t *ap, const float128_t *bp,        \
+                     float128_t *res)                                   \
+    {                                                                   \
+        float128 a;                                                     \
+        float128 b;                                                     \
+        float128 ret;                                                   \
+                                                                        \
+        a = soft_to_qemu128(*ap);                                       \
+        b = soft_to_qemu128(*bp);                                       \
+        ret = func(a, b, &qsf);                                         \
+        *res = qemu_to_soft128(ret);                                    \
+    }
+
+WRAP2_128(qemu_f128M_add, float128_add)
+WRAP2_128(qemu_f128M_sub, float128_sub)
+WRAP2_128(qemu_f128M_mul, float128_mul)
+WRAP2_128(qemu_f128M_div, float128_div)
+WRAP2_128(qemu_f128M_rem, float128_rem)
+#undef WRAP2_128
+
+#define WRAP_MULADD(name, func, type)                           \
+    static type##_t name(type##_t a, type##_t b, type##_t c)    \
+    {                                                           \
+        type *ap = (type *)&a;                                  \
+        type *bp = (type *)&b;                                  \
+        type *cp = (type *)&c;                                  \
+        type ret;                                               \
+                                                                \
+        ret = func(*ap, *bp, *cp, 0, &qsf);                     \
+        return *(type##_t *)&ret;                               \
+    }
+
+WRAP_MULADD(qemu_f16_mulAdd, float16_muladd, float16)
+WRAP_MULADD(qemu_f32_mulAdd, float32_muladd, float32)
+WRAP_MULADD(qemu_f64_mulAdd, float64_muladd, float64)
+#undef WRAP_MULADD
+
+#define WRAP_CMP16(name, func, retcond)         \
+    static bool name(float16_t a, float16_t b)  \
+    {                                           \
+        float16 *ap = (float16 *)&a;            \
+        float16 *bp = (float16 *)&b;            \
+        int ret;                                \
+                                                \
+        ret = func(*ap, *bp, &qsf);             \
+        return retcond;                         \
+    }
+
+WRAP_CMP16(qemu_f16_eq_signaling, float16_compare, ret == 0)
+WRAP_CMP16(qemu_f16_eq, float16_compare_quiet, ret == 0)
+WRAP_CMP16(qemu_f16_le, float16_compare, ret <= 0)
+WRAP_CMP16(qemu_f16_lt, float16_compare, ret < 0)
+WRAP_CMP16(qemu_f16_le_quiet, float16_compare_quiet, ret <= 0)
+WRAP_CMP16(qemu_f16_lt_quiet, float16_compare_quiet, ret < 0)
+#undef WRAP_CMP16
+
+#define WRAP_CMP(name, func, type)              \
+    static bool name(type##_t a, type##_t b)    \
+    {                                           \
+        type *ap = (type *)&a;                  \
+        type *bp = (type *)&b;                  \
+                                                \
+        return !!func(*ap, *bp, &qsf);          \
+    }
+
+#define GEN_WRAP_CMP(b)                                                 \
+    WRAP_CMP(qemu_f##b##_eq_signaling, float##b##_eq, float##b)         \
+    WRAP_CMP(qemu_f##b##_eq, float##b##_eq_quiet, float##b)             \
+    WRAP_CMP(qemu_f##b##_le, float##b##_le, float##b)                   \
+    WRAP_CMP(qemu_f##b##_lt, float##b##_lt, float##b)                   \
+    WRAP_CMP(qemu_f##b##_le_quiet, float##b##_le_quiet, float##b)       \
+    WRAP_CMP(qemu_f##b##_lt_quiet, float##b##_lt_quiet, float##b)
+
+GEN_WRAP_CMP(32)
+GEN_WRAP_CMP(64)
+#undef GEN_WRAP_CMP
+#undef WRAP_CMP
+
+#define WRAP_CMP80(name, func) /* compare two extFloat80_t via func */  \
+    static bool name(const extFloat80_t *ap, const extFloat80_t *bp)    \
+    {                                                                   \
+        floatx80 a;                                                     \
+        floatx80 b;                                                     \
+                                                                        \
+        a = soft_to_qemu80(*ap);                                        \
+        b = soft_to_qemu80(*bp);                                        \
+        return !!func(a, b, &qsf);                                      \
+    }
+
+WRAP_CMP80(qemu_extF80M_eq_signaling, floatx80_eq)
+WRAP_CMP80(qemu_extF80M_eq, floatx80_eq_quiet)
+WRAP_CMP80(qemu_extF80M_le, floatx80_le)
+WRAP_CMP80(qemu_extF80M_lt, floatx80_lt)
+WRAP_CMP80(qemu_extF80M_le_quiet, floatx80_le_quiet)
+WRAP_CMP80(qemu_extF80M_lt_quiet, floatx80_lt_quiet)
+#undef WRAP_CMP80
+
+#define WRAP_CMP128(name, func)                                         \
+    static bool name(const float128_t *ap, const float128_t *bp)        \
+    {                                                                   \
+        float128 a;                                                     \
+        float128 b;                                                     \
+                                                                        \
+        a = soft_to_qemu128(*ap);                                       \
+        b = soft_to_qemu128(*bp);                                       \
+        return !!func(a, b, &qsf);                                      \
+    }
+
+WRAP_CMP128(qemu_f128M_eq_signaling, float128_eq)
+WRAP_CMP128(qemu_f128M_eq, float128_eq_quiet)
+WRAP_CMP128(qemu_f128M_le, float128_le)
+WRAP_CMP128(qemu_f128M_lt, float128_lt)
+WRAP_CMP128(qemu_f128M_le_quiet, float128_le_quiet)
+WRAP_CMP128(qemu_f128M_lt_quiet, float128_lt_quiet)
+#undef WRAP_CMP128
diff --git a/tests/guest-debug/test-gdbstub.py b/tests/guest-debug/test-gdbstub.py
index 474d2c5..0e4ac01 100644
--- a/tests/guest-debug/test-gdbstub.py
+++ b/tests/guest-debug/test-gdbstub.py
@@ -122,7 +122,7 @@
 
 
 def run_test():
-    "Run throught the tests one by one"
+    "Run through the tests one by one"
 
     print ("Checking we can step the first few instructions")
     step_ok = 0
diff --git a/tests/libqtest.c b/tests/libqtest.c
index 2cd5736..44ce118 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -48,10 +48,6 @@
 static GHookList abrt_hooks;
 static struct sigaction sigact_old;
 
-#define g_assert_no_errno(ret) do { \
-    g_assert_cmpint(ret, !=, -1); \
-} while (0)
-
 static int qtest_query_target_endianness(QTestState *s);
 
 static int init_socket(const char *socket_path)
@@ -61,7 +57,7 @@
     int ret;
 
     sock = socket(PF_UNIX, SOCK_STREAM, 0);
-    g_assert_no_errno(sock);
+    g_assert_cmpint(sock, !=, -1);
 
     addr.sun_family = AF_UNIX;
     snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", socket_path);
@@ -70,9 +66,9 @@
     do {
         ret = bind(sock, (struct sockaddr *)&addr, sizeof(addr));
     } while (ret == -1 && errno == EINTR);
-    g_assert_no_errno(ret);
+    g_assert_cmpint(ret, !=, -1);
     ret = listen(sock, 1);
-    g_assert_no_errno(ret);
+    g_assert_cmpint(ret, !=, -1);
 
     return sock;
 }
@@ -325,7 +321,6 @@
             continue;
         }
 
-        g_assert_no_errno(len);
         g_assert_cmpint(len, >, 0);
 
         offset += len;
diff --git a/tests/migration-test.c b/tests/migration-test.c
index 20f38f1..06ca506 100644
--- a/tests/migration-test.c
+++ b/tests/migration-test.c
@@ -86,12 +86,24 @@
  * repeatedly. It outputs a 'B' at a fixed rate while it's still running.
  */
 #include "tests/migration/i386/a-b-bootblock.h"
+#include "tests/migration/aarch64/a-b-kernel.h"
 
-static void init_bootfile_x86(const char *bootpath)
+static void init_bootfile(const char *bootpath, void *content, size_t len)
 {
     FILE *bootfile = fopen(bootpath, "wb");
 
-    g_assert_cmpint(fwrite(x86_bootsect, 512, 1, bootfile), ==, 1);
+    g_assert_cmpint(fwrite(content, len, 1, bootfile), ==, 1);
+    fclose(bootfile);
+}
+
+#include "tests/migration/s390x/a-b-bios.h"
+
+static void init_bootfile_s390x(const char *bootpath)
+{
+    FILE *bootfile = fopen(bootpath, "wb");
+    size_t len = sizeof(s390x_elf);
+
+    g_assert_cmpint(fwrite(s390x_elf, len, 1, bootfile), ==, 1);
     fclose(bootfile);
 }
 
@@ -428,7 +440,7 @@
     got_stop = false;
 
     if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
-        init_bootfile_x86(bootpath);
+        init_bootfile(bootpath, x86_bootsect, sizeof(x86_bootsect));
         cmd_src = g_strdup_printf("-machine accel=%s -m 150M"
                                   " -name source,debug-threads=on"
                                   " -serial file:%s/src_serial"
@@ -442,6 +454,19 @@
                                   accel, tmpfs, bootpath, uri);
         start_address = X86_TEST_MEM_START;
         end_address = X86_TEST_MEM_END;
+    } else if (g_str_equal(arch, "s390x")) {
+        init_bootfile_s390x(bootpath);
+        cmd_src = g_strdup_printf("-machine accel=%s -m 128M"
+                                  " -name source,debug-threads=on"
+                                  " -serial file:%s/src_serial -bios %s",
+                                  accel, tmpfs, bootpath);
+        cmd_dst = g_strdup_printf("-machine accel=%s -m 128M"
+                                  " -name target,debug-threads=on"
+                                  " -serial file:%s/dest_serial -bios %s"
+                                  " -incoming %s",
+                                  accel, tmpfs, bootpath, uri);
+        start_address = S390_TEST_MEM_START;
+        end_address = S390_TEST_MEM_END;
     } else if (strcmp(arch, "ppc64") == 0) {
         cmd_src = g_strdup_printf("-machine accel=%s -m 256M -nodefaults"
                                   " -name source,debug-threads=on"
@@ -459,6 +484,24 @@
 
         start_address = PPC_TEST_MEM_START;
         end_address = PPC_TEST_MEM_END;
+    } else if (strcmp(arch, "aarch64") == 0) {
+        init_bootfile(bootpath, aarch64_kernel, sizeof(aarch64_kernel));
+        cmd_src = g_strdup_printf("-machine virt,accel=%s,gic-version=max "
+                                  "-name vmsource,debug-threads=on -cpu max "
+                                  "-m 150M -serial file:%s/src_serial "
+                                  "-kernel %s ",
+                                  accel, tmpfs, bootpath);
+        cmd_dst = g_strdup_printf("-machine virt,accel=%s,gic-version=max "
+                                  "-name vmdest,debug-threads=on -cpu max "
+                                  "-m 150M -serial file:%s/dest_serial "
+                                  "-kernel %s "
+                                  "-incoming %s ",
+                                  accel, tmpfs, bootpath, uri);
+
+        start_address = ARM_TEST_MEM_START;
+        end_address = ARM_TEST_MEM_END;
+
+        g_assert(sizeof(aarch64_kernel) <= ARM_TEST_MAX_KERNEL_SIZE);
     } else {
         g_assert_not_reached();
     }
@@ -545,7 +588,7 @@
 {
     QTestState *from;
 
-    from = qtest_start("");
+    from = qtest_start("-machine none");
 
     deprecated_set_downtime(from, 0.12345);
     deprecated_set_speed(from, 12345);
@@ -760,6 +803,22 @@
         return 0;
     }
 
+    /*
+     * Similar to ppc64, s390x seems to be touchy with TCG, so disable it
+     * there until the problems are resolved
+     */
+    if (g_str_equal(qtest_get_arch(), "s390x")) {
+#if defined(HOST_S390X)
+        if (access("/dev/kvm", R_OK | W_OK)) {
+            g_test_message("Skipping test: kvm not available");
+            return 0;
+        }
+#else
+        g_test_message("Skipping test: Need s390x host to work properly");
+        return 0;
+#endif
+    }
+
     tmpfs = mkdtemp(template);
     if (!tmpfs) {
         g_test_message("mkdtemp on path (%s): %s\n", template, strerror(errno));
diff --git a/tests/migration/Makefile b/tests/migration/Makefile
index dc3b551..13e99b1 100644
--- a/tests/migration/Makefile
+++ b/tests/migration/Makefile
@@ -5,10 +5,23 @@
 # See the COPYING file in the top-level directory.
 #
 
-TARGET_LIST = i386
+TARGET_LIST = i386 aarch64 s390x
 
 SRC_PATH = ../..
 
+.PHONY: help $(TARGET_LIST)
+help:
+	@echo "Create migration guest includes.  We generate a binary."
+	@echo "And then convert that binary to an include file that can be"
+	@echo "run in a guest."
+	@echo "Possible operations are:"
+	@echo
+	@echo " $(MAKE) clean                Remove all intermediate files"
+	@echo " $(MAKE) target               Generate for that target"
+	@echo " $(MAKE) CROSS_PREFIX=... target"
+	@echo "                              Cross-compile that target"
+	@echo " Possible targets are: $(TARGET_LIST)"
+
 override define __note
 /* This file is automatically generated from the assembly file in
  * tests/migration/$@. Edit that file and then run "make all"
@@ -18,16 +31,8 @@
 endef
 export __note
 
-find-arch-cross-cc = $(lastword $(shell grep -h "CROSS_CC_GUEST=" $(wildcard $(SRC_PATH)/$(patsubst i386,*86*,$(1))-softmmu/config-target.mak) /dev/null))
-parse-cross-prefix = $(subst gcc,,$(patsubst cc,gcc,$(patsubst CROSS_CC_GUEST="%",%,$(call find-arch-cross-cc,$(1)))))
-gen-cross-prefix = $(patsubst %-,CROSS_PREFIX=%-,$(call parse-cross-prefix,$(1)))
-
-.PHONY: all $(TARGET_LIST)
-
-all: $(TARGET_LIST)
-
 $(TARGET_LIST):
-	$(MAKE) -C $@ $(call gen-cross-prefix,$@)
+	$(MAKE) CROSS_PREFIX=$(CROSS_PREFIX) -C $@
 
 clean:
 	for target in $(TARGET_LIST); do \
diff --git a/tests/migration/aarch64/Makefile b/tests/migration/aarch64/Makefile
new file mode 100644
index 0000000..9c4fa18
--- /dev/null
+++ b/tests/migration/aarch64/Makefile
@@ -0,0 +1,18 @@
+# To specify cross compiler prefix, use CROSS_PREFIX=
+#   $ make CROSS_PREFIX=aarch64-linux-gnu-
+
+.PHONY: all clean
+all: a-b-kernel.h
+
+a-b-kernel.h: aarch64.kernel
+	echo "$$__note" > $@
+	xxd -i $< | sed -e 's/.*int.*//' >> $@
+
+aarch64.kernel: aarch64.elf
+	$(CROSS_PREFIX)objcopy -O binary $< $@
+
+aarch64.elf: a-b-kernel.S
+	$(CROSS_PREFIX)gcc -o $@ -nostdlib -Wl,--build-id=none $<
+
+clean:
+	$(RM) *.kernel *.elf
diff --git a/tests/migration/aarch64/a-b-kernel.S b/tests/migration/aarch64/a-b-kernel.S
new file mode 100644
index 0000000..0225945
--- /dev/null
+++ b/tests/migration/aarch64/a-b-kernel.S
@@ -0,0 +1,75 @@
+#
+# Copyright (c) 2018 Red Hat, Inc. and/or its affiliates
+#
+# Author:
+#   Wei Huang <wei@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+#
+# Note: Please make sure the compiler compiles the assembly code below with
+# pc-relative address. Also the branch instructions should use relative
+# addresses only.
+
+#include "../migration-test.h"
+
+.section .text
+
+        .globl  _start
+
+_start:                         /* entry point; loops forever, never returns */
+        /* clear SCTLR_EL1 bit 0 (MMU enable) so addresses are physical */
+        mrs     x0, sctlr_el1
+        bic     x0, x0, #(1<<0)
+        msr     sctlr_el1, x0
+        isb
+
+        /* x0 = start, x1 = end of the test region; neither changes below */
+        mov     x0, #ARM_TEST_MEM_START
+        mov     x1, #ARM_TEST_MEM_END
+
+        /* output char 'A' to PL011; x2 keeps the UART address for 'B' later */
+        mov     w3, 'A'
+        mov     x2, #ARM_MACH_VIRT_UART
+        strb    w3, [x2]
+
+        /* zero the first byte of every page address x4 with x0 <= x4 <= x1 */
+        mov     w3, #0
+        mov     x4, x0          /* x4 = address of the page being written */
+clean:
+        strb    w3, [x4]
+        add     x4, x4, #TEST_MEM_PAGE_SIZE
+        cmp     x4, x1
+        ble     clean           /* NOTE(review): <= here, < in innerloop */
+
+        /* w5 counts passes over the region so 'B' is not printed every pass */
+        mov     w5, #0
+
+        /* main body: one pass over the region per mainloop iteration */
+mainloop:
+        mov     x4, x0          /* restart at the first test page */
+
+innerloop:
+        /* increment the first byte of each page by 1 */
+        ldrb    w3, [x4]
+        add     w3, w3, #1
+        and     w3, w3, #0xff   /* wrap at 256; strb stores the low byte */
+        strb    w3, [x4]
+
+        /* make sure QEMU user space can see consistent data as MMU is off */
+        dc      civac, x4       /* clean and invalidate the line holding x4 */
+
+        add     x4, x4, #TEST_MEM_PAGE_SIZE
+        cmp     x4, x1
+        blt     innerloop       /* signed <: stops before the page at x1 */
+
+        add     w5, w5, #1
+        and     w5, w5, #0xff
+        cmp     w5, #0
+        bne     mainloop        /* fall through only on every 256th pass */
+
+        /* output char 'B' to PL011 */
+        mov     w3, 'B'
+        strb    w3, [x2]
+
+        b       mainloop
diff --git a/tests/migration/aarch64/a-b-kernel.h b/tests/migration/aarch64/a-b-kernel.h
new file mode 100644
index 0000000..0a9b011
--- /dev/null
+++ b/tests/migration/aarch64/a-b-kernel.h
@@ -0,0 +1,18 @@
+/* This file is automatically generated from the assembly file in
+ * tests/migration/aarch64. Edit that file and then run "make all"
+ * inside tests/migration to update, and then remember to send both
+ * the header and the assembler differences in your patch submission.
+ */
+unsigned char aarch64_kernel[] = {
+  0x00, 0x10, 0x38, 0xd5, 0x00, 0xf8, 0x7f, 0x92, 0x00, 0x10, 0x18, 0xd5,
+  0xdf, 0x3f, 0x03, 0xd5, 0x00, 0x02, 0xa8, 0xd2, 0x01, 0xc8, 0xa8, 0xd2,
+  0x23, 0x08, 0x80, 0x52, 0x02, 0x20, 0xa1, 0xd2, 0x43, 0x00, 0x00, 0x39,
+  0x03, 0x00, 0x80, 0x52, 0xe4, 0x03, 0x00, 0xaa, 0x83, 0x00, 0x00, 0x39,
+  0x84, 0x04, 0x40, 0x91, 0x9f, 0x00, 0x01, 0xeb, 0xad, 0xff, 0xff, 0x54,
+  0x05, 0x00, 0x80, 0x52, 0xe4, 0x03, 0x00, 0xaa, 0x83, 0x00, 0x40, 0x39,
+  0x63, 0x04, 0x00, 0x11, 0x63, 0x1c, 0x00, 0x12, 0x83, 0x00, 0x00, 0x39,
+  0x24, 0x7e, 0x0b, 0xd5, 0x84, 0x04, 0x40, 0x91, 0x9f, 0x00, 0x01, 0xeb,
+  0x2b, 0xff, 0xff, 0x54, 0xa5, 0x04, 0x00, 0x11, 0xa5, 0x1c, 0x00, 0x12,
+  0xbf, 0x00, 0x00, 0x71, 0x81, 0xfe, 0xff, 0x54, 0x43, 0x08, 0x80, 0x52,
+  0x43, 0x00, 0x00, 0x39, 0xf1, 0xff, 0xff, 0x17
+};
diff --git a/tests/migration/guestperf/shell.py b/tests/migration/guestperf/shell.py
index a6b8cec..61d2abb 100644
--- a/tests/migration/guestperf/shell.py
+++ b/tests/migration/guestperf/shell.py
@@ -19,14 +19,12 @@
 #
 
 
-import os
-import os.path
-import sys
-sys.path.append(os.path.join(os.path.dirname(__file__),
-                             '..', '..', '..', 'scripts'))
 import argparse
 import fnmatch
+import os
+import os.path
 import platform
+import sys
 import logging
 
 from guestperf.hardware import Hardware
diff --git a/tests/migration/migration-test.h b/tests/migration/migration-test.h
index c4c0c52..03c2523 100644
--- a/tests/migration/migration-test.h
+++ b/tests/migration/migration-test.h
@@ -14,8 +14,21 @@
 #define X86_TEST_MEM_START (1 * 1024 * 1024)
 #define X86_TEST_MEM_END   (100 * 1024 * 1024)
 
+/* S390 */
+#define S390_TEST_MEM_START (1 * 1024 * 1024)
+#define S390_TEST_MEM_END   (100 * 1024 * 1024)
+
 /* PPC */
 #define PPC_TEST_MEM_START (1 * 1024 * 1024)
 #define PPC_TEST_MEM_END   (100 * 1024 * 1024)
 
+/* ARM */
+#define ARM_TEST_MEM_START (0x40000000 + 1 * 1024 * 1024)
+#define ARM_TEST_MEM_END   (0x40000000 + 100 * 1024 * 1024)
+#define ARM_MACH_VIRT_UART 0x09000000
+/* AArch64 kernel load address is 0x40080000, and the test memory starts at
+ * 0x40100000. So the maximum allowable kernel size is 512KB.
+ */
+#define ARM_TEST_MAX_KERNEL_SIZE (512 * 1024)
+
 #endif /* _TEST_MIGRATION_H_ */
diff --git a/tests/migration/s390x/Makefile b/tests/migration/s390x/Makefile
new file mode 100644
index 0000000..6393c3e
--- /dev/null
+++ b/tests/migration/s390x/Makefile
@@ -0,0 +1,24 @@
+# To specify cross compiler prefix, use CROSS_PREFIX=
+#   $ make CROSS_PREFIX=s390x-linux-gnu-
+
+.PHONY: all clean
+all: a-b-bios.h
+fwdir=../../../pc-bios/s390-ccw
+
+CFLAGS+=-ffreestanding -fno-delete-null-pointer-checks -fPIE -Os \
+	-msoft-float -march=z900 -fno-asynchronous-unwind-tables -Wl,-pie \
+	-Wl,--build-id=none -nostdlib
+
+a-b-bios.h: s390x.elf
+	echo "$$__note" > header.tmp
+	xxd -i $< | sed -e 's/.*int.*//' >> header.tmp
+	mv header.tmp $@
+
+# We use common-page-size=16 to avoid big padding in the ELF file
+s390x.elf: a-b-bios.c
+	$(CROSS_PREFIX)gcc $(CFLAGS) -I$(fwdir) $(fwdir)/start.S \
+		$(fwdir)/sclp.c -Wl,-zcommon-page-size=16 -o $@ $<
+	$(CROSS_PREFIX)strip $@
+
+clean:
+	@rm -rf *.elf *.o
diff --git a/tests/migration/s390x/a-b-bios.c b/tests/migration/s390x/a-b-bios.c
new file mode 100644
index 0000000..a0327cd
--- /dev/null
+++ b/tests/migration/s390x/a-b-bios.c
@@ -0,0 +1,36 @@
+/*
+ * S390 guest code used in migration tests
+ *
+ * Copyright 2018 Thomas Huth, Red Hat Inc.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#define LOADPARM_LEN 8  /* Needed for sclp.h */
+
+#include <libc.h>
+#include <s390-ccw.h>
+#include <sclp.h>
+
+char stack[0x8000] __attribute__((aligned(4096)));
+
+#define START_ADDRESS  (1024 * 1024)
+#define END_ADDRESS    (100 * 1024 * 1024)
+
+void main(void)
+{
+    unsigned long addr;
+
+    sclp_setup();
+    sclp_print("A");
+
+    while (1) {
+        for (addr = START_ADDRESS; addr < END_ADDRESS; addr += 4096) {
+            *(volatile char *)addr += 1;  /* Change pages */
+        }
+        sclp_print("B");
+    }
+}
diff --git a/tests/migration/s390x/a-b-bios.h b/tests/migration/s390x/a-b-bios.h
new file mode 100644
index 0000000..e722dc7
--- /dev/null
+++ b/tests/migration/s390x/a-b-bios.h
@@ -0,0 +1,253 @@
+/* This file is automatically generated from the a-b-bios.c file in
+ * tests/migration/s390x. Edit that file and then run "make all"
+ * inside tests/migration to update, and then remember to send both
+ * the header and the assembler differences in your patch submission.
+ */
+unsigned char s390x_elf[] = {
+  0x7f, 0x45, 0x4c, 0x46, 0x02, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x78, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x80,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x38, 0x00, 0x07, 0x00, 0x40,
+  0x00, 0x0c, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x88, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x01, 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,
+  0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x01, 0xc8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xc8,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xc8, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x0c,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x0c, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x10, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x17, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x10,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x98, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00,
+  0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x07, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x10,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x10, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0xd0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd0,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x64, 0x74, 0xe5, 0x51,
+  0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x10, 0x64, 0x74, 0xe5, 0x52, 0x00, 0x00, 0x00, 0x04,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x10, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x17, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x10,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd0, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0xd0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+  0x2f, 0x6c, 0x69, 0x62, 0x2f, 0x6c, 0x64, 0x36, 0x34, 0x2e, 0x73, 0x6f,
+  0x2e, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x02, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xeb, 0xef, 0xf0, 0x70,
+  0x00, 0x24, 0xa7, 0xfb, 0xff, 0x60, 0xc0, 0xe5, 0x00, 0x00, 0x01, 0x1f,
+  0xc0, 0x20, 0x00, 0x00, 0x02, 0x64, 0xc0, 0xe5, 0x00, 0x00, 0x01, 0x35,
+  0xa5, 0x1e, 0x00, 0x10, 0xa7, 0x29, 0x63, 0x00, 0xe3, 0x30, 0x10, 0x00,
+  0x00, 0x90, 0xa7, 0x3a, 0x00, 0x01, 0x42, 0x30, 0x10, 0x00, 0xa7, 0x1b,
+  0x10, 0x00, 0xa7, 0x27, 0xff, 0xf7, 0xc0, 0x20, 0x00, 0x00, 0x02, 0x50,
+  0xa7, 0xf4, 0xff, 0xeb, 0x07, 0x07, 0x07, 0x07, 0xc0, 0xf0, 0x00, 0x00,
+  0x56, 0xc4, 0xc0, 0x20, 0x00, 0x00, 0x0a, 0xbd, 0xc0, 0x30, 0x00, 0x00,
+  0x56, 0xbe, 0xb9, 0x0b, 0x00, 0x32, 0xb9, 0x02, 0x00, 0x33, 0xa7, 0x84,
+  0x00, 0x19, 0xa7, 0x3b, 0xff, 0xff, 0xeb, 0x43, 0x00, 0x08, 0x00, 0x0c,
+  0xb9, 0x02, 0x00, 0x44, 0xb9, 0x04, 0x00, 0x12, 0xa7, 0x84, 0x00, 0x09,
+  0xd7, 0xff, 0x10, 0x00, 0x10, 0x00, 0x41, 0x10, 0x11, 0x00, 0xa7, 0x47,
+  0xff, 0xfb, 0xc0, 0x20, 0x00, 0x00, 0x00, 0x07, 0x44, 0x30, 0x20, 0x00,
+  0xa7, 0xf4, 0xff, 0xb6, 0xd7, 0x00, 0x10, 0x00, 0x10, 0x00, 0xc0, 0x10,
+  0x00, 0x00, 0x00, 0x29, 0xb2, 0xb2, 0x10, 0x00, 0xeb, 0x00, 0xf0, 0x00,
+  0x00, 0x25, 0x96, 0x02, 0xf0, 0x06, 0xeb, 0x00, 0xf0, 0x00, 0x00, 0x2f,
+  0xc0, 0x10, 0x00, 0x00, 0x00, 0x11, 0xe3, 0x10, 0x01, 0xb8, 0x00, 0x24,
+  0xc0, 0x10, 0x00, 0x00, 0x00, 0x26, 0xd2, 0x07, 0x01, 0xb0, 0x10, 0x00,
+  0xc0, 0x10, 0x00, 0x00, 0x00, 0x18, 0xb2, 0xb2, 0x10, 0x00, 0xeb, 0x00,
+  0xf0, 0x00, 0x00, 0x25, 0x94, 0xfd, 0xf0, 0x06, 0xeb, 0x00, 0xf0, 0x00,
+  0x00, 0x2f, 0x07, 0xfe, 0x07, 0x07, 0x07, 0x07, 0x00, 0x02, 0x00, 0x01,
+  0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x03, 0x02, 0x00, 0x01, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x80, 0x00, 0x00, 0x00,
+  0xeb, 0xbf, 0xf0, 0x58, 0x00, 0x24, 0xc0, 0x10, 0x00, 0x00, 0x0e, 0x59,
+  0xa7, 0xfb, 0xff, 0x60, 0xb2, 0x20, 0x00, 0x21, 0xb2, 0x22, 0x00, 0xb0,
+  0x88, 0xb0, 0x00, 0x1c, 0xc0, 0xe5, 0xff, 0xff, 0xff, 0xba, 0xa7, 0xbe,
+  0x00, 0x03, 0xa7, 0x84, 0x00, 0x13, 0xa7, 0xbe, 0x00, 0x02, 0xa7, 0x28,
+  0x00, 0x00, 0xa7, 0x74, 0x00, 0x04, 0xa7, 0x28, 0xff, 0xfe, 0xe3, 0x40,
+  0xf1, 0x10, 0x00, 0x04, 0xb9, 0x14, 0x00, 0x22, 0xeb, 0xbf, 0xf0, 0xf8,
+  0x00, 0x04, 0x07, 0xf4, 0xa7, 0x28, 0xff, 0xff, 0xa7, 0xf4, 0xff, 0xf5,
+  0x07, 0x07, 0x07, 0x07, 0xeb, 0xbf, 0xf0, 0x58, 0x00, 0x24, 0xc0, 0xd0,
+  0x00, 0x00, 0x01, 0x21, 0xa7, 0xfb, 0xff, 0x60, 0xa7, 0xb9, 0x00, 0x00,
+  0xa7, 0x19, 0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0x0e, 0x24, 0xa7, 0x3b,
+  0x00, 0x01, 0xa7, 0x37, 0x00, 0x23, 0xc0, 0x20, 0x00, 0x00, 0x0e, 0x1d,
+  0x18, 0x31, 0xa7, 0x1a, 0x00, 0x06, 0x40, 0x10, 0x20, 0x08, 0xa7, 0x3a,
+  0x00, 0x0e, 0xa7, 0x18, 0x1a, 0x00, 0x40, 0x30, 0x20, 0x00, 0x92, 0x00,
+  0x20, 0x02, 0x40, 0x10, 0x20, 0x0a, 0xe3, 0x20, 0xd0, 0x00, 0x00, 0x04,
+  0xc0, 0xe5, 0xff, 0xff, 0xff, 0xac, 0xe3, 0x40, 0xf1, 0x10, 0x00, 0x04,
+  0xb9, 0x04, 0x00, 0x2b, 0xeb, 0xbf, 0xf0, 0xf8, 0x00, 0x04, 0x07, 0xf4,
+  0xb9, 0x04, 0x00, 0x51, 0xa7, 0x5b, 0x00, 0x01, 0xa7, 0x09, 0x0f, 0xf7,
+  0xb9, 0x21, 0x00, 0x50, 0xa7, 0x24, 0xff, 0xd7, 0x41, 0xeb, 0x20, 0x00,
+  0x95, 0x0a, 0xe0, 0x00, 0xa7, 0x74, 0x00, 0x08, 0x41, 0x11, 0x40, 0x0e,
+  0x92, 0x0d, 0x10, 0x00, 0xb9, 0x04, 0x00, 0x15, 0x43, 0x5b, 0x20, 0x00,
+  0x42, 0x51, 0x40, 0x0e, 0xa7, 0xbb, 0x00, 0x01, 0x41, 0x10, 0x10, 0x01,
+  0xa7, 0xf4, 0xff, 0xbf, 0xc0, 0x50, 0x00, 0x00, 0x00, 0xd4, 0xc0, 0x10,
+  0x00, 0x00, 0x0d, 0xd9, 0xa7, 0x48, 0x00, 0x1c, 0x40, 0x40, 0x10, 0x00,
+  0x50, 0x20, 0x10, 0x0c, 0xa7, 0x48, 0x00, 0x04, 0xe3, 0x20, 0x50, 0x00,
+  0x00, 0x04, 0x40, 0x40, 0x10, 0x0a, 0x50, 0x30, 0x10, 0x10, 0xc0, 0xf4,
+  0xff, 0xff, 0xff, 0x6b, 0xa7, 0x39, 0x00, 0x40, 0xa7, 0x29, 0x00, 0x00,
+  0xc0, 0xf4, 0xff, 0xff, 0xff, 0xe4, 0x07, 0x07, 0xb9, 0x04, 0x00, 0x13,
+  0xa7, 0x2a, 0xff, 0xff, 0xb9, 0x04, 0x00, 0x34, 0xa7, 0x48, 0x00, 0x01,
+  0x15, 0x24, 0xa7, 0x24, 0x00, 0x07, 0xb9, 0x04, 0x00, 0x21, 0xc0, 0xf4,
+  0xff, 0xff, 0xff, 0x7f, 0xa7, 0x29, 0xff, 0xff, 0x07, 0xfe, 0x07, 0x07,
+  0xa7, 0x39, 0x00, 0x00, 0x41, 0x13, 0x20, 0x00, 0x95, 0x00, 0x10, 0x00,
+  0xa7, 0x74, 0x00, 0x05, 0xc0, 0xf4, 0xff, 0xff, 0xff, 0x70, 0xa7, 0x3b,
+  0x00, 0x01, 0xa7, 0xf4, 0xff, 0xf5, 0x07, 0x07, 0xeb, 0xbf, 0xf0, 0x58,
+  0x00, 0x24, 0xc0, 0xd0, 0x00, 0x00, 0x00, 0x91, 0xa7, 0xfb, 0xff, 0x60,
+  0xb9, 0x04, 0x00, 0xb2, 0xa7, 0x19, 0x00, 0x20, 0xc0, 0x20, 0x00, 0x00,
+  0x0d, 0x8c, 0x92, 0x00, 0x20, 0x00, 0xa7, 0x2b, 0x00, 0x01, 0xa7, 0x17,
+  0xff, 0xfc, 0xc0, 0x10, 0x00, 0x00, 0x0d, 0x83, 0xa7, 0x28, 0x00, 0x20,
+  0x40, 0x20, 0x10, 0x00, 0xe3, 0x20, 0xd0, 0x00, 0x00, 0x04, 0xc0, 0xe5,
+  0xff, 0xff, 0xff, 0x1d, 0x12, 0x22, 0xa7, 0x74, 0x00, 0x17, 0xa7, 0x19,
+  0x00, 0x00, 0xc0, 0x40, 0x00, 0x00, 0x00, 0x75, 0xc0, 0x50, 0x00, 0x00,
+  0x0d, 0x7a, 0xa7, 0x29, 0x00, 0x08, 0xe3, 0x31, 0x50, 0x00, 0x00, 0x90,
+  0x43, 0x33, 0x40, 0x00, 0x42, 0x31, 0xb0, 0x00, 0xa7, 0x1b, 0x00, 0x01,
+  0xa7, 0x27, 0xff, 0xf7, 0xe3, 0x40, 0xf1, 0x10, 0x00, 0x04, 0xeb, 0xbf,
+  0xf0, 0xf8, 0x00, 0x04, 0x07, 0xf4, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+  0xeb, 0xaf, 0xf0, 0x50, 0x00, 0x24, 0xc0, 0xd0, 0x00, 0x00, 0x00, 0x51,
+  0xa7, 0xfb, 0xff, 0x60, 0xa7, 0x19, 0x0f, 0xf8, 0xb9, 0x21, 0x00, 0x31,
+  0xb9, 0x04, 0x00, 0xa2, 0xa7, 0xc4, 0x00, 0x2d, 0xa7, 0xb9, 0x0f, 0xf8,
+  0xc0, 0x10, 0x00, 0x00, 0x0d, 0x42, 0xa7, 0x28, 0x10, 0x00, 0x40, 0x20,
+  0x10, 0x00, 0x92, 0x00, 0x10, 0x02, 0xe3, 0x20, 0xd0, 0x00, 0x00, 0x04,
+  0xc0, 0xe5, 0xff, 0xff, 0xfe, 0xda, 0xa7, 0xbb, 0x00, 0x01, 0xa7, 0x19,
+  0x00, 0x00, 0xc0, 0x20, 0x00, 0x00, 0x0d, 0x2f, 0xa7, 0xb7, 0x00, 0x17,
+  0xc0, 0x10, 0x00, 0x00, 0x0d, 0x2a, 0xe3, 0x40, 0xf1, 0x10, 0x00, 0x04,
+  0xe3, 0x20, 0x10, 0x08, 0x00, 0x91, 0xa7, 0x2a, 0xff, 0xf9, 0xb9, 0x14,
+  0x00, 0x22, 0xeb, 0xaf, 0xf0, 0xf0, 0x00, 0x04, 0x07, 0xf4, 0xb9, 0x04,
+  0x00, 0xb3, 0xa7, 0xf4, 0xff, 0xd5, 0x43, 0x31, 0x20, 0x0f, 0x42, 0x31,
+  0xa0, 0x00, 0xa7, 0x1b, 0x00, 0x01, 0xa7, 0xf4, 0xff, 0xe3, 0x07, 0x07,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x78, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x05, 0x2e, 0x2e, 0x2e, 0x2e,
+  0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e,
+  0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e,
+  0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e,
+  0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e,
+  0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e,
+  0x20, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e,
+  0x3c, 0x28, 0x2b, 0x7c, 0x26, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e,
+  0x2e, 0x2e, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x2e, 0x2d, 0x2f, 0x2e, 0x2e,
+  0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
+  0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x60, 0x3a, 0x23,
+  0x40, 0x27, 0x3d, 0x22, 0x2e, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+  0x68, 0x69, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x6a, 0x6b, 0x6c,
+  0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e,
+  0x2e, 0x2e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x2e, 0x2e,
+  0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e,
+  0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x41, 0x42, 0x43,
+  0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e,
+  0x2e, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x2e, 0x2e,
+  0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+  0x59, 0x5a, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x30, 0x31, 0x32, 0x33,
+  0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e,
+  0x41, 0x00, 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x6f, 0xff, 0xfe, 0xf5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xd8,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x02, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xf8, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x6f, 0xff, 0xff, 0xfb, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x10, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x47, 0x43, 0x43, 0x3a, 0x20, 0x28, 0x47, 0x4e, 0x55, 0x29, 0x20, 0x38,
+  0x2e, 0x32, 0x2e, 0x31, 0x20, 0x32, 0x30, 0x31, 0x38, 0x30, 0x39, 0x30,
+  0x35, 0x20, 0x28, 0x52, 0x65, 0x64, 0x20, 0x48, 0x61, 0x74, 0x20, 0x38,
+  0x2e, 0x32, 0x2e, 0x31, 0x2d, 0x33, 0x29, 0x00, 0x00, 0x2e, 0x73, 0x68,
+  0x73, 0x74, 0x72, 0x74, 0x61, 0x62, 0x00, 0x2e, 0x69, 0x6e, 0x74, 0x65,
+  0x72, 0x70, 0x00, 0x2e, 0x67, 0x6e, 0x75, 0x2e, 0x68, 0x61, 0x73, 0x68,
+  0x00, 0x2e, 0x64, 0x79, 0x6e, 0x73, 0x79, 0x6d, 0x00, 0x2e, 0x64, 0x79,
+  0x6e, 0x73, 0x74, 0x72, 0x00, 0x2e, 0x74, 0x65, 0x78, 0x74, 0x00, 0x2e,
+  0x72, 0x6f, 0x64, 0x61, 0x74, 0x61, 0x00, 0x2e, 0x64, 0x79, 0x6e, 0x61,
+  0x6d, 0x69, 0x63, 0x00, 0x2e, 0x67, 0x6f, 0x74, 0x00, 0x2e, 0x62, 0x73,
+  0x73, 0x00, 0x2e, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b,
+  0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xc8, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x01, 0xc8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x13, 0x6f, 0xff, 0xff, 0xf6, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xd8,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xd8, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x0b,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x01, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xf8,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x04,
+  0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x25,
+  0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x28, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x02, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x30,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x30, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x03, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x05, 0xe8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0xe8,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x24, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3b,
+  0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x10, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x07, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd0,
+  0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
+  0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0xe0,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0xe0, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x08,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0xf8,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e,
+  0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x07, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x24, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00
+};
diff --git a/tests/ptimer-test-stubs.c b/tests/ptimer-test-stubs.c
index ca5cc3b..54b3fd2 100644
--- a/tests/ptimer-test-stubs.c
+++ b/tests/ptimer-test-stubs.c
@@ -34,14 +34,19 @@
 int use_icount = 1;
 bool qtest_allowed;
 
-void timer_init_tl(QEMUTimer *ts,
-                   QEMUTimerList *timer_list, int scale,
-                   QEMUTimerCB *cb, void *opaque)
+void timer_init_full(QEMUTimer *ts,
+                     QEMUTimerListGroup *timer_list_group, QEMUClockType type,
+                     int scale, int attributes,
+                     QEMUTimerCB *cb, void *opaque)
 {
-    ts->timer_list = timer_list;
+    if (!timer_list_group) {
+        timer_list_group = &main_loop_tlg;
+    }
+    ts->timer_list = timer_list_group->tl[type];
     ts->cb = cb;
     ts->opaque = opaque;
     ts->scale = scale;
+    ts->attributes = attributes;
     ts->expire_time = -1;
 }
 
diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out
index 0871bff..6b50540 100644
--- a/tests/qemu-iotests/049.out
+++ b/tests/qemu-iotests/049.out
@@ -95,35 +95,31 @@
 qemu-img: Image size must be less than 8 EiB!
 
 qemu-img create -f qcow2 -o size=-1024 TEST_DIR/t.qcow2
-qemu-img: Value '-1024' is out of range for parameter 'size'
-qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
+qemu-img: TEST_DIR/t.qcow2: Value '-1024' is out of range for parameter 'size'
 
 qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1k
 qemu-img: Image size must be less than 8 EiB!
 
 qemu-img create -f qcow2 -o size=-1k TEST_DIR/t.qcow2
-qemu-img: Value '-1k' is out of range for parameter 'size'
-qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
+qemu-img: TEST_DIR/t.qcow2: Value '-1k' is out of range for parameter 'size'
 
 qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- 1kilobyte
 qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes for
 qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes.
 
 qemu-img create -f qcow2 -o size=1kilobyte TEST_DIR/t.qcow2
-qemu-img: Parameter 'size' expects a non-negative number below 2^64
+qemu-img: TEST_DIR/t.qcow2: Parameter 'size' expects a non-negative number below 2^64
 Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta-
 and exabytes, respectively.
-qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
 
 qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- foobar
 qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes for
 qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes.
 
 qemu-img create -f qcow2 -o size=foobar TEST_DIR/t.qcow2
-qemu-img: Parameter 'size' expects a non-negative number below 2^64
+qemu-img: TEST_DIR/t.qcow2: Parameter 'size' expects a non-negative number below 2^64
 Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta-
 and exabytes, respectively.
-qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
 
 == Check correct interpretation of suffixes for cluster size ==
 
diff --git a/tests/qemu-iotests/169 b/tests/qemu-iotests/169
index f243db9..69850c4 100755
--- a/tests/qemu-iotests/169
+++ b/tests/qemu-iotests/169
@@ -24,6 +24,7 @@
 import itertools
 import operator
 import new
+import re
 from iotests import qemu_img
 
 
@@ -58,7 +59,6 @@
                   'granularity': granularity}
         if persistent:
             params['persistent'] = True
-            params['autoload'] = True
 
         result = vm.qmp('block-dirty-bitmap-add', **params)
         self.assert_qmp(result, 'return', {});
@@ -77,6 +77,58 @@
             self.assert_qmp(result, 'error/desc',
                             "Dirty bitmap 'bitmap0' not found");
 
+    def do_test_migration_resume_source(self, persistent, migrate_bitmaps):
+        granularity = 512
+
+        # regions = ((start, count), ...)
+        regions = ((0, 0x10000),
+                   (0xf0000, 0x10000),
+                   (0xa0201, 0x1000))
+
+        mig_caps = [{'capability': 'events', 'state': True}]
+        if migrate_bitmaps:
+            mig_caps.append({'capability': 'dirty-bitmaps', 'state': True})
+
+        result = self.vm_a.qmp('migrate-set-capabilities',
+                               capabilities=mig_caps)
+        self.assert_qmp(result, 'return', {})
+
+        self.add_bitmap(self.vm_a, granularity, persistent)
+        for r in regions:
+            self.vm_a.hmp_qemu_io('drive0', 'write %d %d' % r)
+        sha256 = self.get_bitmap_hash(self.vm_a)
+
+        result = self.vm_a.qmp('migrate', uri=mig_cmd)
+        while True:
+            event = self.vm_a.event_wait('MIGRATION')
+            if event['data']['status'] == 'completed':
+                break
+
+        # test that bitmap is still here
+        removed = (not migrate_bitmaps) and persistent
+        self.check_bitmap(self.vm_a, False if removed else sha256)
+
+        self.vm_a.qmp('cont')
+
+        # test that bitmap is still here after invalidation
+        self.check_bitmap(self.vm_a, sha256)
+
+        # shutdown and check that invalidation didn't fail
+        self.vm_a.shutdown()
+
+        # catch 'Could not reopen qcow2 layer: Bitmap already exists'
+        # possible error
+        log = self.vm_a.get_log()
+        log = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', log)
+        log = re.sub(r'^(wrote .* bytes at offset .*\n.*KiB.*ops.*sec.*\n){3}',
+                     '', log)
+        log = re.sub(r'\[I \+\d+\.\d+\] CLOSED\n?$', '', log)
+        self.assertEqual(log, '')
+
+        # test that bitmap is still persistent
+        self.vm_a.launch()
+        self.check_bitmap(self.vm_a, sha256 if persistent else False)
+
     def do_test_migration(self, persistent, migrate_bitmaps, online,
                           shared_storage):
         granularity = 512
@@ -134,6 +186,14 @@
 
         if should_migrate:
             self.vm_b.shutdown()
+
+            # catch 'Could not reopen qcow2 layer: Bitmap already exists'
+            # possible error
+            log = self.vm_b.get_log()
+            log = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', log)
+            log = re.sub(r'\[I \+\d+\.\d+\] CLOSED\n?$', '', log)
+            self.assertEqual(log, '')
+
             # recreate vm_b, as we don't want -incoming option (this will lead
             # to "cat" process left alive after test finish)
             self.vm_b = iotests.VM(path_suffix='b')
@@ -144,7 +204,7 @@
 
 def inject_test_case(klass, name, method, *args, **kwargs):
     mc = operator.methodcaller(method, *args, **kwargs)
-    setattr(klass, 'test_' + name, new.instancemethod(mc, None, klass))
+    setattr(klass, 'test_' + method + name, new.instancemethod(mc, None, klass))
 
 for cmb in list(itertools.product((True, False), repeat=4)):
     name = ('_' if cmb[0] else '_not_') + 'persistent_'
@@ -155,6 +215,12 @@
     inject_test_case(TestDirtyBitmapMigration, name, 'do_test_migration',
                      *list(cmb))
 
+for cmb in list(itertools.product((True, False), repeat=2)):
+    name = ('_' if cmb[0] else '_not_') + 'persistent_'
+    name += ('_' if cmb[1] else '_not_') + 'migbitmap'
+
+    inject_test_case(TestDirtyBitmapMigration, name,
+                     'do_test_migration_resume_source', *list(cmb))
 
 if __name__ == '__main__':
     iotests.main(supported_fmts=['qcow2'])
diff --git a/tests/qemu-iotests/169.out b/tests/qemu-iotests/169.out
index b6f2576..3a89159 100644
--- a/tests/qemu-iotests/169.out
+++ b/tests/qemu-iotests/169.out
@@ -1,5 +1,5 @@
-................
+....................
 ----------------------------------------------------------------------
-Ran 16 tests
+Ran 20 tests
 
 OK
diff --git a/tests/qemu-iotests/218 b/tests/qemu-iotests/218
old mode 100644
new mode 100755
diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu
index f285484..dadde2a 100644
--- a/tests/qemu-iotests/common.qemu
+++ b/tests/qemu-iotests/common.qemu
@@ -257,7 +257,7 @@
 }
 
 
-# Silenty kills the QEMU process
+# Silently kills the QEMU process
 #
 # If $wait is set to anything other than the empty string, the process will not
 # be killed but only waited for, and any output will be forwarded to stdout. If
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 44bee16..70ca65b 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -170,7 +170,7 @@
 fi
 
 if [ ! -d "$TEST_DIR" ]; then
-    echo "common.config: Error: \$TEST_DIR ($TEST_DIR) is not a directory"
+    echo "common.rc: Error: \$TEST_DIR ($TEST_DIR) is not a directory"
     exit 1
 fi
 
@@ -179,7 +179,7 @@
 fi
 
 if [ ! -d "$SAMPLE_IMG_DIR" ]; then
-    echo "common.config: Error: \$SAMPLE_IMG_DIR ($SAMPLE_IMG_DIR) is not a directory"
+    echo "common.rc: Error: \$SAMPLE_IMG_DIR ($SAMPLE_IMG_DIR) is not a directory"
     exit 1
 fi
 
diff --git a/tests/tcg/Makefile.include b/tests/tcg/Makefile.include
index 57470b2..c581bd6 100644
--- a/tests/tcg/Makefile.include
+++ b/tests/tcg/Makefile.include
@@ -2,7 +2,7 @@
 #
 # TCG tests (per-target rules)
 #
-# This Makefile fragement is included from the per-target
+# This Makefile fragment is included from the per-target
 # Makefile.target so will be invoked for each linux-user program we
 # build. We have two options for compiling, either using a configured
 # guest compiler or calling one of our docker images to do it for us.
diff --git a/tests/tcg/Makefile.probe b/tests/tcg/Makefile.probe
index 15c0412..9dc6546 100644
--- a/tests/tcg/Makefile.probe
+++ b/tests/tcg/Makefile.probe
@@ -2,7 +2,7 @@
 #
 # TCG Compiler Probe
 #
-# This Makefile fragement is included multiple times in the main make
+# This Makefile fragment is included multiple times in the main make
 # script to probe for available compilers. This is used to build up a
 # selection of required docker targets before we invoke a sub-make for
 # each target.
diff --git a/tests/tcg/README b/tests/tcg/README
index a5643d3..2a58f9a 100644
--- a/tests/tcg/README
+++ b/tests/tcg/README
@@ -10,6 +10,6 @@
 
 LM32
 ====
-The testsuite for LM32 is in tests/tcg/cris.  You can run it
+The testsuite for LM32 is in tests/tcg/lm32.  You can run it
 with "make test-lm32".
 
diff --git a/tests/tcg/mips/mips64-dsp/subq_s_pw.c b/tests/tcg/mips/mips64-dsp/subq_s_pw.c
index e8e0b05..4c080b7 100644
--- a/tests/tcg/mips/mips64-dsp/subq_s_pw.c
+++ b/tests/tcg/mips/mips64-dsp/subq_s_pw.c
@@ -24,7 +24,7 @@
     rt = 0x123456789ABCDEF1;
     rs = 0x123456789ABCDEF2;
     result =  0x0000000000000001;
-    /* This time we do not set dspctrl, but it setted in pre-action. */
+    /* This time we do not set dspctrl, but it was set in pre-action. */
     dspresult = 0x1;
 
     __asm
diff --git a/tests/tcg/mips/mipsr5900/Makefile b/tests/tcg/mips/mipsr5900/Makefile
new file mode 100644
index 0000000..a1c388b
--- /dev/null
+++ b/tests/tcg/mips/mipsr5900/Makefile
@@ -0,0 +1,30 @@
+-include ../../config-host.mak
+
+CROSS=mipsr5900el-unknown-linux-gnu-
+
+SIM=qemu-mipsel
+SIM_FLAGS=-cpu R5900
+
+CC      = $(CROSS)gcc
+CFLAGS  = -Wall -mabi=32 -march=r5900 -static
+
+TESTCASES = div1.tst
+TESTCASES += divu1.tst
+TESTCASES += mflohi1.tst
+TESTCASES += mtlohi1.tst
+TESTCASES += mult.tst
+TESTCASES += multu.tst
+
+all: $(TESTCASES)
+
+%.tst: %.c
+	$(CC) $(CFLAGS) $< -o $@
+
+check: $(TESTCASES)  # run each test under $(SIM); stop at the first failure
+	@for case in $(TESTCASES); do \
+        echo $(SIM) $(SIM_FLAGS) ./$$case;\
+        $(SIM) $(SIM_FLAGS) ./$$case || exit 1; \
+	done
+
+clean:
+	$(RM) -rf $(TESTCASES)
diff --git a/tests/tcg/mips/mipsr5900/div1.c b/tests/tcg/mips/mipsr5900/div1.c
new file mode 100644
index 0000000..83dafa0
--- /dev/null
+++ b/tests/tcg/mips/mipsr5900/div1.c
@@ -0,0 +1,73 @@
+/*
+ * Test R5900-specific DIV1.
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <assert.h>
+
+struct quotient_remainder { int32_t quotient, remainder; };
+
+static struct quotient_remainder div1(int32_t rs, int32_t rt)
+{
+    int32_t lo, hi;
+
+    __asm__ __volatile__ (
+            "    div1 $0, %2, %3\n"
+            "    mflo1 %0\n"
+            "    mfhi1 %1\n"
+            : "=r" (lo), "=r" (hi)
+            : "r" (rs), "r" (rt));
+    /* INT32_MIN / -1 is undefined in C, so skip the C cross-check for it. */
+    assert((rs == INT32_MIN && rt == -1) || rs / rt == lo);
+    assert((rs == INT32_MIN && rt == -1) || rs % rt == hi);
+
+    return (struct quotient_remainder) { .quotient = lo, .remainder = hi };
+}
+
+static void verify_div1(int32_t rs, int32_t rt,
+                        int32_t expected_quotient,
+                        int32_t expected_remainder)
+{
+    struct quotient_remainder qr = div1(rs, rt);
+
+    assert(qr.quotient == expected_quotient);
+    assert(qr.remainder == expected_remainder);
+}
+
+static void verify_div1_negations(int32_t rs, int32_t rt,
+                                  int32_t expected_quotient,
+                                  int32_t expected_remainder)
+{
+    verify_div1(rs, rt, expected_quotient, expected_remainder);
+    verify_div1(rs, -rt, -expected_quotient, expected_remainder);
+    verify_div1(-rs, rt, -expected_quotient, -expected_remainder);
+    verify_div1(-rs, -rt, expected_quotient, -expected_remainder);
+}
+
+int main()
+{
+    verify_div1_negations(0, 1, 0, 0);
+    verify_div1_negations(1, 1, 1, 0);
+    verify_div1_negations(1, 2, 0, 1);
+    verify_div1_negations(17, 19, 0, 17);
+    verify_div1_negations(19, 17, 1, 2);
+    verify_div1_negations(77773, 101, 770, 3);
+
+    verify_div1(-0x80000000,  1, -0x80000000, 0);
+
+    /*
+     * Supplementary explanation from the Toshiba TX System RISC TX79 Core
+     * Architecture manual, A-38 and B-7, https://wiki.qemu.org/File:C790.pdf
+     *
+     * Normally, when 0x80000000 (-2147483648) the signed minimum value is
+     * divided by 0xFFFFFFFF (-1), the operation will result in an overflow.
+     * However, in this instruction an overflow exception doesn't occur and
+     * the result will be as follows:
+     *
+     * Quotient is 0x80000000 (-2147483648), and remainder is 0x00000000 (0).
+     */
+    verify_div1(-0x80000000, -1, -0x80000000, 0);
+
+    return 0;
+}
diff --git a/tests/tcg/mips/mipsr5900/divu1.c b/tests/tcg/mips/mipsr5900/divu1.c
new file mode 100644
index 0000000..72aeed3
--- /dev/null
+++ b/tests/tcg/mips/mipsr5900/divu1.c
@@ -0,0 +1,48 @@
+/*
+ * Test R5900-specific DIVU1.
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <assert.h>
+
+struct quotient_remainder { uint32_t quotient, remainder; }; /* LO1, HI1 */
+
+/* Run DIVU1 on rs/rt; return LO1 as quotient and HI1 as remainder. */
+static struct quotient_remainder divu1(uint32_t rs, uint32_t rt)
+{
+    struct quotient_remainder qr;
+
+    __asm__ __volatile__ (
+            "    divu1 $0, %[rs], %[rt]\n"
+            "    mflo1 %[q]\n"
+            "    mfhi1 %[r]\n"
+            : [q] "=r" (qr.quotient), [r] "=r" (qr.remainder)
+            : [rs] "r" (rs), [rt] "r" (rt));
+    /* Cross-check against the compiler's own unsigned division. */
+    assert(rs / rt == qr.quotient);
+    assert(rs % rt == qr.remainder);
+    return qr;
+}
+
+/* Call divu1(rs, rt) and require the given quotient and remainder. */
+static void verify_divu1(uint32_t rs, uint32_t rt,
+                         uint32_t expected_quotient,
+                         uint32_t expected_remainder)
+{
+    const struct quotient_remainder got = divu1(rs, rt);
+    assert(got.quotient == expected_quotient);
+    assert(got.remainder == expected_remainder);
+}
+
+int main()
+{
+    verify_divu1(0, 1, 0, 0);           /* zero dividend */
+    verify_divu1(1, 1, 1, 0);           /* equal operands */
+    verify_divu1(1, 2, 0, 1);           /* dividend smaller than divisor */
+    verify_divu1(17, 19, 0, 17);
+    verify_divu1(19, 17, 1, 2);         /* non-zero quotient and remainder */
+    verify_divu1(77773, 101, 770, 3);
+
+    return 0;
+}
diff --git a/tests/tcg/mips/mipsr5900/mflohi1.c b/tests/tcg/mips/mipsr5900/mflohi1.c
new file mode 100644
index 0000000..eed3683
--- /dev/null
+++ b/tests/tcg/mips/mipsr5900/mflohi1.c
@@ -0,0 +1,35 @@
+/*
+ * Test R5900-specific MFLO1 and MFHI1.
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <assert.h>
+
+int main()
+{
+    const int32_t a  = 12207031, b  = 305175781;    /* operands for MULT  */
+    const int32_t a1 = 32452867, b1 = 49979687;     /* operands for MULT1 */
+    int64_t lo, hi, lo1, hi1;
+    int64_t prod, prod1;
+
+    /* Issue MULT then MULT1; each result must stay in its own LO/HI pair. */
+    __asm__ __volatile__ (
+            "    mult $0, %[a], %[b]\n"
+            "    mult1 $0, %[a1], %[b1]\n"
+            "    mflo %[lo]\n"
+            "    mfhi %[hi]\n"
+            "    mflo1 %[lo1]\n"
+            "    mfhi1 %[hi1]\n"
+            : [lo]  "=r" (lo),  [hi]  "=r" (hi),
+              [lo1] "=r" (lo1), [hi1] "=r" (hi1)
+            : [a]  "r" (a),  [b]  "r" (b),
+              [a1] "r" (a1), [b1] "r" (b1));
+    prod  = ((int64_t)hi  << 32) | (uint32_t)lo;
+    prod1 = ((int64_t)hi1 << 32) | (uint32_t)lo1;
+
+    assert(prod  == 3725290219116211);
+    assert(prod1 == 1621984134912629);
+
+    return 0;
+}
diff --git a/tests/tcg/mips/mipsr5900/mtlohi1.c b/tests/tcg/mips/mipsr5900/mtlohi1.c
new file mode 100644
index 0000000..7f3e728
--- /dev/null
+++ b/tests/tcg/mips/mipsr5900/mtlohi1.c
@@ -0,0 +1,40 @@
+/*
+ * Test R5900-specific MTLO1 and MTHI1.
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <assert.h>
+
+int main()
+{
+    const int32_t tlo  = 12207031, thi  = 305175781;
+    const int32_t tlo1 = 32452867, thi1 = 49979687;
+    int32_t flo, fhi, flo1, fhi1;
+
+    /* Load all four registers, zero the outputs, then read each one back. */
+    __asm__ __volatile__ (
+            "    mtlo  %[tlo]\n"
+            "    mthi  %[thi]\n"
+            "    mtlo1 %[tlo1]\n"
+            "    mthi1 %[thi1]\n"
+            "    move  %[flo], $0\n"
+            "    move  %[fhi], $0\n"
+            "    move  %[flo1], $0\n"
+            "    move  %[fhi1], $0\n"
+            "    mflo  %[flo]\n"
+            "    mfhi  %[fhi]\n"
+            "    mflo1 %[flo1]\n"
+            "    mfhi1 %[fhi1]\n"
+            : [flo]  "=r" (flo),  [fhi]  "=r" (fhi),
+              [flo1] "=r" (flo1), [fhi1] "=r" (fhi1)
+            : [tlo]  "r" (tlo),  [thi]  "r" (thi),
+              [tlo1] "r" (tlo1), [thi1] "r" (thi1));
+
+    assert(flo  == 12207031);
+    assert(fhi  == 305175781);
+    assert(flo1 == 32452867);
+    assert(fhi1 == 49979687);
+
+    return 0;
+}
diff --git a/tests/tcg/mips/mipsr5900/mult.c b/tests/tcg/mips/mipsr5900/mult.c
new file mode 100644
index 0000000..5710b39
--- /dev/null
+++ b/tests/tcg/mips/mipsr5900/mult.c
@@ -0,0 +1,76 @@
+/*
+ * Test R5900-specific three-operand MULT and MULT1.
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <assert.h>
+
+/* Three-operand MULT: return HI:LO as 64 bits; rd must mirror LO. */
+static int64_t mult(int32_t rs, int32_t rt)
+{
+    int32_t rd, lo, hi;
+    int64_t r;
+
+    __asm__ __volatile__ (
+            "    mult %0, %3, %4\n"
+            "    mflo %1\n"
+            "    mfhi %2\n"
+            : "=r" (rd), "=r" (lo), "=r" (hi)
+            : "r" (rs), "r" (rt));
+    /* Join the halves unsigned; shifting a negative hi left is undefined. */
+    r = (int64_t)(((uint64_t)(uint32_t)hi << 32) | (uint32_t)lo);
+    assert((int64_t)rs * rt == r);
+    assert(rd == lo);
+    return r;
+}
+
+/* Three-operand MULT1: return HI1:LO1 as 64 bits; rd must mirror LO1. */
+static int64_t mult1(int32_t rs, int32_t rt)
+{
+    int32_t rd, lo, hi;
+    int64_t r;
+
+    __asm__ __volatile__ (
+            "    mult1 %0, %3, %4\n"
+            "    mflo1 %1\n"
+            "    mfhi1 %2\n"
+            : "=r" (rd), "=r" (lo), "=r" (hi)
+            : "r" (rs), "r" (rt));
+    /* Join the halves unsigned; shifting a negative hi left is undefined. */
+    r = (int64_t)(((uint64_t)(uint32_t)hi << 32) | (uint32_t)lo);
+    assert((int64_t)rs * rt == r);
+    assert(rd == lo);
+    return r;
+}
+
+/* Multiply via both mult() and mult1() and require identical products. */
+static int64_t mult_variants(int32_t rs, int32_t rt)
+{
+    const int64_t product = mult(rs, rt);
+    const int64_t product1 = mult1(rs, rt);
+
+    assert(product == product1);
+    return product;
+}
+
+static void verify_mult_negations(int32_t rs, int32_t rt, int64_t expected)
+{
+    assert(mult_variants(rs, rt) == expected);      /* operands as given */
+    assert(mult_variants(-rs, rt) == -expected);    /* one factor negated */
+    assert(mult_variants(rs, -rt) == -expected);    /* other factor negated */
+    assert(mult_variants(-rs, -rt) == expected);    /* both negated */
+}
+
+int main(void)
+{
+    verify_mult_negations(17, 19, 323);
+    verify_mult_negations(77773, 99991, 7776600043);
+    verify_mult_negations(12207031, 305175781, 3725290219116211);
+    /* Extremes; INT32_MIN avoids negating the unsigned 0x80000000. */
+    assert(mult_variants(INT32_MIN,  INT32_MAX) == -0x3FFFFFFF80000000);
+    assert(mult_variants(INT32_MIN, -INT32_MAX) ==  0x3FFFFFFF80000000);
+    assert(mult_variants(INT32_MIN,  INT32_MIN) ==  0x4000000000000000);
+
+    return 0;
+}
diff --git a/tests/tcg/mips/mipsr5900/multu.c b/tests/tcg/mips/mipsr5900/multu.c
new file mode 100644
index 0000000..f043904
--- /dev/null
+++ b/tests/tcg/mips/mipsr5900/multu.c
@@ -0,0 +1,68 @@
+/*
+ * Test R5900-specific three-operand MULTU and MULTU1.
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <assert.h>
+
+/* Three-operand MULTU: return HI:LO as 64 bits; rd must mirror LO. */
+static uint64_t multu(uint32_t rs, uint32_t rt)
+{
+    uint32_t rd, lo, hi;
+    uint64_t product;
+
+    __asm__ __volatile__ (
+            "    multu %[rd], %[rs], %[rt]\n"
+            "    mflo %[lo]\n"
+            "    mfhi %[hi]\n"
+            : [rd] "=r" (rd), [lo] "=r" (lo), [hi] "=r" (hi)
+            : [rs] "r" (rs), [rt] "r" (rt));
+    product = ((uint64_t)hi << 32) | lo;
+    assert((uint64_t)rs * rt == product);
+    assert(rd == lo);
+
+    return product;
+}
+
+/* Three-operand MULTU1: return HI1:LO1 as 64 bits; rd must mirror LO1. */
+static uint64_t multu1(uint32_t rs, uint32_t rt)
+{
+    uint32_t rd, lo, hi;
+    uint64_t product;
+
+    __asm__ __volatile__ (
+            "    multu1 %[rd], %[rs], %[rt]\n"
+            "    mflo1 %[lo]\n"
+            "    mfhi1 %[hi]\n"
+            : [rd] "=r" (rd), [lo] "=r" (lo), [hi] "=r" (hi)
+            : [rs] "r" (rs), [rt] "r" (rt));
+    product = ((uint64_t)hi << 32) | lo;
+    assert((uint64_t)rs * rt == product);
+    assert(rd == lo);
+
+    return product;
+}
+
+/* Multiply via both multu() and multu1() and require identical products. */
+static uint64_t multu_variants(uint32_t rs, uint32_t rt)
+{
+    const uint64_t product = multu(rs, rt);
+    const uint64_t product1 = multu1(rs, rt);
+
+    assert(product == product1);
+    return product;
+}
+
+int main()
+{
+    assert(multu_variants(17, 19) == 323);          /* small operands */
+    assert(multu_variants(77773, 99991) == 7776600043);
+    assert(multu_variants(12207031, 305175781) == 3725290219116211);
+    /* Operands with bit 31 set: the product must be computed unsigned. */
+    assert(multu_variants(0x80000000U, 0x7FFFFFFF) == 0x3FFFFFFF80000000);
+    assert(multu_variants(0x80000000U, 0x80000000U) ==  0x4000000000000000);
+    assert(multu_variants(0xFFFFFFFFU, 0xFFFFFFFFU) ==  0xFFFFFFFE00000001U);
+
+    return 0;
+}
diff --git a/tests/test-char.c b/tests/test-char.c
index 2a2ff32..831e37f 100644
--- a/tests/test-char.c
+++ b/tests/test-char.c
@@ -307,7 +307,7 @@
     return 10;
 }
 
-static void char_socket_test_common(Chardev *chr)
+static void char_socket_test_common(Chardev *chr, bool reconnect)
 {
     Chardev *chr_client;
     QObject *addr;
@@ -327,7 +327,8 @@
     addr = object_property_get_qobject(OBJECT(chr), "addr", &error_abort);
     qdict = qobject_to(QDict, addr);
     port = qdict_get_str(qdict, "port");
-    tmp = g_strdup_printf("tcp:127.0.0.1:%s", port);
+    tmp = g_strdup_printf("tcp:127.0.0.1:%s%s", port,
+                          reconnect ? ",reconnect=1" : "");
     qobject_unref(qdict);
 
     qemu_chr_fe_init(&be, chr, &error_abort);
@@ -347,6 +348,12 @@
     g_assert_cmpint(id, >, 0);
     main_loop();
 
+    d.chr = chr_client;
+    id = g_idle_add(char_socket_test_idle, &d);
+    g_source_set_name_by_id(id, "test-idle");
+    g_assert_cmpint(id, >, 0);
+    main_loop();
+
     g_assert(object_property_get_bool(OBJECT(chr), "connected", &error_abort));
     g_assert(object_property_get_bool(OBJECT(chr_client),
                                       "connected", &error_abort));
@@ -356,6 +363,7 @@
 
     object_unparent(OBJECT(chr_client));
 
+    d.chr = chr;
     d.conn_expected = false;
     g_idle_add(char_socket_test_idle, &d);
     main_loop();
@@ -368,7 +376,15 @@
 {
     Chardev *chr = qemu_chr_new("server", "tcp:127.0.0.1:0,server,nowait");
 
-    char_socket_test_common(chr);
+    char_socket_test_common(chr, false);
+}
+
+
+static void char_socket_reconnect_test(void)
+{
+    Chardev *chr = qemu_chr_new("server", "tcp:127.0.0.1:0,server,nowait");
+    /* true: the helper appends ",reconnect=1" to the address it formats. */
+    char_socket_test_common(chr, true);
 }
 
 
@@ -400,7 +416,7 @@
 
     qemu_opts_del(opts);
 
-    char_socket_test_common(chr);
+    char_socket_test_common(chr, false);
 }
 
 
@@ -819,6 +835,7 @@
     g_test_add_func("/char/file-fifo", char_file_fifo_test);
 #endif
     g_test_add_func("/char/socket/basic", char_socket_basic_test);
+    g_test_add_func("/char/socket/reconnect", char_socket_reconnect_test);
     g_test_add_func("/char/socket/fdpass", char_socket_fdpass_test);
     g_test_add_func("/char/udp", char_udp_test);
 #ifdef HAVE_CHARDEV_SERIAL
diff --git a/tests/test-crypto-block.c b/tests/test-crypto-block.c
index fd29a04..fae4ffc 100644
--- a/tests/test-crypto-block.c
+++ b/tests/test-crypto-block.c
@@ -29,7 +29,7 @@
 #endif
 
 #if (defined(_WIN32) || defined RUSAGE_THREAD) && \
-    (defined(CONFIG_NETTLE_KDF) || defined(CONFIG_GCRYPT_KDF))
+    (defined(CONFIG_NETTLE) || defined(CONFIG_GCRYPT))
 #define TEST_LUKS
 #else
 #undef TEST_LUKS
diff --git a/tests/test-crypto-tlscredsx509.c b/tests/test-crypto-tlscredsx509.c
index 30f9ac4..940a026 100644
--- a/tests/test-crypto-tlscredsx509.c
+++ b/tests/test-crypto-tlscredsx509.c
@@ -283,14 +283,8 @@
                  true, true, GNUTLS_KP_TLS_WWW_SERVER, NULL,
                  0, 0);
 
-    /* Technically a CA cert with basic constraints
-     * key purpose == key signing + non-critical should
-     * be rejected. GNUTLS < 3.1 does not reject it and
-     * we don't anticipate them changing this behaviour
-     */
     TLS_TEST_REG(badca1, true, cacert4req.filename, servercert4req.filename,
-                (GNUTLS_VERSION_MAJOR == 3 && GNUTLS_VERSION_MINOR >= 1) ||
-                GNUTLS_VERSION_MAJOR > 3);
+                 true);
     TLS_TEST_REG(badca2, true,
                  cacert5req.filename, servercert5req.filename, true);
     TLS_TEST_REG(badca3, true,
diff --git a/tests/test-crypto-xts.c b/tests/test-crypto-xts.c
index 1f1412c..6fb61cf 100644
--- a/tests/test-crypto-xts.c
+++ b/tests/test-crypto-xts.c
@@ -1,7 +1,7 @@
 /*
  * QEMU Crypto XTS cipher mode
  *
- * Copyright (c) 2015-2016 Red Hat, Inc.
+ * Copyright (c) 2015-2018 Red Hat, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -340,70 +340,161 @@
 static void test_xts(const void *opaque)
 {
     const QCryptoXTSTestData *data = opaque;
-    unsigned char out[512], Torg[16], T[16];
+    uint8_t out[512], Torg[16], T[16];
     uint64_t seq;
-    int j;
-    unsigned long len;
     struct TestAES aesdata;
     struct TestAES aestweak;
 
-    for (j = 0; j < 2; j++) {
-        /* skip the cases where
-         * the length is smaller than 2*blocklen
-         * or the length is not a multiple of 32
-         */
-        if ((j == 1) && ((data->PTLEN < 32) || (data->PTLEN % 32))) {
-            continue;
-        }
-        len = data->PTLEN / 2;
+    AES_set_encrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.enc);
+    AES_set_decrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.dec);
+    AES_set_encrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.enc);
+    AES_set_decrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.dec);
 
-        AES_set_encrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.enc);
-        AES_set_decrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.dec);
-        AES_set_encrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.enc);
-        AES_set_decrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.dec);
+    seq = data->seqnum;
+    STORE64L(seq, Torg);
+    memset(Torg + 8, 0, 8);
 
-        seq = data->seqnum;
-        STORE64L(seq, Torg);
-        memset(Torg + 8, 0, 8);
+    memcpy(T, Torg, sizeof(T));
+    xts_encrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, data->PTLEN, out, data->PTX);
 
-        memcpy(T, Torg, sizeof(T));
-        if (j == 0) {
-            xts_encrypt(&aesdata, &aestweak,
-                        test_xts_aes_encrypt,
-                        test_xts_aes_decrypt,
-                        T, data->PTLEN, out, data->PTX);
-        } else {
-            xts_encrypt(&aesdata, &aestweak,
-                        test_xts_aes_encrypt,
-                        test_xts_aes_decrypt,
-                        T, len, out, data->PTX);
-            xts_encrypt(&aesdata, &aestweak,
-                        test_xts_aes_encrypt,
-                        test_xts_aes_decrypt,
-                        T, len, &out[len], &data->PTX[len]);
-        }
+    g_assert(memcmp(out, data->CTX, data->PTLEN) == 0);
 
-        g_assert(memcmp(out, data->CTX, data->PTLEN) == 0);
+    memcpy(T, Torg, sizeof(T));
+    xts_decrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, data->PTLEN, out, data->CTX);
 
-        memcpy(T, Torg, sizeof(T));
-        if (j == 0) {
-            xts_decrypt(&aesdata, &aestweak,
-                        test_xts_aes_encrypt,
-                        test_xts_aes_decrypt,
-                        T, data->PTLEN, out, data->CTX);
-        } else {
-            xts_decrypt(&aesdata, &aestweak,
-                        test_xts_aes_encrypt,
-                        test_xts_aes_decrypt,
-                        T, len, out, data->CTX);
-            xts_decrypt(&aesdata, &aestweak,
-                        test_xts_aes_encrypt,
-                        test_xts_aes_decrypt,
-                        T, len, &out[len], &data->CTX[len]);
-        }
+    g_assert(memcmp(out, data->PTX, data->PTLEN) == 0);
+}
 
-        g_assert(memcmp(out, data->PTX, data->PTLEN) == 0);
-    }
+
+static void test_xts_split(const void *opaque)
+{
+    const QCryptoXTSTestData *data = opaque;
+    uint8_t out[512], Torg[16], T[16];
+    uint64_t seq;
+    unsigned long len = data->PTLEN / 2;
+    struct TestAES aesdata;
+    struct TestAES aestweak;
+    /* Expand key1 into aesdata and key2 into aestweak. */
+    AES_set_encrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.enc);
+    AES_set_decrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.dec);
+    AES_set_encrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.enc);
+    AES_set_decrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.dec);
+    /* Reference tweak: seqnum via STORE64L, remaining 8 bytes zeroed. */
+    seq = data->seqnum;
+    STORE64L(seq, Torg);
+    memset(Torg + 8, 0, 8);
+    /* Encrypt in two len-byte halves, passing the same T to both calls. */
+    memcpy(T, Torg, sizeof(T));
+    xts_encrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, len, out, data->PTX);
+    xts_encrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, len, &out[len], &data->PTX[len]);
+    /* The two halves together must equal the reference ciphertext. */
+    g_assert(memcmp(out, data->CTX, data->PTLEN) == 0);
+    /* Restart from the reference tweak and decrypt the same way. */
+    memcpy(T, Torg, sizeof(T));
+    xts_decrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, len, out, data->CTX);
+    xts_decrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, len, &out[len], &data->CTX[len]);
+
+    g_assert(memcmp(out, data->PTX, data->PTLEN) == 0);
+}
+
+
+static void test_xts_unaligned(const void *opaque)
+{
+#define BAD_ALIGN 3 /* byte offset added to one buffer per pass */
+    const QCryptoXTSTestData *data = opaque;
+    uint8_t in[512 + BAD_ALIGN], out[512 + BAD_ALIGN];
+    uint8_t Torg[16], T[16 + BAD_ALIGN];
+    uint64_t seq;
+    struct TestAES aesdata;
+    struct TestAES aestweak;
+    /* Expand key1 into aesdata and key2 into aestweak. */
+    AES_set_encrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.enc);
+    AES_set_decrypt_key(data->key1, data->keylen / 2 * 8, &aesdata.dec);
+    AES_set_encrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.enc);
+    AES_set_decrypt_key(data->key2, data->keylen / 2 * 8, &aestweak.dec);
+    /* Reference tweak: seqnum via STORE64L, remaining 8 bytes zeroed. */
+    seq = data->seqnum;
+    STORE64L(seq, Torg);
+    memset(Torg + 8, 0, 8);
+    /* Encryption: offset one of IV, input and output per pass. */
+    /* IV not aligned */
+    memcpy(T + BAD_ALIGN, Torg, 16);
+    memcpy(in, data->PTX, data->PTLEN);
+    xts_encrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T + BAD_ALIGN, data->PTLEN, out, in);
+
+    g_assert(memcmp(out, data->CTX, data->PTLEN) == 0);
+
+    /* plain text not aligned */
+    memcpy(T, Torg, 16);
+    memcpy(in + BAD_ALIGN, data->PTX, data->PTLEN);
+    xts_encrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, data->PTLEN, out, in + BAD_ALIGN);
+
+    g_assert(memcmp(out, data->CTX, data->PTLEN) == 0);
+
+    /* cipher text not aligned */
+    memcpy(T, Torg, 16);
+    memcpy(in, data->PTX, data->PTLEN);
+    xts_encrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, data->PTLEN, out + BAD_ALIGN, in);
+
+    g_assert(memcmp(out + BAD_ALIGN, data->CTX, data->PTLEN) == 0);
+    /* Decryption: the same three passes with CTX as the input. */
+
+    /* IV not aligned */
+    memcpy(T + BAD_ALIGN, Torg, 16);
+    memcpy(in, data->CTX, data->PTLEN);
+    xts_decrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T + BAD_ALIGN, data->PTLEN, out, in);
+
+    g_assert(memcmp(out, data->PTX, data->PTLEN) == 0);
+
+    /* cipher text not aligned */
+    memcpy(T, Torg, 16);
+    memcpy(in + BAD_ALIGN, data->CTX, data->PTLEN);
+    xts_decrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, data->PTLEN, out, in + BAD_ALIGN);
+
+    g_assert(memcmp(out, data->PTX, data->PTLEN) == 0);
+
+    /* plain text not aligned */
+    memcpy(T, Torg, 16);
+    memcpy(in, data->CTX, data->PTLEN);
+    xts_decrypt(&aesdata, &aestweak,
+                test_xts_aes_encrypt,
+                test_xts_aes_decrypt,
+                T, data->PTLEN, out + BAD_ALIGN, in);
+
+    g_assert(memcmp(out + BAD_ALIGN, data->PTX, data->PTLEN) == 0);
 }
 
 
@@ -416,7 +507,22 @@
     g_assert(qcrypto_init(NULL) == 0);
 
     for (i = 0; i < G_N_ELEMENTS(test_data); i++) {
-        g_test_add_data_func(test_data[i].path, &test_data[i], test_xts);
+        gchar *path = g_strdup_printf("%s/basic", test_data[i].path);
+        g_test_add_data_func(path, &test_data[i], test_xts);
+        g_free(path);
+
+        /* skip the cases where the length is smaller than 2*blocklen
+         * or the length is not a multiple of 32
+         */
+        if ((test_data[i].PTLEN >= 32) && !(test_data[i].PTLEN % 32)) {
+            path = g_strdup_printf("%s/split", test_data[i].path);
+            g_test_add_data_func(path, &test_data[i], test_xts_split);
+            g_free(path);
+        }
+
+        path = g_strdup_printf("%s/unaligned", test_data[i].path);
+        g_test_add_data_func(path, &test_data[i], test_xts_unaligned);
+        g_free(path);
     }
 
     return g_test_run();
diff --git a/tests/test-rcu-list.c b/tests/test-rcu-list.c
index 192bfbf..2e6f70b 100644
--- a/tests/test-rcu-list.c
+++ b/tests/test-rcu-list.c
@@ -33,8 +33,8 @@
 static QemuMutex counts_mutex;
 static long long n_reads = 0LL;
 static long long n_updates = 0LL;
-static long long n_reclaims = 0LL;
-static long long n_nodes_removed = 0LL;
+static int64_t n_reclaims;
+static int64_t n_nodes_removed;
 static long long n_nodes = 0LL;
 static int g_test_in_charge = 0;
 
@@ -104,7 +104,7 @@
     struct list_element *el = container_of(prcu, struct list_element, rcu);
     g_free(el);
     /* Accessed only from call_rcu thread.  */
-    n_reclaims++;
+    atomic_set_i64(&n_reclaims, n_reclaims + 1);
 }
 
 #if TEST_LIST_TYPE == 1
@@ -232,7 +232,7 @@
     qemu_mutex_lock(&counts_mutex);
     n_nodes += n_nodes_local;
     n_updates += n_updates_local;
-    n_nodes_removed += n_removed_local;
+    atomic_set_i64(&n_nodes_removed, n_nodes_removed + n_removed_local);
     qemu_mutex_unlock(&counts_mutex);
     return NULL;
 }
@@ -286,19 +286,21 @@
         n_removed_local++;
     }
     qemu_mutex_lock(&counts_mutex);
-    n_nodes_removed += n_removed_local;
+    atomic_set_i64(&n_nodes_removed, n_nodes_removed + n_removed_local);
     qemu_mutex_unlock(&counts_mutex);
     synchronize_rcu();
-    while (n_nodes_removed > n_reclaims) {
+    while (atomic_read_i64(&n_nodes_removed) > atomic_read_i64(&n_reclaims)) {
         g_usleep(100);
         synchronize_rcu();
     }
     if (g_test_in_charge) {
-        g_assert_cmpint(n_nodes_removed, ==, n_reclaims);
+        g_assert_cmpint(atomic_read_i64(&n_nodes_removed), ==,
+                        atomic_read_i64(&n_reclaims));
     } else {
         printf("%s: %d readers; 1 updater; nodes read: "  \
-               "%lld, nodes removed: %lld; nodes reclaimed: %lld\n",
-               test, nthreadsrunning - 1, n_reads, n_nodes_removed, n_reclaims);
+               "%lld, nodes removed: %"PRIi64"; nodes reclaimed: %"PRIi64"\n",
+               test, nthreadsrunning - 1, n_reads,
+               atomic_read_i64(&n_nodes_removed), atomic_read_i64(&n_reclaims));
         exit(0);
     }
 }
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index 716aff7..45d58d8 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -169,7 +169,7 @@
                           int mem, enum test_memfd memfd, const char *mem_path,
                           const char *chr_opts, const char *extra)
 {
-    if (memfd == TEST_MEMFD_AUTO && qemu_memfd_check()) {
+    if (memfd == TEST_MEMFD_AUTO && qemu_memfd_check(0)) {
         memfd = TEST_MEMFD_YES;
     }
 
@@ -903,7 +903,7 @@
     s->queues = 2;
     test_server_listen(s);
 
-    if (qemu_memfd_check()) {
+    if (qemu_memfd_check(0)) {
         cmd = g_strdup_printf(
             QEMU_CMD_MEMFD QEMU_CMD_CHR QEMU_CMD_NETDEV ",queues=%d "
             "-device virtio-net-pci,netdev=net0,mq=on,vectors=%d",
@@ -963,7 +963,7 @@
     /* run the main loop thread so the chardev may operate */
     thread = g_thread_new(NULL, thread_function, loop);
 
-    if (qemu_memfd_check()) {
+    if (qemu_memfd_check(0)) {
         qtest_add_data_func("/vhost-user/read-guest-mem/memfd",
                             GINT_TO_POINTER(TEST_MEMFD_YES),
                             test_read_guest_mem);
diff --git a/tests/vm/basevm.py b/tests/vm/basevm.py
index cafbc6b..5caf77d 100755
--- a/tests/vm/basevm.py
+++ b/tests/vm/basevm.py
@@ -18,7 +18,7 @@
 import time
 import datetime
 sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..", "scripts"))
-from qemu import QEMUMachine
+from qemu import QEMUMachine, kvm_available
 import subprocess
 import hashlib
 import optparse
@@ -42,6 +42,8 @@
     BUILD_SCRIPT = ""
     # The guest name, to be overridden by subclasses
     name = "#base"
+    # The guest architecture, to be overridden by subclasses
+    arch = "#arch"
     def __init__(self, debug=False, vcpus=None):
         self._guest = None
         self._tmpdir = os.path.realpath(tempfile.mkdtemp(prefix="vm-test-",
@@ -70,9 +72,9 @@
             "-device", "virtio-net-pci,netdev=vnet",
             "-vnc", "127.0.0.1:0,to=20",
             "-serial", "file:%s" % os.path.join(self._tmpdir, "serial.out")]
-        if vcpus:
+        if vcpus and vcpus > 1:
             self._args += ["-smp", str(vcpus)]
-        if os.access("/dev/kvm", os.R_OK | os.W_OK):
+        if kvm_available(self.arch):
             self._args += ["-enable-kvm"]
         else:
             logging.info("KVM not available, not using -enable-kvm")
@@ -151,7 +153,7 @@
             "-device", "virtio-blk,drive=drive0,bootindex=0"]
         args += self._data_args + extra_args
         logging.debug("QEMU args: %s", " ".join(args))
-        qemu_bin = os.environ.get("QEMU", "qemu-system-x86_64")
+        qemu_bin = os.environ.get("QEMU", "qemu-system-" + self.arch)
         guest = QEMUMachine(binary=qemu_bin, args=args)
         try:
             guest.launch()
@@ -177,11 +179,14 @@
 
     def wait_ssh(self, seconds=300):
         starttime = datetime.datetime.now()
+        endtime = starttime + datetime.timedelta(seconds=seconds)
         guest_up = False
-        while (datetime.datetime.now() - starttime).total_seconds() < seconds:
+        while datetime.datetime.now() < endtime:
             if self.ssh("exit 0") == 0:
                 guest_up = True
                 break
+            seconds = (endtime - datetime.datetime.now()).total_seconds()
+            logging.debug("%ds before timeout", seconds)
             time.sleep(1)
         if not guest_up:
             raise Exception("Timeout while waiting for guest ssh")
@@ -195,7 +200,14 @@
     def qmp(self, *args, **kwargs):
         return self._guest.qmp(*args, **kwargs)
 
-def parse_args(vm_name):
+def parse_args(vmcls):
+
+    def get_default_jobs():
+        # Half the host CPUs with KVM, else one; "//" keeps it an int on py3.
+        if kvm_available(vmcls.arch):
+            return multiprocessing.cpu_count() // 2
+        return 1
+
     parser = optparse.OptionParser(
         description="VM test utility.  Exit codes: "
                     "0 = success, "
@@ -204,11 +216,11 @@
                     "3 = test command failed")
     parser.add_option("--debug", "-D", action="store_true",
                       help="enable debug output")
-    parser.add_option("--image", "-i", default="%s.img" % vm_name,
+    parser.add_option("--image", "-i", default="%s.img" % vmcls.name,
                       help="image file name")
     parser.add_option("--force", "-f", action="store_true",
                       help="force build image even if image exists")
-    parser.add_option("--jobs", type=int, default=multiprocessing.cpu_count() / 2,
+    parser.add_option("--jobs", type=int, default=get_default_jobs(),
                       help="number of virtual CPUs")
     parser.add_option("--verbose", "-V", action="store_true",
                       help="Pass V=1 to builds within the guest")
@@ -225,7 +237,7 @@
 
 def main(vmcls):
     try:
-        args, argv = parse_args(vmcls.name)
+        args, argv = parse_args(vmcls)
         if not argv and not args.build_qemu and not args.build_image:
             print("Nothing to do?")
             return 1
diff --git a/tests/vm/centos b/tests/vm/centos
index afd560c..daa2dbc 100755
--- a/tests/vm/centos
+++ b/tests/vm/centos
@@ -19,6 +19,7 @@
 
 class CentosVM(basevm.BaseVM):
     name = "centos"
+    arch = "x86_64"
     BUILD_SCRIPT = """
         set -e;
         cd $(mktemp -d);
diff --git a/tests/vm/freebsd b/tests/vm/freebsd
index b698312..19a3729 100755
--- a/tests/vm/freebsd
+++ b/tests/vm/freebsd
@@ -18,6 +18,7 @@
 
 class FreeBSDVM(basevm.BaseVM):
     name = "freebsd"
+    arch = "x86_64"
     BUILD_SCRIPT = """
         set -e;
         rm -rf /var/tmp/qemu-test.*
diff --git a/tests/vm/netbsd b/tests/vm/netbsd
index a4e2582..fac6a7c 100755
--- a/tests/vm/netbsd
+++ b/tests/vm/netbsd
@@ -18,6 +18,7 @@
 
 class NetBSDVM(basevm.BaseVM):
     name = "netbsd"
+    arch = "x86_64"
     BUILD_SCRIPT = """
         set -e;
         rm -rf /var/tmp/qemu-test.*
diff --git a/tests/vm/openbsd b/tests/vm/openbsd
index 52500ee..cfe0572 100755
--- a/tests/vm/openbsd
+++ b/tests/vm/openbsd
@@ -18,6 +18,7 @@
 
 class OpenBSDVM(basevm.BaseVM):
     name = "openbsd"
+    arch = "x86_64"
     BUILD_SCRIPT = """
         set -e;
         rm -rf /var/tmp/qemu-test.*
diff --git a/tests/vm/ubuntu.i386 b/tests/vm/ubuntu.i386
index 3f6ed48..1b7e1ab 100755
--- a/tests/vm/ubuntu.i386
+++ b/tests/vm/ubuntu.i386
@@ -19,6 +19,7 @@
 
 class UbuntuX86VM(basevm.BaseVM):
     name = "ubuntu.i386"
+    arch = "i386"
     BUILD_SCRIPT = """
         set -e;
         cd $(mktemp -d);
diff --git a/tpm.c b/tpm.c
index 9303172..9c9e20b 100644
--- a/tpm.c
+++ b/tpm.c
@@ -89,19 +89,19 @@
     int i;
 
     if (!QLIST_EMPTY(&tpm_backends)) {
-        error_report("Only one TPM is allowed.");
+        error_setg(errp, "Only one TPM is allowed.");
         return 1;
     }
 
     id = qemu_opts_id(opts);
     if (id == NULL) {
-        error_report(QERR_MISSING_PARAMETER, "id");
+        error_setg(errp, QERR_MISSING_PARAMETER, "id");
         return 1;
     }
 
     value = qemu_opt_get(opts, "type");
     if (!value) {
-        error_report(QERR_MISSING_PARAMETER, "type");
+        error_setg(errp, QERR_MISSING_PARAMETER, "type");
         tpm_display_backend_drivers();
         return 1;
     }
@@ -109,8 +109,8 @@
     i = qapi_enum_parse(&TpmType_lookup, value, -1, NULL);
     be = i >= 0 ? tpm_be_find_by_type(i) : NULL;
     if (be == NULL) {
-        error_report(QERR_INVALID_PARAMETER_VALUE,
-                     "type", "a TPM backend type");
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type",
+                   "a TPM backend type");
         tpm_display_backend_drivers();
         return 1;
     }
@@ -118,7 +118,7 @@
     /* validate backend specific opts */
     qemu_opts_validate(opts, be->opts, &local_err);
     if (local_err) {
-        error_report_err(local_err);
+        error_propagate(errp, local_err);
         return 1;
     }
 
@@ -151,14 +151,10 @@
  * Initialize the TPM. Process the tpmdev command line options describing the
  * TPM backend.
  */
-int tpm_init(void)
+void tpm_init(void)
 {
-    if (qemu_opts_foreach(qemu_find_opts("tpmdev"),
-                          tpm_init_tpmdev, NULL, NULL)) {
-        return -1;
-    }
-
-    return 0;
+    qemu_opts_foreach(qemu_find_opts("tpmdev"),
+                      tpm_init_tpmdev, NULL, &error_fatal);
 }
 
 /*
diff --git a/ui/curses.c b/ui/curses.c
index 59d819f..f4e7a12 100644
--- a/ui/curses.c
+++ b/ui/curses.c
@@ -28,6 +28,7 @@
 #include <termios.h>
 #endif
 
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "ui/console.h"
 #include "ui/input.h"
@@ -421,9 +422,8 @@
         keyboard_layout = "en-us";
 #endif
     if(keyboard_layout) {
-        kbd_layout = init_keyboard_layout(name2keysym, keyboard_layout);
-        if (!kbd_layout)
-            exit(1);
+        kbd_layout = init_keyboard_layout(name2keysym, keyboard_layout,
+                                          &error_fatal);
     }
 }
 
diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
index fb00ad1..a77c25b 100644
--- a/ui/gtk-egl.c
+++ b/ui/gtk-egl.c
@@ -48,11 +48,7 @@
         return;
     }
 
-#if GTK_CHECK_VERSION(3, 0, 0)
     Window x11_window = gdk_x11_window_get_xid(gdk_window);
-#else
-    Window x11_window = gdk_x11_drawable_get_xid(gdk_window);
-#endif
     if (!x11_window) {
         return;
     }
@@ -82,7 +78,8 @@
                        vc->gfx.esurface, vc->gfx.ectx);
 
         window = gtk_widget_get_window(vc->gfx.drawing_area);
-        gdk_drawable_get_size(window, &ww, &wh);
+        ww = gdk_window_get_width(window);
+        wh = gdk_window_get_height(window);
         surface_gl_setup_viewport(vc->gfx.gls, vc->gfx.ds, ww, wh);
         surface_gl_render_texture(vc->gfx.gls, vc->gfx.ds);
 
@@ -265,7 +262,8 @@
                    vc->gfx.esurface, vc->gfx.ectx);
 
     window = gtk_widget_get_window(vc->gfx.drawing_area);
-    gdk_drawable_get_size(window, &ww, &wh);
+    ww = gdk_window_get_width(window);
+    wh = gdk_window_get_height(window);
     egl_fb_setup_default(&vc->gfx.win_fb, ww, wh);
     if (vc->gfx.cursor_fb.texture) {
         egl_texture_blit(vc->gfx.gls, &vc->gfx.win_fb, &vc->gfx.guest_fb,
diff --git a/ui/gtk.c b/ui/gtk.c
index 3ddb5fe..579990b 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -111,49 +111,6 @@
 # define VTE_CHECK_VERSION(a, b, c) 0
 #endif
 
-#if defined(CONFIG_VTE) && !GTK_CHECK_VERSION(3, 0, 0)
-/*
- * The gtk2 vte terminal widget seriously messes up the window resize
- * for some reason.  You basically can't make the qemu window smaller
- * any more because the toplevel window geoemtry hints are overridden.
- *
- * Workaround that by hiding all vte widgets, except the one in the
- * current tab.
- *
- * Luckily everything works smooth in gtk3.
- */
-# define VTE_RESIZE_HACK 1
-#endif
-
-#if !GTK_CHECK_VERSION(2, 20, 0)
-#define gtk_widget_get_realized(widget) GTK_WIDGET_REALIZED(widget)
-#endif
-
-#ifndef GDK_IS_X11_DISPLAY
-#define GDK_IS_X11_DISPLAY(dpy) (dpy == dpy)
-#endif
-#ifndef GDK_IS_WAYLAND_DISPLAY
-#define GDK_IS_WAYLAND_DISPLAY(dpy) (dpy == dpy)
-#endif
-#ifndef GDK_IS_WIN32_DISPLAY
-#define GDK_IS_WIN32_DISPLAY(dpy) (dpy == dpy)
-#endif
-
-#if !GTK_CHECK_VERSION(2, 22, 0)
-#define GDK_KEY_0 GDK_0
-#define GDK_KEY_1 GDK_1
-#define GDK_KEY_2 GDK_2
-#define GDK_KEY_f GDK_f
-#define GDK_KEY_g GDK_g
-#define GDK_KEY_m GDK_m
-#define GDK_KEY_q GDK_q
-#define GDK_KEY_plus GDK_plus
-#define GDK_KEY_equal GDK_equal
-#define GDK_KEY_minus GDK_minus
-#define GDK_KEY_Pause GDK_Pause
-#define GDK_KEY_Delete GDK_Delete
-#endif
-
 /* Some older mingw versions lack this constant or have
  * it conditionally defined */
 #ifdef _WIN32
@@ -455,7 +412,8 @@
 {
     GtkWidget *area = vc->gfx.drawing_area;
     int ww, wh;
-    gdk_drawable_get_size(gtk_widget_get_window(area), &ww, &wh);
+    ww = gdk_window_get_width(gtk_widget_get_window(area));
+    wh = gdk_window_get_height(gtk_widget_get_window(area));
 #if defined(CONFIG_GTK_GL)
     if (vc->gfx.gls && gtk_use_gl_area) {
         gtk_gl_area_queue_render(GTK_GL_AREA(vc->gfx.drawing_area));
@@ -530,7 +488,8 @@
     if (!win) {
         return;
     }
-    gdk_drawable_get_size(win, &ww, &wh);
+    ww = gdk_window_get_width(win);
+    wh = gdk_window_get_height(win);
 
     mx = my = 0;
     if (ww > fbw) {
@@ -549,7 +508,6 @@
     graphic_hw_update(dcl->con);
 }
 
-#if GTK_CHECK_VERSION(3, 0, 0)
 static GdkDevice *gd_get_pointer(GdkDisplay *dpy)
 {
 #if GTK_CHECK_VERSION(3, 20, 0)
@@ -580,24 +538,6 @@
     vc->s->last_x = x;
     vc->s->last_y = y;
 }
-#else
-static void gd_mouse_set(DisplayChangeListener *dcl,
-                         int x, int y, int visible)
-{
-    VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
-    gint x_root, y_root;
-
-    if (qemu_input_is_absolute()) {
-        return;
-    }
-
-    gdk_window_get_root_coords(gtk_widget_get_window(vc->gfx.drawing_area),
-                               x, y, &x_root, &y_root);
-    gdk_display_warp_pointer(gtk_widget_get_display(vc->gfx.drawing_area),
-                             gtk_widget_get_screen(vc->gfx.drawing_area),
-                             x_root, y_root);
-}
-#endif
 
 static void gd_cursor_define(DisplayChangeListener *dcl,
                              QEMUCursor *c)
@@ -619,11 +559,7 @@
          pixbuf, c->hot_x, c->hot_y);
     gdk_window_set_cursor(gtk_widget_get_window(vc->gfx.drawing_area), cursor);
     g_object_unref(pixbuf);
-#if !GTK_CHECK_VERSION(3, 0, 0)
-    gdk_cursor_unref(cursor);
-#else
     g_object_unref(cursor);
-#endif
 }
 
 static void gd_switch(DisplayChangeListener *dcl,
@@ -863,7 +799,8 @@
     fbw = surface_width(vc->gfx.ds);
     fbh = surface_height(vc->gfx.ds);
 
-    gdk_drawable_get_size(gtk_widget_get_window(widget), &ww, &wh);
+    ww = gdk_window_get_width(gtk_widget_get_window(widget));
+    wh = gdk_window_get_height(gtk_widget_get_window(widget));
 
     if (s->full_screen) {
         vc->gfx.scale_x = (double)ww / fbw;
@@ -907,29 +844,6 @@
     return TRUE;
 }
 
-#if !GTK_CHECK_VERSION(3, 0, 0)
-static gboolean gd_expose_event(GtkWidget *widget, GdkEventExpose *expose,
-                                void *opaque)
-{
-    cairo_t *cr;
-    gboolean ret;
-
-    cr = gdk_cairo_create(gtk_widget_get_window(widget));
-    cairo_rectangle(cr,
-                    expose->area.x,
-                    expose->area.y,
-                    expose->area.width,
-                    expose->area.height);
-    cairo_clip(cr);
-
-    ret = gd_draw_event(widget, cr, opaque);
-
-    cairo_destroy(cr);
-
-    return ret;
-}
-#endif
-
 static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion,
                                 void *opaque)
 {
@@ -947,8 +861,8 @@
     fbw = surface_width(vc->gfx.ds) * vc->gfx.scale_x;
     fbh = surface_height(vc->gfx.ds) * vc->gfx.scale_y;
 
-    gdk_drawable_get_size(gtk_widget_get_window(vc->gfx.drawing_area),
-                          &ww, &wh);
+    ww = gdk_window_get_width(gtk_widget_get_window(vc->gfx.drawing_area));
+    wh = gdk_window_get_height(gtk_widget_get_window(vc->gfx.drawing_area));
 
     mx = my = 0;
     if (ww > fbw) {
@@ -1026,13 +940,8 @@
         }
 
         if (x != (int)motion->x_root || y != (int)motion->y_root) {
-#if GTK_CHECK_VERSION(3, 0, 0)
             GdkDevice *dev = gdk_event_get_device((GdkEvent *)motion);
             gdk_device_warp(dev, screen, x, y);
-#else
-            GdkDisplay *display = gtk_widget_get_display(widget);
-            gdk_display_warp_pointer(display, screen, x, y);
-#endif
             s->last_set = FALSE;
             return FALSE;
         }
@@ -1089,7 +998,6 @@
         btn = INPUT_BUTTON_WHEEL_UP;
     } else if (scroll->direction == GDK_SCROLL_DOWN) {
         btn = INPUT_BUTTON_WHEEL_DOWN;
-#if GTK_CHECK_VERSION(3, 4, 0)
     } else if (scroll->direction == GDK_SCROLL_SMOOTH) {
         gdouble delta_x, delta_y;
         if (!gdk_event_get_scroll_deltas((GdkEvent *)scroll,
@@ -1101,7 +1009,6 @@
         } else {
             btn = INPUT_BUTTON_WHEEL_UP;
         }
-#endif
     } else {
         return TRUE;
     }
@@ -1309,10 +1216,6 @@
     VirtualConsole *vc = opaque;
 
     gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(vc->menu_item), TRUE);
-#if !GTK_CHECK_VERSION(3, 0, 0)
-    /* GTK2 sends the accel key to the target console - ignore this until */
-    vc->s->ignore_keys = true;
-#endif
 }
 
 static void gd_menu_show_tabs(GtkMenuItem *item, void *opaque)
@@ -1539,7 +1442,7 @@
         gdk_seat_ungrab(seat);
     }
 }
-#elif GTK_CHECK_VERSION(3, 0, 0)
+#else
 static void gd_grab_devices(VirtualConsole *vc, bool grab,
                             GdkInputSource source, GdkEventMask mask,
                             GdkCursor *cursor)
@@ -1578,14 +1481,10 @@
 
 #if GTK_CHECK_VERSION(3, 20, 0)
     gd_grab_update(vc, true, vc->s->ptr_owner == vc);
-#elif GTK_CHECK_VERSION(3, 0, 0)
+#else
     gd_grab_devices(vc, true, GDK_SOURCE_KEYBOARD,
                    GDK_KEY_PRESS_MASK | GDK_KEY_RELEASE_MASK,
                    NULL);
-#else
-    gdk_keyboard_grab(gtk_widget_get_window(vc->gfx.drawing_area),
-                      FALSE,
-                      GDK_CURRENT_TIME);
 #endif
     vc->s->kbd_owner = vc;
     gd_update_caption(vc->s);
@@ -1603,10 +1502,8 @@
 
 #if GTK_CHECK_VERSION(3, 20, 0)
     gd_grab_update(vc, false, vc->s->ptr_owner == vc);
-#elif GTK_CHECK_VERSION(3, 0, 0)
-    gd_grab_devices(vc, false, GDK_SOURCE_KEYBOARD, 0, NULL);
 #else
-    gdk_keyboard_ungrab(GDK_CURRENT_TIME);
+    gd_grab_devices(vc, false, GDK_SOURCE_KEYBOARD, 0, NULL);
 #endif
     gd_update_caption(s);
     trace_gd_ungrab(vc->label, "kbd");
@@ -1628,7 +1525,7 @@
     gd_grab_update(vc, vc->s->kbd_owner == vc, true);
     gdk_device_get_position(gd_get_pointer(display),
                             NULL, &vc->s->grab_x_root, &vc->s->grab_y_root);
-#elif GTK_CHECK_VERSION(3, 0, 0)
+#else
     gd_grab_devices(vc, true, GDK_SOURCE_MOUSE,
                     GDK_POINTER_MOTION_MASK |
                     GDK_BUTTON_PRESS_MASK |
@@ -1638,19 +1535,6 @@
                     vc->s->null_cursor);
     gdk_device_get_position(gd_get_pointer(display),
                             NULL, &vc->s->grab_x_root, &vc->s->grab_y_root);
-#else
-    gdk_pointer_grab(gtk_widget_get_window(vc->gfx.drawing_area),
-                     FALSE, /* All events to come to our window directly */
-                     GDK_POINTER_MOTION_MASK |
-                     GDK_BUTTON_PRESS_MASK |
-                     GDK_BUTTON_RELEASE_MASK |
-                     GDK_BUTTON_MOTION_MASK |
-                     GDK_SCROLL_MASK,
-                     NULL, /* Allow cursor to move over entire desktop */
-                     vc->s->null_cursor,
-                     GDK_CURRENT_TIME);
-    gdk_display_get_pointer(display, NULL,
-                            &vc->s->grab_x_root, &vc->s->grab_y_root, NULL);
 #endif
     vc->s->ptr_owner = vc;
     gd_update_caption(vc->s);
@@ -1673,16 +1557,11 @@
     gdk_device_warp(gd_get_pointer(display),
                     gtk_widget_get_screen(vc->gfx.drawing_area),
                     vc->s->grab_x_root, vc->s->grab_y_root);
-#elif GTK_CHECK_VERSION(3, 0, 0)
+#else
     gd_grab_devices(vc, false, GDK_SOURCE_MOUSE, 0, NULL);
     gdk_device_warp(gd_get_pointer(display),
                     gtk_widget_get_screen(vc->gfx.drawing_area),
                     vc->s->grab_x_root, vc->s->grab_y_root);
-#else
-    gdk_pointer_ungrab(GDK_CURRENT_TIME);
-    gdk_display_warp_pointer(display,
-                             gtk_widget_get_screen(vc->gfx.drawing_area),
-                             vc->s->grab_x_root, vc->s->grab_y_root);
 #endif
     gd_update_caption(s);
     trace_gd_ungrab(vc->label, "ptr");
@@ -1715,21 +1594,10 @@
         return;
     }
 
-#ifdef VTE_RESIZE_HACK
-    vc = gd_vc_find_current(s);
-    if (vc && vc->type == GD_VC_VTE) {
-        gtk_widget_hide(vc->vte.terminal);
-    }
-#endif
     vc = gd_vc_find_by_page(s, arg2);
     if (!vc) {
         return;
     }
-#ifdef VTE_RESIZE_HACK
-    if (vc->type == GD_VC_VTE) {
-        gtk_widget_show(vc->vte.terminal);
-    }
-#endif
     gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(vc->menu_item),
                                    TRUE);
     on_vga = (vc->type == GD_VC_GFX &&
@@ -1802,11 +1670,9 @@
     gtk_accel_group_connect(s->accel_group, GDK_KEY_1 + idx,
             HOTKEY_MODIFIERS, 0,
             g_cclosure_new_swap(G_CALLBACK(gd_accel_switch_vc), vc, NULL));
-#if GTK_CHECK_VERSION(3, 8, 0)
     gtk_accel_label_set_accel(
             GTK_ACCEL_LABEL(gtk_bin_get_child(GTK_BIN(vc->menu_item))),
             GDK_KEY_1 + idx, HOTKEY_MODIFIERS);
-#endif
 
     g_signal_connect(vc->menu_item, "activate",
                      G_CALLBACK(gd_menu_switch_vc), s);
@@ -1951,32 +1817,29 @@
     g_signal_connect(vc->vte.terminal, "commit", G_CALLBACK(gd_vc_in), vc);
 
     /* The documentation says that the default is UTF-8, but actually it is
-     * 7-bit ASCII at least in VTE 0.38.
-     */
+     * 7-bit ASCII at least in VTE 0.38. The function is deprecated since
+     * VTE 0.54 (only UTF-8 is supported now). */
+#if !VTE_CHECK_VERSION(0, 54, 0)
 #if VTE_CHECK_VERSION(0, 38, 0)
     vte_terminal_set_encoding(VTE_TERMINAL(vc->vte.terminal), "UTF-8", NULL);
 #else
     vte_terminal_set_encoding(VTE_TERMINAL(vc->vte.terminal), "UTF-8");
 #endif
+#endif
 
     vte_terminal_set_scrollback_lines(VTE_TERMINAL(vc->vte.terminal), -1);
     vte_terminal_set_size(VTE_TERMINAL(vc->vte.terminal),
                           VC_TERM_X_MIN, VC_TERM_Y_MIN);
 
-#if VTE_CHECK_VERSION(0, 28, 0) && GTK_CHECK_VERSION(3, 0, 0)
+#if VTE_CHECK_VERSION(0, 28, 0)
     vadjustment = gtk_scrollable_get_vadjustment
         (GTK_SCROLLABLE(vc->vte.terminal));
 #else
     vadjustment = vte_terminal_get_adjustment(VTE_TERMINAL(vc->vte.terminal));
 #endif
 
-#if GTK_CHECK_VERSION(3, 0, 0)
     box = gtk_box_new(GTK_ORIENTATION_HORIZONTAL, 2);
     scrollbar = gtk_scrollbar_new(GTK_ORIENTATION_VERTICAL, vadjustment);
-#else
-    box = gtk_hbox_new(false, 2);
-    scrollbar = gtk_vscrollbar_new(vadjustment);
-#endif
 
     gtk_box_pack_end(GTK_BOX(box), scrollbar, FALSE, FALSE, 0);
     gtk_box_pack_end(GTK_BOX(box), vc->vte.terminal, TRUE, TRUE, 0);
@@ -2015,7 +1878,6 @@
 
 static void gd_connect_vc_gfx_signals(VirtualConsole *vc)
 {
-#if GTK_CHECK_VERSION(3, 0, 0)
     g_signal_connect(vc->gfx.drawing_area, "draw",
                      G_CALLBACK(gd_draw_event), vc);
 #if defined(CONFIG_GTK_GL)
@@ -2027,10 +1889,6 @@
                          G_CALLBACK(gd_resize_event), vc);
     }
 #endif
-#else
-    g_signal_connect(vc->gfx.drawing_area, "expose-event",
-                     G_CALLBACK(gd_expose_event), vc);
-#endif
     if (qemu_console_is_graphic(vc->gfx.dcl.con)) {
         g_signal_connect(vc->gfx.drawing_area, "event",
                          G_CALLBACK(gd_event), vc);
@@ -2136,7 +1994,7 @@
                               QemuConsole *con, int idx,
                               GSList *group, GtkWidget *view_menu)
 {
-    bool zoom_to_fit;
+    bool zoom_to_fit = false;
 
     vc->label = qemu_console_get_label(con);
     vc->s = s;
@@ -2234,11 +2092,9 @@
 
     gtk_accel_group_connect(s->accel_group, GDK_KEY_f, HOTKEY_MODIFIERS, 0,
             g_cclosure_new_swap(G_CALLBACK(gd_accel_full_screen), s, NULL));
-#if GTK_CHECK_VERSION(3, 8, 0)
     gtk_accel_label_set_accel(
             GTK_ACCEL_LABEL(gtk_bin_get_child(GTK_BIN(s->full_screen_item))),
             GDK_KEY_f, HOTKEY_MODIFIERS);
-#endif
     gtk_menu_shell_append(GTK_MENU_SHELL(view_menu), s->full_screen_item);
 
     separator = gtk_separator_menu_item_new();
@@ -2317,11 +2173,9 @@
                                    TRUE);
     gtk_accel_group_connect(s->accel_group, GDK_KEY_m, HOTKEY_MODIFIERS, 0,
             g_cclosure_new_swap(G_CALLBACK(gd_accel_show_menubar), s, NULL));
-#if GTK_CHECK_VERSION(3, 8, 0)
     gtk_accel_label_set_accel(
             GTK_ACCEL_LABEL(gtk_bin_get_child(GTK_BIN(s->show_menubar_item))),
             GDK_KEY_m, HOTKEY_MODIFIERS);
-#endif
     gtk_menu_shell_append(GTK_MENU_SHELL(view_menu), s->show_menubar_item);
 
     return view_menu;
@@ -2370,17 +2224,8 @@
     assert(opts->type == DISPLAY_TYPE_GTK);
     s->opts = opts;
 
-#if !GTK_CHECK_VERSION(3, 0, 0)
-    g_printerr("Running QEMU with GTK 2.x is deprecated, and will be removed\n"
-               "in a future release. Please switch to GTK 3.x instead\n");
-#endif
-
     s->window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
-#if GTK_CHECK_VERSION(3, 2, 0)
     s->vbox = gtk_box_new(GTK_ORIENTATION_VERTICAL, 0);
-#else
-    s->vbox = gtk_vbox_new(FALSE, 0);
-#endif
     s->notebook = gtk_notebook_new();
     s->menu_bar = gtk_menu_bar_new();
 
@@ -2431,23 +2276,6 @@
 
     gtk_widget_show_all(s->window);
 
-#ifdef VTE_RESIZE_HACK
-    {
-        VirtualConsole *cur = gd_vc_find_current(s);
-        if (cur) {
-            int i;
-
-            for (i = 0; i < s->nb_vcs; i++) {
-                VirtualConsole *vc = &s->vc[i];
-                if (vc && vc->type == GD_VC_VTE && vc != cur) {
-                    gtk_widget_hide(vc->vte.terminal);
-                }
-            }
-            gd_update_windowsize(cur);
-        }
-    }
-#endif
-
     vc = gd_vc_find_current(s);
     gtk_widget_set_sensitive(s->view_menu, vc != NULL);
 #ifdef CONFIG_VTE
diff --git a/ui/input.c b/ui/input.c
index 51b1019..7c9a410 100644
--- a/ui/input.c
+++ b/ui/input.c
@@ -448,8 +448,9 @@
     }
 
     if (!kbd_timer) {
-        kbd_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, qemu_input_queue_process,
-                                 &kbd_queue);
+        kbd_timer = timer_new_full(NULL, QEMU_CLOCK_VIRTUAL,
+                                   SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL,
+                                   qemu_input_queue_process, &kbd_queue);
     }
     if (queue_count < queue_limit) {
         qemu_input_queue_delay(&kbd_queue, kbd_timer,
diff --git a/ui/keymaps.c b/ui/keymaps.c
index 43fe604..085889b 100644
--- a/ui/keymaps.c
+++ b/ui/keymaps.c
@@ -27,6 +27,7 @@
 #include "sysemu/sysemu.h"
 #include "trace.h"
 #include "qemu/error-report.h"
+#include "qapi/error.h"
 
 struct keysym2code {
     uint32_t count;
@@ -79,10 +80,11 @@
     trace_keymap_add(keysym, keycode, line);
 }
 
-static kbd_layout_t *parse_keyboard_layout(const name2keysym_t *table,
-                                           const char *language,
-                                           kbd_layout_t *k)
+static int parse_keyboard_layout(kbd_layout_t *k,
+                                 const name2keysym_t *table,
+                                 const char *language, Error **errp)
 {
+    int ret;
     FILE *f;
     char * filename;
     char line[1024];
@@ -94,13 +96,8 @@
     f = filename ? fopen(filename, "r") : NULL;
     g_free(filename);
     if (!f) {
-        fprintf(stderr, "Could not read keymap file: '%s'\n", language);
-        return NULL;
-    }
-
-    if (!k) {
-        k = g_new0(kbd_layout_t, 1);
-        k->hash = g_hash_table_new(NULL, NULL);
+        error_setg(errp, "could not read keymap file: '%s'", language);
+        return -1;
     }
 
     for(;;) {
@@ -118,7 +115,10 @@
             continue;
         }
         if (!strncmp(line, "include ", 8)) {
-            parse_keyboard_layout(table, line + 8, k);
+            if (parse_keyboard_layout(k, table, line + 8, errp) < 0) {
+                ret = -1;
+                goto out;
+            }
         } else {
             int offset = 0;
             while (line[offset] != 0 &&
@@ -164,15 +164,27 @@
             }
         }
     }
+
+    ret = 0;
+out:
     fclose(f);
-    return k;
+    return ret;
 }
 
 
 kbd_layout_t *init_keyboard_layout(const name2keysym_t *table,
-                                   const char *language)
+                                   const char *language, Error **errp)
 {
-    return parse_keyboard_layout(table, language, NULL);
+    kbd_layout_t *k;
+
+    k = g_new0(kbd_layout_t, 1); /* allocated here, filled by the parser */
+    k->hash = g_hash_table_new(NULL, NULL);
+    if (parse_keyboard_layout(k, table, language, errp) < 0) {
+        g_hash_table_unref(k->hash); /* drop the partially built layout */
+        g_free(k);
+        return NULL; /* failure was already reported through errp */
+    }
+    return k;
 }
 
 
diff --git a/ui/keymaps.h b/ui/keymaps.h
index 0693588..98213a4 100644
--- a/ui/keymaps.h
+++ b/ui/keymaps.h
@@ -53,7 +53,7 @@
 typedef struct kbd_layout_t kbd_layout_t;
 
 kbd_layout_t *init_keyboard_layout(const name2keysym_t *table,
-                                   const char *language);
+                                   const char *language, Error **errp);
 int keysym2scancode(kbd_layout_t *k, int keysym,
                     bool shift, bool altgr, bool ctrl);
 int keycode_is_keypad(kbd_layout_t *k, int keycode);
diff --git a/ui/sdl.c b/ui/sdl.c
index a5fd503..190b16f 100644
--- a/ui/sdl.c
+++ b/ui/sdl.c
@@ -29,6 +29,7 @@
 #include <SDL.h>
 #include <SDL_syswm.h>
 
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "qemu/cutils.h"
 #include "ui/console.h"
@@ -917,9 +918,8 @@
         keyboard_layout = "en-us";
 #endif
     if(keyboard_layout) {
-        kbd_layout = init_keyboard_layout(name2keysym, keyboard_layout);
-        if (!kbd_layout)
-            exit(1);
+        kbd_layout = init_keyboard_layout(name2keysym, keyboard_layout,
+                                          &error_fatal);
     }
 
     g_printerr("Running QEMU with SDL 1.2 is deprecated, and will be removed\n"
diff --git a/ui/sdl2-2d.c b/ui/sdl2-2d.c
index 8548440..091ecfc 100644
--- a/ui/sdl2-2d.c
+++ b/ui/sdl2-2d.c
@@ -101,15 +101,24 @@
     case PIXMAN_r5g6b5:
         format = SDL_PIXELFORMAT_RGB565;
         break;
+    case PIXMAN_a8r8g8b8:
     case PIXMAN_x8r8g8b8:
         format = SDL_PIXELFORMAT_ARGB8888;
         break;
+    case PIXMAN_a8b8g8r8:
+    case PIXMAN_x8b8g8r8:
+        format = SDL_PIXELFORMAT_ABGR8888;
+        break;
+    case PIXMAN_r8g8b8a8:
     case PIXMAN_r8g8b8x8:
         format = SDL_PIXELFORMAT_RGBA8888;
         break;
     case PIXMAN_b8g8r8x8:
         format = SDL_PIXELFORMAT_BGRX8888;
         break;
+    case PIXMAN_b8g8r8a8:
+        format = SDL_PIXELFORMAT_BGRA8888;
+        break;
     default:
         g_assert_not_reached();
     }
@@ -149,7 +158,13 @@
      * the native ones. Thes are the ones I have tested.
      */
     return (format == PIXMAN_x8r8g8b8 ||
+            format == PIXMAN_a8r8g8b8 ||
+            format == PIXMAN_a8b8g8r8 ||
+            format == PIXMAN_x8b8g8r8 ||
             format == PIXMAN_b8g8r8x8 ||
+            format == PIXMAN_b8g8r8a8 ||
+            format == PIXMAN_r8g8b8x8 ||
+            format == PIXMAN_r8g8b8a8 ||
             format == PIXMAN_x1r5g5b5 ||
             format == PIXMAN_r5g6b5);
 }
diff --git a/ui/sdl2.c b/ui/sdl2.c
index 2696b95..a10b6e3 100644
--- a/ui/sdl2.c
+++ b/ui/sdl2.c
@@ -786,6 +786,9 @@
                 SDL_GetError());
         exit(1);
     }
+#ifdef SDL_HINT_VIDEO_X11_NET_WM_BYPASS_COMPOSITOR /* only available since SDL 2.0.8 */
+    SDL_SetHint(SDL_HINT_VIDEO_X11_NET_WM_BYPASS_COMPOSITOR, "0");
+#endif
     SDL_SetHint(SDL_HINT_GRAB_KEYBOARD, "1");
     memset(&info, 0, sizeof(info));
     SDL_VERSION(&info.version);
diff --git a/ui/spice-core.c b/ui/spice-core.c
index a4fbbc3..ebaae24 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -597,9 +597,9 @@
     if (strcmp(name, "tls-channel") == 0) {
         int *tls_port = opaque;
         if (!*tls_port) {
-            error_report("spice: tried to setup tls-channel"
-                         " without specifying a TLS port");
-            exit(1);
+            error_setg(errp, "spice: tried to setup tls-channel"
+                       " without specifying a TLS port");
+            return -1;
         }
         security = SPICE_CHANNEL_SECURITY_SSL;
     }
@@ -615,8 +615,9 @@
         rc = spice_server_set_channel_security(spice_server, value, security);
     }
     if (rc != 0) {
-        error_report("spice: failed to set channel security for %s", value);
-        exit(1);
+        error_setg(errp, "spice: failed to set channel security for %s",
+                   value);
+        return -1;
     }
     return 0;
 }
@@ -787,7 +788,7 @@
     spice_server_set_playback_compression
         (spice_server, qemu_opt_get_bool(opts, "playback-compression", 1));
 
-    qemu_opt_foreach(opts, add_channel, &tls_port, NULL);
+    qemu_opt_foreach(opts, add_channel, &tls_port, &error_fatal);
 
     spice_server_set_name(spice_server, qemu_name);
     spice_server_set_uuid(spice_server, (unsigned char *)&qemu_uuid);
diff --git a/ui/spice-display.c b/ui/spice-display.c
index 2f8adb6..52f8cb5 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -674,10 +674,28 @@
 
     memset(&info, 0, sizeof(info));
 
-    head = qemu_console_get_head(ssd->dcl.con);
-    if (mc->num_of_monitors > head) {
-        info.width  = mc->monitors[head].width;
-        info.height = mc->monitors[head].height;
+    if (mc->num_of_monitors == 1) {
+        /*
+         * New spice-server version which filters the list of monitors
+         * to only include those that belong to our display channel.
+         *
+         * single-head configuration (where filtering doesn't matter)
+         * takes this code path too.
+         */
+        info.width  = mc->monitors[0].width;
+        info.height = mc->monitors[0].height;
+    } else {
+        /*
+         * Old spice-server which gives us all monitors, so we have to
+         * figure out ourselves which entry we need.  Array index is the
+         * channel_id, which is the qemu console index, see
+         * qemu_spice_add_display_interface().
+         */
+        head = qemu_console_get_index(ssd->dcl.con);
+        if (mc->num_of_monitors > head) {
+            info.width  = mc->monitors[head].width;
+            info.height = mc->monitors[head].height;
+        }
     }
 
     trace_qemu_spice_ui_info(ssd->qxl.id, info.width, info.height);
diff --git a/ui/vnc.c b/ui/vnc.c
index cf221c8..0c1b477 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3205,7 +3205,7 @@
     .dpy_cursor_define    = vnc_dpy_cursor_define,
 };
 
-void vnc_display_init(const char *id)
+void vnc_display_init(const char *id, Error **errp)
 {
     VncDisplay *vd;
 
@@ -3222,13 +3222,14 @@
 
     if (keyboard_layout) {
         trace_vnc_key_map_init(keyboard_layout);
-        vd->kbd_layout = init_keyboard_layout(name2keysym, keyboard_layout);
+        vd->kbd_layout = init_keyboard_layout(name2keysym,
+                                              keyboard_layout, errp);
     } else {
-        vd->kbd_layout = init_keyboard_layout(name2keysym, "en-us");
+        vd->kbd_layout = init_keyboard_layout(name2keysym, "en-us", errp);
     }
 
     if (!vd->kbd_layout) {
-        exit(1);
+        return;
     }
 
     vd->share_policy = VNC_SHARE_POLICY_ALLOW_EXCLUSIVE;
@@ -4079,11 +4080,15 @@
     char *id = (char *)qemu_opts_id(opts);
 
     assert(id);
-    vnc_display_init(id);
+    vnc_display_init(id, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return -1;
+    }
     vnc_display_open(id, &local_err);
     if (local_err != NULL) {
-        error_reportf_err(local_err, "Failed to start VNC server: ");
-        exit(1);
+        error_propagate(errp, local_err);
+        return -1;
     }
     return 0;
 }
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 0e88899..0820923 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -3,6 +3,7 @@
 util-obj-y += lockcnt.o
 util-obj-y += aiocb.o async.o aio-wait.o thread-pool.o qemu-timer.o
 util-obj-y += main-loop.o iohandler.o
+util-obj-$(call lnot,$(CONFIG_ATOMIC64)) += atomic64.o
 util-obj-$(CONFIG_POSIX) += aio-posix.o
 util-obj-$(CONFIG_POSIX) += compatfd.o
 util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
diff --git a/util/aio-posix.c b/util/aio-posix.c
index 621b302..51c41ed 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -40,7 +40,7 @@
 
 #ifdef CONFIG_EPOLL_CREATE1
 
-/* The fd number threashold to switch to epoll */
+/* The fd number threshold to switch to epoll */
 #define EPOLL_ENABLE_THRESHOLD 64
 
 static void aio_epoll_disable(AioContext *ctx)
diff --git a/util/atomic64.c b/util/atomic64.c
new file mode 100644
index 0000000..b198a6c
--- /dev/null
+++ b/util/atomic64.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/atomic.h"
+#include "qemu/thread.h"
+
+#ifdef CONFIG_ATOMIC64
+#error This file must only be compiled if !CONFIG_ATOMIC64
+#endif
+
+/*
+ * When !CONFIG_ATOMIC64, we serialize both reads and writes with spinlocks.
+ * We use an array of spinlocks, with padding computed at run-time based on
+ * the host's dcache line size.
+ * We point to the array with a void * to simplify the padding's computation.
+ * Each spinlock is located every lock_size bytes.
+ */
+static void *lock_array;
+static size_t lock_size;
+
+/*
+ * Systems without CONFIG_ATOMIC64 are unlikely to have many cores, so we use a
+ * small array of locks.
+ */
+#define NR_LOCKS 16
+
+static QemuSpin *addr_to_lock(const void *addr) /* map addr to its lock */
+{
+    uintptr_t a = (uintptr_t)addr;
+    uintptr_t idx;
+
+    idx = a >> qemu_dcache_linesize_log; /* index of addr's dcache line */
+    idx ^= (idx >> 8) ^ (idx >> 16); /* fold higher bits into the low ones */
+    idx &= NR_LOCKS - 1; /* power-of-two NR_LOCKS: same as % NR_LOCKS */
+    return lock_array + idx * lock_size; /* byte offset into the array */
+}
+
+#define GEN_READ(name, type) /* locked load */  \
+    type name(const type *ptr)                  \
+    {                                           \
+        QemuSpin *lock = addr_to_lock(ptr);     \
+        type ret;                               \
+                                                \
+        qemu_spin_lock(lock);                   \
+        ret = *ptr; /* read under the lock */   \
+        qemu_spin_unlock(lock);                 \
+        return ret;                             \
+    }
+
+GEN_READ(atomic_read_i64, int64_t) /* defines atomic_read_i64() */
+GEN_READ(atomic_read_u64, uint64_t) /* defines atomic_read_u64() */
+#undef GEN_READ
+
+#define GEN_SET(name, type) /* locked store */  \
+    void name(type *ptr, type val)              \
+    {                                           \
+        QemuSpin *lock = addr_to_lock(ptr);     \
+                                                \
+        qemu_spin_lock(lock);                   \
+        *ptr = val; /* write under the lock */  \
+        qemu_spin_unlock(lock);                 \
+    }
+
+GEN_SET(atomic_set_i64, int64_t) /* defines atomic_set_i64() */
+GEN_SET(atomic_set_u64, uint64_t) /* defines atomic_set_u64() */
+#undef GEN_SET
+
+void atomic64_init(void) /* set up the lock array used by addr_to_lock() */
+{
+    int i;
+
+    lock_size = ROUND_UP(sizeof(QemuSpin), qemu_dcache_linesize);
+    lock_array = qemu_memalign(qemu_dcache_linesize, lock_size * NR_LOCKS);
+    for (i = 0; i < NR_LOCKS; i++) { /* one lock per lock_size-byte slot */
+        QemuSpin *lock = lock_array + i * lock_size;
+
+        qemu_spin_init(lock);
+    }
+}
diff --git a/util/cacheinfo.c b/util/cacheinfo.c
index db5172d..3cd080b 100644
--- a/util/cacheinfo.c
+++ b/util/cacheinfo.c
@@ -7,9 +7,13 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include "qemu/atomic.h"
 
 int qemu_icache_linesize = 0;
+int qemu_icache_linesize_log;
 int qemu_dcache_linesize = 0;
+int qemu_dcache_linesize_log;
 
 /*
  * Operating system specific detection mechanisms.
@@ -172,6 +176,13 @@
     arch_cache_info(&isize, &dsize);
     fallback_cache_info(&isize, &dsize);
 
+    assert((isize & (isize - 1)) == 0);
+    assert((dsize & (dsize - 1)) == 0);
+
     qemu_icache_linesize = isize;
+    qemu_icache_linesize_log = ctz32(isize);
     qemu_dcache_linesize = dsize;
+    qemu_dcache_linesize_log = ctz32(dsize);
+
+    atomic64_init();
 }
diff --git a/util/cutils.c b/util/cutils.c
index 9205e09..698bd31 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -769,3 +769,8 @@
 
     return g_strdup_printf("%0.3g %sB", (double)val / div, suffixes[i]);
 }
+
+int qemu_pstrcmp0(const char **str1, const char **str2)
+{
+    return g_strcmp0(*str1, *str2); /* compares the pointed-to strings */
+}
diff --git a/util/error.c b/util/error.c
index 3efdd69..b5ccbd8 100644
--- a/util/error.c
+++ b/util/error.c
@@ -292,3 +292,16 @@
         error_free(local_err);
     }
 }
+
+void error_propagate_prepend(Error **dst_errp, Error *err,
+                             const char *fmt, ...)
+{
+    va_list ap;
+
+    if (dst_errp && !*dst_errp) { /* only when err will be kept */
+        va_start(ap, fmt);
+        error_vprepend(&err, fmt, ap); /* presumably prefixes err's message */
+        va_end(ap);
+    } /* else error is being ignored, don't bother with prepending */
+    error_propagate(dst_errp, err);
+}
diff --git a/util/hbitmap.c b/util/hbitmap.c
index bcd3040..8d402c5 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -723,6 +723,10 @@
     }
 }
 
+bool hbitmap_can_merge(const HBitmap *a, const HBitmap *b)
+{ /* true iff a and b agree on both size and granularity */
+    return (a->size == b->size) && (a->granularity == b->granularity);
+}
 
 /**
  * Given HBitmaps A and B, let A := A (BITOR) B.
@@ -731,14 +735,15 @@
  * @return true if the merge was successful,
  *         false if it was not attempted.
  */
-bool hbitmap_merge(HBitmap *a, const HBitmap *b)
+bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result)
 {
     int i;
     uint64_t j;
 
-    if ((a->size != b->size) || (a->granularity != b->granularity)) {
+    if (!hbitmap_can_merge(a, b) || !hbitmap_can_merge(a, result)) {
         return false;
     }
+    assert(hbitmap_can_merge(b, result));
 
     if (hbitmap_count(b) == 0) {
         return true;
@@ -750,10 +755,13 @@
      */
     for (i = HBITMAP_LEVELS - 1; i >= 0; i--) {
         for (j = 0; j < a->sizes[i]; j++) {
-            a->levels[i][j] |= b->levels[i][j];
+            result->levels[i][j] = a->levels[i][j] | b->levels[i][j];
         }
     }
 
+    /* Recompute the dirty count */
+    result->count = hb_count_between(result, 0, result->size - 1);
+
     return true;
 }
 
diff --git a/util/memfd.c b/util/memfd.c
index 6287946..8debd0d 100644
--- a/util/memfd.c
+++ b/util/memfd.c
@@ -45,22 +45,6 @@
 }
 #endif
 
-#ifndef MFD_CLOEXEC
-#define MFD_CLOEXEC 0x0001U
-#endif
-
-#ifndef MFD_ALLOW_SEALING
-#define MFD_ALLOW_SEALING 0x0002U
-#endif
-
-#ifndef MFD_HUGETLB
-#define MFD_HUGETLB 0x0004U
-#endif
-
-#ifndef MFD_HUGE_SHIFT
-#define MFD_HUGE_SHIFT 26
-#endif
-
 int qemu_memfd_create(const char *name, size_t size, bool hugetlb,
                       uint64_t hugetlbsize, unsigned int seals, Error **errp)
 {
@@ -201,23 +185,16 @@
  *
  * Check if host supports memfd.
  */
-bool qemu_memfd_check(void)
+bool qemu_memfd_check(unsigned int flags)
 {
 #ifdef CONFIG_LINUX
-    static int memfd_check = MEMFD_TODO;
+    int mfd = memfd_create("test", flags);
 
-    if (memfd_check == MEMFD_TODO) {
-        int mfd = memfd_create("test", 0);
-        if (mfd >= 0) {
-            memfd_check = MEMFD_OK;
-            close(mfd);
-        } else {
-            memfd_check = MEMFD_KO;
-        }
+    if (mfd >= 0) {
+        close(mfd);
+        return true;
     }
-
-    return memfd_check == MEMFD_OK;
-#else
-    return false;
 #endif
+
+    return false;
 }
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 13b6f8d..fbd0dc8 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -88,6 +88,79 @@
     return daemon(nochdir, noclose);
 }
 
+bool qemu_write_pidfile(const char *path, Error **errp)
+{
+    int fd;
+    char pidstr[32];
+
+    while (1) {
+        struct stat a, b;
+        struct flock lock = {
+            .l_type = F_WRLCK,
+            .l_whence = SEEK_SET,
+            .l_len = 0,
+        };
+
+        fd = qemu_open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
+        if (fd == -1) {
+            error_setg_errno(errp, errno, "Cannot open pid file");
+            return false;
+        }
+
+        if (fstat(fd, &b) < 0) {
+            error_setg_errno(errp, errno, "Cannot stat file");
+            goto fail_close;
+        }
+
+        if (fcntl(fd, F_SETLK, &lock)) {
+            error_setg_errno(errp, errno, "Cannot lock pid file");
+            goto fail_close;
+        }
+
+        /*
+         * Now make sure the path we locked is the same one that now
+         * exists on the filesystem.
+         */
+        if (stat(path, &a) < 0) {
+            /*
+             * PID file disappeared, someone else must be racing with
+             * us, so try again.
+             */
+            close(fd);
+            continue;
+        }
+
+        if (a.st_ino == b.st_ino) {
+            break;
+        }
+
+        /*
+         * PID file was recreated, someone else must be racing with
+         * us, so try again.
+         */
+        close(fd);
+    }
+
+    if (ftruncate(fd, 0) < 0) {
+        error_setg_errno(errp, errno, "Failed to truncate pid file");
+        goto fail_unlink;
+    }
+
+    snprintf(pidstr, sizeof(pidstr), FMT_pid "\n", getpid());
+    if (write(fd, pidstr, strlen(pidstr)) != strlen(pidstr)) {
+        error_setg(errp, "Failed to write pid file");
+        goto fail_unlink;
+    }
+
+    return true;
+
+fail_unlink:
+    unlink(path);
+fail_close:
+    close(fd);
+    return false;
+}
+
 void *qemu_oom_check(void *ptr)
 {
     if (ptr == NULL) {
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 25dd159..b4c17f5 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -776,3 +776,30 @@
     }
     return ret;
 }
+
+bool qemu_write_pidfile(const char *filename, Error **errp)
+{
+    char buffer[128];
+    int len;
+    HANDLE file;
+    OVERLAPPED overlap;
+    BOOL ret;
+    memset(&overlap, 0, sizeof(overlap));
+
+    file = CreateFile(filename, GENERIC_WRITE, FILE_SHARE_READ, NULL,
+                      OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+
+    if (file == INVALID_HANDLE_VALUE) {
+        error_setg(errp, "Failed to create PID file");
+        return false;
+    }
+    len = snprintf(buffer, sizeof(buffer), FMT_pid "\n", (pid_t)getpid());
+    ret = WriteFile(file, (LPCVOID)buffer, (DWORD)len,
+                    NULL, &overlap);
+    CloseHandle(file);
+    if (ret == 0) {
+        error_setg(errp, "Failed to write PID file");
+        return false;
+    }
+    return true;
+}
diff --git a/util/qemu-error.c b/util/qemu-error.c
index 4ab428f..fcbe8a1 100644
--- a/util/qemu-error.c
+++ b/util/qemu-error.c
@@ -194,7 +194,6 @@
  * Format arguments like vsprintf().  The resulting message should be
  * a single phrase, with no newline or trailing punctuation.
  * Prepend the current location and append a newline.
- * It's wrong to call this in a QMP monitor.  Use error_setg() there.
  */
 static void vreport(report_type type, const char *fmt, va_list ap)
 {
@@ -242,7 +241,6 @@
  * Format arguments like vsprintf().  The resulting message should be
  * a single phrase, with no newline or trailing punctuation.
  * Prepend the current location and append a newline.
- * It's wrong to call this in a QMP monitor.  Use error_setg() there.
  */
 void warn_vreport(const char *fmt, va_list ap)
 {
@@ -255,7 +253,6 @@
  * Format arguments like vsprintf().  The resulting message should be
  * a single phrase, with no newline or trailing punctuation.
  * Prepend the current location and append a newline.
- * It's wrong to call this in a QMP monitor.  Use error_setg() there.
  */
 void info_vreport(const char *fmt, va_list ap)
 {
@@ -283,7 +280,6 @@
  * Format arguments like sprintf(). The resulting message should be a
  * single phrase, with no newline or trailing punctuation.
  * Prepend the current location and append a newline.
- * It's wrong to call this in a QMP monitor.  Use error_setg() there.
  */
 void warn_report(const char *fmt, ...)
 {
@@ -300,7 +296,6 @@
  * Format arguments like sprintf(). The resulting message should be a
  * single phrase, with no newline or trailing punctuation.
  * Prepend the current location and append a newline.
- * It's wrong to call this in a QMP monitor.  Use error_setg() there.
  */
 void info_report(const char *fmt, ...)
 {
diff --git a/util/qemu-option.c b/util/qemu-option.c
index 01886ef..9a5f263 100644
--- a/util/qemu-option.c
+++ b/util/qemu-option.c
@@ -208,17 +208,51 @@
     return result;
 }
 
+static const char *opt_type_to_string(enum QemuOptType type)
+{
+    switch (type) {
+    case QEMU_OPT_STRING:
+        return "str";
+    case QEMU_OPT_BOOL:
+        return "bool (on/off)";
+    case QEMU_OPT_NUMBER:
+        return "num";
+    case QEMU_OPT_SIZE:
+        return "size";
+    }
+
+    g_assert_not_reached();
+}
+
 void qemu_opts_print_help(QemuOptsList *list)
 {
     QemuOptDesc *desc;
+    int i;
+    GPtrArray *array = g_ptr_array_new();
 
     assert(list);
     desc = list->desc;
     while (desc && desc->name) {
-        printf("%-16s %s\n", desc->name,
-               desc->help ? desc->help : "No description available");
+        GString *str = g_string_new(NULL);
+        if (list->name) {
+            g_string_append_printf(str, "%s.", list->name);
+        }
+        g_string_append_printf(str, "%s=%s", desc->name,
+                               opt_type_to_string(desc->type));
+        if (desc->help) {
+            g_string_append_printf(str, " - %s", desc->help);
+        }
+        g_ptr_array_add(array, g_string_free(str, false));
         desc++;
     }
+
+    g_ptr_array_sort(array, (GCompareFunc)qemu_pstrcmp0);
+    for (i = 0; i < array->len; i++) {
+        printf("%s\n", (char *)array->pdata[i]);
+    }
+    g_ptr_array_set_free_func(array, g_free);
+    g_ptr_array_free(array, true);
+
 }
 /* ------------------------------------------------------------------ */
 
@@ -486,7 +520,7 @@
 }
 
 static void opt_set(QemuOpts *opts, const char *name, char *value,
-                    bool prepend, Error **errp)
+                    bool prepend, bool *invalidp, Error **errp)
 {
     QemuOpt *opt;
     const QemuOptDesc *desc;
@@ -496,6 +530,9 @@
     if (!desc && !opts_accepts_any(opts)) {
         g_free(value);
         error_setg(errp, QERR_INVALID_PARAMETER, name);
+        if (invalidp) {
+            *invalidp = true;
+        }
         return;
     }
 
@@ -519,7 +556,7 @@
 void qemu_opt_set(QemuOpts *opts, const char *name, const char *value,
                   Error **errp)
 {
-    opt_set(opts, name, g_strdup(value), false, errp);
+    opt_set(opts, name, g_strdup(value), false, NULL, errp);
 }
 
 void qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val,
@@ -750,7 +787,8 @@
 }
 
 static void opts_do_parse(QemuOpts *opts, const char *params,
-                          const char *firstname, bool prepend, Error **errp)
+                          const char *firstname, bool prepend,
+                          bool *invalidp, Error **errp)
 {
     char *option = NULL;
     char *value = NULL;
@@ -785,7 +823,7 @@
         }
         if (strcmp(option, "id") != 0) {
             /* store and parse */
-            opt_set(opts, option, value, prepend, &local_err);
+            opt_set(opts, option, value, prepend, invalidp, &local_err);
             value = NULL;
             if (local_err) {
                 error_propagate(errp, local_err);
@@ -814,11 +852,12 @@
 void qemu_opts_do_parse(QemuOpts *opts, const char *params,
                        const char *firstname, Error **errp)
 {
-    opts_do_parse(opts, params, firstname, false, errp);
+    opts_do_parse(opts, params, firstname, false, NULL, errp);
 }
 
 static QemuOpts *opts_parse(QemuOptsList *list, const char *params,
-                            bool permit_abbrev, bool defaults, Error **errp)
+                            bool permit_abbrev, bool defaults,
+                            bool *invalidp, Error **errp)
 {
     const char *firstname;
     char *id = NULL;
@@ -850,7 +889,7 @@
         return NULL;
     }
 
-    opts_do_parse(opts, params, firstname, defaults, &local_err);
+    opts_do_parse(opts, params, firstname, defaults, invalidp, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         qemu_opts_del(opts);
@@ -870,7 +909,7 @@
 QemuOpts *qemu_opts_parse(QemuOptsList *list, const char *params,
                           bool permit_abbrev, Error **errp)
 {
-    return opts_parse(list, params, permit_abbrev, false, errp);
+    return opts_parse(list, params, permit_abbrev, false, NULL, errp);
 }
 
 /**
@@ -886,10 +925,16 @@
 {
     Error *err = NULL;
     QemuOpts *opts;
+    bool invalidp = false;
 
-    opts = opts_parse(list, params, permit_abbrev, false, &err);
+    opts = opts_parse(list, params, permit_abbrev, false, &invalidp, &err);
     if (err) {
-        error_report_err(err);
+        if (invalidp && has_help_option(params)) {
+            qemu_opts_print_help(list);
+            error_free(err);
+        } else {
+            error_report_err(err);
+        }
     }
     return opts;
 }
@@ -899,7 +944,7 @@
 {
     QemuOpts *opts;
 
-    opts = opts_parse(list, params, permit_abbrev, true, NULL);
+    opts = opts_parse(list, params, permit_abbrev, true, NULL, NULL);
     assert(opts);
 }
 
diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index 86bfe84..1cc1b2f 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -339,14 +339,19 @@
 }
 
 
-void timer_init_tl(QEMUTimer *ts,
-                   QEMUTimerList *timer_list, int scale,
-                   QEMUTimerCB *cb, void *opaque)
+void timer_init_full(QEMUTimer *ts,
+                     QEMUTimerListGroup *timer_list_group, QEMUClockType type,
+                     int scale, int attributes,
+                     QEMUTimerCB *cb, void *opaque)
 {
-    ts->timer_list = timer_list;
+    if (!timer_list_group) {
+        timer_list_group = &main_loop_tlg;
+    }
+    ts->timer_list = timer_list_group->tl[type];
     ts->cb = cb;
     ts->opaque = opaque;
     ts->scale = scale;
+    ts->attributes = attributes;
     ts->expire_time = -1;
 }
 
@@ -484,6 +489,7 @@
     bool progress = false;
     QEMUTimerCB *cb;
     void *opaque;
+    bool need_replay_checkpoint = false;
 
     if (!atomic_read(&timer_list->active_timers)) {
         return false;
@@ -499,8 +505,15 @@
         break;
     default:
     case QEMU_CLOCK_VIRTUAL:
-        if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) {
-            goto out;
+        if (replay_mode != REPLAY_MODE_NONE) {
+            /* Checkpoint for virtual clock is redundant in cases where
+             * it's being triggered with only non-EXTERNAL timers, because
+             * these timers don't change guest state directly.
+             * Since it has conditional dependence on specific timers, it is
+             * subject to race conditions and requires special handling.
+             * See below.
+             */
+            need_replay_checkpoint = true;
         }
         break;
     case QEMU_CLOCK_HOST:
@@ -515,14 +528,39 @@
         break;
     }
 
+    /*
+     * Extract expired timers from the active timers list and process them.
+     *
+     * In rr mode we need "filtered" checkpointing for the virtual clock.  The
+     * checkpoint must be recorded/replayed before processing any non-EXTERNAL
+     * timer, and only once, since the clock value stays the same.  Because
+     * non-EXTERNAL timers may appear in the timers list while it is being
+     * processed, the checkpoint can be issued at any time until no timers
+     * are left and we are done.
+     */
     current_time = qemu_clock_get_ns(timer_list->clock->type);
-    for(;;) {
-        qemu_mutex_lock(&timer_list->active_timers_lock);
-        ts = timer_list->active_timers;
+    qemu_mutex_lock(&timer_list->active_timers_lock);
+    while ((ts = timer_list->active_timers)) {
         if (!timer_expired_ns(ts, current_time)) {
-            qemu_mutex_unlock(&timer_list->active_timers_lock);
+            /* No expired timers left.  The checkpoint can be skipped
+             * if no timers fired or they were all external.
+             */
             break;
         }
+        if (need_replay_checkpoint
+                && !(ts->attributes & QEMU_TIMER_ATTR_EXTERNAL)) {
+            /* once we got here, checkpoint clock only once */
+            need_replay_checkpoint = false;
+            qemu_mutex_unlock(&timer_list->active_timers_lock);
+            if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) {
+                goto out;
+            }
+            qemu_mutex_lock(&timer_list->active_timers_lock);
+            /* The lock was released; start over again in case the list was
+             * modified.
+             */
+            continue;
+        }
 
         /* remove timer from the list before calling the callback */
         timer_list->active_timers = ts->next;
@@ -530,12 +568,15 @@
         ts->expire_time = -1;
         cb = ts->cb;
         opaque = ts->opaque;
-        qemu_mutex_unlock(&timer_list->active_timers_lock);
 
         /* run the callback (the timer list can be modified) */
+        qemu_mutex_unlock(&timer_list->active_timers_lock);
         cb(opaque);
+        qemu_mutex_lock(&timer_list->active_timers_lock);
+
         progress = true;
     }
+    qemu_mutex_unlock(&timer_list->active_timers_lock);
 
 out:
     qemu_event_set(&timer_list->timers_done_ev);
diff --git a/util/qsp.c b/util/qsp.c
index 2de3a97..a848b09 100644
--- a/util/qsp.c
+++ b/util/qsp.c
@@ -84,13 +84,6 @@
     uint64_t n_acqs;
     uint64_t ns;
     unsigned int n_objs; /* count of coalesced objs; only used for reporting */
-#ifndef CONFIG_ATOMIC64
-    /*
-     * If we cannot update the counts atomically, then use a seqlock.
-     * We don't need an associated lock because the updates are thread-local.
-     */
-    QemuSeqLock sequence;
-#endif
 };
 typedef struct QSPEntry QSPEntry;
 
@@ -345,46 +338,15 @@
 }
 
 /*
- * @from is in the global hash table; read it atomically if the host
- * supports it, otherwise use the seqlock.
- */
-static void qsp_entry_aggregate(QSPEntry *to, const QSPEntry *from)
-{
-#ifdef CONFIG_ATOMIC64
-    to->ns += atomic_read__nocheck(&from->ns);
-    to->n_acqs += atomic_read__nocheck(&from->n_acqs);
-#else
-    unsigned int version;
-    uint64_t ns, n_acqs;
-
-    do {
-        version = seqlock_read_begin(&from->sequence);
-        ns = atomic_read__nocheck(&from->ns);
-        n_acqs = atomic_read__nocheck(&from->n_acqs);
-    } while (seqlock_read_retry(&from->sequence, version));
-
-    to->ns += ns;
-    to->n_acqs += n_acqs;
-#endif
-}
-
-/*
  * @e is in the global hash table; it is only written to by the current thread,
  * so we write to it atomically (as in "write once") to prevent torn reads.
- * If the host doesn't support u64 atomics, use the seqlock.
  */
 static inline void do_qsp_entry_record(QSPEntry *e, int64_t delta, bool acq)
 {
-#ifndef CONFIG_ATOMIC64
-    seqlock_write_begin(&e->sequence);
-#endif
-    atomic_set__nocheck(&e->ns, e->ns + delta);
+    atomic_set_u64(&e->ns, e->ns + delta);
     if (acq) {
-        atomic_set__nocheck(&e->n_acqs, e->n_acqs + 1);
+        atomic_set_u64(&e->n_acqs, e->n_acqs + 1);
     }
-#ifndef CONFIG_ATOMIC64
-    seqlock_write_end(&e->sequence);
-#endif
 }
 
 static inline void qsp_entry_record(QSPEntry *e, int64_t delta)
@@ -550,7 +512,12 @@
 
     hash = qsp_entry_no_thread_hash(e);
     agg = qsp_entry_find(ht, e, hash);
-    qsp_entry_aggregate(agg, e);
+    /*
+     * The entry is in the global hash table; read from it atomically (as in
+     * "read once").
+     */
+    agg->ns += atomic_read_u64(&e->ns);
+    agg->n_acqs += atomic_read_u64(&e->n_acqs);
 }
 
 static void qsp_iter_diff(void *p, uint32_t hash, void *htp)
diff --git a/vl.c b/vl.c
index cc55fe0..1fcacc5 100644
--- a/vl.c
+++ b/vl.c
@@ -147,8 +147,15 @@
 int nb_nics;
 NICInfo nd_table[MAX_NICS];
 int autostart;
-static int rtc_utc = 1;
-static int rtc_date_offset = -1; /* -1 means no change */
+static enum {
+    RTC_BASE_UTC,
+    RTC_BASE_LOCALTIME,
+    RTC_BASE_DATETIME,
+} rtc_base_type = RTC_BASE_UTC;
+static time_t rtc_ref_start_datetime;
+static int rtc_realtime_clock_offset; /* used only with QEMU_CLOCK_REALTIME */
+static int rtc_host_datetime_offset = -1; /* valid & used only with
+                                             RTC_BASE_DATETIME */
 QEMUClockType rtc_clock;
 int vga_interface_type = VGA_NONE;
 static DisplayOptions dpy;
@@ -242,6 +249,7 @@
 static QemuOptsList qemu_rtc_opts = {
     .name = "rtc",
     .head = QTAILQ_HEAD_INITIALIZER(qemu_rtc_opts.head),
+    .merge_lists = true,
     .desc = {
         {
             .name = "base",
@@ -780,28 +788,42 @@
 }
 
 /***********************************************************/
-/* real time host monotonic timer */
-
-static time_t qemu_time(void)
+/* RTC reference time/date access */
+static time_t qemu_ref_timedate(QEMUClockType clock)
 {
-    return qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
+    time_t value = qemu_clock_get_ms(clock) / 1000;
+    switch (clock) {
+    case QEMU_CLOCK_REALTIME:
+        value -= rtc_realtime_clock_offset;
+        /* fall through */
+    case QEMU_CLOCK_VIRTUAL:
+        value += rtc_ref_start_datetime;
+        break;
+    case QEMU_CLOCK_HOST:
+        if (rtc_base_type == RTC_BASE_DATETIME) {
+            value -= rtc_host_datetime_offset;
+        }
+        break;
+    default:
+        assert(0);
+    }
+    return value;
 }
 
-/***********************************************************/
-/* host time/date access */
 void qemu_get_timedate(struct tm *tm, int offset)
 {
-    time_t ti = qemu_time();
+    time_t ti = qemu_ref_timedate(rtc_clock);
 
     ti += offset;
-    if (rtc_date_offset == -1) {
-        if (rtc_utc)
-            gmtime_r(&ti, tm);
-        else
-            localtime_r(&ti, tm);
-    } else {
-        ti -= rtc_date_offset;
+
+    switch (rtc_base_type) {
+    case RTC_BASE_DATETIME:
+    case RTC_BASE_UTC:
         gmtime_r(&ti, tm);
+        break;
+    case RTC_BASE_LOCALTIME:
+        localtime_r(&ti, tm);
+        break;
     }
 }
 
@@ -809,23 +831,28 @@
 {
     time_t seconds;
 
-    if (rtc_date_offset == -1)
-        if (rtc_utc)
-            seconds = mktimegm(tm);
-        else {
-            struct tm tmp = *tm;
-            tmp.tm_isdst = -1; /* use timezone to figure it out */
-            seconds = mktime(&tmp);
-	}
-    else
-        seconds = mktimegm(tm) + rtc_date_offset;
+    switch (rtc_base_type) {
+    case RTC_BASE_DATETIME:
+    case RTC_BASE_UTC:
+        seconds = mktimegm(tm);
+        break;
+    case RTC_BASE_LOCALTIME:
+    {
+        struct tm tmp = *tm;
+        tmp.tm_isdst = -1; /* use timezone to figure it out */
+        seconds = mktime(&tmp);
+        break;
+    }
+    default:
+        abort();
+    }
 
-    return seconds - qemu_time();
+    return seconds - qemu_ref_timedate(QEMU_CLOCK_HOST);
 }
 
-static void configure_rtc_date_offset(const char *startdate)
+static void configure_rtc_base_datetime(const char *startdate)
 {
-    time_t rtc_start_date;
+    time_t rtc_start_datetime;
     struct tm tm;
 
     if (sscanf(startdate, "%d-%d-%dT%d:%d:%d", &tm.tm_year, &tm.tm_mon,
@@ -841,33 +868,40 @@
     }
     tm.tm_year -= 1900;
     tm.tm_mon--;
-    rtc_start_date = mktimegm(&tm);
-    if (rtc_start_date == -1) {
+    rtc_start_datetime = mktimegm(&tm);
+    if (rtc_start_datetime == -1) {
     date_fail:
-        error_report("invalid date format");
+        error_report("invalid datetime format");
         error_printf("valid formats: "
                      "'2006-06-17T16:01:21' or '2006-06-17'\n");
         exit(1);
     }
-    rtc_date_offset = qemu_time() - rtc_start_date;
+    rtc_host_datetime_offset = rtc_ref_start_datetime - rtc_start_datetime;
+    rtc_ref_start_datetime = rtc_start_datetime;
 }
 
 static void configure_rtc(QemuOpts *opts)
 {
     const char *value;
 
+    /* Set defaults */
+    rtc_clock = QEMU_CLOCK_HOST;
+    rtc_ref_start_datetime = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
+    rtc_realtime_clock_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
+
     value = qemu_opt_get(opts, "base");
     if (value) {
         if (!strcmp(value, "utc")) {
-            rtc_utc = 1;
+            rtc_base_type = RTC_BASE_UTC;
         } else if (!strcmp(value, "localtime")) {
             Error *blocker = NULL;
-            rtc_utc = 0;
+            rtc_base_type = RTC_BASE_LOCALTIME;
             error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED,
                       "-rtc base=localtime");
             replay_add_blocker(blocker);
         } else {
-            configure_rtc_date_offset(value);
+            rtc_base_type = RTC_BASE_DATETIME;
+            configure_rtc_base_datetime(value);
         }
     }
     value = qemu_opt_get(opts, "clock");
@@ -1059,12 +1093,12 @@
     fd_opaque = qemu_opt_get(opts, "opaque");
 
     if (fd < 0) {
-        error_report("fd option is required and must be non-negative");
+        error_setg(errp, "fd option is required and must be non-negative");
         return -1;
     }
 
     if (fd <= STDERR_FILENO) {
-        error_report("fd cannot be a standard I/O stream");
+        error_setg(errp, "fd cannot be a standard I/O stream");
         return -1;
     }
 
@@ -1074,12 +1108,12 @@
      */
     flags = fcntl(fd, F_GETFD);
     if (flags == -1 || (flags & FD_CLOEXEC)) {
-        error_report("fd is not valid or already in use");
+        error_setg(errp, "fd is not valid or already in use");
         return -1;
     }
 
     if (fdset_id < 0) {
-        error_report("set option is required and must be non-negative");
+        error_setg(errp, "set option is required and must be non-negative");
         return -1;
     }
 
@@ -1092,7 +1126,7 @@
     }
 #endif
     if (dupfd == -1) {
-        error_report("error duplicating fd: %s", strerror(errno));
+        error_setg(errp, "error duplicating fd: %s", strerror(errno));
         return -1;
     }
 
@@ -1129,7 +1163,7 @@
 {
     BlockInterfaceType *block_default_type = opaque;
 
-    return drive_new(opts, *block_default_type) == NULL;
+    return drive_new(opts, *block_default_type, errp) == NULL;
 }
 
 static int drive_enable_snapshot(void *opaque, QemuOpts *opts, Error **errp)
@@ -1155,10 +1189,7 @@
         drive_enable_snapshot(NULL, opts, NULL);
     }
 
-    dinfo = drive_new(opts, type);
-    if (!dinfo) {
-        exit(1);
-    }
+    dinfo = drive_new(opts, type, &error_abort);
     dinfo->is_default = true;
 
 }
@@ -1199,11 +1230,14 @@
 
         /* compute missing values, prefer sockets over cores over threads */
         if (cpus == 0 || sockets == 0) {
-            sockets = sockets > 0 ? sockets : 1;
             cores = cores > 0 ? cores : 1;
             threads = threads > 0 ? threads : 1;
             if (cpus == 0) {
+                sockets = sockets > 0 ? sockets : 1;
                 cpus = cores * threads * sockets;
+            } else {
+                max_cpus = qemu_opt_get_number(opts, "maxcpus", cpus);
+                sockets = max_cpus / (cores * threads);
             }
         } else if (cores == 0) {
             threads = threads > 0 ? threads : 1;
@@ -1235,6 +1269,13 @@
             exit(1);
         }
 
+        if (sockets * cores * threads != max_cpus) {
+            warn_report("Invalid CPU topology deprecated: "
+                        "sockets (%u) * cores (%u) * threads (%u) "
+                        "!= maxcpus (%u)",
+                        sockets, cores, threads, max_cpus);
+        }
+
         smp_cpus = cpus;
         smp_cores = cores;
         smp_threads = threads;
@@ -2002,15 +2043,10 @@
 
 static void parse_display_qapi(const char *optarg)
 {
-    Error *err = NULL;
     DisplayOptions *opts;
     Visitor *v;
 
-    v = qobject_input_visitor_new_str(optarg, "type", &err);
-    if (!v) {
-        error_report_err(err);
-        exit(1);
-    }
+    v = qobject_input_visitor_new_str(optarg, "type", &error_fatal);
 
     visit_type_DisplayOptions(v, NULL, &opts, &error_fatal);
     QAPI_CLONE_MEMBERS(DisplayOptions, &dpy, opts);
@@ -2179,7 +2215,7 @@
     FWCfgState *fw_cfg = (FWCfgState *) opaque;
 
     if (fw_cfg == NULL) {
-        error_report("fw_cfg device not available");
+        error_setg(errp, "fw_cfg device not available");
         return -1;
     }
     name = qemu_opt_get(opts, "name");
@@ -2188,15 +2224,16 @@
 
     /* we need name and either a file or the content string */
     if (!(nonempty_str(name) && (nonempty_str(file) || nonempty_str(str)))) {
-        error_report("invalid argument(s)");
+        error_setg(errp, "invalid argument(s)");
         return -1;
     }
     if (nonempty_str(file) && nonempty_str(str)) {
-        error_report("file and string are mutually exclusive");
+        error_setg(errp, "file and string are mutually exclusive");
         return -1;
     }
     if (strlen(name) > FW_CFG_MAX_FILE_PATH - 1) {
-        error_report("name too long (max. %d char)", FW_CFG_MAX_FILE_PATH - 1);
+        error_setg(errp, "name too long (max. %d char)",
+                   FW_CFG_MAX_FILE_PATH - 1);
         return -1;
     }
     if (strncmp(name, "opt/", 4) != 0) {
@@ -2208,7 +2245,7 @@
         buf = g_memdup(str, size);
     } else {
         if (!g_file_get_contents(file, &buf, &size, NULL)) {
-            error_report("can't load %s", file);
+            error_setg(errp, "can't load %s", file);
             return -1;
         }
     }
@@ -2226,12 +2263,10 @@
 
 static int device_init_func(void *opaque, QemuOpts *opts, Error **errp)
 {
-    Error *err = NULL;
     DeviceState *dev;
 
-    dev = qdev_device_add(opts, &err);
+    dev = qdev_device_add(opts, errp);
     if (!dev) {
-        error_report_err(err);
         return -1;
     }
     object_unref(OBJECT(dev));
@@ -2244,7 +2279,7 @@
 
     if (!qemu_chr_new_from_opts(opts, &local_err)) {
         if (local_err) {
-            error_report_err(local_err);
+            error_propagate(errp, local_err);
             return -1;
         }
         exit(0);
@@ -2255,7 +2290,7 @@
 #ifdef CONFIG_VIRTFS
 static int fsdev_init_func(void *opaque, QemuOpts *opts, Error **errp)
 {
-    return qemu_fsdev_add(opts);
+    return qemu_fsdev_add(opts, errp);
 }
 #endif
 
@@ -2275,8 +2310,8 @@
     } else if (strcmp(mode, "control") == 0) {
         flags = MONITOR_USE_CONTROL;
     } else {
-        error_report("unknown monitor mode \"%s\"", mode);
-        exit(1);
+        error_setg(errp, "unknown monitor mode \"%s\"", mode);
+        return -1;
     }
 
     if (qemu_opt_get_bool(opts, "pretty", 0))
@@ -2290,8 +2325,8 @@
     chardev = qemu_opt_get(opts, "chardev");
     chr = qemu_chr_find(chardev);
     if (chr == NULL) {
-        error_report("chardev \"%s\" not found", chardev);
-        exit(1);
+        error_setg(errp, "chardev \"%s\" not found", chardev);
+        return -1;
     }
 
     monitor_init(chr, flags);
@@ -2310,7 +2345,7 @@
     } else {
         snprintf(label, sizeof(label), "compat_monitor%d",
                  monitor_device_index);
-        opts = qemu_chr_parse_compat(label, optarg);
+        opts = qemu_chr_parse_compat(label, optarg, true);
         if (!opts) {
             error_report("parse error: %s", optarg);
             exit(1);
@@ -2382,7 +2417,7 @@
     snprintf(label, sizeof(label), "serial%d", index);
     serial_hds = g_renew(Chardev *, serial_hds, index + 1);
 
-    serial_hds[index] = qemu_chr_new(label, devname);
+    serial_hds[index] = qemu_chr_new_mux_mon(label, devname);
     if (!serial_hds[index]) {
         error_report("could not connect serial device"
                      " to character backend '%s'", devname);
@@ -2418,7 +2453,7 @@
         exit(1);
     }
     snprintf(label, sizeof(label), "parallel%d", index);
-    parallel_hds[index] = qemu_chr_new(label, devname);
+    parallel_hds[index] = qemu_chr_new_mux_mon(label, devname);
     if (!parallel_hds[index]) {
         error_report("could not connect parallel device"
                      " to character backend '%s'", devname);
@@ -2449,7 +2484,7 @@
     qemu_opt_set(dev_opts, "driver", "virtconsole", &error_abort);
 
     snprintf(label, sizeof(label), "virtcon%d", index);
-    virtcon_hds[index] = qemu_chr_new(label, devname);
+    virtcon_hds[index] = qemu_chr_new_mux_mon(label, devname);
     if (!virtcon_hds[index]) {
         error_report("could not connect virtio console"
                      " to character backend '%s'", devname);
@@ -2465,7 +2500,8 @@
 {
     QemuOpts *opts;
 
-    if (!qemu_chr_new("debugcon", devname)) {
+    if (!qemu_chr_new_mux_mon("debugcon", devname)) {
+        error_report("invalid character backend '%s'", devname);
         exit(1);
     }
     opts = qemu_opts_create(qemu_find_opts("device"), "debugcon", 1, NULL);
@@ -2560,6 +2596,16 @@
     notifier_list_notify(&exit_notifiers, NULL);
 }
 
+static const char *pid_file;
+static Notifier qemu_unlink_pidfile_notifier;
+
+static void qemu_unlink_pidfile(Notifier *n, void *data)
+{
+    if (pid_file) {
+        unlink(pid_file);
+    }
+}
+
 bool machine_init_done;
 
 void qemu_add_machine_init_done_notifier(Notifier *notify)
@@ -2671,7 +2717,7 @@
     g_free(qom_name);
 
     if (local_err) {
-        error_report_err(local_err);
+        error_propagate(errp, local_err);
         return -1;
     }
 
@@ -2686,8 +2732,55 @@
  * cannot be created here, as it depends on the chardev
  * already existing.
  */
-static bool object_create_initial(const char *type)
+static bool object_create_initial(const char *type, QemuOpts *opts)
 {
+    ObjectClass *klass;
+
+    if (is_help_option(type)) {
+        GSList *l, *list;
+
+        printf("List of user creatable objects:\n");
+        list = object_class_get_list_sorted(TYPE_USER_CREATABLE, false);
+        for (l = list; l != NULL; l = l->next) {
+            ObjectClass *oc = OBJECT_CLASS(l->data);
+            printf("%s\n", object_class_get_name(oc));
+        }
+        g_slist_free(list);
+        exit(0);
+    }
+
+    klass = object_class_by_name(type);
+    if (klass && qemu_opt_has_help_opt(opts)) {
+        ObjectPropertyIterator iter;
+        ObjectProperty *prop;
+        GPtrArray *array = g_ptr_array_new();
+        int i;
+
+        object_class_property_iter_init(&iter, klass);
+        while ((prop = object_property_iter_next(&iter))) {
+            GString *str;
+
+            if (!prop->set) {
+                continue;
+            }
+
+            str = g_string_new(NULL);
+            g_string_append_printf(str, "%s.%s=%s", type,
+                                   prop->name, prop->type);
+            if (prop->description) {
+                g_string_append_printf(str, " - %s", prop->description);
+            }
+            g_ptr_array_add(array, g_string_free(str, false));
+        }
+        g_ptr_array_sort(array, (GCompareFunc)qemu_pstrcmp0);
+        for (i = 0; i < array->len; i++) {
+            printf("%s\n", (char *)array->pdata[i]);
+        }
+        g_ptr_array_set_free_func(array, g_free);
+        g_ptr_array_free(array, true);
+        exit(0);
+    }
+
     if (g_str_equal(type, "rng-egd") ||
         g_str_has_prefix(type, "pr-manager-")) {
         return false;
@@ -2734,9 +2827,9 @@
  * The remainder of object creation happens after the
  * creation of chardev, fsdev, net clients and device data types.
  */
-static bool object_create_delayed(const char *type)
+static bool object_create_delayed(const char *type, QemuOpts *opts)
 {
-    return !object_create_initial(type);
+    return !object_create_initial(type, opts);
 }
 
 
@@ -2884,7 +2977,6 @@
     const char *vga_model = NULL;
     const char *qtest_chrdev = NULL;
     const char *qtest_log = NULL;
-    const char *pid_file = NULL;
     const char *incoming = NULL;
     bool userconfig = true;
     bool nographic = false;
@@ -2961,7 +3053,6 @@
         error_reportf_err(err, "cannot initialize crypto: ");
         exit(1);
     }
-    rtc_clock = QEMU_CLOCK_HOST;
 
     QLIST_INIT (&vm_change_state_head);
     os_setup_early_signal_handling();
@@ -3681,7 +3772,6 @@
                 if (!opts) {
                     exit(1);
                 }
-                configure_rtc(opts);
                 break;
             case QEMU_OPTION_tb_size:
 #ifndef CONFIG_TCG
@@ -3899,6 +3989,8 @@
         exit(EXIT_FAILURE);
     }
 
+    configure_rtc(qemu_find_opts_singleton("rtc"));
+
     machine_class = select_machine();
 
     set_memory_options(&ram_slots, &maxram_size, machine_class);
@@ -3906,11 +3998,14 @@
     os_daemonize();
     rcu_disable_atfork();
 
-    if (pid_file && qemu_create_pidfile(pid_file) != 0) {
-        error_report("could not acquire pid file: %s", strerror(errno));
+    if (pid_file && !qemu_write_pidfile(pid_file, &err)) {
+        error_reportf_err(err, "cannot create PID file: ");
         exit(1);
     }
 
+    qemu_unlink_pidfile_notifier.notify = qemu_unlink_pidfile;
+    qemu_add_exit_notifier(&qemu_unlink_pidfile_notifier);
+
     if (qemu_init_main_loop(&main_loop_err)) {
         error_report_err(main_loop_err);
         exit(1);
@@ -3918,26 +4013,20 @@
 
 #ifdef CONFIG_SECCOMP
     olist = qemu_find_opts_err("sandbox", NULL);
-    if (olist && qemu_opts_foreach(olist, parse_sandbox, NULL, NULL)) {
-        exit(1);
+    if (olist) {
+        qemu_opts_foreach(olist, parse_sandbox, NULL, &error_fatal);
     }
 #endif
 
-    if (qemu_opts_foreach(qemu_find_opts("name"),
-                          parse_name, NULL, NULL)) {
-        exit(1);
-    }
+    qemu_opts_foreach(qemu_find_opts("name"),
+                      parse_name, NULL, &error_fatal);
 
 #ifndef _WIN32
-    if (qemu_opts_foreach(qemu_find_opts("add-fd"),
-                          parse_add_fd, NULL, NULL)) {
-        exit(1);
-    }
+    qemu_opts_foreach(qemu_find_opts("add-fd"),
+                      parse_add_fd, NULL, &error_fatal);
 
-    if (qemu_opts_foreach(qemu_find_opts("add-fd"),
-                          cleanup_add_fd, NULL, NULL)) {
-        exit(1);
-    }
+    qemu_opts_foreach(qemu_find_opts("add-fd"),
+                      cleanup_add_fd, NULL, &error_fatal);
 #endif
 
     current_machine = MACHINE(object_new(object_class_get_name(
@@ -4178,22 +4267,16 @@
     page_size_init();
     socket_init();
 
-    if (qemu_opts_foreach(qemu_find_opts("object"),
-                          user_creatable_add_opts_foreach,
-                          object_create_initial, NULL)) {
-        exit(1);
-    }
+    qemu_opts_foreach(qemu_find_opts("object"),
+                      user_creatable_add_opts_foreach,
+                      object_create_initial, &error_fatal);
 
-    if (qemu_opts_foreach(qemu_find_opts("chardev"),
-                          chardev_init_func, NULL, NULL)) {
-        exit(1);
-    }
+    qemu_opts_foreach(qemu_find_opts("chardev"),
+                      chardev_init_func, NULL, &error_fatal);
 
 #ifdef CONFIG_VIRTFS
-    if (qemu_opts_foreach(qemu_find_opts("fsdev"),
-                          fsdev_init_func, NULL, NULL)) {
-        exit(1);
-    }
+    qemu_opts_foreach(qemu_find_opts("fsdev"),
+                      fsdev_init_func, NULL, &error_fatal);
 #endif
 
     if (qemu_opts_foreach(qemu_find_opts("device"),
@@ -4202,11 +4285,8 @@
     }
 
     machine_opts = qemu_get_machine_opts();
-    if (qemu_opt_foreach(machine_opts, machine_set_property, current_machine,
-                         NULL)) {
-        object_unref(OBJECT(current_machine));
-        exit(1);
-    }
+    qemu_opt_foreach(machine_opts, machine_set_property, current_machine,
+                     &error_fatal);
 
     configure_accelerator(current_machine);
 
@@ -4306,22 +4386,16 @@
 #endif
     }
 
-    colo_info_init();
-
     if (net_init_clients(&err) < 0) {
         error_report_err(err);
         exit(1);
     }
 
-    if (qemu_opts_foreach(qemu_find_opts("object"),
-                          user_creatable_add_opts_foreach,
-                          object_create_delayed, NULL)) {
-        exit(1);
-    }
+    qemu_opts_foreach(qemu_find_opts("object"),
+                      user_creatable_add_opts_foreach,
+                      object_create_delayed, &error_fatal);
 
-    if (tpm_init() < 0) {
-        exit(1);
-    }
+    tpm_init();
 
     /* init the bluetooth world */
     if (foreach_device_config(DEV_BT, bt_parse))
@@ -4362,8 +4436,9 @@
                           NULL, NULL);
     }
     if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func,
-                          &machine_class->block_default_type, NULL)) {
-        exit(1);
+                          &machine_class->block_default_type, &error_fatal)) {
+        /* We printed help */
+        exit(0);
     }
 
     default_drive(default_cdrom, snapshot, machine_class->block_default_type, 2,
@@ -4371,10 +4446,8 @@
     default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
     default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
 
-    if (qemu_opts_foreach(qemu_find_opts("mon"),
-                          mon_init_func, NULL, NULL)) {
-        exit(1);
-    }
+    qemu_opts_foreach(qemu_find_opts("mon"),
+                      mon_init_func, NULL, &error_fatal);
 
     if (foreach_device_config(DEV_SERIAL, serial_parse) < 0)
         exit(1);
@@ -4437,10 +4510,8 @@
         hax_sync_vcpus();
     }
 
-    if (qemu_opts_foreach(qemu_find_opts("fw_cfg"),
-                          parse_fw_cfg, fw_cfg_find(), NULL) != 0) {
-        exit(1);
-    }
+    qemu_opts_foreach(qemu_find_opts("fw_cfg"),
+                      parse_fw_cfg, fw_cfg_find(), &error_fatal);
 
     /* init USB devices */
     if (machine_usb(current_machine)) {
@@ -4453,10 +4524,8 @@
 
     /* init generic devices */
     rom_set_order_override(FW_CFG_ORDER_OVERRIDE_DEVICE);
-    if (qemu_opts_foreach(qemu_find_opts("device"),
-                          device_init_func, NULL, NULL)) {
-        exit(1);
-    }
+    qemu_opts_foreach(qemu_find_opts("device"),
+                      device_init_func, NULL, &error_fatal);
 
     cpu_synchronize_all_post_init();
 
@@ -4492,7 +4561,7 @@
     /* init remote displays */
 #ifdef CONFIG_VNC
     qemu_opts_foreach(qemu_find_opts("vnc"),
-                      vnc_init_func, NULL, NULL);
+                      vnc_init_func, NULL, &error_fatal);
 #endif
 
     if (using_spice) {
@@ -4523,9 +4592,7 @@
     replay_checkpoint(CHECKPOINT_RESET);
     qemu_system_reset(SHUTDOWN_CAUSE_NONE);
     register_global_state();
-    if (replay_mode != REPLAY_MODE_NONE) {
-        replay_vmstate_init();
-    } else if (loadvm) {
+    if (loadvm) {
         Error *local_err = NULL;
         if (load_snapshot(loadvm, &local_err) < 0) {
             error_report_err(local_err);
@@ -4533,6 +4600,9 @@
             exit(1);
         }
     }
+    if (replay_mode != REPLAY_MODE_NONE) {
+        replay_vmstate_init();
+    }
 
     qdev_prop_check_globals();
     if (vmstate_dump_file) {
diff --git a/win_dump.c b/win_dump.c
index b15c191..e10a783 100644
--- a/win_dump.c
+++ b/win_dump.c
@@ -30,28 +30,32 @@
     void *buf;
     uint64_t addr = run->BasePage << TARGET_PAGE_BITS;
     uint64_t size = run->PageCount << TARGET_PAGE_BITS;
-    uint64_t len = size;
+    uint64_t len, l;
+    size_t total = 0;
 
-    buf = cpu_physical_memory_map(addr, &len, false);
-    if (!buf) {
-        error_setg(errp, "win-dump: failed to map run");
-        return 0;
-    }
-    if (len != size) {
-        error_setg(errp, "win-dump: failed to map entire run");
-        len = 0;
-        goto out_unmap;
+    while (size) {
+        len = size;
+
+        buf = cpu_physical_memory_map(addr, &len, false);
+        if (!buf) {
+            error_setg(errp, "win-dump: failed to map physical range"
+                             " 0x%016" PRIx64 "-0x%016" PRIx64, addr, addr + size - 1);
+            return 0;
+        }
+
+        l = qemu_write_full(fd, buf, len);
+        cpu_physical_memory_unmap(buf, addr, false, len);
+        if (l != len) {
+            error_setg(errp, QERR_IO_ERROR);
+            return 0;
+        }
+
+        addr += l;
+        size -= l;
+        total += l;
     }
 
-    len = qemu_write_full(fd, buf, len);
-    if (len != size) {
-        error_setg(errp, QERR_IO_ERROR);
-    }
-
-out_unmap:
-    cpu_physical_memory_unmap(buf, addr, false, len);
-
-    return len;
+    return total;
 }
 
 static void write_runs(DumpState *s, WinDumpHeader64 *h, Error **errp)
diff --git a/win_dump.h b/win_dump.h
index f9e1faf..b8c2534 100644
--- a/win_dump.h
+++ b/win_dump.h
@@ -8,169 +8,11 @@
  *
  */
 
-typedef struct WinDumpPhyMemRun64 {
-    uint64_t BasePage;
-    uint64_t PageCount;
-} QEMU_PACKED WinDumpPhyMemRun64;
+#ifndef WIN_DUMP_H
+#define WIN_DUMP_H
 
-typedef struct WinDumpPhyMemDesc64 {
-    uint32_t NumberOfRuns;
-    uint32_t unused;
-    uint64_t NumberOfPages;
-    WinDumpPhyMemRun64 Run[43];
-} QEMU_PACKED WinDumpPhyMemDesc64;
-
-typedef struct WinDumpExceptionRecord {
-    uint32_t ExceptionCode;
-    uint32_t ExceptionFlags;
-    uint64_t ExceptionRecord;
-    uint64_t ExceptionAddress;
-    uint32_t NumberParameters;
-    uint32_t unused;
-    uint64_t ExceptionInformation[15];
-} QEMU_PACKED WinDumpExceptionRecord;
-
-typedef struct WinDumpHeader64 {
-    char Signature[4];
-    char ValidDump[4];
-    uint32_t MajorVersion;
-    uint32_t MinorVersion;
-    uint64_t DirectoryTableBase;
-    uint64_t PfnDatabase;
-    uint64_t PsLoadedModuleList;
-    uint64_t PsActiveProcessHead;
-    uint32_t MachineImageType;
-    uint32_t NumberProcessors;
-    union {
-        struct {
-            uint32_t BugcheckCode;
-            uint32_t unused0;
-            uint64_t BugcheckParameter1;
-            uint64_t BugcheckParameter2;
-            uint64_t BugcheckParameter3;
-            uint64_t BugcheckParameter4;
-        };
-        uint8_t BugcheckData[40];
-    };
-    uint8_t VersionUser[32];
-    uint64_t KdDebuggerDataBlock;
-    union {
-        WinDumpPhyMemDesc64 PhysicalMemoryBlock;
-        uint8_t PhysicalMemoryBlockBuffer[704];
-    };
-    union {
-        uint8_t ContextBuffer[3000];
-    };
-    WinDumpExceptionRecord Exception;
-    uint32_t DumpType;
-    uint32_t unused1;
-    uint64_t RequiredDumpSpace;
-    uint64_t SystemTime;
-    char Comment[128];
-    uint64_t SystemUpTime;
-    uint32_t MiniDumpFields;
-    uint32_t SecondaryDataState;
-    uint32_t ProductType;
-    uint32_t SuiteMask;
-    uint32_t WriterStatus;
-    uint8_t unused2;
-    uint8_t KdSecondaryVersion;
-    uint8_t reserved[4018];
-} QEMU_PACKED WinDumpHeader64;
+#include "qemu/win_dump_defs.h"
 
 void create_win_dump(DumpState *s, Error **errp);
 
-#define KDBG_OWNER_TAG_OFFSET64             0x10
-#define KDBG_MM_PFN_DATABASE_OFFSET64       0xC0
-#define KDBG_KI_BUGCHECK_DATA_OFFSET64      0x88
-#define KDBG_KI_PROCESSOR_BLOCK_OFFSET64    0x218
-#define KDBG_OFFSET_PRCB_CONTEXT_OFFSET64   0x338
-
-#define VMCOREINFO_ELF_NOTE_HDR_SIZE    24
-
-#define WIN_CTX_X64 0x00100000L
-
-#define WIN_CTX_CTL 0x00000001L
-#define WIN_CTX_INT 0x00000002L
-#define WIN_CTX_SEG 0x00000004L
-#define WIN_CTX_FP  0x00000008L
-#define WIN_CTX_DBG 0x00000010L
-
-#define WIN_CTX_FULL    (WIN_CTX_X64 | WIN_CTX_CTL | WIN_CTX_INT | WIN_CTX_FP)
-#define WIN_CTX_ALL     (WIN_CTX_FULL | WIN_CTX_SEG | WIN_CTX_DBG)
-
-#define LIVE_SYSTEM_DUMP    0x00000161
-
-typedef struct WinM128A {
-    uint64_t low;
-    int64_t high;
-} QEMU_ALIGNED(16) WinM128A;
-
-typedef struct WinContext {
-    uint64_t PHome[6];
-
-    uint32_t ContextFlags;
-    uint32_t MxCsr;
-
-    uint16_t SegCs;
-    uint16_t SegDs;
-    uint16_t SegEs;
-    uint16_t SegFs;
-    uint16_t SegGs;
-    uint16_t SegSs;
-    uint32_t EFlags;
-
-    uint64_t Dr0;
-    uint64_t Dr1;
-    uint64_t Dr2;
-    uint64_t Dr3;
-    uint64_t Dr6;
-    uint64_t Dr7;
-
-    uint64_t Rax;
-    uint64_t Rcx;
-    uint64_t Rdx;
-    uint64_t Rbx;
-    uint64_t Rsp;
-    uint64_t Rbp;
-    uint64_t Rsi;
-    uint64_t Rdi;
-    uint64_t R8;
-    uint64_t R9;
-    uint64_t R10;
-    uint64_t R11;
-    uint64_t R12;
-    uint64_t R13;
-    uint64_t R14;
-    uint64_t R15;
-
-    uint64_t Rip;
-
-    struct {
-        uint16_t ControlWord;
-        uint16_t StatusWord;
-        uint8_t TagWord;
-        uint8_t Reserved1;
-        uint16_t ErrorOpcode;
-        uint32_t ErrorOffset;
-        uint16_t ErrorSelector;
-        uint16_t Reserved2;
-        uint32_t DataOffset;
-        uint16_t DataSelector;
-        uint16_t Reserved3;
-        uint32_t MxCsr;
-        uint32_t MxCsr_Mask;
-        WinM128A FloatRegisters[8];
-        WinM128A XmmRegisters[16];
-        uint8_t Reserved4[96];
-    } FltSave;
-
-    WinM128A VectorRegister[26];
-    uint64_t VectorControl;
-
-    uint64_t DebugControl;
-    uint64_t LastBranchToRip;
-    uint64_t LastBranchFromRip;
-    uint64_t LastExceptionToRip;
-    uint64_t LastExceptionFromRip;
-} QEMU_ALIGNED(16) WinContext;
+#endif /* WIN_DUMP_H */