Merge remote-tracking branch 'remotes/stsquad/tags/pull-testing-and-plugin-updates-180521-2' into staging

testing and plugin updates:

  - various fixes for binfmt_misc docker images
  - add hexagon check-tcg support docker image
  - add tricore check-tcg support
  - refactor ppc docker images
  - add missing ppc64le tests
  - don't use host_cc for test fallback
  - check-tcg configure.sh tweaks for cross compile/clang
  - fix some memory leaks in plugins

# gpg: Signature made Tue 18 May 2021 09:37:21 BST
# gpg:                using RSA key 6685AE99E75167BCAFC8DF35FBD0DB095A9E2A44
# gpg: Good signature from "Alex Bennée (Master Work Key) <alex.bennee@linaro.org>" [full]
# Primary key fingerprint: 6685 AE99 E751 67BC AFC8  DF35 FBD0 DB09 5A9E 2A44

* remotes/stsquad/tags/pull-testing-and-plugin-updates-180521-2: (29 commits)
  configure: use cc, not host_cc to set cross_cc for build arch
  tests/tcg: don't allow clang as a cross compiler
  tests/tcg: fix missing return
  tests/tcg/ppc64le: tests for brh/brw/brd
  tests/docker: gcc-10 based images for ppc64{,le} tests
  tests/tcg/tricore: Add muls test
  tests/tcg/tricore: Add msub test
  tests/tcg/tricore: Add madd test
  tests/tcg/tricore: Add ftoi test
  tests/tcg/tricore: Add fmul test
  tests/tcg/tricore: Add fadd test
  tests/tcg/tricore: Add dvstep test
  tests/tcg/tricore: Add clz test
  tests/tcg/tricore: Add bmerge test
  tests/tcg/tricore: Add macros to create tests and first test 'abs'
  configure: Emit HOST_CC to config-host.mak
  tests/tcg/tricore: Add build infrastructure
  hw/tricore: Add testdevice for tests in tests/tcg/
  tests/tcg: Run timeout cmds using --foreground
  tests/tcg: Add docker_as and docker_ld cmds
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/MAINTAINERS b/MAINTAINERS
index 40bba0f..75e0f2d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1959,7 +1959,7 @@
 M: Klaus Jensen <its@irrelevant.dk>
 L: qemu-block@nongnu.org
 S: Supported
-F: hw/block/nvme*
+F: hw/nvme/*
 F: include/block/nvme.h
 F: tests/qtest/nvme-test.c
 F: docs/system/nvme.rst
@@ -2438,6 +2438,7 @@
 Main loop
 M: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
+F: include/exec/gen-icount.h
 F: include/qemu/main-loop.h
 F: include/sysemu/runstate.h
 F: include/sysemu/runstate-action.h
@@ -3247,6 +3248,8 @@
 F: block/export/vhost-user-blk-server.h
 F: include/qemu/vhost-user-server.h
 F: tests/qtest/libqos/vhost-user-blk.c
+F: tests/qtest/libqos/vhost-user-blk.h
+F: tests/qtest/vhost-user-blk-test.c
 F: util/vhost-user-server.c
 
 FUSE block device exports
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index 521da4a..ac7d28c 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -1073,9 +1073,8 @@
     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
         int32_t ai = *(int32_t *)(a + i);
         int32_t bi = *(int32_t *)(b + i);
-        int32_t di = ai + bi;
-        if (((di ^ ai) &~ (ai ^ bi)) < 0) {
-            /* Signed overflow.  */
+        int32_t di;
+        if (sadd32_overflow(ai, bi, &di)) {
             di = (di < 0 ? INT32_MAX : INT32_MIN);
         }
         *(int32_t *)(d + i) = di;
@@ -1091,9 +1090,8 @@
     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
         int64_t ai = *(int64_t *)(a + i);
         int64_t bi = *(int64_t *)(b + i);
-        int64_t di = ai + bi;
-        if (((di ^ ai) &~ (ai ^ bi)) < 0) {
-            /* Signed overflow.  */
+        int64_t di;
+        if (sadd64_overflow(ai, bi, &di)) {
             di = (di < 0 ? INT64_MAX : INT64_MIN);
         }
         *(int64_t *)(d + i) = di;
@@ -1143,9 +1141,8 @@
     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
         int32_t ai = *(int32_t *)(a + i);
         int32_t bi = *(int32_t *)(b + i);
-        int32_t di = ai - bi;
-        if (((di ^ ai) & (ai ^ bi)) < 0) {
-            /* Signed overflow.  */
+        int32_t di;
+        if (ssub32_overflow(ai, bi, &di)) {
             di = (di < 0 ? INT32_MAX : INT32_MIN);
         }
         *(int32_t *)(d + i) = di;
@@ -1161,9 +1158,8 @@
     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
         int64_t ai = *(int64_t *)(a + i);
         int64_t bi = *(int64_t *)(b + i);
-        int64_t di = ai - bi;
-        if (((di ^ ai) & (ai ^ bi)) < 0) {
-            /* Signed overflow.  */
+        int64_t di;
+        if (ssub64_overflow(ai, bi, &di)) {
             di = (di < 0 ? INT64_MAX : INT64_MIN);
         }
         *(int64_t *)(d + i) = di;
@@ -1209,8 +1205,8 @@
     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
         uint32_t ai = *(uint32_t *)(a + i);
         uint32_t bi = *(uint32_t *)(b + i);
-        uint32_t di = ai + bi;
-        if (di < ai) {
+        uint32_t di;
+        if (uadd32_overflow(ai, bi, &di)) {
             di = UINT32_MAX;
         }
         *(uint32_t *)(d + i) = di;
@@ -1226,8 +1222,8 @@
     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
         uint64_t ai = *(uint64_t *)(a + i);
         uint64_t bi = *(uint64_t *)(b + i);
-        uint64_t di = ai + bi;
-        if (di < ai) {
+        uint64_t di;
+        if (uadd64_overflow(ai, bi, &di)) {
             di = UINT64_MAX;
         }
         *(uint64_t *)(d + i) = di;
@@ -1273,8 +1269,8 @@
     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
         uint32_t ai = *(uint32_t *)(a + i);
         uint32_t bi = *(uint32_t *)(b + i);
-        uint32_t di = ai - bi;
-        if (ai < bi) {
+        uint32_t di;
+        if (usub32_overflow(ai, bi, &di)) {
             di = 0;
         }
         *(uint32_t *)(d + i) = di;
@@ -1290,8 +1286,8 @@
     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
         uint64_t ai = *(uint64_t *)(a + i);
         uint64_t bi = *(uint64_t *)(b + i);
-        uint64_t di = ai - bi;
-        if (ai < bi) {
+        uint64_t di;
+        if (usub64_overflow(ai, bi, &di)) {
             di = 0;
         }
         *(uint64_t *)(d + i) = di;
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index ae7e873..fbf8fc6 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -2042,8 +2042,15 @@
             int i;
             qemu_log("  data: [size=%d]\n", data_size);
             for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
-                qemu_log("0x%08" PRIxPTR ":  .quad  0x%" TCG_PRIlx "\n",
-                         (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
+                if (sizeof(tcg_target_ulong) == 8) {
+                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
+                             (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
+                } else if (sizeof(tcg_target_ulong) == 4) {
+                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
+                             (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
+                } else {
+                    qemu_build_not_reached();
+                }
             }
         }
         qemu_log("\n");
diff --git a/block.c b/block.c
index 75a82af..0dc9728 100644
--- a/block.c
+++ b/block.c
@@ -2916,13 +2916,14 @@
                                    child_role, perm, shared_perm, opaque,
                                    &child, tran, errp);
     if (ret < 0) {
-        bdrv_unref(child_bs);
-        return NULL;
+        assert(child == NULL);
+        goto out;
     }
 
     ret = bdrv_refresh_perms(child_bs, errp);
-    tran_finalize(tran, ret);
 
+out:
+    tran_finalize(tran, ret);
     bdrv_unref(child_bs);
     return child;
 }
@@ -4049,7 +4050,7 @@
         ret = bdrv_flush(bs_entry->state.bs);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Error flushing drive");
-            goto cleanup;
+            goto abort;
         }
     }
 
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
index fa06996..1862563 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -70,9 +70,16 @@
 static bool vu_blk_sect_range_ok(VuBlkExport *vexp, uint64_t sector,
                                  size_t size)
 {
-    uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
+    uint64_t nb_sectors;
     uint64_t total_sectors;
 
+    if (size % VIRTIO_BLK_SECTOR_SIZE) {
+        return false;
+    }
+
+    nb_sectors = size >> VIRTIO_BLK_SECTOR_BITS;
+
+    QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != VIRTIO_BLK_SECTOR_SIZE);
     if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
         return false;
     }
diff --git a/block/qcow2.c b/block/qcow2.c
index 9727ae8..39b91ef 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -5089,6 +5089,7 @@
     BDRVQcow2State *s = bs->opaque;
     bdi->cluster_size = s->cluster_size;
     bdi->vm_state_offset = qcow2_vm_state_offset(s);
+    bdi->is_dirty = s->incompatible_features & QCOW2_INCOMPAT_DIRTY;
     return 0;
 }
 
diff --git a/default-configs/targets/sparc64-linux-user.mak b/default-configs/targets/sparc64-linux-user.mak
index 8469242..9d23ab4 100644
--- a/default-configs/targets/sparc64-linux-user.mak
+++ b/default-configs/targets/sparc64-linux-user.mak
@@ -1,5 +1,6 @@
 TARGET_ARCH=sparc64
 TARGET_BASE_ARCH=sparc
+TARGET_ABI_DIR=sparc
 TARGET_SYSTBL_ABI=common,64
 TARGET_SYSTBL=syscall.tbl
 TARGET_ALIGNED_ONLY=y
diff --git a/fpu/softfloat-parts-addsub.c.inc b/fpu/softfloat-parts-addsub.c.inc
new file mode 100644
index 0000000..ae5c101
--- /dev/null
+++ b/fpu/softfloat-parts-addsub.c.inc
@@ -0,0 +1,62 @@
+/*
+ * Floating point arithmetic implementation
+ *
+ * The code in this source file is derived from release 2a of the SoftFloat
+ * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
+ * some later contributions) are provided under that license, as detailed below.
+ * It has subsequently been modified by contributors to the QEMU Project,
+ * so some portions are provided under:
+ *  the SoftFloat-2a license
+ *  the BSD license
+ *  GPL-v2-or-later
+ *
+ * Any future contributions to this file after December 1st 2014 will be
+ * taken to be licensed under the Softfloat-2a license unless specifically
+ * indicated otherwise.
+ */
+
+static void partsN(add_normal)(FloatPartsN *a, FloatPartsN *b)
+{
+    int exp_diff = a->exp - b->exp;
+
+    if (exp_diff > 0) {
+        frac_shrjam(b, exp_diff);
+    } else if (exp_diff < 0) {
+        frac_shrjam(a, -exp_diff);
+        a->exp = b->exp;
+    }
+
+    if (frac_add(a, a, b)) {
+        frac_shrjam(a, 1);
+        a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
+        a->exp += 1;
+    }
+}
+
+static bool partsN(sub_normal)(FloatPartsN *a, FloatPartsN *b)
+{
+    int exp_diff = a->exp - b->exp;
+    int shift;
+
+    if (exp_diff > 0) {
+        frac_shrjam(b, exp_diff);
+        frac_sub(a, a, b);
+    } else if (exp_diff < 0) {
+        a->exp = b->exp;
+        a->sign ^= 1;
+        frac_shrjam(a, -exp_diff);
+        frac_sub(a, b, a);
+    } else if (frac_sub(a, a, b)) {
+        /* Overflow means that A was less than B. */
+        frac_neg(a);
+        a->sign ^= 1;
+    }
+
+    shift = frac_normalize(a);
+    if (likely(shift < N)) {
+        a->exp -= shift;
+	return true;
+    }
+    a->cls = float_class_zero;
+    return false;
+}
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
new file mode 100644
index 0000000..a897a5a
--- /dev/null
+++ b/fpu/softfloat-parts.c.inc
@@ -0,0 +1,817 @@
+/*
+ * QEMU float support
+ *
+ * The code in this source file is derived from release 2a of the SoftFloat
+ * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
+ * some later contributions) are provided under that license, as detailed below.
+ * It has subsequently been modified by contributors to the QEMU Project,
+ * so some portions are provided under:
+ *  the SoftFloat-2a license
+ *  the BSD license
+ *  GPL-v2-or-later
+ *
+ * Any future contributions to this file after December 1st 2014 will be
+ * taken to be licensed under the Softfloat-2a license unless specifically
+ * indicated otherwise.
+ */
+
+static void partsN(return_nan)(FloatPartsN *a, float_status *s)
+{
+    switch (a->cls) {
+    case float_class_snan:
+        float_raise(float_flag_invalid, s);
+        if (s->default_nan_mode) {
+            parts_default_nan(a, s);
+        } else {
+            parts_silence_nan(a, s);
+        }
+        break;
+    case float_class_qnan:
+        if (s->default_nan_mode) {
+            parts_default_nan(a, s);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
+                                     float_status *s)
+{
+    if (is_snan(a->cls) || is_snan(b->cls)) {
+        float_raise(float_flag_invalid, s);
+    }
+
+    if (s->default_nan_mode) {
+        parts_default_nan(a, s);
+    } else {
+        int cmp = frac_cmp(a, b);
+        if (cmp == 0) {
+            cmp = a->sign < b->sign;
+        }
+
+        if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
+            a = b;
+        }
+        if (is_snan(a->cls)) {
+            parts_silence_nan(a, s);
+        }
+    }
+    return a;
+}
+
+static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
+                                            FloatPartsN *c, float_status *s,
+                                            int ab_mask, int abc_mask)
+{
+    int which;
+
+    if (unlikely(abc_mask & float_cmask_snan)) {
+        float_raise(float_flag_invalid, s);
+    }
+
+    which = pickNaNMulAdd(a->cls, b->cls, c->cls,
+                          ab_mask == float_cmask_infzero, s);
+
+    if (s->default_nan_mode || which == 3) {
+        /*
+         * Note that this check is after pickNaNMulAdd so that function
+         * has an opportunity to set the Invalid flag for infzero.
+         */
+        parts_default_nan(a, s);
+        return a;
+    }
+
+    switch (which) {
+    case 0:
+        break;
+    case 1:
+        a = b;
+        break;
+    case 2:
+        a = c;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    if (is_snan(a->cls)) {
+        parts_silence_nan(a, s);
+    }
+    return a;
+}
+
+/*
+ * Canonicalize the FloatParts structure.  Determine the class,
+ * unbias the exponent, and normalize the fraction.
+ */
+static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
+                                 const FloatFmt *fmt)
+{
+    if (unlikely(p->exp == 0)) {
+        if (likely(frac_eqz(p))) {
+            p->cls = float_class_zero;
+        } else if (status->flush_inputs_to_zero) {
+            float_raise(float_flag_input_denormal, status);
+            p->cls = float_class_zero;
+            frac_clear(p);
+        } else {
+            int shift = frac_normalize(p);
+            p->cls = float_class_normal;
+            p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
+        }
+    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
+        p->cls = float_class_normal;
+        p->exp -= fmt->exp_bias;
+        frac_shl(p, fmt->frac_shift);
+        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
+    } else if (likely(frac_eqz(p))) {
+        p->cls = float_class_inf;
+    } else {
+        frac_shl(p, fmt->frac_shift);
+        p->cls = (parts_is_snan_frac(p->frac_hi, status)
+                  ? float_class_snan : float_class_qnan);
+    }
+}
+
+/*
+ * Round and uncanonicalize a floating-point number by parts. There
+ * are FRAC_SHIFT bits that may require rounding at the bottom of the
+ * fraction; these bits will be removed. The exponent will be biased
+ * by EXP_BIAS and must be bounded by [0, EXP_MAX-1].
+ */
+static void partsN(uncanon)(FloatPartsN *p, float_status *s,
+                            const FloatFmt *fmt)
+{
+    const int exp_max = fmt->exp_max;
+    const int frac_shift = fmt->frac_shift;
+    const uint64_t frac_lsb = fmt->frac_lsb;
+    const uint64_t frac_lsbm1 = fmt->frac_lsbm1;
+    const uint64_t round_mask = fmt->round_mask;
+    const uint64_t roundeven_mask = fmt->roundeven_mask;
+    uint64_t inc;
+    bool overflow_norm;
+    int exp, flags = 0;
+
+    if (unlikely(p->cls != float_class_normal)) {
+        switch (p->cls) {
+        case float_class_zero:
+            p->exp = 0;
+            frac_clear(p);
+            return;
+        case float_class_inf:
+            g_assert(!fmt->arm_althp);
+            p->exp = fmt->exp_max;
+            frac_clear(p);
+            return;
+        case float_class_qnan:
+        case float_class_snan:
+            g_assert(!fmt->arm_althp);
+            p->exp = fmt->exp_max;
+            frac_shr(p, fmt->frac_shift);
+            return;
+        default:
+            break;
+        }
+        g_assert_not_reached();
+    }
+
+    switch (s->float_rounding_mode) {
+    case float_round_nearest_even:
+        overflow_norm = false;
+        inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
+        break;
+    case float_round_ties_away:
+        overflow_norm = false;
+        inc = frac_lsbm1;
+        break;
+    case float_round_to_zero:
+        overflow_norm = true;
+        inc = 0;
+        break;
+    case float_round_up:
+        inc = p->sign ? 0 : round_mask;
+        overflow_norm = p->sign;
+        break;
+    case float_round_down:
+        inc = p->sign ? round_mask : 0;
+        overflow_norm = !p->sign;
+        break;
+    case float_round_to_odd:
+        overflow_norm = true;
+        inc = p->frac_lo & frac_lsb ? 0 : round_mask;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    exp = p->exp + fmt->exp_bias;
+    if (likely(exp > 0)) {
+        if (p->frac_lo & round_mask) {
+            flags |= float_flag_inexact;
+            if (frac_addi(p, p, inc)) {
+                frac_shr(p, 1);
+                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
+                exp++;
+            }
+        }
+        frac_shr(p, frac_shift);
+
+        if (fmt->arm_althp) {
+            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
+            if (unlikely(exp > exp_max)) {
+                /* Overflow.  Return the maximum normal.  */
+                flags = float_flag_invalid;
+                exp = exp_max;
+                frac_allones(p);
+            }
+        } else if (unlikely(exp >= exp_max)) {
+            flags |= float_flag_overflow | float_flag_inexact;
+            if (overflow_norm) {
+                exp = exp_max - 1;
+                frac_allones(p);
+            } else {
+                p->cls = float_class_inf;
+                exp = exp_max;
+                frac_clear(p);
+            }
+        }
+    } else if (s->flush_to_zero) {
+        flags |= float_flag_output_denormal;
+        p->cls = float_class_zero;
+        exp = 0;
+        frac_clear(p);
+    } else {
+        bool is_tiny = s->tininess_before_rounding || exp < 0;
+
+        if (!is_tiny) {
+            FloatPartsN discard;
+            is_tiny = !frac_addi(&discard, p, inc);
+        }
+
+        frac_shrjam(p, 1 - exp);
+
+        if (p->frac_lo & round_mask) {
+            /* Need to recompute round-to-even/round-to-odd. */
+            switch (s->float_rounding_mode) {
+            case float_round_nearest_even:
+                inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
+                       ? frac_lsbm1 : 0);
+                break;
+            case float_round_to_odd:
+                inc = p->frac_lo & frac_lsb ? 0 : round_mask;
+                break;
+            default:
+                break;
+            }
+            flags |= float_flag_inexact;
+            frac_addi(p, p, inc);
+        }
+
+        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
+        frac_shr(p, frac_shift);
+
+        if (is_tiny && (flags & float_flag_inexact)) {
+            flags |= float_flag_underflow;
+        }
+        if (exp == 0 && frac_eqz(p)) {
+            p->cls = float_class_zero;
+        }
+    }
+    p->exp = exp;
+    float_raise(flags, s);
+}
+
+/*
+ * Returns the result of adding or subtracting the values of the
+ * floating-point values `a' and `b'. The operation is performed
+ * according to the IEC/IEEE Standard for Binary Floating-Point
+ * Arithmetic.
+ */
+static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
+                                   float_status *s, bool subtract)
+{
+    bool b_sign = b->sign ^ subtract;
+    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
+
+    if (a->sign != b_sign) {
+        /* Subtraction */
+        if (likely(ab_mask == float_cmask_normal)) {
+            if (parts_sub_normal(a, b)) {
+                return a;
+            }
+            /* Subtract was exact, fall through to set sign. */
+            ab_mask = float_cmask_zero;
+        }
+
+        if (ab_mask == float_cmask_zero) {
+            a->sign = s->float_rounding_mode == float_round_down;
+            return a;
+        }
+
+        if (unlikely(ab_mask & float_cmask_anynan)) {
+            goto p_nan;
+        }
+
+        if (ab_mask & float_cmask_inf) {
+            if (a->cls != float_class_inf) {
+                /* N - Inf */
+                goto return_b;
+            }
+            if (b->cls != float_class_inf) {
+                /* Inf - N */
+                return a;
+            }
+            /* Inf - Inf */
+            float_raise(float_flag_invalid, s);
+            parts_default_nan(a, s);
+            return a;
+        }
+    } else {
+        /* Addition */
+        if (likely(ab_mask == float_cmask_normal)) {
+            parts_add_normal(a, b);
+            return a;
+        }
+
+        if (ab_mask == float_cmask_zero) {
+            return a;
+        }
+
+        if (unlikely(ab_mask & float_cmask_anynan)) {
+            goto p_nan;
+        }
+
+        if (ab_mask & float_cmask_inf) {
+            a->cls = float_class_inf;
+            return a;
+        }
+    }
+
+    if (b->cls == float_class_zero) {
+        g_assert(a->cls == float_class_normal);
+        return a;
+    }
+
+    g_assert(a->cls == float_class_zero);
+    g_assert(b->cls == float_class_normal);
+ return_b:
+    b->sign = b_sign;
+    return b;
+
+ p_nan:
+    return parts_pick_nan(a, b, s);
+}
+
+/*
+ * Returns the result of multiplying the floating-point values `a' and
+ * `b'. The operation is performed according to the IEC/IEEE Standard
+ * for Binary Floating-Point Arithmetic.
+ */
+static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
+                                float_status *s)
+{
+    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
+    bool sign = a->sign ^ b->sign;
+
+    if (likely(ab_mask == float_cmask_normal)) {
+        FloatPartsW tmp;
+
+        frac_mulw(&tmp, a, b);
+        frac_truncjam(a, &tmp);
+
+        a->exp += b->exp + 1;
+        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
+            frac_add(a, a, a);
+            a->exp -= 1;
+        }
+
+        a->sign = sign;
+        return a;
+    }
+
+    /* Inf * Zero == NaN */
+    if (unlikely(ab_mask == float_cmask_infzero)) {
+        float_raise(float_flag_invalid, s);
+        parts_default_nan(a, s);
+        return a;
+    }
+
+    if (unlikely(ab_mask & float_cmask_anynan)) {
+        return parts_pick_nan(a, b, s);
+    }
+
+    /* Multiply by 0 or Inf */
+    if (ab_mask & float_cmask_inf) {
+        a->cls = float_class_inf;
+        a->sign = sign;
+        return a;
+    }
+
+    g_assert(ab_mask & float_cmask_zero);
+    a->cls = float_class_zero;
+    a->sign = sign;
+    return a;
+}
+
+/*
+ * Returns the result of multiplying the floating-point values `a' and
+ * `b' then adding 'c', with no intermediate rounding step after the
+ * multiplication. The operation is performed according to the
+ * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
+ * The flags argument allows the caller to select negation of the
+ * addend, the intermediate product, or the final result. (The
+ * difference between this and having the caller do a separate
+ * negation is that negating externally will flip the sign bit on NaNs.)
+ *
+ * The product and C are widened internally into a double-sized structure
+ * (FloatPartsW) to provide the extra space for the widening multiply.
+ */
+static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
+                                   FloatPartsN *c, int flags, float_status *s)
+{
+    int ab_mask, abc_mask;
+    FloatPartsW p_widen, c_widen;
+
+    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
+    abc_mask = float_cmask(c->cls) | ab_mask;
+
+    /*
+     * It is implementation-defined whether the cases of (0,inf,qnan)
+     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
+     * they return if they do), so we have to hand this information
+     * off to the target-specific pick-a-NaN routine.
+     */
+    if (unlikely(abc_mask & float_cmask_anynan)) {
+        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
+    }
+
+    if (flags & float_muladd_negate_c) {
+        c->sign ^= 1;
+    }
+
+    /* Compute the sign of the product into A. */
+    a->sign ^= b->sign;
+    if (flags & float_muladd_negate_product) {
+        a->sign ^= 1;
+    }
+
+    if (unlikely(ab_mask != float_cmask_normal)) {
+        if (unlikely(ab_mask == float_cmask_infzero)) {
+            goto d_nan;
+        }
+
+        if (ab_mask & float_cmask_inf) {
+            if (c->cls == float_class_inf && a->sign != c->sign) {
+                goto d_nan;
+            }
+            goto return_inf;
+        }
+
+        g_assert(ab_mask & float_cmask_zero);
+        if (c->cls == float_class_normal) {
+            *a = *c;
+            goto return_normal;
+        }
+        if (c->cls == float_class_zero) {
+            if (a->sign != c->sign) {
+                goto return_sub_zero;
+            }
+            goto return_zero;
+        }
+        g_assert(c->cls == float_class_inf);
+    }
+
+    if (unlikely(c->cls == float_class_inf)) {
+        a->sign = c->sign;
+        goto return_inf;
+    }
+
+    /* Perform the multiplication step. */
+    p_widen.sign = a->sign;
+    p_widen.exp = a->exp + b->exp + 1;
+    frac_mulw(&p_widen, a, b);
+    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
+        frac_add(&p_widen, &p_widen, &p_widen);
+        p_widen.exp -= 1;
+    }
+
+    /* Perform the addition step. */
+    if (c->cls != float_class_zero) {
+        /* Zero-extend C to less significant bits. */
+        frac_widen(&c_widen, c);
+        c_widen.exp = c->exp;
+
+        if (a->sign == c->sign) {
+            parts_add_normal(&p_widen, &c_widen);
+        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
+            goto return_sub_zero;
+        }
+    }
+
+    /* Narrow with sticky bit, for proper rounding later. */
+    frac_truncjam(a, &p_widen);
+    a->sign = p_widen.sign;
+    a->exp = p_widen.exp;
+
+ return_normal:
+    if (flags & float_muladd_halve_result) {
+        a->exp -= 1;
+    }
+ finish_sign:
+    if (flags & float_muladd_negate_result) {
+        a->sign ^= 1;
+    }
+    return a;
+
+ return_sub_zero:
+    a->sign = s->float_rounding_mode == float_round_down;
+ return_zero:
+    a->cls = float_class_zero;
+    goto finish_sign;
+
+ return_inf:
+    a->cls = float_class_inf;
+    goto finish_sign;
+
+ d_nan:
+    float_raise(float_flag_invalid, s);
+    parts_default_nan(a, s);
+    return a;
+}
+
+/*
+ * Returns the result of dividing the floating-point value `a' by the
+ * corresponding value `b'. The operation is performed according to
+ * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+ */
+static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
+                                float_status *s)
+{
+    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
+    bool sign = a->sign ^ b->sign;
+
+    if (likely(ab_mask == float_cmask_normal)) {
+        a->sign = sign;
+        a->exp -= b->exp + frac_div(a, b);
+        return a;
+    }
+
+    /* 0/0 or Inf/Inf => NaN */
+    if (unlikely(ab_mask == float_cmask_zero) ||
+        unlikely(ab_mask == float_cmask_inf)) {
+        float_raise(float_flag_invalid, s);
+        parts_default_nan(a, s);
+        return a;
+    }
+
+    /* All the NaN cases */
+    if (unlikely(ab_mask & float_cmask_anynan)) {
+        return parts_pick_nan(a, b, s);
+    }
+
+    a->sign = sign;
+
+    /* Inf / X */
+    if (a->cls == float_class_inf) {
+        return a;
+    }
+
+    /* 0 / X */
+    if (a->cls == float_class_zero) {
+        return a;
+    }
+
+    /* X / Inf */
+    if (b->cls == float_class_inf) {
+        a->cls = float_class_zero;
+        return a;
+    }
+
+    /* X / 0 => Inf */
+    g_assert(b->cls == float_class_zero);
+    float_raise(float_flag_divbyzero, s);
+    a->cls = float_class_inf;
+    return a;
+}
+
+/*
+ * Rounds the floating-point value `a' to an integer, and returns the
+ * result as a floating-point value. The operation is performed
+ * according to the IEC/IEEE Standard for Binary Floating-Point
+ * Arithmetic.
+ *
+ * parts_round_to_int_normal is an internal helper function for
+ * normal numbers only, returning true for inexact but not directly
+ * raising float_flag_inexact.
+ */
+static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
+                                        int scale, int frac_size)
+{
+    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
+    int shift_adj;
+
+    scale = MIN(MAX(scale, -0x10000), 0x10000);
+    a->exp += scale;
+
+    if (a->exp < 0) {
+        bool one;
+
+        /* All fractional */
+        switch (rmode) {
+        case float_round_nearest_even:
+            one = false;
+            if (a->exp == -1) {
+                FloatPartsN tmp;
+                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
+                frac_add(&tmp, a, a);
+                /* Anything remaining means frac > 0.5. */
+                one = !frac_eqz(&tmp);
+            }
+            break;
+        case float_round_ties_away:
+            one = a->exp == -1;
+            break;
+        case float_round_to_zero:
+            one = false;
+            break;
+        case float_round_up:
+            one = !a->sign;
+            break;
+        case float_round_down:
+            one = a->sign;
+            break;
+        case float_round_to_odd:
+            one = true;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+
+        frac_clear(a);
+        a->exp = 0;
+        if (one) {
+            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
+        } else {
+            a->cls = float_class_zero;
+        }
+        return true;
+    }
+
+    if (a->exp >= frac_size) {
+        /* All integral */
+        return false;
+    }
+
+    if (N > 64 && a->exp < N - 64) {
+        /*
+         * Rounding is not in the low word -- shift lsb to bit 2,
+         * which leaves room for sticky and rounding bit.
+         */
+        shift_adj = (N - 1) - (a->exp + 2);
+        frac_shrjam(a, shift_adj);
+        frac_lsb = 1 << 2;
+    } else {
+        shift_adj = 0;
+        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
+    }
+
+    frac_lsbm1 = frac_lsb >> 1;
+    rnd_mask = frac_lsb - 1;
+    rnd_even_mask = rnd_mask | frac_lsb;
+
+    if (!(a->frac_lo & rnd_mask)) {
+        /* Fractional bits already clear, undo the shift above. */
+        frac_shl(a, shift_adj);
+        return false;
+    }
+
+    switch (rmode) {
+    case float_round_nearest_even:
+        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
+        break;
+    case float_round_ties_away:
+        inc = frac_lsbm1;
+        break;
+    case float_round_to_zero:
+        inc = 0;
+        break;
+    case float_round_up:
+        inc = a->sign ? 0 : rnd_mask;
+        break;
+    case float_round_down:
+        inc = a->sign ? rnd_mask : 0;
+        break;
+    case float_round_to_odd:
+        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    if (shift_adj == 0) {
+        if (frac_addi(a, a, inc)) {
+            frac_shr(a, 1);
+            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
+            a->exp++;
+        }
+        a->frac_lo &= ~rnd_mask;
+    } else {
+        frac_addi(a, a, inc);
+        a->frac_lo &= ~rnd_mask;
+        /* Be careful shifting back, not to overflow */
+        frac_shl(a, shift_adj - 1);
+        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
+            a->exp++;
+        } else {
+            frac_add(a, a, a);
+        }
+    }
+    return true;
+}
+
+static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
+                                 int scale, float_status *s,
+                                 const FloatFmt *fmt)
+{
+    switch (a->cls) {
+    case float_class_qnan:
+    case float_class_snan:
+        parts_return_nan(a, s);
+        break;
+    case float_class_zero:
+    case float_class_inf:
+        break;
+    case float_class_normal:
+        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
+            float_raise(float_flag_inexact, s);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+/*
+ * Returns the result of converting the floating-point value `a' to
+ * the two's complement integer format. The conversion is performed
+ * according to the IEC/IEEE Standard for Binary Floating-Point
+ * Arithmetic---which means in particular that the conversion is
+ * rounded according to the current rounding mode. If `a' is a NaN,
+ * the largest positive integer is returned. Otherwise, if the
+ * conversion overflows, the largest integer with the same sign as `a'
+ * is returned.
+ */
+static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
+                                     int scale, int64_t min, int64_t max,
+                                     float_status *s)
+{
+    int flags = 0;
+    uint64_t r;
+
+    switch (p->cls) {
+    case float_class_snan:
+    case float_class_qnan:
+        flags = float_flag_invalid;
+        r = max;
+        break;
+
+    case float_class_inf:
+        flags = float_flag_invalid;
+        r = p->sign ? min : max;
+        break;
+
+    case float_class_zero:
+        return 0;
+
+    case float_class_normal:
+        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
+        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
+            flags = float_flag_inexact;
+        }
+
+        if (p->exp <= DECOMPOSED_BINARY_POINT) {
+            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
+        } else {
+            r = UINT64_MAX;
+        }
+        if (p->sign) {
+            if (r <= -(uint64_t)min) {
+                r = -r;
+            } else {
+                flags = float_flag_invalid;
+                r = min;
+            }
+        } else if (r > max) {
+            flags = float_flag_invalid;
+            r = max;
+        }
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+
+    float_raise(flags, s);
+    return r;
+}
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index e19809c..c895733 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -129,7 +129,7 @@
 | The pattern for a default generated deconstructed floating-point NaN.
 *----------------------------------------------------------------------------*/
 
-static FloatParts parts_default_nan(float_status *status)
+static void parts64_default_nan(FloatParts64 *p, float_status *status)
 {
     bool sign = 0;
     uint64_t frac;
@@ -163,7 +163,7 @@
     }
 #endif
 
-    return (FloatParts) {
+    *p = (FloatParts64) {
         .cls = float_class_qnan,
         .sign = sign,
         .exp = INT_MAX,
@@ -171,26 +171,55 @@
     };
 }
 
+static void parts128_default_nan(FloatParts128 *p, float_status *status)
+{
+    /*
+     * Extrapolate from the choices made by parts64_default_nan to fill
+     * in the quad-floating format.  If the low bit is set, assume we
+     * want to set all non-snan bits.
+     */
+    FloatParts64 p64;
+    parts64_default_nan(&p64, status);
+
+    *p = (FloatParts128) {
+        .cls = float_class_qnan,
+        .sign = p64.sign,
+        .exp = INT_MAX,
+        .frac_hi = p64.frac,        /* high word copied from the 64-bit NaN */
+        .frac_lo = -(p64.frac & 1)  /* all-ones if bit 0 is set, else zero */
+    };
+}
+
 /*----------------------------------------------------------------------------
 | Returns a quiet NaN from a signalling NaN for the deconstructed
 | floating-point parts.
 *----------------------------------------------------------------------------*/
 
-static FloatParts parts_silence_nan(FloatParts a, float_status *status)
+static uint64_t parts_silence_nan_frac(uint64_t frac, float_status *status)
 {
     g_assert(!no_signaling_nans(status));
-#if defined(TARGET_HPPA)
-    a.frac &= ~(1ULL << (DECOMPOSED_BINARY_POINT - 1));
-    a.frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 2);
-#else
+    g_assert(!status->default_nan_mode);
+
+    /* The only snan_bit_is_one target without default_nan_mode is HPPA. */
     if (snan_bit_is_one(status)) {
-        return parts_default_nan(status);
+        frac &= ~(1ULL << (DECOMPOSED_BINARY_POINT - 1));
+        frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 2);
     } else {
-        a.frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 1);
+        frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 1);
     }
-#endif
-    a.cls = float_class_qnan;
-    return a;
+    return frac;
+}
+
+static void parts64_silence_nan(FloatParts64 *p, float_status *status)
+{
+    p->frac = parts_silence_nan_frac(p->frac, status);
+    p->cls = float_class_qnan;
+}
+
+static void parts128_silence_nan(FloatParts128 *p, float_status *status)
+{
+    p->frac_hi = parts_silence_nan_frac(p->frac_hi, status);
+    p->cls = float_class_qnan;
 }
 
 /*----------------------------------------------------------------------------
@@ -228,18 +257,6 @@
     = make_floatx80_init(floatx80_infinity_high, floatx80_infinity_low);
 
 /*----------------------------------------------------------------------------
-| Raises the exceptions specified by `flags'.  Floating-point traps can be
-| defined here if desired.  It is currently not possible for such a trap
-| to substitute a result value.  If traps are not implemented, this routine
-| should be simply `float_exception_flags |= flags;'.
-*----------------------------------------------------------------------------*/
-
-void float_raise(uint8_t flags, float_status *status)
-{
-    status->float_exception_flags |= flags;
-}
-
-/*----------------------------------------------------------------------------
 | Internal canonical NaN format.
 *----------------------------------------------------------------------------*/
 typedef struct {
@@ -1071,25 +1088,6 @@
 }
 
 /*----------------------------------------------------------------------------
-| Returns a quiet NaN from a signalling NaN for the quadruple-precision
-| floating point value `a'.
-*----------------------------------------------------------------------------*/
-
-float128 float128_silence_nan(float128 a, float_status *status)
-{
-    if (no_signaling_nans(status)) {
-        g_assert_not_reached();
-    } else {
-        if (snan_bit_is_one(status)) {
-            return float128_default_nan(status);
-        } else {
-            a.high |= UINT64_C(0x0000800000000000);
-            return a;
-        }
-    }
-}
-
-/*----------------------------------------------------------------------------
 | Returns the result of converting the quadruple-precision floating-point NaN
 | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
 | exception is raised.
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 67cfa0f..0dc2203 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -132,7 +132,7 @@
         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                      soft_t ## _is_neg(*a));            \
-            s->float_exception_flags |= float_flag_input_denormal;      \
+            float_raise(float_flag_input_denormal, s);                  \
         }                                                               \
     }
 
@@ -360,7 +360,7 @@
 
     ur.h = hard(ua.h, ub.h);
     if (unlikely(f32_is_inf(ur))) {
-        s->float_exception_flags |= float_flag_overflow;
+        float_raise(float_flag_overflow, s);
     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
         goto soft;
     }
@@ -391,7 +391,7 @@
 
     ur.h = hard(ua.h, ub.h);
     if (unlikely(f64_is_inf(ur))) {
-        s->float_exception_flags |= float_flag_overflow;
+        float_raise(float_flag_overflow, s);
     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
         goto soft;
     }
@@ -469,6 +469,20 @@
     float_class_snan,
 } FloatClass;
 
+#define float_cmask(bit)  (1u << (bit))   /* single-bit mask for class 'bit' */
+
+enum {  /* one mask bit per FloatClass value used below, plus two unions */
+    float_cmask_zero    = float_cmask(float_class_zero),
+    float_cmask_normal  = float_cmask(float_class_normal),
+    float_cmask_inf     = float_cmask(float_class_inf),
+    float_cmask_qnan    = float_cmask(float_class_qnan),
+    float_cmask_snan    = float_cmask(float_class_snan),
+
+    float_cmask_infzero = float_cmask_zero | float_cmask_inf,   /* 0 or inf */
+    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,  /* any NaN */
+};
+
+
 /* Simple helpers for checking if, or what kind of, NaN we have */
 static inline __attribute__((unused)) bool is_nan(FloatClass c)
 {
@@ -486,26 +500,52 @@
 }
 
 /*
- * Structure holding all of the decomposed parts of a float. The
- * exponent is unbiased and the fraction is normalized. All
- * calculations are done with a 64 bit fraction and then rounded as
- * appropriate for the final format.
+ * Structure holding all of the decomposed parts of a float.
+ * The exponent is unbiased and the fraction is normalized.
  *
- * Thanks to the packed FloatClass a decent compiler should be able to
- * fit the whole structure into registers and avoid using the stack
- * for parameter passing.
+ * The fraction words are stored in big-endian word ordering,
+ * so that truncation from a larger format to a smaller format
+ * can be done simply by ignoring subsequent elements.
  */
 
 typedef struct {
-    uint64_t frac;
-    int32_t  exp;
     FloatClass cls;
     bool sign;
-} FloatParts;
+    int32_t exp;
+    union {
+        /* Routines that know the structure may reference the singular name. */
+        uint64_t frac;
+        /*
+         * Routines expanded with multiple structures reference "hi" and "lo"
+         * depending on the operation.  In FloatParts64, "hi" and "lo" are
+         * both the same word and aliased here.
+         */
+        uint64_t frac_hi;
+        uint64_t frac_lo;
+    };
+} FloatParts64;
 
-#define DECOMPOSED_BINARY_POINT    (64 - 2)
+typedef struct {
+    FloatClass cls;
+    bool sign;
+    int32_t exp;
+    uint64_t frac_hi;
+    uint64_t frac_lo;
+} FloatParts128;
+
+typedef struct {
+    FloatClass cls;
+    bool sign;
+    int32_t exp;
+    uint64_t frac_hi;
+    uint64_t frac_hm;  /* high-middle */
+    uint64_t frac_lm;  /* low-middle */
+    uint64_t frac_lo;
+} FloatParts256;
+
+/* These apply to the most significant word of each FloatPartsN. */
+#define DECOMPOSED_BINARY_POINT    63
 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
-#define DECOMPOSED_OVERFLOW_BIT    (DECOMPOSED_IMPLICIT_BIT << 1)
 
 /* Structure holding all of the relevant parameters for a format.
  *   exp_size: the size of the exponent field
@@ -539,11 +579,11 @@
     .exp_bias       = ((1 << E) - 1) >> 1,                           \
     .exp_max        = (1 << E) - 1,                                  \
     .frac_size      = F,                                             \
-    .frac_shift     = DECOMPOSED_BINARY_POINT - F,                   \
-    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - F),         \
-    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1),   \
-    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1,   \
-    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1
+    .frac_shift     = (-F - 1) & 63,                                 \
+    .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
+    .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
+    .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
+    .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
 
 static const FloatFmt float16_params = {
     FLOAT_PARAMS(5, 10)
@@ -566,65 +606,101 @@
     FLOAT_PARAMS(11, 52)
 };
 
-/* Unpack a float to parts, but do not canonicalize.  */
-static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
-{
-    const int sign_pos = fmt.frac_size + fmt.exp_size;
+static const FloatFmt float128_params = {
+    FLOAT_PARAMS(15, 112)
+};
 
-    return (FloatParts) {
+/* Unpack a float to parts, but do not canonicalize.  */
+static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
+{
+    const int f_size = fmt->frac_size;
+    const int e_size = fmt->exp_size;
+
+    *r = (FloatParts64) {
         .cls = float_class_unclassified,
-        .sign = extract64(raw, sign_pos, 1),
-        .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
-        .frac = extract64(raw, 0, fmt.frac_size),
+        .sign = extract64(raw, f_size + e_size, 1),
+        .exp = extract64(raw, f_size, e_size),
+        .frac = extract64(raw, 0, f_size)
     };
 }
 
-static inline FloatParts float16_unpack_raw(float16 f)
+static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
 {
-    return unpack_raw(float16_params, f);
+    unpack_raw64(p, &float16_params, f);
 }
 
-static inline FloatParts bfloat16_unpack_raw(bfloat16 f)
+static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
 {
-    return unpack_raw(bfloat16_params, f);
+    unpack_raw64(p, &bfloat16_params, f);
 }
 
-static inline FloatParts float32_unpack_raw(float32 f)
+static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
 {
-    return unpack_raw(float32_params, f);
+    unpack_raw64(p, &float32_params, f);
 }
 
-static inline FloatParts float64_unpack_raw(float64 f)
+static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
 {
-    return unpack_raw(float64_params, f);
+    unpack_raw64(p, &float64_params, f);
+}
+
+static void float128_unpack_raw(FloatParts128 *p, float128 f)
+{
+    const int f_size = float128_params.frac_size - 64;  /* frac bits in f.high */
+    const int e_size = float128_params.exp_size;
+
+    *p = (FloatParts128) {
+        .cls = float_class_unclassified,    /* raw fields only, not classified */
+        .sign = extract64(f.high, f_size + e_size, 1),
+        .exp = extract64(f.high, f_size, e_size),
+        .frac_hi = extract64(f.high, 0, f_size),
+        .frac_lo = f.low,                   /* low word is all fraction */
+    };
 }
 
 /* Pack a float from parts, but do not canonicalize.  */
-static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
+static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
 {
-    const int sign_pos = fmt.frac_size + fmt.exp_size;
-    uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
-    return deposit64(ret, sign_pos, 1, p.sign);
+    const int f_size = fmt->frac_size;
+    const int e_size = fmt->exp_size;
+    uint64_t ret;
+
+    ret = (uint64_t)p->sign << (f_size + e_size);
+    ret = deposit64(ret, f_size, e_size, p->exp);
+    ret = deposit64(ret, 0, f_size, p->frac);
+    return ret;
 }
 
-static inline float16 float16_pack_raw(FloatParts p)
+static inline float16 float16_pack_raw(const FloatParts64 *p)
 {
-    return make_float16(pack_raw(float16_params, p));
+    return make_float16(pack_raw64(p, &float16_params));
 }
 
-static inline bfloat16 bfloat16_pack_raw(FloatParts p)
+static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
 {
-    return pack_raw(bfloat16_params, p);
+    return pack_raw64(p, &bfloat16_params);
 }
 
-static inline float32 float32_pack_raw(FloatParts p)
+static inline float32 float32_pack_raw(const FloatParts64 *p)
 {
-    return make_float32(pack_raw(float32_params, p));
+    return make_float32(pack_raw64(p, &float32_params));
 }
 
-static inline float64 float64_pack_raw(FloatParts p)
+static inline float64 float64_pack_raw(const FloatParts64 *p)
 {
-    return make_float64(pack_raw(float64_params, p));
+    return make_float64(pack_raw64(p, &float64_params));
+}
+
+static float128 float128_pack_raw(const FloatParts128 *p)
+{
+    const int f_size = float128_params.frac_size - 64;  /* frac bits in 'hi' */
+    const int e_size = float128_params.exp_size;
+    uint64_t hi;        /* high word being assembled: sign, exp, frac_hi */
+
+    hi = (uint64_t)p->sign << (f_size + e_size);
+    hi = deposit64(hi, f_size, e_size, p->exp);
+    hi = deposit64(hi, 0, f_size, p->frac_hi);
+    return make_float128(hi, p->frac_lo);   /* frac_lo is the low word as-is */
 }
 
 /*----------------------------------------------------------------------------
@@ -637,474 +713,807 @@
 *----------------------------------------------------------------------------*/
 #include "softfloat-specialize.c.inc"
 
-/* Canonicalize EXP and FRAC, setting CLS.  */
-static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
-                                  float_status *status)
-{
-    if (part.exp == parm->exp_max && !parm->arm_althp) {
-        if (part.frac == 0) {
-            part.cls = float_class_inf;
-        } else {
-            part.frac <<= parm->frac_shift;
-            part.cls = (parts_is_snan_frac(part.frac, status)
-                        ? float_class_snan : float_class_qnan);
-        }
-    } else if (part.exp == 0) {
-        if (likely(part.frac == 0)) {
-            part.cls = float_class_zero;
-        } else if (status->flush_inputs_to_zero) {
-            float_raise(float_flag_input_denormal, status);
-            part.cls = float_class_zero;
-            part.frac = 0;
-        } else {
-            int shift = clz64(part.frac) - 1;
-            part.cls = float_class_normal;
-            part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
-            part.frac <<= shift;
-        }
-    } else {
-        part.cls = float_class_normal;
-        part.exp -= parm->exp_bias;
-        part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
-    }
-    return part;
-}
+#define PARTS_GENERIC_64_128(NAME, P) \
+    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
 
-/* Round and uncanonicalize a floating-point number by parts. There
- * are FRAC_SHIFT bits that may require rounding at the bottom of the
- * fraction; these bits will be removed. The exponent will be biased
- * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
+#define PARTS_GENERIC_64_128_256(NAME, P) \
+    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
+                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
+
+#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
+#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
+
+static void parts64_return_nan(FloatParts64 *a, float_status *s);
+static void parts128_return_nan(FloatParts128 *a, float_status *s);
+
+#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
+
+static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
+                                      float_status *s);
+static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
+                                        float_status *s);
+
+#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
+
+static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
+                                             FloatParts64 *c, float_status *s,
+                                             int ab_mask, int abc_mask);
+static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
+                                               FloatParts128 *b,
+                                               FloatParts128 *c,
+                                               float_status *s,
+                                               int ab_mask, int abc_mask);
+
+#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
+    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
+
+static void parts64_canonicalize(FloatParts64 *p, float_status *status,
+                                 const FloatFmt *fmt);
+static void parts128_canonicalize(FloatParts128 *p, float_status *status,
+                                  const FloatFmt *fmt);
+
+#define parts_canonicalize(A, S, F) \
+    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
+
+static void parts64_uncanon(FloatParts64 *p, float_status *status,
+                            const FloatFmt *fmt);
+static void parts128_uncanon(FloatParts128 *p, float_status *status,
+                             const FloatFmt *fmt);
+
+#define parts_uncanon(A, S, F) \
+    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
+
+static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
+static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
+static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
+
+#define parts_add_normal(A, B) \
+    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
+
+static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
+static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
+static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
+
+#define parts_sub_normal(A, B) \
+    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
+
+static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
+                                    float_status *s, bool subtract);
+static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
+                                      float_status *s, bool subtract);
+
+#define parts_addsub(A, B, S, Z) \
+    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
+
+static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
+                                 float_status *s);
+static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
+                                   float_status *s);
+
+#define parts_mul(A, B, S) \
+    PARTS_GENERIC_64_128(mul, A)(A, B, S)
+
+static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
+                                    FloatParts64 *c, int flags,
+                                    float_status *s);
+static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
+                                      FloatParts128 *c, int flags,
+                                      float_status *s);
+
+#define parts_muladd(A, B, C, Z, S) \
+    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
+
+static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
+                                 float_status *s);
+static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
+                                   float_status *s);
+
+#define parts_div(A, B, S) \
+    PARTS_GENERIC_64_128(div, A)(A, B, S)
+
+static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
+                                        int scale, int frac_size);
+static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
+                                         int scale, int frac_size);
+
+#define parts_round_to_int_normal(A, R, C, F) \
+    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
+
+static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
+                                 int scale, float_status *s,
+                                 const FloatFmt *fmt);
+static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
+                                  int scale, float_status *s,
+                                  const FloatFmt *fmt);
+
+#define parts_round_to_int(A, R, C, S, F) \
+    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
+
+static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
+                                     int scale, int64_t min, int64_t max,
+                                     float_status *s);
+static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
+                                     int scale, int64_t min, int64_t max,
+                                     float_status *s);
+
+#define parts_float_to_sint(P, R, Z, MN, MX, S) \
+    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
+
+/*
+ * Helper functions for softfloat-parts.c.inc, per-size operations.
  */
 
-static FloatParts round_canonical(FloatParts p, float_status *s,
-                                  const FloatFmt *parm)
+#define FRAC_GENERIC_64_128(NAME, P) \
+    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
+
+#define FRAC_GENERIC_64_128_256(NAME, P) \
+    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
+                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
+
+static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
 {
-    const uint64_t frac_lsb = parm->frac_lsb;
-    const uint64_t frac_lsbm1 = parm->frac_lsbm1;
-    const uint64_t round_mask = parm->round_mask;
-    const uint64_t roundeven_mask = parm->roundeven_mask;
-    const int exp_max = parm->exp_max;
-    const int frac_shift = parm->frac_shift;
-    uint64_t frac, inc;
-    int exp, flags = 0;
-    bool overflow_norm;
+    return uadd64_overflow(a->frac, b->frac, &r->frac);
+}
 
-    frac = p.frac;
-    exp = p.exp;
+static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
+{
+    bool c = 0;
+    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
+    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
+    return c;
+}
 
-    switch (p.cls) {
-    case float_class_normal:
-        switch (s->float_rounding_mode) {
-        case float_round_nearest_even:
-            overflow_norm = false;
-            inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
-            break;
-        case float_round_ties_away:
-            overflow_norm = false;
-            inc = frac_lsbm1;
-            break;
-        case float_round_to_zero:
-            overflow_norm = true;
-            inc = 0;
-            break;
-        case float_round_up:
-            inc = p.sign ? 0 : round_mask;
-            overflow_norm = p.sign;
-            break;
-        case float_round_down:
-            inc = p.sign ? round_mask : 0;
-            overflow_norm = !p.sign;
-            break;
-        case float_round_to_odd:
-            overflow_norm = true;
-            inc = frac & frac_lsb ? 0 : round_mask;
-            break;
-        default:
-            g_assert_not_reached();
+static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
+{
+    bool c = 0;
+    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
+    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
+    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
+    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
+    return c;
+}
+
+#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
+
+static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
+{
+    return uadd64_overflow(a->frac, c, &r->frac);
+}
+
+static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
+{   /* r = a + c over both words; returns the high-word overflow flag */
+    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);  /* c := carry out */
+    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
+}
+
+#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
+
+static void frac64_allones(FloatParts64 *a)
+{
+    a->frac = -1;
+}
+
+static void frac128_allones(FloatParts128 *a)
+{
+    a->frac_hi = a->frac_lo = -1;
+}
+
+#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
+
+static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
+{
+    return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
+}
+
+static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
+{   /* three-way unsigned compare of the two fractions: -1, 0 or 1 */
+    uint64_t ta = a->frac_hi, tb = b->frac_hi;
+    if (ta == tb) {     /* high words tie: fall back to the low words */
+        ta = a->frac_lo, tb = b->frac_lo;
+        if (ta == tb) {
+            return 0;
+        }
+    }
+    return ta < tb ? -1 : 1;    /* the first differing word decides */
+}
 
-        exp += parm->exp_bias;
-        if (likely(exp > 0)) {
-            if (frac & round_mask) {
-                flags |= float_flag_inexact;
-                frac += inc;
-                if (frac & DECOMPOSED_OVERFLOW_BIT) {
-                    frac >>= 1;
-                    exp++;
-                }
-            }
-            frac >>= frac_shift;
+#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
 
-            if (parm->arm_althp) {
-                /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
-                if (unlikely(exp > exp_max)) {
-                    /* Overflow.  Return the maximum normal.  */
-                    flags = float_flag_invalid;
-                    exp = exp_max;
-                    frac = -1;
-                }
-            } else if (unlikely(exp >= exp_max)) {
-                flags |= float_flag_overflow | float_flag_inexact;
-                if (overflow_norm) {
-                    exp = exp_max - 1;
-                    frac = -1;
-                } else {
-                    p.cls = float_class_inf;
-                    goto do_inf;
-                }
-            }
-        } else if (s->flush_to_zero) {
-            flags |= float_flag_output_denormal;
-            p.cls = float_class_zero;
-            goto do_zero;
-        } else {
-            bool is_tiny = s->tininess_before_rounding
-                        || (exp < 0)
-                        || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
+static void frac64_clear(FloatParts64 *a)
+{
+    a->frac = 0;
+}
 
-            shift64RightJamming(frac, 1 - exp, &frac);
-            if (frac & round_mask) {
-                /* Need to recompute round-to-even.  */
-                switch (s->float_rounding_mode) {
-                case float_round_nearest_even:
-                    inc = ((frac & roundeven_mask) != frac_lsbm1
-                           ? frac_lsbm1 : 0);
-                    break;
-                case float_round_to_odd:
-                    inc = frac & frac_lsb ? 0 : round_mask;
-                    break;
-                default:
-                    break;
-                }
-                flags |= float_flag_inexact;
-                frac += inc;
-            }
+static void frac128_clear(FloatParts128 *a)
+{
+    a->frac_hi = a->frac_lo = 0;
+}
 
-            exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
-            frac >>= frac_shift;
+#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
 
-            if (is_tiny && (flags & float_flag_inexact)) {
-                flags |= float_flag_underflow;
-            }
-            if (exp == 0 && frac == 0) {
-                p.cls = float_class_zero;
-            }
-        }
-        break;
+static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
+{
+    uint64_t n1, n0, r, q;
+    bool ret;
 
-    case float_class_zero:
-    do_zero:
-        exp = 0;
-        frac = 0;
-        break;
+    /*
+     * We want a 2*N / N-bit division to produce exactly an N-bit
+     * result, so that we do not lose any precision and so that we
+     * do not have to renormalize afterward.  If A.frac < B.frac,
+     * then division would produce an (N-1)-bit result; shift A left
+     * by one to produce an N-bit result, and return true to
+     * decrement the exponent to match.
+     *
+     * The udiv_qrnnd algorithm that we're using requires normalization,
+     * i.e. the msb of the denominator must be set, which is already true.
+     */
+    ret = a->frac < b->frac;
+    if (ret) {
+        n0 = a->frac;
+        n1 = 0;
+    } else {
+        n0 = a->frac >> 1;
+        n1 = a->frac << 63;
+    }
+    q = udiv_qrnnd(&r, n0, n1, b->frac);
 
-    case float_class_inf:
-    do_inf:
-        assert(!parm->arm_althp);
-        exp = exp_max;
-        frac = 0;
-        break;
+    /* Set lsb if there is a remainder, to set inexact. */
+    a->frac = q | (r != 0);
 
-    case float_class_qnan:
-    case float_class_snan:
-        assert(!parm->arm_althp);
-        exp = exp_max;
-        frac >>= parm->frac_shift;
-        break;
+    return ret;
+}
 
-    default:
-        g_assert_not_reached();
+static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
+{
+    uint64_t q0, q1, a0, a1, b0, b1;
+    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
+    bool ret = false;
+
+    a0 = a->frac_hi, a1 = a->frac_lo;
+    b0 = b->frac_hi, b1 = b->frac_lo;
+
+    ret = lt128(a0, a1, b0, b1);
+    if (!ret) {
+        a1 = shr_double(a0, a1, 1);
+        a0 = a0 >> 1;
     }
 
-    float_raise(flags, s);
-    p.exp = exp;
-    p.frac = frac;
-    return p;
+    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
+    q0 = estimateDiv128To64(a0, a1, b0);
+
+    /*
+     * Estimate is high because B1 was not included (unless B1 == 0).
+     * Reduce quotient and increase remainder until remainder is non-negative.
+     * This loop will execute 0 to 2 times.
+     */
+    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
+    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
+    while (r0 != 0) {
+        q0--;
+        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
+    }
+
+    /* Repeat using the remainder, producing a second word of quotient. */
+    q1 = estimateDiv128To64(r1, r2, b0);
+    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
+    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
+    while (r1 != 0) {
+        q1--;
+        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
+    }
+
+    /* Any remainder indicates inexact; set sticky bit. */
+    q1 |= (r2 | r3) != 0;
+
+    a->frac_hi = q0;
+    a->frac_lo = q1;
+    return ret;
 }
 
-/* Explicit FloatFmt version */
-static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
-                                            const FloatFmt *params)
+#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
+
+static bool frac64_eqz(FloatParts64 *a)
 {
-    return sf_canonicalize(float16_unpack_raw(f), params, s);
+    return a->frac == 0;
 }
 
-static FloatParts float16_unpack_canonical(float16 f, float_status *s)
+static bool frac128_eqz(FloatParts128 *a)
 {
-    return float16a_unpack_canonical(f, s, &float16_params);
+    return (a->frac_hi | a->frac_lo) == 0;
 }
 
-static FloatParts bfloat16_unpack_canonical(bfloat16 f, float_status *s)
+#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
+
+static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
 {
-    return sf_canonicalize(bfloat16_unpack_raw(f), &bfloat16_params, s);
+    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
 }
 
-static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
+static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
+{
+    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
+                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
+}
+
+#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
+
+static void frac64_neg(FloatParts64 *a)
+{
+    a->frac = -a->frac;
+}
+
+static void frac128_neg(FloatParts128 *a)
+{  /* In-place 0 - a, low limb first so that c links the two subtractions. */
+    bool c = 0;  /* state threaded through usub64_borrow; presumably the borrow */
+    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
+    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
+}
+
+static void frac256_neg(FloatParts256 *a)
+{  /* In-place 0 - a across four limbs, lowest (frac_lo) to highest (frac_hi). */
+    bool c = 0;  /* state threaded through usub64_borrow; presumably the borrow */
+    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
+    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
+    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
+    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
+}
+
+#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)  /* type-generic neg over the 64/128/256 variants -- TODO confirm dispatch */
+
+static int frac64_normalize(FloatParts64 *a)
+{  /* Left-shift a->frac by its clz64() count and return that count. */
+    if (a->frac) {
+        int shift = clz64(a->frac);
+        a->frac <<= shift;
+        return shift;
+    }
+    return 64;  /* zero fraction: left untouched, reported as a shift of 64 */
+}
+
+static int frac128_normalize(FloatParts128 *a)
+{  /* Normalize hi:lo in place; returns the left shift applied. */
+    if (a->frac_hi) {
+        int shl = clz64(a->frac_hi);
+        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);  /* hi is updated before lo, so it still sees the unshifted lo */
+        a->frac_lo <<= shl;
+        return shl;
+    } else if (a->frac_lo) {
+        int shl = clz64(a->frac_lo);
+        a->frac_hi = a->frac_lo << shl;  /* hi was zero: lo moves up a whole limb */
+        a->frac_lo = 0;
+        return shl + 64;
+    }
+    return 128;  /* all-zero fraction: left untouched */
+}
+
+static int frac256_normalize(FloatParts256 *a)
+{  /* Normalize hi:hm:lm:lo in place; returns the left shift applied (256 if all zero). */
+    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
+    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
+    int ret, shl;
+
+    if (likely(a0)) {
+        shl = clz64(a0);
+        if (shl == 0) {
+            return 0;  /* nothing to shift, so nothing is written back */
+        }
+        ret = shl;
+    } else {
+        if (a1) {  /* whole-limb moves: ret gains 64 per leading zero limb */
+            ret = 64;
+            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
+        } else if (a2) {
+            ret = 128;
+            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
+        } else if (a3) {
+            ret = 192;
+            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
+        } else {
+            ret = 256;  /* all-zero input: store zeros and report 256 */
+            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
+            goto done;
+        }
+        shl = clz64(a0);
+        if (shl == 0) {
+            goto done;  /* the limb move sufficed; the moved limbs are still stored */
+        }
+        ret += shl;
+    }
+
+    a0 = shl_double(a0, a1, shl);  /* top limb first, so each call still sees its unshifted lower neighbour */
+    a1 = shl_double(a1, a2, shl);
+    a2 = shl_double(a2, a3, shl);
+    a3 <<= shl;
+
+ done:
+    a->frac_hi = a0;
+    a->frac_hm = a1;
+    a->frac_lm = a2;
+    a->frac_lo = a3;
+    return ret;
+}
+
+#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)  /* type-generic normalize over 64/128/256 -- TODO confirm dispatch */
+
+static void frac64_shl(FloatParts64 *a, int c)
+{  /* In-place left shift of a->frac by c bits. */
+    a->frac <<= c;  /* no range check on c here; callers presumably keep it within 0..63 */
+}
+
+static void frac128_shl(FloatParts128 *a, int c)
+{  /* In-place left shift of hi:lo; only bit 6 and bits 0-5 of c are examined. */
+    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
+
+    if (c & 64) {
+        a0 = a1, a1 = 0;  /* whole-limb move: lo becomes hi */
+    }
+
+    c &= 63;
+    if (c) {  /* skip the helper when no sub-limb shift remains */
+        a0 = shl_double(a0, a1, c);
+        a1 = a1 << c;
+    }
+
+    a->frac_hi = a0;
+    a->frac_lo = a1;
+}
+
+#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)  /* type-generic shl; presumably dispatches on A's type -- TODO confirm */
+
+static void frac64_shr(FloatParts64 *a, int c)
+{  /* In-place right shift of a->frac by c bits; shifted-out bits are dropped. */
+    a->frac >>= c;  /* no range check on c here; callers presumably keep it within 0..63 */
+}
+
+static void frac128_shr(FloatParts128 *a, int c)
+{  /* In-place right shift of hi:lo; only bit 6 and bits 0-5 of c are examined. */
+    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
+
+    if (c & 64) {
+        a1 = a0, a0 = 0;  /* whole-limb move: hi becomes lo, old lo is discarded */
+    }
+
+    c &= 63;
+    if (c) {  /* skip the helper when no sub-limb shift remains */
+        a1 = shr_double(a0, a1, c);  /* lo is computed before hi is shifted, so it sees the unshifted hi */
+        a0 = a0 >> c;
+    }
+
+    a->frac_hi = a0;
+    a->frac_lo = a1;
+}
+
+#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)  /* type-generic shr; presumably dispatches on A's type -- TODO confirm */
+
+static void frac64_shrjam(FloatParts64 *a, int c)
+{  /* Right-shift a->frac by c and OR a sticky flag into bit 0; c == 0 leaves a untouched. */
+    uint64_t a0 = a->frac;
+
+    if (likely(c != 0)) {  /* NOTE(review): c is assumed non-negative -- confirm against callers */
+        if (likely(c < 64)) {
+            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);  /* shr_double(a0, 0, c) presumably yields the bits shifted out */
+        } else {
+            a0 = a0 != 0;  /* every bit is shifted out: only the sticky flag remains */
+        }
+        a->frac = a0;
+    }
+}
+
+static void frac128_shrjam(FloatParts128 *a, int c)
+{  /* Right-shift hi:lo by c in place, OR-ing (sticky != 0) into bit 0 of frac_lo. */
+    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
+    uint64_t sticky = 0;  /* accumulates what falls off the low end */
+
+    if (unlikely(c == 0)) {
+        return;
+    } else if (likely(c < 64)) {
+        /* nothing */
+    } else if (likely(c < 128)) {
+        sticky = a1;  /* whole-limb move: old lo is entirely shifted out */
+        a1 = a0;
+        a0 = 0;
+        c &= 63;
+        if (c == 0) {
+            goto done;  /* exactly one limb: no sub-limb shift left */
+        }
+    } else {
+        sticky = a0 | a1;  /* c >= 128: every bit is shifted out */
+        a0 = a1 = 0;
+        goto done;
+    }
+
+    sticky |= shr_double(a1, 0, c);  /* presumably the bits leaving a1 -- TODO confirm shr_double semantics */
+    a1 = shr_double(a0, a1, c);
+    a0 = a0 >> c;
+
+ done:
+    a->frac_lo = a1 | (sticky != 0);
+    a->frac_hi = a0;
+}
+
+static void frac256_shrjam(FloatParts256 *a, int c)
+{  /* Right-shift hi:hm:lm:lo by c in place, OR-ing (sticky != 0) into bit 0 of frac_lo. */
+    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
+    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
+    uint64_t sticky = 0;  /* accumulates what falls off the low end */
+
+    if (unlikely(c == 0)) {
+        return;
+    } else if (likely(c < 64)) {
+        /* nothing */
+    } else if (likely(c < 256)) {
+        if (unlikely(c & 128)) {  /* move down two limbs; the two low limbs become sticky */
+            sticky |= a2 | a3;
+            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
+        }
+        if (unlikely(c & 64)) {  /* move down one more limb */
+            sticky |= a3;
+            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
+        }
+        c &= 63;
+        if (c == 0) {
+            goto done;  /* whole-limb moves only: no sub-limb shift left */
+        }
+    } else {
+        sticky = a0 | a1 | a2 | a3;  /* c >= 256: every bit is shifted out */
+        a0 = a1 = a2 = a3 = 0;
+        goto done;
+    }
+
+    sticky |= shr_double(a3, 0, c);  /* presumably the bits leaving a3 -- TODO confirm shr_double semantics */
+    a3 = shr_double(a2, a3, c);  /* lowest limb first, so each call still sees its unshifted upper neighbour */
+    a2 = shr_double(a1, a2, c);
+    a1 = shr_double(a0, a1, c);
+    a0 = a0 >> c;
+
+ done:
+    a->frac_lo = a3 | (sticky != 0);
+    a->frac_lm = a2;
+    a->frac_hm = a1;
+    a->frac_hi = a0;
+}
+
+#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)  /* type-generic shrjam over 64/128/256 -- TODO confirm dispatch */
+
+static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
+{  /* r->frac = a->frac - b->frac; the helper's bool result is passed straight back. */
+    return usub64_overflow(a->frac, b->frac, &r->frac);  /* presumably true when the subtraction borrows -- TODO confirm */
+}
+
+static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
+{  /* r = a - b limb by limb, low limb first; returns the final value of c. */
+    bool c = 0;  /* state threaded through usub64_borrow; presumably the borrow */
+    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
+    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
+    return c;
+}
+
+static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
+{  /* r = a - b across four limbs, lowest first; returns the final value of c. */
+    bool c = 0;  /* state threaded through usub64_borrow; presumably the borrow */
+    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
+    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
+    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
+    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
+    return c;
+}
+
+#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)  /* type-generic sub, keyed on the result R -- TODO confirm dispatch */
+
+static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
+{  /* Narrow 128 -> 64 bits: keep the high limb and flag any dropped low bits. */
+    r->frac = a->frac_hi | (a->frac_lo != 0);  /* bit 0 is set when the discarded limb was non-zero */
+}
+
+static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
+{  /* Narrow 256 -> 128 bits: keep the top two limbs and flag any dropped low bits. */
+    r->frac_hi = a->frac_hi;
+    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);  /* bit 0 is set when either discarded limb was non-zero */
+}
+
+#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)  /* type-generic truncjam, keyed on the narrow result R -- TODO confirm dispatch */
+
+static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
+{  /* Widen 64 -> 128 bits: the fraction lands in the high limb. */
+    r->frac_hi = a->frac;
+    r->frac_lo = 0;  /* new low limb is zero-filled */
+}
+
+static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
+{  /* Widen 128 -> 256 bits: the source limbs land in the top two limbs. */
+    r->frac_hi = a->frac_hi;
+    r->frac_hm = a->frac_lo;
+    r->frac_lm = 0;  /* the two new low limbs are zero-filled */
+    r->frac_lo = 0;
+}
+
+#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)  /* type-generic widen: A is the wide result, dispatch keyed on the narrow source B -- TODO confirm */
+
+#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)  /* builds parts<N>_<NAME>, assuming glue() token-pastes */
+#define FloatPartsN    glue(FloatParts,N)  /* parts type at the current width N */
+#define FloatPartsW    glue(FloatParts,W)  /* parts type at the wider width W */
+
+#define N 64  /* first pass: N = 64 with W = 128 */
+#define W 128
+
+#include "softfloat-parts-addsub.c.inc"
+#include "softfloat-parts.c.inc"
+
+#undef  N
+#undef  W
+#define N 128  /* second pass: N = 128 with W = 256 */
+#define W 256
+
+#include "softfloat-parts-addsub.c.inc"
+#include "softfloat-parts.c.inc"
+
+#undef  N
+#undef  W
+#define N            256  /* third pass: addsub template only; W stays undefined */
+
+#include "softfloat-parts-addsub.c.inc"
+
+#undef  N
+#undef  W  /* W is not defined at this point; #undef of an undefined name is a no-op */
+#undef  partsN
+#undef  FloatPartsN
+#undef  FloatPartsW
+
+/*
+ * Pack/unpack routines with a specific FloatFmt.
+ */
+
+static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
+                                      float_status *s, const FloatFmt *params)
+{  /* Raw-unpack f into *p, then canonicalize *p with the caller-supplied format. */
+    float16_unpack_raw(p, f);
+    parts_canonicalize(p, s, params);
+}
+
+static void float16_unpack_canonical(FloatParts64 *p, float16 f,
+                                     float_status *s)
+{  /* float16a_unpack_canonical() with the format fixed to float16_params. */
+    float16a_unpack_canonical(p, f, s, &float16_params);
+}
+
+static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
+                                      float_status *s)
+{  /* Raw-unpack f into *p, then canonicalize *p with bfloat16_params. */
+    bfloat16_unpack_raw(p, f);
+    parts_canonicalize(p, s, &bfloat16_params);
+}
+
+static float16 float16a_round_pack_canonical(FloatParts64 *p,
+                                             float_status *s,
                                              const FloatFmt *params)
 {
-    return float16_pack_raw(round_canonical(p, s, params));
+    parts_uncanon(p, s, params);  /* takes *p by pointer with the caller-supplied format; presumably rewrites it in place */
+    return float16_pack_raw(p);  /* pack the (possibly updated) parts into a float16 */
 }
 
-static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
+static float16 float16_round_pack_canonical(FloatParts64 *p,
+                                            float_status *s)  /* float16a variant with the format fixed to float16_params */
 {
     return float16a_round_pack_canonical(p, s, &float16_params);
 }
 
-static bfloat16 bfloat16_round_pack_canonical(FloatParts p, float_status *s)
+static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
+                                              float_status *s)
 {
-    return bfloat16_pack_raw(round_canonical(p, s, &bfloat16_params));
+    parts_uncanon(p, s, &bfloat16_params);  /* takes *p by pointer; presumably rewrites it in place */
+    return bfloat16_pack_raw(p);  /* pack the (possibly updated) parts into a bfloat16 */
 }
 
-static FloatParts float32_unpack_canonical(float32 f, float_status *s)
+static void float32_unpack_canonical(FloatParts64 *p, float32 f,
+                                     float_status *s)  /* result is written to *p instead of being returned by value */
 {
-    return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
+    float32_unpack_raw(p, f);
+    parts_canonicalize(p, s, &float32_params);  /* canonicalize *p using float32_params */
 }
 
-static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
+static float32 float32_round_pack_canonical(FloatParts64 *p,
+                                            float_status *s)
 {
-    return float32_pack_raw(round_canonical(p, s, &float32_params));
+    parts_uncanon(p, s, &float32_params);  /* takes *p by pointer; presumably rewrites it in place */
+    return float32_pack_raw(p);  /* pack the (possibly updated) parts into a float32 */
 }
 
-static FloatParts float64_unpack_canonical(float64 f, float_status *s)
+static void float64_unpack_canonical(FloatParts64 *p, float64 f,
+                                     float_status *s)  /* result is written to *p instead of being returned by value */
 {
-    return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
+    float64_unpack_raw(p, f);
+    parts_canonicalize(p, s, &float64_params);  /* canonicalize *p using float64_params */
 }
 
-static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
+static float64 float64_round_pack_canonical(FloatParts64 *p,
+                                            float_status *s)
 {
-    return float64_pack_raw(round_canonical(p, s, &float64_params));
+    parts_uncanon(p, s, &float64_params);  /* takes *p by pointer; presumably rewrites it in place */
+    return float64_pack_raw(p);  /* pack the (possibly updated) parts into a float64 */
 }
 
-static FloatParts return_nan(FloatParts a, float_status *s)
+static void float128_unpack_canonical(FloatParts128 *p, float128 f,
+                                      float_status *s)  /* 128-bit counterpart of the FloatParts64 unpack helpers */
 {
-    switch (a.cls) {
-    case float_class_snan:
-        s->float_exception_flags |= float_flag_invalid;
-        a = parts_silence_nan(a, s);
-        /* fall through */
-    case float_class_qnan:
-        if (s->default_nan_mode) {
-            return parts_default_nan(s);
-        }
-        break;
-
-    default:
-        g_assert_not_reached();
-    }
-    return a;
+    float128_unpack_raw(p, f);
+    parts_canonicalize(p, s, &float128_params);  /* canonicalize *p using float128_params */
 }
 
-static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
+static float128 float128_round_pack_canonical(FloatParts128 *p,
+                                              float_status *s)
 {
-    if (is_snan(a.cls) || is_snan(b.cls)) {
-        s->float_exception_flags |= float_flag_invalid;
-    }
-
-    if (s->default_nan_mode) {
-        return parts_default_nan(s);
-    } else {
-        if (pickNaN(a.cls, b.cls,
-                    a.frac > b.frac ||
-                    (a.frac == b.frac && a.sign < b.sign), s)) {
-            a = b;
-        }
-        if (is_snan(a.cls)) {
-            return parts_silence_nan(a, s);
-        }
-    }
-    return a;
-}
-
-static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
-                                  bool inf_zero, float_status *s)
-{
-    int which;
-
-    if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
-        s->float_exception_flags |= float_flag_invalid;
-    }
-
-    which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
-
-    if (s->default_nan_mode) {
-        /* Note that this check is after pickNaNMulAdd so that function
-         * has an opportunity to set the Invalid flag.
-         */
-        which = 3;
-    }
-
-    switch (which) {
-    case 0:
-        break;
-    case 1:
-        a = b;
-        break;
-    case 2:
-        a = c;
-        break;
-    case 3:
-        return parts_default_nan(s);
-    default:
-        g_assert_not_reached();
-    }
-
-    if (is_snan(a.cls)) {
-        return parts_silence_nan(a, s);
-    }
-    return a;
+    parts_uncanon(p, s, &float128_params);
+    return float128_pack_raw(p);
 }
 
 /*
- * Returns the result of adding or subtracting the values of the
- * floating-point values `a' and `b'. The operation is performed
- * according to the IEC/IEEE Standard for Binary Floating-Point
- * Arithmetic.
+ * Addition and subtraction
  */
 
-static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
-                                float_status *s)
+static float16 QEMU_FLATTEN
+float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
 {
-    bool a_sign = a.sign;
-    bool b_sign = b.sign ^ subtract;
+    FloatParts64 pa, pb, *pr;
 
-    if (a_sign != b_sign) {
-        /* Subtraction */
-
-        if (a.cls == float_class_normal && b.cls == float_class_normal) {
-            if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
-                shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
-                a.frac = a.frac - b.frac;
-            } else {
-                shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
-                a.frac = b.frac - a.frac;
-                a.exp = b.exp;
-                a_sign ^= 1;
-            }
-
-            if (a.frac == 0) {
-                a.cls = float_class_zero;
-                a.sign = s->float_rounding_mode == float_round_down;
-            } else {
-                int shift = clz64(a.frac) - 1;
-                a.frac = a.frac << shift;
-                a.exp = a.exp - shift;
-                a.sign = a_sign;
-            }
-            return a;
-        }
-        if (is_nan(a.cls) || is_nan(b.cls)) {
-            return pick_nan(a, b, s);
-        }
-        if (a.cls == float_class_inf) {
-            if (b.cls == float_class_inf) {
-                float_raise(float_flag_invalid, s);
-                return parts_default_nan(s);
-            }
-            return a;
-        }
-        if (a.cls == float_class_zero && b.cls == float_class_zero) {
-            a.sign = s->float_rounding_mode == float_round_down;
-            return a;
-        }
-        if (a.cls == float_class_zero || b.cls == float_class_inf) {
-            b.sign = a_sign ^ 1;
-            return b;
-        }
-        if (b.cls == float_class_zero) {
-            return a;
-        }
-    } else {
-        /* Addition */
-        if (a.cls == float_class_normal && b.cls == float_class_normal) {
-            if (a.exp > b.exp) {
-                shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
-            } else if (a.exp < b.exp) {
-                shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
-                a.exp = b.exp;
-            }
-            a.frac += b.frac;
-            if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
-                shift64RightJamming(a.frac, 1, &a.frac);
-                a.exp += 1;
-            }
-            return a;
-        }
-        if (is_nan(a.cls) || is_nan(b.cls)) {
-            return pick_nan(a, b, s);
-        }
-        if (a.cls == float_class_inf || b.cls == float_class_zero) {
-            return a;
-        }
-        if (b.cls == float_class_inf || a.cls == float_class_zero) {
-            b.sign = b_sign;
-            return b;
-        }
-    }
-    g_assert_not_reached();
-}
-
-/*
- * Returns the result of adding or subtracting the floating-point
- * values `a' and `b'. The operation is performed according to the
- * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- */
-
-float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
-{
-    FloatParts pa = float16_unpack_canonical(a, status);
-    FloatParts pb = float16_unpack_canonical(b, status);
-    FloatParts pr = addsub_floats(pa, pb, false, status);
+    float16_unpack_canonical(&pa, a, status);
+    float16_unpack_canonical(&pb, b, status);
+    pr = parts_addsub(&pa, &pb, status, subtract);
 
     return float16_round_pack_canonical(pr, status);
 }
 
-float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
+float16 float16_add(float16 a, float16 b, float_status *status)
 {
-    FloatParts pa = float16_unpack_canonical(a, status);
-    FloatParts pb = float16_unpack_canonical(b, status);
-    FloatParts pr = addsub_floats(pa, pb, true, status);
+    return float16_addsub(a, b, status, false);  /* subtract = false selects addition */
+}
 
-    return float16_round_pack_canonical(pr, status);
+float16 float16_sub(float16 a, float16 b, float_status *status)
+{
+    return float16_addsub(a, b, status, true);  /* subtract = true selects subtraction */
 }
 
 static float32 QEMU_SOFTFLOAT_ATTR
-soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
+soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)  /* subtract now follows status */
 {
-    FloatParts pa = float32_unpack_canonical(a, status);
-    FloatParts pb = float32_unpack_canonical(b, status);
-    FloatParts pr = addsub_floats(pa, pb, subtract, status);
+    FloatParts64 pa, pb, *pr;
+
+    float32_unpack_canonical(&pa, a, status);
+    float32_unpack_canonical(&pb, b, status);
+    pr = parts_addsub(&pa, &pb, status, subtract);  /* result comes back as a pointer; presumably to pa or pb -- TODO confirm */
 
     return float32_round_pack_canonical(pr, status);
 }
 
-static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
+static float32 soft_f32_add(float32 a, float32 b, float_status *status)  /* no longer declared inline */
 {
-    return soft_f32_addsub(a, b, false, status);
+    return soft_f32_addsub(a, b, status, false);  /* subtract = false selects addition */
 }
 
-static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
+static float32 soft_f32_sub(float32 a, float32 b, float_status *status)  /* no longer declared inline */
 {
-    return soft_f32_addsub(a, b, true, status);
+    return soft_f32_addsub(a, b, status, true);  /* subtract = true selects subtraction */
 }
 
 static float64 QEMU_SOFTFLOAT_ATTR
-soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
+soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)  /* subtract now follows status */
 {
-    FloatParts pa = float64_unpack_canonical(a, status);
-    FloatParts pb = float64_unpack_canonical(b, status);
-    FloatParts pr = addsub_floats(pa, pb, subtract, status);
+    FloatParts64 pa, pb, *pr;
+
+    float64_unpack_canonical(&pa, a, status);
+    float64_unpack_canonical(&pb, b, status);
+    pr = parts_addsub(&pa, &pb, status, subtract);  /* result comes back as a pointer; presumably to pa or pb -- TODO confirm */
 
     return float64_round_pack_canonical(pr, status);
 }
 
-static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
+static float64 soft_f64_add(float64 a, float64 b, float_status *status)  /* no longer declared inline */
 {
-    return soft_f64_addsub(a, b, false, status);
+    return soft_f64_addsub(a, b, status, false);  /* subtract = false selects addition */
 }
 
-static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
+static float64 soft_f64_sub(float64 a, float64 b, float_status *status)  /* no longer declared inline */
 {
-    return soft_f64_addsub(a, b, true, status);
+    return soft_f64_addsub(a, b, status, true);  /* subtract = true selects subtraction */
 }
 
 static float hard_f32_add(float a, float b)
@@ -1182,82 +1591,61 @@
     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
 }
 
-/*
- * Returns the result of adding or subtracting the bfloat16
- * values `a' and `b'.
- */
-bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
+static bfloat16 QEMU_FLATTEN
+bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)  /* shared body for bfloat16_add and bfloat16_sub */
 {
-    FloatParts pa = bfloat16_unpack_canonical(a, status);
-    FloatParts pb = bfloat16_unpack_canonical(b, status);
-    FloatParts pr = addsub_floats(pa, pb, false, status);
+    FloatParts64 pa, pb, *pr;
+
+    bfloat16_unpack_canonical(&pa, a, status);
+    bfloat16_unpack_canonical(&pb, b, status);
+    pr = parts_addsub(&pa, &pb, status, subtract);  /* result comes back as a pointer; presumably to pa or pb -- TODO confirm */
 
     return bfloat16_round_pack_canonical(pr, status);
 }
 
-bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
+bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
 {
-    FloatParts pa = bfloat16_unpack_canonical(a, status);
-    FloatParts pb = bfloat16_unpack_canonical(b, status);
-    FloatParts pr = addsub_floats(pa, pb, true, status);
+    return bfloat16_addsub(a, b, status, false);  /* subtract = false selects addition */
+}
 
-    return bfloat16_round_pack_canonical(pr, status);
+bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
+{
+    return bfloat16_addsub(a, b, status, true);  /* subtract = true selects subtraction */
+}
+
+static float128 QEMU_FLATTEN
+float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
+{  /* Shared body for float128_add and float128_sub, working on FloatParts128. */
+    FloatParts128 pa, pb, *pr;
+
+    float128_unpack_canonical(&pa, a, status);
+    float128_unpack_canonical(&pb, b, status);
+    pr = parts_addsub(&pa, &pb, status, subtract);  /* result comes back as a pointer; presumably to pa or pb -- TODO confirm */
+
+    return float128_round_pack_canonical(pr, status);
+}
+
+float128 float128_add(float128 a, float128 b, float_status *status)
+{
+    return float128_addsub(a, b, status, false);  /* subtract = false selects addition */
+}
+
+float128 float128_sub(float128 a, float128 b, float_status *status)
+{
+    return float128_addsub(a, b, status, true);  /* subtract = true selects subtraction */
 }
 
 /*
- * Returns the result of multiplying the floating-point values `a' and
- * `b'. The operation is performed according to the IEC/IEEE Standard
- * for Binary Floating-Point Arithmetic.
+ * Multiplication
  */
 
-static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
-{
-    bool sign = a.sign ^ b.sign;
-
-    if (a.cls == float_class_normal && b.cls == float_class_normal) {
-        uint64_t hi, lo;
-        int exp = a.exp + b.exp;
-
-        mul64To128(a.frac, b.frac, &hi, &lo);
-        shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
-        if (lo & DECOMPOSED_OVERFLOW_BIT) {
-            shift64RightJamming(lo, 1, &lo);
-            exp += 1;
-        }
-
-        /* Re-use a */
-        a.exp = exp;
-        a.sign = sign;
-        a.frac = lo;
-        return a;
-    }
-    /* handle all the NaN cases */
-    if (is_nan(a.cls) || is_nan(b.cls)) {
-        return pick_nan(a, b, s);
-    }
-    /* Inf * Zero == NaN */
-    if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
-        (a.cls == float_class_zero && b.cls == float_class_inf)) {
-        s->float_exception_flags |= float_flag_invalid;
-        return parts_default_nan(s);
-    }
-    /* Multiply by 0 or Inf */
-    if (a.cls == float_class_inf || a.cls == float_class_zero) {
-        a.sign = sign;
-        return a;
-    }
-    if (b.cls == float_class_inf || b.cls == float_class_zero) {
-        b.sign = sign;
-        return b;
-    }
-    g_assert_not_reached();
-}
-
 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
 {
-    FloatParts pa = float16_unpack_canonical(a, status);
-    FloatParts pb = float16_unpack_canonical(b, status);
-    FloatParts pr = mul_floats(pa, pb, status);
+    FloatParts64 pa, pb, *pr;  /* operands live on the stack; pr only points at the result */
+
+    float16_unpack_canonical(&pa, a, status);
+    float16_unpack_canonical(&pb, b, status);
+    pr = parts_mul(&pa, &pb, status);  /* result comes back as a pointer; presumably to pa or pb -- TODO confirm */
 
     return float16_round_pack_canonical(pr, status);
 }
@@ -1265,9 +1653,11 @@
 static float32 QEMU_SOFTFLOAT_ATTR
 soft_f32_mul(float32 a, float32 b, float_status *status)
 {
-    FloatParts pa = float32_unpack_canonical(a, status);
-    FloatParts pb = float32_unpack_canonical(b, status);
-    FloatParts pr = mul_floats(pa, pb, status);
+    FloatParts64 pa, pb, *pr;  /* operands live on the stack; pr only points at the result */
+
+    float32_unpack_canonical(&pa, a, status);
+    float32_unpack_canonical(&pb, b, status);
+    pr = parts_mul(&pa, &pb, status);  /* result comes back as a pointer; presumably to pa or pb -- TODO confirm */
 
     return float32_round_pack_canonical(pr, status);
 }
@@ -1275,9 +1665,11 @@
 static float64 QEMU_SOFTFLOAT_ATTR
 soft_f64_mul(float64 a, float64 b, float_status *status)
 {
-    FloatParts pa = float64_unpack_canonical(a, status);
-    FloatParts pb = float64_unpack_canonical(b, status);
-    FloatParts pr = mul_floats(pa, pb, status);
+    FloatParts64 pa, pb, *pr;  /* operands live on the stack; pr only points at the result */
+
+    float64_unpack_canonical(&pa, a, status);
+    float64_unpack_canonical(&pb, b, status);
+    pr = parts_mul(&pa, &pb, status);  /* result comes back as a pointer; presumably to pa or pb -- TODO confirm */
 
     return float64_round_pack_canonical(pr, status);
 }
@@ -1306,230 +1698,43 @@
                         f64_is_zon2, f64_addsubmul_post);
 }
 
-/*
- * Returns the result of multiplying the bfloat16
- * values `a' and `b'.
- */
-
-bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
+bfloat16 QEMU_FLATTEN
+bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
 {
-    FloatParts pa = bfloat16_unpack_canonical(a, status);
-    FloatParts pb = bfloat16_unpack_canonical(b, status);
-    FloatParts pr = mul_floats(pa, pb, status);
+    FloatParts64 pa, pb, *pr;  /* operands live on the stack; pr only points at the result */
+
+    bfloat16_unpack_canonical(&pa, a, status);
+    bfloat16_unpack_canonical(&pb, b, status);
+    pr = parts_mul(&pa, &pb, status);  /* result comes back as a pointer; presumably to pa or pb -- TODO confirm */
 
     return bfloat16_round_pack_canonical(pr, status);
 }
 
-/*
- * Returns the result of multiplying the floating-point values `a' and
- * `b' then adding 'c', with no intermediate rounding step after the
- * multiplication. The operation is performed according to the
- * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
- * The flags argument allows the caller to select negation of the
- * addend, the intermediate product, or the final result. (The
- * difference between this and having the caller do a separate
- * negation is that negating externally will flip the sign bit on
- * NaNs.)
- */
-
-static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
-                                int flags, float_status *s)
+float128 QEMU_FLATTEN
+float128_mul(float128 a, float128 b, float_status *status)
 {
-    bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
-                    ((1 << float_class_inf) | (1 << float_class_zero));
-    bool p_sign;
-    bool sign_flip = flags & float_muladd_negate_result;
-    FloatClass p_class;
-    uint64_t hi, lo;
-    int p_exp;
+    FloatParts128 pa, pb, *pr;
 
-    /* It is implementation-defined whether the cases of (0,inf,qnan)
-     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
-     * they return if they do), so we have to hand this information
-     * off to the target-specific pick-a-NaN routine.
-     */
-    if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
-        return pick_nan_muladd(a, b, c, inf_zero, s);
-    }
+    float128_unpack_canonical(&pa, a, status);
+    float128_unpack_canonical(&pb, b, status);
+    pr = parts_mul(&pa, &pb, status);
 
-    if (inf_zero) {
-        s->float_exception_flags |= float_flag_invalid;
-        return parts_default_nan(s);
-    }
-
-    if (flags & float_muladd_negate_c) {
-        c.sign ^= 1;
-    }
-
-    p_sign = a.sign ^ b.sign;
-
-    if (flags & float_muladd_negate_product) {
-        p_sign ^= 1;
-    }
-
-    if (a.cls == float_class_inf || b.cls == float_class_inf) {
-        p_class = float_class_inf;
-    } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
-        p_class = float_class_zero;
-    } else {
-        p_class = float_class_normal;
-    }
-
-    if (c.cls == float_class_inf) {
-        if (p_class == float_class_inf && p_sign != c.sign) {
-            s->float_exception_flags |= float_flag_invalid;
-            return parts_default_nan(s);
-        } else {
-            a.cls = float_class_inf;
-            a.sign = c.sign ^ sign_flip;
-            return a;
-        }
-    }
-
-    if (p_class == float_class_inf) {
-        a.cls = float_class_inf;
-        a.sign = p_sign ^ sign_flip;
-        return a;
-    }
-
-    if (p_class == float_class_zero) {
-        if (c.cls == float_class_zero) {
-            if (p_sign != c.sign) {
-                p_sign = s->float_rounding_mode == float_round_down;
-            }
-            c.sign = p_sign;
-        } else if (flags & float_muladd_halve_result) {
-            c.exp -= 1;
-        }
-        c.sign ^= sign_flip;
-        return c;
-    }
-
-    /* a & b should be normals now... */
-    assert(a.cls == float_class_normal &&
-           b.cls == float_class_normal);
-
-    p_exp = a.exp + b.exp;
-
-    /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
-     * result.
-     */
-    mul64To128(a.frac, b.frac, &hi, &lo);
-    /* binary point now at bit 124 */
-
-    /* check for overflow */
-    if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
-        shift128RightJamming(hi, lo, 1, &hi, &lo);
-        p_exp += 1;
-    }
-
-    /* + add/sub */
-    if (c.cls == float_class_zero) {
-        /* move binary point back to 62 */
-        shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
-    } else {
-        int exp_diff = p_exp - c.exp;
-        if (p_sign == c.sign) {
-            /* Addition */
-            if (exp_diff <= 0) {
-                shift128RightJamming(hi, lo,
-                                     DECOMPOSED_BINARY_POINT - exp_diff,
-                                     &hi, &lo);
-                lo += c.frac;
-                p_exp = c.exp;
-            } else {
-                uint64_t c_hi, c_lo;
-                /* shift c to the same binary point as the product (124) */
-                c_hi = c.frac >> 2;
-                c_lo = 0;
-                shift128RightJamming(c_hi, c_lo,
-                                     exp_diff,
-                                     &c_hi, &c_lo);
-                add128(hi, lo, c_hi, c_lo, &hi, &lo);
-                /* move binary point back to 62 */
-                shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
-            }
-
-            if (lo & DECOMPOSED_OVERFLOW_BIT) {
-                shift64RightJamming(lo, 1, &lo);
-                p_exp += 1;
-            }
-
-        } else {
-            /* Subtraction */
-            uint64_t c_hi, c_lo;
-            /* make C binary point match product at bit 124 */
-            c_hi = c.frac >> 2;
-            c_lo = 0;
-
-            if (exp_diff <= 0) {
-                shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
-                if (exp_diff == 0
-                    &&
-                    (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
-                    sub128(hi, lo, c_hi, c_lo, &hi, &lo);
-                } else {
-                    sub128(c_hi, c_lo, hi, lo, &hi, &lo);
-                    p_sign ^= 1;
-                    p_exp = c.exp;
-                }
-            } else {
-                shift128RightJamming(c_hi, c_lo,
-                                     exp_diff,
-                                     &c_hi, &c_lo);
-                sub128(hi, lo, c_hi, c_lo, &hi, &lo);
-            }
-
-            if (hi == 0 && lo == 0) {
-                a.cls = float_class_zero;
-                a.sign = s->float_rounding_mode == float_round_down;
-                a.sign ^= sign_flip;
-                return a;
-            } else {
-                int shift;
-                if (hi != 0) {
-                    shift = clz64(hi);
-                } else {
-                    shift = clz64(lo) + 64;
-                }
-                /* Normalizing to a binary point of 124 is the
-                   correct adjust for the exponent.  However since we're
-                   shifting, we might as well put the binary point back
-                   at 62 where we really want it.  Therefore shift as
-                   if we're leaving 1 bit at the top of the word, but
-                   adjust the exponent as if we're leaving 3 bits.  */
-                shift -= 1;
-                if (shift >= 64) {
-                    lo = lo << (shift - 64);
-                } else {
-                    hi = (hi << shift) | (lo >> (64 - shift));
-                    lo = hi | ((lo << shift) != 0);
-                }
-                p_exp -= shift - 2;
-            }
-        }
-    }
-
-    if (flags & float_muladd_halve_result) {
-        p_exp -= 1;
-    }
-
-    /* finally prepare our result */
-    a.cls = float_class_normal;
-    a.sign = p_sign ^ sign_flip;
-    a.exp = p_exp;
-    a.frac = lo;
-
-    return a;
+    return float128_round_pack_canonical(pr, status);
 }
 
+/*
+ * Fused multiply-add
+ */
+
 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
-                                                int flags, float_status *status)
+                                    int flags, float_status *status)
 {
-    FloatParts pa = float16_unpack_canonical(a, status);
-    FloatParts pb = float16_unpack_canonical(b, status);
-    FloatParts pc = float16_unpack_canonical(c, status);
-    FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
+    FloatParts64 pa, pb, pc, *pr;  /* three operands on the stack; pr only points at the result */
+
+    float16_unpack_canonical(&pa, a, status);
+    float16_unpack_canonical(&pb, b, status);
+    float16_unpack_canonical(&pc, c, status);
+    pr = parts_muladd(&pa, &pb, &pc, flags, status);  /* flags is forwarded unchanged; result presumably aliases one operand -- TODO confirm */
 
     return float16_round_pack_canonical(pr, status);
 }
@@ -1538,10 +1743,12 @@
 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
                 float_status *status)
 {
-    FloatParts pa = float32_unpack_canonical(a, status);
-    FloatParts pb = float32_unpack_canonical(b, status);
-    FloatParts pc = float32_unpack_canonical(c, status);
-    FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
+    FloatParts64 pa, pb, pc, *pr;
+
+    float32_unpack_canonical(&pa, a, status);
+    float32_unpack_canonical(&pb, b, status);
+    float32_unpack_canonical(&pc, c, status);
+    pr = parts_muladd(&pa, &pb, &pc, flags, status);
 
     return float32_round_pack_canonical(pr, status);
 }
@@ -1550,10 +1757,12 @@
 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
                 float_status *status)
 {
-    FloatParts pa = float64_unpack_canonical(a, status);
-    FloatParts pb = float64_unpack_canonical(b, status);
-    FloatParts pc = float64_unpack_canonical(c, status);
-    FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
+    FloatParts64 pa, pb, pc, *pr;
+
+    float64_unpack_canonical(&pa, a, status);
+    float64_unpack_canonical(&pb, b, status);
+    float64_unpack_canonical(&pc, c, status);
+    pr = parts_muladd(&pa, &pb, &pc, flags, status);
 
     return float64_round_pack_canonical(pr, status);
 }
@@ -1615,7 +1824,7 @@
         ur.h = fmaf(ua.h, ub.h, uc.h);
 
         if (unlikely(f32_is_inf(ur))) {
-            s->float_exception_flags |= float_flag_overflow;
+            float_raise(float_flag_overflow, s);
         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
             ua = ua_orig;
             uc = uc_orig;
@@ -1686,7 +1895,7 @@
         ur.h = fma(ua.h, ub.h, uc.h);
 
         if (unlikely(f64_is_inf(ur))) {
-            s->float_exception_flags |= float_flag_overflow;
+            float_raise(float_flag_overflow, s);
         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
             ua = ua_orig;
             uc = uc_orig;
@@ -1702,107 +1911,43 @@
     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
 }
 
-/*
- * Returns the result of multiplying the bfloat16 values `a'
- * and `b' then adding 'c', with no intermediate rounding step after the
- * multiplication.
- */
-
 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
                                       int flags, float_status *status)
 {
-    FloatParts pa = bfloat16_unpack_canonical(a, status);
-    FloatParts pb = bfloat16_unpack_canonical(b, status);
-    FloatParts pc = bfloat16_unpack_canonical(c, status);
-    FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
+    FloatParts64 pa, pb, pc, *pr;
+
+    bfloat16_unpack_canonical(&pa, a, status);
+    bfloat16_unpack_canonical(&pb, b, status);
+    bfloat16_unpack_canonical(&pc, c, status);
+    pr = parts_muladd(&pa, &pb, &pc, flags, status);
 
     return bfloat16_round_pack_canonical(pr, status);
 }
 
-/*
- * Returns the result of dividing the floating-point value `a' by the
- * corresponding value `b'. The operation is performed according to
- * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
- */
-
-static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
+float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
+                                      int flags, float_status *status)
 {
-    bool sign = a.sign ^ b.sign;
+    FloatParts128 pa, pb, pc, *pr;
 
-    if (a.cls == float_class_normal && b.cls == float_class_normal) {
-        uint64_t n0, n1, q, r;
-        int exp = a.exp - b.exp;
+    float128_unpack_canonical(&pa, a, status);
+    float128_unpack_canonical(&pb, b, status);
+    float128_unpack_canonical(&pc, c, status);
+    pr = parts_muladd(&pa, &pb, &pc, flags, status);
 
-        /*
-         * We want a 2*N / N-bit division to produce exactly an N-bit
-         * result, so that we do not lose any precision and so that we
-         * do not have to renormalize afterward.  If A.frac < B.frac,
-         * then division would produce an (N-1)-bit result; shift A left
-         * by one to produce the an N-bit result, and decrement the
-         * exponent to match.
-         *
-         * The udiv_qrnnd algorithm that we're using requires normalization,
-         * i.e. the msb of the denominator must be set.  Since we know that
-         * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
-         * by one (more), and the remainder must be shifted right by one.
-         */
-        if (a.frac < b.frac) {
-            exp -= 1;
-            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
-        } else {
-            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
-        }
-        q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
-
-        /*
-         * Set lsb if there is a remainder, to set inexact.
-         * As mentioned above, to find the actual value of the remainder we
-         * would need to shift right, but (1) we are only concerned about
-         * non-zero-ness, and (2) the remainder will always be even because
-         * both inputs to the division primitive are even.
-         */
-        a.frac = q | (r != 0);
-        a.sign = sign;
-        a.exp = exp;
-        return a;
-    }
-    /* handle all the NaN cases */
-    if (is_nan(a.cls) || is_nan(b.cls)) {
-        return pick_nan(a, b, s);
-    }
-    /* 0/0 or Inf/Inf */
-    if (a.cls == b.cls
-        &&
-        (a.cls == float_class_inf || a.cls == float_class_zero)) {
-        s->float_exception_flags |= float_flag_invalid;
-        return parts_default_nan(s);
-    }
-    /* Inf / x or 0 / x */
-    if (a.cls == float_class_inf || a.cls == float_class_zero) {
-        a.sign = sign;
-        return a;
-    }
-    /* Div 0 => Inf */
-    if (b.cls == float_class_zero) {
-        s->float_exception_flags |= float_flag_divbyzero;
-        a.cls = float_class_inf;
-        a.sign = sign;
-        return a;
-    }
-    /* Div by Inf */
-    if (b.cls == float_class_inf) {
-        a.cls = float_class_zero;
-        a.sign = sign;
-        return a;
-    }
-    g_assert_not_reached();
+    return float128_round_pack_canonical(pr, status);
 }
 
+/*
+ * Division
+ */
+
 float16 float16_div(float16 a, float16 b, float_status *status)
 {
-    FloatParts pa = float16_unpack_canonical(a, status);
-    FloatParts pb = float16_unpack_canonical(b, status);
-    FloatParts pr = div_floats(pa, pb, status);
+    FloatParts64 pa, pb, *pr;
+
+    float16_unpack_canonical(&pa, a, status);
+    float16_unpack_canonical(&pb, b, status);
+    pr = parts_div(&pa, &pb, status);
 
     return float16_round_pack_canonical(pr, status);
 }
@@ -1810,9 +1955,11 @@
 static float32 QEMU_SOFTFLOAT_ATTR
 soft_f32_div(float32 a, float32 b, float_status *status)
 {
-    FloatParts pa = float32_unpack_canonical(a, status);
-    FloatParts pb = float32_unpack_canonical(b, status);
-    FloatParts pr = div_floats(pa, pb, status);
+    FloatParts64 pa, pb, *pr;
+
+    float32_unpack_canonical(&pa, a, status);
+    float32_unpack_canonical(&pb, b, status);
+    pr = parts_div(&pa, &pb, status);
 
     return float32_round_pack_canonical(pr, status);
 }
@@ -1820,9 +1967,11 @@
 static float64 QEMU_SOFTFLOAT_ATTR
 soft_f64_div(float64 a, float64 b, float_status *status)
 {
-    FloatParts pa = float64_unpack_canonical(a, status);
-    FloatParts pb = float64_unpack_canonical(b, status);
-    FloatParts pr = div_floats(pa, pb, status);
+    FloatParts64 pa, pb, *pr;
+
+    float64_unpack_canonical(&pa, a, status);
+    float64_unpack_canonical(&pb, b, status);
+    pr = parts_div(&pa, &pb, status);
 
     return float64_round_pack_canonical(pr, status);
 }
@@ -1885,20 +2034,30 @@
                         f64_div_pre, f64_div_post);
 }
 
-/*
- * Returns the result of dividing the bfloat16
- * value `a' by the corresponding value `b'.
- */
-
-bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
+bfloat16 QEMU_FLATTEN
+bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
 {
-    FloatParts pa = bfloat16_unpack_canonical(a, status);
-    FloatParts pb = bfloat16_unpack_canonical(b, status);
-    FloatParts pr = div_floats(pa, pb, status);
+    FloatParts64 pa, pb, *pr;
+
+    bfloat16_unpack_canonical(&pa, a, status);
+    bfloat16_unpack_canonical(&pb, b, status);
+    pr = parts_div(&pa, &pb, status);
 
     return bfloat16_round_pack_canonical(pr, status);
 }
 
+float128 QEMU_FLATTEN
+float128_div(float128 a, float128 b, float_status *status)
+{
+    FloatParts128 pa, pb, *pr;
+
+    float128_unpack_canonical(&pa, a, status);
+    float128_unpack_canonical(&pb, b, status);
+    pr = parts_div(&pa, &pb, status);
+
+    return float128_round_pack_canonical(pr, status);
+}
+
 /*
  * Float to Float conversions
  *
@@ -1906,81 +2065,134 @@
  * conversion is performed according to the IEC/IEEE Standard for
  * Binary Floating-Point Arithmetic.
  *
- * The float_to_float helper only needs to take care of raising
- * invalid exceptions and handling the conversion on NaNs.
+ * Usually this only needs to take care of raising invalid exceptions
+ * and handling the conversion on NaNs.
  */
 
-static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
-                                 float_status *s)
+static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
 {
-    if (dstf->arm_althp) {
-        switch (a.cls) {
-        case float_class_qnan:
-        case float_class_snan:
-            /* There is no NaN in the destination format.  Raise Invalid
-             * and return a zero with the sign of the input NaN.
-             */
-            s->float_exception_flags |= float_flag_invalid;
-            a.cls = float_class_zero;
-            a.frac = 0;
-            a.exp = 0;
-            break;
+    switch (a->cls) {
+    case float_class_qnan:
+    case float_class_snan:
+        /*
+         * There is no NaN in the destination format.  Raise Invalid
+         * and return a zero with the sign of the input NaN.
+         */
+        float_raise(float_flag_invalid, s);
+        a->cls = float_class_zero;
+        break;
 
-        case float_class_inf:
-            /* There is no Inf in the destination format.  Raise Invalid
-             * and return the maximum normal with the correct sign.
-             */
-            s->float_exception_flags |= float_flag_invalid;
-            a.cls = float_class_normal;
-            a.exp = dstf->exp_max;
-            a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
-            break;
+    case float_class_inf:
+        /*
+         * There is no Inf in the destination format.  Raise Invalid
+         * and return the maximum normal with the correct sign.
+         */
+        float_raise(float_flag_invalid, s);
+        a->cls = float_class_normal;
+        a->exp = float16_params_ahp.exp_max;
+        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
+                                  float16_params_ahp.frac_size + 1);
+        break;
 
-        default:
-            break;
-        }
-    } else if (is_nan(a.cls)) {
-        if (is_snan(a.cls)) {
-            s->float_exception_flags |= float_flag_invalid;
-            a = parts_silence_nan(a, s);
-        }
-        if (s->default_nan_mode) {
-            return parts_default_nan(s);
-        }
+    case float_class_normal:
+    case float_class_zero:
+        break;
+
+    default:
+        g_assert_not_reached();
     }
-    return a;
+}
+
+static void parts64_float_to_float(FloatParts64 *a, float_status *s)
+{
+    if (is_nan(a->cls)) {
+        parts_return_nan(a, s);
+    }
+}
+
+static void parts128_float_to_float(FloatParts128 *a, float_status *s)
+{
+    if (is_nan(a->cls)) {
+        parts_return_nan(a, s);
+    }
+}
+
+#define parts_float_to_float(P, S) \
+    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
+
+static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
+                                        float_status *s)
+{
+    a->cls = b->cls;
+    a->sign = b->sign;
+    a->exp = b->exp;
+
+    if (a->cls == float_class_normal) {
+        frac_truncjam(a, b);
+    } else if (is_nan(a->cls)) {
+        /* Discard the low bits of the NaN. */
+        a->frac = b->frac_hi;
+        parts_return_nan(a, s);
+    }
+}
+
+static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
+                                       float_status *s)
+{
+    a->cls = b->cls;
+    a->sign = b->sign;
+    a->exp = b->exp;
+    frac_widen(a, b);
+
+    if (is_nan(a->cls)) {
+        parts_return_nan(a, s);
+    }
 }
 
 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
 {
     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
-    FloatParts p = float16a_unpack_canonical(a, s, fmt16);
-    FloatParts pr = float_to_float(p, &float32_params, s);
-    return float32_round_pack_canonical(pr, s);
+    FloatParts64 p;
+
+    float16a_unpack_canonical(&p, a, s, fmt16);
+    parts_float_to_float(&p, s);
+    return float32_round_pack_canonical(&p, s);
 }
 
 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
 {
     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
-    FloatParts p = float16a_unpack_canonical(a, s, fmt16);
-    FloatParts pr = float_to_float(p, &float64_params, s);
-    return float64_round_pack_canonical(pr, s);
+    FloatParts64 p;
+
+    float16a_unpack_canonical(&p, a, s, fmt16);
+    parts_float_to_float(&p, s);
+    return float64_round_pack_canonical(&p, s);
 }
 
 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
 {
-    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
-    FloatParts p = float32_unpack_canonical(a, s);
-    FloatParts pr = float_to_float(p, fmt16, s);
-    return float16a_round_pack_canonical(pr, s, fmt16);
+    FloatParts64 p;
+    const FloatFmt *fmt;
+
+    float32_unpack_canonical(&p, a, s);
+    if (ieee) {
+        parts_float_to_float(&p, s);
+        fmt = &float16_params;
+    } else {
+        parts_float_to_ahp(&p, s);
+        fmt = &float16_params_ahp;
+    }
+    return float16a_round_pack_canonical(&p, s, fmt);
 }
 
 static float64 QEMU_SOFTFLOAT_ATTR
 soft_float32_to_float64(float32 a, float_status *s)
 {
-    FloatParts p = float32_unpack_canonical(a, s);
-    FloatParts pr = float_to_float(p, &float64_params, s);
-    return float64_round_pack_canonical(pr, s);
+    FloatParts64 p;
+
+    float32_unpack_canonical(&p, a, s);
+    parts_float_to_float(&p, s);
+    return float64_round_pack_canonical(&p, s);
 }
 
 float64 float32_to_float64(float32 a, float_status *s)
@@ -2001,313 +2213,291 @@
 
 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
 {
-    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
-    FloatParts p = float64_unpack_canonical(a, s);
-    FloatParts pr = float_to_float(p, fmt16, s);
-    return float16a_round_pack_canonical(pr, s, fmt16);
+    FloatParts64 p;
+    const FloatFmt *fmt;
+
+    float64_unpack_canonical(&p, a, s);
+    if (ieee) {
+        parts_float_to_float(&p, s);
+        fmt = &float16_params;
+    } else {
+        parts_float_to_ahp(&p, s);
+        fmt = &float16_params_ahp;
+    }
+    return float16a_round_pack_canonical(&p, s, fmt);
 }
 
 float32 float64_to_float32(float64 a, float_status *s)
 {
-    FloatParts p = float64_unpack_canonical(a, s);
-    FloatParts pr = float_to_float(p, &float32_params, s);
-    return float32_round_pack_canonical(pr, s);
+    FloatParts64 p;
+
+    float64_unpack_canonical(&p, a, s);
+    parts_float_to_float(&p, s);
+    return float32_round_pack_canonical(&p, s);
 }
 
 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
 {
-    FloatParts p = bfloat16_unpack_canonical(a, s);
-    FloatParts pr = float_to_float(p, &float32_params, s);
-    return float32_round_pack_canonical(pr, s);
+    FloatParts64 p;
+
+    bfloat16_unpack_canonical(&p, a, s);
+    parts_float_to_float(&p, s);
+    return float32_round_pack_canonical(&p, s);
 }
 
 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
 {
-    FloatParts p = bfloat16_unpack_canonical(a, s);
-    FloatParts pr = float_to_float(p, &float64_params, s);
-    return float64_round_pack_canonical(pr, s);
+    FloatParts64 p;
+
+    bfloat16_unpack_canonical(&p, a, s);
+    parts_float_to_float(&p, s);
+    return float64_round_pack_canonical(&p, s);
 }
 
 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
 {
-    FloatParts p = float32_unpack_canonical(a, s);
-    FloatParts pr = float_to_float(p, &bfloat16_params, s);
-    return bfloat16_round_pack_canonical(pr, s);
+    FloatParts64 p;
+
+    float32_unpack_canonical(&p, a, s);
+    parts_float_to_float(&p, s);
+    return bfloat16_round_pack_canonical(&p, s);
 }
 
 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
 {
-    FloatParts p = float64_unpack_canonical(a, s);
-    FloatParts pr = float_to_float(p, &bfloat16_params, s);
-    return bfloat16_round_pack_canonical(pr, s);
+    FloatParts64 p;
+
+    float64_unpack_canonical(&p, a, s);
+    parts_float_to_float(&p, s);
+    return bfloat16_round_pack_canonical(&p, s);
+}
+
+float32 float128_to_float32(float128 a, float_status *s)
+{
+    FloatParts64 p64;
+    FloatParts128 p128;
+
+    float128_unpack_canonical(&p128, a, s);
+    parts_float_to_float_narrow(&p64, &p128, s);
+    return float32_round_pack_canonical(&p64, s);
+}
+
+float64 float128_to_float64(float128 a, float_status *s)
+{
+    FloatParts64 p64;
+    FloatParts128 p128;
+
+    float128_unpack_canonical(&p128, a, s);
+    parts_float_to_float_narrow(&p64, &p128, s);
+    return float64_round_pack_canonical(&p64, s);
+}
+
+float128 float32_to_float128(float32 a, float_status *s)
+{
+    FloatParts64 p64;
+    FloatParts128 p128;
+
+    float32_unpack_canonical(&p64, a, s);
+    parts_float_to_float_widen(&p128, &p64, s);
+    return float128_round_pack_canonical(&p128, s);
+}
+
+float128 float64_to_float128(float64 a, float_status *s)
+{
+    FloatParts64 p64;
+    FloatParts128 p128;
+
+    float64_unpack_canonical(&p64, a, s);
+    parts_float_to_float_widen(&p128, &p64, s);
+    return float128_round_pack_canonical(&p128, s);
 }
 
 /*
- * Rounds the floating-point value `a' to an integer, and returns the
- * result as a floating-point value. The operation is performed
- * according to the IEC/IEEE Standard for Binary Floating-Point
- * Arithmetic.
+ * Round to integral value
  */
 
-static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode,
-                               int scale, float_status *s)
-{
-    switch (a.cls) {
-    case float_class_qnan:
-    case float_class_snan:
-        return return_nan(a, s);
-
-    case float_class_zero:
-    case float_class_inf:
-        /* already "integral" */
-        break;
-
-    case float_class_normal:
-        scale = MIN(MAX(scale, -0x10000), 0x10000);
-        a.exp += scale;
-
-        if (a.exp >= DECOMPOSED_BINARY_POINT) {
-            /* already integral */
-            break;
-        }
-        if (a.exp < 0) {
-            bool one;
-            /* all fractional */
-            s->float_exception_flags |= float_flag_inexact;
-            switch (rmode) {
-            case float_round_nearest_even:
-                one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
-                break;
-            case float_round_ties_away:
-                one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
-                break;
-            case float_round_to_zero:
-                one = false;
-                break;
-            case float_round_up:
-                one = !a.sign;
-                break;
-            case float_round_down:
-                one = a.sign;
-                break;
-            case float_round_to_odd:
-                one = true;
-                break;
-            default:
-                g_assert_not_reached();
-            }
-
-            if (one) {
-                a.frac = DECOMPOSED_IMPLICIT_BIT;
-                a.exp = 0;
-            } else {
-                a.cls = float_class_zero;
-            }
-        } else {
-            uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
-            uint64_t frac_lsbm1 = frac_lsb >> 1;
-            uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
-            uint64_t rnd_mask = rnd_even_mask >> 1;
-            uint64_t inc;
-
-            switch (rmode) {
-            case float_round_nearest_even:
-                inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
-                break;
-            case float_round_ties_away:
-                inc = frac_lsbm1;
-                break;
-            case float_round_to_zero:
-                inc = 0;
-                break;
-            case float_round_up:
-                inc = a.sign ? 0 : rnd_mask;
-                break;
-            case float_round_down:
-                inc = a.sign ? rnd_mask : 0;
-                break;
-            case float_round_to_odd:
-                inc = a.frac & frac_lsb ? 0 : rnd_mask;
-                break;
-            default:
-                g_assert_not_reached();
-            }
-
-            if (a.frac & rnd_mask) {
-                s->float_exception_flags |= float_flag_inexact;
-                a.frac += inc;
-                a.frac &= ~rnd_mask;
-                if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
-                    a.frac >>= 1;
-                    a.exp++;
-                }
-            }
-        }
-        break;
-    default:
-        g_assert_not_reached();
-    }
-    return a;
-}
-
 float16 float16_round_to_int(float16 a, float_status *s)
 {
-    FloatParts pa = float16_unpack_canonical(a, s);
-    FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
-    return float16_round_pack_canonical(pr, s);
+    FloatParts64 p;
+
+    float16_unpack_canonical(&p, a, s);
+    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
+    return float16_round_pack_canonical(&p, s);
 }
 
 float32 float32_round_to_int(float32 a, float_status *s)
 {
-    FloatParts pa = float32_unpack_canonical(a, s);
-    FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
-    return float32_round_pack_canonical(pr, s);
+    FloatParts64 p;
+
+    float32_unpack_canonical(&p, a, s);
+    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
+    return float32_round_pack_canonical(&p, s);
 }
 
 float64 float64_round_to_int(float64 a, float_status *s)
 {
-    FloatParts pa = float64_unpack_canonical(a, s);
-    FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
-    return float64_round_pack_canonical(pr, s);
-}
+    FloatParts64 p;
 
-/*
- * Rounds the bfloat16 value `a' to an integer, and returns the
- * result as a bfloat16 value.
- */
+    float64_unpack_canonical(&p, a, s);
+    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
+    return float64_round_pack_canonical(&p, s);
+}
 
 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
 {
-    FloatParts pa = bfloat16_unpack_canonical(a, s);
-    FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
-    return bfloat16_round_pack_canonical(pr, s);
+    FloatParts64 p;
+
+    bfloat16_unpack_canonical(&p, a, s);
+    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
+    return bfloat16_round_pack_canonical(&p, s);
+}
+
+float128 float128_round_to_int(float128 a, float_status *s)
+{
+    FloatParts128 p;
+
+    float128_unpack_canonical(&p, a, s);
+    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
+    return float128_round_pack_canonical(&p, s);
 }
 
 /*
- * Returns the result of converting the floating-point value `a' to
- * the two's complement integer format. The conversion is performed
- * according to the IEC/IEEE Standard for Binary Floating-Point
- * Arithmetic---which means in particular that the conversion is
- * rounded according to the current rounding mode. If `a' is a NaN,
- * the largest positive integer is returned. Otherwise, if the
- * conversion overflows, the largest integer with the same sign as `a'
- * is returned.
-*/
-
-static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode,
-                                     int scale, int64_t min, int64_t max,
-                                     float_status *s)
-{
-    uint64_t r;
-    int orig_flags = get_float_exception_flags(s);
-    FloatParts p = round_to_int(in, rmode, scale, s);
-
-    switch (p.cls) {
-    case float_class_snan:
-    case float_class_qnan:
-        s->float_exception_flags = orig_flags | float_flag_invalid;
-        return max;
-    case float_class_inf:
-        s->float_exception_flags = orig_flags | float_flag_invalid;
-        return p.sign ? min : max;
-    case float_class_zero:
-        return 0;
-    case float_class_normal:
-        if (p.exp < DECOMPOSED_BINARY_POINT) {
-            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
-        } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
-            r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
-        } else {
-            r = UINT64_MAX;
-        }
-        if (p.sign) {
-            if (r <= -(uint64_t) min) {
-                return -r;
-            } else {
-                s->float_exception_flags = orig_flags | float_flag_invalid;
-                return min;
-            }
-        } else {
-            if (r <= max) {
-                return r;
-            } else {
-                s->float_exception_flags = orig_flags | float_flag_invalid;
-                return max;
-            }
-        }
-    default:
-        g_assert_not_reached();
-    }
-}
+ * Floating-point to signed integer conversions
+ */
 
 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                               float_status *s)
 {
-    return round_to_int_and_pack(float16_unpack_canonical(a, s),
-                                 rmode, scale, INT8_MIN, INT8_MAX, s);
+    FloatParts64 p;
+
+    float16_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
 }
 
 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
 {
-    return round_to_int_and_pack(float16_unpack_canonical(a, s),
-                                 rmode, scale, INT16_MIN, INT16_MAX, s);
+    FloatParts64 p;
+
+    float16_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
 }
 
 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
 {
-    return round_to_int_and_pack(float16_unpack_canonical(a, s),
-                                 rmode, scale, INT32_MIN, INT32_MAX, s);
+    FloatParts64 p;
+
+    float16_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
 }
 
 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
 {
-    return round_to_int_and_pack(float16_unpack_canonical(a, s),
-                                 rmode, scale, INT64_MIN, INT64_MAX, s);
+    FloatParts64 p;
+
+    float16_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
 }
 
 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
 {
-    return round_to_int_and_pack(float32_unpack_canonical(a, s),
-                                 rmode, scale, INT16_MIN, INT16_MAX, s);
+    FloatParts64 p;
+
+    float32_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
 }
 
 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
 {
-    return round_to_int_and_pack(float32_unpack_canonical(a, s),
-                                 rmode, scale, INT32_MIN, INT32_MAX, s);
+    FloatParts64 p;
+
+    float32_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
 }
 
 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
 {
-    return round_to_int_and_pack(float32_unpack_canonical(a, s),
-                                 rmode, scale, INT64_MIN, INT64_MAX, s);
+    FloatParts64 p;
+
+    float32_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
 }
 
 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
 {
-    return round_to_int_and_pack(float64_unpack_canonical(a, s),
-                                 rmode, scale, INT16_MIN, INT16_MAX, s);
+    FloatParts64 p;
+
+    float64_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
 }
 
 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
 {
-    return round_to_int_and_pack(float64_unpack_canonical(a, s),
-                                 rmode, scale, INT32_MIN, INT32_MAX, s);
+    FloatParts64 p;
+
+    float64_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
 }
 
 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
 {
-    return round_to_int_and_pack(float64_unpack_canonical(a, s),
-                                 rmode, scale, INT64_MIN, INT64_MAX, s);
+    FloatParts64 p;
+
+    float64_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
+}
+
+int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
+                                 float_status *s)
+{
+    FloatParts64 p;
+
+    bfloat16_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
+}
+
+int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
+                                 float_status *s)
+{
+    FloatParts64 p;
+
+    bfloat16_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
+}
+
+int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
+                                 float_status *s)
+{
+    FloatParts64 p;
+
+    bfloat16_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
+}
+
+static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
+                                        int scale, float_status *s)
+{
+    FloatParts128 p;
+
+    float128_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
+}
+
+static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
+                                        int scale, float_status *s)
+{
+    FloatParts128 p;
+
+    float128_unpack_canonical(&p, a, s);
+    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
 }
 
 int8_t float16_to_int8(float16 a, float_status *s)
@@ -2360,6 +2550,16 @@
     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
 }
 
+int32_t float128_to_int32(float128 a, float_status *s)
+{
+    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
+int64_t float128_to_int64(float128 a, float_status *s)
+{
+    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
 {
     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
@@ -2405,30 +2605,14 @@
     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
 }
 
-/*
- * Returns the result of converting the floating-point value `a' to
- * the two's complement integer format.
- */
-
-int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
-                                 float_status *s)
+int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
 {
-    return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
-                                 rmode, scale, INT16_MIN, INT16_MAX, s);
+    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
 }
 
-int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
-                                 float_status *s)
+int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
 {
-    return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
-                                 rmode, scale, INT32_MIN, INT32_MAX, s);
-}
-
-int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
-                                 float_status *s)
-{
-    return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
-                                 rmode, scale, INT64_MIN, INT64_MAX, s);
+    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
 }
 
 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
@@ -2474,121 +2658,149 @@
  *  flag.
  */
 
-static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode,
+static uint64_t round_to_uint_and_pack(FloatParts64 p, FloatRoundMode rmode,
                                        int scale, uint64_t max,
                                        float_status *s)
 {
-    int orig_flags = get_float_exception_flags(s);
-    FloatParts p = round_to_int(in, rmode, scale, s);
+    int flags = 0;
     uint64_t r;
 
     switch (p.cls) {
     case float_class_snan:
     case float_class_qnan:
-        s->float_exception_flags = orig_flags | float_flag_invalid;
-        return max;
+        flags = float_flag_invalid;
+        r = max;
+        break;
+
     case float_class_inf:
-        s->float_exception_flags = orig_flags | float_flag_invalid;
-        return p.sign ? 0 : max;
+        flags = float_flag_invalid;
+        r = p.sign ? 0 : max;
+        break;
+
     case float_class_zero:
         return 0;
+
     case float_class_normal:
+        /* TODO: 62 = N - 2, frac_size for rounding */
+        if (parts_round_to_int_normal(&p, rmode, scale, 62)) {
+            flags = float_flag_inexact;
+            if (p.cls == float_class_zero) {
+                r = 0;
+                break;
+            }
+        }
+
         if (p.sign) {
-            s->float_exception_flags = orig_flags | float_flag_invalid;
-            return 0;
-        }
-
-        if (p.exp < DECOMPOSED_BINARY_POINT) {
-            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
-        } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
-            r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
+            flags = float_flag_invalid;
+            r = 0;
+        } else if (p.exp > DECOMPOSED_BINARY_POINT) {
+            flags = float_flag_invalid;
+            r = max;
         } else {
-            s->float_exception_flags = orig_flags | float_flag_invalid;
-            return max;
+            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
+            if (r > max) {
+                flags = float_flag_invalid;
+                r = max;
+            }
         }
+        break;
 
-        /* For uint64 this will never trip, but if p.exp is too large
-         * to shift a decomposed fraction we shall have exited via the
-         * 3rd leg above.
-         */
-        if (r > max) {
-            s->float_exception_flags = orig_flags | float_flag_invalid;
-            return max;
-        }
-        return r;
     default:
         g_assert_not_reached();
     }
+
+    float_raise(flags, s);
+    return r;
 }
 
 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
 {
-    return round_to_uint_and_pack(float16_unpack_canonical(a, s),
-                                  rmode, scale, UINT8_MAX, s);
+    FloatParts64 p;
+
+    float16_unpack_canonical(&p, a, s);
+    return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
 }
 
 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                   float_status *s)
 {
-    return round_to_uint_and_pack(float16_unpack_canonical(a, s),
-                                  rmode, scale, UINT16_MAX, s);
+    FloatParts64 p;
+
+    float16_unpack_canonical(&p, a, s);
+    return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
 }
 
 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                   float_status *s)
 {
-    return round_to_uint_and_pack(float16_unpack_canonical(a, s),
-                                  rmode, scale, UINT32_MAX, s);
+    FloatParts64 p;
+
+    float16_unpack_canonical(&p, a, s);
+    return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
 }
 
 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                   float_status *s)
 {
-    return round_to_uint_and_pack(float16_unpack_canonical(a, s),
-                                  rmode, scale, UINT64_MAX, s);
+    FloatParts64 p;
+
+    float16_unpack_canonical(&p, a, s);
+    return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
 }
 
 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                   float_status *s)
 {
-    return round_to_uint_and_pack(float32_unpack_canonical(a, s),
-                                  rmode, scale, UINT16_MAX, s);
+    FloatParts64 p;
+
+    float32_unpack_canonical(&p, a, s);
+    return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
 }
 
 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                   float_status *s)
 {
-    return round_to_uint_and_pack(float32_unpack_canonical(a, s),
-                                  rmode, scale, UINT32_MAX, s);
+    FloatParts64 p;
+
+    float32_unpack_canonical(&p, a, s);
+    return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
 }
 
 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                   float_status *s)
 {
-    return round_to_uint_and_pack(float32_unpack_canonical(a, s),
-                                  rmode, scale, UINT64_MAX, s);
+    FloatParts64 p;
+
+    float32_unpack_canonical(&p, a, s);
+    return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
 }
 
 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                   float_status *s)
 {
-    return round_to_uint_and_pack(float64_unpack_canonical(a, s),
-                                  rmode, scale, UINT16_MAX, s);
+    FloatParts64 p;
+
+    float64_unpack_canonical(&p, a, s);
+    return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
 }
 
 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                   float_status *s)
 {
-    return round_to_uint_and_pack(float64_unpack_canonical(a, s),
-                                  rmode, scale, UINT32_MAX, s);
+    FloatParts64 p;
+
+    float64_unpack_canonical(&p, a, s);
+    return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
 }
 
 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                   float_status *s)
 {
-    return round_to_uint_and_pack(float64_unpack_canonical(a, s),
-                                  rmode, scale, UINT64_MAX, s);
+    FloatParts64 p;
+
+    float64_unpack_canonical(&p, a, s);
+    return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
 }
 
 uint8_t float16_to_uint8(float16 a, float_status *s)
@@ -2694,22 +2906,28 @@
 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                    int scale, float_status *s)
 {
-    return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
-                                  rmode, scale, UINT16_MAX, s);
+    FloatParts64 p;
+
+    bfloat16_unpack_canonical(&p, a, s);
+    return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
 }
 
 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                    int scale, float_status *s)
 {
-    return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
-                                  rmode, scale, UINT32_MAX, s);
+    FloatParts64 p;
+
+    bfloat16_unpack_canonical(&p, a, s);
+    return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
 }
 
 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                    int scale, float_status *s)
 {
-    return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
-                                  rmode, scale, UINT64_MAX, s);
+    FloatParts64 p;
+
+    bfloat16_unpack_canonical(&p, a, s);
+    return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
 }
 
 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
@@ -2750,9 +2968,9 @@
  * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  */
 
-static FloatParts int_to_float(int64_t a, int scale, float_status *status)
+static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
 {
-    FloatParts r = { .sign = false };
+    FloatParts64 r = { .sign = false };
 
     if (a == 0) {
         r.cls = float_class_zero;
@@ -2765,11 +2983,11 @@
             f = -f;
             r.sign = true;
         }
-        shift = clz64(f) - 1;
+        shift = clz64(f);
         scale = MIN(MAX(scale, -0x10000), 0x10000);
 
         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
-        r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
+        r.frac = f << shift;
     }
 
     return r;
@@ -2777,8 +2995,8 @@
 
 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
 {
-    FloatParts pa = int_to_float(a, scale, status);
-    return float16_round_pack_canonical(pa, status);
+    FloatParts64 pa = int_to_float(a, scale, status);
+    return float16_round_pack_canonical(&pa, status);
 }
 
 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
@@ -2813,8 +3031,8 @@
 
 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
 {
-    FloatParts pa = int_to_float(a, scale, status);
-    return float32_round_pack_canonical(pa, status);
+    FloatParts64 pa = int_to_float(a, scale, status);
+    return float32_round_pack_canonical(&pa, status);
 }
 
 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
@@ -2844,8 +3062,8 @@
 
 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
 {
-    FloatParts pa = int_to_float(a, scale, status);
-    return float64_round_pack_canonical(pa, status);
+    FloatParts64 pa = int_to_float(a, scale, status);
+    return float64_round_pack_canonical(&pa, status);
 }
 
 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
@@ -2880,8 +3098,8 @@
 
 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
 {
-    FloatParts pa = int_to_float(a, scale, status);
-    return bfloat16_round_pack_canonical(pa, status);
+    FloatParts64 pa = int_to_float(a, scale, status);
+    return bfloat16_round_pack_canonical(&pa, status);
 }
 
 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
@@ -2917,24 +3135,19 @@
  * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  */
 
-static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
+static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
 {
-    FloatParts r = { .sign = false };
+    FloatParts64 r = { .sign = false };
+    int shift;
 
     if (a == 0) {
         r.cls = float_class_zero;
     } else {
         scale = MIN(MAX(scale, -0x10000), 0x10000);
+        shift = clz64(a);
         r.cls = float_class_normal;
-        if ((int64_t)a < 0) {
-            r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
-            shift64RightJamming(a, 1, &a);
-            r.frac = a;
-        } else {
-            int shift = clz64(a) - 1;
-            r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
-            r.frac = a << shift;
-        }
+        r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
+        r.frac = a << shift;
     }
 
     return r;
@@ -2942,8 +3155,8 @@
 
 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
 {
-    FloatParts pa = uint_to_float(a, scale, status);
-    return float16_round_pack_canonical(pa, status);
+    FloatParts64 pa = uint_to_float(a, scale, status);
+    return float16_round_pack_canonical(&pa, status);
 }
 
 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
@@ -2978,8 +3191,8 @@
 
 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
 {
-    FloatParts pa = uint_to_float(a, scale, status);
-    return float32_round_pack_canonical(pa, status);
+    FloatParts64 pa = uint_to_float(a, scale, status);
+    return float32_round_pack_canonical(&pa, status);
 }
 
 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
@@ -3009,8 +3222,8 @@
 
 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
 {
-    FloatParts pa = uint_to_float(a, scale, status);
-    return float64_round_pack_canonical(pa, status);
+    FloatParts64 pa = uint_to_float(a, scale, status);
+    return float64_round_pack_canonical(&pa, status);
 }
 
 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
@@ -3045,8 +3258,8 @@
 
 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
 {
-    FloatParts pa = uint_to_float(a, scale, status);
-    return bfloat16_round_pack_canonical(pa, status);
+    FloatParts64 pa = uint_to_float(a, scale, status);
+    return bfloat16_round_pack_canonical(&pa, status);
 }
 
 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
@@ -3090,7 +3303,7 @@
  * minnummag() and maxnummag() functions correspond to minNumMag()
  * and maxNumMag() from the IEEE-754 2008.
  */
-static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
+static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
                                 bool ieee, bool ismag, float_status *s)
 {
     if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
@@ -3101,14 +3314,14 @@
              * the invalid exception is raised.
              */
             if (is_snan(a.cls) || is_snan(b.cls)) {
-                return pick_nan(a, b, s);
+                return *parts_pick_nan(&a, &b, s);
             } else if (is_nan(a.cls) && !is_nan(b.cls)) {
                 return b;
             } else if (is_nan(b.cls) && !is_nan(a.cls)) {
                 return a;
             }
         }
-        return pick_nan(a, b, s);
+        return *parts_pick_nan(&a, &b, s);
     } else {
         int a_exp, b_exp;
 
@@ -3165,11 +3378,11 @@
 float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b,      \
                                      float_status *s)                   \
 {                                                                       \
-    FloatParts pa = float ## sz ## _unpack_canonical(a, s);             \
-    FloatParts pb = float ## sz ## _unpack_canonical(b, s);             \
-    FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);      \
-                                                                        \
-    return float ## sz ## _round_pack_canonical(pr, s);                 \
+    FloatParts64 pa, pb, pr;                                            \
+    float ## sz ## _unpack_canonical(&pa, a, s);                        \
+    float ## sz ## _unpack_canonical(&pb, b, s);                        \
+    pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);                 \
+    return float ## sz ## _round_pack_canonical(&pr, s);                \
 }
 
 MINMAX(16, min, true, false, false)
@@ -3198,11 +3411,11 @@
 #define BF16_MINMAX(name, ismin, isiee, ismag)                          \
 bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s)     \
 {                                                                       \
-    FloatParts pa = bfloat16_unpack_canonical(a, s);                    \
-    FloatParts pb = bfloat16_unpack_canonical(b, s);                    \
-    FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);      \
-                                                                        \
-    return bfloat16_round_pack_canonical(pr, s);                        \
+    FloatParts64 pa, pb, pr;                                            \
+    bfloat16_unpack_canonical(&pa, a, s);                               \
+    bfloat16_unpack_canonical(&pb, b, s);                               \
+    pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);                 \
+    return bfloat16_round_pack_canonical(&pr, s);                       \
 }
 
 BF16_MINMAX(min, true, false, false)
@@ -3215,14 +3428,14 @@
 #undef BF16_MINMAX
 
 /* Floating point compare */
-static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet,
+static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
                                     float_status *s)
 {
     if (is_nan(a.cls) || is_nan(b.cls)) {
         if (!is_quiet ||
             a.cls == float_class_snan ||
             b.cls == float_class_snan) {
-            s->float_exception_flags |= float_flag_invalid;
+            float_raise(float_flag_invalid, s);
         }
         return float_relation_unordered;
     }
@@ -3276,8 +3489,9 @@
 static int attr                                                         \
 name(float ## sz a, float ## sz b, bool is_quiet, float_status *s)      \
 {                                                                       \
-    FloatParts pa = float ## sz ## _unpack_canonical(a, s);             \
-    FloatParts pb = float ## sz ## _unpack_canonical(b, s);             \
+    FloatParts64 pa, pb;                                                \
+    float ## sz ## _unpack_canonical(&pa, a, s);                        \
+    float ## sz ## _unpack_canonical(&pb, b, s);                        \
     return compare_floats(pa, pb, is_quiet, s);                         \
 }
 
@@ -3378,8 +3592,10 @@
 static FloatRelation QEMU_FLATTEN
 soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
 {
-    FloatParts pa = bfloat16_unpack_canonical(a, s);
-    FloatParts pb = bfloat16_unpack_canonical(b, s);
+    FloatParts64 pa, pb;
+
+    bfloat16_unpack_canonical(&pa, a, s);
+    bfloat16_unpack_canonical(&pb, b, s);
     return compare_floats(pa, pb, is_quiet, s);
 }
 
@@ -3394,16 +3610,16 @@
 }
 
 /* Multiply A by 2 raised to the power N.  */
-static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
+static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
 {
     if (unlikely(is_nan(a.cls))) {
-        return return_nan(a, s);
+        parts_return_nan(&a, s);
     }
     if (a.cls == float_class_normal) {
-        /* The largest float type (even though not supported by FloatParts)
+        /* The largest float type (even though not supported by FloatParts64)
          * is float128, which has a 15 bit exponent.  Bounding N to 16 bits
          * still allows rounding to infinity, without allowing overflow
-         * within the int32_t that backs FloatParts.exp.
+         * within the int32_t that backs FloatParts64.exp.
          */
         n = MIN(MAX(n, -0x10000), 0x10000);
         a.exp += n;
@@ -3413,30 +3629,38 @@
 
 float16 float16_scalbn(float16 a, int n, float_status *status)
 {
-    FloatParts pa = float16_unpack_canonical(a, status);
-    FloatParts pr = scalbn_decomposed(pa, n, status);
-    return float16_round_pack_canonical(pr, status);
+    FloatParts64 pa, pr;
+
+    float16_unpack_canonical(&pa, a, status);
+    pr = scalbn_decomposed(pa, n, status);
+    return float16_round_pack_canonical(&pr, status);
 }
 
 float32 float32_scalbn(float32 a, int n, float_status *status)
 {
-    FloatParts pa = float32_unpack_canonical(a, status);
-    FloatParts pr = scalbn_decomposed(pa, n, status);
-    return float32_round_pack_canonical(pr, status);
+    FloatParts64 pa, pr;
+
+    float32_unpack_canonical(&pa, a, status);
+    pr = scalbn_decomposed(pa, n, status);
+    return float32_round_pack_canonical(&pr, status);
 }
 
 float64 float64_scalbn(float64 a, int n, float_status *status)
 {
-    FloatParts pa = float64_unpack_canonical(a, status);
-    FloatParts pr = scalbn_decomposed(pa, n, status);
-    return float64_round_pack_canonical(pr, status);
+    FloatParts64 pa, pr;
+
+    float64_unpack_canonical(&pa, a, status);
+    pr = scalbn_decomposed(pa, n, status);
+    return float64_round_pack_canonical(&pr, status);
 }
 
 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
 {
-    FloatParts pa = bfloat16_unpack_canonical(a, status);
-    FloatParts pr = scalbn_decomposed(pa, n, status);
-    return bfloat16_round_pack_canonical(pr, status);
+    FloatParts64 pa, pr;
+
+    bfloat16_unpack_canonical(&pa, a, status);
+    pr = scalbn_decomposed(pa, n, status);
+    return bfloat16_round_pack_canonical(&pr, status);
 }
 
 /*
@@ -3451,20 +3675,22 @@
  * especially for 64 bit floats.
  */
 
-static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
+static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
 {
     uint64_t a_frac, r_frac, s_frac;
     int bit, last_bit;
 
     if (is_nan(a.cls)) {
-        return return_nan(a, s);
+        parts_return_nan(&a, s);
+        return a;
     }
     if (a.cls == float_class_zero) {
         return a;  /* sqrt(+-0) = +-0 */
     }
     if (a.sign) {
-        s->float_exception_flags |= float_flag_invalid;
-        return parts_default_nan(s);
+        float_raise(float_flag_invalid, s);
+        parts_default_nan(&a, s);
+        return a;
     }
     if (a.cls == float_class_inf) {
         return a;  /* sqrt(+inf) = +inf */
@@ -3475,12 +3701,9 @@
     /* We need two overflow bits at the top. Adding room for that is a
      * right shift. If the exponent is odd, we can discard the low bit
      * by multiplying the fraction by 2; that's a left shift. Combine
-     * those and we shift right if the exponent is even.
+     * those and we shift right by 1 if the exponent is odd, otherwise 2.
      */
-    a_frac = a.frac;
-    if (!(a.exp & 1)) {
-        a_frac >>= 1;
-    }
+    a_frac = a.frac >> (2 - (a.exp & 1));
     a.exp >>= 1;
 
     /* Bit-by-bit computation of sqrt.  */
@@ -3488,10 +3711,10 @@
     s_frac = 0;
 
     /* Iterate from implicit bit down to the 3 extra bits to compute a
-     * properly rounded result. Remember we've inserted one more bit
-     * at the top, so these positions are one less.
+     * properly rounded result. Remember we've inserted two more bits
+     * at the top, so these positions are two less.
      */
-    bit = DECOMPOSED_BINARY_POINT - 1;
+    bit = DECOMPOSED_BINARY_POINT - 2;
     last_bit = MAX(p->frac_shift - 4, 0);
     do {
         uint64_t q = 1ULL << bit;
@@ -3507,32 +3730,38 @@
     /* Undo the right shift done above. If there is any remaining
      * fraction, the result is inexact. Set the sticky bit.
      */
-    a.frac = (r_frac << 1) + (a_frac != 0);
+    a.frac = (r_frac << 2) + (a_frac != 0);
 
     return a;
 }
 
 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
 {
-    FloatParts pa = float16_unpack_canonical(a, status);
-    FloatParts pr = sqrt_float(pa, status, &float16_params);
-    return float16_round_pack_canonical(pr, status);
+    FloatParts64 pa, pr;
+
+    float16_unpack_canonical(&pa, a, status);
+    pr = sqrt_float(pa, status, &float16_params);
+    return float16_round_pack_canonical(&pr, status);
 }
 
 static float32 QEMU_SOFTFLOAT_ATTR
 soft_f32_sqrt(float32 a, float_status *status)
 {
-    FloatParts pa = float32_unpack_canonical(a, status);
-    FloatParts pr = sqrt_float(pa, status, &float32_params);
-    return float32_round_pack_canonical(pr, status);
+    FloatParts64 pa, pr;
+
+    float32_unpack_canonical(&pa, a, status);
+    pr = sqrt_float(pa, status, &float32_params);
+    return float32_round_pack_canonical(&pr, status);
 }
 
 static float64 QEMU_SOFTFLOAT_ATTR
 soft_f64_sqrt(float64 a, float_status *status)
 {
-    FloatParts pa = float64_unpack_canonical(a, status);
-    FloatParts pr = sqrt_float(pa, status, &float64_params);
-    return float64_round_pack_canonical(pr, status);
+    FloatParts64 pa, pr;
+
+    float64_unpack_canonical(&pa, a, status);
+    pr = sqrt_float(pa, status, &float64_params);
+    return float64_round_pack_canonical(&pr, status);
 }
 
 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
@@ -3591,9 +3820,11 @@
 
 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
 {
-    FloatParts pa = bfloat16_unpack_canonical(a, status);
-    FloatParts pr = sqrt_float(pa, status, &bfloat16_params);
-    return bfloat16_round_pack_canonical(pr, status);
+    FloatParts64 pa, pr;
+
+    bfloat16_unpack_canonical(&pa, a, status);
+    pr = sqrt_float(pa, status, &bfloat16_params);
+    return bfloat16_round_pack_canonical(&pr, status);
 }
 
 /*----------------------------------------------------------------------------
@@ -3602,47 +3833,47 @@
 
 float16 float16_default_nan(float_status *status)
 {
-    FloatParts p = parts_default_nan(status);
+    FloatParts64 p;
+
+    parts_default_nan(&p, status);
     p.frac >>= float16_params.frac_shift;
-    return float16_pack_raw(p);
+    return float16_pack_raw(&p);
 }
 
 float32 float32_default_nan(float_status *status)
 {
-    FloatParts p = parts_default_nan(status);
+    FloatParts64 p;
+
+    parts_default_nan(&p, status);
     p.frac >>= float32_params.frac_shift;
-    return float32_pack_raw(p);
+    return float32_pack_raw(&p);
 }
 
 float64 float64_default_nan(float_status *status)
 {
-    FloatParts p = parts_default_nan(status);
+    FloatParts64 p;
+
+    parts_default_nan(&p, status);
     p.frac >>= float64_params.frac_shift;
-    return float64_pack_raw(p);
+    return float64_pack_raw(&p);
 }
 
 float128 float128_default_nan(float_status *status)
 {
-    FloatParts p = parts_default_nan(status);
-    float128 r;
+    FloatParts128 p;
 
-    /* Extrapolate from the choices made by parts_default_nan to fill
-     * in the quad-floating format.  If the low bit is set, assume we
-     * want to set all non-snan bits.
-     */
-    r.low = -(p.frac & 1);
-    r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
-    r.high |= UINT64_C(0x7FFF000000000000);
-    r.high |= (uint64_t)p.sign << 63;
-
-    return r;
+    parts_default_nan(&p, status);
+    frac_shr(&p, float128_params.frac_shift);
+    return float128_pack_raw(&p);
 }
 
 bfloat16 bfloat16_default_nan(float_status *status)
 {
-    FloatParts p = parts_default_nan(status);
+    FloatParts64 p;
+
+    parts_default_nan(&p, status);
     p.frac >>= bfloat16_params.frac_shift;
-    return bfloat16_pack_raw(p);
+    return bfloat16_pack_raw(&p);
 }
 
 /*----------------------------------------------------------------------------
@@ -3651,38 +3882,57 @@
 
 float16 float16_silence_nan(float16 a, float_status *status)
 {
-    FloatParts p = float16_unpack_raw(a);
+    FloatParts64 p;
+
+    float16_unpack_raw(&p, a);
     p.frac <<= float16_params.frac_shift;
-    p = parts_silence_nan(p, status);
+    parts_silence_nan(&p, status);
     p.frac >>= float16_params.frac_shift;
-    return float16_pack_raw(p);
+    return float16_pack_raw(&p);
 }
 
 float32 float32_silence_nan(float32 a, float_status *status)
 {
-    FloatParts p = float32_unpack_raw(a);
+    FloatParts64 p;
+
+    float32_unpack_raw(&p, a);
     p.frac <<= float32_params.frac_shift;
-    p = parts_silence_nan(p, status);
+    parts_silence_nan(&p, status);
     p.frac >>= float32_params.frac_shift;
-    return float32_pack_raw(p);
+    return float32_pack_raw(&p);
 }
 
 float64 float64_silence_nan(float64 a, float_status *status)
 {
-    FloatParts p = float64_unpack_raw(a);
+    FloatParts64 p;
+
+    float64_unpack_raw(&p, a);
     p.frac <<= float64_params.frac_shift;
-    p = parts_silence_nan(p, status);
+    parts_silence_nan(&p, status);
     p.frac >>= float64_params.frac_shift;
-    return float64_pack_raw(p);
+    return float64_pack_raw(&p);
 }
 
 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
 {
-    FloatParts p = bfloat16_unpack_raw(a);
+    FloatParts64 p;
+
+    bfloat16_unpack_raw(&p, a);
     p.frac <<= bfloat16_params.frac_shift;
-    p = parts_silence_nan(p, status);
+    parts_silence_nan(&p, status);
     p.frac >>= bfloat16_params.frac_shift;
-    return bfloat16_pack_raw(p);
+    return bfloat16_pack_raw(&p);
+}
+
+float128 float128_silence_nan(float128 a, float_status *status)
+{
+    FloatParts128 p;
+
+    float128_unpack_raw(&p, a);
+    frac_shl(&p, float128_params.frac_shift);
+    parts_silence_nan(&p, status);
+    frac_shr(&p, float128_params.frac_shift);
+    return float128_pack_raw(&p);
 }
 
 /*----------------------------------------------------------------------------
@@ -3690,7 +3940,7 @@
 | input-denormal exception and return zero. Otherwise just return the value.
 *----------------------------------------------------------------------------*/
 
-static bool parts_squash_denormal(FloatParts p, float_status *status)
+static bool parts_squash_denormal(FloatParts64 p, float_status *status)
 {
     if (p.exp == 0 && p.frac != 0) {
         float_raise(float_flag_input_denormal, status);
@@ -3703,7 +3953,9 @@
 float16 float16_squash_input_denormal(float16 a, float_status *status)
 {
     if (status->flush_inputs_to_zero) {
-        FloatParts p = float16_unpack_raw(a);
+        FloatParts64 p;
+
+        float16_unpack_raw(&p, a);
         if (parts_squash_denormal(p, status)) {
             return float16_set_sign(float16_zero, p.sign);
         }
@@ -3714,7 +3966,9 @@
 float32 float32_squash_input_denormal(float32 a, float_status *status)
 {
     if (status->flush_inputs_to_zero) {
-        FloatParts p = float32_unpack_raw(a);
+        FloatParts64 p;
+
+        float32_unpack_raw(&p, a);
         if (parts_squash_denormal(p, status)) {
             return float32_set_sign(float32_zero, p.sign);
         }
@@ -3725,7 +3979,9 @@
 float64 float64_squash_input_denormal(float64 a, float_status *status)
 {
     if (status->flush_inputs_to_zero) {
-        FloatParts p = float64_unpack_raw(a);
+        FloatParts64 p;
+
+        float64_unpack_raw(&p, a);
         if (parts_squash_denormal(p, status)) {
             return float64_set_sign(float64_zero, p.sign);
         }
@@ -3736,7 +3992,9 @@
 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
 {
     if (status->flush_inputs_to_zero) {
-        FloatParts p = bfloat16_unpack_raw(a);
+        FloatParts64 p;
+
+        bfloat16_unpack_raw(&p, a);
         if (parts_squash_denormal(p, status)) {
             return bfloat16_set_sign(bfloat16_zero, p.sign);
         }
@@ -3797,7 +4055,7 @@
         return zSign ? INT32_MIN : INT32_MAX;
     }
     if (roundBits) {
-        status->float_exception_flags |= float_flag_inexact;
+        float_raise(float_flag_inexact, status);
     }
     return z;
 
@@ -3859,7 +4117,7 @@
         return zSign ? INT64_MIN : INT64_MAX;
     }
     if (absZ1) {
-        status->float_exception_flags |= float_flag_inexact;
+        float_raise(float_flag_inexact, status);
     }
     return z;
 
@@ -3920,7 +4178,7 @@
     }
 
     if (absZ1) {
-        status->float_exception_flags |= float_flag_inexact;
+        float_raise(float_flag_inexact, status);
     }
     return absZ0;
 }
@@ -4031,7 +4289,7 @@
         }
     }
     if (roundBits) {
-        status->float_exception_flags |= float_flag_inexact;
+        float_raise(float_flag_inexact, status);
     }
     zSig = ( zSig + roundIncrement )>>7;
     if (!(roundBits ^ 0x40) && roundNearestEven) {
@@ -4187,7 +4445,7 @@
         }
     }
     if (roundBits) {
-        status->float_exception_flags |= float_flag_inexact;
+        float_raise(float_flag_inexact, status);
     }
     zSig = ( zSig + roundIncrement )>>10;
     if (!(roundBits ^ 0x200) && roundNearestEven) {
@@ -4321,7 +4579,7 @@
                 float_raise(float_flag_underflow, status);
             }
             if (roundBits) {
-                status->float_exception_flags |= float_flag_inexact;
+                float_raise(float_flag_inexact, status);
             }
             zSig0 += roundIncrement;
             if ( (int64_t) zSig0 < 0 ) zExp = 1;
@@ -4334,7 +4592,7 @@
         }
     }
     if (roundBits) {
-        status->float_exception_flags |= float_flag_inexact;
+        float_raise(float_flag_inexact, status);
     }
     zSig0 += roundIncrement;
     if ( zSig0 < roundIncrement ) {
@@ -4397,7 +4655,7 @@
                 float_raise(float_flag_underflow, status);
             }
             if (zSig1) {
-                status->float_exception_flags |= float_flag_inexact;
+                float_raise(float_flag_inexact, status);
             }
             switch (roundingMode) {
             case float_round_nearest_even:
@@ -4427,7 +4685,7 @@
         }
     }
     if (zSig1) {
-        status->float_exception_flags |= float_flag_inexact;
+        float_raise(float_flag_inexact, status);
     }
     if ( increment ) {
         ++zSig0;
@@ -4704,7 +4962,7 @@
         }
     }
     if (zSig2) {
-        status->float_exception_flags |= float_flag_inexact;
+        float_raise(float_flag_inexact, status);
     }
     if ( increment ) {
         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
@@ -4906,38 +5164,6 @@
 }
 
 /*----------------------------------------------------------------------------
-| Returns the result of converting the single-precision floating-point value
-| `a' to the double-precision floating-point format.  The conversion is
-| performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float32_to_float128(float32 a, float_status *status)
-{
-    bool aSign;
-    int aExp;
-    uint32_t aSig;
-
-    a = float32_squash_input_denormal(a, status);
-    aSig = extractFloat32Frac( a );
-    aExp = extractFloat32Exp( a );
-    aSign = extractFloat32Sign( a );
-    if ( aExp == 0xFF ) {
-        if (aSig) {
-            return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
-        }
-        return packFloat128( aSign, 0x7FFF, 0, 0 );
-    }
-    if ( aExp == 0 ) {
-        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
-        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
-        --aExp;
-    }
-    return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
-
-}
-
-/*----------------------------------------------------------------------------
 | Returns the remainder of the single-precision floating-point value `a'
 | with respect to the corresponding value `b'.  The operation is performed
 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
@@ -5211,40 +5437,6 @@
 }
 
 /*----------------------------------------------------------------------------
-| Returns the result of converting the double-precision floating-point value
-| `a' to the quadruple-precision floating-point format.  The conversion is
-| performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float64_to_float128(float64 a, float_status *status)
-{
-    bool aSign;
-    int aExp;
-    uint64_t aSig, zSig0, zSig1;
-
-    a = float64_squash_input_denormal(a, status);
-    aSig = extractFloat64Frac( a );
-    aExp = extractFloat64Exp( a );
-    aSign = extractFloat64Sign( a );
-    if ( aExp == 0x7FF ) {
-        if (aSig) {
-            return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
-        }
-        return packFloat128( aSign, 0x7FFF, 0, 0 );
-    }
-    if ( aExp == 0 ) {
-        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
-        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
-        --aExp;
-    }
-    shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
-    return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
-
-}
-
-
-/*----------------------------------------------------------------------------
 | Returns the remainder of the double-precision floating-point value `a'
 | with respect to the corresponding value `b'.  The operation is performed
 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
@@ -5442,7 +5634,7 @@
     }
     else if ( aExp < 0x3FFF ) {
         if (aExp || aSig) {
-            status->float_exception_flags |= float_flag_inexact;
+            float_raise(float_flag_inexact, status);
         }
         return 0;
     }
@@ -5457,7 +5649,7 @@
         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
     }
     if ( ( aSig<<shiftCount ) != savedASig ) {
-        status->float_exception_flags |= float_flag_inexact;
+        float_raise(float_flag_inexact, status);
     }
     return z;
 
@@ -5541,13 +5733,13 @@
     }
     else if ( aExp < 0x3FFF ) {
         if (aExp | aSig) {
-            status->float_exception_flags |= float_flag_inexact;
+            float_raise(float_flag_inexact, status);
         }
         return 0;
     }
     z = aSig>>( - shiftCount );
     if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
-        status->float_exception_flags |= float_flag_inexact;
+        float_raise(float_flag_inexact, status);
     }
     if ( aSign ) z = - z;
     return z;
@@ -5698,7 +5890,7 @@
              && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
             return a;
         }
-        status->float_exception_flags |= float_flag_inexact;
+        float_raise(float_flag_inexact, status);
         aSign = extractFloatx80Sign( a );
         switch (status->float_rounding_mode) {
          case float_round_nearest_even:
@@ -5765,7 +5957,7 @@
         z.low = UINT64_C(0x8000000000000000);
     }
     if (z.low != a.low) {
-        status->float_exception_flags |= float_flag_inexact;
+        float_raise(float_flag_inexact, status);
     }
     return z;
 
@@ -6345,191 +6537,6 @@
 }
 
 /*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the 32-bit two's complement integer format.  The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic---which means in particular that the conversion is rounded
-| according to the current rounding mode.  If `a' is a NaN, the largest
-| positive integer is returned.  Otherwise, if the conversion overflows, the
-| largest integer with the same sign as `a' is returned.
-*----------------------------------------------------------------------------*/
-
-int32_t float128_to_int32(float128 a, float_status *status)
-{
-    bool aSign;
-    int32_t aExp, shiftCount;
-    uint64_t aSig0, aSig1;
-
-    aSig1 = extractFloat128Frac1( a );
-    aSig0 = extractFloat128Frac0( a );
-    aExp = extractFloat128Exp( a );
-    aSign = extractFloat128Sign( a );
-    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
-    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
-    aSig0 |= ( aSig1 != 0 );
-    shiftCount = 0x4028 - aExp;
-    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
-    return roundAndPackInt32(aSign, aSig0, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the 32-bit two's complement integer format.  The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic, except that the conversion is always rounded toward zero.  If
-| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
-| conversion overflows, the largest integer with the same sign as `a' is
-| returned.
-*----------------------------------------------------------------------------*/
-
-int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
-{
-    bool aSign;
-    int32_t aExp, shiftCount;
-    uint64_t aSig0, aSig1, savedASig;
-    int32_t z;
-
-    aSig1 = extractFloat128Frac1( a );
-    aSig0 = extractFloat128Frac0( a );
-    aExp = extractFloat128Exp( a );
-    aSign = extractFloat128Sign( a );
-    aSig0 |= ( aSig1 != 0 );
-    if ( 0x401E < aExp ) {
-        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
-        goto invalid;
-    }
-    else if ( aExp < 0x3FFF ) {
-        if (aExp || aSig0) {
-            status->float_exception_flags |= float_flag_inexact;
-        }
-        return 0;
-    }
-    aSig0 |= UINT64_C(0x0001000000000000);
-    shiftCount = 0x402F - aExp;
-    savedASig = aSig0;
-    aSig0 >>= shiftCount;
-    z = aSig0;
-    if ( aSign ) z = - z;
-    if ( ( z < 0 ) ^ aSign ) {
- invalid:
-        float_raise(float_flag_invalid, status);
-        return aSign ? INT32_MIN : INT32_MAX;
-    }
-    if ( ( aSig0<<shiftCount ) != savedASig ) {
-        status->float_exception_flags |= float_flag_inexact;
-    }
-    return z;
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the 64-bit two's complement integer format.  The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic---which means in particular that the conversion is rounded
-| according to the current rounding mode.  If `a' is a NaN, the largest
-| positive integer is returned.  Otherwise, if the conversion overflows, the
-| largest integer with the same sign as `a' is returned.
-*----------------------------------------------------------------------------*/
-
-int64_t float128_to_int64(float128 a, float_status *status)
-{
-    bool aSign;
-    int32_t aExp, shiftCount;
-    uint64_t aSig0, aSig1;
-
-    aSig1 = extractFloat128Frac1( a );
-    aSig0 = extractFloat128Frac0( a );
-    aExp = extractFloat128Exp( a );
-    aSign = extractFloat128Sign( a );
-    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
-    shiftCount = 0x402F - aExp;
-    if ( shiftCount <= 0 ) {
-        if ( 0x403E < aExp ) {
-            float_raise(float_flag_invalid, status);
-            if (    ! aSign
-                 || (    ( aExp == 0x7FFF )
-                      && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
-                    )
-               ) {
-                return INT64_MAX;
-            }
-            return INT64_MIN;
-        }
-        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
-    }
-    else {
-        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
-    }
-    return roundAndPackInt64(aSign, aSig0, aSig1, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the 64-bit two's complement integer format.  The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic, except that the conversion is always rounded toward zero.
-| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
-| the conversion overflows, the largest integer with the same sign as `a' is
-| returned.
-*----------------------------------------------------------------------------*/
-
-int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
-{
-    bool aSign;
-    int32_t aExp, shiftCount;
-    uint64_t aSig0, aSig1;
-    int64_t z;
-
-    aSig1 = extractFloat128Frac1( a );
-    aSig0 = extractFloat128Frac0( a );
-    aExp = extractFloat128Exp( a );
-    aSign = extractFloat128Sign( a );
-    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
-    shiftCount = aExp - 0x402F;
-    if ( 0 < shiftCount ) {
-        if ( 0x403E <= aExp ) {
-            aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
-            if (    ( a.high == UINT64_C(0xC03E000000000000) )
-                 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
-                if (aSig1) {
-                    status->float_exception_flags |= float_flag_inexact;
-                }
-            }
-            else {
-                float_raise(float_flag_invalid, status);
-                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
-                    return INT64_MAX;
-                }
-            }
-            return INT64_MIN;
-        }
-        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
-        if ( (uint64_t) ( aSig1<<shiftCount ) ) {
-            status->float_exception_flags |= float_flag_inexact;
-        }
-    }
-    else {
-        if ( aExp < 0x3FFF ) {
-            if ( aExp | aSig0 | aSig1 ) {
-                status->float_exception_flags |= float_flag_inexact;
-            }
-            return 0;
-        }
-        z = aSig0>>( - shiftCount );
-        if (    aSig1
-             || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
-            status->float_exception_flags |= float_flag_inexact;
-        }
-    }
-    if ( aSign ) z = - z;
-    return z;
-
-}
-
-/*----------------------------------------------------------------------------
 | Returns the result of converting the quadruple-precision floating-point value
 | `a' to the 64-bit unsigned integer format.  The conversion is
 | performed according to the IEC/IEEE Standard for Binary Floating-Point
@@ -6647,74 +6654,6 @@
 
 /*----------------------------------------------------------------------------
 | Returns the result of converting the quadruple-precision floating-point
-| value `a' to the single-precision floating-point format.  The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float32 float128_to_float32(float128 a, float_status *status)
-{
-    bool aSign;
-    int32_t aExp;
-    uint64_t aSig0, aSig1;
-    uint32_t zSig;
-
-    aSig1 = extractFloat128Frac1( a );
-    aSig0 = extractFloat128Frac0( a );
-    aExp = extractFloat128Exp( a );
-    aSign = extractFloat128Sign( a );
-    if ( aExp == 0x7FFF ) {
-        if ( aSig0 | aSig1 ) {
-            return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
-        }
-        return packFloat32( aSign, 0xFF, 0 );
-    }
-    aSig0 |= ( aSig1 != 0 );
-    shift64RightJamming( aSig0, 18, &aSig0 );
-    zSig = aSig0;
-    if ( aExp || zSig ) {
-        zSig |= 0x40000000;
-        aExp -= 0x3F81;
-    }
-    return roundAndPackFloat32(aSign, aExp, zSig, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the double-precision floating-point format.  The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float64 float128_to_float64(float128 a, float_status *status)
-{
-    bool aSign;
-    int32_t aExp;
-    uint64_t aSig0, aSig1;
-
-    aSig1 = extractFloat128Frac1( a );
-    aSig0 = extractFloat128Frac0( a );
-    aExp = extractFloat128Exp( a );
-    aSign = extractFloat128Sign( a );
-    if ( aExp == 0x7FFF ) {
-        if ( aSig0 | aSig1 ) {
-            return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
-        }
-        return packFloat64( aSign, 0x7FF, 0 );
-    }
-    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
-    aSig0 |= ( aSig1 != 0 );
-    if ( aExp || aSig0 ) {
-        aSig0 |= UINT64_C(0x4000000000000000);
-        aExp -= 0x3C01;
-    }
-    return roundAndPackFloat64(aSign, aExp, aSig0, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
 | value `a' to the extended double-precision floating-point format.  The
 | conversion is performed according to the IEC/IEEE Standard for Binary
 | Floating-Point Arithmetic.
@@ -6752,536 +6691,6 @@
 }
 
 /*----------------------------------------------------------------------------
-| Rounds the quadruple-precision floating-point value `a' to an integer, and
-| returns the result as a quadruple-precision floating-point value.  The
-| operation is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float128_round_to_int(float128 a, float_status *status)
-{
-    bool aSign;
-    int32_t aExp;
-    uint64_t lastBitMask, roundBitsMask;
-    float128 z;
-
-    aExp = extractFloat128Exp( a );
-    if ( 0x402F <= aExp ) {
-        if ( 0x406F <= aExp ) {
-            if (    ( aExp == 0x7FFF )
-                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
-               ) {
-                return propagateFloat128NaN(a, a, status);
-            }
-            return a;
-        }
-        lastBitMask = 1;
-        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
-        roundBitsMask = lastBitMask - 1;
-        z = a;
-        switch (status->float_rounding_mode) {
-        case float_round_nearest_even:
-            if ( lastBitMask ) {
-                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
-                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
-            }
-            else {
-                if ( (int64_t) z.low < 0 ) {
-                    ++z.high;
-                    if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
-                }
-            }
-            break;
-        case float_round_ties_away:
-            if (lastBitMask) {
-                add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
-            } else {
-                if ((int64_t) z.low < 0) {
-                    ++z.high;
-                }
-            }
-            break;
-        case float_round_to_zero:
-            break;
-        case float_round_up:
-            if (!extractFloat128Sign(z)) {
-                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
-            }
-            break;
-        case float_round_down:
-            if (extractFloat128Sign(z)) {
-                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
-            }
-            break;
-        case float_round_to_odd:
-            /*
-             * Note that if lastBitMask == 0, the last bit is the lsb
-             * of high, and roundBitsMask == -1.
-             */
-            if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
-                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
-            }
-            break;
-        default:
-            abort();
-        }
-        z.low &= ~ roundBitsMask;
-    }
-    else {
-        if ( aExp < 0x3FFF ) {
-            if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
-            status->float_exception_flags |= float_flag_inexact;
-            aSign = extractFloat128Sign( a );
-            switch (status->float_rounding_mode) {
-            case float_round_nearest_even:
-                if (    ( aExp == 0x3FFE )
-                     && (   extractFloat128Frac0( a )
-                          | extractFloat128Frac1( a ) )
-                   ) {
-                    return packFloat128( aSign, 0x3FFF, 0, 0 );
-                }
-                break;
-            case float_round_ties_away:
-                if (aExp == 0x3FFE) {
-                    return packFloat128(aSign, 0x3FFF, 0, 0);
-                }
-                break;
-            case float_round_down:
-                return
-                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
-                    : packFloat128( 0, 0, 0, 0 );
-            case float_round_up:
-                return
-                      aSign ? packFloat128( 1, 0, 0, 0 )
-                    : packFloat128( 0, 0x3FFF, 0, 0 );
-
-            case float_round_to_odd:
-                return packFloat128(aSign, 0x3FFF, 0, 0);
-
-            case float_round_to_zero:
-                break;
-            }
-            return packFloat128( aSign, 0, 0, 0 );
-        }
-        lastBitMask = 1;
-        lastBitMask <<= 0x402F - aExp;
-        roundBitsMask = lastBitMask - 1;
-        z.low = 0;
-        z.high = a.high;
-        switch (status->float_rounding_mode) {
-        case float_round_nearest_even:
-            z.high += lastBitMask>>1;
-            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
-                z.high &= ~ lastBitMask;
-            }
-            break;
-        case float_round_ties_away:
-            z.high += lastBitMask>>1;
-            break;
-        case float_round_to_zero:
-            break;
-        case float_round_up:
-            if (!extractFloat128Sign(z)) {
-                z.high |= ( a.low != 0 );
-                z.high += roundBitsMask;
-            }
-            break;
-        case float_round_down:
-            if (extractFloat128Sign(z)) {
-                z.high |= (a.low != 0);
-                z.high += roundBitsMask;
-            }
-            break;
-        case float_round_to_odd:
-            if ((z.high & lastBitMask) == 0) {
-                z.high |= (a.low != 0);
-                z.high += roundBitsMask;
-            }
-            break;
-        default:
-            abort();
-        }
-        z.high &= ~ roundBitsMask;
-    }
-    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
-        status->float_exception_flags |= float_flag_inexact;
-    }
-    return z;
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of adding the absolute values of the quadruple-precision
-| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
-| before being returned.  `zSign' is ignored if the result is a NaN.
-| The addition is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-static float128 addFloat128Sigs(float128 a, float128 b, bool zSign,
-                                float_status *status)
-{
-    int32_t aExp, bExp, zExp;
-    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
-    int32_t expDiff;
-
-    aSig1 = extractFloat128Frac1( a );
-    aSig0 = extractFloat128Frac0( a );
-    aExp = extractFloat128Exp( a );
-    bSig1 = extractFloat128Frac1( b );
-    bSig0 = extractFloat128Frac0( b );
-    bExp = extractFloat128Exp( b );
-    expDiff = aExp - bExp;
-    if ( 0 < expDiff ) {
-        if ( aExp == 0x7FFF ) {
-            if (aSig0 | aSig1) {
-                return propagateFloat128NaN(a, b, status);
-            }
-            return a;
-        }
-        if ( bExp == 0 ) {
-            --expDiff;
-        }
-        else {
-            bSig0 |= UINT64_C(0x0001000000000000);
-        }
-        shift128ExtraRightJamming(
-            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
-        zExp = aExp;
-    }
-    else if ( expDiff < 0 ) {
-        if ( bExp == 0x7FFF ) {
-            if (bSig0 | bSig1) {
-                return propagateFloat128NaN(a, b, status);
-            }
-            return packFloat128( zSign, 0x7FFF, 0, 0 );
-        }
-        if ( aExp == 0 ) {
-            ++expDiff;
-        }
-        else {
-            aSig0 |= UINT64_C(0x0001000000000000);
-        }
-        shift128ExtraRightJamming(
-            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
-        zExp = bExp;
-    }
-    else {
-        if ( aExp == 0x7FFF ) {
-            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
-                return propagateFloat128NaN(a, b, status);
-            }
-            return a;
-        }
-        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
-        if ( aExp == 0 ) {
-            if (status->flush_to_zero) {
-                if (zSig0 | zSig1) {
-                    float_raise(float_flag_output_denormal, status);
-                }
-                return packFloat128(zSign, 0, 0, 0);
-            }
-            return packFloat128( zSign, 0, zSig0, zSig1 );
-        }
-        zSig2 = 0;
-        zSig0 |= UINT64_C(0x0002000000000000);
-        zExp = aExp;
-        goto shiftRight1;
-    }
-    aSig0 |= UINT64_C(0x0001000000000000);
-    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
-    --zExp;
-    if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
-    ++zExp;
- shiftRight1:
-    shift128ExtraRightJamming(
-        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
- roundAndPack:
-    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of subtracting the absolute values of the quadruple-
-| precision floating-point values `a' and `b'.  If `zSign' is 1, the
-| difference is negated before being returned.  `zSign' is ignored if the
-| result is a NaN.  The subtraction is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-static float128 subFloat128Sigs(float128 a, float128 b, bool zSign,
-                                float_status *status)
-{
-    int32_t aExp, bExp, zExp;
-    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
-    int32_t expDiff;
-
-    aSig1 = extractFloat128Frac1( a );
-    aSig0 = extractFloat128Frac0( a );
-    aExp = extractFloat128Exp( a );
-    bSig1 = extractFloat128Frac1( b );
-    bSig0 = extractFloat128Frac0( b );
-    bExp = extractFloat128Exp( b );
-    expDiff = aExp - bExp;
-    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
-    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
-    if ( 0 < expDiff ) goto aExpBigger;
-    if ( expDiff < 0 ) goto bExpBigger;
-    if ( aExp == 0x7FFF ) {
-        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
-            return propagateFloat128NaN(a, b, status);
-        }
-        float_raise(float_flag_invalid, status);
-        return float128_default_nan(status);
-    }
-    if ( aExp == 0 ) {
-        aExp = 1;
-        bExp = 1;
-    }
-    if ( bSig0 < aSig0 ) goto aBigger;
-    if ( aSig0 < bSig0 ) goto bBigger;
-    if ( bSig1 < aSig1 ) goto aBigger;
-    if ( aSig1 < bSig1 ) goto bBigger;
-    return packFloat128(status->float_rounding_mode == float_round_down,
-                        0, 0, 0);
- bExpBigger:
-    if ( bExp == 0x7FFF ) {
-        if (bSig0 | bSig1) {
-            return propagateFloat128NaN(a, b, status);
-        }
-        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
-    }
-    if ( aExp == 0 ) {
-        ++expDiff;
-    }
-    else {
-        aSig0 |= UINT64_C(0x4000000000000000);
-    }
-    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
-    bSig0 |= UINT64_C(0x4000000000000000);
- bBigger:
-    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
-    zExp = bExp;
-    zSign ^= 1;
-    goto normalizeRoundAndPack;
- aExpBigger:
-    if ( aExp == 0x7FFF ) {
-        if (aSig0 | aSig1) {
-            return propagateFloat128NaN(a, b, status);
-        }
-        return a;
-    }
-    if ( bExp == 0 ) {
-        --expDiff;
-    }
-    else {
-        bSig0 |= UINT64_C(0x4000000000000000);
-    }
-    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
-    aSig0 |= UINT64_C(0x4000000000000000);
- aBigger:
-    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
-    zExp = aExp;
- normalizeRoundAndPack:
-    --zExp;
-    return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
-                                         status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of adding the quadruple-precision floating-point values
-| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
-| for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float128_add(float128 a, float128 b, float_status *status)
-{
-    bool aSign, bSign;
-
-    aSign = extractFloat128Sign( a );
-    bSign = extractFloat128Sign( b );
-    if ( aSign == bSign ) {
-        return addFloat128Sigs(a, b, aSign, status);
-    }
-    else {
-        return subFloat128Sigs(a, b, aSign, status);
-    }
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of subtracting the quadruple-precision floating-point
-| values `a' and `b'.  The operation is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float128_sub(float128 a, float128 b, float_status *status)
-{
-    bool aSign, bSign;
-
-    aSign = extractFloat128Sign( a );
-    bSign = extractFloat128Sign( b );
-    if ( aSign == bSign ) {
-        return subFloat128Sigs(a, b, aSign, status);
-    }
-    else {
-        return addFloat128Sigs(a, b, aSign, status);
-    }
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of multiplying the quadruple-precision floating-point
-| values `a' and `b'.  The operation is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float128_mul(float128 a, float128 b, float_status *status)
-{
-    bool aSign, bSign, zSign;
-    int32_t aExp, bExp, zExp;
-    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
-
-    aSig1 = extractFloat128Frac1( a );
-    aSig0 = extractFloat128Frac0( a );
-    aExp = extractFloat128Exp( a );
-    aSign = extractFloat128Sign( a );
-    bSig1 = extractFloat128Frac1( b );
-    bSig0 = extractFloat128Frac0( b );
-    bExp = extractFloat128Exp( b );
-    bSign = extractFloat128Sign( b );
-    zSign = aSign ^ bSign;
-    if ( aExp == 0x7FFF ) {
-        if (    ( aSig0 | aSig1 )
-             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
-            return propagateFloat128NaN(a, b, status);
-        }
-        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
-        return packFloat128( zSign, 0x7FFF, 0, 0 );
-    }
-    if ( bExp == 0x7FFF ) {
-        if (bSig0 | bSig1) {
-            return propagateFloat128NaN(a, b, status);
-        }
-        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
- invalid:
-            float_raise(float_flag_invalid, status);
-            return float128_default_nan(status);
-        }
-        return packFloat128( zSign, 0x7FFF, 0, 0 );
-    }
-    if ( aExp == 0 ) {
-        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
-        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
-    }
-    if ( bExp == 0 ) {
-        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
-        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
-    }
-    zExp = aExp + bExp - 0x4000;
-    aSig0 |= UINT64_C(0x0001000000000000);
-    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
-    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
-    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
-    zSig2 |= ( zSig3 != 0 );
-    if (UINT64_C( 0x0002000000000000) <= zSig0 ) {
-        shift128ExtraRightJamming(
-            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
-        ++zExp;
-    }
-    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of dividing the quadruple-precision floating-point value
-| `a' by the corresponding value `b'.  The operation is performed according to
-| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float128_div(float128 a, float128 b, float_status *status)
-{
-    bool aSign, bSign, zSign;
-    int32_t aExp, bExp, zExp;
-    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
-    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
-
-    aSig1 = extractFloat128Frac1( a );
-    aSig0 = extractFloat128Frac0( a );
-    aExp = extractFloat128Exp( a );
-    aSign = extractFloat128Sign( a );
-    bSig1 = extractFloat128Frac1( b );
-    bSig0 = extractFloat128Frac0( b );
-    bExp = extractFloat128Exp( b );
-    bSign = extractFloat128Sign( b );
-    zSign = aSign ^ bSign;
-    if ( aExp == 0x7FFF ) {
-        if (aSig0 | aSig1) {
-            return propagateFloat128NaN(a, b, status);
-        }
-        if ( bExp == 0x7FFF ) {
-            if (bSig0 | bSig1) {
-                return propagateFloat128NaN(a, b, status);
-            }
-            goto invalid;
-        }
-        return packFloat128( zSign, 0x7FFF, 0, 0 );
-    }
-    if ( bExp == 0x7FFF ) {
-        if (bSig0 | bSig1) {
-            return propagateFloat128NaN(a, b, status);
-        }
-        return packFloat128( zSign, 0, 0, 0 );
-    }
-    if ( bExp == 0 ) {
-        if ( ( bSig0 | bSig1 ) == 0 ) {
-            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
- invalid:
-                float_raise(float_flag_invalid, status);
-                return float128_default_nan(status);
-            }
-            float_raise(float_flag_divbyzero, status);
-            return packFloat128( zSign, 0x7FFF, 0, 0 );
-        }
-        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
-    }
-    if ( aExp == 0 ) {
-        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
-        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
-    }
-    zExp = aExp - bExp + 0x3FFD;
-    shortShift128Left(
-        aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
-    shortShift128Left(
-        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
-    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
-        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
-        ++zExp;
-    }
-    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
-    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
-    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
-    while ( (int64_t) rem0 < 0 ) {
-        --zSig0;
-        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
-    }
-    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
-    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
-        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
-        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
-        while ( (int64_t) rem1 < 0 ) {
-            --zSig1;
-            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
-        }
-        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
-    }
-    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
-    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
-
-}
-
-/*----------------------------------------------------------------------------
 | Returns the remainder of the quadruple-precision floating-point value `a'
 | with respect to the corresponding value `b'.  The operation is performed
 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
diff --git a/hw/Kconfig b/hw/Kconfig
index aa10357..805860f 100644
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -21,6 +21,7 @@
 source misc/Kconfig
 source net/Kconfig
 source nubus/Kconfig
+source nvme/Kconfig
 source nvram/Kconfig
 source pci-bridge/Kconfig
 source pci-host/Kconfig
diff --git a/hw/block/Kconfig b/hw/block/Kconfig
index 4fcd152..295441e 100644
--- a/hw/block/Kconfig
+++ b/hw/block/Kconfig
@@ -25,11 +25,6 @@
 config TC58128
     bool
 
-config NVME_PCI
-    bool
-    default y if PCI_DEVICES
-    depends on PCI
-
 config VIRTIO_BLK
     bool
     default y
diff --git a/hw/block/meson.build b/hw/block/meson.build
index 5b4a769..8b0de54 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -13,7 +13,6 @@
 softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c'))
 softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
 softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c'))
-softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 'nvme-subsys.c', 'nvme-dif.c'))
 
 specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
 specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk.c'))
diff --git a/hw/block/nvme-dif.h b/hw/block/nvme-dif.h
deleted file mode 100644
index 524faff..0000000
--- a/hw/block/nvme-dif.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * QEMU NVM Express End-to-End Data Protection support
- *
- * Copyright (c) 2021 Samsung Electronics Co., Ltd.
- *
- * Authors:
- *   Klaus Jensen           <k.jensen@samsung.com>
- *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
- */
-
-#ifndef HW_NVME_DIF_H
-#define HW_NVME_DIF_H
-
-/* from Linux kernel (crypto/crct10dif_common.c) */
-static const uint16_t t10_dif_crc_table[256] = {
-    0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
-    0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
-    0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
-    0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
-    0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
-    0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
-    0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
-    0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
-    0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
-    0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
-    0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
-    0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
-    0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
-    0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
-    0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
-    0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
-    0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
-    0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
-    0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
-    0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
-    0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
-    0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
-    0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
-    0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
-    0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
-    0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
-    0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
-    0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
-    0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
-    0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
-    0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
-    0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
-};
-
-uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba,
-                           uint32_t reftag);
-uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
-                               uint64_t slba);
-void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
-                                 uint8_t *mbuf, size_t mlen, uint16_t apptag,
-                                 uint32_t reftag);
-uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
-                        uint8_t *mbuf, size_t mlen, uint16_t ctrl,
-                        uint64_t slba, uint16_t apptag,
-                        uint16_t appmask, uint32_t reftag);
-uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req);
-
-#endif /* HW_NVME_DIF_H */
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
deleted file mode 100644
index fb0a41f..0000000
--- a/hw/block/nvme-ns.h
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * QEMU NVM Express Virtual Namespace
- *
- * Copyright (c) 2019 CNEX Labs
- * Copyright (c) 2020 Samsung Electronics
- *
- * Authors:
- *  Klaus Jensen      <k.jensen@samsung.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See the
- * COPYING file in the top-level directory.
- *
- */
-
-#ifndef NVME_NS_H
-#define NVME_NS_H
-
-#include "qemu/uuid.h"
-
-#define TYPE_NVME_NS "nvme-ns"
-#define NVME_NS(obj) \
-    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
-
-typedef struct NvmeZone {
-    NvmeZoneDescr   d;
-    uint64_t        w_ptr;
-    QTAILQ_ENTRY(NvmeZone) entry;
-} NvmeZone;
-
-typedef struct NvmeNamespaceParams {
-    bool     detached;
-    bool     shared;
-    uint32_t nsid;
-    QemuUUID uuid;
-
-    uint16_t ms;
-    uint8_t  mset;
-    uint8_t  pi;
-    uint8_t  pil;
-
-    uint16_t mssrl;
-    uint32_t mcl;
-    uint8_t  msrc;
-
-    bool     zoned;
-    bool     cross_zone_read;
-    uint64_t zone_size_bs;
-    uint64_t zone_cap_bs;
-    uint32_t max_active_zones;
-    uint32_t max_open_zones;
-    uint32_t zd_extension_size;
-} NvmeNamespaceParams;
-
-typedef struct NvmeNamespace {
-    DeviceState  parent_obj;
-    BlockConf    blkconf;
-    int32_t      bootindex;
-    int64_t      size;
-    int64_t      mdata_offset;
-    NvmeIdNs     id_ns;
-    const uint32_t *iocs;
-    uint8_t      csi;
-    uint16_t     status;
-    int          attached;
-
-    QTAILQ_ENTRY(NvmeNamespace) entry;
-
-    NvmeIdNsZoned   *id_ns_zoned;
-    NvmeZone        *zone_array;
-    QTAILQ_HEAD(, NvmeZone) exp_open_zones;
-    QTAILQ_HEAD(, NvmeZone) imp_open_zones;
-    QTAILQ_HEAD(, NvmeZone) closed_zones;
-    QTAILQ_HEAD(, NvmeZone) full_zones;
-    uint32_t        num_zones;
-    uint64_t        zone_size;
-    uint64_t        zone_capacity;
-    uint32_t        zone_size_log2;
-    uint8_t         *zd_extensions;
-    int32_t         nr_open_zones;
-    int32_t         nr_active_zones;
-
-    NvmeNamespaceParams params;
-
-    struct {
-        uint32_t err_rec;
-    } features;
-} NvmeNamespace;
-
-static inline uint16_t nvme_ns_status(NvmeNamespace *ns)
-{
-    return ns->status;
-}
-
-static inline uint32_t nvme_nsid(NvmeNamespace *ns)
-{
-    if (ns) {
-        return ns->params.nsid;
-    }
-
-    return 0;
-}
-
-static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns)
-{
-    NvmeIdNs *id_ns = &ns->id_ns;
-    return &id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)];
-}
-
-static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
-{
-    return nvme_ns_lbaf(ns)->ds;
-}
-
-/* convert an LBA to the equivalent in bytes */
-static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
-{
-    return lba << nvme_ns_lbads(ns);
-}
-
-static inline size_t nvme_lsize(NvmeNamespace *ns)
-{
-    return 1 << nvme_ns_lbads(ns);
-}
-
-static inline uint16_t nvme_msize(NvmeNamespace *ns)
-{
-    return nvme_ns_lbaf(ns)->ms;
-}
-
-static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
-{
-    return nvme_msize(ns) * lba;
-}
-
-static inline bool nvme_ns_ext(NvmeNamespace *ns)
-{
-    return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
-}
-
-/* calculate the number of LBAs that the namespace can accomodate */
-static inline uint64_t nvme_ns_nlbas(NvmeNamespace *ns)
-{
-    if (nvme_msize(ns)) {
-        return ns->size / (nvme_lsize(ns) + nvme_msize(ns));
-    }
-    return ns->size >> nvme_ns_lbads(ns);
-}
-
-typedef struct NvmeCtrl NvmeCtrl;
-
-static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
-{
-    return zone->d.zs >> 4;
-}
-
-static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
-{
-    zone->d.zs = state << 4;
-}
-
-static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
-{
-    return zone->d.zslba + ns->zone_size;
-}
-
-static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
-{
-    return zone->d.zslba + zone->d.zcap;
-}
-
-static inline bool nvme_wp_is_valid(NvmeZone *zone)
-{
-    uint8_t st = nvme_get_zone_state(zone);
-
-    return st != NVME_ZONE_STATE_FULL &&
-           st != NVME_ZONE_STATE_READ_ONLY &&
-           st != NVME_ZONE_STATE_OFFLINE;
-}
-
-static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
-                                             uint32_t zone_idx)
-{
-    return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
-}
-
-static inline void nvme_aor_inc_open(NvmeNamespace *ns)
-{
-    assert(ns->nr_open_zones >= 0);
-    if (ns->params.max_open_zones) {
-        ns->nr_open_zones++;
-        assert(ns->nr_open_zones <= ns->params.max_open_zones);
-    }
-}
-
-static inline void nvme_aor_dec_open(NvmeNamespace *ns)
-{
-    if (ns->params.max_open_zones) {
-        assert(ns->nr_open_zones > 0);
-        ns->nr_open_zones--;
-    }
-    assert(ns->nr_open_zones >= 0);
-}
-
-static inline void nvme_aor_inc_active(NvmeNamespace *ns)
-{
-    assert(ns->nr_active_zones >= 0);
-    if (ns->params.max_active_zones) {
-        ns->nr_active_zones++;
-        assert(ns->nr_active_zones <= ns->params.max_active_zones);
-    }
-}
-
-static inline void nvme_aor_dec_active(NvmeNamespace *ns)
-{
-    if (ns->params.max_active_zones) {
-        assert(ns->nr_active_zones > 0);
-        ns->nr_active_zones--;
-        assert(ns->nr_active_zones >= ns->nr_open_zones);
-    }
-    assert(ns->nr_active_zones >= 0);
-}
-
-void nvme_ns_init_format(NvmeNamespace *ns);
-int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
-void nvme_ns_drain(NvmeNamespace *ns);
-void nvme_ns_shutdown(NvmeNamespace *ns);
-void nvme_ns_cleanup(NvmeNamespace *ns);
-
-#endif /* NVME_NS_H */
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
deleted file mode 100644
index 7d7ef5f..0000000
--- a/hw/block/nvme-subsys.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * QEMU NVM Express Subsystem: nvme-subsys
- *
- * Copyright (c) 2021 Minwoo Im <minwoo.im.dev@gmail.com>
- *
- * This code is licensed under the GNU GPL v2.  Refer COPYING.
- */
-
-#ifndef NVME_SUBSYS_H
-#define NVME_SUBSYS_H
-
-#define TYPE_NVME_SUBSYS "nvme-subsys"
-#define NVME_SUBSYS(obj) \
-    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
-
-#define NVME_SUBSYS_MAX_CTRLS   32
-#define NVME_MAX_NAMESPACES     256
-
-typedef struct NvmeCtrl NvmeCtrl;
-typedef struct NvmeNamespace NvmeNamespace;
-typedef struct NvmeSubsystem {
-    DeviceState parent_obj;
-    uint8_t     subnqn[256];
-
-    NvmeCtrl    *ctrls[NVME_SUBSYS_MAX_CTRLS];
-    /* Allocated namespaces for this subsystem */
-    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
-
-    struct {
-        char *nqn;
-    } params;
-} NvmeSubsystem;
-
-int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
-
-static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
-        uint32_t cntlid)
-{
-    if (!subsys || cntlid >= NVME_SUBSYS_MAX_CTRLS) {
-        return NULL;
-    }
-
-    return subsys->ctrls[cntlid];
-}
-
-/*
- * Return allocated namespace of the specified nsid in the subsystem.
- */
-static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
-        uint32_t nsid)
-{
-    if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
-        return NULL;
-    }
-
-    return subsys->namespaces[nsid];
-}
-
-#endif /* NVME_SUBSYS_H */
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
deleted file mode 100644
index 5d05ec3..0000000
--- a/hw/block/nvme.h
+++ /dev/null
@@ -1,266 +0,0 @@
-#ifndef HW_NVME_H
-#define HW_NVME_H
-
-#include "block/nvme.h"
-#include "hw/pci/pci.h"
-#include "nvme-subsys.h"
-#include "nvme-ns.h"
-
-#define NVME_DEFAULT_ZONE_SIZE   (128 * MiB)
-#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB)
-
-typedef struct NvmeParams {
-    char     *serial;
-    uint32_t num_queues; /* deprecated since 5.1 */
-    uint32_t max_ioqpairs;
-    uint16_t msix_qsize;
-    uint32_t cmb_size_mb;
-    uint8_t  aerl;
-    uint32_t aer_max_queued;
-    uint8_t  mdts;
-    uint8_t  vsl;
-    bool     use_intel_id;
-    uint8_t  zasl;
-    bool     legacy_cmb;
-} NvmeParams;
-
-typedef struct NvmeAsyncEvent {
-    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
-    NvmeAerResult result;
-} NvmeAsyncEvent;
-
-enum {
-    NVME_SG_ALLOC = 1 << 0,
-    NVME_SG_DMA   = 1 << 1,
-};
-
-typedef struct NvmeSg {
-    int flags;
-
-    union {
-        QEMUSGList   qsg;
-        QEMUIOVector iov;
-    };
-} NvmeSg;
-
-typedef struct NvmeRequest {
-    struct NvmeSQueue       *sq;
-    struct NvmeNamespace    *ns;
-    BlockAIOCB              *aiocb;
-    uint16_t                status;
-    void                    *opaque;
-    NvmeCqe                 cqe;
-    NvmeCmd                 cmd;
-    BlockAcctCookie         acct;
-    NvmeSg                  sg;
-    QTAILQ_ENTRY(NvmeRequest)entry;
-} NvmeRequest;
-
-typedef struct NvmeBounceContext {
-    NvmeRequest *req;
-
-    struct {
-        QEMUIOVector iov;
-        uint8_t *bounce;
-    } data, mdata;
-} NvmeBounceContext;
-
-static inline const char *nvme_adm_opc_str(uint8_t opc)
-{
-    switch (opc) {
-    case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
-    case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
-    case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
-    case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
-    case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
-    case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
-    case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
-    case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
-    case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
-    case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
-    case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
-    case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
-    default:                            return "NVME_ADM_CMD_UNKNOWN";
-    }
-}
-
-static inline const char *nvme_io_opc_str(uint8_t opc)
-{
-    switch (opc) {
-    case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
-    case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
-    case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
-    case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
-    case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
-    case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
-    case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
-    case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
-    case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
-    case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
-    case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
-    default:                        return "NVME_NVM_CMD_UNKNOWN";
-    }
-}
-
-typedef struct NvmeSQueue {
-    struct NvmeCtrl *ctrl;
-    uint16_t    sqid;
-    uint16_t    cqid;
-    uint32_t    head;
-    uint32_t    tail;
-    uint32_t    size;
-    uint64_t    dma_addr;
-    QEMUTimer   *timer;
-    NvmeRequest *io_req;
-    QTAILQ_HEAD(, NvmeRequest) req_list;
-    QTAILQ_HEAD(, NvmeRequest) out_req_list;
-    QTAILQ_ENTRY(NvmeSQueue) entry;
-} NvmeSQueue;
-
-typedef struct NvmeCQueue {
-    struct NvmeCtrl *ctrl;
-    uint8_t     phase;
-    uint16_t    cqid;
-    uint16_t    irq_enabled;
-    uint32_t    head;
-    uint32_t    tail;
-    uint32_t    vector;
-    uint32_t    size;
-    uint64_t    dma_addr;
-    QEMUTimer   *timer;
-    QTAILQ_HEAD(, NvmeSQueue) sq_list;
-    QTAILQ_HEAD(, NvmeRequest) req_list;
-} NvmeCQueue;
-
-#define TYPE_NVME_BUS "nvme-bus"
-#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS)
-
-typedef struct NvmeBus {
-    BusState parent_bus;
-} NvmeBus;
-
-#define TYPE_NVME "nvme"
-#define NVME(obj) \
-        OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
-
-typedef struct NvmeFeatureVal {
-    struct {
-        uint16_t temp_thresh_hi;
-        uint16_t temp_thresh_low;
-    };
-    uint32_t    async_config;
-} NvmeFeatureVal;
-
-typedef struct NvmeCtrl {
-    PCIDevice    parent_obj;
-    MemoryRegion bar0;
-    MemoryRegion iomem;
-    NvmeBar      bar;
-    NvmeParams   params;
-    NvmeBus      bus;
-
-    uint16_t    cntlid;
-    bool        qs_created;
-    uint32_t    page_size;
-    uint16_t    page_bits;
-    uint16_t    max_prp_ents;
-    uint16_t    cqe_size;
-    uint16_t    sqe_size;
-    uint32_t    reg_size;
-    uint32_t    num_namespaces;
-    uint32_t    max_q_ents;
-    uint8_t     outstanding_aers;
-    uint32_t    irq_status;
-    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
-    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
-    uint64_t    starttime_ms;
-    uint16_t    temperature;
-    uint8_t     smart_critical_warning;
-
-    struct {
-        MemoryRegion mem;
-        uint8_t      *buf;
-        bool         cmse;
-        hwaddr       cba;
-    } cmb;
-
-    struct {
-        HostMemoryBackend *dev;
-        bool              cmse;
-        hwaddr            cba;
-    } pmr;
-
-    uint8_t     aer_mask;
-    NvmeRequest **aer_reqs;
-    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
-    int         aer_queued;
-
-    uint32_t    dmrsl;
-
-    /* Namespace ID is started with 1 so bitmap should be 1-based */
-#define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
-    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);
-
-    NvmeSubsystem   *subsys;
-
-    NvmeNamespace   namespace;
-    /*
-     * Attached namespaces to this controller.  If subsys is not given, all
-     * namespaces in this list will always be attached.
-     */
-    NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES];
-    NvmeSQueue      **sq;
-    NvmeCQueue      **cq;
-    NvmeSQueue      admin_sq;
-    NvmeCQueue      admin_cq;
-    NvmeIdCtrl      id_ctrl;
-    NvmeFeatureVal  features;
-} NvmeCtrl;
-
-static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
-{
-    if (!nsid || nsid > n->num_namespaces) {
-        return NULL;
-    }
-
-    return n->namespaces[nsid - 1];
-}
-
-static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
-{
-    NvmeSQueue *sq = req->sq;
-    NvmeCtrl *n = sq->ctrl;
-
-    return n->cq[sq->cqid];
-}
-
-static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
-{
-    NvmeSQueue *sq = req->sq;
-    return sq->ctrl;
-}
-
-static inline uint16_t nvme_cid(NvmeRequest *req)
-{
-    if (!req) {
-        return 0xffff;
-    }
-
-    return le16_to_cpu(req->cqe.cid);
-}
-
-typedef enum NvmeTxDirection {
-    NVME_TX_DIRECTION_TO_DEVICE   = 0,
-    NVME_TX_DIRECTION_FROM_DEVICE = 1,
-} NvmeTxDirection;
-
-void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
-uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
-                          NvmeTxDirection dir, NvmeRequest *req);
-uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
-                           NvmeTxDirection dir, NvmeRequest *req);
-void nvme_rw_complete_cb(void *opaque, int ret);
-uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
-                       NvmeCmd *cmd);
-
-#endif /* HW_NVME_H */
diff --git a/hw/block/trace-events b/hw/block/trace-events
index fa12e3a..646917d 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -49,212 +49,6 @@
 hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d"
 hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d"
 
-# nvme.c
-# nvme traces for successful events
-pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
-pci_nvme_irq_pin(void) "pulsing IRQ pin"
-pci_nvme_irq_masked(void) "IRQ is masked"
-pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
-pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
-pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
-pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d"
-pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64""
-pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
-pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
-pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
-pci_nvme_format(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8""
-pci_nvme_format_ns(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8""
-pci_nvme_format_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
-pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
-pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
-pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
-pci_nvme_misc_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
-pci_nvme_dif_rw(uint8_t pract, uint8_t prinfo) "pract 0x%"PRIx8" prinfo 0x%"PRIx8""
-pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
-pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
-pci_nvme_dif_rw_mdata_out_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
-pci_nvme_dif_rw_check_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32""
-pci_nvme_dif_pract_generate_dif(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32""
-pci_nvme_dif_check(uint8_t prinfo, uint16_t chksum_len) "prinfo 0x%"PRIx8" chksum_len %"PRIu16""
-pci_nvme_dif_prchk_disabled(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32""
-pci_nvme_dif_prchk_guard(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16""
-pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "apptag 0x%"PRIx16" elbat 0x%"PRIx16" elbatm 0x%"PRIx16""
-pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32""
-pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8""
-pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32""
-pci_nvme_copy_in_complete(uint16_t cid) "cid %"PRIu16""
-pci_nvme_copy_cb(uint16_t cid) "cid %"PRIu16""
-pci_nvme_verify(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32""
-pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
-pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32""
-pci_nvme_rw_complete_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
-pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d"
-pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32""
-pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32""
-pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32""
-pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32""
-pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16""
-pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16""
-pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16""
-pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16""
-pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64""
-pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
-pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
-pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
-pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
-pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16""
-pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid %"PRIu16" cns 0x%"PRIx8" ctrlid %"PRIu16" csi 0x%"PRIx8""
-pci_nvme_identify_ctrl(void) "identify controller"
-pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8""
-pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32""
-pci_nvme_identify_ns_attached_list(uint16_t cntid) "cntid=%"PRIu16""
-pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8""
-pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
-pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8""
-pci_nvme_identify_cmd_set(void) "identify i/o command set"
-pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32""
-pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64""
-pci_nvme_getfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t sel, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" sel 0x%"PRIx8" cdw11 0x%"PRIx32""
-pci_nvme_setfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t save, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" save 0x%"PRIx8" cdw11 0x%"PRIx32""
-pci_nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s"
-pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
-pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
-pci_nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64""
-pci_nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64""
-pci_nvme_process_aers(int queued) "queued %d"
-pci_nvme_aer(uint16_t cid) "cid %"PRIu16""
-pci_nvme_aer_aerl_exceeded(void) "aerl exceeded"
-pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8""
-pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
-pci_nvme_ns_attachment(uint16_t cid, uint8_t sel) "cid %"PRIu16", sel=0x%"PRIx8""
-pci_nvme_ns_attachment_attach(uint16_t cntlid, uint32_t nsid) "cntlid=0x%"PRIx16", nsid=0x%"PRIx32""
-pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
-pci_nvme_enqueue_event_noqueue(int queued) "queued %d"
-pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
-pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
-pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16""
-pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d"
-pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d"
-pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16""
-pci_nvme_mmio_doorbell_sq(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16" new_tail %"PRIu16""
-pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
-pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
-pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
-pci_nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64""
-pci_nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64""
-pci_nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64""
-pci_nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
-pci_nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
-pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded"
-pci_nvme_mmio_stopped(void) "cleared controller enable bit"
-pci_nvme_mmio_shutdown_set(void) "shutdown bit set"
-pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
-pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
-pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
-pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
-pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
-pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
-pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32""
-pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32""
-pci_nvme_clear_ns_close(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Closed state"
-pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Empty state"
-
-# nvme traces for error conditions
-pci_nvme_err_mdts(size_t len) "len %zu"
-pci_nvme_err_zasl(size_t len) "len %zu"
-pci_nvme_err_req_status(uint16_t cid, uint32_t nsid, uint16_t status, uint8_t opc) "cid %"PRIu16" nsid %"PRIu32" status 0x%"PRIx16" opc 0x%"PRIx8""
-pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64""
-pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64""
-pci_nvme_err_cfs(void) "controller fatal status"
-pci_nvme_err_aio(uint16_t cid, const char *errname, uint16_t status) "cid %"PRIu16" err '%s' status 0x%"PRIx16""
-pci_nvme_err_copy_invalid_format(uint8_t format) "format 0x%"PRIx8""
-pci_nvme_err_invalid_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8""
-pci_nvme_err_invalid_num_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8""
-pci_nvme_err_invalid_sgl_excess_length(uint32_t residual) "residual %"PRIu32""
-pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
-pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is not page aligned: 0x%"PRIx64""
-pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
-pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
-pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
-pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
-pci_nvme_err_invalid_log_page_offset(uint64_t ofs, uint64_t size) "must be <= %"PRIu64", got %"PRIu64""
-pci_nvme_err_cmb_invalid_cba(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64""
-pci_nvme_err_cmb_not_enabled(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64""
-pci_nvme_err_unaligned_zone_cmd(uint8_t action, uint64_t slba, uint64_t zslba) "unaligned zone op 0x%"PRIx32", got slba=%"PRIu64", zslba=%"PRIu64""
-pci_nvme_err_invalid_zone_state_transition(uint8_t action, uint64_t slba, uint8_t attrs) "action=0x%"PRIx8", slba=%"PRIu64", attrs=0x%"PRIx32""
-pci_nvme_err_write_not_at_wp(uint64_t slba, uint64_t zone, uint64_t wp) "writing at slba=%"PRIu64", zone=%"PRIu64", but wp=%"PRIu64""
-pci_nvme_err_append_not_at_start(uint64_t slba, uint64_t zone) "appending at slba=%"PRIu64", but zone=%"PRIu64""
-pci_nvme_err_zone_is_full(uint64_t zslba) "zslba 0x%"PRIx64""
-pci_nvme_err_zone_is_read_only(uint64_t zslba) "zslba 0x%"PRIx64""
-pci_nvme_err_zone_is_offline(uint64_t zslba) "zslba 0x%"PRIx64""
-pci_nvme_err_zone_boundary(uint64_t slba, uint32_t nlb, uint64_t zcap) "lba 0x%"PRIx64" nlb %"PRIu32" zcap 0x%"PRIx64""
-pci_nvme_err_zone_invalid_write(uint64_t slba, uint64_t wp) "lba 0x%"PRIx64" wp 0x%"PRIx64""
-pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
-pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
-pci_nvme_err_insuff_active_res(uint32_t max_active) "max_active=%"PRIu32" zone limit exceeded"
-pci_nvme_err_insuff_open_res(uint32_t max_open) "max_open=%"PRIu32" zone limit exceeded"
-pci_nvme_err_zd_extension_map_error(uint32_t zone_idx) "can't map descriptor extension for zone_idx=%"PRIu32""
-pci_nvme_err_invalid_iocsci(uint32_t idx) "unsupported command set combination index %"PRIu32""
-pci_nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16""
-pci_nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16""
-pci_nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16""
-pci_nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16""
-pci_nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64""
-pci_nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16""
-pci_nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16""
-pci_nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16""
-pci_nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16""
-pci_nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16""
-pci_nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64""
-pci_nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16""
-pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16""
-pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16""
-pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
-pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32""
-pci_nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16""
-pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues"
-pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues"
-pci_nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null"
-pci_nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null"
-pci_nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64""
-pci_nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64""
-pci_nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u"
-pci_nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u"
-pci_nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u"
-pci_nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u"
-pci_nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u"
-pci_nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u"
-pci_nvme_err_startfail_css(uint8_t css) "nvme_start_ctrl failed because invalid command set selected:%u"
-pci_nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero"
-pci_nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero"
-pci_nvme_err_startfail_zasl_too_small(uint32_t zasl, uint32_t pagesz) "nvme_start_ctrl failed because zone append size limit %"PRIu32" is too small, needs to be >= %"PRIu32""
-pci_nvme_err_startfail(void) "setting controller enable bit failed"
-pci_nvme_err_invalid_mgmt_action(uint8_t action) "action=0x%"PRIx8""
-
-# Traces for undefined behavior
-pci_nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64""
-pci_nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u"
-pci_nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled"
-pci_nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status"
-pci_nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)"
-pci_nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)"
-pci_nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored"
-pci_nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored"
-pci_nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored"
-pci_nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored"
-pci_nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored"
-pci_nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored"
-pci_nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64""
-pci_nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64""
-pci_nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64""
-pci_nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0"
-pci_nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring"
-pci_nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring"
-pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring"
-pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring"
-pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring"
-pci_nvme_ub_unknown_css_value(void) "unknown value in cc.css field"
-
 # xen-block.c
 xen_block_realize(const char *type, uint32_t disk, uint32_t partition) "%s d%up%u"
 xen_block_connect(const char *type, uint32_t disk, uint32_t partition) "%s d%up%u"
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index f5e9682..c6210fa 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -47,9 +47,13 @@
     VIRTIO_RING_F_INDIRECT_DESC,
     VIRTIO_RING_F_EVENT_IDX,
     VIRTIO_F_NOTIFY_ON_EMPTY,
+    VIRTIO_F_RING_PACKED,
+    VIRTIO_F_IOMMU_PLATFORM,
     VHOST_INVALID_FEATURE_BIT
 };
 
+static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);
+
 static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config)
 {
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
@@ -309,7 +313,7 @@
     vhost_dev_free_inflight(s->inflight);
 }
 
-static int vhost_user_blk_connect(DeviceState *dev)
+static int vhost_user_blk_connect(DeviceState *dev, Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
@@ -320,6 +324,7 @@
     }
     s->connected = true;
 
+    s->dev.num_queues = s->num_queues;
     s->dev.nvqs = s->num_queues;
     s->dev.vqs = s->vhost_vqs;
     s->dev.vq_index = 0;
@@ -329,8 +334,7 @@
 
     ret = vhost_dev_init(&s->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
     if (ret < 0) {
-        error_report("vhost-user-blk: vhost initialization failed: %s",
-                     strerror(-ret));
+        error_setg_errno(errp, -ret, "vhost initialization failed");
         return ret;
     }
 
@@ -338,8 +342,7 @@
     if (virtio_device_started(vdev, vdev->status)) {
         ret = vhost_user_blk_start(vdev);
         if (ret < 0) {
-            error_report("vhost-user-blk: vhost start failed: %s",
-                         strerror(-ret));
+            error_setg_errno(errp, -ret, "vhost start failed");
             return ret;
         }
     }
@@ -362,19 +365,6 @@
     vhost_dev_cleanup(&s->dev);
 }
 
-static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
-                                 bool realized);
-
-static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event)
-{
-    vhost_user_blk_event(opaque, event, false);
-}
-
-static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event)
-{
-    vhost_user_blk_event(opaque, event, true);
-}
-
 static void vhost_user_blk_chr_closed_bh(void *opaque)
 {
     DeviceState *dev = opaque;
@@ -382,36 +372,27 @@
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
 
     vhost_user_blk_disconnect(dev);
-    qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL,
-            vhost_user_blk_event_oper, NULL, opaque, NULL, true);
+    qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
+                             NULL, opaque, NULL, true);
 }
 
-static void vhost_user_blk_event(void *opaque, QEMUChrEvent event,
-                                 bool realized)
+static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
 {
     DeviceState *dev = opaque;
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
+    Error *local_err = NULL;
 
     switch (event) {
     case CHR_EVENT_OPENED:
-        if (vhost_user_blk_connect(dev) < 0) {
+        if (vhost_user_blk_connect(dev, &local_err) < 0) {
+            error_report_err(local_err);
             qemu_chr_fe_disconnect(&s->chardev);
             return;
         }
         break;
     case CHR_EVENT_CLOSED:
-        /*
-         * Closing the connection should happen differently on device
-         * initialization and operation stages.
-         * On initalization, we want to re-start vhost_dev initialization
-         * from the very beginning right away when the connection is closed,
-         * so we clean up vhost_dev on each connection closing.
-         * On operation, we want to postpone vhost_dev cleanup to let the
-         * other code perform its own cleanup sequence using vhost_dev data
-         * (e.g. vhost_dev_set_log).
-         */
-        if (realized && !runstate_check(RUN_STATE_SHUTDOWN)) {
+        if (!runstate_check(RUN_STATE_SHUTDOWN)) {
             /*
              * A close event may happen during a read/write, but vhost
              * code assumes the vhost_dev remains setup, so delay the
@@ -431,8 +412,6 @@
              * knowing its type (in this case vhost-user).
              */
             s->dev.started = false;
-        } else {
-            vhost_user_blk_disconnect(dev);
         }
         break;
     case CHR_EVENT_BREAK:
@@ -447,11 +426,10 @@
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VHostUserBlk *s = VHOST_USER_BLK(vdev);
-    Error *err = NULL;
     int i, ret;
 
     if (!s->chardev.chr) {
-        error_setg(errp, "vhost-user-blk: chardev is mandatory");
+        error_setg(errp, "chardev is mandatory");
         return;
     }
 
@@ -459,16 +437,16 @@
         s->num_queues = 1;
     }
     if (!s->num_queues || s->num_queues > VIRTIO_QUEUE_MAX) {
-        error_setg(errp, "vhost-user-blk: invalid number of IO queues");
+        error_setg(errp, "invalid number of IO queues");
         return;
     }
 
     if (!s->queue_size) {
-        error_setg(errp, "vhost-user-blk: queue size must be non-zero");
+        error_setg(errp, "queue size must be non-zero");
         return;
     }
     if (s->queue_size > VIRTQUEUE_MAX_SIZE) {
-        error_setg(errp, "vhost-user-blk: queue size must not exceed %d",
+        error_setg(errp, "queue size must not exceed %d",
                    VIRTQUEUE_MAX_SIZE);
         return;
     }
@@ -490,34 +468,31 @@
     s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues);
     s->connected = false;
 
-    qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
-                             vhost_user_blk_event_realize, NULL, (void *)dev,
-                             NULL, true);
-
-reconnect:
-    if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) {
-        error_report_err(err);
+    if (qemu_chr_fe_wait_connected(&s->chardev, errp) < 0) {
         goto virtio_err;
     }
 
-    /* check whether vhost_user_blk_connect() failed or not */
-    if (!s->connected) {
-        goto reconnect;
+    if (vhost_user_blk_connect(dev, errp) < 0) {
+        qemu_chr_fe_disconnect(&s->chardev);
+        goto virtio_err;
     }
+    assert(s->connected);
 
     ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
                                sizeof(struct virtio_blk_config));
     if (ret < 0) {
-        error_report("vhost-user-blk: get block config failed");
-        goto reconnect;
+        error_setg(errp, "vhost-user-blk: get block config failed");
+        goto vhost_err;
     }
 
-    /* we're fully initialized, now we can operate, so change the handler */
+    /* we're fully initialized, now we can operate, so add the handler */
     qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
-                             vhost_user_blk_event_oper, NULL, (void *)dev,
+                             vhost_user_blk_event, NULL, (void *)dev,
                              NULL, true);
     return;
 
+vhost_err:
+    vhost_dev_cleanup(&s->dev);
 virtio_err:
     g_free(s->vhost_vqs);
     s->vhost_vqs = NULL;
diff --git a/hw/meson.build b/hw/meson.build
index 6bdbae0..ba0601e 100644
--- a/hw/meson.build
+++ b/hw/meson.build
@@ -21,6 +21,7 @@
 subdir('misc')
 subdir('net')
 subdir('nubus')
+subdir('nvme')
 subdir('nvram')
 subdir('pci')
 subdir('pci-bridge')
diff --git a/hw/nvme/Kconfig b/hw/nvme/Kconfig
new file mode 100644
index 0000000..8ac9094
--- /dev/null
+++ b/hw/nvme/Kconfig
@@ -0,0 +1,4 @@
+config NVME_PCI
+    bool
+    default y if PCI_DEVICES
+    depends on PCI
diff --git a/hw/block/nvme.c b/hw/nvme/ctrl.c
similarity index 96%
rename from hw/block/nvme.c
rename to hw/nvme/ctrl.c
index 5fe082e..0bcaf71 100644
--- a/hw/block/nvme.c
+++ b/hw/nvme/ctrl.c
@@ -12,10 +12,19 @@
  * Reference Specs: http://www.nvmexpress.org, 1.4, 1.3, 1.2, 1.1, 1.0e
  *
  *  https://nvmexpress.org/developers/nvme-specification/
- */
-
-/**
- * Usage: add options:
+ *
+ *
+ * Notes on coding style
+ * ---------------------
+ * While QEMU coding style prefers lowercase hexadecimals in constants, the
+ * NVMe subsystem uses the format from the NVMe specifications in the comments
+ * (i.e. 'h' suffix instead of '0x' prefix).
+ *
+ * Usage
+ * -----
+ * See docs/system/nvme.rst for extensive documentation.
+ *
+ * Add options:
  *      -drive file=<file>,if=none,id=<drive_id>
  *      -device nvme-subsys,id=<subsys_id>,nqn=<nqn_id>
  *      -device nvme,serial=<serial>,id=<bus_name>, \
@@ -135,26 +144,20 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/units.h"
+#include "qemu/cutils.h"
 #include "qemu/error-report.h"
-#include "hw/block/block.h"
-#include "hw/pci/msix.h"
-#include "hw/pci/pci.h"
-#include "hw/qdev-properties.h"
-#include "migration/vmstate.h"
-#include "sysemu/sysemu.h"
+#include "qemu/log.h"
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qapi/visitor.h"
-#include "sysemu/hostmem.h"
+#include "sysemu/sysemu.h"
 #include "sysemu/block-backend.h"
-#include "exec/memory.h"
-#include "qemu/log.h"
-#include "qemu/module.h"
-#include "qemu/cutils.h"
-#include "trace.h"
+#include "sysemu/hostmem.h"
+#include "hw/pci/msix.h"
+#include "migration/vmstate.h"
+
 #include "nvme.h"
-#include "nvme-ns.h"
-#include "nvme-dif.h"
+#include "trace.h"
 
 #define NVME_MAX_IOQPAIRS 0xffff
 #define NVME_DB_SIZE  4
@@ -165,6 +168,7 @@
 #define NVME_TEMPERATURE_WARNING 0x157
 #define NVME_TEMPERATURE_CRITICAL 0x175
 #define NVME_NUM_FW_SLOTS 1
+#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB)
 
 #define NVME_GUEST_ERR(trace, fmt, ...) \
     do { \
@@ -185,6 +189,7 @@
     [NVME_WRITE_ATOMICITY]          = true,
     [NVME_ASYNCHRONOUS_EVENT_CONF]  = true,
     [NVME_TIMESTAMP]                = true,
+    [NVME_COMMAND_SET_PROFILE]      = true,
 };
 
 static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
@@ -194,6 +199,7 @@
     [NVME_NUMBER_OF_QUEUES]         = NVME_FEAT_CAP_CHANGE,
     [NVME_ASYNCHRONOUS_EVENT_CONF]  = NVME_FEAT_CAP_CHANGE,
     [NVME_TIMESTAMP]                = NVME_FEAT_CAP_CHANGE,
+    [NVME_COMMAND_SET_PROFILE]      = NVME_FEAT_CAP_CHANGE,
 };
 
 static const uint32_t nvme_cse_acs[256] = {
@@ -387,7 +393,8 @@
 
 static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid)
 {
-    return nsid && (nsid == NVME_NSID_BROADCAST || nsid <= n->num_namespaces);
+    return nsid &&
+        (nsid == NVME_NSID_BROADCAST || nsid <= NVME_MAX_NAMESPACES);
 }
 
 static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
@@ -511,9 +518,7 @@
                           NvmeSg *mdata)
 {
     NvmeSg *dst = data;
-    size_t size = nvme_lsize(ns);
-    size_t msize = nvme_msize(ns);
-    uint32_t trans_len, count = size;
+    uint32_t trans_len, count = ns->lbasz;
     uint64_t offset = 0;
     bool dma = sg->flags & NVME_SG_DMA;
     size_t sge_len;
@@ -545,7 +550,7 @@
 
         if (count == 0) {
             dst = (dst == data) ? mdata : data;
-            count = (dst == data) ? size : msize;
+            count = (dst == data) ? ns->lbasz : ns->lbaf.ms;
         }
 
         if (sge_len == offset) {
@@ -574,7 +579,7 @@
 }
 
 static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
-    size_t len)
+                                  size_t len)
 {
     if (!len) {
         return NVME_SUCCESS;
@@ -1004,7 +1009,7 @@
     uint16_t status;
 
     if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) &&
-        (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) {
+        (ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8)) {
         goto out;
     }
 
@@ -1187,12 +1192,9 @@
     uint16_t ctrl = le16_to_cpu(rw->control);
 
     if (nvme_ns_ext(ns) &&
-        !(ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) {
-        size_t lsize = nvme_lsize(ns);
-        size_t msize = nvme_msize(ns);
-
-        return nvme_tx_interleaved(n, &req->sg, ptr, len, lsize, msize, 0,
-                                   dir);
+        !(ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8)) {
+        return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbasz,
+                                   ns->lbaf.ms, 0, dir);
     }
 
     return nvme_tx(n, &req->sg, ptr, len, dir);
@@ -1205,11 +1207,8 @@
     uint16_t status;
 
     if (nvme_ns_ext(ns)) {
-        size_t lsize = nvme_lsize(ns);
-        size_t msize = nvme_msize(ns);
-
-        return nvme_tx_interleaved(n, &req->sg, ptr, len, msize, lsize, lsize,
-                                   dir);
+        return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbaf.ms,
+                                   ns->lbasz, ns->lbasz, dir);
     }
 
     nvme_sg_unmap(&req->sg);
@@ -1426,6 +1425,7 @@
     uint64_t nsze = le64_to_cpu(ns->id_ns.nsze);
 
     if (unlikely(UINT64_MAX - slba < nlb || slba + nlb > nsze)) {
+        trace_pci_nvme_err_invalid_lba_range(slba, nlb, nsze);
         return NVME_LBA_RANGE | NVME_DNR;
     }
 
@@ -1682,8 +1682,12 @@
     }
 }
 
-static uint16_t __nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone,
-                                bool implicit)
+enum {
+    NVME_ZRM_AUTO = 1 << 0,
+};
+
+static uint16_t nvme_zrm_open_flags(NvmeNamespace *ns, NvmeZone *zone,
+                                    int flags)
 {
     int act = 0;
     uint16_t status;
@@ -1707,7 +1711,7 @@
 
         nvme_aor_inc_open(ns);
 
-        if (implicit) {
+        if (flags & NVME_ZRM_AUTO) {
             nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN);
             return NVME_SUCCESS;
         }
@@ -1715,7 +1719,7 @@
         /* fallthrough */
 
     case NVME_ZONE_STATE_IMPLICITLY_OPEN:
-        if (implicit) {
+        if (flags & NVME_ZRM_AUTO) {
             return NVME_SUCCESS;
         }
 
@@ -1733,16 +1737,16 @@
 
 static inline uint16_t nvme_zrm_auto(NvmeNamespace *ns, NvmeZone *zone)
 {
-    return __nvme_zrm_open(ns, zone, true);
+    return nvme_zrm_open_flags(ns, zone, NVME_ZRM_AUTO);
 }
 
 static inline uint16_t nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone)
 {
-    return __nvme_zrm_open(ns, zone, false);
+    return nvme_zrm_open_flags(ns, zone, 0);
 }
 
-static void __nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
-                                   uint32_t nlb)
+static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone,
+                                 uint32_t nlb)
 {
     zone->d.wp += nlb;
 
@@ -1762,7 +1766,7 @@
     nlb = le16_to_cpu(rw->nlb) + 1;
     zone = nvme_get_zone_by_slba(ns, slba);
 
-    __nvme_advance_zone_wp(ns, zone, nlb);
+    nvme_advance_zone_wp(ns, zone, nlb);
 }
 
 static inline bool nvme_is_write(NvmeRequest *req)
@@ -1832,11 +1836,11 @@
         goto out;
     }
 
-    if (nvme_msize(ns)) {
+    if (ns->lbaf.ms) {
         NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
         uint64_t slba = le64_to_cpu(rw->slba);
         uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
-        uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba);
+        uint64_t offset = nvme_moff(ns, slba);
 
         if (req->cmd.opcode == NVME_CMD_WRITE_ZEROES) {
             size_t mlen = nvme_m2b(ns, nlb);
@@ -2002,7 +2006,7 @@
     uint64_t slba = le64_to_cpu(rw->slba);
     uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
     size_t mlen = nvme_m2b(ns, nlb);
-    uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba);
+    uint64_t offset = nvme_moff(ns, slba);
     BlockBackend *blk = ns->blkconf.blk;
 
     trace_pci_nvme_verify_mdata_in_cb(nvme_cid(req), blk_name(blk));
@@ -2104,8 +2108,8 @@
         goto out;
     }
 
-    if (nvme_msize(ns)) {
-        int64_t offset = ns->mdata_offset + nvme_m2b(ns, zone->d.zslba);
+    if (ns->lbaf.ms) {
+        int64_t offset = nvme_moff(ns, zone->d.zslba);
 
         blk_aio_pwrite_zeroes(ns->blkconf.blk, offset,
                               nvme_m2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP,
@@ -2151,7 +2155,7 @@
         uint64_t sdlba = le64_to_cpu(copy->sdlba);
         NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba);
 
-        __nvme_advance_zone_wp(ns, zone, ctx->nlb);
+        nvme_advance_zone_wp(ns, zone, ctx->nlb);
     }
 
     g_free(ctx->bounce);
@@ -2173,10 +2177,10 @@
         goto out;
     }
 
-    if (nvme_msize(ns)) {
+    if (ns->lbaf.ms) {
         NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
         uint64_t sdlba = le64_to_cpu(copy->sdlba);
-        int64_t offset = ns->mdata_offset + nvme_m2b(ns, sdlba);
+        int64_t offset = nvme_moff(ns, sdlba);
 
         qemu_iovec_reset(&req->sg.iov);
         qemu_iovec_add(&req->sg.iov, ctx->mbounce, nvme_m2b(ns, ctx->nlb));
@@ -2268,7 +2272,6 @@
 
     status = nvme_check_bounds(ns, sdlba, ctx->nlb);
     if (status) {
-        trace_pci_nvme_err_invalid_lba_range(sdlba, ctx->nlb, ns->id_ns.nsze);
         goto invalid;
     }
 
@@ -2369,10 +2372,19 @@
     uint32_t reftag = le32_to_cpu(rw->reftag);
     struct nvme_compare_ctx *ctx = req->opaque;
     g_autofree uint8_t *buf = NULL;
+    BlockBackend *blk = ns->blkconf.blk;
+    BlockAcctCookie *acct = &req->acct;
+    BlockAcctStats *stats = blk_get_stats(blk);
     uint16_t status = NVME_SUCCESS;
 
     trace_pci_nvme_compare_mdata_cb(nvme_cid(req));
 
+    if (ret) {
+        block_acct_failed(stats, acct);
+        nvme_aio_err(req, ret);
+        goto out;
+    }
+
     buf = g_malloc(ctx->mdata.iov.size);
 
     status = nvme_bounce_mdata(n, buf, ctx->mdata.iov.size,
@@ -2387,7 +2399,6 @@
         uint8_t *bufp;
         uint8_t *mbufp = ctx->mdata.bounce;
         uint8_t *end = mbufp + ctx->mdata.iov.size;
-        size_t msize = nvme_msize(ns);
         int16_t pil = 0;
 
         status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
@@ -2403,11 +2414,11 @@
          * tuple.
          */
         if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
-            pil = nvme_msize(ns) - sizeof(NvmeDifTuple);
+            pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
         }
 
-        for (bufp = buf; mbufp < end; bufp += msize, mbufp += msize) {
-            if (memcmp(bufp + pil, mbufp + pil, msize - pil)) {
+        for (bufp = buf; mbufp < end; bufp += ns->lbaf.ms, mbufp += ns->lbaf.ms) {
+            if (memcmp(bufp + pil, mbufp + pil, ns->lbaf.ms - pil)) {
                 req->status = NVME_CMP_FAILURE;
                 goto out;
             }
@@ -2421,6 +2432,8 @@
         goto out;
     }
 
+    block_acct_done(stats, acct);
+
 out:
     qemu_iovec_destroy(&ctx->data.iov);
     g_free(ctx->data.bounce);
@@ -2468,12 +2481,12 @@
         goto out;
     }
 
-    if (nvme_msize(ns)) {
+    if (ns->lbaf.ms) {
         NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
         uint64_t slba = le64_to_cpu(rw->slba);
         uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
         size_t mlen = nvme_m2b(ns, nlb);
-        uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba);
+        uint64_t offset = nvme_moff(ns, slba);
 
         ctx->mdata.bounce = g_malloc(mlen);
 
@@ -2530,8 +2543,6 @@
             uint32_t nlb = le32_to_cpu(range[i].nlb);
 
             if (nvme_check_bounds(ns, slba, nlb)) {
-                trace_pci_nvme_err_invalid_lba_range(slba, nlb,
-                                                     ns->id_ns.nsze);
                 continue;
             }
 
@@ -2604,7 +2615,6 @@
 
     status = nvme_check_bounds(ns, slba, nlb);
     if (status) {
-        trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
         return status;
     }
 
@@ -2689,7 +2699,6 @@
 
         status = nvme_check_bounds(ns, slba, _nlb);
         if (status) {
-            trace_pci_nvme_err_invalid_lba_range(slba, _nlb, ns->id_ns.nsze);
             goto out;
         }
 
@@ -2716,7 +2725,7 @@
     }
 
     bounce = bouncep = g_malloc(nvme_l2b(ns, nlb));
-    if (nvme_msize(ns)) {
+    if (ns->lbaf.ms) {
         mbounce = mbouncep = g_malloc(nvme_m2b(ns, nlb));
     }
 
@@ -2752,9 +2761,9 @@
 
         bouncep += len;
 
-        if (nvme_msize(ns)) {
+        if (ns->lbaf.ms) {
             len = nvme_m2b(ns, nlb);
-            offset = ns->mdata_offset + nvme_m2b(ns, slba);
+            offset = nvme_moff(ns, slba);
 
             in_ctx = g_new(struct nvme_copy_in_ctx, 1);
             in_ctx->req = req;
@@ -2818,7 +2827,6 @@
 
     status = nvme_check_bounds(ns, slba, nlb);
     if (status) {
-        trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
         return status;
     }
 
@@ -2875,7 +2883,7 @@
     /* 1-initialize; see comment in nvme_dsm */
     *num_flushes = 1;
 
-    for (int i = 1; i <= n->num_namespaces; i++) {
+    for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
         ns = nvme_ns(n, i);
         if (!ns) {
             continue;
@@ -2923,7 +2931,7 @@
         if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
             bool pract = ctrl & NVME_RW_PRINFO_PRACT;
 
-            if (pract && nvme_msize(ns) == 8) {
+            if (pract && ns->lbaf.ms == 8) {
                 mapped_size = data_size;
             }
         }
@@ -2938,7 +2946,6 @@
 
     status = nvme_check_bounds(ns, slba, nlb);
     if (status) {
-        trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
         goto invalid;
     }
 
@@ -3000,7 +3007,7 @@
         if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
             bool pract = ctrl & NVME_RW_PRINFO_PRACT;
 
-            if (pract && nvme_msize(ns) == 8) {
+            if (pract && ns->lbaf.ms == 8) {
                 mapped_size -= nvme_m2b(ns, nlb);
             }
         }
@@ -3018,7 +3025,6 @@
 
     status = nvme_check_bounds(ns, slba, nlb);
     if (status) {
-        trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
         goto invalid;
     }
 
@@ -3595,8 +3601,8 @@
 
 static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
 {
+    NvmeNamespace *ns;
     uint32_t nsid = le32_to_cpu(req->cmd.nsid);
-    uint16_t status;
 
     trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req),
                           req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode));
@@ -3607,18 +3613,18 @@
 
     /*
      * In the base NVM command set, Flush may apply to all namespaces
-     * (indicated by NSID being set to 0xFFFFFFFF). But if that feature is used
+     * (indicated by NSID being set to FFFFFFFFh). But if that feature is used
      * along with TP 4056 (Namespace Types), it may be pretty screwed up.
      *
-     * If NSID is indeed set to 0xFFFFFFFF, we simply cannot associate the
+     * If NSID is indeed set to FFFFFFFFh, we simply cannot associate the
      * opcode with a specific command since we cannot determine a unique I/O
-     * command set. Opcode 0x0 could have any other meaning than something
+     * command set. Opcode 0h could have any other meaning than something
      * equivalent to flushing and say it DOES have completely different
-     * semantics in some other command set - does an NSID of 0xFFFFFFFF then
+     * semantics in some other command set - does an NSID of FFFFFFFFh then
      * mean "for all namespaces, apply whatever command set specific command
-     * that uses the 0x0 opcode?" Or does it mean "for all namespaces, apply
-     * whatever command that uses the 0x0 opcode if, and only if, it allows
-     * NSID to be 0xFFFFFFFF"?
+     * that uses the 0h opcode?" Or does it mean "for all namespaces, apply
+     * whatever command that uses the 0h opcode if, and only if, it allows NSID
+     * to be FFFFFFFFh"?
      *
      * Anyway (and luckily), for now, we do not care about this since the
      * device only supports namespace types that includes the NVM Flush command
@@ -3628,21 +3634,22 @@
         return nvme_flush(n, req);
     }
 
-    req->ns = nvme_ns(n, nsid);
-    if (unlikely(!req->ns)) {
+    ns = nvme_ns(n, nsid);
+    if (unlikely(!ns)) {
         return NVME_INVALID_FIELD | NVME_DNR;
     }
 
-    if (!(req->ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
+    if (!(ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
         trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
         return NVME_INVALID_OPCODE | NVME_DNR;
     }
 
-    status = nvme_ns_status(req->ns);
-    if (unlikely(status)) {
-        return status;
+    if (ns->status) {
+        return ns->status;
     }
 
+    req->ns = ns;
+
     switch (req->cmd.opcode) {
     case NVME_CMD_WRITE_ZEROES:
         return nvme_write_zeroes(n, req);
@@ -3844,7 +3851,7 @@
     } else {
         int i;
 
-        for (i = 1; i <= n->num_namespaces; i++) {
+        for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
             ns = nvme_ns(n, i);
             if (!ns) {
                 continue;
@@ -3934,7 +3941,7 @@
             NVME_CHANGED_NSID_SIZE) {
         /*
          * If more than 1024 namespaces, the first entry in the log page should
-         * be set to 0xffffffff and the others to 0 as spec.
+         * be set to FFFFFFFFh and the others to 0 as per the spec.
          */
         if (i == ARRAY_SIZE(nslist)) {
             memset(nslist, 0x0, sizeof(nslist));
@@ -4332,7 +4339,7 @@
     trace_pci_nvme_identify_nslist(min_nsid);
 
     /*
-     * Both 0xffffffff (NVME_NSID_BROADCAST) and 0xfffffffe are invalid values
+     * Both FFFFFFFFh (NVME_NSID_BROADCAST) and FFFFFFFEh are invalid values
      * since the Active Namespace ID List should return namespaces with ids
      * *higher* than the NSID specified in the command. This is also specified
      * in the spec (NVM Express v1.3d, Section 5.15.4).
@@ -4341,7 +4348,7 @@
         return NVME_INVALID_NSID | NVME_DNR;
     }
 
-    for (i = 1; i <= n->num_namespaces; i++) {
+    for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
         ns = nvme_ns(n, i);
         if (!ns) {
             if (!active) {
@@ -4379,7 +4386,7 @@
     trace_pci_nvme_identify_nslist_csi(min_nsid, c->csi);
 
     /*
-     * Same as in nvme_identify_nslist(), 0xffffffff/0xfffffffe are invalid.
+     * Same as in nvme_identify_nslist(), FFFFFFFFh/FFFFFFFEh are invalid.
      */
     if (min_nsid >= NVME_NSID_BROADCAST - 1) {
         return NVME_INVALID_NSID | NVME_DNR;
@@ -4389,7 +4396,7 @@
         return NVME_INVALID_FIELD | NVME_DNR;
     }
 
-    for (i = 1; i <= n->num_namespaces; i++) {
+    for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
         ns = nvme_ns(n, i);
         if (!ns) {
             if (!active) {
@@ -4446,7 +4453,7 @@
 
     /*
      * Because the NGUID and EUI64 fields are 0 in the Identify Namespace data
-     * structure, a Namespace UUID (nidt = 0x3) must be reported in the
+     * structure, a Namespace UUID (nidt = 3h) must be reported in the
      * Namespace Identification Descriptor. Add the namespace UUID here.
      */
     ns_descrs->uuid.hdr.nidt = NVME_NIDT_UUID;
@@ -4595,7 +4602,7 @@
             /*
              * The Reservation Notification Mask and Reservation Persistence
              * features require a status code of Invalid Field in Command when
-             * NSID is 0xFFFFFFFF. Since the device does not support those
+             * NSID is FFFFFFFFh. Since the device does not support those
              * features we can always return Invalid Namespace or Format as we
              * should do for all other features.
              */
@@ -4655,7 +4662,7 @@
         goto out;
     case NVME_VOLATILE_WRITE_CACHE:
         result = 0;
-        for (i = 1; i <= n->num_namespaces; i++) {
+        for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
             ns = nvme_ns(n, i);
             if (!ns) {
                 continue;
@@ -4707,9 +4714,6 @@
             result |= NVME_INTVC_NOCOALESCING;
         }
         break;
-    case NVME_COMMAND_SET_PROFILE:
-        result = 0;
-        break;
     default:
         result = nvme_feature_default[fid];
         break;
@@ -4805,7 +4809,7 @@
         break;
     case NVME_ERROR_RECOVERY:
         if (nsid == NVME_NSID_BROADCAST) {
-            for (i = 1; i <= n->num_namespaces; i++) {
+            for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
                 ns = nvme_ns(n, i);
 
                 if (!ns) {
@@ -4826,7 +4830,7 @@
         }
         break;
     case NVME_VOLATILE_WRITE_CACHE:
-        for (i = 1; i <= n->num_namespaces; i++) {
+        for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
             ns = nvme_ns(n, i);
             if (!ns) {
                 continue;
@@ -4847,15 +4851,15 @@
         }
 
         /*
-         * NVMe v1.3, Section 5.21.1.7: 0xffff is not an allowed value for NCQR
+         * NVMe v1.3, Section 5.21.1.7: FFFFh is not an allowed value for NCQR
          * and NSQR.
          */
         if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) {
             return NVME_INVALID_FIELD | NVME_DNR;
         }
 
-        trace_pci_nvme_setfeat_numq((dw11 & 0xFFFF) + 1,
-                                    ((dw11 >> 16) & 0xFFFF) + 1,
+        trace_pci_nvme_setfeat_numq((dw11 & 0xffff) + 1,
+                                    ((dw11 >> 16) & 0xffff) + 1,
                                     n->params.max_ioqpairs,
                                     n->params.max_ioqpairs);
         req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) |
@@ -4912,7 +4916,25 @@
     }
 }
 
-static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns);
+static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns)
+{
+    ns->iocs = nvme_cse_iocs_none;
+    switch (ns->csi) {
+    case NVME_CSI_NVM:
+        if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) {
+            ns->iocs = nvme_cse_iocs_nvm;
+        }
+        break;
+    case NVME_CSI_ZONED:
+        if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) {
+            ns->iocs = nvme_cse_iocs_zoned;
+        } else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) {
+            ns->iocs = nvme_cse_iocs_nvm;
+        }
+        break;
+    }
+}
+
 static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
 {
     NvmeNamespace *ns;
@@ -4963,13 +4985,13 @@
             }
 
             nvme_attach_ns(ctrl, ns);
-            __nvme_select_ns_iocs(ctrl, ns);
+            nvme_select_iocs_ns(ctrl, ns);
         } else {
             if (!nvme_ns(ctrl, nsid)) {
                 return NVME_NS_NOT_ATTACHED | NVME_DNR;
             }
 
-            ctrl->namespaces[nsid - 1] = NULL;
+            ctrl->namespaces[nsid] = NULL;
             ns->attached--;
 
             nvme_update_dmrsl(ctrl);
@@ -5101,7 +5123,7 @@
             req->status = status;
         }
     } else {
-        for (i = 1; i <= n->num_namespaces; i++) {
+        for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
             ns = nvme_ns(n, i);
             if (!ns) {
                 continue;
@@ -5212,7 +5234,7 @@
     NvmeNamespace *ns;
     int i;
 
-    for (i = 1; i <= n->num_namespaces; i++) {
+    for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
         ns = nvme_ns(n, i);
         if (!ns) {
             continue;
@@ -5254,7 +5276,7 @@
         memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size);
     }
 
-    for (i = 1; i <= n->num_namespaces; i++) {
+    for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
         ns = nvme_ns(n, i);
         if (!ns) {
             continue;
@@ -5264,37 +5286,18 @@
     }
 }
 
-static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns)
-{
-    ns->iocs = nvme_cse_iocs_none;
-    switch (ns->csi) {
-    case NVME_CSI_NVM:
-        if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) {
-            ns->iocs = nvme_cse_iocs_nvm;
-        }
-        break;
-    case NVME_CSI_ZONED:
-        if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) {
-            ns->iocs = nvme_cse_iocs_zoned;
-        } else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) {
-            ns->iocs = nvme_cse_iocs_nvm;
-        }
-        break;
-    }
-}
-
-static void nvme_select_ns_iocs(NvmeCtrl *n)
+static void nvme_select_iocs(NvmeCtrl *n)
 {
     NvmeNamespace *ns;
     int i;
 
-    for (i = 1; i <= n->num_namespaces; i++) {
+    for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
         ns = nvme_ns(n, i);
         if (!ns) {
             continue;
         }
 
-        __nvme_select_ns_iocs(n, ns);
+        nvme_select_iocs_ns(n, ns);
     }
 }
 
@@ -5396,7 +5399,7 @@
 
     QTAILQ_INIT(&n->aer_queue);
 
-    nvme_select_ns_iocs(n);
+    nvme_select_iocs(n);
 
     return 0;
 }
@@ -5493,7 +5496,7 @@
             n->bar.cc = data;
         }
         break;
-    case 0x1C:  /* CSTS */
+    case 0x1c:  /* CSTS */
         if (data & (1 << 4)) {
             NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported,
                            "attempted to W1C CSTS.NSSRO"
@@ -5505,7 +5508,7 @@
         }
         break;
     case 0x20:  /* NSSR */
-        if (data == 0x4E564D65) {
+        if (data == 0x4e564d65) {
             trace_pci_nvme_ub_mmiowr_ssreset_unsupported();
         } else {
             /* The spec says that writes of other values have no effect */
@@ -5575,11 +5578,11 @@
         n->bar.cmbmsc = (n->bar.cmbmsc & 0xffffffff) | (data << 32);
         return;
 
-    case 0xE00: /* PMRCAP */
+    case 0xe00: /* PMRCAP */
         NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly,
                        "invalid write to PMRCAP register, ignored");
         return;
-    case 0xE04: /* PMRCTL */
+    case 0xe04: /* PMRCTL */
         n->bar.pmrctl = data;
         if (NVME_PMRCTL_EN(data)) {
             memory_region_set_enabled(&n->pmr.dev->mr, true);
@@ -5590,19 +5593,19 @@
             n->pmr.cmse = false;
         }
         return;
-    case 0xE08: /* PMRSTS */
+    case 0xe08: /* PMRSTS */
         NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly,
                        "invalid write to PMRSTS register, ignored");
         return;
-    case 0xE0C: /* PMREBS */
+    case 0xe0C: /* PMREBS */
         NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly,
                        "invalid write to PMREBS register, ignored");
         return;
-    case 0xE10: /* PMRSWTP */
+    case 0xe10: /* PMRSWTP */
         NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly,
                        "invalid write to PMRSWTP register, ignored");
         return;
-    case 0xE14: /* PMRMSCL */
+    case 0xe14: /* PMRMSCL */
         if (!NVME_CAP_PMRS(n->bar.cap)) {
             return;
         }
@@ -5622,7 +5625,7 @@
         }
 
         return;
-    case 0xE18: /* PMRMSCU */
+    case 0xe18: /* PMRMSCU */
         if (!NVME_CAP_PMRS(n->bar.cap)) {
             return;
         }
@@ -5664,7 +5667,7 @@
          * from PMRSTS should ensure prior writes
          * made it to persistent media
          */
-        if (addr == 0xE08 &&
+        if (addr == 0xe08 &&
             (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) {
             memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size);
         }
@@ -5915,7 +5918,6 @@
 
 static void nvme_init_state(NvmeCtrl *n)
 {
-    n->num_namespaces = NVME_MAX_NAMESPACES;
     /* add one to max_ioqpairs to account for the admin queue pair */
     n->reg_size = pow2ceil(sizeof(NvmeBar) +
                            2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE);
@@ -6096,7 +6098,7 @@
 
     id->sqes = (0x6 << 4) | 0x6;
     id->cqes = (0x4 << 4) | 0x4;
-    id->nn = cpu_to_le32(n->num_namespaces);
+    id->nn = cpu_to_le32(NVME_MAX_NAMESPACES);
     id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
                            NVME_ONCS_FEATURES | NVME_ONCS_DSM |
                            NVME_ONCS_COMPARE | NVME_ONCS_COPY);
@@ -6161,7 +6163,7 @@
     uint32_t nsid = ns->params.nsid;
     assert(nsid && nsid <= NVME_MAX_NAMESPACES);
 
-    n->namespaces[nsid - 1] = ns;
+    n->namespaces[nsid] = ns;
     ns->attached++;
 
     n->dmrsl = MIN_NON_ZERO(n->dmrsl,
@@ -6215,7 +6217,7 @@
 
     nvme_ctrl_reset(n);
 
-    for (i = 1; i <= n->num_namespaces; i++) {
+    for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
         ns = nvme_ns(n, i);
         if (!ns) {
             continue;
diff --git a/hw/block/nvme-dif.c b/hw/nvme/dif.c
similarity index 89%
rename from hw/block/nvme-dif.c
rename to hw/nvme/dif.c
index 81b0a4c..88efcbe 100644
--- a/hw/block/nvme-dif.c
+++ b/hw/nvme/dif.c
@@ -9,13 +9,11 @@
  */
 
 #include "qemu/osdep.h"
-#include "hw/block/block.h"
-#include "sysemu/dma.h"
-#include "sysemu/block-backend.h"
 #include "qapi/error.h"
-#include "trace.h"
+#include "sysemu/block-backend.h"
+
 #include "nvme.h"
-#include "nvme-dif.h"
+#include "trace.h"
 
 uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba,
                            uint32_t reftag)
@@ -46,20 +44,18 @@
                                  uint32_t reftag)
 {
     uint8_t *end = buf + len;
-    size_t lsize = nvme_lsize(ns);
-    size_t msize = nvme_msize(ns);
     int16_t pil = 0;
 
     if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
-        pil = nvme_msize(ns) - sizeof(NvmeDifTuple);
+        pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
     }
 
-    trace_pci_nvme_dif_pract_generate_dif(len, lsize, lsize + pil, apptag,
-                                          reftag);
+    trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil,
+                                          apptag, reftag);
 
-    for (; buf < end; buf += lsize, mbuf += msize) {
+    for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
         NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
-        uint16_t crc = crc_t10dif(0x0, buf, lsize);
+        uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);
 
         if (pil) {
             crc = crc_t10dif(crc, mbuf, pil);
@@ -100,7 +96,7 @@
     }
 
     if (ctrl & NVME_RW_PRINFO_PRCHK_GUARD) {
-        uint16_t crc = crc_t10dif(0x0, buf, nvme_lsize(ns));
+        uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);
 
         if (pil) {
             crc = crc_t10dif(crc, mbuf, pil);
@@ -139,8 +135,6 @@
                         uint16_t appmask, uint32_t reftag)
 {
     uint8_t *end = buf + len;
-    size_t lsize = nvme_lsize(ns);
-    size_t msize = nvme_msize(ns);
     int16_t pil = 0;
     uint16_t status;
 
@@ -150,12 +144,12 @@
     }
 
     if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
-        pil = nvme_msize(ns) - sizeof(NvmeDifTuple);
+        pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
     }
 
-    trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), lsize + pil);
+    trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), ns->lbasz + pil);
 
-    for (; buf < end; buf += lsize, mbuf += msize) {
+    for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
         NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
 
         status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, ctrl, apptag,
@@ -178,20 +172,18 @@
     BlockBackend *blk = ns->blkconf.blk;
     BlockDriverState *bs = blk_bs(blk);
 
-    size_t msize = nvme_msize(ns);
-    size_t lsize = nvme_lsize(ns);
     int64_t moffset = 0, offset = nvme_l2b(ns, slba);
     uint8_t *mbufp, *end;
     bool zeroed;
     int16_t pil = 0;
-    int64_t bytes = (mlen / msize) * lsize;
+    int64_t bytes = (mlen / ns->lbaf.ms) << ns->lbaf.ds;
     int64_t pnum = 0;
 
     Error *err = NULL;
 
 
     if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
-        pil = nvme_msize(ns) - sizeof(NvmeDifTuple);
+        pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
     }
 
     do {
@@ -213,15 +205,15 @@
 
         if (zeroed) {
             mbufp = mbuf + moffset;
-            mlen = (pnum / lsize) * msize;
+            mlen = (pnum >> ns->lbaf.ds) * ns->lbaf.ms;
             end = mbufp + mlen;
 
-            for (; mbufp < end; mbufp += msize) {
+            for (; mbufp < end; mbufp += ns->lbaf.ms) {
                 memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple));
             }
         }
 
-        moffset += (pnum / lsize) * msize;
+        moffset += (pnum >> ns->lbaf.ds) * ns->lbaf.ms;
         offset += pnum;
     } while (pnum != bytes);
 
@@ -291,7 +283,7 @@
         goto out;
     }
 
-    if (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8) {
+    if (ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8) {
         goto out;
     }
 
@@ -314,7 +306,7 @@
     uint64_t slba = le64_to_cpu(rw->slba);
     uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
     size_t mlen = nvme_m2b(ns, nlb);
-    uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba);
+    uint64_t offset = nvme_moff(ns, slba);
     BlockBackend *blk = ns->blkconf.blk;
 
     trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk));
@@ -343,7 +335,7 @@
     NvmeNamespace *ns = req->ns;
     NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
     uint64_t slba = le64_to_cpu(rw->slba);
-    uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba);
+    uint64_t offset = nvme_moff(ns, slba);
     BlockBackend *blk = ns->blkconf.blk;
 
     trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk));
@@ -395,8 +387,7 @@
 
         if (pract) {
             uint8_t *mbuf, *end;
-            size_t msize = nvme_msize(ns);
-            int16_t pil = msize - sizeof(NvmeDifTuple);
+            int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
 
             status = nvme_check_prinfo(ns, ctrl, slba, reftag);
             if (status) {
@@ -417,7 +408,7 @@
                 pil = 0;
             }
 
-            for (; mbuf < end; mbuf += msize) {
+            for (; mbuf < end; mbuf += ns->lbaf.ms) {
                 NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
 
                 dif->apptag = cpu_to_be16(apptag);
@@ -436,7 +427,7 @@
         return NVME_NO_COMPLETE;
     }
 
-    if (nvme_ns_ext(ns) && !(pract && nvme_msize(ns) == 8)) {
+    if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == 8)) {
         mapped_len += mlen;
     }
 
@@ -470,7 +461,7 @@
     qemu_iovec_init(&ctx->mdata.iov, 1);
     qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
 
-    if (!(pract && nvme_msize(ns) == 8)) {
+    if (!(pract && ns->lbaf.ms == 8)) {
         status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
                                    NVME_TX_DIRECTION_TO_DEVICE, req);
         if (status) {
diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build
new file mode 100644
index 0000000..3cf4004
--- /dev/null
+++ b/hw/nvme/meson.build
@@ -0,0 +1 @@
+softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c'))
diff --git a/hw/block/nvme-ns.c b/hw/nvme/ns.c
similarity index 87%
rename from hw/block/nvme-ns.c
rename to hw/nvme/ns.c
index 7bb618f..992e5a1 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/nvme/ns.c
@@ -14,23 +14,16 @@
 
 #include "qemu/osdep.h"
 #include "qemu/units.h"
-#include "qemu/cutils.h"
-#include "qemu/log.h"
 #include "qemu/error-report.h"
-#include "hw/block/block.h"
-#include "hw/pci/pci.h"
+#include "qapi/error.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/block-backend.h"
-#include "qapi/error.h"
 
-#include "hw/qdev-properties.h"
-#include "hw/qdev-core.h"
-
-#include "trace.h"
 #include "nvme.h"
-#include "nvme-ns.h"
+#include "trace.h"
 
 #define MIN_DISCARD_GRANULARITY (4 * KiB)
+#define NVME_DEFAULT_ZONE_SIZE   (128 * MiB)
 
 void nvme_ns_init_format(NvmeNamespace *ns)
 {
@@ -38,7 +31,10 @@
     BlockDriverInfo bdi;
     int npdg, nlbas, ret;
 
-    nlbas = nvme_ns_nlbas(ns);
+    ns->lbaf = id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)];
+    ns->lbasz = 1 << ns->lbaf.ds;
+
+    nlbas = ns->size / (ns->lbasz + ns->lbaf.ms);
 
     id_ns->nsze = cpu_to_le64(nlbas);
 
@@ -46,13 +42,13 @@
     id_ns->ncap = id_ns->nsze;
     id_ns->nuse = id_ns->ncap;
 
-    ns->mdata_offset = nvme_l2b(ns, nlbas);
+    ns->moff = (int64_t)nlbas << ns->lbaf.ds;
 
-    npdg = ns->blkconf.discard_granularity / nvme_lsize(ns);
+    npdg = ns->blkconf.discard_granularity / ns->lbasz;
 
     ret = bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi);
     if (ret >= 0 && bdi.cluster_size > ns->blkconf.discard_granularity) {
-        npdg = bdi.cluster_size / nvme_lsize(ns);
+        npdg = bdi.cluster_size / ns->lbasz;
     }
 
     id_ns->npda = id_ns->npdg = npdg - 1;
@@ -170,7 +166,6 @@
 static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp)
 {
     uint64_t zone_size, zone_cap;
-    uint32_t lbasz = nvme_lsize(ns);
 
     /* Make sure that the values of ZNS properties are sane */
     if (ns->params.zone_size_bs) {
@@ -188,14 +183,14 @@
                    "zone size %"PRIu64"B", zone_cap, zone_size);
         return -1;
     }
-    if (zone_size < lbasz) {
+    if (zone_size < ns->lbasz) {
         error_setg(errp, "zone size %"PRIu64"B too small, "
-                   "must be at least %"PRIu32"B", zone_size, lbasz);
+                   "must be at least %zuB", zone_size, ns->lbasz);
         return -1;
     }
-    if (zone_cap < lbasz) {
+    if (zone_cap < ns->lbasz) {
         error_setg(errp, "zone capacity %"PRIu64"B too small, "
-                   "must be at least %"PRIu32"B", zone_cap, lbasz);
+                   "must be at least %zuB", zone_cap, ns->lbasz);
         return -1;
     }
 
@@ -203,9 +198,9 @@
      * Save the main zone geometry values to avoid
      * calculating them later again.
      */
-    ns->zone_size = zone_size / lbasz;
-    ns->zone_capacity = zone_cap / lbasz;
-    ns->num_zones = nvme_ns_nlbas(ns) / ns->zone_size;
+    ns->zone_size = zone_size / ns->lbasz;
+    ns->zone_capacity = zone_cap / ns->lbasz;
+    ns->num_zones = le64_to_cpu(ns->id_ns.nsze) / ns->zone_size;
 
     /* Do a few more sanity checks of ZNS properties */
     if (!ns->num_zones) {
@@ -215,43 +210,6 @@
         return -1;
     }
 
-    if (ns->params.max_open_zones > ns->num_zones) {
-        error_setg(errp,
-                   "max_open_zones value %u exceeds the number of zones %u",
-                   ns->params.max_open_zones, ns->num_zones);
-        return -1;
-    }
-    if (ns->params.max_active_zones > ns->num_zones) {
-        error_setg(errp,
-                   "max_active_zones value %u exceeds the number of zones %u",
-                   ns->params.max_active_zones, ns->num_zones);
-        return -1;
-    }
-
-    if (ns->params.max_active_zones) {
-        if (ns->params.max_open_zones > ns->params.max_active_zones) {
-            error_setg(errp, "max_open_zones (%u) exceeds max_active_zones (%u)",
-                       ns->params.max_open_zones, ns->params.max_active_zones);
-            return -1;
-        }
-
-        if (!ns->params.max_open_zones) {
-            ns->params.max_open_zones = ns->params.max_active_zones;
-        }
-    }
-
-    if (ns->params.zd_extension_size) {
-        if (ns->params.zd_extension_size & 0x3f) {
-            error_setg(errp,
-                "zone descriptor extension size must be a multiple of 64B");
-            return -1;
-        }
-        if ((ns->params.zd_extension_size >> 6) > 0xff) {
-            error_setg(errp, "zone descriptor extension size is too large");
-            return -1;
-        }
-    }
-
     return 0;
 }
 
@@ -303,7 +261,7 @@
 
     id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned));
 
-    /* MAR/MOR are zeroes-based, 0xffffffff means no limit */
+    /* MAR/MOR are zeroes-based, FFFFFFFFh means no limit */
     id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1);
     id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1);
     id_ns_z->zoc = 0;
@@ -421,6 +379,34 @@
         }
     }
 
+    if (ns->params.zoned) {
+        if (ns->params.max_active_zones) {
+            if (ns->params.max_open_zones > ns->params.max_active_zones) {
+                error_setg(errp, "max_open_zones (%u) exceeds "
+                           "max_active_zones (%u)", ns->params.max_open_zones,
+                           ns->params.max_active_zones);
+                return -1;
+            }
+
+            if (!ns->params.max_open_zones) {
+                ns->params.max_open_zones = ns->params.max_active_zones;
+            }
+        }
+
+        if (ns->params.zd_extension_size) {
+            if (ns->params.zd_extension_size & 0x3f) {
+                error_setg(errp, "zone descriptor extension size must be a "
+                           "multiple of 64B");
+                return -1;
+            }
+            if ((ns->params.zd_extension_size >> 6) > 0xff) {
+                error_setg(errp,
+                           "zone descriptor extension size is too large");
+                return -1;
+            }
+        }
+    }
+
     return 0;
 }
 
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
new file mode 100644
index 0000000..81a35cd
--- /dev/null
+++ b/hw/nvme/nvme.h
@@ -0,0 +1,547 @@
+/*
+ * QEMU NVM Express
+ *
+ * Copyright (c) 2012 Intel Corporation
+ * Copyright (c) 2021 Minwoo Im
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd.
+ *
+ * Authors:
+ *   Keith Busch            <kbusch@kernel.org>
+ *   Klaus Jensen           <k.jensen@samsung.com>
+ *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
+ *   Dmitry Fomichev        <dmitry.fomichev@wdc.com>
+ *   Minwoo Im              <minwoo.im.dev@gmail.com>
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ */
+
+#ifndef HW_NVME_INTERNAL_H
+#define HW_NVME_INTERNAL_H
+
+#include "qemu/uuid.h"
+#include "hw/pci/pci.h"
+#include "hw/block/block.h"
+
+#include "block/nvme.h"
+
+#define NVME_MAX_CONTROLLERS 32
+#define NVME_MAX_NAMESPACES  256
+
+typedef struct NvmeCtrl NvmeCtrl;
+typedef struct NvmeNamespace NvmeNamespace;
+
+#define TYPE_NVME_SUBSYS "nvme-subsys"
+#define NVME_SUBSYS(obj) \
+    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
+
+typedef struct NvmeSubsystem {
+    DeviceState parent_obj;
+    uint8_t     subnqn[256];
+
+    NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];
+    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
+
+    struct {
+        char *nqn;
+    } params;
+} NvmeSubsystem;
+
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
+
+static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
+                                         uint32_t cntlid)
+{
+    if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
+        return NULL;
+    }
+
+    return subsys->ctrls[cntlid];
+}
+
+static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
+                                            uint32_t nsid)
+{
+    if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
+        return NULL;
+    }
+
+    return subsys->namespaces[nsid];
+}
+
+#define TYPE_NVME_NS "nvme-ns"
+#define NVME_NS(obj) \
+    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
+
+typedef struct NvmeZone {
+    NvmeZoneDescr   d;
+    uint64_t        w_ptr;
+    QTAILQ_ENTRY(NvmeZone) entry;
+} NvmeZone;
+
+typedef struct NvmeNamespaceParams {
+    bool     detached;
+    bool     shared;
+    uint32_t nsid;
+    QemuUUID uuid;
+
+    uint16_t ms;
+    uint8_t  mset;
+    uint8_t  pi;
+    uint8_t  pil;
+
+    uint16_t mssrl;
+    uint32_t mcl;
+    uint8_t  msrc;
+
+    bool     zoned;
+    bool     cross_zone_read;
+    uint64_t zone_size_bs;
+    uint64_t zone_cap_bs;
+    uint32_t max_active_zones;
+    uint32_t max_open_zones;
+    uint32_t zd_extension_size;
+} NvmeNamespaceParams;
+
+typedef struct NvmeNamespace {
+    DeviceState  parent_obj;
+    BlockConf    blkconf;
+    int32_t      bootindex;
+    int64_t      size;
+    int64_t      moff;
+    NvmeIdNs     id_ns;
+    NvmeLBAF     lbaf;
+    size_t       lbasz;
+    const uint32_t *iocs;
+    uint8_t      csi;
+    uint16_t     status;
+    int          attached;
+
+    QTAILQ_ENTRY(NvmeNamespace) entry;
+
+    NvmeIdNsZoned   *id_ns_zoned;
+    NvmeZone        *zone_array;
+    QTAILQ_HEAD(, NvmeZone) exp_open_zones;
+    QTAILQ_HEAD(, NvmeZone) imp_open_zones;
+    QTAILQ_HEAD(, NvmeZone) closed_zones;
+    QTAILQ_HEAD(, NvmeZone) full_zones;
+    uint32_t        num_zones;
+    uint64_t        zone_size;
+    uint64_t        zone_capacity;
+    uint32_t        zone_size_log2;
+    uint8_t         *zd_extensions;
+    int32_t         nr_open_zones;
+    int32_t         nr_active_zones;
+
+    NvmeNamespaceParams params;
+
+    struct {
+        uint32_t err_rec;
+    } features;
+} NvmeNamespace;
+
+static inline uint32_t nvme_nsid(NvmeNamespace *ns)
+{
+    if (ns) {
+        return ns->params.nsid;
+    }
+
+    return 0;
+}
+
+static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
+{
+    return lba << ns->lbaf.ds;
+}
+
+static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
+{
+    return ns->lbaf.ms * lba;
+}
+
+static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
+{
+    return ns->moff + nvme_m2b(ns, lba);
+}
+
+static inline bool nvme_ns_ext(NvmeNamespace *ns)
+{
+    return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
+}
+
+static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
+{
+    return zone->d.zs >> 4;
+}
+
+static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
+{
+    zone->d.zs = state << 4;
+}
+
+static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
+{
+    return zone->d.zslba + ns->zone_size;
+}
+
+static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
+{
+    return zone->d.zslba + zone->d.zcap;
+}
+
+static inline bool nvme_wp_is_valid(NvmeZone *zone)
+{
+    uint8_t st = nvme_get_zone_state(zone);
+
+    return st != NVME_ZONE_STATE_FULL &&
+           st != NVME_ZONE_STATE_READ_ONLY &&
+           st != NVME_ZONE_STATE_OFFLINE;
+}
+
+static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
+                                             uint32_t zone_idx)
+{
+    return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
+}
+
+static inline void nvme_aor_inc_open(NvmeNamespace *ns)
+{
+    assert(ns->nr_open_zones >= 0);
+    if (ns->params.max_open_zones) {
+        ns->nr_open_zones++;
+        assert(ns->nr_open_zones <= ns->params.max_open_zones);
+    }
+}
+
+static inline void nvme_aor_dec_open(NvmeNamespace *ns)
+{
+    if (ns->params.max_open_zones) {
+        assert(ns->nr_open_zones > 0);
+        ns->nr_open_zones--;
+    }
+    assert(ns->nr_open_zones >= 0);
+}
+
+static inline void nvme_aor_inc_active(NvmeNamespace *ns)
+{
+    assert(ns->nr_active_zones >= 0);
+    if (ns->params.max_active_zones) {
+        ns->nr_active_zones++;
+        assert(ns->nr_active_zones <= ns->params.max_active_zones);
+    }
+}
+
+static inline void nvme_aor_dec_active(NvmeNamespace *ns)
+{
+    if (ns->params.max_active_zones) {
+        assert(ns->nr_active_zones > 0);
+        ns->nr_active_zones--;
+        assert(ns->nr_active_zones >= ns->nr_open_zones);
+    }
+    assert(ns->nr_active_zones >= 0);
+}
+
+void nvme_ns_init_format(NvmeNamespace *ns);
+int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
+void nvme_ns_drain(NvmeNamespace *ns);
+void nvme_ns_shutdown(NvmeNamespace *ns);
+void nvme_ns_cleanup(NvmeNamespace *ns);
+
+typedef struct NvmeAsyncEvent {
+    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
+    NvmeAerResult result;
+} NvmeAsyncEvent;
+
+enum {
+    NVME_SG_ALLOC = 1 << 0,
+    NVME_SG_DMA   = 1 << 1,
+};
+
+typedef struct NvmeSg {
+    int flags;
+
+    union {
+        QEMUSGList   qsg;
+        QEMUIOVector iov;
+    };
+} NvmeSg;
+
+typedef enum NvmeTxDirection {
+    NVME_TX_DIRECTION_TO_DEVICE   = 0,
+    NVME_TX_DIRECTION_FROM_DEVICE = 1,
+} NvmeTxDirection;
+
+typedef struct NvmeRequest {
+    struct NvmeSQueue       *sq;
+    struct NvmeNamespace    *ns;
+    BlockAIOCB              *aiocb;
+    uint16_t                status;
+    void                    *opaque;
+    NvmeCqe                 cqe;
+    NvmeCmd                 cmd;
+    BlockAcctCookie         acct;
+    NvmeSg                  sg;
+    QTAILQ_ENTRY(NvmeRequest)entry;
+} NvmeRequest;
+
+typedef struct NvmeBounceContext {
+    NvmeRequest *req;
+
+    struct {
+        QEMUIOVector iov;
+        uint8_t *bounce;
+    } data, mdata;
+} NvmeBounceContext;
+
+static inline const char *nvme_adm_opc_str(uint8_t opc)
+{
+    switch (opc) {
+    case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
+    case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
+    case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
+    case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
+    case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
+    case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
+    case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
+    case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
+    case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
+    case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
+    case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
+    case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
+    default:                            return "NVME_ADM_CMD_UNKNOWN";
+    }
+}
+
+static inline const char *nvme_io_opc_str(uint8_t opc)
+{
+    switch (opc) {
+    case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
+    case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
+    case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
+    case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
+    case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
+    case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
+    case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
+    case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
+    case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
+    case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
+    case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
+    default:                        return "NVME_NVM_CMD_UNKNOWN";
+    }
+}
+
+typedef struct NvmeSQueue {
+    struct NvmeCtrl *ctrl;
+    uint16_t    sqid;
+    uint16_t    cqid;
+    uint32_t    head;
+    uint32_t    tail;
+    uint32_t    size;
+    uint64_t    dma_addr;
+    QEMUTimer   *timer;
+    NvmeRequest *io_req;
+    QTAILQ_HEAD(, NvmeRequest) req_list;
+    QTAILQ_HEAD(, NvmeRequest) out_req_list;
+    QTAILQ_ENTRY(NvmeSQueue) entry;
+} NvmeSQueue;
+
+typedef struct NvmeCQueue {
+    struct NvmeCtrl *ctrl;
+    uint8_t     phase;
+    uint16_t    cqid;
+    uint16_t    irq_enabled;
+    uint32_t    head;
+    uint32_t    tail;
+    uint32_t    vector;
+    uint32_t    size;
+    uint64_t    dma_addr;
+    QEMUTimer   *timer;
+    QTAILQ_HEAD(, NvmeSQueue) sq_list;
+    QTAILQ_HEAD(, NvmeRequest) req_list;
+} NvmeCQueue;
+
+#define TYPE_NVME_BUS "nvme-bus"
+#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS)
+
+typedef struct NvmeBus {
+    BusState parent_bus;
+} NvmeBus;
+
+#define TYPE_NVME "nvme"
+#define NVME(obj) \
+        OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
+
+typedef struct NvmeParams {
+    char     *serial;
+    uint32_t num_queues; /* deprecated since 5.1 */
+    uint32_t max_ioqpairs;
+    uint16_t msix_qsize;
+    uint32_t cmb_size_mb;
+    uint8_t  aerl;
+    uint32_t aer_max_queued;
+    uint8_t  mdts;
+    uint8_t  vsl;
+    bool     use_intel_id;
+    uint8_t  zasl;
+    bool     legacy_cmb;
+} NvmeParams;
+
+typedef struct NvmeCtrl {
+    PCIDevice    parent_obj;
+    MemoryRegion bar0;
+    MemoryRegion iomem;
+    NvmeBar      bar;
+    NvmeParams   params;
+    NvmeBus      bus;
+
+    uint16_t    cntlid;
+    bool        qs_created;
+    uint32_t    page_size;
+    uint16_t    page_bits;
+    uint16_t    max_prp_ents;
+    uint16_t    cqe_size;
+    uint16_t    sqe_size;
+    uint32_t    reg_size;
+    uint32_t    max_q_ents;
+    uint8_t     outstanding_aers;
+    uint32_t    irq_status;
+    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
+    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
+    uint64_t    starttime_ms;
+    uint16_t    temperature;
+    uint8_t     smart_critical_warning;
+
+    struct {
+        MemoryRegion mem;
+        uint8_t      *buf;
+        bool         cmse;
+        hwaddr       cba;
+    } cmb;
+
+    struct {
+        HostMemoryBackend *dev;
+        bool              cmse;
+        hwaddr            cba;
+    } pmr;
+
+    uint8_t     aer_mask;
+    NvmeRequest **aer_reqs;
+    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
+    int         aer_queued;
+
+    uint32_t    dmrsl;
+
+    /* Namespace IDs start at 1, so the bitmap is indexed 1-based */
+#define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
+    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);
+
+    NvmeSubsystem   *subsys;
+
+    NvmeNamespace   namespace;
+    NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1];
+    NvmeSQueue      **sq;
+    NvmeCQueue      **cq;
+    NvmeSQueue      admin_sq;
+    NvmeCQueue      admin_cq;
+    NvmeIdCtrl      id_ctrl;
+
+    struct {
+        struct {
+            uint16_t temp_thresh_hi;
+            uint16_t temp_thresh_low;
+        };
+        uint32_t    async_config;
+    } features;
+} NvmeCtrl;
+
+static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
+{
+    if (!nsid || nsid > NVME_MAX_NAMESPACES) {
+        return NULL;
+    }
+
+    return n->namespaces[nsid];
+}
+
+static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
+{
+    NvmeSQueue *sq = req->sq;
+    NvmeCtrl *n = sq->ctrl;
+
+    return n->cq[sq->cqid];
+}
+
+static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
+{
+    NvmeSQueue *sq = req->sq;
+    return sq->ctrl;
+}
+
+static inline uint16_t nvme_cid(NvmeRequest *req)
+{
+    if (!req) {
+        return 0xffff;
+    }
+
+    return le16_to_cpu(req->cqe.cid);
+}
+
+void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
+uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+                          NvmeTxDirection dir, NvmeRequest *req);
+uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+                           NvmeTxDirection dir, NvmeRequest *req);
+void nvme_rw_complete_cb(void *opaque, int ret);
+uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
+                       NvmeCmd *cmd);
+
+/* from Linux kernel (crypto/crct10dif_common.c) */
+static const uint16_t t10_dif_crc_table[256] = {
+    0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
+    0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
+    0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
+    0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
+    0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
+    0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
+    0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
+    0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
+    0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
+    0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
+    0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
+    0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
+    0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
+    0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
+    0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
+    0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
+    0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
+    0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
+    0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
+    0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
+    0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
+    0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
+    0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
+    0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
+    0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
+    0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
+    0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
+    0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
+    0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
+    0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
+    0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
+    0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
+};
+
+uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba,
+                           uint32_t reftag);
+uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
+                               uint64_t slba);
+void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
+                                 uint8_t *mbuf, size_t mlen, uint16_t apptag,
+                                 uint32_t reftag);
+uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
+                        uint8_t *mbuf, size_t mlen, uint16_t ctrl,
+                        uint64_t slba, uint16_t apptag,
+                        uint16_t appmask, uint32_t reftag);
+uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req);
+
+
+#endif /* HW_NVME_INTERNAL_H */
diff --git a/hw/block/nvme-subsys.c b/hw/nvme/subsys.c
similarity index 86%
rename from hw/block/nvme-subsys.c
rename to hw/nvme/subsys.c
index 9604c19..192223d 100644
--- a/hw/block/nvme-subsys.c
+++ b/hw/nvme/subsys.c
@@ -6,20 +6,10 @@
  * This code is licensed under the GNU GPL v2.  Refer COPYING.
  */
 
-#include "qemu/units.h"
 #include "qemu/osdep.h"
-#include "qemu/uuid.h"
-#include "qemu/iov.h"
-#include "qemu/cutils.h"
 #include "qapi/error.h"
-#include "hw/qdev-properties.h"
-#include "hw/qdev-core.h"
-#include "hw/block/block.h"
-#include "block/aio.h"
-#include "block/accounting.h"
-#include "hw/pci/pci.h"
+
 #include "nvme.h"
-#include "nvme-subsys.h"
 
 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
 {
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
new file mode 100644
index 0000000..ea33d0c
--- /dev/null
+++ b/hw/nvme/trace-events
@@ -0,0 +1,204 @@
+# successful events
+pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
+pci_nvme_irq_pin(void) "pulsing IRQ pin"
+pci_nvme_irq_masked(void) "IRQ is masked"
+pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
+pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
+pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
+pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d"
+pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64""
+pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
+pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'"
+pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
+pci_nvme_format(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8""
+pci_nvme_format_ns(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8""
+pci_nvme_format_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
+pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
+pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
+pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
+pci_nvme_misc_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
+pci_nvme_dif_rw(uint8_t pract, uint8_t prinfo) "pract 0x%"PRIx8" prinfo 0x%"PRIx8""
+pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
+pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
+pci_nvme_dif_rw_mdata_out_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
+pci_nvme_dif_rw_check_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32""
+pci_nvme_dif_pract_generate_dif(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32""
+pci_nvme_dif_check(uint8_t prinfo, uint16_t chksum_len) "prinfo 0x%"PRIx8" chksum_len %"PRIu16""
+pci_nvme_dif_prchk_disabled(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32""
+pci_nvme_dif_prchk_guard(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16""
+pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "apptag 0x%"PRIx16" elbat 0x%"PRIx16" elbatm 0x%"PRIx16""
+pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32""
+pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8""
+pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32""
+pci_nvme_copy_in_complete(uint16_t cid) "cid %"PRIu16""
+pci_nvme_copy_cb(uint16_t cid) "cid %"PRIu16""
+pci_nvme_verify(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32""
+pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
+pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32""
+pci_nvme_rw_complete_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
+pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d"
+pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32""
+pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32""
+pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32""
+pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32""
+pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16""
+pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16""
+pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16""
+pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16""
+pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64""
+pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
+pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
+pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
+pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
+pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16""
+pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid %"PRIu16" cns 0x%"PRIx8" ctrlid %"PRIu16" csi 0x%"PRIx8""
+pci_nvme_identify_ctrl(void) "identify controller"
+pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8""
+pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32""
+pci_nvme_identify_ns_attached_list(uint16_t cntid) "cntid=%"PRIu16""
+pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8""
+pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
+pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8""
+pci_nvme_identify_cmd_set(void) "identify i/o command set"
+pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32""
+pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64""
+pci_nvme_getfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t sel, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" sel 0x%"PRIx8" cdw11 0x%"PRIx32""
+pci_nvme_setfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t save, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" save 0x%"PRIx8" cdw11 0x%"PRIx32""
+pci_nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s"
+pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
+pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
+pci_nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64""
+pci_nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64""
+pci_nvme_process_aers(int queued) "queued %d"
+pci_nvme_aer(uint16_t cid) "cid %"PRIu16""
+pci_nvme_aer_aerl_exceeded(void) "aerl exceeded"
+pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8""
+pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+pci_nvme_ns_attachment(uint16_t cid, uint8_t sel) "cid %"PRIu16", sel=0x%"PRIx8""
+pci_nvme_ns_attachment_attach(uint16_t cntlid, uint32_t nsid) "cntlid=0x%"PRIx16", nsid=0x%"PRIx32""
+pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+pci_nvme_enqueue_event_noqueue(int queued) "queued %d"
+pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
+pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
+pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16""
+pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d"
+pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d"
+pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16""
+pci_nvme_mmio_doorbell_sq(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16" new_tail %"PRIu16""
+pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
+pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
+pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
+pci_nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64""
+pci_nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64""
+pci_nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64""
+pci_nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
+pci_nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
+pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded"
+pci_nvme_mmio_stopped(void) "cleared controller enable bit"
+pci_nvme_mmio_shutdown_set(void) "shutdown bit set"
+pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
+pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
+pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
+pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
+pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
+pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
+pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32""
+pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32""
+pci_nvme_clear_ns_close(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Closed state"
+pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Empty state"
+
+# error conditions
+pci_nvme_err_mdts(size_t len) "len %zu"
+pci_nvme_err_zasl(size_t len) "len %zu"
+pci_nvme_err_req_status(uint16_t cid, uint32_t nsid, uint16_t status, uint8_t opc) "cid %"PRIu16" nsid %"PRIu32" status 0x%"PRIx16" opc 0x%"PRIx8""
+pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64""
+pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64""
+pci_nvme_err_cfs(void) "controller fatal status"
+pci_nvme_err_aio(uint16_t cid, const char *errname, uint16_t status) "cid %"PRIu16" err '%s' status 0x%"PRIx16""
+pci_nvme_err_copy_invalid_format(uint8_t format) "format 0x%"PRIx8""
+pci_nvme_err_invalid_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8""
+pci_nvme_err_invalid_num_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8""
+pci_nvme_err_invalid_sgl_excess_length(uint32_t residual) "residual %"PRIu32""
+pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
+pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is not page aligned: 0x%"PRIx64""
+pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
+pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
+pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
+pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
+pci_nvme_err_invalid_log_page_offset(uint64_t ofs, uint64_t size) "got %"PRIu64", must be <= %"PRIu64""
+pci_nvme_err_cmb_invalid_cba(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64""
+pci_nvme_err_cmb_not_enabled(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64""
+pci_nvme_err_unaligned_zone_cmd(uint8_t action, uint64_t slba, uint64_t zslba) "unaligned zone op 0x%"PRIx32", got slba=%"PRIu64", zslba=%"PRIu64""
+pci_nvme_err_invalid_zone_state_transition(uint8_t action, uint64_t slba, uint8_t attrs) "action=0x%"PRIx8", slba=%"PRIu64", attrs=0x%"PRIx32""
+pci_nvme_err_write_not_at_wp(uint64_t slba, uint64_t zone, uint64_t wp) "writing at slba=%"PRIu64", zone=%"PRIu64", but wp=%"PRIu64""
+pci_nvme_err_append_not_at_start(uint64_t slba, uint64_t zone) "appending at slba=%"PRIu64", but zone=%"PRIu64""
+pci_nvme_err_zone_is_full(uint64_t zslba) "zslba 0x%"PRIx64""
+pci_nvme_err_zone_is_read_only(uint64_t zslba) "zslba 0x%"PRIx64""
+pci_nvme_err_zone_is_offline(uint64_t zslba) "zslba 0x%"PRIx64""
+pci_nvme_err_zone_boundary(uint64_t slba, uint32_t nlb, uint64_t zcap) "lba 0x%"PRIx64" nlb %"PRIu32" zcap 0x%"PRIx64""
+pci_nvme_err_zone_invalid_write(uint64_t slba, uint64_t wp) "lba 0x%"PRIx64" wp 0x%"PRIx64""
+pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
+pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
+pci_nvme_err_insuff_active_res(uint32_t max_active) "max_active=%"PRIu32" zone limit exceeded"
+pci_nvme_err_insuff_open_res(uint32_t max_open) "max_open=%"PRIu32" zone limit exceeded"
+pci_nvme_err_zd_extension_map_error(uint32_t zone_idx) "can't map descriptor extension for zone_idx=%"PRIu32""
+pci_nvme_err_invalid_iocsci(uint32_t idx) "unsupported command set combination index %"PRIu32""
+pci_nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16""
+pci_nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16""
+pci_nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16""
+pci_nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16""
+pci_nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64""
+pci_nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16""
+pci_nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16""
+pci_nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16""
+pci_nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16""
+pci_nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16""
+pci_nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64""
+pci_nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16""
+pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16""
+pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16""
+pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
+pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32""
+pci_nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16""
+pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues"
+pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues"
+pci_nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null"
+pci_nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null"
+pci_nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64""
+pci_nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64""
+pci_nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u"
+pci_nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u"
+pci_nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u"
+pci_nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u"
+pci_nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u"
+pci_nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u"
+pci_nvme_err_startfail_css(uint8_t css) "nvme_start_ctrl failed because invalid command set selected:%u"
+pci_nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero"
+pci_nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero"
+pci_nvme_err_startfail_zasl_too_small(uint32_t zasl, uint32_t pagesz) "nvme_start_ctrl failed because zone append size limit %"PRIu32" is too small, needs to be >= %"PRIu32""
+pci_nvme_err_startfail(void) "setting controller enable bit failed"
+pci_nvme_err_invalid_mgmt_action(uint8_t action) "action=0x%"PRIx8""
+
+# undefined behavior
+pci_nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64""
+pci_nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u"
+pci_nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled"
+pci_nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status"
+pci_nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)"
+pci_nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)"
+pci_nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored"
+pci_nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored"
+pci_nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored"
+pci_nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored"
+pci_nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored"
+pci_nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored"
+pci_nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64""
+pci_nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64""
+pci_nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64""
+pci_nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0"
+pci_nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring"
+pci_nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring"
+pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring"
+pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring"
+pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_tail=%"PRIu16", ignoring"
+pci_nvme_ub_unknown_css_value(void) "unknown value in cc.css field"
diff --git a/hw/nvme/trace.h b/hw/nvme/trace.h
new file mode 100644
index 0000000..b398ea1
--- /dev/null
+++ b/hw/nvme/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-hw_nvme.h"
diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build
index 86d6f37..597d974 100644
--- a/hw/ppc/meson.build
+++ b/hw/ppc/meson.build
@@ -29,6 +29,9 @@
   'spapr_numa.c',
   'pef.c',
 ))
+ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_TCG'], if_true: files(
+  'spapr_softmmu.c',
+))
 ppc_ss.add(when: 'CONFIG_SPAPR_RNG', if_true: files('spapr_rng.c'))
 ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_LINUX'], if_true: files(
   'spapr_pci_vfio.c',
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index ffe0197..d16dd2d 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -196,7 +196,7 @@
     _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
     _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
 
-    if (env->spr_cb[SPR_PURR].oea_read) {
+    if (ppc_has_spr(cpu, SPR_PURR)) {
         _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
     }
 
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 8f40319..c23bcc4 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -703,10 +703,10 @@
     _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
     _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
 
-    if (env->spr_cb[SPR_PURR].oea_read) {
+    if (ppc_has_spr(cpu, SPR_PURR)) {
         _FDT((fdt_setprop_cell(fdt, offset, "ibm,purr", 1)));
     }
-    if (env->spr_cb[SPR_SPURR].oea_read) {
+    if (ppc_has_spr(cpu, SPR_SPURR)) {
         _FDT((fdt_setprop_cell(fdt, offset, "ibm,spurr", 1)));
     }
 
@@ -979,6 +979,7 @@
          */
         val[1] = SPAPR_OV5_XIVE_LEGACY; /* XICS */
         val[3] = 0x00; /* Hash */
+        spapr_check_mmu_mode(false);
     } else if (kvm_enabled()) {
         if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) {
             val[3] = 0x80; /* OV5_MMU_BOTH */
@@ -1556,6 +1557,22 @@
     }
 }
 
+/* Exit QEMU when KVM cannot provide the MMU mode the guest requested. */
+void spapr_check_mmu_mode(bool guest_radix)
+{
+    bool kvm = kvm_enabled();
+
+    if (guest_radix && kvm && !kvmppc_has_cap_mmu_radix()) {
+        error_report("Guest requested unavailable MMU mode (radix).");
+        exit(EXIT_FAILURE);
+    }
+    if (!guest_radix && kvm && kvmppc_has_cap_mmu_radix()
+        && !kvmppc_has_cap_mmu_hash_v3()) {
+        error_report("Guest requested unavailable MMU mode (hash).");
+        exit(EXIT_FAILURE);
+    }
+}
+
 static void spapr_machine_reset(MachineState *machine)
 {
     SpaprMachineState *spapr = SPAPR_MACHINE(machine);
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 9ea7ddd..d0c419b 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -371,6 +371,65 @@
     return true;
 }
 
+static void ppc_hash64_filter_pagesizes(PowerPCCPU *cpu,
+                                 bool (*cb)(void *, uint32_t, uint32_t),
+                                 void *opaque)
+{
+    PPCHash64Options *opts = cpu->hash64_opts;
+    int i;
+    int n = 0; /* number of segment rows that kept at least one encoding */
+    bool ci_largepage = false; /* true once a kept page has shift >= 16 */
+
+    assert(opts);
+    /* Keep only the (segment shift, page shift) pairs accepted by cb() */
+    n = 0;
+    for (i = 0; i < ARRAY_SIZE(opts->sps); i++) {
+        PPCHash64SegmentPageSizes *sps = &opts->sps[i];
+        int j;
+        int m = 0; /* next free slot in sps->enc for a kept encoding */
+
+        assert(n <= i);
+
+        if (!sps->page_shift) { /* stop at the first row with a zero shift */
+            break;
+        }
+
+        for (j = 0; j < ARRAY_SIZE(sps->enc); j++) {
+            PPCHash64PageSize *ps = &sps->enc[j];
+
+            assert(m <= j);
+            if (!ps->page_shift) { /* stop at the first zero-shift entry */
+                break;
+            }
+
+            if (cb(opaque, sps->page_shift, ps->page_shift)) {
+                if (ps->page_shift >= 16) {
+                    ci_largepage = true;
+                }
+                sps->enc[m++] = *ps; /* compact kept entries to the front */
+            }
+        }
+
+        /* Zero the enc[] slots from m onwards, i.e. everything not kept */
+        for (j = m; j < ARRAY_SIZE(sps->enc); j++) {
+            memset(&sps->enc[j], 0, sizeof(sps->enc[j]));
+        }
+
+        if (m) {
+            n++;
+        }
+    }
+
+    /* Zero rows n and up. NOTE(review): rows are counted, not moved - confirm */
+    for (i = n; i < ARRAY_SIZE(opts->sps); i++) {
+        memset(&opts->sps[i], 0, sizeof(opts->sps[i]));
+    }
+
+    if (!ci_largepage) {
+        opts->flags &= ~PPC_HASH64_CI_LARGEPAGE;
+    }
+}
+
 static void cap_hpt_maxpagesize_cpu_apply(SpaprMachineState *spapr,
                                           PowerPCCPU *cpu,
                                           uint8_t val, Error **errp)
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 16c719c..f25014a 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -20,24 +20,7 @@
 #include "mmu-book3s-v3.h"
 #include "hw/mem/memory-device.h"
 
-static bool has_spr(PowerPCCPU *cpu, int spr)
-{
-    /* We can test whether the SPR is defined by checking for a valid name */
-    return cpu->env.spr_cb[spr].name != NULL;
-}
-
-static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex)
-{
-    /*
-     * hash value/pteg group index is normalized by HPT mask
-     */
-    if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) {
-        return false;
-    }
-    return true;
-}
-
-static bool is_ram_address(SpaprMachineState *spapr, hwaddr addr)
+bool is_ram_address(SpaprMachineState *spapr, hwaddr addr)
 {
     MachineState *machine = MACHINE(spapr);
     DeviceMemoryState *dms = machine->device_memory;
@@ -53,355 +36,6 @@
     return false;
 }
 
-static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr,
-                            target_ulong opcode, target_ulong *args)
-{
-    target_ulong flags = args[0];
-    target_ulong ptex = args[1];
-    target_ulong pteh = args[2];
-    target_ulong ptel = args[3];
-    unsigned apshift;
-    target_ulong raddr;
-    target_ulong slot;
-    const ppc_hash_pte64_t *hptes;
-
-    apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel);
-    if (!apshift) {
-        /* Bad page size encoding */
-        return H_PARAMETER;
-    }
-
-    raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1);
-
-    if (is_ram_address(spapr, raddr)) {
-        /* Regular RAM - should have WIMG=0010 */
-        if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) {
-            return H_PARAMETER;
-        }
-    } else {
-        target_ulong wimg_flags;
-        /* Looks like an IO address */
-        /* FIXME: What WIMG combinations could be sensible for IO?
-         * For now we allow WIMG=010x, but are there others? */
-        /* FIXME: Should we check against registered IO addresses? */
-        wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M));
-
-        if (wimg_flags != HPTE64_R_I &&
-            wimg_flags != (HPTE64_R_I | HPTE64_R_M)) {
-            return H_PARAMETER;
-        }
-    }
-
-    pteh &= ~0x60ULL;
-
-    if (!valid_ptex(cpu, ptex)) {
-        return H_PARAMETER;
-    }
-
-    slot = ptex & 7ULL;
-    ptex = ptex & ~7ULL;
-
-    if (likely((flags & H_EXACT) == 0)) {
-        hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
-        for (slot = 0; slot < 8; slot++) {
-            if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) {
-                break;
-            }
-        }
-        ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
-        if (slot == 8) {
-            return H_PTEG_FULL;
-        }
-    } else {
-        hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1);
-        if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) {
-            ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
-            return H_PTEG_FULL;
-        }
-        ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
-    }
-
-    spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);
-
-    args[0] = ptex + slot;
-    return H_SUCCESS;
-}
-
-typedef enum {
-    REMOVE_SUCCESS = 0,
-    REMOVE_NOT_FOUND = 1,
-    REMOVE_PARM = 2,
-    REMOVE_HW = 3,
-} RemoveResult;
-
-static RemoveResult remove_hpte(PowerPCCPU *cpu
-                                , target_ulong ptex,
-                                target_ulong avpn,
-                                target_ulong flags,
-                                target_ulong *vp, target_ulong *rp)
-{
-    const ppc_hash_pte64_t *hptes;
-    target_ulong v, r;
-
-    if (!valid_ptex(cpu, ptex)) {
-        return REMOVE_PARM;
-    }
-
-    hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
-    v = ppc_hash64_hpte0(cpu, hptes, 0);
-    r = ppc_hash64_hpte1(cpu, hptes, 0);
-    ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
-
-    if ((v & HPTE64_V_VALID) == 0 ||
-        ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
-        ((flags & H_ANDCOND) && (v & avpn) != 0)) {
-        return REMOVE_NOT_FOUND;
-    }
-    *vp = v;
-    *rp = r;
-    spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0);
-    ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
-    return REMOVE_SUCCESS;
-}
-
-static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr,
-                             target_ulong opcode, target_ulong *args)
-{
-    CPUPPCState *env = &cpu->env;
-    target_ulong flags = args[0];
-    target_ulong ptex = args[1];
-    target_ulong avpn = args[2];
-    RemoveResult ret;
-
-    ret = remove_hpte(cpu, ptex, avpn, flags,
-                      &args[0], &args[1]);
-
-    switch (ret) {
-    case REMOVE_SUCCESS:
-        check_tlb_flush(env, true);
-        return H_SUCCESS;
-
-    case REMOVE_NOT_FOUND:
-        return H_NOT_FOUND;
-
-    case REMOVE_PARM:
-        return H_PARAMETER;
-
-    case REMOVE_HW:
-        return H_HARDWARE;
-    }
-
-    g_assert_not_reached();
-}
-
-#define H_BULK_REMOVE_TYPE             0xc000000000000000ULL
-#define   H_BULK_REMOVE_REQUEST        0x4000000000000000ULL
-#define   H_BULK_REMOVE_RESPONSE       0x8000000000000000ULL
-#define   H_BULK_REMOVE_END            0xc000000000000000ULL
-#define H_BULK_REMOVE_CODE             0x3000000000000000ULL
-#define   H_BULK_REMOVE_SUCCESS        0x0000000000000000ULL
-#define   H_BULK_REMOVE_NOT_FOUND      0x1000000000000000ULL
-#define   H_BULK_REMOVE_PARM           0x2000000000000000ULL
-#define   H_BULK_REMOVE_HW             0x3000000000000000ULL
-#define H_BULK_REMOVE_RC               0x0c00000000000000ULL
-#define H_BULK_REMOVE_FLAGS            0x0300000000000000ULL
-#define   H_BULK_REMOVE_ABSOLUTE       0x0000000000000000ULL
-#define   H_BULK_REMOVE_ANDCOND        0x0100000000000000ULL
-#define   H_BULK_REMOVE_AVPN           0x0200000000000000ULL
-#define H_BULK_REMOVE_PTEX             0x00ffffffffffffffULL
-
-#define H_BULK_REMOVE_MAX_BATCH        4
-
-static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr,
-                                  target_ulong opcode, target_ulong *args)
-{
-    CPUPPCState *env = &cpu->env;
-    int i;
-    target_ulong rc = H_SUCCESS;
-
-    for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
-        target_ulong *tsh = &args[i*2];
-        target_ulong tsl = args[i*2 + 1];
-        target_ulong v, r, ret;
-
-        if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) {
-            break;
-        } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) {
-            return H_PARAMETER;
-        }
-
-        *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS;
-        *tsh |= H_BULK_REMOVE_RESPONSE;
-
-        if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) {
-            *tsh |= H_BULK_REMOVE_PARM;
-            return H_PARAMETER;
-        }
-
-        ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl,
-                          (*tsh & H_BULK_REMOVE_FLAGS) >> 26,
-                          &v, &r);
-
-        *tsh |= ret << 60;
-
-        switch (ret) {
-        case REMOVE_SUCCESS:
-            *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43;
-            break;
-
-        case REMOVE_PARM:
-            rc = H_PARAMETER;
-            goto exit;
-
-        case REMOVE_HW:
-            rc = H_HARDWARE;
-            goto exit;
-        }
-    }
- exit:
-    check_tlb_flush(env, true);
-
-    return rc;
-}
-
-static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr,
-                              target_ulong opcode, target_ulong *args)
-{
-    CPUPPCState *env = &cpu->env;
-    target_ulong flags = args[0];
-    target_ulong ptex = args[1];
-    target_ulong avpn = args[2];
-    const ppc_hash_pte64_t *hptes;
-    target_ulong v, r;
-
-    if (!valid_ptex(cpu, ptex)) {
-        return H_PARAMETER;
-    }
-
-    hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
-    v = ppc_hash64_hpte0(cpu, hptes, 0);
-    r = ppc_hash64_hpte1(cpu, hptes, 0);
-    ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
-
-    if ((v & HPTE64_V_VALID) == 0 ||
-        ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
-        return H_NOT_FOUND;
-    }
-
-    r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N |
-           HPTE64_R_KEY_HI | HPTE64_R_KEY_LO);
-    r |= (flags << 55) & HPTE64_R_PP0;
-    r |= (flags << 48) & HPTE64_R_KEY_HI;
-    r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
-    spapr_store_hpte(cpu, ptex,
-                     (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
-    ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
-    /* Flush the tlb */
-    check_tlb_flush(env, true);
-    /* Don't need a memory barrier, due to qemu's global lock */
-    spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r);
-    return H_SUCCESS;
-}
-
-static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr,
-                           target_ulong opcode, target_ulong *args)
-{
-    target_ulong flags = args[0];
-    target_ulong ptex = args[1];
-    int i, ridx, n_entries = 1;
-    const ppc_hash_pte64_t *hptes;
-
-    if (!valid_ptex(cpu, ptex)) {
-        return H_PARAMETER;
-    }
-
-    if (flags & H_READ_4) {
-        /* Clear the two low order bits */
-        ptex &= ~(3ULL);
-        n_entries = 4;
-    }
-
-    hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries);
-    for (i = 0, ridx = 0; i < n_entries; i++) {
-        args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i);
-        args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i);
-    }
-    ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries);
-
-    return H_SUCCESS;
-}
-
-struct SpaprPendingHpt {
-    /* These fields are read-only after initialization */
-    int shift;
-    QemuThread thread;
-
-    /* These fields are protected by the BQL */
-    bool complete;
-
-    /* These fields are private to the preparation thread if
-     * !complete, otherwise protected by the BQL */
-    int ret;
-    void *hpt;
-};
-
-static void free_pending_hpt(SpaprPendingHpt *pending)
-{
-    if (pending->hpt) {
-        qemu_vfree(pending->hpt);
-    }
-
-    g_free(pending);
-}
-
-static void *hpt_prepare_thread(void *opaque)
-{
-    SpaprPendingHpt *pending = opaque;
-    size_t size = 1ULL << pending->shift;
-
-    pending->hpt = qemu_try_memalign(size, size);
-    if (pending->hpt) {
-        memset(pending->hpt, 0, size);
-        pending->ret = H_SUCCESS;
-    } else {
-        pending->ret = H_NO_MEM;
-    }
-
-    qemu_mutex_lock_iothread();
-
-    if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) {
-        /* Ready to go */
-        pending->complete = true;
-    } else {
-        /* We've been cancelled, clean ourselves up */
-        free_pending_hpt(pending);
-    }
-
-    qemu_mutex_unlock_iothread();
-    return NULL;
-}
-
-/* Must be called with BQL held */
-static void cancel_hpt_prepare(SpaprMachineState *spapr)
-{
-    SpaprPendingHpt *pending = spapr->pending_hpt;
-
-    /* Let the thread know it's cancelled */
-    spapr->pending_hpt = NULL;
-
-    if (!pending) {
-        /* Nothing to do */
-        return;
-    }
-
-    if (!pending->complete) {
-        /* thread will clean itself up */
-        return;
-    }
-
-    free_pending_hpt(pending);
-}
-
 /* Convert a return code from the KVM ioctl()s implementing resize HPT
  * into a PAPR hypercall return code */
 static target_ulong resize_hpt_convert_rc(int ret)
@@ -447,7 +81,6 @@
 {
     target_ulong flags = args[0];
     int shift = args[1];
-    SpaprPendingHpt *pending = spapr->pending_hpt;
     uint64_t current_ram_size;
     int rc;
 
@@ -484,182 +117,11 @@
         return resize_hpt_convert_rc(rc);
     }
 
-    if (pending) {
-        /* something already in progress */
-        if (pending->shift == shift) {
-            /* and it's suitable */
-            if (pending->complete) {
-                return pending->ret;
-            } else {
-                return H_LONG_BUSY_ORDER_100_MSEC;
-            }
-        }
-
-        /* not suitable, cancel and replace */
-        cancel_hpt_prepare(spapr);
-    }
-
-    if (!shift) {
-        /* nothing to do */
-        return H_SUCCESS;
-    }
-
-    /* start new prepare */
-
-    pending = g_new0(SpaprPendingHpt, 1);
-    pending->shift = shift;
-    pending->ret = H_HARDWARE;
-
-    qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
-                       hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
-
-    spapr->pending_hpt = pending;
-
-    /* In theory we could estimate the time more accurately based on
-     * the new size, but there's not much point */
-    return H_LONG_BUSY_ORDER_100_MSEC;
-}
-
-static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot)
-{
-    uint8_t *addr = htab;
-
-    addr += pteg * HASH_PTEG_SIZE_64;
-    addr += slot * HASH_PTE_SIZE_64;
-    return  ldq_p(addr);
-}
-
-static void new_hpte_store(void *htab, uint64_t pteg, int slot,
-                           uint64_t pte0, uint64_t pte1)
-{
-    uint8_t *addr = htab;
-
-    addr += pteg * HASH_PTEG_SIZE_64;
-    addr += slot * HASH_PTE_SIZE_64;
-
-    stq_p(addr, pte0);
-    stq_p(addr + HASH_PTE_SIZE_64 / 2, pte1);
-}
-
-static int rehash_hpte(PowerPCCPU *cpu,
-                       const ppc_hash_pte64_t *hptes,
-                       void *old_hpt, uint64_t oldsize,
-                       void *new_hpt, uint64_t newsize,
-                       uint64_t pteg, int slot)
-{
-    uint64_t old_hash_mask = (oldsize >> 7) - 1;
-    uint64_t new_hash_mask = (newsize >> 7) - 1;
-    target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot);
-    target_ulong pte1;
-    uint64_t avpn;
-    unsigned base_pg_shift;
-    uint64_t hash, new_pteg, replace_pte0;
-
-    if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) {
-        return H_SUCCESS;
-    }
-
-    pte1 = ppc_hash64_hpte1(cpu, hptes, slot);
-
-    base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1);
-    assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */
-    avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23);
-
-    if (pte0 & HPTE64_V_SECONDARY) {
-        pteg = ~pteg;
-    }
-
-    if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) {
-        uint64_t offset, vsid;
-
-        /* We only have 28 - 23 bits of offset in avpn */
-        offset = (avpn & 0x1f) << 23;
-        vsid = avpn >> 5;
-        /* We can find more bits from the pteg value */
-        if (base_pg_shift < 23) {
-            offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift;
-        }
-
-        hash = vsid ^ (offset >> base_pg_shift);
-    } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) {
-        uint64_t offset, vsid;
-
-        /* We only have 40 - 23 bits of seg_off in avpn */
-        offset = (avpn & 0x1ffff) << 23;
-        vsid = avpn >> 17;
-        if (base_pg_shift < 23) {
-            offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask)
-                << base_pg_shift;
-        }
-
-        hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift);
-    } else {
-        error_report("rehash_pte: Bad segment size in HPTE");
+    if (kvm_enabled()) {
         return H_HARDWARE;
     }
 
-    new_pteg = hash & new_hash_mask;
-    if (pte0 & HPTE64_V_SECONDARY) {
-        assert(~pteg == (hash & old_hash_mask));
-        new_pteg = ~new_pteg;
-    } else {
-        assert(pteg == (hash & old_hash_mask));
-    }
-    assert((oldsize != newsize) || (pteg == new_pteg));
-    replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot);
-    /*
-     * Strictly speaking, we don't need all these tests, since we only
-     * ever rehash bolted HPTEs.  We might in future handle non-bolted
-     * HPTEs, though so make the logic correct for those cases as
-     * well.
-     */
-    if (replace_pte0 & HPTE64_V_VALID) {
-        assert(newsize < oldsize);
-        if (replace_pte0 & HPTE64_V_BOLTED) {
-            if (pte0 & HPTE64_V_BOLTED) {
-                /* Bolted collision, nothing we can do */
-                return H_PTEG_FULL;
-            } else {
-                /* Discard this hpte */
-                return H_SUCCESS;
-            }
-        }
-    }
-
-    new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1);
-    return H_SUCCESS;
-}
-
-static int rehash_hpt(PowerPCCPU *cpu,
-                      void *old_hpt, uint64_t oldsize,
-                      void *new_hpt, uint64_t newsize)
-{
-    uint64_t n_ptegs = oldsize >> 7;
-    uint64_t pteg;
-    int slot;
-    int rc;
-
-    for (pteg = 0; pteg < n_ptegs; pteg++) {
-        hwaddr ptex = pteg * HPTES_PER_GROUP;
-        const ppc_hash_pte64_t *hptes
-            = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
-
-        if (!hptes) {
-            return H_HARDWARE;
-        }
-
-        for (slot = 0; slot < HPTES_PER_GROUP; slot++) {
-            rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize,
-                             pteg, slot);
-            if (rc != H_SUCCESS) {
-                ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
-                return rc;
-            }
-        }
-        ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
-    }
-
-    return H_SUCCESS;
+    return softmmu_resize_hpt_prepare(cpu, spapr, shift);
 }
 
 static void do_push_sregs_to_kvm_pr(CPUState *cs, run_on_cpu_data data)
@@ -675,7 +137,7 @@
     }
 }
 
-static void push_sregs_to_kvm_pr(SpaprMachineState *spapr)
+void push_sregs_to_kvm_pr(SpaprMachineState *spapr)
 {
     CPUState *cs;
 
@@ -700,9 +162,7 @@
 {
     target_ulong flags = args[0];
     target_ulong shift = args[1];
-    SpaprPendingHpt *pending = spapr->pending_hpt;
     int rc;
-    size_t newsize;
 
     if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
         return H_AUTHORITY;
@@ -725,43 +185,15 @@
         return rc;
     }
 
-    if (flags != 0) {
-        return H_PARAMETER;
+    if (kvm_enabled()) {
+        return H_HARDWARE;
     }
 
-    if (!pending || (pending->shift != shift)) {
-        /* no matching prepare */
-        return H_CLOSED;
-    }
-
-    if (!pending->complete) {
-        /* prepare has not completed */
-        return H_BUSY;
-    }
-
-    /* Shouldn't have got past PREPARE without an HPT */
-    g_assert(spapr->htab_shift);
-
-    newsize = 1ULL << pending->shift;
-    rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr),
-                    pending->hpt, newsize);
-    if (rc == H_SUCCESS) {
-        qemu_vfree(spapr->htab);
-        spapr->htab = pending->hpt;
-        spapr->htab_shift = pending->shift;
-
-        push_sregs_to_kvm_pr(spapr);
-
-        pending->hpt = NULL; /* so it's not free()d */
-    }
-
-    /* Clean up */
-    spapr->pending_hpt = NULL;
-    free_pending_hpt(pending);
-
-    return rc;
+    return softmmu_resize_hpt_commit(cpu, spapr, flags, shift);
 }
 
+
+
 static target_ulong h_set_sprg0(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                 target_ulong opcode, target_ulong *args)
 {
@@ -774,12 +206,12 @@
 static target_ulong h_set_dabr(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                target_ulong opcode, target_ulong *args)
 {
-    if (!has_spr(cpu, SPR_DABR)) {
+    if (!ppc_has_spr(cpu, SPR_DABR)) {
         return H_HARDWARE;              /* DABR register not available */
     }
     cpu_synchronize_state(CPU(cpu));
 
-    if (has_spr(cpu, SPR_DABRX)) {
+    if (ppc_has_spr(cpu, SPR_DABRX)) {
         cpu->env.spr[SPR_DABRX] = 0x3;  /* Use Problem and Privileged state */
     } else if (!(args[0] & 0x4)) {      /* Breakpoint Translation set? */
         return H_RESERVED_DABR;
@@ -794,7 +226,7 @@
 {
     target_ulong dabrx = args[1];
 
-    if (!has_spr(cpu, SPR_DABR) || !has_spr(cpu, SPR_DABRX)) {
+    if (!ppc_has_spr(cpu, SPR_DABR) || !ppc_has_spr(cpu, SPR_DABRX)) {
         return H_HARDWARE;
     }
 
@@ -1760,18 +1192,8 @@
     spapr_ovec_intersect(spapr->ov5_cas, spapr->ov5, ov5_guest);
     spapr_ovec_cleanup(ov5_guest);
 
-    if (guest_radix) {
-        if (kvm_enabled() && !kvmppc_has_cap_mmu_radix()) {
-            error_report("Guest requested unavailable MMU mode (radix).");
-            exit(EXIT_FAILURE);
-        }
-    } else {
-        if (kvm_enabled() && kvmppc_has_cap_mmu_radix()
-            && !kvmppc_has_cap_mmu_hash_v3()) {
-            error_report("Guest requested unavailable MMU mode (hash).");
-            exit(EXIT_FAILURE);
-        }
-    }
+    spapr_check_mmu_mode(guest_radix);
+
     spapr->cas_pre_isa3_guest = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00);
     spapr_ovec_cleanup(ov1_guest);
 
@@ -2023,16 +1445,34 @@
     return H_FUNCTION;
 }
 
-static void hypercall_register_types(void)
+#ifndef CONFIG_TCG
+static target_ulong h_softmmu(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                            target_ulong opcode, target_ulong *args)
+{
+    g_assert_not_reached();
+}
+
+static void hypercall_register_softmmu(void)
 {
     /* hcall-pft */
-    spapr_register_hypercall(H_ENTER, h_enter);
-    spapr_register_hypercall(H_REMOVE, h_remove);
-    spapr_register_hypercall(H_PROTECT, h_protect);
-    spapr_register_hypercall(H_READ, h_read);
+    spapr_register_hypercall(H_ENTER, h_softmmu);
+    spapr_register_hypercall(H_REMOVE, h_softmmu);
+    spapr_register_hypercall(H_PROTECT, h_softmmu);
+    spapr_register_hypercall(H_READ, h_softmmu);
 
     /* hcall-bulk */
-    spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove);
+    spapr_register_hypercall(H_BULK_REMOVE, h_softmmu);
+}
+#else
+static void hypercall_register_softmmu(void)
+{
+    /* DO NOTHING */
+}
+#endif
+
+static void hypercall_register_types(void)
+{
+    hypercall_register_softmmu();
 
     /* hcall-hpt-resize */
     spapr_register_hypercall(H_RESIZE_HPT_PREPARE, h_resize_hpt_prepare);
diff --git a/hw/ppc/spapr_softmmu.c b/hw/ppc/spapr_softmmu.c
new file mode 100644
index 0000000..6c6b86d
--- /dev/null
+++ b/hw/ppc/spapr_softmmu.c
@@ -0,0 +1,627 @@
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "sysemu/hw_accel.h"
+#include "sysemu/runstate.h"
+#include "qemu/log.h"
+#include "qemu/main-loop.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "helper_regs.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_cpu_core.h"
+#include "mmu-hash64.h"
+#include "cpu-models.h"
+#include "trace.h"
+#include "kvm_ppc.h"
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/spapr_ovec.h"
+#include "mmu-book3s-v3.h"
+#include "hw/mem/memory-device.h"
+
+/*
+ * Tell whether @ptex addresses a PTE group inside the hash page table: the
+ * index with its low three bits cleared, divided by HPTES_PER_GROUP, must
+ * have no bits set outside ppc_hash64_hpt_mask(cpu).
+ */
+static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex)
+{
+    uint64_t pteg = (ptex & ~7ULL) / HPTES_PER_GROUP;
+    return !(pteg & ~ppc_hash64_hpt_mask(cpu));
+}
+
+/*
+ * H_ENTER: insert the HPTE args[2]/args[3] into the group named by args[1]:
+ * the first non-valid slot, or with H_EXACT in args[0] exactly the slot in
+ * the low three index bits. On success args[0] is the index written.
+ */
+static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                            target_ulong opcode, target_ulong *args)
+{
+    target_ulong flags = args[0];
+    target_ulong ptex = args[1];
+    target_ulong pteh = args[2];
+    target_ulong ptel = args[3];
+    unsigned apshift;
+    target_ulong raddr, slot;
+    const ppc_hash_pte64_t *hptes;
+
+    apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel);
+    if (!apshift) {
+        /* Bad page size encoding */
+        return H_PARAMETER;
+    }
+
+    raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1);
+    if (is_ram_address(spapr, raddr)) {
+        /* Regular RAM - should have WIMG=0010 */
+        if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) {
+            return H_PARAMETER;
+        }
+    } else {
+        /* Looks like an IO address */
+        /* FIXME: What WIMG combinations could be sensible for IO?
+         * For now we allow WIMG=010x, but are there others? */
+        /* FIXME: Should we check against registered IO addresses? */
+        target_ulong wimg_flags = ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M);
+        if (wimg_flags != HPTE64_R_I &&
+            wimg_flags != (HPTE64_R_I | HPTE64_R_M)) {
+            return H_PARAMETER;
+        }
+    }
+
+    pteh &= ~0x60ULL;
+    if (!valid_ptex(cpu, ptex)) {
+        return H_PARAMETER;
+    }
+
+    slot = ptex & 7ULL;
+    ptex = ptex & ~7ULL;
+
+    if (likely((flags & H_EXACT) == 0)) {
+        hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
+        for (slot = 0; slot < 8; slot++) {
+            if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) {
+                break;
+            }
+        }
+        ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
+        if (slot == 8) {
+            return H_PTEG_FULL;
+        }
+    } else {
+        hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1);
+        if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) {
+            ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
+            return H_PTEG_FULL;
+        }
+        /* Unmap at the index that was mapped, as the early return does */
+        ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
+    }
+
+    spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);
+    args[0] = ptex + slot;
+    return H_SUCCESS;
+}
+
+typedef enum {
+    REMOVE_SUCCESS = 0,     /* h_remove reports H_SUCCESS */
+    REMOVE_NOT_FOUND = 1,   /* h_remove reports H_NOT_FOUND */
+    REMOVE_PARM = 2,        /* h_remove reports H_PARAMETER */
+    REMOVE_HW = 3,          /* h_remove reports H_HARDWARE */
+} RemoveResult; /* h_bulk_remove ORs (value << 60) into its response word */
+
+/* Clear the HPTE at @ptex if it is valid and passes the H_AVPN/H_ANDCOND
+ * test of @flags against @avpn; its old contents go to @vp and @rp. */
+static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex,
+                                target_ulong avpn, target_ulong flags,
+                                target_ulong *vp, target_ulong *rp)
+{
+    const ppc_hash_pte64_t *entry;
+    target_ulong pte0, pte1;
+
+    if (!valid_ptex(cpu, ptex)) {
+        return REMOVE_PARM;
+    }
+
+    entry = ppc_hash64_map_hptes(cpu, ptex, 1);
+    pte0 = ppc_hash64_hpte0(cpu, entry, 0);
+    pte1 = ppc_hash64_hpte1(cpu, entry, 0);
+    ppc_hash64_unmap_hptes(cpu, entry, ptex, 1);
+
+    if (!(pte0 & HPTE64_V_VALID) ||
+        ((flags & H_AVPN) && (pte0 & ~0x7fULL) != avpn) ||
+        ((flags & H_ANDCOND) && (pte0 & avpn) != 0)) {
+        return REMOVE_NOT_FOUND;
+    }
+    *vp = pte0;
+    *rp = pte1;
+    spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0);
+    ppc_hash64_tlb_flush_hpte(cpu, ptex, pte0, pte1);
+    return REMOVE_SUCCESS;
+}
+
+/*
+ * H_REMOVE: remove the HPTE at index args[1], using args[0] as flags and
+ * args[2] as the AVPN to match. Translates the RemoveResult into an hcall
+ * status; on success remove_hpte() has left the old entry in
+ * args[0]/args[1] and check_tlb_flush() is run.
+ */
+static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                             target_ulong opcode, target_ulong *args)
+{
+    target_ulong flags = args[0];
+    target_ulong ptex = args[1];
+    target_ulong avpn = args[2];
+    RemoveResult outcome = remove_hpte(cpu, ptex, avpn, flags,
+                                       &args[0], &args[1]);
+
+    if (outcome == REMOVE_SUCCESS) {
+        check_tlb_flush(&cpu->env, true);
+        return H_SUCCESS;
+    } else if (outcome == REMOVE_NOT_FOUND) {
+        return H_NOT_FOUND;
+    } else if (outcome == REMOVE_PARM) {
+        return H_PARAMETER;
+    } else if (outcome == REMOVE_HW) {
+        return H_HARDWARE;
+    }
+
+    /* Not a RemoveResult value */
+    g_assert_not_reached();
+}
+
+#define H_BULK_REMOVE_TYPE             0xc000000000000000ULL /* type mask */
+#define   H_BULK_REMOVE_REQUEST        0x4000000000000000ULL
+#define   H_BULK_REMOVE_RESPONSE       0x8000000000000000ULL
+#define   H_BULK_REMOVE_END            0xc000000000000000ULL
+#define H_BULK_REMOVE_CODE             0x3000000000000000ULL /* code mask */
+#define   H_BULK_REMOVE_SUCCESS        0x0000000000000000ULL
+#define   H_BULK_REMOVE_NOT_FOUND      0x1000000000000000ULL
+#define   H_BULK_REMOVE_PARM           0x2000000000000000ULL
+#define   H_BULK_REMOVE_HW             0x3000000000000000ULL
+#define H_BULK_REMOVE_RC               0x0c00000000000000ULL
+#define H_BULK_REMOVE_FLAGS            0x0300000000000000ULL /* flag mask */
+#define   H_BULK_REMOVE_ABSOLUTE       0x0000000000000000ULL
+#define   H_BULK_REMOVE_ANDCOND        0x0100000000000000ULL
+#define   H_BULK_REMOVE_AVPN           0x0200000000000000ULL
+#define H_BULK_REMOVE_PTEX             0x00ffffffffffffffULL /* PTE index */
+/* Request/response pairs handled per H_BULK_REMOVE call (loop bound below) */
+#define H_BULK_REMOVE_MAX_BATCH        4
+
+/*
+ * H_BULK_REMOVE: handle up to H_BULK_REMOVE_MAX_BATCH (tsh, tsl) pairs from
+ * args[], turning each request tsh into a response in place. Stops at an END
+ * entry or the first bad request; every exit runs check_tlb_flush().
+ */
+static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                  target_ulong opcode, target_ulong *args)
+{
+    int i;
+    target_ulong rc = H_SUCCESS;
+
+    for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
+        target_ulong *tsh = &args[i*2];
+        target_ulong tsl = args[i*2 + 1];
+        target_ulong v, r, ret;
+
+        if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) {
+            break;
+        } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) {
+            /* Earlier pairs may have removed entries: leave via the flush */
+            rc = H_PARAMETER;
+            goto exit;
+        }
+
+        *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS;
+        *tsh |= H_BULK_REMOVE_RESPONSE;
+        if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) {
+            *tsh |= H_BULK_REMOVE_PARM;
+            rc = H_PARAMETER;
+            goto exit;
+        }
+
+        ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl,
+                          (*tsh & H_BULK_REMOVE_FLAGS) >> 26, &v, &r);
+        *tsh |= ret << 60;
+        switch (ret) {
+        case REMOVE_SUCCESS:
+            *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43;
+            break;
+        case REMOVE_PARM:
+            rc = H_PARAMETER;
+            goto exit;
+        case REMOVE_HW:
+            rc = H_HARDWARE;
+            goto exit;
+        }
+    }
+ exit:
+    check_tlb_flush(&cpu->env, true);
+    return rc;
+}
+
+/* H_PROTECT: replace the PP0/PP/N/key bits of the HPTE at index args[1] with
+ * those from args[0]; with H_AVPN its AVPN must also equal args[2]. */
+static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                              target_ulong opcode, target_ulong *args)
+{
+    target_ulong flags = args[0];
+    target_ulong ptex = args[1];
+    target_ulong avpn = args[2];
+    const ppc_hash_pte64_t *entry;
+    target_ulong pte0, pte1;
+
+    if (!valid_ptex(cpu, ptex)) {
+        return H_PARAMETER;
+    }
+
+    entry = ppc_hash64_map_hptes(cpu, ptex, 1);
+    pte0 = ppc_hash64_hpte0(cpu, entry, 0);
+    pte1 = ppc_hash64_hpte1(cpu, entry, 0);
+    ppc_hash64_unmap_hptes(cpu, entry, ptex, 1);
+
+    if (!(pte0 & HPTE64_V_VALID) ||
+        ((flags & H_AVPN) && (pte0 & ~0x7fULL) != avpn)) {
+        return H_NOT_FOUND;
+    }
+    pte1 &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N |
+              HPTE64_R_KEY_HI | HPTE64_R_KEY_LO);
+    pte1 |= (flags << 55) & HPTE64_R_PP0;
+    pte1 |= (flags << 48) & HPTE64_R_KEY_HI;
+    pte1 |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
+    /* Store the entry without its valid bit while the TLB is flushed */
+    spapr_store_hpte(cpu, ptex,
+                     (pte0 & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
+    ppc_hash64_tlb_flush_hpte(cpu, ptex, pte0, pte1);
+    check_tlb_flush(&cpu->env, true);
+    /* Don't need a memory barrier, due to qemu's global lock */
+    spapr_store_hpte(cpu, ptex, pte0 | HPTE64_V_HPTE_DIRTY, pte1);
+    return H_SUCCESS;
+}
+
+/* H_READ: copy the HPTE at index args[1], or with H_READ_4 in args[0] the
+ * four entries of its 4-aligned block, into args[] as (pte0, pte1) pairs. */
+static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                           target_ulong opcode, target_ulong *args)
+{
+    target_ulong flags = args[0];
+    target_ulong ptex = args[1];
+    int k, count = 1;
+    const ppc_hash_pte64_t *window;
+
+    if (!valid_ptex(cpu, ptex)) {
+        return H_PARAMETER;
+    }
+
+    if (flags & H_READ_4) {
+        ptex &= ~3ULL;      /* clear the two low order bits */
+        count = 4;
+    }
+
+    window = ppc_hash64_map_hptes(cpu, ptex, count);
+    for (k = 0; k < count; k++) {
+        args[2 * k] = ppc_hash64_hpte0(cpu, window, k);
+        args[2 * k + 1] = ppc_hash64_hpte1(cpu, window, k);
+    }
+    ppc_hash64_unmap_hptes(cpu, window, ptex, count);
+    return H_SUCCESS;
+}
+
+struct SpaprPendingHpt {
+    /* These fields are read-only after initialization */
+    int shift;          /* new table is 1 << shift bytes */
+    QemuThread thread;  /* runs hpt_prepare_thread() */
+
+    /* These fields are protected by the BQL */
+    bool complete;      /* set by the thread once it has finished */
+
+    /* These fields are private to the preparation thread if
+     * !complete, otherwise protected by the BQL */
+    int ret;            /* hcall status of the preparation */
+    void *hpt;          /* allocated table, NULL if none */
+};
+
+/* Free @pending and, when one is attached, the table it points to. */
+static void free_pending_hpt(SpaprPendingHpt *pending)
+{
+    if (pending->hpt != NULL) {
+        qemu_vfree(pending->hpt);
+    }
+    g_free(pending);
+}
+
+/* Thread body: allocate and zero a table of 1 << shift bytes, then, holding
+ * the iothread lock, mark the request complete if it is still the machine's
+ * pending one and free it otherwise. */
+static void *hpt_prepare_thread(void *opaque)
+{
+    SpaprPendingHpt *req = opaque;
+    size_t bytes = 1ULL << req->shift;
+
+    req->hpt = qemu_try_memalign(bytes, bytes);
+    if (!req->hpt) {
+        req->ret = H_NO_MEM;
+    } else {
+        memset(req->hpt, 0, bytes);
+        req->ret = H_SUCCESS;
+    }
+
+    qemu_mutex_lock_iothread();
+    if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt != req) {
+        /* We've been cancelled, clean ourselves up */
+        free_pending_hpt(req);
+    } else {
+        req->complete = true;   /* ready to go */
+    }
+    qemu_mutex_unlock_iothread();
+    return NULL;
+}
+
+/*
+ * Abandon the machine's pending HPT resize, if there is one.
+ * Must be called with BQL held.
+ */
+static void cancel_hpt_prepare(SpaprMachineState *spapr)
+{
+    SpaprPendingHpt *old = spapr->pending_hpt;
+
+    /* Let the thread know it's cancelled */
+    spapr->pending_hpt = NULL;
+
+    /*
+     * With nothing pending there is nothing to free, and a request that is
+     * not complete yet is freed by its own thread.
+     */
+    if (!old || !old->complete) {
+        return;
+    }
+    free_pending_hpt(old);
+}
+
+target_ulong softmmu_resize_hpt_prepare(PowerPCCPU *cpu,
+                                         SpaprMachineState *spapr,
+                                         target_ulong shift)
+{
+    SpaprPendingHpt *pending = spapr->pending_hpt;
+
+    if (pending) {
+        /* something already in progress */
+        if (pending->shift == shift) {
+            /* and it's suitable */
+            if (pending->complete) {
+                return pending->ret;
+            } else {
+                return H_LONG_BUSY_ORDER_100_MSEC;
+            }
+        }
+
+        /* not suitable, cancel and replace */
+        cancel_hpt_prepare(spapr);
+    }
+
+    if (!shift) {
+        /* nothing to do */
+        return H_SUCCESS;
+    }
+
+    /* start new prepare */
+
+    pending = g_new0(SpaprPendingHpt, 1);
+    pending->shift = shift;
+    pending->ret = H_HARDWARE;
+
+    qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
+                       hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
+
+    spapr->pending_hpt = pending;
+
+    /* In theory we could estimate the time more accurately based on
+     * the new size, but there's not much point */
+    return H_LONG_BUSY_ORDER_100_MSEC;
+}
+
+static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot)
+{
+    uint8_t *addr = htab;
+
+    addr += pteg * HASH_PTEG_SIZE_64;
+    addr += slot * HASH_PTE_SIZE_64;
+    return  ldq_p(addr);
+}
+
+static void new_hpte_store(void *htab, uint64_t pteg, int slot,
+                           uint64_t pte0, uint64_t pte1)
+{
+    uint8_t *addr = htab;
+
+    addr += pteg * HASH_PTEG_SIZE_64;
+    addr += slot * HASH_PTE_SIZE_64;
+
+    stq_p(addr, pte0);
+    stq_p(addr + HASH_PTE_SIZE_64 / 2, pte1);
+}
+
+static int rehash_hpte(PowerPCCPU *cpu,
+                       const ppc_hash_pte64_t *hptes,
+                       void *old_hpt, uint64_t oldsize,
+                       void *new_hpt, uint64_t newsize,
+                       uint64_t pteg, int slot)
+{
+    uint64_t old_hash_mask = (oldsize >> 7) - 1;
+    uint64_t new_hash_mask = (newsize >> 7) - 1;
+    target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot);
+    target_ulong pte1;
+    uint64_t avpn;
+    unsigned base_pg_shift;
+    uint64_t hash, new_pteg, replace_pte0;
+
+    if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) {
+        return H_SUCCESS;
+    }
+
+    pte1 = ppc_hash64_hpte1(cpu, hptes, slot);
+
+    base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1);
+    assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */
+    avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23);
+
+    if (pte0 & HPTE64_V_SECONDARY) {
+        pteg = ~pteg;
+    }
+
+    if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) {
+        uint64_t offset, vsid;
+
+        /* We only have 28 - 23 bits of offset in avpn */
+        offset = (avpn & 0x1f) << 23;
+        vsid = avpn >> 5;
+        /* We can find more bits from the pteg value */
+        if (base_pg_shift < 23) {
+            offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift;
+        }
+
+        hash = vsid ^ (offset >> base_pg_shift);
+    } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) {
+        uint64_t offset, vsid;
+
+        /* We only have 40 - 23 bits of seg_off in avpn */
+        offset = (avpn & 0x1ffff) << 23;
+        vsid = avpn >> 17;
+        if (base_pg_shift < 23) {
+            offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask)
+                << base_pg_shift;
+        }
+
+        hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift);
+    } else {
+        error_report("rehash_pte: Bad segment size in HPTE");
+        return H_HARDWARE;
+    }
+
+    new_pteg = hash & new_hash_mask;
+    if (pte0 & HPTE64_V_SECONDARY) {
+        assert(~pteg == (hash & old_hash_mask));
+        new_pteg = ~new_pteg;
+    } else {
+        assert(pteg == (hash & old_hash_mask));
+    }
+    assert((oldsize != newsize) || (pteg == new_pteg));
+    replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot);
+    /*
+     * Strictly speaking, we don't need all these tests, since we only
+     * ever rehash bolted HPTEs.  We might in future handle non-bolted
+     * HPTEs, though, so make the logic correct for those cases as
+     * well.
+     */
+    if (replace_pte0 & HPTE64_V_VALID) {
+        assert(newsize < oldsize);
+        if (replace_pte0 & HPTE64_V_BOLTED) {
+            if (pte0 & HPTE64_V_BOLTED) {
+                /* Bolted collision, nothing we can do */
+                return H_PTEG_FULL;
+            } else {
+                /* Discard this hpte */
+                return H_SUCCESS;
+            }
+        }
+    }
+
+    new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1);
+    return H_SUCCESS;
+}
+
+static int rehash_hpt(PowerPCCPU *cpu,
+                      void *old_hpt, uint64_t oldsize,
+                      void *new_hpt, uint64_t newsize)
+{
+    uint64_t n_ptegs = oldsize >> 7;
+    uint64_t pteg;
+    int slot;
+    int rc;
+
+    for (pteg = 0; pteg < n_ptegs; pteg++) {
+        hwaddr ptex = pteg * HPTES_PER_GROUP;
+        const ppc_hash_pte64_t *hptes
+            = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
+
+        if (!hptes) {
+            return H_HARDWARE;
+        }
+
+        for (slot = 0; slot < HPTES_PER_GROUP; slot++) {
+            rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize,
+                             pteg, slot);
+            if (rc != H_SUCCESS) {
+                ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
+                return rc;
+            }
+        }
+        ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
+    }
+
+    return H_SUCCESS;
+}
+
+target_ulong softmmu_resize_hpt_commit(PowerPCCPU *cpu,
+                                        SpaprMachineState *spapr,
+                                        target_ulong flags,
+                                        target_ulong shift)
+{
+    SpaprPendingHpt *pending = spapr->pending_hpt;
+    int rc;
+    size_t newsize;
+
+    if (flags != 0) {
+        return H_PARAMETER;
+    }
+
+    if (!pending || (pending->shift != shift)) {
+        /* no matching prepare */
+        return H_CLOSED;
+    }
+
+    if (!pending->complete) {
+        /* prepare has not completed */
+        return H_BUSY;
+    }
+
+    /* Shouldn't have got past PREPARE without an HPT */
+    g_assert(spapr->htab_shift);
+
+    newsize = 1ULL << pending->shift;
+    rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr),
+                    pending->hpt, newsize);
+    if (rc == H_SUCCESS) {
+        qemu_vfree(spapr->htab);
+        spapr->htab = pending->hpt;
+        spapr->htab_shift = pending->shift;
+
+        push_sregs_to_kvm_pr(spapr);
+
+        pending->hpt = NULL; /* so it's not free()d */
+    }
+
+    /* Clean up */
+    spapr->pending_hpt = NULL;
+    free_pending_hpt(pending);
+
+    return rc;
+}
+
+static void hypercall_register_types(void)
+{
+    /* hcall-pft */
+    spapr_register_hypercall(H_ENTER, h_enter);
+    spapr_register_hypercall(H_REMOVE, h_remove);
+    spapr_register_hypercall(H_PROTECT, h_protect);
+    spapr_register_hypercall(H_READ, h_read);
+
+    /* hcall-bulk */
+    spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove);
+
+}
+
+type_init(hypercall_register_types)
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index ded0c10..ee57abe 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -1909,6 +1909,11 @@
                 return err;
             }
         }
+        if (dev->num_queues && dev->max_queues < dev->num_queues) {
+            error_report("The maximum number of queues supported by the "
+                         "backend is %" PRIu64, dev->max_queues);
+            return -EINVAL;
+        }
 
         if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
                 !(virtio_has_feature(dev->protocol_features,
diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index d6332d4..859978d 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -69,6 +69,11 @@
         return;
     }
 
+    if (has_iommu && !virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
+        error_setg(errp, "iommu_platform=true is not supported by the device");
+        return;
+    }
+
     if (klass->device_plugged != NULL) {
         klass->device_plugged(qbus->parent, &local_err);
     }
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 4ac926f..0ff9ce1 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -7,7 +7,7 @@
     uint32_t    intms;
     uint32_t    intmc;
     uint32_t    cc;
-    uint32_t    rsvd1;
+    uint8_t     rsvd24[4];
     uint32_t    csts;
     uint32_t    nssrc;
     uint32_t    aqa;
@@ -848,8 +848,8 @@
     NVME_FW_REQ_SUSYSTEM_RESET  = 0x0110,
     NVME_NS_ALREADY_ATTACHED    = 0x0118,
     NVME_NS_PRIVATE             = 0x0119,
-    NVME_NS_NOT_ATTACHED        = 0x011A,
-    NVME_NS_CTRL_LIST_INVALID   = 0x011C,
+    NVME_NS_NOT_ATTACHED        = 0x011a,
+    NVME_NS_CTRL_LIST_INVALID   = 0x011c,
     NVME_CONFLICTING_ATTRS      = 0x0180,
     NVME_INVALID_PROT_INFO      = 0x0181,
     NVME_WRITE_TO_RO            = 0x0182,
@@ -1409,9 +1409,9 @@
     NVME_ZONE_STATE_IMPLICITLY_OPEN  = 0x02,
     NVME_ZONE_STATE_EXPLICITLY_OPEN  = 0x03,
     NVME_ZONE_STATE_CLOSED           = 0x04,
-    NVME_ZONE_STATE_READ_ONLY        = 0x0D,
-    NVME_ZONE_STATE_FULL             = 0x0E,
-    NVME_ZONE_STATE_OFFLINE          = 0x0F,
+    NVME_ZONE_STATE_READ_ONLY        = 0x0d,
+    NVME_ZONE_STATE_FULL             = 0x0e,
+    NVME_ZONE_STATE_OFFLINE          = 0x0f,
 } NvmeZoneState;
 
 static inline void _nvme_check_size(void)
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index 298e01e..467529d 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -1,6 +1,7 @@
 #ifndef GEN_ICOUNT_H
 #define GEN_ICOUNT_H
 
+#include "exec/exec-all.h"
 #include "qemu/timer.h"
 
 /* Helpers for instruction counting code generation.  */
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
index a35ec28..ec4e27a 100644
--- a/include/fpu/softfloat-macros.h
+++ b/include/fpu/softfloat-macros.h
@@ -83,6 +83,43 @@
 #define FPU_SOFTFLOAT_MACROS_H
 
 #include "fpu/softfloat-types.h"
+#include "qemu/host-utils.h"
+
+/**
+ * shl_double: double-word merging left shift
+ * @l: left or most-significant word
+ * @r: right or least-significant word
+ * @c: shift count
+ *
+ * Shift @l left by @c bits, shifting in bits from @r.
+ */
+static inline uint64_t shl_double(uint64_t l, uint64_t r, int c)
+{
+#if defined(__x86_64__)
+    asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c));
+    return l;
+#else
+    return c ? (l << c) | (r >> (64 - c)) : l;
+#endif
+}
+
+/**
+ * shr_double: double-word merging right shift
+ * @l: left or most-significant word
+ * @r: right or least-significant word
+ * @c: shift count
+ *
+ * Shift @r right by @c bits, shifting in bits from @l.
+ */
+static inline uint64_t shr_double(uint64_t l, uint64_t r, int c)
+{
+#if defined(__x86_64__)
+    asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c));
+    return r;
+#else
+    return c ? (r >> c) | (l << (64 - c)) : r;
+#endif
+}
 
 /*----------------------------------------------------------------------------
 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
@@ -403,16 +440,12 @@
 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- add128(
-     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
+                          uint64_t *z0Ptr, uint64_t *z1Ptr)
 {
-    uint64_t z1;
-
-    z1 = a1 + b1;
-    *z1Ptr = z1;
-    *z0Ptr = a0 + b0 + ( z1 < a1 );
-
+    bool c = 0;
+    *z1Ptr = uadd64_carry(a1, b1, &c);
+    *z0Ptr = uadd64_carry(a0, b0, &c);
 }
 
 /*----------------------------------------------------------------------------
@@ -423,34 +456,14 @@
 | `z1Ptr', and `z2Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- add192(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t a2,
-     uint64_t b0,
-     uint64_t b1,
-     uint64_t b2,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr
- )
+static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2,
+                          uint64_t b0, uint64_t b1, uint64_t b2,
+                          uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
 {
-    uint64_t z0, z1, z2;
-    int8_t carry0, carry1;
-
-    z2 = a2 + b2;
-    carry1 = ( z2 < a2 );
-    z1 = a1 + b1;
-    carry0 = ( z1 < a1 );
-    z0 = a0 + b0;
-    z1 += carry1;
-    z0 += ( z1 < carry1 );
-    z0 += carry0;
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
+    bool c = 0;
+    *z2Ptr = uadd64_carry(a2, b2, &c);
+    *z1Ptr = uadd64_carry(a1, b1, &c);
+    *z0Ptr = uadd64_carry(a0, b0, &c);
 }
 
 /*----------------------------------------------------------------------------
@@ -461,14 +474,12 @@
 | `z1Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- sub128(
-     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
+                          uint64_t *z0Ptr, uint64_t *z1Ptr)
 {
-
-    *z1Ptr = a1 - b1;
-    *z0Ptr = a0 - b0 - ( a1 < b1 );
-
+    bool c = 0;
+    *z1Ptr = usub64_borrow(a1, b1, &c);
+    *z0Ptr = usub64_borrow(a0, b0, &c);
 }
 
 /*----------------------------------------------------------------------------
@@ -479,34 +490,14 @@
 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- sub192(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t a2,
-     uint64_t b0,
-     uint64_t b1,
-     uint64_t b2,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr
- )
+static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2,
+                          uint64_t b0, uint64_t b1, uint64_t b2,
+                          uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
 {
-    uint64_t z0, z1, z2;
-    int8_t borrow0, borrow1;
-
-    z2 = a2 - b2;
-    borrow1 = ( a2 < b2 );
-    z1 = a1 - b1;
-    borrow0 = ( a1 < b1 );
-    z0 = a0 - b0;
-    z0 -= ( z1 < borrow1 );
-    z1 -= borrow1;
-    z0 -= borrow0;
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
+    bool c = 0;
+    *z2Ptr = usub64_borrow(a2, b2, &c);
+    *z1Ptr = usub64_borrow(a1, b1, &c);
+    *z0Ptr = usub64_borrow(a0, b0, &c);
 }
 
 /*----------------------------------------------------------------------------
@@ -515,27 +506,10 @@
 | `z0Ptr' and `z1Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void
+mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr)
 {
-    uint32_t aHigh, aLow, bHigh, bLow;
-    uint64_t z0, zMiddleA, zMiddleB, z1;
-
-    aLow = a;
-    aHigh = a>>32;
-    bLow = b;
-    bHigh = b>>32;
-    z1 = ( (uint64_t) aLow ) * bLow;
-    zMiddleA = ( (uint64_t) aLow ) * bHigh;
-    zMiddleB = ( (uint64_t) aHigh ) * bLow;
-    z0 = ( (uint64_t) aHigh ) * bHigh;
-    zMiddleA += zMiddleB;
-    z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
-    zMiddleA <<= 32;
-    z1 += zMiddleA;
-    z0 += ( z1 < zMiddleA );
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
+    mulu64(z1Ptr, z0Ptr, a, b);
 }
 
 /*----------------------------------------------------------------------------
@@ -546,24 +520,14 @@
 *----------------------------------------------------------------------------*/
 
 static inline void
- mul128By64To192(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t b,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr
- )
+mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b,
+                uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
 {
-    uint64_t z0, z1, z2, more1;
+    uint64_t z0, z1, m1;
 
-    mul64To128( a1, b, &z1, &z2 );
-    mul64To128( a0, b, &z0, &more1 );
-    add128( z0, more1, 0, z1, &z0, &z1 );
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
+    mul64To128(a1, b, &m1, z2Ptr);
+    mul64To128(a0, b, &z0, &z1);
+    add128(z0, z1, 0, m1, z0Ptr, z1Ptr);
 }
 
 /*----------------------------------------------------------------------------
@@ -573,34 +537,21 @@
 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- mul128To256(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t b0,
-     uint64_t b1,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr,
-     uint64_t *z3Ptr
- )
+static inline void mul128To256(uint64_t a0, uint64_t a1,
+                               uint64_t b0, uint64_t b1,
+                               uint64_t *z0Ptr, uint64_t *z1Ptr,
+                               uint64_t *z2Ptr, uint64_t *z3Ptr)
 {
-    uint64_t z0, z1, z2, z3;
-    uint64_t more1, more2;
+    uint64_t z0, z1, z2;
+    uint64_t m0, m1, m2, n1, n2;
 
-    mul64To128( a1, b1, &z2, &z3 );
-    mul64To128( a1, b0, &z1, &more2 );
-    add128( z1, more2, 0, z2, &z1, &z2 );
-    mul64To128( a0, b0, &z0, &more1 );
-    add128( z0, more1, 0, z1, &z0, &z1 );
-    mul64To128( a0, b1, &more1, &more2 );
-    add128( more1, more2, 0, z2, &more1, &z2 );
-    add128( z0, z1, 0, more1, &z0, &z1 );
-    *z3Ptr = z3;
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
+    mul64To128(a1, b0, &m1, &m2);
+    mul64To128(a0, b1, &n1, &n2);
+    mul64To128(a1, b1, &z2, z3Ptr);
+    mul64To128(a0, b0, &z0, &z1);
 
+    add192( 0, m1, m2,  0, n1, n2, &m0, &m1, &m2);
+    add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr);
 }
 
 /*----------------------------------------------------------------------------
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 78ad5ca..53f2c2e 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -100,7 +100,10 @@
 | Routine to raise any or all of the software IEC/IEEE floating-point
 | exception flags.
 *----------------------------------------------------------------------------*/
-void float_raise(uint8_t flags, float_status *status);
+static inline void float_raise(uint8_t flags, float_status *status)
+{
+    status->float_exception_flags |= flags;
+}
 
 /*----------------------------------------------------------------------------
 | If `a' is denormal and we are in flush-to-zero mode then set the
@@ -1194,6 +1197,8 @@
 float128 float128_add(float128, float128, float_status *status);
 float128 float128_sub(float128, float128, float_status *status);
 float128 float128_mul(float128, float128, float_status *status);
+float128 float128_muladd(float128, float128, float128, int,
+                         float_status *status);
 float128 float128_div(float128, float128, float_status *status);
 float128 float128_rem(float128, float128, float_status *status);
 float128 float128_sqrt(float128, float_status *status);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7f40a15..bbf817a 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -582,6 +582,12 @@
 void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn);
 target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
                              target_ulong *args);
+target_ulong softmmu_resize_hpt_prepare(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                         target_ulong shift);
+target_ulong softmmu_resize_hpt_commit(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                        target_ulong flags, target_ulong shift);
+bool is_ram_address(SpaprMachineState *spapr, hwaddr addr);
+void push_sregs_to_kvm_pr(SpaprMachineState *spapr);
 
 /* Virtual Processor Area structure constants */
 #define VPA_MIN_SIZE           640
@@ -821,6 +827,7 @@
 void close_htab_fd(SpaprMachineState *spapr);
 void spapr_setup_hpt(SpaprMachineState *spapr);
 void spapr_free_hpt(SpaprMachineState *spapr);
+void spapr_check_mmu_mode(bool guest_radix);
 SpaprTceTable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn);
 void spapr_tce_table_enable(SpaprTceTable *tcet,
                             uint32_t page_shift, uint64_t bus_offset,
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 4a8bc75..21a9a52 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -74,6 +74,8 @@
     int nvqs;
     /* the first virtqueue which would be used by this vhost dev */
     int vq_index;
+    /* if non-zero, minimum required value for max_queues */
+    int num_queues;
     uint64_t features;
     uint64_t acked_features;
     uint64_t backend_features;
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index cdca299..711b221 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -26,6 +26,7 @@
 #ifndef HOST_UTILS_H
 #define HOST_UTILS_H
 
+#include "qemu/compiler.h"
 #include "qemu/bswap.h"
 
 #ifdef CONFIG_INT128
@@ -272,6 +273,9 @@
  */
 static inline uint8_t revbit8(uint8_t x)
 {
+#if __has_builtin(__builtin_bitreverse8)
+    return __builtin_bitreverse8(x);
+#else
     /* Assign the correct nibble position.  */
     x = ((x & 0xf0) >> 4)
       | ((x & 0x0f) << 4);
@@ -281,6 +285,7 @@
       | ((x & 0x22) << 1)
       | ((x & 0x11) << 3);
     return x;
+#endif
 }
 
 /**
@@ -289,6 +294,9 @@
  */
 static inline uint16_t revbit16(uint16_t x)
 {
+#if __has_builtin(__builtin_bitreverse16)
+    return __builtin_bitreverse16(x);
+#else
     /* Assign the correct byte position.  */
     x = bswap16(x);
     /* Assign the correct nibble position.  */
@@ -300,6 +308,7 @@
       | ((x & 0x2222) << 1)
       | ((x & 0x1111) << 3);
     return x;
+#endif
 }
 
 /**
@@ -308,6 +317,9 @@
  */
 static inline uint32_t revbit32(uint32_t x)
 {
+#if __has_builtin(__builtin_bitreverse32)
+    return __builtin_bitreverse32(x);
+#else
     /* Assign the correct byte position.  */
     x = bswap32(x);
     /* Assign the correct nibble position.  */
@@ -319,6 +331,7 @@
       | ((x & 0x22222222u) << 1)
       | ((x & 0x11111111u) << 3);
     return x;
+#endif
 }
 
 /**
@@ -327,6 +340,9 @@
  */
 static inline uint64_t revbit64(uint64_t x)
 {
+#if __has_builtin(__builtin_bitreverse64)
+    return __builtin_bitreverse64(x);
+#else
     /* Assign the correct byte position.  */
     x = bswap64(x);
     /* Assign the correct nibble position.  */
@@ -338,6 +354,281 @@
       | ((x & 0x2222222222222222ull) << 1)
       | ((x & 0x1111111111111111ull) << 3);
     return x;
+#endif
+}
+
+/**
+ * sadd32_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
+{
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
+    return __builtin_add_overflow(x, y, ret);
+#else
+    *ret = x + y;
+    return ((*ret ^ x) & ~(x ^ y)) < 0;
+#endif
+}
+
+/**
+ * sadd64_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
+{
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
+    return __builtin_add_overflow(x, y, ret);
+#else
+    *ret = x + y;
+    return ((*ret ^ x) & ~(x ^ y)) < 0;
+#endif
+}
+
+/**
+ * uadd32_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
+{
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
+    return __builtin_add_overflow(x, y, ret);
+#else
+    *ret = x + y;
+    return *ret < x;
+#endif
+}
+
+/**
+ * uadd64_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
+{
+#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
+    return __builtin_add_overflow(x, y, ret);
+#else
+    *ret = x + y;
+    return *ret < x;
+#endif
+}
+
+/**
+ * ssub32_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for difference
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
+{
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
+    return __builtin_sub_overflow(x, y, ret);
+#else
+    *ret = x - y;
+    return ((*ret ^ x) & (x ^ y)) < 0;
+#endif
+}
+
+/**
+ * ssub64_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for difference
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
+{
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
+    return __builtin_sub_overflow(x, y, ret);
+#else
+    *ret = x - y;
+    return ((*ret ^ x) & (x ^ y)) < 0;
+#endif
+}
+
+/**
+ * usub32_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for difference
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
+{
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
+    return __builtin_sub_overflow(x, y, ret);
+#else
+    *ret = x - y;
+    return x < y;
+#endif
+}
+
+/**
+ * usub64_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
+{
+#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
+    return __builtin_sub_overflow(x, y, ret);
+#else
+    *ret = x - y;
+    return x < y;
+#endif
+}
+
+/**
+ * smul32_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
+{
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
+    return __builtin_mul_overflow(x, y, ret);
+#else
+    int64_t z = (int64_t)x * y;
+    *ret = z;
+    return *ret != z;
+#endif
+}
+
+/**
+ * smul64_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
+{
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
+    return __builtin_mul_overflow(x, y, ret);
+#else
+    uint64_t hi, lo;
+    muls64(&lo, &hi, x, y);
+    *ret = lo;
+    return hi != ((int64_t)lo >> 63);
+#endif
+}
+
+/**
+ * umul32_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
+{
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
+    return __builtin_mul_overflow(x, y, ret);
+#else
+    uint64_t z = (uint64_t)x * y;
+    *ret = z;
+    return z > UINT32_MAX;
+#endif
+}
+
+/**
+ * umul64_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
+{
+#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
+    return __builtin_mul_overflow(x, y, ret);
+#else
+    uint64_t hi;
+    mulu64(ret, &hi, x, y);
+    return hi != 0;
+#endif
+}
+
+/**
+ * uadd64_carry - addition with carry-in and carry-out
+ * @x, @y: addends
+ * @pcarry: in-out carry value
+ *
+ * Computes @x + @y + *@pcarry, placing the carry-out back
+ * into *@pcarry and returning the 64-bit sum.
+ */
+static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
+{
+#if __has_builtin(__builtin_addcll)
+    unsigned long long c = *pcarry;
+    x = __builtin_addcll(x, y, c, &c);
+    *pcarry = c & 1;
+    return x;
+#else
+    bool c = *pcarry;
+    /* This is clang's internal expansion of __builtin_addc. */
+    c = uadd64_overflow(x, c, &x);
+    c |= uadd64_overflow(x, y, &x);
+    *pcarry = c;
+    return x;
+#endif
+}
+
+/**
+ * usub64_borrow - subtraction with borrow-in and borrow-out
+ * @x, @y: minuend and subtrahend
+ * @pborrow: in-out borrow value
+ *
+ * Computes @x - @y - *@pborrow, placing the borrow-out back
+ * into *@pborrow and returning the 64-bit difference.
+ */
+static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
+{
+#if __has_builtin(__builtin_subcll)
+    unsigned long long b = *pborrow;
+    x = __builtin_subcll(x, y, b, &b);
+    *pborrow = b & 1;
+    return x;
+#else
+    bool b = *pborrow;
+    b = usub64_overflow(x, b, &x);
+    b |= usub64_overflow(x, y, &x);
+    *pborrow = b;
+    return x;
+#endif
 }
 
 /* Host type specific sizes of these routines.  */
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index 2cd1faf..ef8a008 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -1096,6 +1096,7 @@
 #define tcg_gen_sextract_tl tcg_gen_sextract_i64
 #define tcg_gen_extract2_tl tcg_gen_extract2_i64
 #define tcg_const_tl tcg_const_i64
+#define tcg_constant_tl tcg_constant_i64
 #define tcg_const_local_tl tcg_const_local_i64
 #define tcg_gen_movcond_tl tcg_gen_movcond_i64
 #define tcg_gen_add2_tl tcg_gen_add2_i64
@@ -1209,6 +1210,7 @@
 #define tcg_gen_sextract_tl tcg_gen_sextract_i32
 #define tcg_gen_extract2_tl tcg_gen_extract2_i32
 #define tcg_const_tl tcg_const_i32
+#define tcg_constant_tl tcg_constant_i32
 #define tcg_const_local_tl tcg_const_local_i32
 #define tcg_gen_movcond_tl tcg_gen_movcond_i32
 #define tcg_gen_add2_tl tcg_gen_add2_i32
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
index b591790..662bcd1 100644
--- a/linux-user/aarch64/signal.c
+++ b/linux-user/aarch64/signal.c
@@ -561,11 +561,7 @@
         goto badframe;
     }
 
-    if (do_sigaltstack(frame_addr +
-            offsetof(struct target_rt_sigframe, uc.tuc_stack),
-            0, get_sp_from_cpustate(env)) == -EFAULT) {
-        goto badframe;
-    }
+    target_restore_altstack(&frame->uc.tuc_stack, env);
 
     unlock_user_struct(frame, frame_addr, 0);
     return -TARGET_QEMU_ESIGRETURN;
diff --git a/linux-user/alpha/signal.c b/linux-user/alpha/signal.c
index c5c27ce..1129ffe 100644
--- a/linux-user/alpha/signal.c
+++ b/linux-user/alpha/signal.c
@@ -138,8 +138,8 @@
 
     setup_sigcontext(&frame->sc, env, frame_addr, set);
 
-    if (ka->sa_restorer) {
-        r26 = ka->sa_restorer;
+    if (ka->ka_restorer) {
+        r26 = ka->ka_restorer;
     } else {
         __put_user(INSN_MOV_R30_R16, &frame->retcode[0]);
         __put_user(INSN_LDI_R0 + TARGET_NR_sigreturn,
@@ -192,15 +192,15 @@
         __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]);
     }
 
-    if (ka->sa_restorer) {
-        r26 = ka->sa_restorer;
+    if (ka->ka_restorer) {
+        r26 = ka->ka_restorer;
     } else {
         __put_user(INSN_MOV_R30_R16, &frame->retcode[0]);
         __put_user(INSN_LDI_R0 + TARGET_NR_rt_sigreturn,
                    &frame->retcode[1]);
         __put_user(INSN_CALLSYS, &frame->retcode[2]);
         /* imb(); */
-        r26 = frame_addr + offsetof(struct target_sigframe, retcode);
+        r26 = frame_addr + offsetof(struct target_rt_sigframe, retcode);
     }
 
     if (err) {
@@ -257,11 +257,7 @@
     set_sigmask(&set);
 
     restore_sigcontext(env, &frame->uc.tuc_mcontext);
-    if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe,
-                                             uc.tuc_stack),
-                       0, env->ir[IR_SP]) == -EFAULT) {
-        goto badframe;
-    }
+    target_restore_altstack(&frame->uc.tuc_stack, env);
 
     unlock_user_struct(frame, frame_addr, 0);
     return -TARGET_QEMU_ESIGRETURN;
diff --git a/linux-user/alpha/target_signal.h b/linux-user/alpha/target_signal.h
index 0b90d3a..2506429 100644
--- a/linux-user/alpha/target_signal.h
+++ b/linux-user/alpha/target_signal.h
@@ -92,6 +92,7 @@
 #define TARGET_GEN_SUBRNG7     -25
 
 #define TARGET_ARCH_HAS_SETUP_FRAME
+#define TARGET_ARCH_HAS_KA_RESTORER
 
 /* bit-flags */
 #define TARGET_SS_AUTODISARM (1U << 31) /* disable sas during sighandling */
diff --git a/linux-user/arm/cpu_loop.c b/linux-user/arm/cpu_loop.c
index 989d03c..69632d1 100644
--- a/linux-user/arm/cpu_loop.c
+++ b/linux-user/arm/cpu_loop.c
@@ -224,6 +224,64 @@
     }
 }
 
+/*
+ * Run @opcode through the FPA11 emulator, EmulateAll().
+ * Returns false when EmulateAll() yields 0 (illegal instruction), leaving
+ * the caller to report it.  Otherwise returns true, having either stepped
+ * the PC past the insn or queued SIGFPE for an enabled FP exception.
+ */
+static bool emulate_arm_fpa11(CPUARMState *env, uint32_t opcode)
+{
+    TaskState *ts = env_cpu(env)->opaque;
+    int status = EmulateAll(opcode, &ts->fpa, env);
+    int sf_flags, raised = 0, enable_mask;
+
+    if (status == 0) {
+        return false;
+    }
+    if (status < 0) {
+        /* FP exception: the negated status carries the softfloat flags. */
+        sf_flags = -status;
+        /* Map each softfloat flag onto its FPSR flag. */
+        if (sf_flags & float_flag_invalid) {
+            raised |= BIT_IOC;
+        }
+        if (sf_flags & float_flag_divbyzero) {
+            raised |= BIT_DZC;
+        }
+        if (sf_flags & float_flag_overflow) {
+            raised |= BIT_OFC;
+        }
+        if (sf_flags & float_flag_underflow) {
+            raised |= BIT_UFC;
+        }
+        if (sf_flags & float_flag_inexact) {
+            raised |= BIT_IXC;
+        }
+
+        /* Exceptions that are not enabled only accumulate in the FPSR. */
+        enable_mask = ts->fpa.fpsr >> 16;
+        ts->fpa.fpsr |= raised & ~enable_mask;
+
+        if (raised & enable_mask) {
+            target_siginfo_t info = { };
+
+            /*
+             * The kernel's nwfpe emulator does not pass a real si_code.
+             * It merely uses send_sig(SIGFPE, current, 1).
+             */
+            info.si_signo = TARGET_SIGFPE;
+            info.si_code = TARGET_SI_KERNEL;
+            queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+            return true;
+        }
+    }
+
+    /* Instruction completed without a trapping exception: step past it. */
+    env->regs[15] += 4;
+    return true;
+}
+
 void cpu_loop(CPUARMState *env)
 {
     CPUState *cs = env_cpu(env);
@@ -244,9 +302,7 @@
         case EXCP_NOCP:
         case EXCP_INVSTATE:
             {
-                TaskState *ts = cs->opaque;
                 uint32_t opcode;
-                int rc;
 
                 /* we handle the FPU emulation here, as Linux */
                 /* we get the opcode */
@@ -263,64 +319,15 @@
                     goto excp_debug;
                 }
 
-                rc = EmulateAll(opcode, &ts->fpa, env);
-                if (rc == 0) { /* illegal instruction */
-                    info.si_signo = TARGET_SIGILL;
-                    info.si_errno = 0;
-                    info.si_code = TARGET_ILL_ILLOPN;
-                    info._sifields._sigfault._addr = env->regs[15];
-                    queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
-                } else if (rc < 0) { /* FP exception */
-                    int arm_fpe=0;
-
-                    /* translate softfloat flags to FPSR flags */
-                    if (-rc & float_flag_invalid)
-                      arm_fpe |= BIT_IOC;
-                    if (-rc & float_flag_divbyzero)
-                      arm_fpe |= BIT_DZC;
-                    if (-rc & float_flag_overflow)
-                      arm_fpe |= BIT_OFC;
-                    if (-rc & float_flag_underflow)
-                      arm_fpe |= BIT_UFC;
-                    if (-rc & float_flag_inexact)
-                      arm_fpe |= BIT_IXC;
-
-                    FPSR fpsr = ts->fpa.fpsr;
-                    //printf("fpsr 0x%x, arm_fpe 0x%x\n",fpsr,arm_fpe);
-
-                    if (fpsr & (arm_fpe << 16)) { /* exception enabled? */
-                      info.si_signo = TARGET_SIGFPE;
-                      info.si_errno = 0;
-
-                      /* ordered by priority, least first */
-                      if (arm_fpe & BIT_IXC) info.si_code = TARGET_FPE_FLTRES;
-                      if (arm_fpe & BIT_UFC) info.si_code = TARGET_FPE_FLTUND;
-                      if (arm_fpe & BIT_OFC) info.si_code = TARGET_FPE_FLTOVF;
-                      if (arm_fpe & BIT_DZC) info.si_code = TARGET_FPE_FLTDIV;
-                      if (arm_fpe & BIT_IOC) info.si_code = TARGET_FPE_FLTINV;
-
-                      info._sifields._sigfault._addr = env->regs[15];
-                      queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
-                    } else {
-                      env->regs[15] += 4;
-                    }
-
-                    /* accumulate unenabled exceptions */
-                    if ((!(fpsr & BIT_IXE)) && (arm_fpe & BIT_IXC))
-                      fpsr |= BIT_IXC;
-                    if ((!(fpsr & BIT_UFE)) && (arm_fpe & BIT_UFC))
-                      fpsr |= BIT_UFC;
-                    if ((!(fpsr & BIT_OFE)) && (arm_fpe & BIT_OFC))
-                      fpsr |= BIT_OFC;
-                    if ((!(fpsr & BIT_DZE)) && (arm_fpe & BIT_DZC))
-                      fpsr |= BIT_DZC;
-                    if ((!(fpsr & BIT_IOE)) && (arm_fpe & BIT_IOC))
-                      fpsr |= BIT_IOC;
-                    ts->fpa.fpsr=fpsr;
-                } else { /* everything OK */
-                    /* increment PC */
-                    env->regs[15] += 4;
+                if (!env->thumb && emulate_arm_fpa11(env, opcode)) {
+                    break;
                 }
+
+                info.si_signo = TARGET_SIGILL;
+                info.si_errno = 0;
+                info.si_code = TARGET_ILL_ILLOPN;
+                info._sifields._sigfault._addr = env->regs[15];
+                queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
             }
             break;
         case EXCP_SWI:
diff --git a/linux-user/arm/signal.c b/linux-user/arm/signal.c
index f21d153..32b68ee 100644
--- a/linux-user/arm/signal.c
+++ b/linux-user/arm/signal.c
@@ -685,11 +685,7 @@
         }
     }
 
-    if (do_sigaltstack(context_addr
-                       + offsetof(struct target_ucontext_v2, tuc_stack),
-                       0, get_sp_from_cpustate(env)) == -EFAULT) {
-        return 1;
-    }
+    target_restore_altstack(&uc->tuc_stack, env);
 
 #if 0
     /* Send SIGTRAP if we're single-stepping */
@@ -773,8 +769,7 @@
         goto badframe;
     }
 
-    if (do_sigaltstack(frame_addr + offsetof(struct rt_sigframe_v1, uc.tuc_stack), 0, get_sp_from_cpustate(env)) == -EFAULT)
-        goto badframe;
+    target_restore_altstack(&frame->uc.tuc_stack, env);
 
 #if 0
     /* Send SIGTRAP if we're single-stepping */
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index fc9c4f1..0e832b2 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -676,48 +676,25 @@
 
 #define ELF_CLASS   ELFCLASS64
 #define ELF_ARCH    EM_SPARCV9
-
-#define STACK_BIAS              2047
-
-static inline void init_thread(struct target_pt_regs *regs,
-                               struct image_info *infop)
-{
-#ifndef TARGET_ABI32
-    regs->tstate = 0;
-#endif
-    regs->pc = infop->entry;
-    regs->npc = regs->pc + 4;
-    regs->y = 0;
-#ifdef TARGET_ABI32
-    regs->u_regs[14] = infop->start_stack - 16 * 4;
-#else
-    if (personality(infop->personality) == PER_LINUX32)
-        regs->u_regs[14] = infop->start_stack - 16 * 4;
-    else
-        regs->u_regs[14] = infop->start_stack - 16 * 8 - STACK_BIAS;
-#endif
-}
-
 #else
 #define ELF_START_MMAP 0x80000000
 #define ELF_HWCAP  (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | HWCAP_SPARC_SWAP \
                     | HWCAP_SPARC_MULDIV)
-
 #define ELF_CLASS   ELFCLASS32
 #define ELF_ARCH    EM_SPARC
+#endif /* TARGET_SPARC64 */
 
 static inline void init_thread(struct target_pt_regs *regs,
                                struct image_info *infop)
 {
-    regs->psr = 0;
+    /* Note that target_cpu_copy_regs does not read psr/tstate. */
     regs->pc = infop->entry;
     regs->npc = regs->pc + 4;
     regs->y = 0;
-    regs->u_regs[14] = infop->start_stack - 16 * 4;
+    regs->u_regs[14] = (infop->start_stack - 16 * sizeof(abi_ulong)
+                        - TARGET_STACK_BIAS);
 }
-
-#endif
-#endif
+#endif /* TARGET_SPARC */
 
 #ifdef TARGET_PPC
 
@@ -1398,6 +1375,39 @@
     regs->gprs[15] = infop->start_stack;
 }
 
+/* See linux kernel: arch/s390/include/uapi/asm/ptrace.h (s390_regs).  */
+#define ELF_NREG 27
+typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG];
+
+enum {
+    TARGET_REG_PSWM = 0,
+    TARGET_REG_PSWA = 1,
+    TARGET_REG_GPRS = 2,
+    TARGET_REG_ARS = 18,
+    TARGET_REG_ORIG_R2 = 26,
+};
+
+/* Fill @regs following the s390_regs layout: PSW, GPRs, ARs, orig_r2. */
+static void elf_core_copy_regs(target_elf_gregset_t *regs,
+                               const CPUS390XState *env)
+{
+    /* Access registers are stored as 32-bit values from TARGET_REG_ARS. */
+    uint32_t *ars = (uint32_t *)&(*regs)[TARGET_REG_ARS];
+    int n;
+
+    (*regs)[TARGET_REG_PSWM] = tswapreg(env->psw.mask);
+    (*regs)[TARGET_REG_PSWA] = tswapreg(env->psw.addr);
+    (*regs)[TARGET_REG_ORIG_R2] = 0;
+
+    for (n = 0; n < 16; n++) {
+        (*regs)[TARGET_REG_GPRS + n] = tswapreg(env->regs[n]);
+        ars[n] = tswap32(env->aregs[n]);
+    }
+}
+
+#define USE_ELF_CORE_DUMP
+#define ELF_EXEC_PAGESIZE 4096
+
 #endif /* TARGET_S390X */
 
 #ifdef TARGET_RISCV
@@ -3399,7 +3409,6 @@
 static void free_note_info(struct elf_note_info *);
 static int fill_note_info(struct elf_note_info *, long, const CPUArchState *);
 static void fill_thread_info(struct elf_note_info *, const CPUArchState *);
-static int core_dump_filename(const TaskState *, char *, size_t);
 
 static int dump_write(int, const void *, size_t);
 static int write_note(struct memelfnote *, int);
@@ -3642,11 +3651,12 @@
 
     (void) memset(psinfo, 0, sizeof (*psinfo));
 
-    len = ts->info->arg_end - ts->info->arg_start;
+    len = ts->info->env_strings - ts->info->arg_strings;
     if (len >= ELF_PRARGSZ)
         len = ELF_PRARGSZ - 1;
-    if (copy_from_user(&psinfo->pr_psargs, ts->info->arg_start, len))
+    if (copy_from_user(&psinfo->pr_psargs, ts->info->arg_strings, len)) {
         return -EFAULT;
+    }
     for (i = 0; i < len; i++)
         if (psinfo->pr_psargs[i] == 0)
             psinfo->pr_psargs[i] = ' ';
@@ -3698,32 +3708,16 @@
  * for the name:
  *     qemu_<basename-of-target-binary>_<date>-<time>_<pid>.core
  *
- * Returns 0 in case of success, -1 otherwise (errno is set).
+ * Returns a newly allocated filename that the caller must g_free().
  */
-static int core_dump_filename(const TaskState *ts, char *buf,
-                              size_t bufsize)
+static char *core_dump_filename(const TaskState *ts)
 {
-    char timestamp[64];
-    char *base_filename = NULL;
-    struct timeval tv;
-    struct tm tm;
+    g_autoptr(GDateTime) now = g_date_time_new_now_local();
+    g_autofree char *nowstr = g_date_time_format(now, "%Y%m%d-%H%M%S");
+    g_autofree char *base_filename = g_path_get_basename(ts->bprm->filename);
 
-    assert(bufsize >= PATH_MAX);
-
-    if (gettimeofday(&tv, NULL) < 0) {
-        (void) fprintf(stderr, "unable to get current timestamp: %s",
-                       strerror(errno));
-        return (-1);
-    }
-
-    base_filename = g_path_get_basename(ts->bprm->filename);
-    (void) strftime(timestamp, sizeof (timestamp), "%Y%m%d-%H%M%S",
-                    localtime_r(&tv.tv_sec, &tm));
-    (void) snprintf(buf, bufsize, "qemu_%s_%s_%d.core",
-                    base_filename, timestamp, (int)getpid());
-    g_free(base_filename);
-
-    return (0);
+    return g_strdup_printf("qemu_%s_%s_%d.core",
+                           base_filename, nowstr, (int)getpid());
 }
 
 static int dump_write(int fd, const void *ptr, size_t size)
@@ -3951,7 +3945,7 @@
     const CPUState *cpu = env_cpu((CPUArchState *)env);
     const TaskState *ts = (const TaskState *)cpu->opaque;
     struct vm_area_struct *vma = NULL;
-    char corefile[PATH_MAX];
+    g_autofree char *corefile = NULL;
     struct elf_note_info info;
     struct elfhdr elf;
     struct elf_phdr phdr;
@@ -3968,8 +3962,7 @@
     if (dumpsize.rlim_cur == 0)
         return 0;
 
-    if (core_dump_filename(ts, corefile, sizeof (corefile)) < 0)
-        return (-errno);
+    corefile = core_dump_filename(ts);
 
     if ((fd = open(corefile, O_WRONLY | O_CREAT,
                    S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0)
diff --git a/linux-user/hexagon/signal.c b/linux-user/hexagon/signal.c
index fde8dc9..85eab5e 100644
--- a/linux-user/hexagon/signal.c
+++ b/linux-user/hexagon/signal.c
@@ -260,11 +260,7 @@
     }
 
     restore_ucontext(env, &frame->uc);
-
-    if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe,
-            uc.uc_stack), 0, get_sp_from_cpustate(env)) == -EFAULT) {
-        goto badframe;
-    }
+    target_restore_altstack(&frame->uc.uc_stack, env);
 
     unlock_user_struct(frame, frame_addr, 0);
     return -TARGET_QEMU_ESIGRETURN;
diff --git a/linux-user/hppa/signal.c b/linux-user/hppa/signal.c
index d1a58fe..0e266f4 100644
--- a/linux-user/hppa/signal.c
+++ b/linux-user/hppa/signal.c
@@ -187,13 +187,7 @@
     set_sigmask(&set);
 
     restore_sigcontext(env, &frame->uc.tuc_mcontext);
-    unlock_user_struct(frame, frame_addr, 0);
-
-    if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe,
-                                             uc.tuc_stack),
-                       0, env->gr[30]) == -EFAULT) {
-        goto badframe;
-    }
+    target_restore_altstack(&frame->uc.tuc_stack, env);
 
     unlock_user_struct(frame, frame_addr, 0);
     return -TARGET_QEMU_ESIGRETURN;
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index 9320e1d..8701774 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -581,10 +581,7 @@
         goto badframe;
     }
 
-    if (do_sigaltstack(frame_addr + offsetof(struct rt_sigframe, uc.tuc_stack), 0,
-                       get_sp_from_cpustate(env)) == -EFAULT) {
-        goto badframe;
-    }
+    target_restore_altstack(&frame->uc.tuc_stack, env);
 
     unlock_user_struct(frame, frame_addr, 0);
     return -TARGET_QEMU_ESIGRETURN;
diff --git a/linux-user/m68k/signal.c b/linux-user/m68k/signal.c
index 49ff87c..d062306 100644
--- a/linux-user/m68k/signal.c
+++ b/linux-user/m68k/signal.c
@@ -400,10 +400,7 @@
     if (target_rt_restore_ucontext(env, &frame->uc))
         goto badframe;
 
-    if (do_sigaltstack(frame_addr +
-                       offsetof(struct target_rt_sigframe, uc.tuc_stack),
-                       0, get_sp_from_cpustate(env)) == -EFAULT)
-        goto badframe;
+    target_restore_altstack(&frame->uc.tuc_stack, env);
 
     unlock_user_struct(frame, frame_addr, 0);
     return -TARGET_QEMU_ESIGRETURN;
diff --git a/linux-user/main.c b/linux-user/main.c
index 7995b6e..4dfc47a 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -205,7 +205,6 @@
     CPUState *new_cpu = cpu_create(cpu_type);
     CPUArchState *new_env = new_cpu->env_ptr;
     CPUBreakpoint *bp;
-    CPUWatchpoint *wp;
 
     /* Reset non arch specific state */
     cpu_reset(new_cpu);
@@ -217,13 +216,9 @@
        Note: Once we support ptrace with hw-debug register access, make sure
        BP_CPU break/watchpoints are handled correctly on clone. */
     QTAILQ_INIT(&new_cpu->breakpoints);
-    QTAILQ_INIT(&new_cpu->watchpoints);
     QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
         cpu_breakpoint_insert(new_cpu, bp->pc, bp->flags, NULL);
     }
-    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
-        cpu_watchpoint_insert(new_cpu, wp->vaddr, wp->len, wp->flags, NULL);
-    }
 
     return new_env;
 }
diff --git a/linux-user/meson.build b/linux-user/meson.build
index 7fe28d6..9549f81 100644
--- a/linux-user/meson.build
+++ b/linux-user/meson.build
@@ -32,7 +32,6 @@
 subdir('ppc')
 subdir('s390x')
 subdir('sh4')
-subdir('sparc64')
 subdir('sparc')
 subdir('x86_64')
 subdir('xtensa')
diff --git a/linux-user/microblaze/signal.c b/linux-user/microblaze/signal.c
index cf0707b..4c483bd 100644
--- a/linux-user/microblaze/signal.c
+++ b/linux-user/microblaze/signal.c
@@ -209,11 +209,7 @@
 
     restore_sigcontext(&frame->uc.tuc_mcontext, env);
 
-    if (do_sigaltstack(frame_addr +
-                       offsetof(struct target_rt_sigframe, uc.tuc_stack),
-                       0, get_sp_from_cpustate(env)) == -EFAULT) {
-        goto badframe;
-    }
+    target_restore_altstack(&frame->uc.tuc_stack, env);
 
     unlock_user_struct(frame, frame_addr, 0);
     return -TARGET_QEMU_ESIGRETURN;
diff --git a/linux-user/mips/signal.c b/linux-user/mips/signal.c
index 455a8a2..e6be807 100644
--- a/linux-user/mips/signal.c
+++ b/linux-user/mips/signal.c
@@ -368,11 +368,7 @@
     set_sigmask(&blocked);
 
     restore_sigcontext(env, &frame->rs_uc.tuc_mcontext);
-
-    if (do_sigaltstack(frame_addr +
-                       offsetof(struct target_rt_sigframe, rs_uc.tuc_stack),
-                       0, get_sp_from_cpustate(env)) == -EFAULT)
-        goto badframe;
+    target_restore_altstack(&frame->rs_uc.tuc_stack, env);
 
     env->active_tc.PC = env->CP0_EPC;
     mips_set_hflags_isa_mode_from_pc(env);
diff --git a/linux-user/nios2/signal.c b/linux-user/nios2/signal.c
index 7d53506..cc3872f 100644
--- a/linux-user/nios2/signal.c
+++ b/linux-user/nios2/signal.c
@@ -82,9 +82,7 @@
                                int *pr2)
 {
     int temp;
-    abi_ulong off, frame_addr = env->regs[R_SP];
     unsigned long *gregs = uc->tuc_mcontext.gregs;
-    int err;
 
     /* Always make any pending restarted system calls return -EINTR */
     /* current->restart_block.fn = do_no_restart_syscall; */
@@ -130,11 +128,7 @@
     __get_user(env->regs[R_RA], &gregs[23]);
     __get_user(env->regs[R_SP], &gregs[28]);
 
-    off = offsetof(struct target_rt_sigframe, uc.tuc_stack);
-    err = do_sigaltstack(frame_addr + off, 0, get_sp_from_cpustate(env));
-    if (err == -EFAULT) {
-        return 1;
-    }
+    target_restore_altstack(&uc->tuc_stack, env);
 
     *pr2 = env->regs[2];
     return 0;
diff --git a/linux-user/openrisc/signal.c b/linux-user/openrisc/signal.c
index 232ad82..5c5640a 100644
--- a/linux-user/openrisc/signal.c
+++ b/linux-user/openrisc/signal.c
@@ -158,10 +158,7 @@
     set_sigmask(&set);
 
     restore_sigcontext(env, &frame->uc.tuc_mcontext);
-    if (do_sigaltstack(frame_addr + offsetof(target_rt_sigframe, uc.tuc_stack),
-                       0, frame_addr) == -EFAULT) {
-        goto badframe;
-    }
+    target_restore_altstack(&frame->uc.tuc_stack, env);
 
     unlock_user_struct(frame, frame_addr, 0);
     return cpu_get_gpr(env, 11);
diff --git a/linux-user/ppc/cpu_loop.c b/linux-user/ppc/cpu_loop.c
index 4a0f6c8..fa91ea0 100644
--- a/linux-user/ppc/cpu_loop.c
+++ b/linux-user/ppc/cpu_loop.c
@@ -423,12 +423,6 @@
             cpu_abort(cs, "Maintenance exception while in user mode. "
                       "Aborting\n");
             break;
-        case POWERPC_EXCP_STOP:     /* stop translation                      */
-            /* We did invalidate the instruction cache. Go on */
-            break;
-        case POWERPC_EXCP_BRANCH:   /* branch instruction:                   */
-            /* We just stopped because of a branch. Go on */
-            break;
         case POWERPC_EXCP_SYSCALL_USER:
             /* system call in user-mode emulation */
             /* WARNING:
diff --git a/linux-user/ppc/signal.c b/linux-user/ppc/signal.c
index bad38f8..edfad28 100644
--- a/linux-user/ppc/signal.c
+++ b/linux-user/ppc/signal.c
@@ -655,9 +655,7 @@
     if (do_setcontext(&rt_sf->uc, env, 1))
         goto sigsegv;
 
-    do_sigaltstack(rt_sf_addr
-                   + offsetof(struct target_rt_sigframe, uc.tuc_stack),
-                   0, env->gpr[1]);
+    target_restore_altstack(&rt_sf->uc.tuc_stack, env);
 
     unlock_user_struct(rt_sf, rt_sf_addr, 1);
     return -TARGET_QEMU_ESIGRETURN;
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 74e06e7..3b0b6b7 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -432,7 +432,8 @@
 int host_to_target_signal(int sig);
 long do_sigreturn(CPUArchState *env);
 long do_rt_sigreturn(CPUArchState *env);
-abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp);
+abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr,
+                        CPUArchState *env);
 int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset);
 abi_long do_swapcontext(CPUArchState *env, abi_ulong uold_ctx,
                         abi_ulong unew_ctx, abi_long ctx_size);
diff --git a/linux-user/riscv/signal.c b/linux-user/riscv/signal.c
index 67a95db..9405c7f 100644
--- a/linux-user/riscv/signal.c
+++ b/linux-user/riscv/signal.c
@@ -192,11 +192,7 @@
     }
 
     restore_ucontext(env, &frame->uc);
-
-    if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe,
-            uc.uc_stack), 0, get_sp_from_cpustate(env)) == -EFAULT) {
-        goto badframe;
-    }
+    target_restore_altstack(&frame->uc.uc_stack, env);
 
     unlock_user_struct(frame, frame_addr, 0);
     return -TARGET_QEMU_ESIGRETURN;
diff --git a/linux-user/s390x/signal.c b/linux-user/s390x/signal.c
index 7107c5f..ef136da 100644
--- a/linux-user/s390x/signal.c
+++ b/linux-user/s390x/signal.c
@@ -25,25 +25,24 @@
 #define __NUM_FPRS 16
 #define __NUM_ACRS 16
 
-#define S390_SYSCALL_SIZE   2
 #define __SIGNAL_FRAMESIZE      160 /* FIXME: 31-bit mode -> 96 */
 
 #define _SIGCONTEXT_NSIG        64
 #define _SIGCONTEXT_NSIG_BPW    64 /* FIXME: 31-bit mode -> 32 */
 #define _SIGCONTEXT_NSIG_WORDS  (_SIGCONTEXT_NSIG / _SIGCONTEXT_NSIG_BPW)
 #define _SIGMASK_COPY_SIZE    (sizeof(unsigned long)*_SIGCONTEXT_NSIG_WORDS)
-#define PSW_ADDR_AMODE            0x0000000000000000UL /* 0x80000000UL for 31-bit */
 #define S390_SYSCALL_OPCODE ((uint16_t)0x0a00)
 
 typedef struct {
     target_psw_t psw;
-    target_ulong gprs[__NUM_GPRS];
-    unsigned int acrs[__NUM_ACRS];
+    abi_ulong gprs[__NUM_GPRS];
+    abi_uint acrs[__NUM_ACRS];
 } target_s390_regs_common;
 
 typedef struct {
-    unsigned int fpc;
-    double   fprs[__NUM_FPRS];
+    uint32_t fpc;
+    uint32_t pad;
+    uint64_t fprs[__NUM_FPRS];
 } target_s390_fp_regs;
 
 typedef struct {
@@ -51,30 +50,41 @@
     target_s390_fp_regs     fpregs;
 } target_sigregs;
 
-struct target_sigcontext {
-    target_ulong   oldmask[_SIGCONTEXT_NSIG_WORDS];
-    target_sigregs *sregs;
-};
+typedef struct {
+    uint64_t vxrs_low[16];
+    uint64_t vxrs_high[16][2];
+    uint8_t reserved[128];
+} target_sigregs_ext;
+
+typedef struct {
+    abi_ulong oldmask[_SIGCONTEXT_NSIG_WORDS];
+    abi_ulong sregs;
+} target_sigcontext;
 
 typedef struct {
     uint8_t callee_used_stack[__SIGNAL_FRAMESIZE];
-    struct target_sigcontext sc;
+    target_sigcontext sc;
     target_sigregs sregs;
     int signo;
-    uint8_t retcode[S390_SYSCALL_SIZE];
+    target_sigregs_ext sregs_ext;
+    uint16_t retcode;
 } sigframe;
 
+#define TARGET_UC_VXRS 2
+
 struct target_ucontext {
-    target_ulong tuc_flags;
-    struct target_ucontext *tuc_link;
+    abi_ulong tuc_flags;
+    abi_ulong tuc_link;
     target_stack_t tuc_stack;
     target_sigregs tuc_mcontext;
-    target_sigset_t tuc_sigmask;   /* mask last for extensibility */
+    target_sigset_t tuc_sigmask;
+    uint8_t reserved[128 - sizeof(target_sigset_t)];
+    target_sigregs_ext tuc_mcontext_ext;
 };
 
 typedef struct {
     uint8_t callee_used_stack[__SIGNAL_FRAMESIZE];
-    uint8_t retcode[S390_SYSCALL_SIZE];
+    uint16_t retcode;
     struct target_siginfo info;
     struct target_ucontext uc;
 } rt_sigframe;
@@ -105,151 +115,191 @@
 static void save_sigregs(CPUS390XState *env, target_sigregs *sregs)
 {
     int i;
-    //save_access_regs(current->thread.acrs); FIXME
 
-    /* Copy a 'clean' PSW mask to the user to avoid leaking
-       information about whether PER is currently on.  */
+    /*
+     * Copy a 'clean' PSW mask to the user to avoid leaking
+     * information about whether PER is currently on.
+     */
     __put_user(env->psw.mask, &sregs->regs.psw.mask);
     __put_user(env->psw.addr, &sregs->regs.psw.addr);
+
     for (i = 0; i < 16; i++) {
         __put_user(env->regs[i], &sregs->regs.gprs[i]);
     }
     for (i = 0; i < 16; i++) {
         __put_user(env->aregs[i], &sregs->regs.acrs[i]);
     }
+
     /*
      * We have to store the fp registers to current->thread.fp_regs
      * to merge them with the emulated registers.
      */
-    //save_fp_regs(&current->thread.fp_regs); FIXME
     for (i = 0; i < 16; i++) {
         __put_user(*get_freg(env, i), &sregs->fpregs.fprs[i]);
     }
 }
 
+/*
+ * Save vregs[0..15][1] to vxrs_low and vregs[16..31][0..1] to vxrs_high.
+ *
+ * if (MACHINE_HAS_VX) ...
+ * That said, the frame always has the storage and env always has the
+ * registers, so the copy is unconditional.
+ */
+static void save_sigregs_ext(CPUS390XState *env, target_sigregs_ext *ext)
+{
+    int r;
+
+    for (r = 0; r < 16; r++) {
+        __put_user(env->vregs[r][1], &ext->vxrs_low[r]);
+        __put_user(env->vregs[r + 16][0], &ext->vxrs_high[r][0]);
+        __put_user(env->vregs[r + 16][1], &ext->vxrs_high[r][1]);
+    }
+}
+
 void setup_frame(int sig, struct target_sigaction *ka,
                  target_sigset_t *set, CPUS390XState *env)
 {
     sigframe *frame;
     abi_ulong frame_addr;
+    abi_ulong restorer;
 
     frame_addr = get_sigframe(ka, env, sizeof(*frame));
     trace_user_setup_frame(env, frame_addr);
     if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
-        goto give_sigsegv;
-    }
-
-    __put_user(set->sig[0], &frame->sc.oldmask[0]);
-
-    save_sigregs(env, &frame->sregs);
-
-    __put_user((abi_ulong)(unsigned long)&frame->sregs,
-               (abi_ulong *)&frame->sc.sregs);
-
-    /* Set up to return from userspace.  If provided, use a stub
-       already in userspace.  */
-    if (ka->sa_flags & TARGET_SA_RESTORER) {
-        env->regs[14] = (unsigned long)
-                ka->sa_restorer | PSW_ADDR_AMODE;
-    } else {
-        env->regs[14] = (frame_addr + offsetof(sigframe, retcode))
-                        | PSW_ADDR_AMODE;
-        __put_user(S390_SYSCALL_OPCODE | TARGET_NR_sigreturn,
-                   (uint16_t *)(frame->retcode));
+        force_sigsegv(sig);
+        return;
     }
 
     /* Set up backchain. */
     __put_user(env->regs[15], (abi_ulong *) frame);
 
+    /* Create struct sigcontext on the signal stack. */
+    /* Make sure that we're initializing all of oldmask. */
+    QEMU_BUILD_BUG_ON(ARRAY_SIZE(frame->sc.oldmask) != 1);
+    __put_user(set->sig[0], &frame->sc.oldmask[0]);
+    __put_user(frame_addr + offsetof(sigframe, sregs), &frame->sc.sregs);
+
+    /* Create _sigregs on the signal stack */
+    save_sigregs(env, &frame->sregs);
+
+    /*
+     * ??? The kernel uses regs->gprs[2] here, which is not yet the signo.
+     * Moreover the comment talks about allowing backtrace, which is really
+     * done by the r15 copy above.
+     */
+    __put_user(sig, &frame->signo);
+
+    /* Create sigregs_ext on the signal stack. */
+    save_sigregs_ext(env, &frame->sregs_ext);
+
+    /*
+     * Set up to return from userspace.
+     * If provided, use a stub already in userspace.
+     */
+    if (ka->sa_flags & TARGET_SA_RESTORER) {
+        restorer = ka->sa_restorer;
+    } else {
+        restorer = frame_addr + offsetof(sigframe, retcode);
+        __put_user(S390_SYSCALL_OPCODE | TARGET_NR_sigreturn,
+                   &frame->retcode);
+    }
+
     /* Set up registers for signal handler */
+    env->regs[14] = restorer;
     env->regs[15] = frame_addr;
-    env->psw.addr = (target_ulong) ka->_sa_handler | PSW_ADDR_AMODE;
+    /* Force default amode and default user address space control. */
+    env->psw.mask = PSW_MASK_64 | PSW_MASK_32 | PSW_ASC_PRIMARY
+                  | (env->psw.mask & ~PSW_MASK_ASC);
+    env->psw.addr = ka->_sa_handler;
 
-    env->regs[2] = sig; //map_signal(sig);
-    env->regs[3] = frame_addr += offsetof(typeof(*frame), sc);
+    env->regs[2] = sig;
+    env->regs[3] = frame_addr + offsetof(typeof(*frame), sc);
 
-    /* We forgot to include these in the sigcontext.
-       To avoid breaking binary compatibility, they are passed as args. */
-    env->regs[4] = 0; // FIXME: no clue... current->thread.trap_no;
-    env->regs[5] = 0; // FIXME: no clue... current->thread.prot_addr;
+    /*
+     * We forgot to include these in the sigcontext.
+     * To avoid breaking binary compatibility, they are passed as args.
+     */
+    env->regs[4] = 0; /* FIXME: regs->int_code & 127 */
+    env->regs[5] = 0; /* FIXME: regs->int_parm_long */
+    env->regs[6] = 0; /* FIXME: current->thread.last_break */
 
-    /* Place signal number on stack to allow backtrace from handler.  */
-    __put_user(env->regs[2], &frame->signo);
     unlock_user_struct(frame, frame_addr, 1);
-    return;
-
-give_sigsegv:
-    force_sigsegv(sig);
 }
 
 void setup_rt_frame(int sig, struct target_sigaction *ka,
                     target_siginfo_t *info,
                     target_sigset_t *set, CPUS390XState *env)
 {
-    int i;
     rt_sigframe *frame;
     abi_ulong frame_addr;
+    abi_ulong restorer;
+    abi_ulong uc_flags;
 
     frame_addr = get_sigframe(ka, env, sizeof *frame);
     trace_user_setup_rt_frame(env, frame_addr);
     if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
-        goto give_sigsegv;
-    }
-
-    tswap_siginfo(&frame->info, info);
-
-    /* Create the ucontext.  */
-    __put_user(0, &frame->uc.tuc_flags);
-    __put_user((abi_ulong)0, (abi_ulong *)&frame->uc.tuc_link);
-    target_save_altstack(&frame->uc.tuc_stack, env);
-    save_sigregs(env, &frame->uc.tuc_mcontext);
-    for (i = 0; i < TARGET_NSIG_WORDS; i++) {
-        __put_user((abi_ulong)set->sig[i],
-                   (abi_ulong *)&frame->uc.tuc_sigmask.sig[i]);
-    }
-
-    /* Set up to return from userspace.  If provided, use a stub
-       already in userspace.  */
-    if (ka->sa_flags & TARGET_SA_RESTORER) {
-        env->regs[14] = ka->sa_restorer | PSW_ADDR_AMODE;
-    } else {
-        env->regs[14] = (frame_addr + offsetof(typeof(*frame), retcode))
-                        | PSW_ADDR_AMODE;
-        __put_user(S390_SYSCALL_OPCODE | TARGET_NR_rt_sigreturn,
-                   (uint16_t *)(frame->retcode));
+        force_sigsegv(sig);
+        return;
     }
 
     /* Set up backchain. */
     __put_user(env->regs[15], (abi_ulong *) frame);
 
-    /* Set up registers for signal handler */
-    env->regs[15] = frame_addr;
-    env->psw.addr = (target_ulong) ka->_sa_handler | PSW_ADDR_AMODE;
+    /*
+     * Set up to return from userspace.
+     * If provided, use a stub already in userspace.
+     */
+    if (ka->sa_flags & TARGET_SA_RESTORER) {
+        restorer = ka->sa_restorer;
+    } else {
+        restorer = frame_addr + offsetof(typeof(*frame), retcode);
+        __put_user(S390_SYSCALL_OPCODE | TARGET_NR_rt_sigreturn,
+                   &frame->retcode);
+    }
 
-    env->regs[2] = sig; //map_signal(sig);
+    /* Create siginfo on the signal stack. */
+    tswap_siginfo(&frame->info, info);
+
+    /* Create ucontext on the signal stack. */
+    uc_flags = 0;
+    if (s390_has_feat(S390_FEAT_VECTOR)) {
+        uc_flags |= TARGET_UC_VXRS;
+    }
+    __put_user(uc_flags, &frame->uc.tuc_flags);
+    __put_user(0, &frame->uc.tuc_link);
+    target_save_altstack(&frame->uc.tuc_stack, env);
+    save_sigregs(env, &frame->uc.tuc_mcontext);
+    save_sigregs_ext(env, &frame->uc.tuc_mcontext_ext);
+    tswap_sigset(&frame->uc.tuc_sigmask, set);
+
+    /* Set up registers for signal handler */
+    env->regs[14] = restorer;
+    env->regs[15] = frame_addr;
+    /* Force default amode and default user address space control. */
+    env->psw.mask = PSW_MASK_64 | PSW_MASK_32 | PSW_ASC_PRIMARY
+                  | (env->psw.mask & ~PSW_MASK_ASC);
+    env->psw.addr = ka->_sa_handler;
+
+    env->regs[2] = sig;
     env->regs[3] = frame_addr + offsetof(typeof(*frame), info);
     env->regs[4] = frame_addr + offsetof(typeof(*frame), uc);
-    return;
-
-give_sigsegv:
-    force_sigsegv(sig);
+    env->regs[5] = 0; /* FIXME: current->thread.last_break */
 }
 
-static int
-restore_sigregs(CPUS390XState *env, target_sigregs *sc)
+static void restore_sigregs(CPUS390XState *env, target_sigregs *sc)
 {
-    int err = 0;
+    target_ulong prev_addr;
     int i;
 
     for (i = 0; i < 16; i++) {
         __get_user(env->regs[i], &sc->regs.gprs[i]);
     }
 
+    prev_addr = env->psw.addr;
     __get_user(env->psw.mask, &sc->regs.psw.mask);
-    trace_user_s390x_restore_sigregs(env, (unsigned long long)sc->regs.psw.addr,
-                                     (unsigned long long)env->psw.addr);
     __get_user(env->psw.addr, &sc->regs.psw.addr);
-    /* FIXME: 31-bit -> | PSW_ADDR_AMODE */
+    trace_user_s390x_restore_sigregs(env, env->psw.addr, prev_addr);
 
     for (i = 0; i < 16; i++) {
         __get_user(env->aregs[i], &sc->regs.acrs[i]);
@@ -257,8 +307,24 @@
     for (i = 0; i < 16; i++) {
         __get_user(*get_freg(env, i), &sc->fpregs.fprs[i]);
     }
+}
 
-    return err;
+static void restore_sigregs_ext(CPUS390XState *env, target_sigregs_ext *ext)
+{
+    int i;
+
+    /*
+     * if (MACHINE_HAS_VX) ...
+     * That said, we always allocate the stack storage and the
+     * space is always available in env.
+     */
+    for (i = 0; i < 16; ++i) {
+        __get_user(env->vregs[i][1], &ext->vxrs_low[i]);
+    }
+    for (i = 0; i < 16; ++i) {
+        __get_user(env->vregs[i + 16][0], &ext->vxrs_high[i][0]);
+        __get_user(env->vregs[i + 16][1], &ext->vxrs_high[i][1]);
+    }
 }
 
 long do_sigreturn(CPUS390XState *env)
@@ -270,23 +336,22 @@
 
     trace_user_do_sigreturn(env, frame_addr);
     if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) {
-        goto badframe;
+        force_sig(TARGET_SIGSEGV);
+        return -TARGET_QEMU_ESIGRETURN;
     }
+
+    /* Make sure that we're initializing all of target_set. */
+    QEMU_BUILD_BUG_ON(ARRAY_SIZE(target_set.sig) != 1);
     __get_user(target_set.sig[0], &frame->sc.oldmask[0]);
 
     target_to_host_sigset_internal(&set, &target_set);
     set_sigmask(&set); /* ~_BLOCKABLE? */
 
-    if (restore_sigregs(env, &frame->sregs)) {
-        goto badframe;
-    }
+    restore_sigregs(env, &frame->sregs);
+    restore_sigregs_ext(env, &frame->sregs_ext);
 
     unlock_user_struct(frame, frame_addr, 0);
     return -TARGET_QEMU_ESIGRETURN;
-
-badframe:
-    force_sig(TARGET_SIGSEGV);
-    return -TARGET_QEMU_ESIGRETURN;
 }
 
 long do_rt_sigreturn(CPUS390XState *env)
@@ -297,25 +362,18 @@
 
     trace_user_do_rt_sigreturn(env, frame_addr);
     if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) {
-        goto badframe;
+        force_sig(TARGET_SIGSEGV);
+        return -TARGET_QEMU_ESIGRETURN;
     }
     target_to_host_sigset(&set, &frame->uc.tuc_sigmask);
 
     set_sigmask(&set); /* ~_BLOCKABLE? */
 
-    if (restore_sigregs(env, &frame->uc.tuc_mcontext)) {
-        goto badframe;
-    }
+    restore_sigregs(env, &frame->uc.tuc_mcontext);
+    restore_sigregs_ext(env, &frame->uc.tuc_mcontext_ext);
 
-    if (do_sigaltstack(frame_addr + offsetof(rt_sigframe, uc.tuc_stack), 0,
-                       get_sp_from_cpustate(env)) == -EFAULT) {
-        goto badframe;
-    }
-    unlock_user_struct(frame, frame_addr, 0);
-    return -TARGET_QEMU_ESIGRETURN;
+    target_restore_altstack(&frame->uc.tuc_stack, env);
 
-badframe:
     unlock_user_struct(frame, frame_addr, 0);
-    force_sig(TARGET_SIGSEGV);
     return -TARGET_QEMU_ESIGRETURN;
 }
diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
index 29c1ee3..0451e65 100644
--- a/linux-user/sh4/signal.c
+++ b/linux-user/sh4/signal.c
@@ -323,12 +323,7 @@
     set_sigmask(&blocked);
 
     restore_sigcontext(regs, &frame->uc.tuc_mcontext);
-
-    if (do_sigaltstack(frame_addr +
-                       offsetof(struct target_rt_sigframe, uc.tuc_stack),
-                       0, get_sp_from_cpustate(regs)) == -EFAULT) {
-        goto badframe;
-    }
+    target_restore_altstack(&frame->uc.tuc_stack, regs);
 
     unlock_user_struct(frame, frame_addr, 0);
     return -TARGET_QEMU_ESIGRETURN;
diff --git a/linux-user/signal-common.h b/linux-user/signal-common.h
index 1df1068..ea86328 100644
--- a/linux-user/signal-common.h
+++ b/linux-user/signal-common.h
@@ -24,6 +24,7 @@
 int sas_ss_flags(unsigned long sp);
 abi_ulong target_sigsp(abi_ulong sp, struct target_sigaction *ka);
 void target_save_altstack(target_stack_t *uss, CPUArchState *env);
+abi_long target_restore_altstack(target_stack_t *uss, CPUArchState *env);
 
 static inline void target_sigemptyset(target_sigset_t *set)
 {
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 7eecec4..9016896 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -297,6 +297,50 @@
     __put_user(ts->sigaltstack_used.ss_size, &uss->ss_size);
 }
 
+abi_long target_restore_altstack(target_stack_t *uss, CPUArchState *env)
+{
+    TaskState *ts = (TaskState *)thread_cpu->opaque;
+    size_t minstacksize = TARGET_MINSIGSTKSZ;
+    target_stack_t ss;
+
+#if defined(TARGET_PPC64)
+    /* ELF V2 for PPC64 has a 4K minimum stack size for signal handlers */
+    struct image_info *image = ts->info;
+    if (get_ppc64_abi(image) > 1) {
+        minstacksize = 4096;
+    }
+#endif
+
+    __get_user(ss.ss_sp, &uss->ss_sp);
+    __get_user(ss.ss_size, &uss->ss_size);
+    __get_user(ss.ss_flags, &uss->ss_flags);
+
+    if (on_sig_stack(get_sp_from_cpustate(env))) {
+        return -TARGET_EPERM;
+    }
+
+    switch (ss.ss_flags) {
+    default:
+        return -TARGET_EINVAL;
+
+    case TARGET_SS_DISABLE:
+        ss.ss_size = 0;
+        ss.ss_sp = 0;
+        break;
+
+    case TARGET_SS_ONSTACK:
+    case 0:
+        if (ss.ss_size < minstacksize) {
+            return -TARGET_ENOMEM;
+        }
+        break;
+    }
+
+    ts->sigaltstack_used.ss_sp = ss.ss_sp;
+    ts->sigaltstack_used.ss_size = ss.ss_size;
+    return 0;
+}
+
 /* siginfo conversion */
 
 static inline void host_to_target_siginfo_noswap(target_siginfo_t *tinfo,
@@ -756,81 +800,49 @@
 
 /* do_sigaltstack() returns target values and errnos. */
 /* compare linux/kernel/signal.c:do_sigaltstack() */
-abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp)
+abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr,
+                        CPUArchState *env)
 {
-    int ret;
-    struct target_sigaltstack oss;
-    TaskState *ts = (TaskState *)thread_cpu->opaque;
+    target_stack_t oss, *uoss = NULL;
+    abi_long ret = -TARGET_EFAULT;
 
-    /* XXX: test errors */
-    if(uoss_addr)
-    {
-        __put_user(ts->sigaltstack_used.ss_sp, &oss.ss_sp);
-        __put_user(ts->sigaltstack_used.ss_size, &oss.ss_size);
-        __put_user(sas_ss_flags(sp), &oss.ss_flags);
+    if (uoss_addr) {
+        /* Verify writability now, but do not alter user memory yet. */
+        if (!lock_user_struct(VERIFY_WRITE, uoss, uoss_addr, 0)) {
+            goto out;
+        }
+        target_save_altstack(&oss, env);
     }
 
-    if(uss_addr)
-    {
-        struct target_sigaltstack *uss;
-        struct target_sigaltstack ss;
-        size_t minstacksize = TARGET_MINSIGSTKSZ;
+    if (uss_addr) {
+        target_stack_t *uss;
 
-#if defined(TARGET_PPC64)
-        /* ELF V2 for PPC64 has a 4K minimum stack size for signal handlers */
-        struct image_info *image = ((TaskState *)thread_cpu->opaque)->info;
-        if (get_ppc64_abi(image) > 1) {
-            minstacksize = 4096;
-        }
-#endif
-
-        ret = -TARGET_EFAULT;
         if (!lock_user_struct(VERIFY_READ, uss, uss_addr, 1)) {
             goto out;
         }
-        __get_user(ss.ss_sp, &uss->ss_sp);
-        __get_user(ss.ss_size, &uss->ss_size);
-        __get_user(ss.ss_flags, &uss->ss_flags);
-        unlock_user_struct(uss, uss_addr, 0);
-
-        ret = -TARGET_EPERM;
-        if (on_sig_stack(sp))
+        ret = target_restore_altstack(uss, env);
+        if (ret) {
             goto out;
-
-        ret = -TARGET_EINVAL;
-        if (ss.ss_flags != TARGET_SS_DISABLE
-            && ss.ss_flags != TARGET_SS_ONSTACK
-            && ss.ss_flags != 0)
-            goto out;
-
-        if (ss.ss_flags == TARGET_SS_DISABLE) {
-            ss.ss_size = 0;
-            ss.ss_sp = 0;
-        } else {
-            ret = -TARGET_ENOMEM;
-            if (ss.ss_size < minstacksize) {
-                goto out;
-            }
         }
-
-        ts->sigaltstack_used.ss_sp = ss.ss_sp;
-        ts->sigaltstack_used.ss_size = ss.ss_size;
     }
 
     if (uoss_addr) {
-        ret = -TARGET_EFAULT;
-        if (copy_to_user(uoss_addr, &oss, sizeof(oss)))
-            goto out;
+        memcpy(uoss, &oss, sizeof(oss));
+        unlock_user_struct(uoss, uoss_addr, 1);
+        uoss = NULL;
     }
-
     ret = 0;
-out:
+
+ out:
+    if (uoss) {
+        unlock_user_struct(uoss, uoss_addr, 0);
+    }
     return ret;
 }
 
 /* do_sigaction() return target values and host errnos */
 int do_sigaction(int sig, const struct target_sigaction *act,
-                 struct target_sigaction *oact)
+                 struct target_sigaction *oact, abi_ulong ka_restorer)
 {
     struct target_sigaction *k;
     struct sigaction act1;
@@ -864,6 +876,9 @@
 #ifdef TARGET_ARCH_HAS_SA_RESTORER
         __get_user(k->sa_restorer, &act->sa_restorer);
 #endif
+#ifdef TARGET_ARCH_HAS_KA_RESTORER
+        k->ka_restorer = ka_restorer;
+#endif
         /* To be swapped in target_to_host_sigset.  */
         k->sa_mask = act->sa_mask;
 
diff --git a/linux-user/sparc/signal.c b/linux-user/sparc/signal.c
index d27b7a3..0cc3db5 100644
--- a/linux-user/sparc/signal.c
+++ b/linux-user/sparc/signal.c
@@ -21,107 +21,96 @@
 #include "signal-common.h"
 #include "linux-user/trace.h"
 
-#define __SUNOS_MAXWIN   31
-
-/* This is what SunOS does, so shall I. */
-struct target_sigcontext {
-    abi_ulong sigc_onstack;      /* state to restore */
-
-    abi_ulong sigc_mask;         /* sigmask to restore */
-    abi_ulong sigc_sp;           /* stack pointer */
-    abi_ulong sigc_pc;           /* program counter */
-    abi_ulong sigc_npc;          /* next program counter */
-    abi_ulong sigc_psr;          /* for condition codes etc */
-    abi_ulong sigc_g1;           /* User uses these two registers */
-    abi_ulong sigc_o0;           /* within the trampoline code. */
-
-    /* Now comes information regarding the users window set
-         * at the time of the signal.
-         */
-    abi_ulong sigc_oswins;       /* outstanding windows */
-
-    /* stack ptrs for each regwin buf */
-    char *sigc_spbuf[__SUNOS_MAXWIN];
-
-    /* Windows to restore after signal */
-    struct {
-        abi_ulong locals[8];
-        abi_ulong ins[8];
-    } sigc_wbuf[__SUNOS_MAXWIN];
-};
-/* A Sparc stack frame */
-struct sparc_stackf {
+/* A Sparc register window */
+struct target_reg_window {
     abi_ulong locals[8];
     abi_ulong ins[8];
-    /* It's simpler to treat fp and callers_pc as elements of ins[]
-         * since we never need to access them ourselves.
-         */
-    char *structptr;
-    abi_ulong xargs[6];
-    abi_ulong xxargs[1];
 };
 
-typedef struct {
-    struct {
-        abi_ulong psr;
-        abi_ulong pc;
-        abi_ulong npc;
-        abi_ulong y;
-        abi_ulong u_regs[16]; /* globals and ins */
-    }               si_regs;
-    int             si_mask;
-} __siginfo_t;
+/* A Sparc stack frame. */
+struct target_stackf {
+    /*
+     * Since qemu does not reference fp or callers_pc directly,
+     * it's simpler to treat fp and callers_pc as elements of ins[],
+     * and then bundle locals[] and ins[] into reg_window.
+     */
+    struct target_reg_window win;
+    /*
+     * Similarly, bundle structptr and xxargs into xargs[].
+     * This portion of the struct is part of the function call abi,
+     * and belongs to the callee for spilling argument registers.
+     */
+    abi_ulong xargs[8];
+};
 
-typedef struct {
-    abi_ulong  si_float_regs[32];
-    unsigned   long si_fsr;
-    unsigned   long si_fpqdepth;
+struct target_siginfo_fpu {
+#ifdef TARGET_SPARC64
+    uint64_t si_double_regs[32];
+    uint64_t si_fsr;
+    uint64_t si_gsr;
+    uint64_t si_fprs;
+#else
+    /* It is more convenient for qemu to move doubles, not singles. */
+    uint64_t si_double_regs[16];
+    uint32_t si_fsr;
+    uint32_t si_fpqdepth;
     struct {
-        unsigned long *insn_addr;
-        unsigned long insn;
+        uint32_t insn_addr;
+        uint32_t insn;
     } si_fpqueue [16];
-} qemu_siginfo_fpu_t;
+#endif
+};
 
-
+#ifdef TARGET_ARCH_HAS_SETUP_FRAME
 struct target_signal_frame {
-    struct sparc_stackf ss;
-    __siginfo_t         info;
-    abi_ulong           fpu_save;
-    uint32_t            insns[2] QEMU_ALIGNED(8);
-    abi_ulong           extramask[TARGET_NSIG_WORDS - 1];
-    abi_ulong           extra_size; /* Should be 0 */
-    qemu_siginfo_fpu_t fpu_state;
+    struct target_stackf ss;
+    struct target_pt_regs regs;
+    uint32_t si_mask;
+    abi_ulong fpu_save;
+    uint32_t insns[2] QEMU_ALIGNED(8);
+    abi_ulong extramask[TARGET_NSIG_WORDS - 1];
+    abi_ulong extra_size; /* Should be 0 */
+    abi_ulong rwin_save;
 };
+#endif
+
 struct target_rt_signal_frame {
-    struct sparc_stackf ss;
-    siginfo_t           info;
-    abi_ulong           regs[20];
-    sigset_t            mask;
-    abi_ulong           fpu_save;
-    uint32_t            insns[2];
-    stack_t             stack;
-    unsigned int        extra_size; /* Should be 0 */
-    qemu_siginfo_fpu_t  fpu_state;
+    struct target_stackf ss;
+    target_siginfo_t info;
+    struct target_pt_regs regs;
+#if defined(TARGET_SPARC64) && !defined(TARGET_ABI32)
+    abi_ulong fpu_save;
+    target_stack_t stack;
+    target_sigset_t mask;
+#else
+    target_sigset_t mask;
+    abi_ulong fpu_save;
+    uint32_t insns[2];
+    target_stack_t stack;
+    abi_ulong extra_size; /* Should be 0 */
+#endif
+    abi_ulong rwin_save;
 };
 
-static inline abi_ulong get_sigframe(struct target_sigaction *sa, 
-                                     CPUSPARCState *env,
-                                     unsigned long framesize)
+static abi_ulong get_sigframe(struct target_sigaction *sa,
+                              CPUSPARCState *env,
+                              size_t framesize)
 {
     abi_ulong sp = get_sp_from_cpustate(env);
 
     /*
      * If we are on the alternate signal stack and would overflow it, don't.
      * Return an always-bogus address instead so we will die with SIGSEGV.
-         */
+     */
     if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize))) {
-            return -1;
+        return -1;
     }
 
     /* This is the X/Open sanctioned signal stack switching.  */
     sp = target_sigsp(sp, sa) - framesize;
 
-    /* Always align the stack frame.  This handles two cases.  First,
+    /*
+     * Always align the stack frame.  This handles two cases.  First,
      * sigaltstack need not be mindful of platform specific stack
      * alignment.  Second, if we took this signal because the stack
      * is not aligned properly, we'd like to take the signal cleanly
@@ -132,175 +121,310 @@
     return sp;
 }
 
-static int
-setup___siginfo(__siginfo_t *si, CPUSPARCState *env, abi_ulong mask)
+static void save_pt_regs(struct target_pt_regs *regs, CPUSPARCState *env)
 {
-    int err = 0, i;
+    int i;
 
-    __put_user(env->psr, &si->si_regs.psr);
-    __put_user(env->pc, &si->si_regs.pc);
-    __put_user(env->npc, &si->si_regs.npc);
-    __put_user(env->y, &si->si_regs.y);
-    for (i=0; i < 8; i++) {
-        __put_user(env->gregs[i], &si->si_regs.u_regs[i]);
+#if defined(TARGET_SPARC64) && !defined(TARGET_ABI32)
+    __put_user(sparc64_tstate(env), &regs->tstate);
+    /* TODO: magic should contain PT_REG_MAGIC + %tt. */
+    __put_user(0, &regs->magic);
+#else
+    __put_user(cpu_get_psr(env), &regs->psr);
+#endif
+
+    __put_user(env->pc, &regs->pc);
+    __put_user(env->npc, &regs->npc);
+    __put_user(env->y, &regs->y);
+
+    for (i = 0; i < 8; i++) {
+        __put_user(env->gregs[i], &regs->u_regs[i]);
     }
-    for (i=0; i < 8; i++) {
-        __put_user(env->regwptr[WREG_O0 + i], &si->si_regs.u_regs[i + 8]);
+    for (i = 0; i < 8; i++) {
+        __put_user(env->regwptr[WREG_O0 + i], &regs->u_regs[i + 8]);
     }
-    __put_user(mask, &si->si_mask);
-    return err;
 }
 
-#define NF_ALIGNEDSZ  (((sizeof(struct target_signal_frame) + 7) & (~7)))
+static void restore_pt_regs(struct target_pt_regs *regs, CPUSPARCState *env)
+{
+    int i;
 
+#if defined(TARGET_SPARC64) && !defined(TARGET_ABI32)
+    /* User can only change condition codes and %asi in %tstate. */
+    uint64_t tstate;
+    __get_user(tstate, &regs->tstate);
+    cpu_put_ccr(env, tstate >> 32);
+    env->asi = extract64(tstate, 24, 8);
+#else
+    /*
+     * User can only change condition codes and FPU enabling in %psr.
+     * But don't bother with FPU enabling, since a real kernel would
+     * just re-enable the FPU upon the next fpu trap.
+     */
+    uint32_t psr;
+    __get_user(psr, &regs->psr);
+    env->psr = (psr & PSR_ICC) | (env->psr & ~PSR_ICC);
+#endif
+
+    /* Note that pc and npc are handled in the caller. */
+
+    __get_user(env->y, &regs->y);
+
+    for (i = 0; i < 8; i++) {
+        __get_user(env->gregs[i], &regs->u_regs[i]);
+    }
+    for (i = 0; i < 8; i++) {
+        __get_user(env->regwptr[WREG_O0 + i], &regs->u_regs[i + 8]);
+    }
+}
+
+static void save_reg_win(struct target_reg_window *win, CPUSPARCState *env)
+{
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        __put_user(env->regwptr[i + WREG_L0], &win->locals[i]);
+    }
+    for (i = 0; i < 8; i++) {
+        __put_user(env->regwptr[i + WREG_I0], &win->ins[i]);
+    }
+}
+
+static void save_fpu(struct target_siginfo_fpu *fpu, CPUSPARCState *env)
+{
+    int i;
+
+#ifdef TARGET_SPARC64
+    for (i = 0; i < 32; ++i) {
+        __put_user(env->fpr[i].ll, &fpu->si_double_regs[i]);
+    }
+    __put_user(env->fsr, &fpu->si_fsr);
+    __put_user(env->gsr, &fpu->si_gsr);
+    __put_user(env->fprs, &fpu->si_fprs);
+#else
+    for (i = 0; i < 16; ++i) {
+        __put_user(env->fpr[i].ll, &fpu->si_double_regs[i]);
+    }
+    __put_user(env->fsr, &fpu->si_fsr);
+    __put_user(0, &fpu->si_fpqdepth);
+#endif
+}
+
+static void restore_fpu(struct target_siginfo_fpu *fpu, CPUSPARCState *env)
+{
+    int i;
+
+#ifdef TARGET_SPARC64
+    uint64_t fprs;
+    __get_user(fprs, &fpu->si_fprs);
+
+    /* In case the user mucks about with FPRS, restore as directed. */
+    if (fprs & FPRS_DL) {
+        for (i = 0; i < 16; ++i) {
+            __get_user(env->fpr[i].ll, &fpu->si_double_regs[i]);
+        }
+    }
+    if (fprs & FPRS_DU) {
+        for (i = 16; i < 32; ++i) {
+            __get_user(env->fpr[i].ll, &fpu->si_double_regs[i]);
+        }
+    }
+    __get_user(env->fsr, &fpu->si_fsr);
+    __get_user(env->gsr, &fpu->si_gsr);
+    env->fprs |= fprs;
+#else
+    for (i = 0; i < 16; ++i) {
+        __get_user(env->fpr[i].ll, &fpu->si_double_regs[i]);
+    }
+    __get_user(env->fsr, &fpu->si_fsr);
+#endif
+}
+
+#ifdef TARGET_ARCH_HAS_SETUP_FRAME
 void setup_frame(int sig, struct target_sigaction *ka,
                  target_sigset_t *set, CPUSPARCState *env)
 {
     abi_ulong sf_addr;
     struct target_signal_frame *sf;
-    int sigframe_size, err, i;
+    size_t sf_size = sizeof(*sf) + sizeof(struct target_siginfo_fpu);
+    int i;
 
-    /* 1. Make sure everything is clean */
-    //synchronize_user_stack();
-
-    sigframe_size = NF_ALIGNEDSZ;
-    sf_addr = get_sigframe(ka, env, sigframe_size);
+    sf_addr = get_sigframe(ka, env, sf_size);
     trace_user_setup_frame(env, sf_addr);
 
-    sf = lock_user(VERIFY_WRITE, sf_addr,
-                   sizeof(struct target_signal_frame), 0);
+    sf = lock_user(VERIFY_WRITE, sf_addr, sf_size, 0);
     if (!sf) {
-        goto sigsegv;
+        force_sigsegv(sig);
+        return;
     }
-#if 0
-    if (invalid_frame_pointer(sf, sigframe_size))
-        goto sigill_and_return;
-#endif
+
     /* 2. Save the current process state */
-    err = setup___siginfo(&sf->info, env, set->sig[0]);
+    save_pt_regs(&sf->regs, env);
     __put_user(0, &sf->extra_size);
 
-    //save_fpu_state(regs, &sf->fpu_state);
-    //__put_user(&sf->fpu_state, &sf->fpu_save);
+    save_fpu((struct target_siginfo_fpu *)(sf + 1), env);
+    __put_user(sf_addr + sizeof(*sf), &sf->fpu_save);
 
-    __put_user(set->sig[0], &sf->info.si_mask);
+    __put_user(0, &sf->rwin_save);  /* TODO: save_rwin_state */
+
+    __put_user(set->sig[0], &sf->si_mask);
     for (i = 0; i < TARGET_NSIG_WORDS - 1; i++) {
         __put_user(set->sig[i + 1], &sf->extramask[i]);
     }
 
-    for (i = 0; i < 8; i++) {
-        __put_user(env->regwptr[i + WREG_L0], &sf->ss.locals[i]);
-    }
-    for (i = 0; i < 8; i++) {
-        __put_user(env->regwptr[i + WREG_I0], &sf->ss.ins[i]);
-    }
-    if (err)
-        goto sigsegv;
+    save_reg_win(&sf->ss.win, env);
 
     /* 3. signal handler back-trampoline and parameters */
     env->regwptr[WREG_SP] = sf_addr;
     env->regwptr[WREG_O0] = sig;
     env->regwptr[WREG_O1] = sf_addr +
-            offsetof(struct target_signal_frame, info);
+            offsetof(struct target_signal_frame, regs);
     env->regwptr[WREG_O2] = sf_addr +
-            offsetof(struct target_signal_frame, info);
+            offsetof(struct target_signal_frame, regs);
 
     /* 4. signal handler */
     env->pc = ka->_sa_handler;
-    env->npc = (env->pc + 4);
+    env->npc = env->pc + 4;
+
     /* 5. return to kernel instructions */
     if (ka->ka_restorer) {
         env->regwptr[WREG_O7] = ka->ka_restorer;
     } else {
-        uint32_t val32;
-
         env->regwptr[WREG_O7] = sf_addr +
                 offsetof(struct target_signal_frame, insns) - 2 * 4;
 
         /* mov __NR_sigreturn, %g1 */
-        val32 = 0x821020d8;
-        __put_user(val32, &sf->insns[0]);
-
+        __put_user(0x821020d8u, &sf->insns[0]);
         /* t 0x10 */
-        val32 = 0x91d02010;
-        __put_user(val32, &sf->insns[1]);
+        __put_user(0x91d02010u, &sf->insns[1]);
     }
-    unlock_user(sf, sf_addr, sizeof(struct target_signal_frame));
-    return;
-#if 0
-sigill_and_return:
-    force_sig(TARGET_SIGILL);
-#endif
-sigsegv:
-    unlock_user(sf, sf_addr, sizeof(struct target_signal_frame));
-    force_sigsegv(sig);
+    unlock_user(sf, sf_addr, sf_size);
 }
+#endif /* TARGET_ARCH_HAS_SETUP_FRAME */
 
 void setup_rt_frame(int sig, struct target_sigaction *ka,
                     target_siginfo_t *info,
                     target_sigset_t *set, CPUSPARCState *env)
 {
-    qemu_log_mask(LOG_UNIMP, "setup_rt_frame: not implemented\n");
+    abi_ulong sf_addr;
+    struct target_rt_signal_frame *sf;
+    size_t sf_size = sizeof(*sf) + sizeof(struct target_siginfo_fpu);
+
+    sf_addr = get_sigframe(ka, env, sf_size);
+    trace_user_setup_rt_frame(env, sf_addr);
+
+    sf = lock_user(VERIFY_WRITE, sf_addr, sf_size, 0);
+    if (!sf) {
+        force_sigsegv(sig);
+        return;
+    }
+
+    /* 2. Save the current process state */
+    save_reg_win(&sf->ss.win, env);
+    save_pt_regs(&sf->regs, env);
+
+    save_fpu((struct target_siginfo_fpu *)(sf + 1), env);
+    __put_user(sf_addr + sizeof(*sf), &sf->fpu_save);
+
+    __put_user(0, &sf->rwin_save);  /* TODO: save_rwin_state */
+
+    tswap_siginfo(&sf->info, info);
+    tswap_sigset(&sf->mask, set);
+    target_save_altstack(&sf->stack, env);
+
+#ifdef TARGET_ABI32
+    __put_user(0, &sf->extra_size);
+#endif
+
+    /* 3. signal handler back-trampoline and parameters */
+    env->regwptr[WREG_SP] = sf_addr - TARGET_STACK_BIAS;
+    env->regwptr[WREG_O0] = sig;
+    env->regwptr[WREG_O1] =
+        sf_addr + offsetof(struct target_rt_signal_frame, info);
+#ifdef TARGET_ABI32
+    env->regwptr[WREG_O2] =
+        sf_addr + offsetof(struct target_rt_signal_frame, regs);
+#else
+    env->regwptr[WREG_O2] = env->regwptr[WREG_O1];
+#endif
+
+    /* 4. signal handler */
+    env->pc = ka->_sa_handler;
+    env->npc = env->pc + 4;
+
+    /* 5. return to kernel instructions */
+#ifdef TARGET_ABI32
+    if (ka->ka_restorer) {
+        env->regwptr[WREG_O7] = ka->ka_restorer;
+    } else {
+        env->regwptr[WREG_O7] =
+            sf_addr + offsetof(struct target_rt_signal_frame, insns) - 2 * 4;
+
+        /* mov __NR_rt_sigreturn, %g1 */
+        __put_user(0x82102065u, &sf->insns[0]);
+        /* t 0x10 */
+        __put_user(0x91d02010u, &sf->insns[1]);
+    }
+#else
+    env->regwptr[WREG_O7] = ka->ka_restorer;
+#endif
+
+    unlock_user(sf, sf_addr, sf_size);
 }
 
 long do_sigreturn(CPUSPARCState *env)
 {
+#ifdef TARGET_ARCH_HAS_SETUP_FRAME
     abi_ulong sf_addr;
-    struct target_signal_frame *sf;
-    abi_ulong up_psr, pc, npc;
+    struct target_signal_frame *sf = NULL;
+    abi_ulong pc, npc, ptr;
     target_sigset_t set;
     sigset_t host_set;
     int i;
 
     sf_addr = env->regwptr[WREG_SP];
     trace_user_do_sigreturn(env, sf_addr);
-    if (!lock_user_struct(VERIFY_READ, sf, sf_addr, 1)) {
+
+    /* 1. Make sure we are not getting garbage from the user */
+    if ((sf_addr & 15) || !lock_user_struct(VERIFY_READ, sf, sf_addr, 1)) {
         goto segv_and_exit;
     }
 
-    /* 1. Make sure we are not getting garbage from the user */
-
-    if (sf_addr & 3)
+    /* Make sure stack pointer is aligned.  */
+    __get_user(ptr, &sf->regs.u_regs[14]);
+    if (ptr & 7) {
         goto segv_and_exit;
+    }
 
-    __get_user(pc,  &sf->info.si_regs.pc);
-    __get_user(npc, &sf->info.si_regs.npc);
-
+    /* Make sure instruction pointers are aligned.  */
+    __get_user(pc, &sf->regs.pc);
+    __get_user(npc, &sf->regs.npc);
     if ((pc | npc) & 3) {
         goto segv_and_exit;
     }
 
     /* 2. Restore the state */
-    __get_user(up_psr, &sf->info.si_regs.psr);
-
-    /* User can only change condition codes and FPU enabling in %psr. */
-    env->psr = (up_psr & (PSR_ICC /* | PSR_EF */))
-            | (env->psr & ~(PSR_ICC /* | PSR_EF */));
-
+    restore_pt_regs(&sf->regs, env);
     env->pc = pc;
     env->npc = npc;
-    __get_user(env->y, &sf->info.si_regs.y);
-    for (i=0; i < 8; i++) {
-        __get_user(env->gregs[i], &sf->info.si_regs.u_regs[i]);
-    }
-    for (i=0; i < 8; i++) {
-        __get_user(env->regwptr[i + WREG_O0], &sf->info.si_regs.u_regs[i + 8]);
+
+    __get_user(ptr, &sf->fpu_save);
+    if (ptr) {
+        struct target_siginfo_fpu *fpu;
+        if ((ptr & 3) || !lock_user_struct(VERIFY_READ, fpu, ptr, 1)) {
+            goto segv_and_exit;
+        }
+        restore_fpu(fpu, env);
+        unlock_user_struct(fpu, ptr, 0);
     }
 
-    /* FIXME: implement FPU save/restore:
-     * __get_user(fpu_save, &sf->fpu_save);
-     * if (fpu_save) {
-     *     if (restore_fpu_state(env, fpu_save)) {
-     *         goto segv_and_exit;
-     *     }
-     * }
-     */
+    __get_user(ptr, &sf->rwin_save);
+    if (ptr) {
+        goto segv_and_exit;  /* TODO: restore_rwin */
+    }
 
-    /* This is pretty much atomic, no amount locking would prevent
-         * the races which exist anyways.
-         */
-    __get_user(set.sig[0], &sf->info.si_mask);
-    for(i = 1; i < TARGET_NSIG_WORDS; i++) {
+    __get_user(set.sig[0], &sf->si_mask);
+    for (i = 1; i < TARGET_NSIG_WORDS; i++) {
         __get_user(set.sig[i], &sf->extramask[i - 1]);
     }
 
@@ -310,17 +434,74 @@
     unlock_user_struct(sf, sf_addr, 0);
     return -TARGET_QEMU_ESIGRETURN;
 
-segv_and_exit:
+ segv_and_exit:
     unlock_user_struct(sf, sf_addr, 0);
     force_sig(TARGET_SIGSEGV);
     return -TARGET_QEMU_ESIGRETURN;
+#else
+    return -TARGET_ENOSYS;
+#endif
 }
 
 long do_rt_sigreturn(CPUSPARCState *env)
 {
-    trace_user_do_rt_sigreturn(env, 0);
-    qemu_log_mask(LOG_UNIMP, "do_rt_sigreturn: not implemented\n");
-    return -TARGET_ENOSYS;
+    abi_ulong sf_addr, tpc, tnpc, ptr;
+    struct target_rt_signal_frame *sf = NULL;
+    sigset_t set;
+
+    sf_addr = get_sp_from_cpustate(env);
+    trace_user_do_rt_sigreturn(env, sf_addr);
+
+    /* 1. Make sure we are not getting garbage from the user */
+    if ((sf_addr & 15) || !lock_user_struct(VERIFY_READ, sf, sf_addr, 1)) {
+        goto segv_and_exit;
+    }
+
+    /* Validate SP alignment.  */
+    __get_user(ptr, &sf->regs.u_regs[8 + WREG_SP]);
+    if ((ptr + TARGET_STACK_BIAS) & 7) {
+        goto segv_and_exit;
+    }
+
+    /* Validate PC and NPC alignment.  */
+    __get_user(tpc, &sf->regs.pc);
+    __get_user(tnpc, &sf->regs.npc);
+    if ((tpc | tnpc) & 3) {
+        goto segv_and_exit;
+    }
+
+    /* 2. Restore the state */
+    restore_pt_regs(&sf->regs, env);
+
+    __get_user(ptr, &sf->fpu_save);
+    if (ptr) {
+        struct target_siginfo_fpu *fpu;
+        if ((ptr & 7) || !lock_user_struct(VERIFY_READ, fpu, ptr, 1)) {
+            goto segv_and_exit;
+        }
+        restore_fpu(fpu, env);
+        unlock_user_struct(fpu, ptr, 0);
+    }
+
+    __get_user(ptr, &sf->rwin_save);
+    if (ptr) {
+        goto segv_and_exit;  /* TODO: restore_rwin_state */
+    }
+
+    target_restore_altstack(&sf->stack, env);
+    target_to_host_sigset(&set, &sf->mask);
+    set_sigmask(&set);
+
+    env->pc = tpc;
+    env->npc = tnpc;
+
+    unlock_user_struct(sf, sf_addr, 0);
+    return -TARGET_QEMU_ESIGRETURN;
+
+ segv_and_exit:
+    unlock_user_struct(sf, sf_addr, 0);
+    force_sig(TARGET_SIGSEGV);
+    return -TARGET_QEMU_ESIGRETURN;
 }
 
 #if defined(TARGET_SPARC64) && !defined(TARGET_ABI32)
@@ -388,14 +569,6 @@
     target_mcontext_t tuc_mcontext;
 };
 
-/* A V9 register window */
-struct target_reg_window {
-    abi_ulong locals[8];
-    abi_ulong ins[8];
-};
-
-#define TARGET_STACK_BIAS 2047
-
 /* {set, get}context() needed for 64-bit SparcLinux userland. */
 void sparc64_set_context(CPUSPARCState *env)
 {
diff --git a/linux-user/sparc/target_cpu.h b/linux-user/sparc/target_cpu.h
index 1fa1011..1f4bed5 100644
--- a/linux-user/sparc/target_cpu.h
+++ b/linux-user/sparc/target_cpu.h
@@ -20,6 +20,12 @@
 #ifndef SPARC_TARGET_CPU_H
 #define SPARC_TARGET_CPU_H
 
+#if defined(TARGET_SPARC64) && !defined(TARGET_ABI32)
+# define TARGET_STACK_BIAS 2047
+#else
+# define TARGET_STACK_BIAS 0
+#endif
+
 static inline void cpu_clone_regs_child(CPUSPARCState *env, target_ulong newsp,
                                         unsigned flags)
 {
@@ -40,6 +46,7 @@
 #endif
         /* ??? The kernel appears to copy one stack frame to the new stack. */
         /* ??? The kernel force aligns the new stack. */
+        /* Userspace provides a biased stack pointer value. */
         env->regwptr[WREG_SP] = newsp;
     }
 
@@ -77,7 +84,7 @@
 
 static inline abi_ulong get_sp_from_cpustate(CPUSPARCState *state)
 {
-    return state->regwptr[WREG_SP];
+    return state->regwptr[WREG_SP] + TARGET_STACK_BIAS;
 }
 
 #endif
diff --git a/linux-user/sparc/target_signal.h b/linux-user/sparc/target_signal.h
index 911a3f5..34f9a12 100644
--- a/linux-user/sparc/target_signal.h
+++ b/linux-user/sparc/target_signal.h
@@ -67,7 +67,9 @@
 #define TARGET_MINSIGSTKSZ	4096
 #define TARGET_SIGSTKSZ		16384
 
+#ifdef TARGET_ABI32
 #define TARGET_ARCH_HAS_SETUP_FRAME
+#endif
 
 /* bit-flags */
 #define TARGET_SS_AUTODISARM (1U << 31) /* disable sas during sighandling */
diff --git a/linux-user/sparc/target_structs.h b/linux-user/sparc/target_structs.h
index 9953540..beeace8 100644
--- a/linux-user/sparc/target_structs.h
+++ b/linux-user/sparc/target_structs.h
@@ -26,13 +26,10 @@
     abi_uint cuid;                      /* Creator's user ID.  */
     abi_uint cgid;                      /* Creator's group ID.  */
 #if TARGET_ABI_BITS == 32
-    abi_ushort __pad1;
-    abi_ushort mode;                    /* Read/write permission.  */
-    abi_ushort __pad2;
-#else
-    abi_ushort mode;
-    abi_ushort __pad1;
+    abi_ushort __pad0;
 #endif
+    abi_ushort mode;                    /* Read/write permission.  */
+    abi_ushort __pad1;
     abi_ushort __seq;                   /* Sequence number.  */
     uint64_t __unused1;
     uint64_t __unused2;
@@ -40,22 +37,17 @@
 
 struct target_shmid_ds {
     struct target_ipc_perm shm_perm;    /* operation permission struct */
-#if TARGET_ABI_BITS == 32
-    abi_uint __pad1;
-#endif
-    abi_ulong shm_atime;                /* time of last shmat() */
-#if TARGET_ABI_BITS == 32
-    abi_uint __pad2;
-#endif
-    abi_ulong shm_dtime;                /* time of last shmdt() */
-#if TARGET_ABI_BITS == 32
-    abi_uint __pad3;
-#endif
-    abi_ulong shm_ctime;                /* time of last change by shmctl() */
-    abi_long shm_segsz;                 /* size of segment in bytes */
-    abi_ulong shm_cpid;                 /* pid of creator */
-    abi_ulong shm_lpid;                 /* pid of last shmop */
-    abi_long shm_nattch;                /* number of current attaches */
+    /*
+     * Note that sparc32 splits these into hi/lo parts.
+     * For simplicity in qemu, always use a 64-bit type.
+     */
+    int64_t  shm_atime;                 /* last attach time */
+    int64_t  shm_dtime;                 /* last detach time */
+    int64_t  shm_ctime;                 /* last change time */
+    abi_ulong shm_segsz;                /* size of segment in bytes */
+    abi_int shm_cpid;                   /* pid of creator */
+    abi_int shm_lpid;                   /* pid of last shmop */
+    abi_ulong shm_nattch;               /* number of current attaches */
     abi_ulong __unused1;
     abi_ulong __unused2;
 };
diff --git a/linux-user/sparc/target_syscall.h b/linux-user/sparc/target_syscall.h
index d8ea04e..15d531f 100644
--- a/linux-user/sparc/target_syscall.h
+++ b/linux-user/sparc/target_syscall.h
@@ -3,18 +3,34 @@
 
 #include "target_errno.h"
 
+#if defined(TARGET_SPARC64) && !defined(TARGET_ABI32)
 struct target_pt_regs {
-	abi_ulong psr;
-	abi_ulong pc;
-	abi_ulong npc;
-	abi_ulong y;
-	abi_ulong u_regs[16];
+    abi_ulong u_regs[16];
+    abi_ulong tstate;
+    abi_ulong pc;
+    abi_ulong npc;
+    uint32_t y;
+    uint32_t magic;
 };
+#else
+struct target_pt_regs {
+    abi_ulong psr;
+    abi_ulong pc;
+    abi_ulong npc;
+    abi_ulong y;
+    abi_ulong u_regs[16];
+};
+#endif
 
-#define UNAME_MACHINE "sparc"
+#ifdef TARGET_SPARC64
+# define UNAME_MACHINE "sparc64"
+#else
+# define UNAME_MACHINE "sparc"
+#endif
 #define UNAME_MINIMUM_RELEASE "2.6.32"
 
-/* SPARC kernels don't define this in their Kconfig, but they have the
+/*
+ * SPARC kernels don't define this in their Kconfig, but they have the
  * same ABI as if they did, implemented by sparc-specific code which fishes
  * directly in the u_regs() struct for half the parameters in sparc_do_fork()
  * and copy_thread().
@@ -25,20 +41,24 @@
 #define TARGET_MCL_FUTURE  0x4000
 #define TARGET_MCL_ONFAULT 0x8000
 
-/* For SPARC SHMLBA is determined at runtime in the kernel, and
- * libc has to runtime-detect it using the hwcaps (see glibc
- * sysdeps/unix/sysv/linux/sparc/getshmlba; we follow the same
- * logic here, though we know we're not the sparc v9 64-bit case).
+/*
+ * For SPARC SHMLBA is determined at runtime in the kernel, and
+ * libc has to runtime-detect it using the hwcaps.
+ * See glibc sysdeps/unix/sysv/linux/sparc/getshmlba.
  */
 #define TARGET_FORCE_SHMLBA
 
 static inline abi_ulong target_shmlba(CPUSPARCState *env)
 {
+#ifdef TARGET_SPARC64
+    return MAX(TARGET_PAGE_SIZE, 16 * 1024);
+#else
     if (!(env->def.features & CPU_FEATURE_FLUSH)) {
         return 64 * 1024;
     } else {
         return 256 * 1024;
     }
+#endif
 }
 
 #endif /* SPARC_TARGET_SYSCALL_H */
diff --git a/linux-user/sparc64/cpu_loop.c b/linux-user/sparc64/cpu_loop.c
deleted file mode 100644
index 4fd44e1..0000000
--- a/linux-user/sparc64/cpu_loop.c
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- *  qemu user cpu loop
- *
- *  Copyright (c) 2003-2008 Fabrice Bellard
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "../sparc/cpu_loop.c"
diff --git a/linux-user/sparc64/meson.build b/linux-user/sparc64/meson.build
deleted file mode 100644
index 9527a40..0000000
--- a/linux-user/sparc64/meson.build
+++ /dev/null
@@ -1,5 +0,0 @@
-syscall_nr_generators += {
-  'sparc64': generator(sh,
-                       arguments: [ meson.current_source_dir() / 'syscallhdr.sh', '@INPUT@', '@OUTPUT@', '@EXTRA_ARGS@' ],
-                       output: '@BASENAME@_nr.h')
-}
diff --git a/linux-user/sparc64/signal.c b/linux-user/sparc64/signal.c
deleted file mode 100644
index 170ebac..0000000
--- a/linux-user/sparc64/signal.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- *  Emulation of Linux signals
- *
- *  Copyright (c) 2003 Fabrice Bellard
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-#include "../sparc/signal.c"
diff --git a/linux-user/sparc64/sockbits.h b/linux-user/sparc64/sockbits.h
deleted file mode 100644
index 658899e..0000000
--- a/linux-user/sparc64/sockbits.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../sparc/sockbits.h"
diff --git a/linux-user/sparc64/syscall.tbl b/linux-user/sparc64/syscall.tbl
deleted file mode 100644
index 4af114e..0000000
--- a/linux-user/sparc64/syscall.tbl
+++ /dev/null
@@ -1,487 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
-#
-# system call numbers and entry vectors for sparc
-#
-# The format is:
-# <number> <abi> <name> <entry point> <compat entry point>
-#
-# The <abi> can be common, 64, or 32 for this file.
-#
-0	common	restart_syscall		sys_restart_syscall
-1	32	exit			sys_exit			sparc_exit
-1	64	exit			sparc_exit
-2	common	fork			sys_fork
-3	common	read			sys_read
-4	common	write			sys_write
-5	common	open			sys_open			compat_sys_open
-6	common	close			sys_close
-7	common	wait4			sys_wait4			compat_sys_wait4
-8	common	creat			sys_creat
-9	common	link			sys_link
-10	common	unlink			sys_unlink
-11	32	execv			sunos_execv
-11	64	execv			sys_nis_syscall
-12	common	chdir			sys_chdir
-13	32    	chown			sys_chown16
-13	64	chown			sys_chown
-14	common	mknod			sys_mknod
-15	common	chmod			sys_chmod
-16	32	lchown			sys_lchown16
-16	64	lchown			sys_lchown
-17	common	brk			sys_brk
-18	common	perfctr			sys_nis_syscall
-19	common	lseek			sys_lseek			compat_sys_lseek
-20	common	getpid			sys_getpid
-21	common	capget			sys_capget
-22	common	capset			sys_capset
-23	32    	setuid			sys_setuid16
-23	64    	setuid			sys_setuid
-24	32	getuid			sys_getuid16
-24	64   	getuid			sys_getuid
-25	common	vmsplice		sys_vmsplice			compat_sys_vmsplice
-26	common	ptrace			sys_ptrace			compat_sys_ptrace
-27	common	alarm			sys_alarm
-28	common	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
-29	32    	pause			sys_pause
-29	64    	pause			sys_nis_syscall
-30	32	utime			sys_utime32
-30	64	utime			sys_utime
-31	32    	lchown32		sys_lchown
-32	32    	fchown32		sys_fchown
-33	common	access			sys_access
-34	common	nice			sys_nice
-35	32    	chown32			sys_chown
-36	common	sync			sys_sync
-37	common	kill			sys_kill
-38	common	stat			sys_newstat			compat_sys_newstat
-39	32	sendfile		sys_sendfile			compat_sys_sendfile
-39	64	sendfile		sys_sendfile64
-40	common	lstat			sys_newlstat			compat_sys_newlstat
-41	common	dup			sys_dup
-42	common	pipe			sys_sparc_pipe
-43	common	times			sys_times			compat_sys_times
-44	32    	getuid32		sys_getuid
-45	common	umount2			sys_umount
-46	32	setgid			sys_setgid16
-46	64	setgid			sys_setgid
-47	32	getgid			sys_getgid16
-47	64	getgid			sys_getgid
-48	common	signal			sys_signal
-49	32	geteuid			sys_geteuid16
-49	64	geteuid			sys_geteuid
-50	32	getegid			sys_getegid16
-50	64	getegid			sys_getegid
-51	common	acct			sys_acct
-52	64	memory_ordering		sys_memory_ordering
-53	32	getgid32		sys_getgid
-54	common	ioctl			sys_ioctl			compat_sys_ioctl
-55	common	reboot			sys_reboot
-56	32    	mmap2			sys_mmap2			sys32_mmap2
-57	common	symlink			sys_symlink
-58	common	readlink		sys_readlink
-59	32	execve			sys_execve			sys32_execve
-59	64	execve			sys64_execve
-60	common	umask			sys_umask
-61	common	chroot			sys_chroot
-62	common	fstat			sys_newfstat			compat_sys_newfstat
-63	common	fstat64			sys_fstat64			compat_sys_fstat64
-64	common	getpagesize		sys_getpagesize
-65	common	msync			sys_msync
-66	common	vfork			sys_vfork
-67	common	pread64			sys_pread64			compat_sys_pread64
-68	common	pwrite64		sys_pwrite64			compat_sys_pwrite64
-69	32    	geteuid32		sys_geteuid
-70	32	getegid32		sys_getegid
-71	common	mmap			sys_mmap
-72	32	setreuid32		sys_setreuid
-73	32	munmap			sys_munmap
-73	64	munmap			sys_64_munmap
-74	common	mprotect		sys_mprotect
-75	common	madvise			sys_madvise
-76	common	vhangup			sys_vhangup
-77	32	truncate64		sys_truncate64			compat_sys_truncate64
-78	common	mincore			sys_mincore
-79	32	getgroups		sys_getgroups16
-79	64	getgroups		sys_getgroups
-80	32	setgroups		sys_setgroups16
-80	64	setgroups		sys_setgroups
-81	common	getpgrp			sys_getpgrp
-82	32	setgroups32		sys_setgroups
-83	common	setitimer		sys_setitimer			compat_sys_setitimer
-84	32	ftruncate64		sys_ftruncate64			compat_sys_ftruncate64
-85	common	swapon			sys_swapon
-86	common	getitimer		sys_getitimer			compat_sys_getitimer
-87	32	setuid32		sys_setuid
-88	common	sethostname		sys_sethostname
-89	32	setgid32		sys_setgid
-90	common	dup2			sys_dup2
-91	32	setfsuid32		sys_setfsuid
-92	common	fcntl			sys_fcntl			compat_sys_fcntl
-93	common	select			sys_select
-94	32	setfsgid32		sys_setfsgid
-95	common	fsync			sys_fsync
-96	common	setpriority		sys_setpriority
-97	common	socket			sys_socket
-98	common	connect			sys_connect
-99	common	accept			sys_accept
-100	common	getpriority		sys_getpriority
-101	common	rt_sigreturn		sys_rt_sigreturn		sys32_rt_sigreturn
-102	common	rt_sigaction		sys_rt_sigaction		compat_sys_rt_sigaction
-103	common	rt_sigprocmask		sys_rt_sigprocmask		compat_sys_rt_sigprocmask
-104	common	rt_sigpending		sys_rt_sigpending		compat_sys_rt_sigpending
-105	32	rt_sigtimedwait		sys_rt_sigtimedwait_time32	compat_sys_rt_sigtimedwait_time32
-105	64	rt_sigtimedwait		sys_rt_sigtimedwait
-106	common	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
-107	common	rt_sigsuspend		sys_rt_sigsuspend		compat_sys_rt_sigsuspend
-108	32	setresuid32		sys_setresuid
-108	64	setresuid		sys_setresuid
-109	32	getresuid32		sys_getresuid
-109	64	getresuid		sys_getresuid
-110	32	setresgid32		sys_setresgid
-110	64	setresgid		sys_setresgid
-111	32	getresgid32		sys_getresgid
-111	64	getresgid		sys_getresgid
-112	32	setregid32		sys_setregid
-113	common	recvmsg			sys_recvmsg			compat_sys_recvmsg
-114	common	sendmsg			sys_sendmsg			compat_sys_sendmsg
-115	32	getgroups32		sys_getgroups
-116	common	gettimeofday		sys_gettimeofday		compat_sys_gettimeofday
-117	common	getrusage		sys_getrusage			compat_sys_getrusage
-118	common	getsockopt		sys_getsockopt			sys_getsockopt
-119	common	getcwd			sys_getcwd
-120	common	readv			sys_readv			compat_sys_readv
-121	common	writev			sys_writev			compat_sys_writev
-122	common	settimeofday		sys_settimeofday		compat_sys_settimeofday
-123	32	fchown			sys_fchown16
-123	64	fchown			sys_fchown
-124	common	fchmod			sys_fchmod
-125	common	recvfrom		sys_recvfrom
-126	32	setreuid		sys_setreuid16
-126	64	setreuid		sys_setreuid
-127	32	setregid		sys_setregid16
-127	64	setregid		sys_setregid
-128	common	rename			sys_rename
-129	common	truncate		sys_truncate			compat_sys_truncate
-130	common	ftruncate		sys_ftruncate			compat_sys_ftruncate
-131	common	flock			sys_flock
-132	common	lstat64			sys_lstat64			compat_sys_lstat64
-133	common	sendto			sys_sendto
-134	common	shutdown		sys_shutdown
-135	common	socketpair		sys_socketpair
-136	common	mkdir			sys_mkdir
-137	common	rmdir			sys_rmdir
-138	32	utimes			sys_utimes_time32
-138	64	utimes			sys_utimes
-139	common	stat64			sys_stat64			compat_sys_stat64
-140	common	sendfile64		sys_sendfile64
-141	common	getpeername		sys_getpeername
-142	32	futex			sys_futex_time32
-142	64	futex			sys_futex
-143	common	gettid			sys_gettid
-144	common	getrlimit		sys_getrlimit			compat_sys_getrlimit
-145	common	setrlimit		sys_setrlimit			compat_sys_setrlimit
-146	common	pivot_root		sys_pivot_root
-147	common	prctl			sys_prctl
-148	common	pciconfig_read		sys_pciconfig_read
-149	common	pciconfig_write		sys_pciconfig_write
-150	common	getsockname		sys_getsockname
-151	common	inotify_init		sys_inotify_init
-152	common	inotify_add_watch	sys_inotify_add_watch
-153	common	poll			sys_poll
-154	common	getdents64		sys_getdents64
-155	32	fcntl64			sys_fcntl64			compat_sys_fcntl64
-156	common	inotify_rm_watch	sys_inotify_rm_watch
-157	common	statfs			sys_statfs			compat_sys_statfs
-158	common	fstatfs			sys_fstatfs			compat_sys_fstatfs
-159	common	umount			sys_oldumount
-160	common	sched_set_affinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
-161	common	sched_get_affinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
-162	common	getdomainname		sys_getdomainname
-163	common	setdomainname		sys_setdomainname
-164	64	utrap_install		sys_utrap_install
-165	common	quotactl		sys_quotactl
-166	common	set_tid_address		sys_set_tid_address
-167	common	mount			sys_mount			compat_sys_mount
-168	common	ustat			sys_ustat			compat_sys_ustat
-169	common	setxattr		sys_setxattr
-170	common	lsetxattr		sys_lsetxattr
-171	common	fsetxattr		sys_fsetxattr
-172	common	getxattr		sys_getxattr
-173	common	lgetxattr		sys_lgetxattr
-174	common	getdents		sys_getdents			compat_sys_getdents
-175	common	setsid			sys_setsid
-176	common	fchdir			sys_fchdir
-177	common	fgetxattr		sys_fgetxattr
-178	common	listxattr		sys_listxattr
-179	common	llistxattr		sys_llistxattr
-180	common	flistxattr		sys_flistxattr
-181	common	removexattr		sys_removexattr
-182	common	lremovexattr		sys_lremovexattr
-183	32	sigpending		sys_sigpending			compat_sys_sigpending
-183	64	sigpending		sys_nis_syscall
-184	common	query_module		sys_ni_syscall
-185	common	setpgid			sys_setpgid
-186	common	fremovexattr		sys_fremovexattr
-187	common	tkill			sys_tkill
-188	32	exit_group		sys_exit_group			sparc_exit_group
-188	64	exit_group		sparc_exit_group
-189	common	uname			sys_newuname
-190	common	init_module		sys_init_module
-191	32	personality		sys_personality			sys_sparc64_personality
-191	64	personality		sys_sparc64_personality
-192	32	remap_file_pages	sys_sparc_remap_file_pages	sys_remap_file_pages
-192	64	remap_file_pages	sys_remap_file_pages
-193	common	epoll_create		sys_epoll_create
-194	common	epoll_ctl		sys_epoll_ctl
-195	common	epoll_wait		sys_epoll_wait
-196	common	ioprio_set		sys_ioprio_set
-197	common	getppid			sys_getppid
-198	32	sigaction		sys_sparc_sigaction		compat_sys_sparc_sigaction
-198	64	sigaction		sys_nis_syscall
-199	common	sgetmask		sys_sgetmask
-200	common	ssetmask		sys_ssetmask
-201	32	sigsuspend		sys_sigsuspend
-201	64	sigsuspend		sys_nis_syscall
-202	common	oldlstat		sys_newlstat			compat_sys_newlstat
-203	common	uselib			sys_uselib
-204	32	readdir			sys_old_readdir			compat_sys_old_readdir
-204	64	readdir			sys_nis_syscall
-205	common	readahead		sys_readahead			compat_sys_readahead
-206	common	socketcall		sys_socketcall			sys32_socketcall
-207	common	syslog			sys_syslog
-208	common	lookup_dcookie		sys_lookup_dcookie		compat_sys_lookup_dcookie
-209	common	fadvise64		sys_fadvise64			compat_sys_fadvise64
-210	common	fadvise64_64		sys_fadvise64_64		compat_sys_fadvise64_64
-211	common	tgkill			sys_tgkill
-212	common	waitpid			sys_waitpid
-213	common	swapoff			sys_swapoff
-214	common	sysinfo			sys_sysinfo			compat_sys_sysinfo
-215	32	ipc			sys_ipc				compat_sys_ipc
-215	64	ipc			sys_sparc_ipc
-216	32	sigreturn		sys_sigreturn			sys32_sigreturn
-216	64	sigreturn		sys_nis_syscall
-217	common	clone			sys_clone
-218	common	ioprio_get		sys_ioprio_get
-219	32	adjtimex		sys_adjtimex_time32
-219	64	adjtimex		sys_sparc_adjtimex
-220	32	sigprocmask		sys_sigprocmask			compat_sys_sigprocmask
-220	64	sigprocmask		sys_nis_syscall
-221	common	create_module		sys_ni_syscall
-222	common	delete_module		sys_delete_module
-223	common	get_kernel_syms		sys_ni_syscall
-224	common	getpgid			sys_getpgid
-225	common	bdflush			sys_bdflush
-226	common	sysfs			sys_sysfs
-227	common	afs_syscall		sys_nis_syscall
-228	common	setfsuid		sys_setfsuid16
-229	common	setfsgid		sys_setfsgid16
-230	common	_newselect		sys_select			compat_sys_select
-231	32	time			sys_time32
-232	common	splice			sys_splice
-233	32	stime			sys_stime32
-233	64	stime			sys_stime
-234	common	statfs64		sys_statfs64			compat_sys_statfs64
-235	common	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
-236	common	_llseek			sys_llseek
-237	common	mlock			sys_mlock
-238	common	munlock			sys_munlock
-239	common	mlockall		sys_mlockall
-240	common	munlockall		sys_munlockall
-241	common	sched_setparam		sys_sched_setparam
-242	common	sched_getparam		sys_sched_getparam
-243	common	sched_setscheduler	sys_sched_setscheduler
-244	common	sched_getscheduler	sys_sched_getscheduler
-245	common	sched_yield		sys_sched_yield
-246	common	sched_get_priority_max	sys_sched_get_priority_max
-247	common	sched_get_priority_min	sys_sched_get_priority_min
-248	32	sched_rr_get_interval	sys_sched_rr_get_interval_time32
-248	64	sched_rr_get_interval	sys_sched_rr_get_interval
-249	32	nanosleep		sys_nanosleep_time32
-249	64	nanosleep		sys_nanosleep
-250	32	mremap			sys_mremap
-250	64	mremap			sys_64_mremap
-251	common	_sysctl			sys_ni_syscall
-252	common	getsid			sys_getsid
-253	common	fdatasync		sys_fdatasync
-254	32	nfsservctl		sys_ni_syscall			sys_nis_syscall
-254	64	nfsservctl		sys_nis_syscall
-255	common	sync_file_range		sys_sync_file_range		compat_sys_sync_file_range
-256	32	clock_settime		sys_clock_settime32
-256	64	clock_settime		sys_clock_settime
-257	32	clock_gettime		sys_clock_gettime32
-257	64	clock_gettime		sys_clock_gettime
-258	32	clock_getres		sys_clock_getres_time32
-258	64	clock_getres		sys_clock_getres
-259	32	clock_nanosleep		sys_clock_nanosleep_time32
-259	64	clock_nanosleep		sys_clock_nanosleep
-260	common	sched_getaffinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
-261	common	sched_setaffinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
-262	32	timer_settime		sys_timer_settime32
-262	64	timer_settime		sys_timer_settime
-263	32	timer_gettime		sys_timer_gettime32
-263	64	timer_gettime		sys_timer_gettime
-264	common	timer_getoverrun	sys_timer_getoverrun
-265	common	timer_delete		sys_timer_delete
-266	common	timer_create		sys_timer_create		compat_sys_timer_create
-# 267 was vserver
-267	common	vserver			sys_nis_syscall
-268	common	io_setup		sys_io_setup			compat_sys_io_setup
-269	common	io_destroy		sys_io_destroy
-270	common	io_submit		sys_io_submit			compat_sys_io_submit
-271	common	io_cancel		sys_io_cancel
-272	32	io_getevents		sys_io_getevents_time32
-272	64	io_getevents		sys_io_getevents
-273	common	mq_open			sys_mq_open			compat_sys_mq_open
-274	common	mq_unlink		sys_mq_unlink
-275	32	mq_timedsend		sys_mq_timedsend_time32
-275	64	mq_timedsend		sys_mq_timedsend
-276	32	mq_timedreceive		sys_mq_timedreceive_time32
-276	64	mq_timedreceive		sys_mq_timedreceive
-277	common	mq_notify		sys_mq_notify			compat_sys_mq_notify
-278	common	mq_getsetattr		sys_mq_getsetattr		compat_sys_mq_getsetattr
-279	common	waitid			sys_waitid			compat_sys_waitid
-280	common	tee			sys_tee
-281	common	add_key			sys_add_key
-282	common	request_key		sys_request_key
-283	common	keyctl			sys_keyctl			compat_sys_keyctl
-284	common	openat			sys_openat			compat_sys_openat
-285	common	mkdirat			sys_mkdirat
-286	common	mknodat			sys_mknodat
-287	common	fchownat		sys_fchownat
-288	32	futimesat		sys_futimesat_time32
-288	64	futimesat		sys_futimesat
-289	common	fstatat64		sys_fstatat64			compat_sys_fstatat64
-290	common	unlinkat		sys_unlinkat
-291	common	renameat		sys_renameat
-292	common	linkat			sys_linkat
-293	common	symlinkat		sys_symlinkat
-294	common	readlinkat		sys_readlinkat
-295	common	fchmodat		sys_fchmodat
-296	common	faccessat		sys_faccessat
-297	32	pselect6		sys_pselect6_time32		compat_sys_pselect6_time32
-297	64	pselect6		sys_pselect6
-298	32	ppoll			sys_ppoll_time32		compat_sys_ppoll_time32
-298	64	ppoll			sys_ppoll
-299	common	unshare			sys_unshare
-300	common	set_robust_list		sys_set_robust_list		compat_sys_set_robust_list
-301	common	get_robust_list		sys_get_robust_list		compat_sys_get_robust_list
-302	common	migrate_pages		sys_migrate_pages		compat_sys_migrate_pages
-303	common	mbind			sys_mbind			compat_sys_mbind
-304	common	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy
-305	common	set_mempolicy		sys_set_mempolicy		compat_sys_set_mempolicy
-306	common	kexec_load		sys_kexec_load			compat_sys_kexec_load
-307	common	move_pages		sys_move_pages			compat_sys_move_pages
-308	common	getcpu			sys_getcpu
-309	common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
-310	32	utimensat		sys_utimensat_time32
-310	64	utimensat		sys_utimensat
-311	common	signalfd		sys_signalfd			compat_sys_signalfd
-312	common	timerfd_create		sys_timerfd_create
-313	common	eventfd			sys_eventfd
-314	common	fallocate		sys_fallocate			compat_sys_fallocate
-315	32	timerfd_settime		sys_timerfd_settime32
-315	64	timerfd_settime		sys_timerfd_settime
-316	32	timerfd_gettime		sys_timerfd_gettime32
-316	64	timerfd_gettime		sys_timerfd_gettime
-317	common	signalfd4		sys_signalfd4			compat_sys_signalfd4
-318	common	eventfd2		sys_eventfd2
-319	common	epoll_create1		sys_epoll_create1
-320	common	dup3			sys_dup3
-321	common	pipe2			sys_pipe2
-322	common	inotify_init1		sys_inotify_init1
-323	common	accept4			sys_accept4
-324	common	preadv			sys_preadv			compat_sys_preadv
-325	common	pwritev			sys_pwritev			compat_sys_pwritev
-326	common	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
-327	common	perf_event_open		sys_perf_event_open
-328	32	recvmmsg		sys_recvmmsg_time32		compat_sys_recvmmsg_time32
-328	64	recvmmsg		sys_recvmmsg
-329	common	fanotify_init		sys_fanotify_init
-330	common	fanotify_mark		sys_fanotify_mark		compat_sys_fanotify_mark
-331	common	prlimit64		sys_prlimit64
-332	common	name_to_handle_at	sys_name_to_handle_at
-333	common	open_by_handle_at	sys_open_by_handle_at		compat_sys_open_by_handle_at
-334	32	clock_adjtime		sys_clock_adjtime32
-334	64	clock_adjtime		sys_sparc_clock_adjtime
-335	common	syncfs			sys_syncfs
-336	common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
-337	common	setns			sys_setns
-338	common	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
-339	common	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
-340	32	kern_features		sys_ni_syscall			sys_kern_features
-340	64	kern_features		sys_kern_features
-341	common	kcmp			sys_kcmp
-342	common	finit_module		sys_finit_module
-343	common	sched_setattr		sys_sched_setattr
-344	common	sched_getattr		sys_sched_getattr
-345	common	renameat2		sys_renameat2
-346	common	seccomp			sys_seccomp
-347	common	getrandom		sys_getrandom
-348	common	memfd_create		sys_memfd_create
-349	common	bpf			sys_bpf
-350	32	execveat		sys_execveat			sys32_execveat
-350	64	execveat		sys64_execveat
-351	common	membarrier		sys_membarrier
-352	common	userfaultfd		sys_userfaultfd
-353	common	bind			sys_bind
-354	common	listen			sys_listen
-355	common	setsockopt		sys_setsockopt			sys_setsockopt
-356	common	mlock2			sys_mlock2
-357	common	copy_file_range		sys_copy_file_range
-358	common	preadv2			sys_preadv2			compat_sys_preadv2
-359	common	pwritev2		sys_pwritev2			compat_sys_pwritev2
-360	common	statx			sys_statx
-361	32	io_pgetevents		sys_io_pgetevents_time32	compat_sys_io_pgetevents
-361	64	io_pgetevents		sys_io_pgetevents
-362	common	pkey_mprotect		sys_pkey_mprotect
-363	common	pkey_alloc		sys_pkey_alloc
-364	common	pkey_free		sys_pkey_free
-365	common	rseq			sys_rseq
-# room for arch specific syscalls
-392	64	semtimedop			sys_semtimedop
-393	common	semget			sys_semget
-394	common	semctl			sys_semctl			compat_sys_semctl
-395	common	shmget			sys_shmget
-396	common	shmctl			sys_shmctl			compat_sys_shmctl
-397	common	shmat			sys_shmat			compat_sys_shmat
-398	common	shmdt			sys_shmdt
-399	common	msgget			sys_msgget
-400	common	msgsnd			sys_msgsnd			compat_sys_msgsnd
-401	common	msgrcv			sys_msgrcv			compat_sys_msgrcv
-402	common	msgctl			sys_msgctl			compat_sys_msgctl
-403	32	clock_gettime64			sys_clock_gettime		sys_clock_gettime
-404	32	clock_settime64			sys_clock_settime		sys_clock_settime
-405	32	clock_adjtime64			sys_clock_adjtime		sys_clock_adjtime
-406	32	clock_getres_time64		sys_clock_getres		sys_clock_getres
-407	32	clock_nanosleep_time64		sys_clock_nanosleep		sys_clock_nanosleep
-408	32	timer_gettime64			sys_timer_gettime		sys_timer_gettime
-409	32	timer_settime64			sys_timer_settime		sys_timer_settime
-410	32	timerfd_gettime64		sys_timerfd_gettime		sys_timerfd_gettime
-411	32	timerfd_settime64		sys_timerfd_settime		sys_timerfd_settime
-412	32	utimensat_time64		sys_utimensat			sys_utimensat
-413	32	pselect6_time64			sys_pselect6			compat_sys_pselect6_time64
-414	32	ppoll_time64			sys_ppoll			compat_sys_ppoll_time64
-416	32	io_pgetevents_time64		sys_io_pgetevents		sys_io_pgetevents
-417	32	recvmmsg_time64			sys_recvmmsg			compat_sys_recvmmsg_time64
-418	32	mq_timedsend_time64		sys_mq_timedsend		sys_mq_timedsend
-419	32	mq_timedreceive_time64		sys_mq_timedreceive		sys_mq_timedreceive
-420	32	semtimedop_time64		sys_semtimedop			sys_semtimedop
-421	32	rt_sigtimedwait_time64		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time64
-422	32	futex_time64			sys_futex			sys_futex
-423	32	sched_rr_get_interval_time64	sys_sched_rr_get_interval	sys_sched_rr_get_interval
-424	common	pidfd_send_signal		sys_pidfd_send_signal
-425	common	io_uring_setup			sys_io_uring_setup
-426	common	io_uring_enter			sys_io_uring_enter
-427	common	io_uring_register		sys_io_uring_register
-428	common	open_tree			sys_open_tree
-429	common	move_mount			sys_move_mount
-430	common	fsopen				sys_fsopen
-431	common	fsconfig			sys_fsconfig
-432	common	fsmount				sys_fsmount
-433	common	fspick				sys_fspick
-434	common	pidfd_open			sys_pidfd_open
-# 435 reserved for clone3
-436	common	close_range			sys_close_range
-437	common	openat2			sys_openat2
-438	common	pidfd_getfd			sys_pidfd_getfd
-439	common	faccessat2			sys_faccessat2
diff --git a/linux-user/sparc64/syscallhdr.sh b/linux-user/sparc64/syscallhdr.sh
deleted file mode 100644
index 08c7e39..0000000
--- a/linux-user/sparc64/syscallhdr.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-prefix="$4"
-offset="$5"
-
-fileguard=LINUX_USER_SPARC64_`basename "$out" | sed \
-    -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
-    -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
-    printf "#ifndef %s\n" "${fileguard}"
-    printf "#define %s\n" "${fileguard}"
-    printf "\n"
-
-    nxt=0
-    while read nr abi name entry compat ; do
-        if [ -z "$offset" ]; then
-            printf "#define TARGET_NR_%s%s\t%s\n" \
-                "${prefix}" "${name}" "${nr}"
-        else
-            printf "#define TARGET_NR_%s%s\t(%s + %s)\n" \
-                "${prefix}" "${name}" "${offset}" "${nr}"
-        fi
-        nxt=$((nr+1))
-    done
-
-    printf "\n"
-    printf "#endif /* %s */" "${fileguard}"
-) > "$out"
diff --git a/linux-user/sparc64/target_cpu.h b/linux-user/sparc64/target_cpu.h
deleted file mode 100644
index b22263d..0000000
--- a/linux-user/sparc64/target_cpu.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../sparc/target_cpu.h"
diff --git a/linux-user/sparc64/target_elf.h b/linux-user/sparc64/target_elf.h
deleted file mode 100644
index d6e388f..0000000
--- a/linux-user/sparc64/target_elf.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation, or (at your option) any
- * later version. See the COPYING file in the top-level directory.
- */
-
-#ifndef SPARC64_TARGET_ELF_H
-#define SPARC64_TARGET_ELF_H
-static inline const char *cpu_get_model(uint32_t eflags)
-{
-    return "TI UltraSparc II";
-}
-#endif
diff --git a/linux-user/sparc64/target_fcntl.h b/linux-user/sparc64/target_fcntl.h
deleted file mode 100644
index 053c774..0000000
--- a/linux-user/sparc64/target_fcntl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../sparc/target_fcntl.h"
diff --git a/linux-user/sparc64/target_signal.h b/linux-user/sparc64/target_signal.h
deleted file mode 100644
index 6a7d57d..0000000
--- a/linux-user/sparc64/target_signal.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../sparc/target_signal.h"
diff --git a/linux-user/sparc64/target_structs.h b/linux-user/sparc64/target_structs.h
deleted file mode 100644
index 4a8ed48..0000000
--- a/linux-user/sparc64/target_structs.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * SPARC64 specific structures for linux-user
- *
- * Copyright (c) 2013 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef SPARC64_TARGET_STRUCTS_H
-#define SPARC64_TARGET_STRUCTS_H
-
-struct target_ipc_perm {
-    abi_int __key;                      /* Key.  */
-    abi_uint uid;                       /* Owner's user ID.  */
-    abi_uint gid;                       /* Owner's group ID.  */
-    abi_uint cuid;                      /* Creator's user ID.  */
-    abi_uint cgid;                      /* Creator's group ID.  */
-    abi_ushort mode;                    /* Read/write permission.  */
-    abi_ushort __pad1;
-    abi_ushort __seq;                   /* Sequence number.  */
-    abi_ushort __pad2;
-    abi_ulong __unused1;
-    abi_ulong __unused2;
-};
-
-struct target_shmid_ds {
-    struct target_ipc_perm shm_perm;    /* operation permission struct */
-    abi_long shm_segsz;                 /* size of segment in bytes */
-    abi_ulong shm_atime;                /* time of last shmat() */
-#if TARGET_ABI_BITS == 32
-    abi_ulong __unused1;
-#endif
-    abi_ulong shm_dtime;                /* time of last shmdt() */
-#if TARGET_ABI_BITS == 32
-    abi_ulong __unused2;
-#endif
-    abi_ulong shm_ctime;                /* time of last change by shmctl() */
-#if TARGET_ABI_BITS == 32
-    abi_ulong __unused3;
-#endif
-    abi_int shm_cpid;                   /* pid of creator */
-    abi_int shm_lpid;                   /* pid of last shmop */
-    abi_ulong shm_nattch;               /* number of current attaches */
-    abi_ulong __unused4;
-    abi_ulong __unused5;
-};
-
-#endif
diff --git a/linux-user/sparc64/target_syscall.h b/linux-user/sparc64/target_syscall.h
deleted file mode 100644
index 696a68b..0000000
--- a/linux-user/sparc64/target_syscall.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef SPARC64_TARGET_SYSCALL_H
-#define SPARC64_TARGET_SYSCALL_H
-
-#include "../sparc/target_errno.h"
-
-struct target_pt_regs {
-	abi_ulong u_regs[16];
-	abi_ulong tstate;
-	abi_ulong pc;
-	abi_ulong npc;
-	abi_ulong y;
-	abi_ulong fprs;
-};
-
-#define UNAME_MACHINE "sparc64"
-#define UNAME_MINIMUM_RELEASE "2.6.32"
-
-/* SPARC kernels don't define this in their Kconfig, but they have the
- * same ABI as if they did, implemented by sparc-specific code which fishes
- * directly in the u_regs() struct for half the parameters in sparc_do_fork()
- * and copy_thread().
- */
-#define TARGET_CLONE_BACKWARDS
-#define TARGET_MINSIGSTKSZ      4096
-#define TARGET_MCL_CURRENT 0x2000
-#define TARGET_MCL_FUTURE  0x4000
-#define TARGET_MCL_ONFAULT 0x8000
-
-#define TARGET_FORCE_SHMLBA
-
-static inline abi_ulong target_shmlba(CPUSPARCState *env)
-{
-    return MAX(TARGET_PAGE_SIZE, 16 * 1024);
-}
-#endif /* SPARC64_TARGET_SYSCALL_H */
diff --git a/linux-user/sparc64/termbits.h b/linux-user/sparc64/termbits.h
deleted file mode 100644
index 1ab1e80..0000000
--- a/linux-user/sparc64/termbits.h
+++ /dev/null
@@ -1,291 +0,0 @@
-/* from asm/termbits.h */
-
-#ifndef LINUX_USER_SPARC64_TERMBITS_H
-#define LINUX_USER_SPARC64_TERMBITS_H
-
-#define TARGET_NCCS 19
-
-typedef unsigned char   target_cc_t;        /* cc_t */
-typedef unsigned int    target_speed_t;     /* speed_t */
-typedef unsigned int    target_tcflag_t;    /* tcflag_t */
-
-struct target_termios {
-    target_tcflag_t c_iflag;               /* input mode flags */
-    target_tcflag_t c_oflag;               /* output mode flags */
-    target_tcflag_t c_cflag;               /* control mode flags */
-    target_tcflag_t c_lflag;               /* local mode flags */
-    target_cc_t c_line;                    /* line discipline */
-    target_cc_t c_cc[TARGET_NCCS];         /* control characters */
-};
-
-
-/* c_cc characters */
-#define TARGET_VINTR    0
-#define TARGET_VQUIT    1
-#define TARGET_VERASE   2
-#define TARGET_VKILL    3
-#define TARGET_VEOF     4
-#define TARGET_VEOL     5
-#define TARGET_VEOL2    6
-#define TARGET_VSWTC    7
-#define TARGET_VSTART   8
-#define TARGET_VSTOP    9
-
-#define TARGET_VSUSP    10
-#define TARGET_VDSUSP   11  /* SunOS POSIX nicety I do believe... */
-#define TARGET_VREPRINT 12
-#define TARGET_VDISCARD 13
-#define TARGET_VWERASE  14
-#define TARGET_VLNEXT   15
-
-/* Kernel keeps vmin/vtime separated, user apps assume vmin/vtime is
- * shared with eof/eol
- */
-#define TARGET_VMIN     TARGET_VEOF
-#define TARGET_VTIME    TARGET_VEOL
-
-/* c_iflag bits */
-#define TARGET_IGNBRK	0x00000001
-#define TARGET_BRKINT	0x00000002
-#define TARGET_IGNPAR	0x00000004
-#define TARGET_PARMRK	0x00000008
-#define TARGET_INPCK	0x00000010
-#define TARGET_ISTRIP	0x00000020
-#define TARGET_INLCR	0x00000040
-#define TARGET_IGNCR	0x00000080
-#define TARGET_ICRNL	0x00000100
-#define TARGET_IUCLC	0x00000200
-#define TARGET_IXON	0x00000400
-#define TARGET_IXANY	0x00000800
-#define TARGET_IXOFF	0x00001000
-#define TARGET_IMAXBEL	0x00002000
-#define TARGET_IUTF8	0x00004000
-
-/* c_oflag bits */
-#define TARGET_OPOST	0x00000001
-#define TARGET_OLCUC	0x00000002
-#define TARGET_ONLCR	0x00000004
-#define TARGET_OCRNL	0x00000008
-#define TARGET_ONOCR	0x00000010
-#define TARGET_ONLRET	0x00000020
-#define TARGET_OFILL	0x00000040
-#define TARGET_OFDEL	0x00000080
-#define TARGET_NLDLY	0x00000100
-#define   TARGET_NL0	0x00000000
-#define   TARGET_NL1	0x00000100
-#define TARGET_CRDLY	0x00000600
-#define   TARGET_CR0	0x00000000
-#define   TARGET_CR1	0x00000200
-#define   TARGET_CR2	0x00000400
-#define   TARGET_CR3	0x00000600
-#define TARGET_TABDLY	0x00001800
-#define   TARGET_TAB0	0x00000000
-#define   TARGET_TAB1	0x00000800
-#define   TARGET_TAB2	0x00001000
-#define   TARGET_TAB3	0x00001800
-#define   TARGET_XTABS	0x00001800
-#define TARGET_BSDLY	0x00002000
-#define   TARGET_BS0	0x00000000
-#define   TARGET_BS1	0x00002000
-#define TARGET_VTDLY	0x00004000
-#define   TARGET_VT0	0x00000000
-#define   TARGET_VT1	0x00004000
-#define TARGET_FFDLY	0x00008000
-#define   TARGET_FF0	0x00000000
-#define   TARGET_FF1	0x00008000
-#define TARGET_PAGEOUT 0x00010000  /* SUNOS specific */
-#define TARGET_WRAP    0x00020000  /* SUNOS specific */
-
-/* c_cflag bit meaning */
-#define TARGET_CBAUD	  0x0000100f
-#define  TARGET_B0	  0x00000000   /* hang up */
-#define  TARGET_B50	  0x00000001
-#define  TARGET_B75	  0x00000002
-#define  TARGET_B110	  0x00000003
-#define  TARGET_B134	  0x00000004
-#define  TARGET_B150	  0x00000005
-#define  TARGET_B200	  0x00000006
-#define  TARGET_B300	  0x00000007
-#define  TARGET_B600	  0x00000008
-#define  TARGET_B1200	  0x00000009
-#define  TARGET_B1800	  0x0000000a
-#define  TARGET_B2400	  0x0000000b
-#define  TARGET_B4800	  0x0000000c
-#define  TARGET_B9600	  0x0000000d
-#define  TARGET_B19200	  0x0000000e
-#define  TARGET_B38400	  0x0000000f
-#define TARGET_EXTA      B19200
-#define TARGET_EXTB      B38400
-#define  TARGET_CSIZE    0x00000030
-#define   TARGET_CS5	  0x00000000
-#define   TARGET_CS6	  0x00000010
-#define   TARGET_CS7	  0x00000020
-#define   TARGET_CS8	  0x00000030
-#define TARGET_CSTOPB	  0x00000040
-#define TARGET_CREAD	  0x00000080
-#define TARGET_PARENB	  0x00000100
-#define TARGET_PARODD	  0x00000200
-#define TARGET_HUPCL	  0x00000400
-#define TARGET_CLOCAL	  0x00000800
-#define TARGET_CBAUDEX   0x00001000
-/* We'll never see these speeds with the Zilogs, but for completeness... */
-#define  TARGET_B57600   0x00001001
-#define  TARGET_B115200  0x00001002
-#define  TARGET_B230400  0x00001003
-#define  TARGET_B460800  0x00001004
-/* This is what we can do with the Zilogs. */
-#define  TARGET_B76800   0x00001005
-/* This is what we can do with the SAB82532. */
-#define  TARGET_B153600  0x00001006
-#define  TARGET_B307200  0x00001007
-#define  TARGET_B614400  0x00001008
-#define  TARGET_B921600  0x00001009
-/* And these are the rest... */
-#define  TARGET_B500000  0x0000100a
-#define  TARGET_B576000  0x0000100b
-#define TARGET_B1000000  0x0000100c
-#define TARGET_B1152000  0x0000100d
-#define TARGET_B1500000  0x0000100e
-#define TARGET_B2000000  0x0000100f
-/* These have totally bogus values and nobody uses them
-   so far. Later on we'd have to use say 0x10000x and
-   adjust CBAUD constant and drivers accordingly.
-#define B2500000  0x00001010
-#define B3000000  0x00001011
-#define B3500000  0x00001012
-#define B4000000  0x00001013  */
-#define TARGET_CIBAUD	  0x100f0000  /* input baud rate (not used) */
-#define TARGET_CMSPAR	  0x40000000  /* mark or space (stick) parity */
-#define TARGET_CRTSCTS	  0x80000000  /* flow control */
-
-/* c_lflag bits */
-#define TARGET_ISIG	0x00000001
-#define TARGET_ICANON	0x00000002
-#define TARGET_XCASE	0x00000004
-#define TARGET_ECHO	0x00000008
-#define TARGET_ECHOE	0x00000010
-#define TARGET_ECHOK	0x00000020
-#define TARGET_ECHONL	0x00000040
-#define TARGET_NOFLSH	0x00000080
-#define TARGET_TOSTOP	0x00000100
-#define TARGET_ECHOCTL	0x00000200
-#define TARGET_ECHOPRT	0x00000400
-#define TARGET_ECHOKE	0x00000800
-#define TARGET_DEFECHO  0x00001000  /* SUNOS thing, what is it? */
-#define TARGET_FLUSHO	0x00002000
-#define TARGET_PENDIN	0x00004000
-#define TARGET_IEXTEN	0x00008000
-#define TARGET_EXTPROC  0x00010000
-
-/* ioctls */
-
-/* Big T */
-#define TARGET_TCGETA		TARGET_IOR('T', 1, struct target_termio)
-#define TARGET_TCSETA		TARGET_IOW('T', 2, struct target_termio)
-#define TARGET_TCSETAW		TARGET_IOW('T', 3, struct target_termio)
-#define TARGET_TCSETAF		TARGET_IOW('T', 4, struct target_termio)
-#define TARGET_TCSBRK		TARGET_IO('T', 5)
-#define TARGET_TCXONC		TARGET_IO('T', 6)
-#define TARGET_TCFLSH		TARGET_IO('T', 7)
-#define TARGET_TCGETS		TARGET_IOR('T', 8, struct target_termios)
-#define TARGET_TCSETS		TARGET_IOW('T', 9, struct target_termios)
-#define TARGET_TCSETSW		TARGET_IOW('T', 10, struct target_termios)
-#define TARGET_TCSETSF		TARGET_IOW('T', 11, struct target_termios)
-
-/* Note that all the ioctls that are not available in Linux have a
- * double underscore on the front to: a) avoid some programs to
- * thing we support some ioctls under Linux (autoconfiguration stuff)
- */
-/* Little t */
-#define TARGET_TIOCGETD	TARGET_IOR('t', 0, int)
-#define TARGET_TIOCSETD	TARGET_IOW('t', 1, int)
-//#define __TIOCHPCL        _IO('t', 2) /* SunOS Specific */
-//#define __TIOCMODG        _IOR('t', 3, int) /* SunOS Specific */
-//#define __TIOCMODS        _IOW('t', 4, int) /* SunOS Specific */
-//#define __TIOCGETP        _IOR('t', 8, struct sgttyb) /* SunOS Specific */
-//#define __TIOCSETP        _IOW('t', 9, struct sgttyb) /* SunOS Specific */
-//#define __TIOCSETN        _IOW('t', 10, struct sgttyb) /* SunOS Specific */
-#define TARGET_TIOCEXCL	TARGET_IO('t', 13)
-#define TARGET_TIOCNXCL	TARGET_IO('t', 14)
-//#define __TIOCFLUSH       _IOW('t', 16, int) /* SunOS Specific */
-//#define __TIOCSETC        _IOW('t', 17, struct tchars) /* SunOS Specific */
-//#define __TIOCGETC        _IOR('t', 18, struct tchars) /* SunOS Specific */
-//#define __TIOCTCNTL       _IOW('t', 32, int) /* SunOS Specific */
-//#define __TIOCSIGNAL      _IOW('t', 33, int) /* SunOS Specific */
-//#define __TIOCSETX        _IOW('t', 34, int) /* SunOS Specific */
-//#define __TIOCGETX        _IOR('t', 35, int) /* SunOS Specific */
-#define TARGET_TIOCCONS	TARGET_IO('t', 36)
-//#define __TIOCSSIZE     _IOW('t', 37, struct sunos_ttysize) /* SunOS Specific */
-//#define __TIOCGSIZE     _IOR('t', 38, struct sunos_ttysize) /* SunOS Specific */
-#define TARGET_TIOCGSOFTCAR	TARGET_IOR('t', 100, int)
-#define TARGET_TIOCSSOFTCAR	TARGET_IOW('t', 101, int)
-//#define __TIOCUCNTL       _IOW('t', 102, int) /* SunOS Specific */
-#define TARGET_TIOCSWINSZ	TARGET_IOW('t', 103, struct winsize)
-#define TARGET_TIOCGWINSZ	TARGET_IOR('t', 104, struct winsize)
-//#define __TIOCREMOTE      _IOW('t', 105, int) /* SunOS Specific */
-#define TARGET_TIOCMGET	TARGET_IOR('t', 106, int)
-#define TARGET_TIOCMBIC	TARGET_IOW('t', 107, int)
-#define TARGET_TIOCMBIS	TARGET_IOW('t', 108, int)
-#define TARGET_TIOCMSET	TARGET_IOW('t', 109, int)
-#define TARGET_TIOCSTART       TARGET_IO('t', 110)
-#define TARGET_TIOCSTOP        TARGET_IO('t', 111)
-#define TARGET_TIOCPKT		TARGET_IOW('t', 112, int)
-#define TARGET_TIOCNOTTY	TARGET_IO('t', 113)
-#define TARGET_TIOCSTI		TARGET_IOW('t', 114, char)
-#define TARGET_TIOCOUTQ	TARGET_IOR('t', 115, int)
-//#define __TIOCGLTC        _IOR('t', 116, struct ltchars) /* SunOS Specific */
-//#define __TIOCSLTC        _IOW('t', 117, struct ltchars) /* SunOS Specific */
-/* 118 is the non-posix setpgrp tty ioctl */
-/* 119 is the non-posix getpgrp tty ioctl */
-//#define __TIOCCDTR        TARGET_IO('t', 120) /* SunOS Specific */
-//#define __TIOCSDTR        TARGET_IO('t', 121) /* SunOS Specific */
-#define TARGET_TIOCCBRK        TARGET_IO('t', 122)
-#define TARGET_TIOCSBRK        TARGET_IO('t', 123)
-//#define __TIOCLGET        TARGET_IOW('t', 124, int) /* SunOS Specific */
-//#define __TIOCLSET        TARGET_IOW('t', 125, int) /* SunOS Specific */
-//#define __TIOCLBIC        TARGET_IOW('t', 126, int) /* SunOS Specific */
-//#define __TIOCLBIS        TARGET_IOW('t', 127, int) /* SunOS Specific */
-//#define __TIOCISPACE      TARGET_IOR('t', 128, int) /* SunOS Specific */
-//#define __TIOCISIZE       TARGET_IOR('t', 129, int) /* SunOS Specific */
-#define TARGET_TIOCSPGRP	TARGET_IOW('t', 130, int)
-#define TARGET_TIOCGPGRP	TARGET_IOR('t', 131, int)
-#define TARGET_TIOCSCTTY	TARGET_IO('t', 132)
-#define TARGET_TIOCGSID	TARGET_IOR('t', 133, int)
-/* Get minor device of a pty master's FD -- Solaris equiv is ISPTM */
-#define TARGET_TIOCGPTN	TARGET_IOR('t', 134, unsigned int) /* Get Pty Number */
-#define TARGET_TIOCSPTLCK	TARGET_IOW('t', 135, int) /* Lock/unlock PTY */
-#define TARGET_TIOCGPTPEER      TARGET_IO('t', 137) /* Safely open the slave */
-
-/* Little f */
-#define TARGET_FIOCLEX		TARGET_IO('f', 1)
-#define TARGET_FIONCLEX	TARGET_IO('f', 2)
-#define TARGET_FIOASYNC	TARGET_IOW('f', 125, int)
-#define TARGET_FIONBIO		TARGET_IOW('f', 126, int)
-#define TARGET_FIONREAD	TARGET_IOR('f', 127, int)
-#define TARGET_TIOCINQ		TARGET_FIONREAD
-
-/* SCARY Rutgers local SunOS kernel hackery, perhaps I will support it
- * someday.  This is completely bogus, I know...
- */
-//#define __TCGETSTAT       TARGET_IO('T', 200) /* Rutgers specific */
-//#define __TCSETSTAT       TARGET_IO('T', 201) /* Rutgers specific */
-
-/* Linux specific, no SunOS equivalent. */
-#define TARGET_TIOCLINUX	0x541C
-#define TARGET_TIOCGSERIAL	0x541E
-#define TARGET_TIOCSSERIAL	0x541F
-#define TARGET_TCSBRKP		0x5425
-#define TARGET_TIOCTTYGSTRUCT	0x5426
-#define TARGET_TIOCSERCONFIG	0x5453
-#define TARGET_TIOCSERGWILD	0x5454
-#define TARGET_TIOCSERSWILD	0x5455
-#define TARGET_TIOCGLCKTRMIOS	0x5456
-#define TARGET_TIOCSLCKTRMIOS	0x5457
-#define TARGET_TIOCSERGSTRUCT	0x5458 /* For debugging only */
-#define TARGET_TIOCSERGETLSR   0x5459 /* Get line status register */
-#define TARGET_TIOCSERGETMULTI 0x545A /* Get multiport config  */
-#define TARGET_TIOCSERSETMULTI 0x545B /* Set multiport config */
-#define TARGET_TIOCMIWAIT	0x545C /* Wait input */
-#define TARGET_TIOCGICOUNT	0x545D /* Read serial port inline interrupt counts */
-
-#endif
diff --git a/linux-user/strace.c b/linux-user/strace.c
index e969121..cce0a5d 100644
--- a/linux-user/strace.c
+++ b/linux-user/strace.c
@@ -1109,6 +1109,12 @@
 #if defined(CLONE_NEWNET)
     FLAG_GENERIC(CLONE_NEWNET),
 #endif
+#if defined(CLONE_NEWCGROUP)
+    FLAG_GENERIC(CLONE_NEWCGROUP),
+#endif
+#if defined(CLONE_NEWTIME)
+    FLAG_GENERIC(CLONE_NEWTIME),
+#endif
 #if defined(CLONE_IO)
     FLAG_GENERIC(CLONE_IO),
 #endif
@@ -2335,7 +2341,7 @@
 }
 #endif
 
-#ifdef TARGET_NR__llseek
+#if defined(TARGET_NR__llseek) || defined(TARGET_NR_llseek)
 static void
 print__llseek(void *cpu_env, const struct syscallname *name,
               abi_long arg0, abi_long arg1, abi_long arg2,
@@ -2355,6 +2361,7 @@
     qemu_log("%s", whence);
     print_syscall_epilogue(name);
 }
+#define print_llseek print__llseek
 #endif
 
 #ifdef TARGET_NR_lseek
@@ -3467,6 +3474,18 @@
 }
 #endif
 
+#ifdef TARGET_NR_unshare /* print unshare(): arg0 via the clone_flags table */
+static void
+print_unshare(void *cpu_env, const struct syscallname *name,
+              abi_long arg0, abi_long arg1, abi_long arg2,
+              abi_long arg3, abi_long arg4, abi_long arg5)
+{
+    print_syscall_prologue(name);
+    print_flags(clone_flags, arg0, 1); /* arg1..arg5 are not printed */
+    print_syscall_epilogue(name);
+}
+#endif /* TARGET_NR_unshare */
+
 #ifdef TARGET_NR_utime
 static void
 print_utime(void *cpu_env, const struct syscallname *name,
diff --git a/linux-user/strace.list b/linux-user/strace.list
index 084048a..278596a 100644
--- a/linux-user/strace.list
+++ b/linux-user/strace.list
@@ -511,6 +511,9 @@
 #ifdef TARGET_NR__llseek
 { TARGET_NR__llseek, "_llseek" , NULL, print__llseek, NULL },
 #endif
+#ifdef TARGET_NR_llseek
+{ TARGET_NR_llseek, "llseek" , NULL, print_llseek, NULL },
+#endif
 #ifdef TARGET_NR_lock
 { TARGET_NR_lock, "lock" , NULL, NULL, NULL },
 #endif
@@ -1573,7 +1576,7 @@
 { TARGET_NR_unlinkat, "unlinkat" , NULL, print_unlinkat, NULL },
 #endif
 #ifdef TARGET_NR_unshare
-{ TARGET_NR_unshare, "unshare" , NULL, NULL, NULL },
+{ TARGET_NR_unshare, "unshare" , NULL, print_unshare, NULL },
 #endif
 #ifdef TARGET_NR_userfaultfd
 { TARGET_NR_userfaultfd, "userfaultfd" , NULL, NULL, NULL },
@@ -1665,3 +1668,6 @@
 #ifdef TARGET_NR_statx
 { TARGET_NR_statx, "statx", NULL, print_statx, NULL },
 #endif
+#ifdef TARGET_NR_copy_file_range
+{ TARGET_NR_copy_file_range, "copy_file_range", "%s(%d,%p,%d,%p,"TARGET_ABI_FMT_lu",%u)", NULL, NULL },
+#endif
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 95d79dd..c9f8120 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8980,29 +8980,7 @@
 #ifdef TARGET_NR_sigaction
     case TARGET_NR_sigaction:
         {
-#if defined(TARGET_ALPHA)
-            struct target_sigaction act, oact, *pact = 0;
-            struct target_old_sigaction *old_act;
-            if (arg2) {
-                if (!lock_user_struct(VERIFY_READ, old_act, arg2, 1))
-                    return -TARGET_EFAULT;
-                act._sa_handler = old_act->_sa_handler;
-                target_siginitset(&act.sa_mask, old_act->sa_mask);
-                act.sa_flags = old_act->sa_flags;
-                act.sa_restorer = 0;
-                unlock_user_struct(old_act, arg2, 0);
-                pact = &act;
-            }
-            ret = get_errno(do_sigaction(arg1, pact, &oact));
-            if (!is_error(ret) && arg3) {
-                if (!lock_user_struct(VERIFY_WRITE, old_act, arg3, 0))
-                    return -TARGET_EFAULT;
-                old_act->_sa_handler = oact._sa_handler;
-                old_act->sa_mask = oact.sa_mask.sig[0];
-                old_act->sa_flags = oact.sa_flags;
-                unlock_user_struct(old_act, arg3, 1);
-            }
-#elif defined(TARGET_MIPS)
+#if defined(TARGET_MIPS)
 	    struct target_sigaction act, oact, *pact, *old_act;
 
 	    if (arg2) {
@@ -9017,7 +8995,7 @@
 		pact = NULL;
 	    }
 
-	    ret = get_errno(do_sigaction(arg1, pact, &oact));
+        ret = get_errno(do_sigaction(arg1, pact, &oact, 0));
 
 	    if (!is_error(ret) && arg3) {
                 if (!lock_user_struct(VERIFY_WRITE, old_act, arg3, 0))
@@ -9039,23 +9017,24 @@
                 act._sa_handler = old_act->_sa_handler;
                 target_siginitset(&act.sa_mask, old_act->sa_mask);
                 act.sa_flags = old_act->sa_flags;
+#ifdef TARGET_ARCH_HAS_SA_RESTORER
                 act.sa_restorer = old_act->sa_restorer;
-#ifdef TARGET_ARCH_HAS_KA_RESTORER
-                act.ka_restorer = 0;
 #endif
                 unlock_user_struct(old_act, arg2, 0);
                 pact = &act;
             } else {
                 pact = NULL;
             }
-            ret = get_errno(do_sigaction(arg1, pact, &oact));
+            ret = get_errno(do_sigaction(arg1, pact, &oact, 0));
             if (!is_error(ret) && arg3) {
                 if (!lock_user_struct(VERIFY_WRITE, old_act, arg3, 0))
                     return -TARGET_EFAULT;
                 old_act->_sa_handler = oact._sa_handler;
                 old_act->sa_mask = oact.sa_mask.sig[0];
                 old_act->sa_flags = oact.sa_flags;
+#ifdef TARGET_ARCH_HAS_SA_RESTORER
                 old_act->sa_restorer = oact.sa_restorer;
+#endif
                 unlock_user_struct(old_act, arg3, 1);
             }
 #endif
@@ -9064,77 +9043,43 @@
 #endif
     case TARGET_NR_rt_sigaction:
         {
-#if defined(TARGET_ALPHA)
-            /* For Alpha and SPARC this is a 5 argument syscall, with
+            /*
+             * For Alpha and SPARC this is a 5 argument syscall, with
              * a 'restorer' parameter which must be copied into the
              * sa_restorer field of the sigaction struct.
              * For Alpha that 'restorer' is arg5; for SPARC it is arg4,
              * and arg5 is the sigsetsize.
-             * Alpha also has a separate rt_sigaction struct that it uses
-             * here; SPARC uses the usual sigaction struct.
              */
-            struct target_rt_sigaction *rt_act;
-            struct target_sigaction act, oact, *pact = 0;
-
-            if (arg4 != sizeof(target_sigset_t)) {
-                return -TARGET_EINVAL;
-            }
-            if (arg2) {
-                if (!lock_user_struct(VERIFY_READ, rt_act, arg2, 1))
-                    return -TARGET_EFAULT;
-                act._sa_handler = rt_act->_sa_handler;
-                act.sa_mask = rt_act->sa_mask;
-                act.sa_flags = rt_act->sa_flags;
-                act.sa_restorer = arg5;
-                unlock_user_struct(rt_act, arg2, 0);
-                pact = &act;
-            }
-            ret = get_errno(do_sigaction(arg1, pact, &oact));
-            if (!is_error(ret) && arg3) {
-                if (!lock_user_struct(VERIFY_WRITE, rt_act, arg3, 0))
-                    return -TARGET_EFAULT;
-                rt_act->_sa_handler = oact._sa_handler;
-                rt_act->sa_mask = oact.sa_mask;
-                rt_act->sa_flags = oact.sa_flags;
-                unlock_user_struct(rt_act, arg3, 1);
-            }
-#else
-#ifdef TARGET_SPARC
+#if defined(TARGET_ALPHA)
+            target_ulong sigsetsize = arg4;
+            target_ulong restorer = arg5;
+#elif defined(TARGET_SPARC)
             target_ulong restorer = arg4;
             target_ulong sigsetsize = arg5;
 #else
             target_ulong sigsetsize = arg4;
+            target_ulong restorer = 0;
 #endif
-            struct target_sigaction *act;
-            struct target_sigaction *oact;
+            struct target_sigaction *act = NULL;
+            struct target_sigaction *oact = NULL;
 
             if (sigsetsize != sizeof(target_sigset_t)) {
                 return -TARGET_EINVAL;
             }
-            if (arg2) {
-                if (!lock_user_struct(VERIFY_READ, act, arg2, 1)) {
-                    return -TARGET_EFAULT;
-                }
-#ifdef TARGET_ARCH_HAS_KA_RESTORER
-                act->ka_restorer = restorer;
-#endif
-            } else {
-                act = NULL;
+            if (arg2 && !lock_user_struct(VERIFY_READ, act, arg2, 1)) {
+                return -TARGET_EFAULT;
             }
-            if (arg3) {
-                if (!lock_user_struct(VERIFY_WRITE, oact, arg3, 0)) {
-                    ret = -TARGET_EFAULT;
-                    goto rt_sigaction_fail;
+            if (arg3 && !lock_user_struct(VERIFY_WRITE, oact, arg3, 0)) {
+                ret = -TARGET_EFAULT;
+            } else {
+                ret = get_errno(do_sigaction(arg1, act, oact, restorer));
+                if (oact) {
+                    unlock_user_struct(oact, arg3, 1);
                 }
-            } else
-                oact = NULL;
-            ret = get_errno(do_sigaction(arg1, act, oact));
-	rt_sigaction_fail:
-            if (act)
+            }
+            if (act) {
                 unlock_user_struct(act, arg2, 0);
-            if (oact)
-                unlock_user_struct(oact, arg3, 1);
-#endif
+            }
         }
         return ret;
 #ifdef TARGET_NR_sgetmask /* not on alpha */
@@ -11195,8 +11140,7 @@
         return ret;
     }
     case TARGET_NR_sigaltstack:
-        return do_sigaltstack(arg1, arg2,
-                              get_sp_from_cpustate((CPUArchState *)cpu_env));
+        return do_sigaltstack(arg1, arg2, cpu_env);
 
 #ifdef CONFIG_SENDFILE
 #ifdef TARGET_NR_sendfile
@@ -13245,8 +13189,9 @@
                 }
                 poutoff = &outoff;
             }
+            /* Do not sign-extend the count parameter. */
             ret = get_errno(safe_copy_file_range(arg1, pinoff, arg3, poutoff,
-                                                 arg5, arg6));
+                                                 (abi_ulong)arg5, arg6));
             if (!is_error(ret) && ret > 0) {
                 if (arg2) {
                     if (put_user_u64(inoff, arg2)) {
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 25be414..18b031a 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -492,7 +492,7 @@
                                const abi_ulong *old_sigset);
 struct target_sigaction;
 int do_sigaction(int sig, const struct target_sigaction *act,
-                 struct target_sigaction *oact);
+                 struct target_sigaction *oact, abi_ulong ka_restorer);
 
 #include "target_signal.h"
 
@@ -501,27 +501,12 @@
 #endif
 
 #if defined(TARGET_ALPHA)
-struct target_old_sigaction {
-    abi_ulong _sa_handler;
-    abi_ulong sa_mask;
-    int32_t sa_flags;
-};
+typedef int32_t target_old_sa_flags;
+#else
+typedef abi_ulong target_old_sa_flags;
+#endif
 
-struct target_rt_sigaction {
-    abi_ulong _sa_handler;
-    abi_ulong sa_flags;
-    target_sigset_t sa_mask;
-};
-
-/* This is the struct used inside the kernel.  The ka_restorer
-   field comes from the 5th argument to sys_rt_sigaction.  */
-struct target_sigaction {
-    abi_ulong _sa_handler;
-    abi_ulong sa_flags;
-    target_sigset_t sa_mask;
-    abi_ulong sa_restorer;
-};
-#elif defined(TARGET_MIPS)
+#if defined(TARGET_MIPS)
 struct target_sigaction {
 	uint32_t	sa_flags;
 #if defined(TARGET_ABI_MIPSN32)
@@ -539,7 +524,7 @@
 struct target_old_sigaction {
         abi_ulong _sa_handler;
         abi_ulong sa_mask;
-        abi_ulong sa_flags;
+        target_old_sa_flags sa_flags;
 #ifdef TARGET_ARCH_HAS_SA_RESTORER
         abi_ulong sa_restorer;
 #endif
diff --git a/linux-user/xtensa/signal.c b/linux-user/xtensa/signal.c
index 590f031..72771e1 100644
--- a/linux-user/xtensa/signal.c
+++ b/linux-user/xtensa/signal.c
@@ -253,12 +253,8 @@
     set_sigmask(&set);
 
     restore_sigcontext(env, frame);
+    target_restore_altstack(&frame->uc.tuc_stack, env);
 
-    if (do_sigaltstack(frame_addr +
-                       offsetof(struct target_rt_sigframe, uc.tuc_stack),
-                       0, get_sp_from_cpustate(env)) == -TARGET_EFAULT) {
-        goto badframe;
-    }
     unlock_user_struct(frame, frame_addr, 0);
     return -TARGET_QEMU_ESIGRETURN;
 
diff --git a/meson.build b/meson.build
index 8e16e05..1559e8d 100644
--- a/meson.build
+++ b/meson.build
@@ -1822,6 +1822,7 @@
     'hw/misc/macio',
     'hw/net',
     'hw/net/can',
+    'hw/nvme',
     'hw/nvram',
     'hw/pci',
     'hw/pci-host',
diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h
index 1c2d6d3..ad1116e 100644
--- a/target/mips/fpu_helper.h
+++ b/target/mips/fpu_helper.h
@@ -27,8 +27,14 @@
 
 static inline void restore_snan_bit_mode(CPUMIPSState *env)
 {
-    set_snan_bit_is_one((env->active_fpu.fcr31 & (1 << FCR31_NAN2008)) == 0,
-                        &env->active_fpu.fp_status);
+    bool nan2008 = env->active_fpu.fcr31 & (1 << FCR31_NAN2008);
+
+    /*
+     * FCR31.NAN2008 set: SNaNs are silenced in the usual way.
+     * FCR31.NAN2008 clear: SNaNs are not silenced; default NaNs are produced.
+     */
+    set_snan_bit_is_one(!nan2008, &env->active_fpu.fp_status);
+    set_default_nan_mode(!nan2008, &env->active_fpu.fp_status);
 }
 
 static inline void restore_fp_status(CPUMIPSState *env)
diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c
index 9ab04b2..9210e61 100644
--- a/target/ppc/arch_dump.c
+++ b/target/ppc/arch_dump.c
@@ -17,7 +17,6 @@
 #include "elf.h"
 #include "sysemu/dump.h"
 #include "sysemu/kvm.h"
-#include "exec/helper-proto.h"
 
 #ifdef TARGET_PPC64
 #define ELFCLASS ELFCLASS64
@@ -176,7 +175,7 @@
             vmxregset->avr[i].u64[1] = avr->u64[1];
         }
     }
-    vmxregset->vscr.u32[3] = cpu_to_dump32(s, helper_mfvscr(&cpu->env));
+    vmxregset->vscr.u32[3] = cpu_to_dump32(s, ppc_get_vscr(&cpu->env));
 }
 
 static void ppc_write_elf_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c
index e501a7f..d957d1a 100644
--- a/target/ppc/cpu.c
+++ b/target/ppc/cpu.c
@@ -20,6 +20,10 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "cpu-models.h"
+#include "cpu-qom.h"
+#include "exec/log.h"
+#include "fpu/softfloat-helpers.h"
+#include "mmu-hash64.h"
 
 target_ulong cpu_read_xer(CPUPPCState *env)
 {
@@ -45,3 +49,46 @@
                        (1ul << XER_OV) | (1ul << XER_CA) |
                        (1ul << XER_OV32) | (1ul << XER_CA32));
 }
+
+void ppc_store_vscr(CPUPPCState *env, uint32_t vscr)
+{
+    env->vscr = vscr & ~(1u << VSCR_SAT); /* SAT is stored separately */
+    /* SAT lives in vscr_sat: which bit is arbitrary, the rest are cleared. */
+    env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
+    env->vscr_sat.u64[1] = 0;
+    set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
+}
+
+uint32_t ppc_get_vscr(CPUPPCState *env)
+{
+    uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
+    return env->vscr | (sat << VSCR_SAT); /* fold SAT back into VSCR */
+}
+
+#ifdef CONFIG_SOFTMMU
+void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
+{
+    PowerPCCPU *cpu = env_archcpu(env);
+    qemu_log_mask(CPU_LOG_MMU, "%s: " TARGET_FMT_lx "\n", __func__, value);
+    assert(!cpu->vhyp); /* callers must not get here with cpu->vhyp set */
+#if defined(TARGET_PPC64)
+    if (mmu_is_64bit(env->mmu_model)) {
+        target_ulong sdr_mask = SDR_64_HTABORG | SDR_64_HTABSIZE;
+        target_ulong htabsize = value & SDR_64_HTABSIZE;
+
+        if (value & ~sdr_mask) {
+            error_report("Invalid bits 0x"TARGET_FMT_lx" set in SDR1",
+                         value & ~sdr_mask);
+            value &= sdr_mask; /* report, strip the stray bits, carry on */
+        }
+        if (htabsize > 28) {
+            error_report("Invalid HTABSIZE 0x" TARGET_FMT_lx" stored in SDR1",
+                         htabsize);
+            return; /* reject the write: SPR_SDR1 keeps its old value */
+        }
+    }
+#endif /* defined(TARGET_PPC64) */
+    /* FIXME: Should check for valid HTABMASK values in 32-bit case */
+    env->spr[SPR_SDR1] = value;
+}
+#endif /* CONFIG_SOFTMMU */
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 733a216..cab33a3 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -131,11 +131,7 @@
     POWERPC_EXCP_SYSCALL_VECTORED = 102, /* scv exception                     */
     /* EOL                                                                   */
     POWERPC_EXCP_NB       = 103,
-    /* QEMU exceptions: used internally during code translation              */
-    POWERPC_EXCP_STOP         = 0x200, /* stop translation                   */
-    POWERPC_EXCP_BRANCH       = 0x201, /* branch instruction                 */
     /* QEMU exceptions: special cases we want to stop translation            */
-    POWERPC_EXCP_SYNC         = 0x202, /* context synchronizing instruction  */
     POWERPC_EXCP_SYSCALL_USER = 0x203, /* System call in user mode only      */
 };
 
@@ -1297,6 +1293,7 @@
 void ppc_store_ptcr(CPUPPCState *env, target_ulong value);
 #endif /* !defined(CONFIG_USER_ONLY) */
 void ppc_store_msr(CPUPPCState *env, target_ulong value);
+void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val);
 
 void ppc_cpu_list(void);
 
@@ -2641,7 +2638,15 @@
     return (ppc_avr_t *)((uintptr_t)env + avr_full_offset(i));
 }
 
+static inline bool ppc_has_spr(PowerPCCPU *cpu, int spr)
+{
+    /* We can test whether the SPR is defined by checking for a valid name */
+    return cpu->env.spr_cb[spr].name != NULL; /* spr is not range-checked */
+}
+
 void dump_mmu(CPUPPCState *env);
 
 void ppc_maybe_bswap_register(CPUPPCState *env, uint8_t *mem_buf, int len);
+void ppc_store_vscr(CPUPPCState *env, uint32_t vscr);
+uint32_t ppc_get_vscr(CPUPPCState *env);
 #endif /* PPC_CPU_H */
diff --git a/target/ppc/translate_init.c.inc b/target/ppc/cpu_init.c
similarity index 89%
rename from target/ppc/translate_init.c.inc
rename to target/ppc/cpu_init.c
index 66e6a4a..22ecbcc 100644
--- a/target/ppc/translate_init.c.inc
+++ b/target/ppc/cpu_init.c
@@ -18,6 +18,7 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "qemu/osdep.h"
 #include "disas/dis-asm.h"
 #include "exec/gdbstub.h"
 #include "kvm_ppc.h"
@@ -42,682 +43,19 @@
 #include "fpu/softfloat.h"
 #include "qapi/qapi-commands-machine-target.h"
 
+#include "exec/helper-proto.h"
+#include "helper_regs.h"
+#include "internal.h"
+#include "spr_tcg.h"
+
 /* #define PPC_DEBUG_SPR */
-/* #define PPC_DUMP_SPR_ACCESSES */
 /* #define USE_APPLE_GDB */
 
-/*
- * Generic callbacks:
- * do nothing but store/retrieve spr value
- */
-static void spr_load_dump_spr(int sprn)
-{
-#ifdef PPC_DUMP_SPR_ACCESSES
-    TCGv_i32 t0 = tcg_const_i32(sprn);
-    gen_helper_load_dump_spr(cpu_env, t0);
-    tcg_temp_free_i32(t0);
-#endif
-}
-
-static void spr_read_generic(DisasContext *ctx, int gprn, int sprn)
-{
-    gen_load_spr(cpu_gpr[gprn], sprn);
-    spr_load_dump_spr(sprn);
-}
-
-static void spr_store_dump_spr(int sprn)
-{
-#ifdef PPC_DUMP_SPR_ACCESSES
-    TCGv_i32 t0 = tcg_const_i32(sprn);
-    gen_helper_store_dump_spr(cpu_env, t0);
-    tcg_temp_free_i32(t0);
-#endif
-}
-
-static void spr_write_generic(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_store_spr(sprn, cpu_gpr[gprn]);
-    spr_store_dump_spr(sprn);
-}
-
-#if !defined(CONFIG_USER_ONLY)
-static void spr_write_generic32(DisasContext *ctx, int sprn, int gprn)
-{
-#ifdef TARGET_PPC64
-    TCGv t0 = tcg_temp_new();
-    tcg_gen_ext32u_tl(t0, cpu_gpr[gprn]);
-    gen_store_spr(sprn, t0);
-    tcg_temp_free(t0);
-    spr_store_dump_spr(sprn);
-#else
-    spr_write_generic(ctx, sprn, gprn);
-#endif
-}
-
-static void spr_write_clear(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv t0 = tcg_temp_new();
-    TCGv t1 = tcg_temp_new();
-    gen_load_spr(t0, sprn);
-    tcg_gen_neg_tl(t1, cpu_gpr[gprn]);
-    tcg_gen_and_tl(t0, t0, t1);
-    gen_store_spr(sprn, t0);
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-}
-
-static void spr_access_nop(DisasContext *ctx, int sprn, int gprn)
-{
-}
-
-#endif
-
-/* SPR common to all PowerPC */
-/* XER */
-static void spr_read_xer(DisasContext *ctx, int gprn, int sprn)
-{
-    gen_read_xer(ctx, cpu_gpr[gprn]);
-}
-
-static void spr_write_xer(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_write_xer(cpu_gpr[gprn]);
-}
-
-/* LR */
-static void spr_read_lr(DisasContext *ctx, int gprn, int sprn)
-{
-    tcg_gen_mov_tl(cpu_gpr[gprn], cpu_lr);
-}
-
-static void spr_write_lr(DisasContext *ctx, int sprn, int gprn)
-{
-    tcg_gen_mov_tl(cpu_lr, cpu_gpr[gprn]);
-}
-
-/* CFAR */
-#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
-static void spr_read_cfar(DisasContext *ctx, int gprn, int sprn)
-{
-    tcg_gen_mov_tl(cpu_gpr[gprn], cpu_cfar);
-}
-
-static void spr_write_cfar(DisasContext *ctx, int sprn, int gprn)
-{
-    tcg_gen_mov_tl(cpu_cfar, cpu_gpr[gprn]);
-}
-#endif /* defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) */
-
-/* CTR */
-static void spr_read_ctr(DisasContext *ctx, int gprn, int sprn)
-{
-    tcg_gen_mov_tl(cpu_gpr[gprn], cpu_ctr);
-}
-
-static void spr_write_ctr(DisasContext *ctx, int sprn, int gprn)
-{
-    tcg_gen_mov_tl(cpu_ctr, cpu_gpr[gprn]);
-}
-
-/* User read access to SPR */
-/* USPRx */
-/* UMMCRx */
-/* UPMCx */
-/* USIA */
-/* UDECR */
-static void spr_read_ureg(DisasContext *ctx, int gprn, int sprn)
-{
-    gen_load_spr(cpu_gpr[gprn], sprn + 0x10);
-}
-
-#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
-static void spr_write_ureg(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_store_spr(sprn + 0x10, cpu_gpr[gprn]);
-}
-#endif
-
-/* SPR common to all non-embedded PowerPC */
-/* DECR */
-#if !defined(CONFIG_USER_ONLY)
-static void spr_read_decr(DisasContext *ctx, int gprn, int sprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_load_decr(cpu_gpr[gprn], cpu_env);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_stop_exception(ctx);
-    }
-}
-
-static void spr_write_decr(DisasContext *ctx, int sprn, int gprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_store_decr(cpu_env, cpu_gpr[gprn]);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_stop_exception(ctx);
-    }
-}
-#endif
-
-/* SPR common to all non-embedded PowerPC, except 601 */
-/* Time base */
-static void spr_read_tbl(DisasContext *ctx, int gprn, int sprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_load_tbl(cpu_gpr[gprn], cpu_env);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_end();
-        gen_stop_exception(ctx);
-    }
-}
-
-static void spr_read_tbu(DisasContext *ctx, int gprn, int sprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_load_tbu(cpu_gpr[gprn], cpu_env);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_end();
-        gen_stop_exception(ctx);
-    }
-}
-
-ATTRIBUTE_UNUSED
-static void spr_read_atbl(DisasContext *ctx, int gprn, int sprn)
-{
-    gen_helper_load_atbl(cpu_gpr[gprn], cpu_env);
-}
-
-ATTRIBUTE_UNUSED
-static void spr_read_atbu(DisasContext *ctx, int gprn, int sprn)
-{
-    gen_helper_load_atbu(cpu_gpr[gprn], cpu_env);
-}
-
-#if !defined(CONFIG_USER_ONLY)
-static void spr_write_tbl(DisasContext *ctx, int sprn, int gprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_store_tbl(cpu_env, cpu_gpr[gprn]);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_end();
-        gen_stop_exception(ctx);
-    }
-}
-
-static void spr_write_tbu(DisasContext *ctx, int sprn, int gprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_store_tbu(cpu_env, cpu_gpr[gprn]);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_end();
-        gen_stop_exception(ctx);
-    }
-}
-
-ATTRIBUTE_UNUSED
-static void spr_write_atbl(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_helper_store_atbl(cpu_env, cpu_gpr[gprn]);
-}
-
-ATTRIBUTE_UNUSED
-static void spr_write_atbu(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_helper_store_atbu(cpu_env, cpu_gpr[gprn]);
-}
-
-#if defined(TARGET_PPC64)
-ATTRIBUTE_UNUSED
-static void spr_read_purr(DisasContext *ctx, int gprn, int sprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_load_purr(cpu_gpr[gprn], cpu_env);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_stop_exception(ctx);
-    }
-}
-
-static void spr_write_purr(DisasContext *ctx, int sprn, int gprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_store_purr(cpu_env, cpu_gpr[gprn]);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_stop_exception(ctx);
-    }
-}
-
-/* HDECR */
-static void spr_read_hdecr(DisasContext *ctx, int gprn, int sprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_load_hdecr(cpu_gpr[gprn], cpu_env);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_end();
-        gen_stop_exception(ctx);
-    }
-}
-
-static void spr_write_hdecr(DisasContext *ctx, int sprn, int gprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_store_hdecr(cpu_env, cpu_gpr[gprn]);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_end();
-        gen_stop_exception(ctx);
-    }
-}
-
-static void spr_read_vtb(DisasContext *ctx, int gprn, int sprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_load_vtb(cpu_gpr[gprn], cpu_env);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_stop_exception(ctx);
-    }
-}
-
-static void spr_write_vtb(DisasContext *ctx, int sprn, int gprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_store_vtb(cpu_env, cpu_gpr[gprn]);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_stop_exception(ctx);
-    }
-}
-
-static void spr_write_tbu40(DisasContext *ctx, int sprn, int gprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_store_tbu40(cpu_env, cpu_gpr[gprn]);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_stop_exception(ctx);
-    }
-}
-
-#endif
-#endif
-
-#if !defined(CONFIG_USER_ONLY)
-/* IBAT0U...IBAT0U */
-/* IBAT0L...IBAT7L */
-static void spr_read_ibat(DisasContext *ctx, int gprn, int sprn)
-{
-    tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env,
-                  offsetof(CPUPPCState,
-                           IBAT[sprn & 1][(sprn - SPR_IBAT0U) / 2]));
-}
-
-static void spr_read_ibat_h(DisasContext *ctx, int gprn, int sprn)
-{
-    tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env,
-                  offsetof(CPUPPCState,
-                           IBAT[sprn & 1][((sprn - SPR_IBAT4U) / 2) + 4]));
-}
-
-static void spr_write_ibatu(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv_i32 t0 = tcg_const_i32((sprn - SPR_IBAT0U) / 2);
-    gen_helper_store_ibatu(cpu_env, t0, cpu_gpr[gprn]);
-    tcg_temp_free_i32(t0);
-}
-
-static void spr_write_ibatu_h(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv_i32 t0 = tcg_const_i32(((sprn - SPR_IBAT4U) / 2) + 4);
-    gen_helper_store_ibatu(cpu_env, t0, cpu_gpr[gprn]);
-    tcg_temp_free_i32(t0);
-}
-
-static void spr_write_ibatl(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv_i32 t0 = tcg_const_i32((sprn - SPR_IBAT0L) / 2);
-    gen_helper_store_ibatl(cpu_env, t0, cpu_gpr[gprn]);
-    tcg_temp_free_i32(t0);
-}
-
-static void spr_write_ibatl_h(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv_i32 t0 = tcg_const_i32(((sprn - SPR_IBAT4L) / 2) + 4);
-    gen_helper_store_ibatl(cpu_env, t0, cpu_gpr[gprn]);
-    tcg_temp_free_i32(t0);
-}
-
-/* DBAT0U...DBAT7U */
-/* DBAT0L...DBAT7L */
-static void spr_read_dbat(DisasContext *ctx, int gprn, int sprn)
-{
-    tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env,
-                  offsetof(CPUPPCState,
-                           DBAT[sprn & 1][(sprn - SPR_DBAT0U) / 2]));
-}
-
-static void spr_read_dbat_h(DisasContext *ctx, int gprn, int sprn)
-{
-    tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env,
-                  offsetof(CPUPPCState,
-                           DBAT[sprn & 1][((sprn - SPR_DBAT4U) / 2) + 4]));
-}
-
-static void spr_write_dbatu(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv_i32 t0 = tcg_const_i32((sprn - SPR_DBAT0U) / 2);
-    gen_helper_store_dbatu(cpu_env, t0, cpu_gpr[gprn]);
-    tcg_temp_free_i32(t0);
-}
-
-static void spr_write_dbatu_h(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv_i32 t0 = tcg_const_i32(((sprn - SPR_DBAT4U) / 2) + 4);
-    gen_helper_store_dbatu(cpu_env, t0, cpu_gpr[gprn]);
-    tcg_temp_free_i32(t0);
-}
-
-static void spr_write_dbatl(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv_i32 t0 = tcg_const_i32((sprn - SPR_DBAT0L) / 2);
-    gen_helper_store_dbatl(cpu_env, t0, cpu_gpr[gprn]);
-    tcg_temp_free_i32(t0);
-}
-
-static void spr_write_dbatl_h(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv_i32 t0 = tcg_const_i32(((sprn - SPR_DBAT4L) / 2) + 4);
-    gen_helper_store_dbatl(cpu_env, t0, cpu_gpr[gprn]);
-    tcg_temp_free_i32(t0);
-}
-
-/* SDR1 */
-static void spr_write_sdr1(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_helper_store_sdr1(cpu_env, cpu_gpr[gprn]);
-}
-
-#if defined(TARGET_PPC64)
-/* 64 bits PowerPC specific SPRs */
-/* PIDR */
-static void spr_write_pidr(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_helper_store_pidr(cpu_env, cpu_gpr[gprn]);
-}
-
-static void spr_write_lpidr(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_helper_store_lpidr(cpu_env, cpu_gpr[gprn]);
-}
-
-static void spr_read_hior(DisasContext *ctx, int gprn, int sprn)
-{
-    tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env, offsetof(CPUPPCState, excp_prefix));
-}
-
-static void spr_write_hior(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv t0 = tcg_temp_new();
-    tcg_gen_andi_tl(t0, cpu_gpr[gprn], 0x3FFFFF00000ULL);
-    tcg_gen_st_tl(t0, cpu_env, offsetof(CPUPPCState, excp_prefix));
-    tcg_temp_free(t0);
-}
-static void spr_write_ptcr(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_helper_store_ptcr(cpu_env, cpu_gpr[gprn]);
-}
-
-static void spr_write_pcr(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_helper_store_pcr(cpu_env, cpu_gpr[gprn]);
-}
-
-/* DPDES */
-static void spr_read_dpdes(DisasContext *ctx, int gprn, int sprn)
-{
-    gen_helper_load_dpdes(cpu_gpr[gprn], cpu_env);
-}
-
-static void spr_write_dpdes(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_helper_store_dpdes(cpu_env, cpu_gpr[gprn]);
-}
-#endif
-#endif
-
-/* PowerPC 601 specific registers */
-/* RTC */
-static void spr_read_601_rtcl(DisasContext *ctx, int gprn, int sprn)
-{
-    gen_helper_load_601_rtcl(cpu_gpr[gprn], cpu_env);
-}
-
-static void spr_read_601_rtcu(DisasContext *ctx, int gprn, int sprn)
-{
-    gen_helper_load_601_rtcu(cpu_gpr[gprn], cpu_env);
-}
-
-#if !defined(CONFIG_USER_ONLY)
-static void spr_write_601_rtcu(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_helper_store_601_rtcu(cpu_env, cpu_gpr[gprn]);
-}
-
-static void spr_write_601_rtcl(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_helper_store_601_rtcl(cpu_env, cpu_gpr[gprn]);
-}
-
-static void spr_write_hid0_601(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_helper_store_hid0_601(cpu_env, cpu_gpr[gprn]);
-    /* Must stop the translation as endianness may have changed */
-    gen_stop_exception(ctx);
-}
-#endif
-
-/* Unified bats */
-#if !defined(CONFIG_USER_ONLY)
-static void spr_read_601_ubat(DisasContext *ctx, int gprn, int sprn)
-{
-    tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env,
-                  offsetof(CPUPPCState,
-                           IBAT[sprn & 1][(sprn - SPR_IBAT0U) / 2]));
-}
-
-static void spr_write_601_ubatu(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv_i32 t0 = tcg_const_i32((sprn - SPR_IBAT0U) / 2);
-    gen_helper_store_601_batl(cpu_env, t0, cpu_gpr[gprn]);
-    tcg_temp_free_i32(t0);
-}
-
-static void spr_write_601_ubatl(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv_i32 t0 = tcg_const_i32((sprn - SPR_IBAT0U) / 2);
-    gen_helper_store_601_batu(cpu_env, t0, cpu_gpr[gprn]);
-    tcg_temp_free_i32(t0);
-}
-#endif
-
-/* PowerPC 40x specific registers */
-#if !defined(CONFIG_USER_ONLY)
-static void spr_read_40x_pit(DisasContext *ctx, int gprn, int sprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_load_40x_pit(cpu_gpr[gprn], cpu_env);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_stop_exception(ctx);
-    }
-}
-
-static void spr_write_40x_pit(DisasContext *ctx, int sprn, int gprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_store_40x_pit(cpu_env, cpu_gpr[gprn]);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_stop_exception(ctx);
-    }
-}
-
-static void spr_write_40x_dbcr0(DisasContext *ctx, int sprn, int gprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_store_spr(sprn, cpu_gpr[gprn]);
-    gen_helper_store_40x_dbcr0(cpu_env, cpu_gpr[gprn]);
-    /* We must stop translation as we may have rebooted */
-    gen_stop_exception(ctx);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_stop_exception(ctx);
-    }
-}
-
-static void spr_write_40x_sler(DisasContext *ctx, int sprn, int gprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_store_40x_sler(cpu_env, cpu_gpr[gprn]);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_stop_exception(ctx);
-    }
-}
-
-static void spr_write_booke_tcr(DisasContext *ctx, int sprn, int gprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_store_booke_tcr(cpu_env, cpu_gpr[gprn]);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_stop_exception(ctx);
-    }
-}
-
-static void spr_write_booke_tsr(DisasContext *ctx, int sprn, int gprn)
-{
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_helper_store_booke_tsr(cpu_env, cpu_gpr[gprn]);
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_stop_exception(ctx);
-    }
-}
-#endif
-
-/* PowerPC 403 specific registers */
-/* PBL1 / PBU1 / PBL2 / PBU2 */
-#if !defined(CONFIG_USER_ONLY)
-static void spr_read_403_pbr(DisasContext *ctx, int gprn, int sprn)
-{
-    tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env,
-                  offsetof(CPUPPCState, pb[sprn - SPR_403_PBL1]));
-}
-
-static void spr_write_403_pbr(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv_i32 t0 = tcg_const_i32(sprn - SPR_403_PBL1);
-    gen_helper_store_403_pbr(cpu_env, t0, cpu_gpr[gprn]);
-    tcg_temp_free_i32(t0);
-}
-
-static void spr_write_pir(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv t0 = tcg_temp_new();
-    tcg_gen_andi_tl(t0, cpu_gpr[gprn], 0xF);
-    gen_store_spr(SPR_PIR, t0);
-    tcg_temp_free(t0);
-}
-#endif
-
-/* SPE specific registers */
-static void spr_read_spefscr(DisasContext *ctx, int gprn, int sprn)
-{
-    TCGv_i32 t0 = tcg_temp_new_i32();
-    tcg_gen_ld_i32(t0, cpu_env, offsetof(CPUPPCState, spe_fscr));
-    tcg_gen_extu_i32_tl(cpu_gpr[gprn], t0);
-    tcg_temp_free_i32(t0);
-}
-
-static void spr_write_spefscr(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv_i32 t0 = tcg_temp_new_i32();
-    tcg_gen_trunc_tl_i32(t0, cpu_gpr[gprn]);
-    tcg_gen_st_i32(t0, cpu_env, offsetof(CPUPPCState, spe_fscr));
-    tcg_temp_free_i32(t0);
-}
-
-#if !defined(CONFIG_USER_ONLY)
-/* Callback used to write the exception vector base */
-static void spr_write_excp_prefix(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv t0 = tcg_temp_new();
-    tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUPPCState, ivpr_mask));
-    tcg_gen_and_tl(t0, t0, cpu_gpr[gprn]);
-    tcg_gen_st_tl(t0, cpu_env, offsetof(CPUPPCState, excp_prefix));
-    gen_store_spr(sprn, t0);
-    tcg_temp_free(t0);
-}
-
-static void spr_write_excp_vector(DisasContext *ctx, int sprn, int gprn)
-{
-    int sprn_offs;
-
-    if (sprn >= SPR_BOOKE_IVOR0 && sprn <= SPR_BOOKE_IVOR15) {
-        sprn_offs = sprn - SPR_BOOKE_IVOR0;
-    } else if (sprn >= SPR_BOOKE_IVOR32 && sprn <= SPR_BOOKE_IVOR37) {
-        sprn_offs = sprn - SPR_BOOKE_IVOR32 + 32;
-    } else if (sprn >= SPR_BOOKE_IVOR38 && sprn <= SPR_BOOKE_IVOR42) {
-        sprn_offs = sprn - SPR_BOOKE_IVOR38 + 38;
-    } else {
-        printf("Trying to write an unknown exception vector %d %03x\n",
-               sprn, sprn);
-        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
-        return;
-    }
-
-    TCGv t0 = tcg_temp_new();
-    tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUPPCState, ivor_mask));
-    tcg_gen_and_tl(t0, t0, cpu_gpr[gprn]);
-    tcg_gen_st_tl(t0, cpu_env, offsetof(CPUPPCState, excp_vectors[sprn_offs]));
-    gen_store_spr(sprn, t0);
-    tcg_temp_free(t0);
-}
-#endif
-
 static inline void vscr_init(CPUPPCState *env, uint32_t val)
 {
     /* Altivec always uses round-to-nearest */
     set_float_rounding_mode(float_round_nearest_even, &env->vec_status);
-    helper_mtvscr(env, val);
+    ppc_store_vscr(env, val);
 }
 
 /**
@@ -813,7 +151,7 @@
                      oea_read, oea_write, 0, ival)
 
 /* Generic PowerPC SPRs */
-static void gen_spr_generic(CPUPPCState *env)
+static void register_generic_sprs(CPUPPCState *env)
 {
     /* Integer processing */
     spr_register(env, SPR_XER, "XER",
@@ -858,7 +196,7 @@
 }
 
 /* SPR common to all non-embedded PowerPC, including 601 */
-static void gen_spr_ne_601(CPUPPCState *env)
+static void register_ne_601_sprs(CPUPPCState *env)
 {
     /* Exception processing */
     spr_register_kvm(env, SPR_DSISR, "DSISR",
@@ -877,7 +215,7 @@
 }
 
 /* Storage Description Register 1 */
-static void gen_spr_sdr1(CPUPPCState *env)
+static void register_sdr1_sprs(CPUPPCState *env)
 {
 #ifndef CONFIG_USER_ONLY
     if (env->has_hv_mode) {
@@ -900,7 +238,7 @@
 }
 
 /* BATs 0-3 */
-static void gen_low_BATs(CPUPPCState *env)
+static void register_low_BATs(CPUPPCState *env)
 {
 #if !defined(CONFIG_USER_ONLY)
     spr_register(env, SPR_IBAT0U, "IBAT0U",
@@ -972,7 +310,7 @@
 }
 
 /* BATs 4-7 */
-static void gen_high_BATs(CPUPPCState *env)
+static void register_high_BATs(CPUPPCState *env)
 {
 #if !defined(CONFIG_USER_ONLY)
     spr_register(env, SPR_IBAT4U, "IBAT4U",
@@ -1044,7 +382,7 @@
 }
 
 /* Generic PowerPC time base */
-static void gen_tbl(CPUPPCState *env)
+static void register_tbl(CPUPPCState *env)
 {
     spr_register(env, SPR_VTBL,  "TBL",
                  &spr_read_tbl, SPR_NOACCESS,
@@ -1065,7 +403,7 @@
 }
 
 /* Softare table search registers */
-static void gen_6xx_7xx_soft_tlb(CPUPPCState *env, int nb_tlbs, int nb_ways)
+static void register_6xx_7xx_soft_tlb(CPUPPCState *env, int nb_tlbs, int nb_ways)
 {
 #if !defined(CONFIG_USER_ONLY)
     env->nb_tlb = nb_tlbs;
@@ -1104,7 +442,7 @@
 }
 
 /* SPR common to MPC755 and G2 */
-static void gen_spr_G2_755(CPUPPCState *env)
+static void register_G2_755_sprs(CPUPPCState *env)
 {
     /* SGPRs */
     spr_register(env, SPR_SPRG4, "SPRG4",
@@ -1126,7 +464,7 @@
 }
 
 /* SPR common to all 7xx PowerPC implementations */
-static void gen_spr_7xx(CPUPPCState *env)
+static void register_7xx_sprs(CPUPPCState *env)
 {
     /* Breakpoints */
     /* XXX : not implemented */
@@ -1225,106 +563,7 @@
 }
 
 #ifdef TARGET_PPC64
-#ifndef CONFIG_USER_ONLY
-static void spr_write_amr(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv t0 = tcg_temp_new();
-    TCGv t1 = tcg_temp_new();
-    TCGv t2 = tcg_temp_new();
-
-    /*
-     * Note, the HV=1 PR=0 case is handled earlier by simply using
-     * spr_write_generic for HV mode in the SPR table
-     */
-
-    /* Build insertion mask into t1 based on context */
-    if (ctx->pr) {
-        gen_load_spr(t1, SPR_UAMOR);
-    } else {
-        gen_load_spr(t1, SPR_AMOR);
-    }
-
-    /* Mask new bits into t2 */
-    tcg_gen_and_tl(t2, t1, cpu_gpr[gprn]);
-
-    /* Load AMR and clear new bits in t0 */
-    gen_load_spr(t0, SPR_AMR);
-    tcg_gen_andc_tl(t0, t0, t1);
-
-    /* Or'in new bits and write it out */
-    tcg_gen_or_tl(t0, t0, t2);
-    gen_store_spr(SPR_AMR, t0);
-    spr_store_dump_spr(SPR_AMR);
-
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-    tcg_temp_free(t2);
-}
-
-static void spr_write_uamor(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv t0 = tcg_temp_new();
-    TCGv t1 = tcg_temp_new();
-    TCGv t2 = tcg_temp_new();
-
-    /*
-     * Note, the HV=1 case is handled earlier by simply using
-     * spr_write_generic for HV mode in the SPR table
-     */
-
-    /* Build insertion mask into t1 based on context */
-    gen_load_spr(t1, SPR_AMOR);
-
-    /* Mask new bits into t2 */
-    tcg_gen_and_tl(t2, t1, cpu_gpr[gprn]);
-
-    /* Load AMR and clear new bits in t0 */
-    gen_load_spr(t0, SPR_UAMOR);
-    tcg_gen_andc_tl(t0, t0, t1);
-
-    /* Or'in new bits and write it out */
-    tcg_gen_or_tl(t0, t0, t2);
-    gen_store_spr(SPR_UAMOR, t0);
-    spr_store_dump_spr(SPR_UAMOR);
-
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-    tcg_temp_free(t2);
-}
-
-static void spr_write_iamr(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv t0 = tcg_temp_new();
-    TCGv t1 = tcg_temp_new();
-    TCGv t2 = tcg_temp_new();
-
-    /*
-     * Note, the HV=1 case is handled earlier by simply using
-     * spr_write_generic for HV mode in the SPR table
-     */
-
-    /* Build insertion mask into t1 based on context */
-    gen_load_spr(t1, SPR_AMOR);
-
-    /* Mask new bits into t2 */
-    tcg_gen_and_tl(t2, t1, cpu_gpr[gprn]);
-
-    /* Load AMR and clear new bits in t0 */
-    gen_load_spr(t0, SPR_IAMR);
-    tcg_gen_andc_tl(t0, t0, t1);
-
-    /* Or'in new bits and write it out */
-    tcg_gen_or_tl(t0, t0, t2);
-    gen_store_spr(SPR_IAMR, t0);
-    spr_store_dump_spr(SPR_IAMR);
-
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-    tcg_temp_free(t2);
-}
-#endif /* CONFIG_USER_ONLY */
-
-static void gen_spr_amr(CPUPPCState *env)
+static void register_amr_sprs(CPUPPCState *env)
 {
 #ifndef CONFIG_USER_ONLY
     /*
@@ -1356,7 +595,7 @@
 #endif /* !CONFIG_USER_ONLY */
 }
 
-static void gen_spr_iamr(CPUPPCState *env)
+static void register_iamr_sprs(CPUPPCState *env)
 {
 #ifndef CONFIG_USER_ONLY
     spr_register_kvm_hv(env, SPR_IAMR, "IAMR",
@@ -1368,16 +607,7 @@
 }
 #endif /* TARGET_PPC64 */
 
-#ifndef CONFIG_USER_ONLY
-static void spr_read_thrm(DisasContext *ctx, int gprn, int sprn)
-{
-    gen_helper_fixup_thrm(cpu_env);
-    gen_load_spr(cpu_gpr[gprn], sprn);
-    spr_load_dump_spr(sprn);
-}
-#endif /* !CONFIG_USER_ONLY */
-
-static void gen_spr_thrm(CPUPPCState *env)
+static void register_thrm_sprs(CPUPPCState *env)
 {
     /* Thermal management */
     /* XXX : not implemented */
@@ -1398,7 +628,7 @@
 }
 
 /* SPR specific to PowerPC 604 implementation */
-static void gen_spr_604(CPUPPCState *env)
+static void register_604_sprs(CPUPPCState *env)
 {
     /* Processor identification */
     spr_register(env, SPR_PIR, "PIR",
@@ -1451,7 +681,7 @@
 }
 
 /* SPR specific to PowerPC 603 implementation */
-static void gen_spr_603(CPUPPCState *env)
+static void register_603_sprs(CPUPPCState *env)
 {
     /* External access control */
     /* XXX : not implemented */
@@ -1469,7 +699,7 @@
 }
 
 /* SPR specific to PowerPC G2 implementation */
-static void gen_spr_G2(CPUPPCState *env)
+static void register_G2_sprs(CPUPPCState *env)
 {
     /* Memory base address */
     /* MBAR */
@@ -1521,7 +751,7 @@
 }
 
 /* SPR specific to PowerPC 602 implementation */
-static void gen_spr_602(CPUPPCState *env)
+static void register_602_sprs(CPUPPCState *env)
 {
     /* ESA registers */
     /* XXX : not implemented */
@@ -1569,7 +799,7 @@
 }
 
 /* SPR specific to PowerPC 601 implementation */
-static void gen_spr_601(CPUPPCState *env)
+static void register_601_sprs(CPUPPCState *env)
 {
     /* Multiplication/division register */
     /* MQ */
@@ -1645,7 +875,7 @@
 #endif
 }
 
-static void gen_spr_74xx(CPUPPCState *env)
+static void register_74xx_sprs(CPUPPCState *env)
 {
     /* Processor identification */
     spr_register(env, SPR_PIR, "PIR",
@@ -1695,7 +925,7 @@
                  0x00000000);
 }
 
-static void gen_l3_ctrl(CPUPPCState *env)
+static void register_l3_ctrl(CPUPPCState *env)
 {
     /* L3CR */
     /* XXX : not implemented */
@@ -1717,7 +947,7 @@
                  0x00000000);
 }
 
-static void gen_74xx_soft_tlb(CPUPPCState *env, int nb_tlbs, int nb_ways)
+static void register_74xx_soft_tlb(CPUPPCState *env, int nb_tlbs, int nb_ways)
 {
 #if !defined(CONFIG_USER_ONLY)
     env->nb_tlb = nb_tlbs;
@@ -1742,58 +972,7 @@
 #endif
 }
 
-#if !defined(CONFIG_USER_ONLY)
-static void spr_write_e500_l1csr0(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv t0 = tcg_temp_new();
-
-    tcg_gen_andi_tl(t0, cpu_gpr[gprn], L1CSR0_DCE | L1CSR0_CPE);
-    gen_store_spr(sprn, t0);
-    tcg_temp_free(t0);
-}
-
-static void spr_write_e500_l1csr1(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv t0 = tcg_temp_new();
-
-    tcg_gen_andi_tl(t0, cpu_gpr[gprn], L1CSR1_ICE | L1CSR1_CPE);
-    gen_store_spr(sprn, t0);
-    tcg_temp_free(t0);
-}
-
-static void spr_write_e500_l2csr0(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv t0 = tcg_temp_new();
-
-    tcg_gen_andi_tl(t0, cpu_gpr[gprn],
-                    ~(E500_L2CSR0_L2FI | E500_L2CSR0_L2FL | E500_L2CSR0_L2LFC));
-    gen_store_spr(sprn, t0);
-    tcg_temp_free(t0);
-}
-
-static void spr_write_booke206_mmucsr0(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_helper_booke206_tlbflush(cpu_env, cpu_gpr[gprn]);
-}
-
-static void spr_write_booke_pid(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv_i32 t0 = tcg_const_i32(sprn);
-    gen_helper_booke_setpid(cpu_env, t0, cpu_gpr[gprn]);
-    tcg_temp_free_i32(t0);
-}
-static void spr_write_eplc(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_helper_booke_set_eplc(cpu_env, cpu_gpr[gprn]);
-}
-static void spr_write_epsc(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_helper_booke_set_epsc(cpu_env, cpu_gpr[gprn]);
-}
-
-#endif
-
-static void gen_spr_usprg3(CPUPPCState *env)
+static void register_usprg3_sprs(CPUPPCState *env)
 {
     spr_register(env, SPR_USPRG3, "USPRG3",
                  &spr_read_ureg, SPR_NOACCESS,
@@ -1801,7 +980,7 @@
                  0x00000000);
 }
 
-static void gen_spr_usprgh(CPUPPCState *env)
+static void register_usprgh_sprs(CPUPPCState *env)
 {
     spr_register(env, SPR_USPRG4, "USPRG4",
                  &spr_read_ureg, SPR_NOACCESS,
@@ -1822,7 +1001,7 @@
 }
 
 /* PowerPC BookE SPR */
-static void gen_spr_BookE(CPUPPCState *env, uint64_t ivor_mask)
+static void register_BookE_sprs(CPUPPCState *env, uint64_t ivor_mask)
 {
     const char *ivor_names[64] = {
         "IVOR0",  "IVOR1",  "IVOR2",  "IVOR3",
@@ -1998,7 +1177,8 @@
                  0x00000000);
 }
 
-static inline uint32_t gen_tlbncfg(uint32_t assoc, uint32_t minsize,
+#if !defined(CONFIG_USER_ONLY)
+static inline uint32_t register_tlbncfg(uint32_t assoc, uint32_t minsize,
                                    uint32_t maxsize, uint32_t flags,
                                    uint32_t nentries)
 {
@@ -2007,9 +1187,10 @@
            (maxsize << TLBnCFG_MAXSIZE_SHIFT) |
            flags | nentries;
 }
+#endif /* !CONFIG_USER_ONLY */
 
 /* BookE 2.06 storage control registers */
-static void gen_spr_BookE206(CPUPPCState *env, uint32_t mas_mask,
+static void register_BookE206_sprs(CPUPPCState *env, uint32_t mas_mask,
                              uint32_t *tlbncfg, uint32_t mmucfg)
 {
 #if !defined(CONFIG_USER_ONLY)
@@ -2097,11 +1278,11 @@
     }
 #endif
 
-    gen_spr_usprgh(env);
+    register_usprgh_sprs(env);
 }
 
 /* SPR specific to PowerPC 440 implementation */
-static void gen_spr_440(CPUPPCState *env)
+static void register_440_sprs(CPUPPCState *env)
 {
     /* Cache control */
     /* XXX : not implemented */
@@ -2242,7 +1423,7 @@
 }
 
 /* SPR shared between PowerPC 40x implementations */
-static void gen_spr_40x(CPUPPCState *env)
+static void register_40x_sprs(CPUPPCState *env)
 {
     /* Cache */
     /* not emulated, as QEMU do not emulate caches */
@@ -2297,7 +1478,7 @@
 }
 
 /* SPR specific to PowerPC 405 implementation */
-static void gen_spr_405(CPUPPCState *env)
+static void register_405_sprs(CPUPPCState *env)
 {
     /* MMU */
     spr_register(env, SPR_40x_PID, "PID",
@@ -2399,11 +1580,11 @@
                  SPR_NOACCESS, SPR_NOACCESS,
                  spr_read_generic, &spr_write_generic,
                  0x00000000);
-    gen_spr_usprgh(env);
+    register_usprgh_sprs(env);
 }
 
 /* SPR shared between PowerPC 401 & 403 implementations */
-static void gen_spr_401_403(CPUPPCState *env)
+static void register_401_403_sprs(CPUPPCState *env)
 {
     /* Time base */
     spr_register(env, SPR_403_VTBL,  "TBL",
@@ -2431,7 +1612,7 @@
 }
 
 /* SPR specific to PowerPC 401 implementation */
-static void gen_spr_401(CPUPPCState *env)
+static void register_401_sprs(CPUPPCState *env)
 {
     /* Debug interface */
     /* XXX : not implemented */
@@ -2473,9 +1654,9 @@
                  0x00000000);
 }
 
-static void gen_spr_401x2(CPUPPCState *env)
+static void register_401x2_sprs(CPUPPCState *env)
 {
-    gen_spr_401(env);
+    register_401_sprs(env);
     spr_register(env, SPR_40x_PID, "PID",
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_generic, &spr_write_generic,
@@ -2487,7 +1668,7 @@
 }
 
 /* SPR specific to PowerPC 403 implementation */
-static void gen_spr_403(CPUPPCState *env)
+static void register_403_sprs(CPUPPCState *env)
 {
     /* Debug interface */
     /* XXX : not implemented */
@@ -2523,7 +1704,7 @@
                  0x00000000);
 }
 
-static void gen_spr_403_real(CPUPPCState *env)
+static void register_403_real_sprs(CPUPPCState *env)
 {
     spr_register(env, SPR_403_PBL1,  "PBL1",
                  SPR_NOACCESS, SPR_NOACCESS,
@@ -2543,7 +1724,7 @@
                  0x00000000);
 }
 
-static void gen_spr_403_mmu(CPUPPCState *env)
+static void register_403_mmu_sprs(CPUPPCState *env)
 {
     /* MMU */
     spr_register(env, SPR_40x_PID, "PID",
@@ -2557,7 +1738,7 @@
 }
 
 /* SPR specific to PowerPC compression coprocessor extension */
-static void gen_spr_compress(CPUPPCState *env)
+static void register_compress_sprs(CPUPPCState *env)
 {
     /* XXX : not implemented */
     spr_register(env, SPR_401_SKR, "SKR",
@@ -2566,7 +1747,7 @@
                  0x00000000);
 }
 
-static void gen_spr_5xx_8xx(CPUPPCState *env)
+static void register_5xx_8xx_sprs(CPUPPCState *env)
 {
     /* Exception processing */
     spr_register_kvm(env, SPR_DSISR, "DSISR",
@@ -2684,7 +1865,7 @@
                  0x00000000);
 }
 
-static void gen_spr_5xx(CPUPPCState *env)
+static void register_5xx_sprs(CPUPPCState *env)
 {
     /* XXX : not implemented */
     spr_register(env, SPR_RCPU_MI_GRA, "MI_GRA",
@@ -2793,7 +1974,7 @@
                  0x00000000);
 }
 
-static void gen_spr_8xx(CPUPPCState *env)
+static void register_8xx_sprs(CPUPPCState *env)
 {
     /* XXX : not implemented */
     spr_register(env, SPR_MPC_IC_CST, "IC_CST",
@@ -3528,9 +2709,9 @@
 
 static void init_proc_401(CPUPPCState *env)
 {
-    gen_spr_40x(env);
-    gen_spr_401_403(env);
-    gen_spr_401(env);
+    register_40x_sprs(env);
+    register_401_403_sprs(env);
+    register_401_sprs(env);
     init_excp_4xx_real(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -3574,10 +2755,10 @@
 
 static void init_proc_401x2(CPUPPCState *env)
 {
-    gen_spr_40x(env);
-    gen_spr_401_403(env);
-    gen_spr_401x2(env);
-    gen_spr_compress(env);
+    register_40x_sprs(env);
+    register_401_403_sprs(env);
+    register_401x2_sprs(env);
+    register_compress_sprs(env);
     /* Memory management */
 #if !defined(CONFIG_USER_ONLY)
     env->nb_tlb = 64;
@@ -3632,11 +2813,11 @@
 
 static void init_proc_401x3(CPUPPCState *env)
 {
-    gen_spr_40x(env);
-    gen_spr_401_403(env);
-    gen_spr_401(env);
-    gen_spr_401x2(env);
-    gen_spr_compress(env);
+    register_40x_sprs(env);
+    register_401_403_sprs(env);
+    register_401_sprs(env);
+    register_401x2_sprs(env);
+    register_compress_sprs(env);
     init_excp_4xx_softmmu(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -3685,10 +2866,10 @@
 
 static void init_proc_IOP480(CPUPPCState *env)
 {
-    gen_spr_40x(env);
-    gen_spr_401_403(env);
-    gen_spr_401x2(env);
-    gen_spr_compress(env);
+    register_40x_sprs(env);
+    register_401_403_sprs(env);
+    register_401x2_sprs(env);
+    register_compress_sprs(env);
     /* Memory management */
 #if !defined(CONFIG_USER_ONLY)
     env->nb_tlb = 64;
@@ -3743,10 +2924,10 @@
 
 static void init_proc_403(CPUPPCState *env)
 {
-    gen_spr_40x(env);
-    gen_spr_401_403(env);
-    gen_spr_403(env);
-    gen_spr_403_real(env);
+    register_40x_sprs(env);
+    register_401_403_sprs(env);
+    register_403_sprs(env);
+    register_403_real_sprs(env);
     init_excp_4xx_real(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -3790,11 +2971,11 @@
 
 static void init_proc_403GCX(CPUPPCState *env)
 {
-    gen_spr_40x(env);
-    gen_spr_401_403(env);
-    gen_spr_403(env);
-    gen_spr_403_real(env);
-    gen_spr_403_mmu(env);
+    register_40x_sprs(env);
+    register_401_403_sprs(env);
+    register_403_sprs(env);
+    register_403_real_sprs(env);
+    register_403_mmu_sprs(env);
     /* Bus access control */
     /* not emulated, as QEMU never does speculative access */
     spr_register(env, SPR_40x_SGR, "SGR",
@@ -3858,9 +3039,9 @@
 static void init_proc_405(CPUPPCState *env)
 {
     /* Time base */
-    gen_tbl(env);
-    gen_spr_40x(env);
-    gen_spr_405(env);
+    register_tbl(env);
+    register_40x_sprs(env);
+    register_405_sprs(env);
     /* Bus access control */
     /* not emulated, as QEMU never does speculative access */
     spr_register(env, SPR_40x_SGR, "SGR",
@@ -3924,10 +3105,10 @@
 static void init_proc_440EP(CPUPPCState *env)
 {
     /* Time base */
-    gen_tbl(env);
-    gen_spr_BookE(env, 0x000000000000FFFFULL);
-    gen_spr_440(env);
-    gen_spr_usprgh(env);
+    register_tbl(env);
+    register_BookE_sprs(env, 0x000000000000FFFFULL);
+    register_440_sprs(env);
+    register_usprgh_sprs(env);
     /* Processor identification */
     spr_register(env, SPR_BOOKE_PIR, "PIR",
                  SPR_NOACCESS, SPR_NOACCESS,
@@ -4066,10 +3247,10 @@
 static void init_proc_440GP(CPUPPCState *env)
 {
     /* Time base */
-    gen_tbl(env);
-    gen_spr_BookE(env, 0x000000000000FFFFULL);
-    gen_spr_440(env);
-    gen_spr_usprgh(env);
+    register_tbl(env);
+    register_BookE_sprs(env, 0x000000000000FFFFULL);
+    register_440_sprs(env);
+    register_usprgh_sprs(env);
     /* Processor identification */
     spr_register(env, SPR_BOOKE_PIR, "PIR",
                  SPR_NOACCESS, SPR_NOACCESS,
@@ -4149,10 +3330,10 @@
 static void init_proc_440x4(CPUPPCState *env)
 {
     /* Time base */
-    gen_tbl(env);
-    gen_spr_BookE(env, 0x000000000000FFFFULL);
-    gen_spr_440(env);
-    gen_spr_usprgh(env);
+    register_tbl(env);
+    register_BookE_sprs(env, 0x000000000000FFFFULL);
+    register_440_sprs(env);
+    register_usprgh_sprs(env);
     /* Processor identification */
     spr_register(env, SPR_BOOKE_PIR, "PIR",
                  SPR_NOACCESS, SPR_NOACCESS,
@@ -4232,10 +3413,10 @@
 static void init_proc_440x5(CPUPPCState *env)
 {
     /* Time base */
-    gen_tbl(env);
-    gen_spr_BookE(env, 0x000000000000FFFFULL);
-    gen_spr_440(env);
-    gen_spr_usprgh(env);
+    register_tbl(env);
+    register_BookE_sprs(env, 0x000000000000FFFFULL);
+    register_440_sprs(env);
+    register_usprgh_sprs(env);
     /* Processor identification */
     spr_register(env, SPR_BOOKE_PIR, "PIR",
                  SPR_NOACCESS, SPR_NOACCESS,
@@ -4371,9 +3552,9 @@
 static void init_proc_MPC5xx(CPUPPCState *env)
 {
     /* Time base */
-    gen_tbl(env);
-    gen_spr_5xx_8xx(env);
-    gen_spr_5xx(env);
+    register_tbl(env);
+    register_5xx_8xx_sprs(env);
+    register_5xx_sprs(env);
     init_excp_MPC5xx(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -4415,9 +3596,9 @@
 static void init_proc_MPC8xx(CPUPPCState *env)
 {
     /* Time base */
-    gen_tbl(env);
-    gen_spr_5xx_8xx(env);
-    gen_spr_8xx(env);
+    register_tbl(env);
+    register_5xx_8xx_sprs(env);
+    register_8xx_sprs(env);
     init_excp_MPC8xx(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -4459,12 +3640,12 @@
 
 static void init_proc_G2(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_G2_755(env);
-    gen_spr_G2(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_G2_755_sprs(env);
+    register_G2_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* External access control */
     /* XXX : not implemented */
     spr_register(env, SPR_EAR, "EAR",
@@ -4488,9 +3669,9 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
-    gen_high_BATs(env);
-    gen_6xx_7xx_soft_tlb(env, 64, 2);
+    register_low_BATs(env);
+    register_high_BATs(env);
+    register_6xx_7xx_soft_tlb(env, 64, 2);
     init_excp_G2(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -4538,12 +3719,12 @@
 
 static void init_proc_G2LE(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_G2_755(env);
-    gen_spr_G2(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_G2_755_sprs(env);
+    register_G2_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* External access control */
     /* XXX : not implemented */
     spr_register(env, SPR_EAR, "EAR",
@@ -4568,9 +3749,9 @@
                  0x00000000);
 
     /* Memory management */
-    gen_low_BATs(env);
-    gen_high_BATs(env);
-    gen_6xx_7xx_soft_tlb(env, 64, 2);
+    register_low_BATs(env);
+    register_high_BATs(env);
+    register_6xx_7xx_soft_tlb(env, 64, 2);
     init_excp_G2(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -4621,15 +3802,15 @@
 static void init_proc_e200(CPUPPCState *env)
 {
     /* Time base */
-    gen_tbl(env);
-    gen_spr_BookE(env, 0x000000070000FFFFULL);
+    register_tbl(env);
+    register_BookE_sprs(env, 0x000000070000FFFFULL);
     /* XXX : not implemented */
     spr_register(env, SPR_BOOKE_SPEFSCR, "SPEFSCR",
                  &spr_read_spefscr, &spr_write_spefscr,
                  &spr_read_spefscr, &spr_write_spefscr,
                  0x00000000);
     /* Memory management */
-    gen_spr_BookE206(env, 0x0000005D, NULL, 0);
+    register_BookE206_sprs(env, 0x0000005D, NULL, 0);
     /* XXX : not implemented */
     spr_register(env, SPR_HID0, "HID0",
                  SPR_NOACCESS, SPR_NOACCESS,
@@ -4775,11 +3956,11 @@
 
 static void init_proc_e300(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_603(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_603_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* hardware implementation registers */
     /* XXX : not implemented */
     spr_register(env, SPR_HID0, "HID0",
@@ -4823,9 +4004,9 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
-    gen_high_BATs(env);
-    gen_6xx_7xx_soft_tlb(env, 64, 2);
+    register_low_BATs(env);
+    register_high_BATs(env);
+    register_6xx_7xx_soft_tlb(env, 64, 2);
     init_excp_603(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -4873,31 +4054,6 @@
                  POWERPC_FLAG_BE | POWERPC_FLAG_BUS_CLK;
 }
 
-#if !defined(CONFIG_USER_ONLY)
-static void spr_write_mas73(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv val = tcg_temp_new();
-    tcg_gen_ext32u_tl(val, cpu_gpr[gprn]);
-    gen_store_spr(SPR_BOOKE_MAS3, val);
-    tcg_gen_shri_tl(val, cpu_gpr[gprn], 32);
-    gen_store_spr(SPR_BOOKE_MAS7, val);
-    tcg_temp_free(val);
-}
-
-static void spr_read_mas73(DisasContext *ctx, int gprn, int sprn)
-{
-    TCGv mas7 = tcg_temp_new();
-    TCGv mas3 = tcg_temp_new();
-    gen_load_spr(mas7, SPR_BOOKE_MAS7);
-    tcg_gen_shli_tl(mas7, mas7, 32);
-    gen_load_spr(mas3, SPR_BOOKE_MAS3);
-    tcg_gen_or_tl(cpu_gpr[gprn], mas3, mas7);
-    tcg_temp_free(mas3);
-    tcg_temp_free(mas7);
-}
-
-#endif
-
 enum fsl_e500_version {
     fsl_e500v1,
     fsl_e500v2,
@@ -4921,11 +4077,11 @@
 #endif
 
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /*
      * XXX The e500 doesn't implement IVOR7 and IVOR9, but doesn't
      *     complain when accessing them.
-     * gen_spr_BookE(env, 0x0000000F0000FD7FULL);
+     * register_BookE_sprs(env, 0x0000000F0000FD7FULL);
      */
     switch (version) {
     case fsl_e500v1:
@@ -4941,8 +4097,8 @@
         ivor_mask = 0x000003FF0000FFFFULL;
         break;
     }
-    gen_spr_BookE(env, ivor_mask);
-    gen_spr_usprg3(env);
+    register_BookE_sprs(env, ivor_mask);
+    register_usprg3_sprs(env);
     /* Processor identification */
     spr_register(env, SPR_BOOKE_PIR, "PIR",
                  SPR_NOACCESS, SPR_NOACCESS,
@@ -4960,17 +4116,17 @@
     env->id_tlbs = 0;
     switch (version) {
     case fsl_e500v1:
-        tlbncfg[0] = gen_tlbncfg(2, 1, 1, 0, 256);
-        tlbncfg[1] = gen_tlbncfg(16, 1, 9, TLBnCFG_AVAIL | TLBnCFG_IPROT, 16);
+        tlbncfg[0] = register_tlbncfg(2, 1, 1, 0, 256);
+        tlbncfg[1] = register_tlbncfg(16, 1, 9, TLBnCFG_AVAIL | TLBnCFG_IPROT, 16);
         break;
     case fsl_e500v2:
-        tlbncfg[0] = gen_tlbncfg(4, 1, 1, 0, 512);
-        tlbncfg[1] = gen_tlbncfg(16, 1, 12, TLBnCFG_AVAIL | TLBnCFG_IPROT, 16);
+        tlbncfg[0] = register_tlbncfg(4, 1, 1, 0, 512);
+        tlbncfg[1] = register_tlbncfg(16, 1, 12, TLBnCFG_AVAIL | TLBnCFG_IPROT, 16);
         break;
     case fsl_e500mc:
     case fsl_e5500:
-        tlbncfg[0] = gen_tlbncfg(4, 1, 1, 0, 512);
-        tlbncfg[1] = gen_tlbncfg(64, 1, 12, TLBnCFG_AVAIL | TLBnCFG_IPROT, 64);
+        tlbncfg[0] = register_tlbncfg(4, 1, 1, 0, 512);
+        tlbncfg[1] = register_tlbncfg(64, 1, 12, TLBnCFG_AVAIL | TLBnCFG_IPROT, 64);
         break;
     case fsl_e6500:
         mmucfg = 0x6510B45;
@@ -5007,7 +4163,7 @@
         cpu_abort(env_cpu(env), "Unknown CPU: " TARGET_FMT_lx "\n",
                   env->spr[SPR_PVR]);
     }
-    gen_spr_BookE206(env, 0x000000DF, tlbncfg, mmucfg);
+    register_BookE206_sprs(env, 0x000000DF, tlbncfg, mmucfg);
     /* XXX : not implemented */
     spr_register(env, SPR_HID0, "HID0",
                  SPR_NOACCESS, SPR_NOACCESS,
@@ -5365,9 +4521,9 @@
 
 static void init_proc_601(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_601(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_601_sprs(env);
     /* Hardware implementation registers */
     /* XXX : not implemented */
     spr_register(env, SPR_HID0, "HID0",
@@ -5481,11 +4637,11 @@
 
 static void init_proc_602(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_602(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_602_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* hardware implementation registers */
     /* XXX : not implemented */
     spr_register(env, SPR_HID0, "HID0",
@@ -5498,8 +4654,8 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
-    gen_6xx_7xx_soft_tlb(env, 64, 2);
+    register_low_BATs(env);
+    register_6xx_7xx_soft_tlb(env, 64, 2);
     init_excp_602(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -5551,11 +4707,11 @@
 
 static void init_proc_603(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_603(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_603_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* hardware implementation registers */
     /* XXX : not implemented */
     spr_register(env, SPR_HID0, "HID0",
@@ -5568,8 +4724,8 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
-    gen_6xx_7xx_soft_tlb(env, 64, 2);
+    register_low_BATs(env);
+    register_6xx_7xx_soft_tlb(env, 64, 2);
     init_excp_603(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -5618,11 +4774,11 @@
 
 static void init_proc_603E(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_603(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_603_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* hardware implementation registers */
     /* XXX : not implemented */
     spr_register(env, SPR_HID0, "HID0",
@@ -5635,8 +4791,8 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
-    gen_6xx_7xx_soft_tlb(env, 64, 2);
+    register_low_BATs(env);
+    register_6xx_7xx_soft_tlb(env, 64, 2);
     init_excp_603(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -5685,11 +4841,11 @@
 
 static void init_proc_604(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_604(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_604_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* Hardware implementation registers */
     /* XXX : not implemented */
     spr_register(env, SPR_HID0, "HID0",
@@ -5697,7 +4853,7 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
+    register_low_BATs(env);
     init_excp_604(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -5749,9 +4905,9 @@
 
 static void init_proc_604E(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_604(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_604_sprs(env);
     /* XXX : not implemented */
     spr_register(env, SPR_7XX_MMCR1, "MMCR1",
                  SPR_NOACCESS, SPR_NOACCESS,
@@ -5768,7 +4924,7 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* Hardware implementation registers */
     /* XXX : not implemented */
     spr_register(env, SPR_HID0, "HID0",
@@ -5781,7 +4937,7 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
+    register_low_BATs(env);
     init_excp_604(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -5833,13 +4989,13 @@
 
 static void init_proc_740(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* Thermal management */
-    gen_spr_thrm(env);
+    register_thrm_sprs(env);
     /* Hardware implementation registers */
     /* XXX : not implemented */
     spr_register(env, SPR_HID0, "HID0",
@@ -5852,7 +5008,7 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
+    register_low_BATs(env);
     init_excp_7x0(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -5904,18 +5060,18 @@
 
 static void init_proc_750(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
     /* XXX : not implemented */
     spr_register(env, SPR_L2CR, "L2CR",
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_generic, spr_access_nop,
                  0x00000000);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* Thermal management */
-    gen_spr_thrm(env);
+    register_thrm_sprs(env);
     /* Hardware implementation registers */
     /* XXX : not implemented */
     spr_register(env, SPR_HID0, "HID0",
@@ -5928,7 +5084,7 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
+    register_low_BATs(env);
     /*
      * XXX: high BATs are also present but are known to be bugged on
      *      die version 1.x
@@ -5984,16 +5140,16 @@
 
 static void init_proc_750cl(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
     /* XXX : not implemented */
     spr_register(env, SPR_L2CR, "L2CR",
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_generic, spr_access_nop,
                  0x00000000);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* Thermal management */
     /* Those registers are fake on 750CL */
     spr_register(env, SPR_THRM1, "THRM1",
@@ -6094,9 +5250,9 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
+    register_low_BATs(env);
     /* PowerPC 750cl has 8 DBATs and 8 IBATs */
-    gen_high_BATs(env);
+    register_high_BATs(env);
     init_excp_750cl(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -6187,18 +5343,18 @@
 
 static void init_proc_750cx(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
     /* XXX : not implemented */
     spr_register(env, SPR_L2CR, "L2CR",
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_generic, spr_access_nop,
                  0x00000000);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* Thermal management */
-    gen_spr_thrm(env);
+    register_thrm_sprs(env);
     /* This register is not implemented but is present for compatibility */
     spr_register(env, SPR_SDA, "SDA",
                  SPR_NOACCESS, SPR_NOACCESS,
@@ -6216,9 +5372,9 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
+    register_low_BATs(env);
     /* PowerPC 750cx has 8 DBATs and 8 IBATs */
-    gen_high_BATs(env);
+    register_high_BATs(env);
     init_excp_750cx(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -6270,18 +5426,18 @@
 
 static void init_proc_750fx(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
     /* XXX : not implemented */
     spr_register(env, SPR_L2CR, "L2CR",
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_generic, spr_access_nop,
                  0x00000000);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* Thermal management */
-    gen_spr_thrm(env);
+    register_thrm_sprs(env);
     /* XXX : not implemented */
     spr_register(env, SPR_750_THRM4, "THRM4",
                  SPR_NOACCESS, SPR_NOACCESS,
@@ -6304,9 +5460,9 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
+    register_low_BATs(env);
     /* PowerPC 750fx & 750gx has 8 DBATs and 8 IBATs */
-    gen_high_BATs(env);
+    register_high_BATs(env);
     init_excp_7x0(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -6358,18 +5514,18 @@
 
 static void init_proc_750gx(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
     /* XXX : not implemented (XXX: different from 750fx) */
     spr_register(env, SPR_L2CR, "L2CR",
                  SPR_NOACCESS, SPR_NOACCESS,
                  &spr_read_generic, spr_access_nop,
                  0x00000000);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* Thermal management */
-    gen_spr_thrm(env);
+    register_thrm_sprs(env);
     /* XXX : not implemented */
     spr_register(env, SPR_750_THRM4, "THRM4",
                  SPR_NOACCESS, SPR_NOACCESS,
@@ -6392,9 +5548,9 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
+    register_low_BATs(env);
     /* PowerPC 750fx & 750gx has 8 DBATs and 8 IBATs */
-    gen_high_BATs(env);
+    register_high_BATs(env);
     init_excp_7x0(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -6446,14 +5602,14 @@
 
 static void init_proc_745(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
-    gen_spr_G2_755(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
+    register_G2_755_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* Thermal management */
-    gen_spr_thrm(env);
+    register_thrm_sprs(env);
     /* Hardware implementation registers */
     /* XXX : not implemented */
     spr_register(env, SPR_HID0, "HID0",
@@ -6471,9 +5627,9 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
-    gen_high_BATs(env);
-    gen_6xx_7xx_soft_tlb(env, 64, 2);
+    register_low_BATs(env);
+    register_high_BATs(env);
+    register_6xx_7xx_soft_tlb(env, 64, 2);
     init_excp_7x5(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -6522,12 +5678,12 @@
 
 static void init_proc_755(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
-    gen_spr_G2_755(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
+    register_G2_755_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* L2 cache control */
     /* XXX : not implemented */
     spr_register(env, SPR_L2CR, "L2CR",
@@ -6540,7 +5696,7 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Thermal management */
-    gen_spr_thrm(env);
+    register_thrm_sprs(env);
     /* Hardware implementation registers */
     /* XXX : not implemented */
     spr_register(env, SPR_HID0, "HID0",
@@ -6558,9 +5714,9 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
-    gen_high_BATs(env);
-    gen_6xx_7xx_soft_tlb(env, 64, 2);
+    register_low_BATs(env);
+    register_high_BATs(env);
+    register_6xx_7xx_soft_tlb(env, 64, 2);
     init_excp_7x5(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -6609,13 +5765,13 @@
 
 static void init_proc_7400(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* 74xx specific SPR */
-    gen_spr_74xx(env);
+    register_74xx_sprs(env);
     vscr_init(env, 0x00010000);
     /* XXX : not implemented */
     spr_register(env, SPR_UBAMR, "UBAMR",
@@ -6629,9 +5785,9 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Thermal management */
-    gen_spr_thrm(env);
+    register_thrm_sprs(env);
     /* Memory management */
-    gen_low_BATs(env);
+    register_low_BATs(env);
     init_excp_7400(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -6689,13 +5845,13 @@
 
 static void init_proc_7410(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* 74xx specific SPR */
-    gen_spr_74xx(env);
+    register_74xx_sprs(env);
     vscr_init(env, 0x00010000);
     /* XXX : not implemented */
     spr_register(env, SPR_UBAMR, "UBAMR",
@@ -6703,7 +5859,7 @@
                  &spr_read_ureg, SPR_NOACCESS,
                  0x00000000);
     /* Thermal management */
-    gen_spr_thrm(env);
+    register_thrm_sprs(env);
     /* L2PMCR */
     /* XXX : not implemented */
     spr_register(env, SPR_L2PMCR, "L2PMCR",
@@ -6717,7 +5873,7 @@
                  &spr_read_generic, &spr_write_generic,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
+    register_low_BATs(env);
     init_excp_7400(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -6775,13 +5931,13 @@
 
 static void init_proc_7440(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* 74xx specific SPR */
-    gen_spr_74xx(env);
+    register_74xx_sprs(env);
     vscr_init(env, 0x00010000);
     /* XXX : not implemented */
     spr_register(env, SPR_UBAMR, "UBAMR",
@@ -6828,8 +5984,8 @@
                  &spr_read_ureg, SPR_NOACCESS,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
-    gen_74xx_soft_tlb(env, 128, 2);
+    register_low_BATs(env);
+    register_74xx_soft_tlb(env, 128, 2);
     init_excp_7450(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -6884,16 +6040,16 @@
 
 static void init_proc_7450(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* 74xx specific SPR */
-    gen_spr_74xx(env);
+    register_74xx_sprs(env);
     vscr_init(env, 0x00010000);
     /* Level 3 cache control */
-    gen_l3_ctrl(env);
+    register_l3_ctrl(env);
     /* L3ITCR1 */
     /* XXX : not implemented */
     spr_register(env, SPR_L3ITCR1, "L3ITCR1",
@@ -6963,8 +6119,8 @@
                  &spr_read_ureg, SPR_NOACCESS,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
-    gen_74xx_soft_tlb(env, 128, 2);
+    register_low_BATs(env);
+    register_74xx_soft_tlb(env, 128, 2);
     init_excp_7450(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -7019,13 +6175,13 @@
 
 static void init_proc_7445(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* 74xx specific SPR */
-    gen_spr_74xx(env);
+    register_74xx_sprs(env);
     vscr_init(env, 0x00010000);
     /* LDSTCR */
     /* XXX : not implemented */
@@ -7100,9 +6256,9 @@
                  &spr_read_ureg, SPR_NOACCESS,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
-    gen_high_BATs(env);
-    gen_74xx_soft_tlb(env, 128, 2);
+    register_low_BATs(env);
+    register_high_BATs(env);
+    register_74xx_soft_tlb(env, 128, 2);
     init_excp_7450(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -7157,16 +6313,16 @@
 
 static void init_proc_7455(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* 74xx specific SPR */
-    gen_spr_74xx(env);
+    register_74xx_sprs(env);
     vscr_init(env, 0x00010000);
     /* Level 3 cache control */
-    gen_l3_ctrl(env);
+    register_l3_ctrl(env);
     /* LDSTCR */
     /* XXX : not implemented */
     spr_register(env, SPR_LDSTCR, "LDSTCR",
@@ -7240,9 +6396,9 @@
                  &spr_read_ureg, SPR_NOACCESS,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
-    gen_high_BATs(env);
-    gen_74xx_soft_tlb(env, 128, 2);
+    register_low_BATs(env);
+    register_high_BATs(env);
+    register_74xx_soft_tlb(env, 128, 2);
     init_excp_7450(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -7297,16 +6453,16 @@
 
 static void init_proc_7457(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* 74xx specific SPR */
-    gen_spr_74xx(env);
+    register_74xx_sprs(env);
     vscr_init(env, 0x00010000);
     /* Level 3 cache control */
-    gen_l3_ctrl(env);
+    register_l3_ctrl(env);
     /* L3ITCR1 */
     /* XXX : not implemented */
     spr_register(env, SPR_L3ITCR1, "L3ITCR1",
@@ -7404,9 +6560,9 @@
                  &spr_read_ureg, SPR_NOACCESS,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
-    gen_high_BATs(env);
-    gen_74xx_soft_tlb(env, 128, 2);
+    register_low_BATs(env);
+    register_high_BATs(env);
+    register_74xx_soft_tlb(env, 128, 2);
     init_excp_7450(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -7461,13 +6617,13 @@
 
 static void init_proc_e600(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_spr_sdr1(env);
-    gen_spr_7xx(env);
+    register_ne_601_sprs(env);
+    register_sdr1_sprs(env);
+    register_7xx_sprs(env);
     /* Time base */
-    gen_tbl(env);
+    register_tbl(env);
     /* 74xx specific SPR */
-    gen_spr_74xx(env);
+    register_74xx_sprs(env);
     vscr_init(env, 0x00010000);
     /* XXX : not implemented */
     spr_register(env, SPR_UBAMR, "UBAMR",
@@ -7543,9 +6699,9 @@
                  &spr_read_ureg, SPR_NOACCESS,
                  0x00000000);
     /* Memory management */
-    gen_low_BATs(env);
-    gen_high_BATs(env);
-    gen_74xx_soft_tlb(env, 128, 2);
+    register_low_BATs(env);
+    register_high_BATs(env);
+    register_74xx_soft_tlb(env, 128, 2);
     init_excp_7450(env);
     env->dcache_line_size = 32;
     env->icache_line_size = 32;
@@ -7609,58 +6765,6 @@
 #define POWERPC970_HID5_INIT 0x00000000
 #endif
 
-static void gen_fscr_facility_check(DisasContext *ctx, int facility_sprn,
-                                    int bit, int sprn, int cause)
-{
-    TCGv_i32 t1 = tcg_const_i32(bit);
-    TCGv_i32 t2 = tcg_const_i32(sprn);
-    TCGv_i32 t3 = tcg_const_i32(cause);
-
-    gen_helper_fscr_facility_check(cpu_env, t1, t2, t3);
-
-    tcg_temp_free_i32(t3);
-    tcg_temp_free_i32(t2);
-    tcg_temp_free_i32(t1);
-}
-
-static void gen_msr_facility_check(DisasContext *ctx, int facility_sprn,
-                                   int bit, int sprn, int cause)
-{
-    TCGv_i32 t1 = tcg_const_i32(bit);
-    TCGv_i32 t2 = tcg_const_i32(sprn);
-    TCGv_i32 t3 = tcg_const_i32(cause);
-
-    gen_helper_msr_facility_check(cpu_env, t1, t2, t3);
-
-    tcg_temp_free_i32(t3);
-    tcg_temp_free_i32(t2);
-    tcg_temp_free_i32(t1);
-}
-
-static void spr_read_prev_upper32(DisasContext *ctx, int gprn, int sprn)
-{
-    TCGv spr_up = tcg_temp_new();
-    TCGv spr = tcg_temp_new();
-
-    gen_load_spr(spr, sprn - 1);
-    tcg_gen_shri_tl(spr_up, spr, 32);
-    tcg_gen_ext32u_tl(cpu_gpr[gprn], spr_up);
-
-    tcg_temp_free(spr);
-    tcg_temp_free(spr_up);
-}
-
-static void spr_write_prev_upper32(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv spr = tcg_temp_new();
-
-    gen_load_spr(spr, sprn - 1);
-    tcg_gen_deposit_tl(spr, spr, cpu_gpr[gprn], 32, 32);
-    gen_store_spr(sprn - 1, spr);
-
-    tcg_temp_free(spr);
-}
-
 static int check_pow_970(CPUPPCState *env)
 {
     if (env->spr[SPR_HID0] & (HID0_DEEPNAP | HID0_DOZE | HID0_NAP)) {
@@ -7670,7 +6774,7 @@
     return 0;
 }
 
-static void gen_spr_970_hid(CPUPPCState *env)
+static void register_970_hid_sprs(CPUPPCState *env)
 {
     /* Hardware implementation registers */
     /* XXX : not implemented */
@@ -7688,7 +6792,7 @@
                  POWERPC970_HID5_INIT);
 }
 
-static void gen_spr_970_hior(CPUPPCState *env)
+static void register_970_hior_sprs(CPUPPCState *env)
 {
     spr_register(env, SPR_HIOR, "SPR_HIOR",
                  SPR_NOACCESS, SPR_NOACCESS,
@@ -7696,7 +6800,7 @@
                  0x00000000);
 }
 
-static void gen_spr_book3s_ctrl(CPUPPCState *env)
+static void register_book3s_ctrl_sprs(CPUPPCState *env)
 {
     spr_register(env, SPR_CTRL, "SPR_CTRL",
                  SPR_NOACCESS, SPR_NOACCESS,
@@ -7708,7 +6812,7 @@
                  0x00000000);
 }
 
-static void gen_spr_book3s_altivec(CPUPPCState *env)
+static void register_book3s_altivec_sprs(CPUPPCState *env)
 {
     if (!(env->insns_flags & PPC_ALTIVEC)) {
         return;
@@ -7721,7 +6825,7 @@
 
 }
 
-static void gen_spr_book3s_dbg(CPUPPCState *env)
+static void register_book3s_dbg_sprs(CPUPPCState *env)
 {
     /*
      * TODO: different specs define different scopes for these,
@@ -7740,7 +6844,7 @@
                      KVM_REG_PPC_DABRX, 0x00000000);
 }
 
-static void gen_spr_book3s_207_dbg(CPUPPCState *env)
+static void register_book3s_207_dbg_sprs(CPUPPCState *env)
 {
     spr_register_kvm_hv(env, SPR_DAWR0, "DAWR0",
                         SPR_NOACCESS, SPR_NOACCESS,
@@ -7759,7 +6863,7 @@
                         KVM_REG_PPC_CIABR, 0x00000000);
 }
 
-static void gen_spr_970_dbg(CPUPPCState *env)
+static void register_970_dbg_sprs(CPUPPCState *env)
 {
     /* Breakpoints */
     spr_register(env, SPR_IABR, "IABR",
@@ -7768,7 +6872,7 @@
                  0x00000000);
 }
 
-static void gen_spr_book3s_pmu_sup(CPUPPCState *env)
+static void register_book3s_pmu_sup_sprs(CPUPPCState *env)
 {
     spr_register_kvm(env, SPR_POWER_MMCR0, "MMCR0",
                      SPR_NOACCESS, SPR_NOACCESS,
@@ -7816,7 +6920,7 @@
                      KVM_REG_PPC_SDAR, 0x00000000);
 }
 
-static void gen_spr_book3s_pmu_user(CPUPPCState *env)
+static void register_book3s_pmu_user_sprs(CPUPPCState *env)
 {
     spr_register(env, SPR_POWER_UMMCR0, "UMMCR0",
                  &spr_read_ureg, SPR_NOACCESS,
@@ -7864,7 +6968,7 @@
                  0x00000000);
 }
 
-static void gen_spr_970_pmu_sup(CPUPPCState *env)
+static void register_970_pmu_sup_sprs(CPUPPCState *env)
 {
     spr_register_kvm(env, SPR_970_PMC7, "PMC7",
                      SPR_NOACCESS, SPR_NOACCESS,
@@ -7876,7 +6980,7 @@
                      KVM_REG_PPC_PMC8, 0x00000000);
 }
 
-static void gen_spr_970_pmu_user(CPUPPCState *env)
+static void register_970_pmu_user_sprs(CPUPPCState *env)
 {
     spr_register(env, SPR_970_UPMC7, "UPMC7",
                  &spr_read_ureg, SPR_NOACCESS,
@@ -7888,7 +6992,7 @@
                  0x00000000);
 }
 
-static void gen_spr_power8_pmu_sup(CPUPPCState *env)
+static void register_power8_pmu_sup_sprs(CPUPPCState *env)
 {
     spr_register_kvm(env, SPR_POWER_MMCR2, "MMCR2",
                      SPR_NOACCESS, SPR_NOACCESS,
@@ -7924,7 +7028,7 @@
                      KVM_REG_PPC_CSIGR, 0x00000000);
 }
 
-static void gen_spr_power8_pmu_user(CPUPPCState *env)
+static void register_power8_pmu_user_sprs(CPUPPCState *env)
 {
     spr_register(env, SPR_POWER_UMMCR2, "UMMCR2",
                  &spr_read_ureg, SPR_NOACCESS,
@@ -7936,7 +7040,7 @@
                  0x00000000);
 }
 
-static void gen_spr_power5p_ear(CPUPPCState *env)
+static void register_power5p_ear_sprs(CPUPPCState *env)
 {
     /* External access control */
     spr_register(env, SPR_EAR, "EAR",
@@ -7945,7 +7049,7 @@
                  0x00000000);
 }
 
-static void gen_spr_power5p_tb(CPUPPCState *env)
+static void register_power5p_tb_sprs(CPUPPCState *env)
 {
     /* TBU40 (High 40 bits of the Timebase register */
     spr_register_hv(env, SPR_TBU40, "TBU40",
@@ -7955,25 +7059,7 @@
                     0x00000000);
 }
 
-#if !defined(CONFIG_USER_ONLY)
-static void spr_write_hmer(DisasContext *ctx, int sprn, int gprn)
-{
-    TCGv hmer = tcg_temp_new();
-
-    gen_load_spr(hmer, sprn);
-    tcg_gen_and_tl(hmer, cpu_gpr[gprn], hmer);
-    gen_store_spr(sprn, hmer);
-    spr_store_dump_spr(sprn);
-    tcg_temp_free(hmer);
-}
-
-static void spr_write_lpcr(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_helper_store_lpcr(cpu_env, cpu_gpr[gprn]);
-}
-#endif /* !defined(CONFIG_USER_ONLY) */
-
-static void gen_spr_970_lpar(CPUPPCState *env)
+static void register_970_lpar_sprs(CPUPPCState *env)
 {
 #if !defined(CONFIG_USER_ONLY)
     /*
@@ -7990,7 +7076,7 @@
 #endif
 }
 
-static void gen_spr_power5p_lpar(CPUPPCState *env)
+static void register_power5p_lpar_sprs(CPUPPCState *env)
 {
 #if !defined(CONFIG_USER_ONLY)
     /* Logical partitionning */
@@ -8006,7 +7092,7 @@
 #endif
 }
 
-static void gen_spr_book3s_ids(CPUPPCState *env)
+static void register_book3s_ids_sprs(CPUPPCState *env)
 {
     /* FIXME: Will need to deal with thread vs core only SPRs */
 
@@ -8098,7 +7184,7 @@
                  0x00000000);
 }
 
-static void gen_spr_rmor(CPUPPCState *env)
+static void register_rmor_sprs(CPUPPCState *env)
 {
     spr_register_hv(env, SPR_RMOR, "RMOR",
                  SPR_NOACCESS, SPR_NOACCESS,
@@ -8107,7 +7193,7 @@
                  0x00000000);
 }
 
-static void gen_spr_power8_ids(CPUPPCState *env)
+static void register_power8_ids_sprs(CPUPPCState *env)
 {
     /* Thread identification */
     spr_register(env, SPR_TIR, "TIR",
@@ -8116,7 +7202,7 @@
                  0x00000000);
 }
 
-static void gen_spr_book3s_purr(CPUPPCState *env)
+static void register_book3s_purr_sprs(CPUPPCState *env)
 {
 #if !defined(CONFIG_USER_ONLY)
     /* PURR & SPURR: Hack - treat these as aliases for the TB for now */
@@ -8133,7 +7219,7 @@
 #endif
 }
 
-static void gen_spr_power6_dbg(CPUPPCState *env)
+static void register_power6_dbg_sprs(CPUPPCState *env)
 {
 #if !defined(CONFIG_USER_ONLY)
     spr_register(env, SPR_CFAR, "SPR_CFAR",
@@ -8143,7 +7229,7 @@
 #endif
 }
 
-static void gen_spr_power5p_common(CPUPPCState *env)
+static void register_power5p_common_sprs(CPUPPCState *env)
 {
     spr_register_kvm(env, SPR_PPR, "PPR",
                      &spr_read_generic, &spr_write_generic,
@@ -8151,7 +7237,7 @@
                      KVM_REG_PPC_PPR, 0x00000000);
 }
 
-static void gen_spr_power6_common(CPUPPCState *env)
+static void register_power6_common_sprs(CPUPPCState *env)
 {
 #if !defined(CONFIG_USER_ONLY)
     spr_register_kvm(env, SPR_DSCR, "SPR_DSCR",
@@ -8170,19 +7256,7 @@
                  0x00000000);
 }
 
-static void spr_read_tar(DisasContext *ctx, int gprn, int sprn)
-{
-    gen_fscr_facility_check(ctx, SPR_FSCR, FSCR_TAR, sprn, FSCR_IC_TAR);
-    spr_read_generic(ctx, gprn, sprn);
-}
-
-static void spr_write_tar(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_fscr_facility_check(ctx, SPR_FSCR, FSCR_TAR, sprn, FSCR_IC_TAR);
-    spr_write_generic(ctx, sprn, gprn);
-}
-
-static void gen_spr_power8_tce_address_control(CPUPPCState *env)
+static void register_power8_tce_address_control_sprs(CPUPPCState *env)
 {
     spr_register_kvm(env, SPR_TAR, "TAR",
                      &spr_read_tar, &spr_write_tar,
@@ -8190,31 +7264,7 @@
                      KVM_REG_PPC_TAR, 0x00000000);
 }
 
-static void spr_read_tm(DisasContext *ctx, int gprn, int sprn)
-{
-    gen_msr_facility_check(ctx, SPR_FSCR, MSR_TM, sprn, FSCR_IC_TM);
-    spr_read_generic(ctx, gprn, sprn);
-}
-
-static void spr_write_tm(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_msr_facility_check(ctx, SPR_FSCR, MSR_TM, sprn, FSCR_IC_TM);
-    spr_write_generic(ctx, sprn, gprn);
-}
-
-static void spr_read_tm_upper32(DisasContext *ctx, int gprn, int sprn)
-{
-    gen_msr_facility_check(ctx, SPR_FSCR, MSR_TM, sprn, FSCR_IC_TM);
-    spr_read_prev_upper32(ctx, gprn, sprn);
-}
-
-static void spr_write_tm_upper32(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_msr_facility_check(ctx, SPR_FSCR, MSR_TM, sprn, FSCR_IC_TM);
-    spr_write_prev_upper32(ctx, sprn, gprn);
-}
-
-static void gen_spr_power8_tm(CPUPPCState *env)
+static void register_power8_tm_sprs(CPUPPCState *env)
 {
     spr_register_kvm(env, SPR_TFHAR, "TFHAR",
                      &spr_read_tm, &spr_write_tm,
@@ -8234,31 +7284,7 @@
                  0x00000000);
 }
 
-static void spr_read_ebb(DisasContext *ctx, int gprn, int sprn)
-{
-    gen_fscr_facility_check(ctx, SPR_FSCR, FSCR_EBB, sprn, FSCR_IC_EBB);
-    spr_read_generic(ctx, gprn, sprn);
-}
-
-static void spr_write_ebb(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_fscr_facility_check(ctx, SPR_FSCR, FSCR_EBB, sprn, FSCR_IC_EBB);
-    spr_write_generic(ctx, sprn, gprn);
-}
-
-static void spr_read_ebb_upper32(DisasContext *ctx, int gprn, int sprn)
-{
-    gen_fscr_facility_check(ctx, SPR_FSCR, FSCR_EBB, sprn, FSCR_IC_EBB);
-    spr_read_prev_upper32(ctx, gprn, sprn);
-}
-
-static void spr_write_ebb_upper32(DisasContext *ctx, int sprn, int gprn)
-{
-    gen_fscr_facility_check(ctx, SPR_FSCR, FSCR_EBB, sprn, FSCR_IC_EBB);
-    spr_write_prev_upper32(ctx, sprn, gprn);
-}
-
-static void gen_spr_power8_ebb(CPUPPCState *env)
+static void register_power8_ebb_sprs(CPUPPCState *env)
 {
     spr_register(env, SPR_BESCRS, "BESCRS",
                  &spr_read_ebb, &spr_write_ebb,
@@ -8291,7 +7317,7 @@
 }
 
 /* Virtual Time Base */
-static void gen_spr_vtb(CPUPPCState *env)
+static void register_vtb_sprs(CPUPPCState *env)
 {
     spr_register_kvm_hv(env, SPR_VTB, "VTB",
                         SPR_NOACCESS, SPR_NOACCESS,
@@ -8300,7 +7326,7 @@
                         KVM_REG_PPC_VTB, 0x00000000);
 }
 
-static void gen_spr_power8_fscr(CPUPPCState *env)
+static void register_power8_fscr_sprs(CPUPPCState *env)
 {
 #if defined(CONFIG_USER_ONLY)
     target_ulong initval = 1ULL << FSCR_TAR;
@@ -8313,7 +7339,7 @@
                      KVM_REG_PPC_FSCR, initval);
 }
 
-static void gen_spr_power8_pspb(CPUPPCState *env)
+static void register_power8_pspb_sprs(CPUPPCState *env)
 {
     spr_register_kvm(env, SPR_PSPB, "PSPB",
                      SPR_NOACCESS, SPR_NOACCESS,
@@ -8321,7 +7347,7 @@
                      KVM_REG_PPC_PSPB, 0);
 }
 
-static void gen_spr_power8_dpdes(CPUPPCState *env)
+static void register_power8_dpdes_sprs(CPUPPCState *env)
 {
 #if !defined(CONFIG_USER_ONLY)
     /* Directed Privileged Door-bell Exception State, used for IPI */
@@ -8333,7 +7359,7 @@
 #endif
 }
 
-static void gen_spr_power8_ic(CPUPPCState *env)
+static void register_power8_ic_sprs(CPUPPCState *env)
 {
 #if !defined(CONFIG_USER_ONLY)
     spr_register_hv(env, SPR_IC, "IC",
@@ -8344,7 +7370,7 @@
 #endif
 }
 
-static void gen_spr_power8_book4(CPUPPCState *env)
+static void register_power8_book4_sprs(CPUPPCState *env)
 {
     /* Add a number of P8 book4 registers */
 #if !defined(CONFIG_USER_ONLY)
@@ -8363,7 +7389,7 @@
 #endif
 }
 
-static void gen_spr_power7_book4(CPUPPCState *env)
+static void register_power7_book4_sprs(CPUPPCState *env)
 {
     /* Add a number of P7 book4 registers */
 #if !defined(CONFIG_USER_ONLY)
@@ -8378,7 +7404,7 @@
 #endif
 }
 
-static void gen_spr_power8_rpr(CPUPPCState *env)
+static void register_power8_rpr_sprs(CPUPPCState *env)
 {
 #if !defined(CONFIG_USER_ONLY)
     spr_register_hv(env, SPR_RPR, "RPR",
@@ -8389,7 +7415,7 @@
 #endif
 }
 
-static void gen_spr_power9_mmu(CPUPPCState *env)
+static void register_power9_mmu_sprs(CPUPPCState *env)
 {
 #if !defined(CONFIG_USER_ONLY)
     /* Partition Table Control */
@@ -8409,13 +7435,13 @@
 
 static void init_proc_book3s_common(CPUPPCState *env)
 {
-    gen_spr_ne_601(env);
-    gen_tbl(env);
-    gen_spr_usprg3(env);
-    gen_spr_book3s_altivec(env);
-    gen_spr_book3s_pmu_sup(env);
-    gen_spr_book3s_pmu_user(env);
-    gen_spr_book3s_ctrl(env);
+    register_ne_601_sprs(env);
+    register_tbl(env);
+    register_usprg3_sprs(env);
+    register_book3s_altivec_sprs(env);
+    register_book3s_pmu_sup_sprs(env);
+    register_book3s_pmu_user_sprs(env);
+    register_book3s_ctrl_sprs(env);
     /*
      * Can't find information on what this should be on reset.  This
      * value is the one used by 74xx processors.
@@ -8427,17 +7453,17 @@
 {
     /* Common Registers */
     init_proc_book3s_common(env);
-    gen_spr_sdr1(env);
-    gen_spr_book3s_dbg(env);
+    register_sdr1_sprs(env);
+    register_book3s_dbg_sprs(env);
 
     /* 970 Specific Registers */
-    gen_spr_970_hid(env);
-    gen_spr_970_hior(env);
-    gen_low_BATs(env);
-    gen_spr_970_pmu_sup(env);
-    gen_spr_970_pmu_user(env);
-    gen_spr_970_lpar(env);
-    gen_spr_970_dbg(env);
+    register_970_hid_sprs(env);
+    register_970_hior_sprs(env);
+    register_low_BATs(env);
+    register_970_pmu_sup_sprs(env);
+    register_970_pmu_user_sprs(env);
+    register_970_lpar_sprs(env);
+    register_970_dbg_sprs(env);
 
     /* env variables */
     env->dcache_line_size = 128;
@@ -8500,19 +7526,19 @@
 {
     /* Common Registers */
     init_proc_book3s_common(env);
-    gen_spr_sdr1(env);
-    gen_spr_book3s_dbg(env);
+    register_sdr1_sprs(env);
+    register_book3s_dbg_sprs(env);
 
     /* POWER5+ Specific Registers */
-    gen_spr_970_hid(env);
-    gen_spr_970_hior(env);
-    gen_low_BATs(env);
-    gen_spr_970_pmu_sup(env);
-    gen_spr_970_pmu_user(env);
-    gen_spr_power5p_common(env);
-    gen_spr_power5p_lpar(env);
-    gen_spr_power5p_ear(env);
-    gen_spr_power5p_tb(env);
+    register_970_hid_sprs(env);
+    register_970_hior_sprs(env);
+    register_low_BATs(env);
+    register_970_pmu_sup_sprs(env);
+    register_970_pmu_user_sprs(env);
+    register_power5p_common_sprs(env);
+    register_power5p_lpar_sprs(env);
+    register_power5p_ear_sprs(env);
+    register_power5p_tb_sprs(env);
 
     /* env variables */
     env->dcache_line_size = 128;
@@ -8579,21 +7605,21 @@
 {
     /* Common Registers */
     init_proc_book3s_common(env);
-    gen_spr_sdr1(env);
-    gen_spr_book3s_dbg(env);
+    register_sdr1_sprs(env);
+    register_book3s_dbg_sprs(env);
 
     /* POWER7 Specific Registers */
-    gen_spr_book3s_ids(env);
-    gen_spr_rmor(env);
-    gen_spr_amr(env);
-    gen_spr_book3s_purr(env);
-    gen_spr_power5p_common(env);
-    gen_spr_power5p_lpar(env);
-    gen_spr_power5p_ear(env);
-    gen_spr_power5p_tb(env);
-    gen_spr_power6_common(env);
-    gen_spr_power6_dbg(env);
-    gen_spr_power7_book4(env);
+    register_book3s_ids_sprs(env);
+    register_rmor_sprs(env);
+    register_amr_sprs(env);
+    register_book3s_purr_sprs(env);
+    register_power5p_common_sprs(env);
+    register_power5p_lpar_sprs(env);
+    register_power5p_ear_sprs(env);
+    register_power5p_tb_sprs(env);
+    register_power6_common_sprs(env);
+    register_power6_dbg_sprs(env);
+    register_power7_book4_sprs(env);
 
     /* env variables */
     env->dcache_line_size = 128;
@@ -8725,34 +7751,34 @@
 {
     /* Common Registers */
     init_proc_book3s_common(env);
-    gen_spr_sdr1(env);
-    gen_spr_book3s_207_dbg(env);
+    register_sdr1_sprs(env);
+    register_book3s_207_dbg_sprs(env);
 
     /* POWER8 Specific Registers */
-    gen_spr_book3s_ids(env);
-    gen_spr_rmor(env);
-    gen_spr_amr(env);
-    gen_spr_iamr(env);
-    gen_spr_book3s_purr(env);
-    gen_spr_power5p_common(env);
-    gen_spr_power5p_lpar(env);
-    gen_spr_power5p_ear(env);
-    gen_spr_power5p_tb(env);
-    gen_spr_power6_common(env);
-    gen_spr_power6_dbg(env);
-    gen_spr_power8_tce_address_control(env);
-    gen_spr_power8_ids(env);
-    gen_spr_power8_ebb(env);
-    gen_spr_power8_fscr(env);
-    gen_spr_power8_pmu_sup(env);
-    gen_spr_power8_pmu_user(env);
-    gen_spr_power8_tm(env);
-    gen_spr_power8_pspb(env);
-    gen_spr_power8_dpdes(env);
-    gen_spr_vtb(env);
-    gen_spr_power8_ic(env);
-    gen_spr_power8_book4(env);
-    gen_spr_power8_rpr(env);
+    register_book3s_ids_sprs(env);
+    register_rmor_sprs(env);
+    register_amr_sprs(env);
+    register_iamr_sprs(env);
+    register_book3s_purr_sprs(env);
+    register_power5p_common_sprs(env);
+    register_power5p_lpar_sprs(env);
+    register_power5p_ear_sprs(env);
+    register_power5p_tb_sprs(env);
+    register_power6_common_sprs(env);
+    register_power6_dbg_sprs(env);
+    register_power8_tce_address_control_sprs(env);
+    register_power8_ids_sprs(env);
+    register_power8_ebb_sprs(env);
+    register_power8_fscr_sprs(env);
+    register_power8_pmu_sup_sprs(env);
+    register_power8_pmu_user_sprs(env);
+    register_power8_tm_sprs(env);
+    register_power8_pspb_sprs(env);
+    register_power8_dpdes_sprs(env);
+    register_vtb_sprs(env);
+    register_power8_ic_sprs(env);
+    register_power8_book4_sprs(env);
+    register_power8_rpr_sprs(env);
 
     /* env variables */
     env->dcache_line_size = 128;
@@ -8922,33 +7948,33 @@
 {
     /* Common Registers */
     init_proc_book3s_common(env);
-    gen_spr_book3s_207_dbg(env);
+    register_book3s_207_dbg_sprs(env);
 
     /* POWER8 Specific Registers */
-    gen_spr_book3s_ids(env);
-    gen_spr_amr(env);
-    gen_spr_iamr(env);
-    gen_spr_book3s_purr(env);
-    gen_spr_power5p_common(env);
-    gen_spr_power5p_lpar(env);
-    gen_spr_power5p_ear(env);
-    gen_spr_power5p_tb(env);
-    gen_spr_power6_common(env);
-    gen_spr_power6_dbg(env);
-    gen_spr_power8_tce_address_control(env);
-    gen_spr_power8_ids(env);
-    gen_spr_power8_ebb(env);
-    gen_spr_power8_fscr(env);
-    gen_spr_power8_pmu_sup(env);
-    gen_spr_power8_pmu_user(env);
-    gen_spr_power8_tm(env);
-    gen_spr_power8_pspb(env);
-    gen_spr_power8_dpdes(env);
-    gen_spr_vtb(env);
-    gen_spr_power8_ic(env);
-    gen_spr_power8_book4(env);
-    gen_spr_power8_rpr(env);
-    gen_spr_power9_mmu(env);
+    register_book3s_ids_sprs(env);
+    register_amr_sprs(env);
+    register_iamr_sprs(env);
+    register_book3s_purr_sprs(env);
+    register_power5p_common_sprs(env);
+    register_power5p_lpar_sprs(env);
+    register_power5p_ear_sprs(env);
+    register_power5p_tb_sprs(env);
+    register_power6_common_sprs(env);
+    register_power6_dbg_sprs(env);
+    register_power8_tce_address_control_sprs(env);
+    register_power8_ids_sprs(env);
+    register_power8_ebb_sprs(env);
+    register_power8_fscr_sprs(env);
+    register_power8_pmu_sup_sprs(env);
+    register_power8_pmu_user_sprs(env);
+    register_power8_tm_sprs(env);
+    register_power8_pspb_sprs(env);
+    register_power8_dpdes_sprs(env);
+    register_vtb_sprs(env);
+    register_power8_ic_sprs(env);
+    register_power8_book4_sprs(env);
+    register_power8_rpr_sprs(env);
+    register_power9_mmu_sprs(env);
 
     /* POWER9 Specific registers */
     spr_register_kvm(env, SPR_TIDR, "TIDR", NULL, NULL,
@@ -9140,31 +8166,31 @@
 {
     /* Common Registers */
     init_proc_book3s_common(env);
-    gen_spr_book3s_207_dbg(env);
+    register_book3s_207_dbg_sprs(env);
 
     /* POWER8 Specific Registers */
-    gen_spr_book3s_ids(env);
-    gen_spr_amr(env);
-    gen_spr_iamr(env);
-    gen_spr_book3s_purr(env);
-    gen_spr_power5p_common(env);
-    gen_spr_power5p_lpar(env);
-    gen_spr_power5p_ear(env);
-    gen_spr_power6_common(env);
-    gen_spr_power6_dbg(env);
-    gen_spr_power8_tce_address_control(env);
-    gen_spr_power8_ids(env);
-    gen_spr_power8_ebb(env);
-    gen_spr_power8_fscr(env);
-    gen_spr_power8_pmu_sup(env);
-    gen_spr_power8_pmu_user(env);
-    gen_spr_power8_tm(env);
-    gen_spr_power8_pspb(env);
-    gen_spr_vtb(env);
-    gen_spr_power8_ic(env);
-    gen_spr_power8_book4(env);
-    gen_spr_power8_rpr(env);
-    gen_spr_power9_mmu(env);
+    register_book3s_ids_sprs(env);
+    register_amr_sprs(env);
+    register_iamr_sprs(env);
+    register_book3s_purr_sprs(env);
+    register_power5p_common_sprs(env);
+    register_power5p_lpar_sprs(env);
+    register_power5p_ear_sprs(env);
+    register_power6_common_sprs(env);
+    register_power6_dbg_sprs(env);
+    register_power8_tce_address_control_sprs(env);
+    register_power8_ids_sprs(env);
+    register_power8_ebb_sprs(env);
+    register_power8_fscr_sprs(env);
+    register_power8_pmu_sup_sprs(env);
+    register_power8_pmu_user_sprs(env);
+    register_power8_tm_sprs(env);
+    register_power8_pspb_sprs(env);
+    register_vtb_sprs(env);
+    register_power8_ic_sprs(env);
+    register_power8_book4_sprs(env);
+    register_power8_rpr_sprs(env);
+    register_power9_mmu_sprs(env);
 
     /* FIXME: Filter fields properly based on privilege level */
     spr_register_kvm_hv(env, SPR_PSSCR, "PSSCR", NULL, NULL, NULL, NULL,
@@ -9369,7 +8395,7 @@
     env->tlb_type = TLB_NONE;
 #endif
     /* Register SPR common to all PowerPC implementations */
-    gen_spr_generic(env);
+    register_generic_sprs(env);
     spr_register(env, SPR_PVR, "PVR",
                  /* Linux permits userspace to read PVR */
 #if defined(CONFIG_LINUX_USER)
@@ -10342,4 +9368,197 @@
 #endif
 }
 
+/*
+ * ppc_cpu_dump_state:
+ * @cs: CPU whose state is printed
+ * @f: stream the dump is written to
+ * @flags: bit mask; CPU_DUMP_FPU adds the FPRs and FPSCR
+ *
+ * Print NIP/LR/CTR/XER, MSR/HID0/hflags, both MMU indexes, the GPRs,
+ * CR and the reservation address.  Builds without CONFIG_USER_ONLY also
+ * print the SPRs selected by the exception model and the MMU model.
+ */
+void ppc_cpu_dump_state(CPUState *cs, FILE *f, int flags)
+{
+#define RGPL  4 /* GPRs printed per output line */
+#define RFPL  4 /* FPRs printed per output line */
+
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    int i;
+
+    qemu_fprintf(f, "NIP " TARGET_FMT_lx "   LR " TARGET_FMT_lx " CTR "
+                 TARGET_FMT_lx " XER " TARGET_FMT_lx " CPU#%d\n",
+                 env->nip, env->lr, env->ctr, cpu_read_xer(env),
+                 cs->cpu_index);
+    qemu_fprintf(f, "MSR " TARGET_FMT_lx " HID0 " TARGET_FMT_lx "  HF "
+                 "%08x iidx %d didx %d\n",
+                 env->msr, env->spr[SPR_HID0], env->hflags,
+                 cpu_mmu_index(env, true), cpu_mmu_index(env, false));
+#if !defined(NO_TIMER_DUMP)
+    qemu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
+#if !defined(CONFIG_USER_ONLY)
+                 " DECR " TARGET_FMT_lu
+#endif
+                 "\n",
+                 cpu_ppc_load_tbu(env), cpu_ppc_load_tbl(env)
+#if !defined(CONFIG_USER_ONLY)
+                 , cpu_ppc_load_decr(env)
+#endif
+        );
+#endif
+    for (i = 0; i < 32; i++) {
+        if ((i & (RGPL - 1)) == 0) {
+            qemu_fprintf(f, "GPR%02d", i);
+        }
+        qemu_fprintf(f, " %016" PRIx64, ppc_dump_gpr(env, i));
+        if ((i & (RGPL - 1)) == (RGPL - 1)) {
+            qemu_fprintf(f, "\n");
+        }
+    }
+    qemu_fprintf(f, "CR ");
+    for (i = 0; i < 8; i++) {
+        qemu_fprintf(f, "%01x", env->crf[i]);
+    }
+    qemu_fprintf(f, "  [");
+    for (i = 0; i < 8; i++) {
+        char a = '-';
+        if (env->crf[i] & 0x08) {
+            a = 'L';
+        } else if (env->crf[i] & 0x04) {
+            a = 'G';
+        } else if (env->crf[i] & 0x02) {
+            a = 'E';
+        }
+        qemu_fprintf(f, " %c%c", a, env->crf[i] & 0x01 ? 'O' : ' ');
+    }
+    qemu_fprintf(f, " ]             RES " TARGET_FMT_lx "\n",
+                 env->reserve_addr);
+
+    if (flags & CPU_DUMP_FPU) {
+        for (i = 0; i < 32; i++) {
+            if ((i & (RFPL - 1)) == 0) {
+                qemu_fprintf(f, "FPR%02d", i);
+            }
+            qemu_fprintf(f, " %016" PRIx64, *cpu_fpr_ptr(env, i));
+            if ((i & (RFPL - 1)) == (RFPL - 1)) {
+                qemu_fprintf(f, "\n");
+            }
+        }
+        qemu_fprintf(f, "FPSCR " TARGET_FMT_lx "\n", env->fpscr);
+    }
+
+#if !defined(CONFIG_USER_ONLY)
+    qemu_fprintf(f, " SRR0 " TARGET_FMT_lx "  SRR1 " TARGET_FMT_lx
+                 "    PVR " TARGET_FMT_lx " VRSAVE " TARGET_FMT_lx "\n",
+                 env->spr[SPR_SRR0], env->spr[SPR_SRR1],
+                 env->spr[SPR_PVR], env->spr[SPR_VRSAVE]);
+
+    qemu_fprintf(f, "SPRG0 " TARGET_FMT_lx " SPRG1 " TARGET_FMT_lx
+                 "  SPRG2 " TARGET_FMT_lx "  SPRG3 " TARGET_FMT_lx "\n",
+                 env->spr[SPR_SPRG0], env->spr[SPR_SPRG1],
+                 env->spr[SPR_SPRG2], env->spr[SPR_SPRG3]);
+
+    qemu_fprintf(f, "SPRG4 " TARGET_FMT_lx " SPRG5 " TARGET_FMT_lx
+                 "  SPRG6 " TARGET_FMT_lx "  SPRG7 " TARGET_FMT_lx "\n",
+                 env->spr[SPR_SPRG4], env->spr[SPR_SPRG5],
+                 env->spr[SPR_SPRG6], env->spr[SPR_SPRG7]);
+
+#if defined(TARGET_PPC64)
+    if (env->excp_model == POWERPC_EXCP_POWER7 ||
+        env->excp_model == POWERPC_EXCP_POWER8 ||
+        env->excp_model == POWERPC_EXCP_POWER9 ||
+        env->excp_model == POWERPC_EXCP_POWER10)  {
+        qemu_fprintf(f, "HSRR0 " TARGET_FMT_lx " HSRR1 " TARGET_FMT_lx "\n",
+                     env->spr[SPR_HSRR0], env->spr[SPR_HSRR1]);
+    }
+#endif
+    if (env->excp_model == POWERPC_EXCP_BOOKE) {
+        qemu_fprintf(f, "CSRR0 " TARGET_FMT_lx " CSRR1 " TARGET_FMT_lx
+                     " MCSRR0 " TARGET_FMT_lx " MCSRR1 " TARGET_FMT_lx "\n",
+                     env->spr[SPR_BOOKE_CSRR0], env->spr[SPR_BOOKE_CSRR1],
+                     env->spr[SPR_BOOKE_MCSRR0], env->spr[SPR_BOOKE_MCSRR1]);
+
+        qemu_fprintf(f, "  TCR " TARGET_FMT_lx "   TSR " TARGET_FMT_lx
+                     "    ESR " TARGET_FMT_lx "   DEAR " TARGET_FMT_lx "\n",
+                     env->spr[SPR_BOOKE_TCR], env->spr[SPR_BOOKE_TSR],
+                     env->spr[SPR_BOOKE_ESR], env->spr[SPR_BOOKE_DEAR]);
+
+        qemu_fprintf(f, "  PIR " TARGET_FMT_lx " DECAR " TARGET_FMT_lx
+                     "   IVPR " TARGET_FMT_lx "   EPCR " TARGET_FMT_lx "\n",
+                     env->spr[SPR_BOOKE_PIR], env->spr[SPR_BOOKE_DECAR],
+                     env->spr[SPR_BOOKE_IVPR], env->spr[SPR_BOOKE_EPCR]);
+
+        qemu_fprintf(f, " MCSR " TARGET_FMT_lx " SPRG8 " TARGET_FMT_lx
+                     "    EPR " TARGET_FMT_lx "\n",
+                     env->spr[SPR_BOOKE_MCSR], env->spr[SPR_BOOKE_SPRG8],
+                     env->spr[SPR_BOOKE_EPR]);
+
+        /* FSL-specific */
+        qemu_fprintf(f, " MCAR " TARGET_FMT_lx "  PID1 " TARGET_FMT_lx
+                     "   PID2 " TARGET_FMT_lx "    SVR " TARGET_FMT_lx "\n",
+                     env->spr[SPR_Exxx_MCAR], env->spr[SPR_BOOKE_PID1],
+                     env->spr[SPR_BOOKE_PID2], env->spr[SPR_E500_SVR]);
+
+        /*
+         * IVORs are left out as they are large and do not change often --
+         * they can be read with "p $ivor0", "p $ivor1", etc.
+         */
+    }
+
+#if defined(TARGET_PPC64)
+    if (env->flags & POWERPC_FLAG_CFAR) {
+        qemu_fprintf(f, " CFAR " TARGET_FMT_lx "\n", env->cfar);
+    }
+#endif
+
+    if (env->spr_cb[SPR_LPCR].name) {
+        qemu_fprintf(f, " LPCR " TARGET_FMT_lx "\n", env->spr[SPR_LPCR]);
+    }
+
+    switch (env->mmu_model) {
+    case POWERPC_MMU_32B:
+    case POWERPC_MMU_601:
+    case POWERPC_MMU_SOFT_6xx:
+    case POWERPC_MMU_SOFT_74xx:
+#if defined(TARGET_PPC64)
+    case POWERPC_MMU_64B:
+    case POWERPC_MMU_2_03:
+    case POWERPC_MMU_2_06:
+    case POWERPC_MMU_2_07:
+    case POWERPC_MMU_3_00:
+#endif
+        if (env->spr_cb[SPR_SDR1].name) { /* SDR1 Exists */
+            qemu_fprintf(f, " SDR1 " TARGET_FMT_lx " ", env->spr[SPR_SDR1]);
+        }
+        if (env->spr_cb[SPR_PTCR].name) { /* PTCR Exists */
+            qemu_fprintf(f, " PTCR " TARGET_FMT_lx " ", env->spr[SPR_PTCR]);
+        }
+        qemu_fprintf(f, "  DAR " TARGET_FMT_lx "  DSISR " TARGET_FMT_lx "\n",
+                     env->spr[SPR_DAR], env->spr[SPR_DSISR]);
+        break;
+    case POWERPC_MMU_BOOKE206:
+        qemu_fprintf(f, " MAS0 " TARGET_FMT_lx "  MAS1 " TARGET_FMT_lx
+                     "   MAS2 " TARGET_FMT_lx "   MAS3 " TARGET_FMT_lx "\n",
+                     env->spr[SPR_BOOKE_MAS0], env->spr[SPR_BOOKE_MAS1],
+                     env->spr[SPR_BOOKE_MAS2], env->spr[SPR_BOOKE_MAS3]);
+
+        qemu_fprintf(f, " MAS4 " TARGET_FMT_lx "  MAS6 " TARGET_FMT_lx
+                     "   MAS7 " TARGET_FMT_lx "    PID " TARGET_FMT_lx "\n",
+                     env->spr[SPR_BOOKE_MAS4], env->spr[SPR_BOOKE_MAS6],
+                     env->spr[SPR_BOOKE_MAS7], env->spr[SPR_BOOKE_PID]);
+
+        qemu_fprintf(f, "MMUCFG " TARGET_FMT_lx " TLB0CFG " TARGET_FMT_lx
+                     " TLB1CFG " TARGET_FMT_lx "\n",
+                     env->spr[SPR_MMUCFG], env->spr[SPR_BOOKE_TLB0CFG],
+                     env->spr[SPR_BOOKE_TLB1CFG]);
+        break;
+    default:
+        break;
+    }
+#endif
+
+#undef RGPL
+#undef RFPL
+}
 type_init(ppc_cpu_register_types)
diff --git a/target/ppc/gdbstub.c b/target/ppc/gdbstub.c
index 94a7273..9339e7e 100644
--- a/target/ppc/gdbstub.c
+++ b/target/ppc/gdbstub.c
@@ -498,7 +498,7 @@
         return 16;
     }
     if (n == 32) {
-        gdb_get_reg32(buf, helper_mfvscr(env));
+        gdb_get_reg32(buf, ppc_get_vscr(env));
         mem_buf = gdb_get_reg_ptr(buf, 4);
         ppc_maybe_bswap_register(env, mem_buf, 4);
         return 4;
@@ -529,7 +529,7 @@
     }
     if (n == 32) {
         ppc_maybe_bswap_register(env, mem_buf, 4);
-        helper_mtvscr(env, ldl_p(mem_buf));
+        ppc_store_vscr(env, ldl_p(mem_buf));
         return 4;
     }
     if (n == 33) {
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 513066d..ea9f2a2 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -1,5 +1,5 @@
-DEF_HELPER_FLAGS_3(raise_exception_err, TCG_CALL_NO_WG, void, env, i32, i32)
-DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, void, env, i32)
+DEF_HELPER_FLAGS_3(raise_exception_err, TCG_CALL_NO_WG, noreturn, env, i32, i32)
+DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32)
 DEF_HELPER_FLAGS_4(tw, TCG_CALL_NO_WG, void, env, tl, tl, i32)
 #if defined(TARGET_PPC64)
 DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index a44c2d9..41f8477 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -462,17 +462,12 @@
 
 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
 {
-    env->vscr = vscr & ~(1u << VSCR_SAT);
-    /* Which bit we set is completely arbitrary, but clear the rest.  */
-    env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
-    env->vscr_sat.u64[1] = 0;
-    set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
+    ppc_store_vscr(env, vscr);
 }
 
 uint32_t helper_mfvscr(CPUPPCState *env)
 {
-    uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
-    return env->vscr | (sat << VSCR_SAT);
+    return ppc_get_vscr(env);
 }
 
 static inline void set_vscr_sat(CPUPPCState *env)
diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 184ba6d..2b4b06e 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -228,4 +228,23 @@
 void ppc_gdb_init(CPUState *cs, PowerPCCPUClass *ppc);
 gchar *ppc_gdb_arch_name(CPUState *cs);
 
+/**
+ * prot_for_access_type:
+ * @access_type: one of MMU_INST_FETCH, MMU_DATA_LOAD or MMU_DATA_STORE
+ *
+ * Return PAGE_EXEC, PAGE_READ or PAGE_WRITE respectively; other values assert.
+ */
+static inline int prot_for_access_type(MMUAccessType access_type)
+{
+    switch (access_type) {
+    case MMU_INST_FETCH:
+        return PAGE_EXEC;
+    case MMU_DATA_LOAD:
+        return PAGE_READ;
+    case MMU_DATA_STORE:
+        return PAGE_WRITE;
+    }
+    g_assert_not_reached();
+}
+
 #endif /* PPC_INTERNAL_H */
diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index e5bffbe..93972df 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -8,7 +8,6 @@
 #include "qapi/error.h"
 #include "qemu/main-loop.h"
 #include "kvm_ppc.h"
-#include "exec/helper-proto.h"
 
 static void post_load_update_msr(CPUPPCState *env)
 {
@@ -107,7 +106,7 @@
         ppc_store_sdr1(env, sdr1);
     }
     qemu_get_be32s(f, &vscr);
-    helper_mtvscr(env, vscr);
+    ppc_store_vscr(env, vscr);
     qemu_get_be64s(f, &env->spe_acc);
     qemu_get_be32s(f, &env->spe_fscr);
     qemu_get_betls(f, &env->msr_mask);
@@ -456,7 +455,7 @@
                     const VMStateField *field)
 {
     PowerPCCPU *cpu = opaque;
-    helper_mtvscr(&cpu->env, qemu_get_be32(f));
+    ppc_store_vscr(&cpu->env, qemu_get_be32(f));
     return 0;
 }
 
@@ -464,7 +463,7 @@
                     const VMStateField *field, JSONWriter *vmdesc)
 {
     PowerPCCPU *cpu = opaque;
-    qemu_put_be32(f, helper_mfvscr(&cpu->env));
+    qemu_put_be32(f, ppc_get_vscr(&cpu->env));
     return 0;
 }
 
diff --git a/target/ppc/meson.build b/target/ppc/meson.build
index 4079d01..d1aa7d5 100644
--- a/target/ppc/meson.build
+++ b/target/ppc/meson.build
@@ -2,6 +2,7 @@
 ppc_ss.add(files(
   'cpu-models.c',
   'cpu.c',
+  'cpu_init.c',
   'dfp_helper.c',
   'excp_helper.c',
   'fpu_helper.c',
diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
index 002958b..08a31da 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -261,6 +261,16 @@
     hreg_store_msr(env, value, 0);
 }
 
+void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val)
+{
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    CPUPPCState *env = &cpu->env;
+
+    env->spr[SPR_LPCR] = val & pcc->lpcr_mask;
+    /* The gtse bit affects hflags, so recompute them after the store */
+    hreg_compute_hflags(env);
+}
+
 /*
  * This code is lifted from MacOnLinux. It is called whenever THRM1,2
  * or 3 is read an fixes up the values in such a way that will make
diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
index 178cf09..744a763 100644
--- a/target/ppc/mmu-hash32.c
+++ b/target/ppc/mmu-hash32.c
@@ -24,6 +24,7 @@
 #include "exec/helper-proto.h"
 #include "sysemu/kvm.h"
 #include "kvm_ppc.h"
+#include "internal.h"
 #include "mmu-hash32.h"
 #include "exec/log.h"
 
@@ -152,16 +153,17 @@
     return ppc_hash32_pp_prot(key, pp, 0);
 }
 
-static hwaddr ppc_hash32_bat_lookup(PowerPCCPU *cpu, target_ulong ea, int rwx,
-                                    int *prot)
+static hwaddr ppc_hash32_bat_lookup(PowerPCCPU *cpu, target_ulong ea,
+                                    MMUAccessType access_type, int *prot)
 {
     CPUPPCState *env = &cpu->env;
     target_ulong *BATlt, *BATut;
+    bool ifetch = access_type == MMU_INST_FETCH;
     int i;
 
     LOG_BATS("%s: %cBAT v " TARGET_FMT_lx "\n", __func__,
-             rwx == 2 ? 'I' : 'D', ea);
-    if (rwx == 2) {
+             ifetch ? 'I' : 'D', ea);
+    if (ifetch) {
         BATlt = env->IBAT[1];
         BATut = env->IBAT[0];
     } else {
@@ -180,7 +182,7 @@
         }
         LOG_BATS("%s: %cBAT%d v " TARGET_FMT_lx " BATu " TARGET_FMT_lx
                  " BATl " TARGET_FMT_lx "\n", __func__,
-                 type == ACCESS_CODE ? 'I' : 'D', i, ea, batu, batl);
+                 ifetch ? 'I' : 'D', i, ea, batu, batl);
 
         if (mask && ((ea & mask) == (batu & BATU32_BEPI))) {
             hwaddr raddr = (batl & mask) | (ea & ~mask);
@@ -208,7 +210,7 @@
             LOG_BATS("%s: %cBAT%d v " TARGET_FMT_lx " BATu " TARGET_FMT_lx
                      " BATl " TARGET_FMT_lx "\n\t" TARGET_FMT_lx " "
                      TARGET_FMT_lx " " TARGET_FMT_lx "\n",
-                     __func__, type == ACCESS_CODE ? 'I' : 'D', i, ea,
+                     __func__, ifetch ? 'I' : 'D', i, ea,
                      *BATu, *BATl, BEPIu, BEPIl, bl);
         }
     }
@@ -218,7 +220,8 @@
 }
 
 static int ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr,
-                                   target_ulong eaddr, int rwx,
+                                   target_ulong eaddr,
+                                   MMUAccessType access_type,
                                    hwaddr *raddr, int *prot)
 {
     CPUState *cs = CPU(cpu);
@@ -239,7 +242,7 @@
         return 0;
     }
 
-    if (rwx == 2) {
+    if (access_type == MMU_INST_FETCH) {
         /* No code fetch is allowed in direct-store areas */
         cs->exception_index = POWERPC_EXCP_ISI;
         env->error_code = 0x10000000;
@@ -260,7 +263,7 @@
         /* lwarx, ldarx or srwcx. */
         env->error_code = 0;
         env->spr[SPR_DAR] = eaddr;
-        if (rwx == 1) {
+        if (access_type == MMU_DATA_STORE) {
             env->spr[SPR_DSISR] = 0x06000000;
         } else {
             env->spr[SPR_DSISR] = 0x04000000;
@@ -280,7 +283,7 @@
         cs->exception_index = POWERPC_EXCP_DSI;
         env->error_code = 0;
         env->spr[SPR_DAR] = eaddr;
-        if (rwx == 1) {
+        if (access_type == MMU_DATA_STORE) {
             env->spr[SPR_DSISR] = 0x06100000;
         } else {
             env->spr[SPR_DSISR] = 0x04100000;
@@ -290,14 +293,15 @@
         cpu_abort(cs, "ERROR: instruction should not need "
                  "address translation\n");
     }
-    if ((rwx == 1 || key != 1) && (rwx == 0 || key != 0)) {
+    if ((access_type == MMU_DATA_STORE || key != 1) &&
+        (access_type == MMU_DATA_LOAD || key != 0)) {
         *raddr = eaddr;
         return 0;
     } else {
         cs->exception_index = POWERPC_EXCP_DSI;
         env->error_code = 0;
         env->spr[SPR_DAR] = eaddr;
-        if (rwx == 1) {
+        if (access_type == MMU_DATA_STORE) {
             env->spr[SPR_DSISR] = 0x0a000000;
         } else {
             env->spr[SPR_DSISR] = 0x08000000;
@@ -421,13 +425,16 @@
     hwaddr pte_offset;
     ppc_hash_pte32_t pte;
     int prot;
-    const int need_prot[] = {PAGE_READ, PAGE_WRITE, PAGE_EXEC};
+    int need_prot;
+    MMUAccessType access_type;
     hwaddr raddr;
 
     assert((rwx == 0) || (rwx == 1) || (rwx == 2));
+    access_type = rwx;
+    need_prot = prot_for_access_type(access_type);
 
     /* 1. Handle real mode accesses */
-    if (((rwx == 2) && (msr_ir == 0)) || ((rwx != 2) && (msr_dr == 0))) {
+    if (access_type == MMU_INST_FETCH ? !msr_ir : !msr_dr) {
         /* Translation is off */
         raddr = eaddr;
         tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
@@ -438,17 +445,17 @@
 
     /* 2. Check Block Address Translation entries (BATs) */
     if (env->nb_BATs != 0) {
-        raddr = ppc_hash32_bat_lookup(cpu, eaddr, rwx, &prot);
+        raddr = ppc_hash32_bat_lookup(cpu, eaddr, access_type, &prot);
         if (raddr != -1) {
-            if (need_prot[rwx] & ~prot) {
-                if (rwx == 2) {
+            if (need_prot & ~prot) {
+                if (access_type == MMU_INST_FETCH) {
                     cs->exception_index = POWERPC_EXCP_ISI;
                     env->error_code = 0x08000000;
                 } else {
                     cs->exception_index = POWERPC_EXCP_DSI;
                     env->error_code = 0;
                     env->spr[SPR_DAR] = eaddr;
-                    if (rwx == 1) {
+                    if (access_type == MMU_DATA_STORE) {
                         env->spr[SPR_DSISR] = 0x0a000000;
                     } else {
                         env->spr[SPR_DSISR] = 0x08000000;
@@ -469,7 +476,7 @@
 
     /* 4. Handle direct store segments */
     if (sr & SR32_T) {
-        if (ppc_hash32_direct_store(cpu, sr, eaddr, rwx,
+        if (ppc_hash32_direct_store(cpu, sr, eaddr, access_type,
                                     &raddr, &prot) == 0) {
             tlb_set_page(cs, eaddr & TARGET_PAGE_MASK,
                          raddr & TARGET_PAGE_MASK, prot, mmu_idx,
@@ -481,7 +488,7 @@
     }
 
     /* 5. Check for segment level no-execute violation */
-    if ((rwx == 2) && (sr & SR32_NX)) {
+    if (access_type == MMU_INST_FETCH && (sr & SR32_NX)) {
         cs->exception_index = POWERPC_EXCP_ISI;
         env->error_code = 0x10000000;
         return 1;
@@ -490,14 +497,14 @@
     /* 6. Locate the PTE in the hash table */
     pte_offset = ppc_hash32_htab_lookup(cpu, sr, eaddr, &pte);
     if (pte_offset == -1) {
-        if (rwx == 2) {
+        if (access_type == MMU_INST_FETCH) {
             cs->exception_index = POWERPC_EXCP_ISI;
             env->error_code = 0x40000000;
         } else {
             cs->exception_index = POWERPC_EXCP_DSI;
             env->error_code = 0;
             env->spr[SPR_DAR] = eaddr;
-            if (rwx == 1) {
+            if (access_type == MMU_DATA_STORE) {
                 env->spr[SPR_DSISR] = 0x42000000;
             } else {
                 env->spr[SPR_DSISR] = 0x40000000;
@@ -513,17 +520,17 @@
 
     prot = ppc_hash32_pte_prot(cpu, sr, pte);
 
-    if (need_prot[rwx] & ~prot) {
+    if (need_prot & ~prot) {
         /* Access right violation */
         qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
-        if (rwx == 2) {
+        if (access_type == MMU_INST_FETCH) {
             cs->exception_index = POWERPC_EXCP_ISI;
             env->error_code = 0x08000000;
         } else {
             cs->exception_index = POWERPC_EXCP_DSI;
             env->error_code = 0;
             env->spr[SPR_DAR] = eaddr;
-            if (rwx == 1) {
+            if (access_type == MMU_DATA_STORE) {
                 env->spr[SPR_DSISR] = 0x0a000000;
             } else {
                 env->spr[SPR_DSISR] = 0x08000000;
@@ -540,7 +547,7 @@
         ppc_hash32_set_r(cpu, pte_offset, pte.pte1);
     }
     if (!(pte.pte1 & HPTE32_R_C)) {
-        if (rwx == 1) {
+        if (access_type == MMU_DATA_STORE) {
             ppc_hash32_set_c(cpu, pte_offset, pte.pte1);
         } else {
             /*
diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index d517a99..f48b625 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -29,6 +29,7 @@
 #include "mmu-hash64.h"
 #include "exec/log.h"
 #include "hw/hw.h"
+#include "internal.h"
 #include "mmu-book3s-v3.h"
 #include "helper_regs.h"
 
@@ -876,10 +877,12 @@
     hwaddr ptex;
     ppc_hash_pte64_t pte;
     int exec_prot, pp_prot, amr_prot, prot;
-    const int need_prot[] = {PAGE_READ, PAGE_WRITE, PAGE_EXEC};
+    MMUAccessType access_type;
+    int need_prot;
     hwaddr raddr;
 
     assert((rwx == 0) || (rwx == 1) || (rwx == 2));
+    access_type = rwx;
 
     /*
      * Note on LPCR usage: 970 uses HID4, but our special variant of
@@ -890,7 +893,7 @@
      */
 
     /* 1. Handle real mode accesses */
-    if (((rwx == 2) && (msr_ir == 0)) || ((rwx != 2) && (msr_dr == 0))) {
+    if (access_type == MMU_INST_FETCH ? !msr_ir : !msr_dr) {
         /*
          * Translation is supposedly "off", but in real mode the top 4
          * effective address bits are (mostly) ignored
@@ -923,14 +926,19 @@
 
             /* Emulated old-style RMO mode, bounds check against RMLS */
             if (raddr >= limit) {
-                if (rwx == 2) {
+                switch (access_type) {
+                case MMU_INST_FETCH:
                     ppc_hash64_set_isi(cs, SRR1_PROTFAULT);
-                } else {
-                    int dsisr = DSISR_PROTFAULT;
-                    if (rwx == 1) {
-                        dsisr |= DSISR_ISSTORE;
-                    }
-                    ppc_hash64_set_dsi(cs, eaddr, dsisr);
+                    break;
+                case MMU_DATA_LOAD:
+                    ppc_hash64_set_dsi(cs, eaddr, DSISR_PROTFAULT);
+                    break;
+                case MMU_DATA_STORE:
+                    ppc_hash64_set_dsi(cs, eaddr,
+                                       DSISR_PROTFAULT | DSISR_ISSTORE);
+                    break;
+                default:
+                    g_assert_not_reached();
                 }
                 return 1;
             }
@@ -953,13 +961,19 @@
             exit(1);
         }
         /* Segment still not found, generate the appropriate interrupt */
-        if (rwx == 2) {
+        switch (access_type) {
+        case MMU_INST_FETCH:
             cs->exception_index = POWERPC_EXCP_ISEG;
             env->error_code = 0;
-        } else {
+            break;
+        case MMU_DATA_LOAD:
+        case MMU_DATA_STORE:
             cs->exception_index = POWERPC_EXCP_DSEG;
             env->error_code = 0;
             env->spr[SPR_DAR] = eaddr;
+            break;
+        default:
+            g_assert_not_reached();
         }
         return 1;
     }
@@ -967,7 +981,7 @@
 skip_slb_search:
 
     /* 3. Check for segment level no-execute violation */
-    if ((rwx == 2) && (slb->vsid & SLB_VSID_N)) {
+    if (access_type == MMU_INST_FETCH && (slb->vsid & SLB_VSID_N)) {
         ppc_hash64_set_isi(cs, SRR1_NOEXEC_GUARD);
         return 1;
     }
@@ -975,14 +989,18 @@
     /* 4. Locate the PTE in the hash table */
     ptex = ppc_hash64_htab_lookup(cpu, slb, eaddr, &pte, &apshift);
     if (ptex == -1) {
-        if (rwx == 2) {
+        switch (access_type) {
+        case MMU_INST_FETCH:
             ppc_hash64_set_isi(cs, SRR1_NOPTE);
-        } else {
-            int dsisr = DSISR_NOPTE;
-            if (rwx == 1) {
-                dsisr |= DSISR_ISSTORE;
-            }
-            ppc_hash64_set_dsi(cs, eaddr, dsisr);
+            break;
+        case MMU_DATA_LOAD:
+            ppc_hash64_set_dsi(cs, eaddr, DSISR_NOPTE);
+            break;
+        case MMU_DATA_STORE:
+            ppc_hash64_set_dsi(cs, eaddr, DSISR_NOPTE | DSISR_ISSTORE);
+            break;
+        default:
+            g_assert_not_reached();
         }
         return 1;
     }
@@ -996,10 +1014,11 @@
     amr_prot = ppc_hash64_amr_prot(cpu, pte);
     prot = exec_prot & pp_prot & amr_prot;
 
-    if ((need_prot[rwx] & ~prot) != 0) {
+    need_prot = prot_for_access_type(access_type);
+    if (need_prot & ~prot) {
         /* Access right violation */
         qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
-        if (rwx == 2) {
+        if (access_type == MMU_INST_FETCH) {
             int srr1 = 0;
             if (PAGE_EXEC & ~exec_prot) {
                 srr1 |= SRR1_NOEXEC_GUARD; /* Access violates noexec or guard */
@@ -1012,13 +1031,13 @@
             ppc_hash64_set_isi(cs, srr1);
         } else {
             int dsisr = 0;
-            if (need_prot[rwx] & ~pp_prot) {
+            if (need_prot & ~pp_prot) {
                 dsisr |= DSISR_PROTFAULT;
             }
-            if (rwx == 1) {
+            if (access_type == MMU_DATA_STORE) {
                 dsisr |= DSISR_ISSTORE;
             }
-            if (need_prot[rwx] & ~amr_prot) {
+            if (need_prot & ~amr_prot) {
                 dsisr |= DSISR_AMR;
             }
             ppc_hash64_set_dsi(cs, eaddr, dsisr);
@@ -1034,7 +1053,7 @@
         ppc_hash64_set_r(cpu, ptex, pte.pte1);
     }
     if (!(pte.pte1 & HPTE64_R_C)) {
-        if (rwx == 1) {
+        if (access_type == MMU_DATA_STORE) {
             ppc_hash64_set_c(cpu, ptex, pte.pte1);
         } else {
             /*
@@ -1120,16 +1139,6 @@
     cpu->env.tlb_need_flush = TLB_NEED_GLOBAL_FLUSH | TLB_NEED_LOCAL_FLUSH;
 }
 
-void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val)
-{
-    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
-    CPUPPCState *env = &cpu->env;
-
-    env->spr[SPR_LPCR] = val & pcc->lpcr_mask;
-    /* The gtse bit affects hflags */
-    hreg_compute_hflags(env);
-}
-
 void helper_store_lpcr(CPUPPCState *env, target_ulong val)
 {
     PowerPCCPU *cpu = env_archcpu(env);
@@ -1200,61 +1209,4 @@
     }
 };
 
-void ppc_hash64_filter_pagesizes(PowerPCCPU *cpu,
-                                 bool (*cb)(void *, uint32_t, uint32_t),
-                                 void *opaque)
-{
-    PPCHash64Options *opts = cpu->hash64_opts;
-    int i;
-    int n = 0;
-    bool ci_largepage = false;
 
-    assert(opts);
-
-    n = 0;
-    for (i = 0; i < ARRAY_SIZE(opts->sps); i++) {
-        PPCHash64SegmentPageSizes *sps = &opts->sps[i];
-        int j;
-        int m = 0;
-
-        assert(n <= i);
-
-        if (!sps->page_shift) {
-            break;
-        }
-
-        for (j = 0; j < ARRAY_SIZE(sps->enc); j++) {
-            PPCHash64PageSize *ps = &sps->enc[j];
-
-            assert(m <= j);
-            if (!ps->page_shift) {
-                break;
-            }
-
-            if (cb(opaque, sps->page_shift, ps->page_shift)) {
-                if (ps->page_shift >= 16) {
-                    ci_largepage = true;
-                }
-                sps->enc[m++] = *ps;
-            }
-        }
-
-        /* Clear rest of the row */
-        for (j = m; j < ARRAY_SIZE(sps->enc); j++) {
-            memset(&sps->enc[j], 0, sizeof(sps->enc[j]));
-        }
-
-        if (m) {
-            n++;
-        }
-    }
-
-    /* Clear the rest of the table */
-    for (i = n; i < ARRAY_SIZE(opts->sps); i++) {
-        memset(&opts->sps[i], 0, sizeof(opts->sps[i]));
-    }
-
-    if (!ci_largepage) {
-        opts->flags &= ~PPC_HASH64_CI_LARGEPAGE;
-    }
-}
diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h
index 87729d4..4b8b8e7 100644
--- a/target/ppc/mmu-hash64.h
+++ b/target/ppc/mmu-hash64.h
@@ -15,12 +15,8 @@
                                target_ulong pte0, target_ulong pte1);
 unsigned ppc_hash64_hpte_page_shift_noslb(PowerPCCPU *cpu,
                                           uint64_t pte0, uint64_t pte1);
-void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val);
 void ppc_hash64_init(PowerPCCPU *cpu);
 void ppc_hash64_finalize(PowerPCCPU *cpu);
-void ppc_hash64_filter_pagesizes(PowerPCCPU *cpu,
-                                 bool (*cb)(void *, uint32_t, uint32_t),
-                                 void *opaque);
 #endif
 
 /*
diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 30fcfcf..7972153 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -25,6 +25,7 @@
 #include "sysemu/kvm.h"
 #include "kvm_ppc.h"
 #include "exec/log.h"
+#include "internal.h"
 #include "mmu-radix64.h"
 #include "mmu-book3s-v3.h"
 
@@ -74,71 +75,94 @@
     return true;
 }
 
-static void ppc_radix64_raise_segi(PowerPCCPU *cpu, int rwx, vaddr eaddr)
+static void ppc_radix64_raise_segi(PowerPCCPU *cpu, MMUAccessType access_type,
+                                   vaddr eaddr)
 {
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
 
-    if (rwx == 2) { /* Instruction Segment Interrupt */
+    switch (access_type) {
+    case MMU_INST_FETCH:
+        /* Instruction Segment Interrupt: DAR is left untouched */
         cs->exception_index = POWERPC_EXCP_ISEG;
-    } else { /* Data Segment Interrupt */
+        break;
+    case MMU_DATA_STORE:
+    case MMU_DATA_LOAD:
+        /* Data Segment Interrupt: eaddr is recorded in DAR */
         cs->exception_index = POWERPC_EXCP_DSEG;
         env->spr[SPR_DAR] = eaddr;
+        break;
+    default:
+        g_assert_not_reached();
     }
     env->error_code = 0;
 }
 
-static void ppc_radix64_raise_si(PowerPCCPU *cpu, int rwx, vaddr eaddr,
-                                uint32_t cause)
+static void ppc_radix64_raise_si(PowerPCCPU *cpu, MMUAccessType access_type,
+                                 vaddr eaddr, uint32_t cause)
 {
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
 
-    if (rwx == 2) { /* Instruction Storage Interrupt */
+    switch (access_type) {
+    case MMU_INST_FETCH:
+        /* Instruction Storage Interrupt: cause is passed in error_code */
         cs->exception_index = POWERPC_EXCP_ISI;
         env->error_code = cause;
-    } else { /* Data Storage Interrupt */
+        break;
+    case MMU_DATA_STORE:
+        cause |= DSISR_ISSTORE;
+        /* fall through */
+    case MMU_DATA_LOAD:
+        /* Data Storage Interrupt: cause goes to DSISR, eaddr to DAR */
         cs->exception_index = POWERPC_EXCP_DSI;
-        if (rwx == 1) { /* Write -> Store */
-            cause |= DSISR_ISSTORE;
-        }
         env->spr[SPR_DSISR] = cause;
         env->spr[SPR_DAR] = eaddr;
         env->error_code = 0;
+        break;
+    default:
+        g_assert_not_reached();
     }
 }
 
-static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, int rwx, vaddr eaddr,
-                                  hwaddr g_raddr, uint32_t cause)
+static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, MMUAccessType access_type,
+                                  vaddr eaddr, hwaddr g_raddr, uint32_t cause)
 {
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
 
-    if (rwx == 2) { /* H Instruction Storage Interrupt */
+    switch (access_type) {
+    case MMU_INST_FETCH:
+        /* H Instruction Storage Interrupt: g_raddr to ASDR, cause as error */
         cs->exception_index = POWERPC_EXCP_HISI;
         env->spr[SPR_ASDR] = g_raddr;
         env->error_code = cause;
-    } else { /* H Data Storage Interrupt */
+        break;
+    case MMU_DATA_STORE:
+        cause |= DSISR_ISSTORE;
+        /* fall through */
+    case MMU_DATA_LOAD:
+        /* H Data Storage Interrupt: cause to HDSISR, eaddr to HDAR */
         cs->exception_index = POWERPC_EXCP_HDSI;
-        if (rwx == 1) { /* Write -> Store */
-            cause |= DSISR_ISSTORE;
-        }
         env->spr[SPR_HDSISR] = cause;
         env->spr[SPR_HDAR] = eaddr;
         env->spr[SPR_ASDR] = g_raddr;
         env->error_code = 0;
+        break;
+    default:
+        g_assert_not_reached();
     }
 }
 
-static bool ppc_radix64_check_prot(PowerPCCPU *cpu, int rwx, uint64_t pte,
-                                   int *fault_cause, int *prot,
+static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type,
+                                   uint64_t pte, int *fault_cause, int *prot,
                                    bool partition_scoped)
 {
     CPUPPCState *env = &cpu->env;
-    const int need_prot[] = { PAGE_READ, PAGE_WRITE, PAGE_EXEC };
+    int need_prot;
 
     /* Check Page Attributes (pte58:59) */
-    if (((pte & R_PTE_ATT) == R_PTE_ATT_NI_IO) && (rwx == 2)) {
+    if ((pte & R_PTE_ATT) == R_PTE_ATT_NI_IO && access_type == MMU_INST_FETCH) {
         /*
          * Radix PTE entries with the non-idempotent I/O attribute are treated
          * as guarded storage
@@ -158,7 +182,8 @@
     }
 
     /* Check if requested access type is allowed */
-    if (need_prot[rwx] & ~(*prot)) { /* Page Protected for that Access */
+    need_prot = prot_for_access_type(access_type);
+    if (need_prot & ~*prot) { /* Page Protected for that Access */
         *fault_cause |= DSISR_PROTFAULT;
         return true;
     }
@@ -166,15 +191,15 @@
     return false;
 }
 
-static void ppc_radix64_set_rc(PowerPCCPU *cpu, int rwx, uint64_t pte,
-                               hwaddr pte_addr, int *prot)
+static void ppc_radix64_set_rc(PowerPCCPU *cpu, MMUAccessType access_type,
+                               uint64_t pte, hwaddr pte_addr, int *prot)
 {
     CPUState *cs = CPU(cpu);
     uint64_t npte;
 
     npte = pte | R_PTE_R; /* Always set reference bit */
 
-    if (rwx == 1) { /* Store/Write */
+    if (access_type == MMU_DATA_STORE) { /* Store/Write */
         npte |= R_PTE_C; /* Set change bit */
     } else {
         /*
@@ -269,7 +294,8 @@
     return true;
 }
 
-static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, int rwx,
+static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu,
+                                              MMUAccessType access_type,
                                               vaddr eaddr, hwaddr g_raddr,
                                               ppc_v3_pate_t pate,
                                               hwaddr *h_raddr, int *h_prot,
@@ -285,24 +311,25 @@
     if (ppc_radix64_walk_tree(CPU(cpu)->as, g_raddr, pate.dw0 & PRTBE_R_RPDB,
                               pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size,
                               &pte, &fault_cause, &pte_addr) ||
-        ppc_radix64_check_prot(cpu, rwx, pte, &fault_cause, h_prot, true)) {
+        ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, h_prot, true)) {
         if (pde_addr) { /* address being translated was that of a guest pde */
             fault_cause |= DSISR_PRTABLE_FAULT;
         }
         if (guest_visible) {
-            ppc_radix64_raise_hsi(cpu, rwx, eaddr, g_raddr, fault_cause);
+            ppc_radix64_raise_hsi(cpu, access_type, eaddr, g_raddr, fault_cause);
         }
         return 1;
     }
 
     if (guest_visible) {
-        ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, h_prot);
+        ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, h_prot);
     }
 
     return 0;
 }
 
-static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx,
+static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
+                                            MMUAccessType access_type,
                                             vaddr eaddr, uint64_t pid,
                                             ppc_v3_pate_t pate, hwaddr *g_raddr,
                                             int *g_prot, int *g_page_size,
@@ -321,7 +348,7 @@
     if (offset >= size) {
         /* offset exceeds size of the process table */
         if (guest_visible) {
-            ppc_radix64_raise_si(cpu, rwx, eaddr, DSISR_NOPTE);
+            ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_NOPTE);
         }
         return 1;
     }
@@ -362,7 +389,7 @@
         if (ret) {
             /* No valid PTE */
             if (guest_visible) {
-                ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause);
+                ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause);
             }
             return ret;
         }
@@ -391,7 +418,7 @@
             if (ret) {
                 /* No valid pte */
                 if (guest_visible) {
-                    ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause);
+                    ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause);
                 }
                 return ret;
             }
@@ -405,16 +432,16 @@
         *g_raddr = (rpn & ~mask) | (eaddr & mask);
     }
 
-    if (ppc_radix64_check_prot(cpu, rwx, pte, &fault_cause, g_prot, false)) {
+    if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause, g_prot, false)) {
         /* Access denied due to protection */
         if (guest_visible) {
-            ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause);
+            ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause);
         }
         return 1;
     }
 
     if (guest_visible) {
-        ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, g_prot);
+        ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, g_prot);
     }
 
     return 0;
@@ -437,7 +464,8 @@
  *              | = On        | Process Scoped |    Scoped     |
  *              +-------------+----------------+---------------+
  */
-static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
+static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr,
+                             MMUAccessType access_type,
                              bool relocation,
                              hwaddr *raddr, int *psizep, int *protp,
                              bool guest_visible)
@@ -451,7 +479,7 @@
     /* Virtual Mode Access - get the fully qualified address */
     if (!ppc_radix64_get_fully_qualified_addr(&cpu->env, eaddr, &lpid, &pid)) {
         if (guest_visible) {
-            ppc_radix64_raise_segi(cpu, rwx, eaddr);
+            ppc_radix64_raise_segi(cpu, access_type, eaddr);
         }
         return 1;
     }
@@ -464,13 +492,13 @@
     } else {
         if (!ppc64_v3_get_pate(cpu, lpid, &pate)) {
             if (guest_visible) {
-                ppc_radix64_raise_si(cpu, rwx, eaddr, DSISR_NOPTE);
+                ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_NOPTE);
             }
             return 1;
         }
         if (!validate_pate(cpu, lpid, &pate)) {
             if (guest_visible) {
-                ppc_radix64_raise_si(cpu, rwx, eaddr, DSISR_R_BADCONFIG);
+                ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_R_BADCONFIG);
             }
             return 1;
         }
@@ -488,7 +516,7 @@
      * - Translates an effective address to a guest real address.
      */
     if (relocation) {
-        int ret = ppc_radix64_process_scoped_xlate(cpu, rwx, eaddr, pid,
+        int ret = ppc_radix64_process_scoped_xlate(cpu, access_type, eaddr, pid,
                                                    pate, &g_raddr, &prot,
                                                    &psize, guest_visible);
         if (ret) {
@@ -511,9 +539,10 @@
         if (lpid || !msr_hv) {
             int ret;
 
-            ret = ppc_radix64_partition_scoped_xlate(cpu, rwx, eaddr, g_raddr,
-                                                     pate, raddr, &prot, &psize,
-                                                     false, guest_visible);
+            ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr,
+                                                     g_raddr, pate, raddr,
+                                                     &prot, &psize, false,
+                                                     guest_visible);
             if (ret) {
                 return ret;
             }
@@ -534,12 +563,14 @@
     CPUPPCState *env = &cpu->env;
     int page_size, prot;
     bool relocation;
+    MMUAccessType access_type;
     hwaddr raddr;
 
     assert(!(msr_hv && cpu->vhyp));
     assert((rwx == 0) || (rwx == 1) || (rwx == 2));
+    access_type = rwx;
 
-    relocation = ((rwx == 2) && (msr_ir == 1)) || ((rwx != 2) && (msr_dr == 1));
+    relocation = (access_type == MMU_INST_FETCH ? msr_ir : msr_dr);
     /* HV or virtual hypervisor Real Mode Access */
     if (!relocation && (msr_hv || cpu->vhyp)) {
         /* In real mode top 4 effective addr bits (mostly) ignored */
@@ -568,7 +599,7 @@
     }
 
     /* Translate eaddr to raddr (where raddr is addr qemu needs for access) */
-    if (ppc_radix64_xlate(cpu, eaddr, rwx, relocation, &raddr,
+    if (ppc_radix64_xlate(cpu, eaddr, access_type, relocation, &raddr,
                           &page_size, &prot, true)) {
         return 1;
     }
diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
index ca88658..37986c5 100644
--- a/target/ppc/mmu_helper.c
+++ b/target/ppc/mmu_helper.c
@@ -32,6 +32,7 @@
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "qemu/qemu-print.h"
+#include "internal.h"
 #include "mmu-book3s-v3.h"
 #include "mmu-radix64.h"
 
@@ -126,36 +127,14 @@
     return access;
 }
 
-static int check_prot(int prot, int rw, int access_type)
+static int check_prot(int prot, MMUAccessType access_type)
 {
-    int ret;
-
-    if (access_type == ACCESS_CODE) {
-        if (prot & PAGE_EXEC) {
-            ret = 0;
-        } else {
-            ret = -2;
-        }
-    } else if (rw) {
-        if (prot & PAGE_WRITE) {
-            ret = 0;
-        } else {
-            ret = -2;
-        }
-    } else {
-        if (prot & PAGE_READ) {
-            ret = 0;
-        } else {
-            ret = -2;
-        }
-    }
-
-    return ret;
+    return prot & prot_for_access_type(access_type) ? 0 : -2;
 }
 
-static inline int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0,
-                                       target_ulong pte1, int h,
-                                       int rw, int type)
+static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0,
+                                target_ulong pte1, int h,
+                                MMUAccessType access_type)
 {
     target_ulong ptem, mmask;
     int access, ret, pteh, ptev, pp;
@@ -182,7 +161,7 @@
             /* Keep the matching PTE information */
             ctx->raddr = pte1;
             ctx->prot = access;
-            ret = check_prot(ctx->prot, rw, type);
+            ret = check_prot(ctx->prot, access_type);
             if (ret == 0) {
                 /* Access granted */
                 qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");
@@ -197,7 +176,7 @@
 }
 
 static int pte_update_flags(mmu_ctx_t *ctx, target_ulong *pte1p,
-                            int ret, int rw)
+                            int ret, MMUAccessType access_type)
 {
     int store = 0;
 
@@ -208,7 +187,7 @@
         store = 1;
     }
     if (!(*pte1p & 0x00000080)) {
-        if (rw == 1 && ret == 0) {
+        if (access_type == MMU_DATA_STORE && ret == 0) {
             /* Update changed flag */
             *pte1p |= 0x00000080;
             store = 1;
@@ -308,8 +287,8 @@
     env->last_way = way;
 }
 
-static inline int ppc6xx_tlb_check(CPUPPCState *env, mmu_ctx_t *ctx,
-                                   target_ulong eaddr, int rw, int access_type)
+static int ppc6xx_tlb_check(CPUPPCState *env, mmu_ctx_t *ctx,
+                            target_ulong eaddr, MMUAccessType access_type)
 {
     ppc6xx_tlb_t *tlb;
     int nr, best, way;
@@ -318,8 +297,7 @@
     best = -1;
     ret = -1; /* No TLB found */
     for (way = 0; way < env->nb_ways; way++) {
-        nr = ppc6xx_tlb_getnum(env, eaddr, way,
-                               access_type == ACCESS_CODE ? 1 : 0);
+        nr = ppc6xx_tlb_getnum(env, eaddr, way, access_type == MMU_INST_FETCH);
         tlb = &env->tlb.tlb6[nr];
         /* This test "emulates" the PTE index match for hardware TLBs */
         if ((eaddr & TARGET_PAGE_MASK) != tlb->EPN) {
@@ -333,9 +311,10 @@
                   TARGET_FMT_lx " %c %c\n", nr, env->nb_tlb,
                   pte_is_valid(tlb->pte0) ? "valid" : "inval",
                   tlb->EPN, eaddr, tlb->pte1,
-                  rw ? 'S' : 'L', access_type == ACCESS_CODE ? 'I' : 'D');
+                  access_type == MMU_DATA_STORE ? 'S' : 'L',
+                  access_type == MMU_INST_FETCH ? 'I' : 'D');
         switch (ppc6xx_tlb_pte_check(ctx, tlb->pte0, tlb->pte1,
-                                     0, rw, access_type)) {
+                                     0, access_type)) {
         case -3:
             /* TLB inconsistency */
             return -1;
@@ -366,7 +345,7 @@
         LOG_SWTLB("found TLB at addr " TARGET_FMT_plx " prot=%01x ret=%d\n",
                   ctx->raddr & TARGET_PAGE_MASK, ctx->prot, ret);
         /* Update page flags */
-        pte_update_flags(ctx, &env->tlb.tlb6[best].pte1, ret, rw);
+        pte_update_flags(ctx, &env->tlb.tlb6[best].pte1, ret, access_type);
     }
 
     return ret;
@@ -400,24 +379,22 @@
 }
 
 static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
-                           target_ulong virtual, int rw, int type)
+                           target_ulong virtual, MMUAccessType access_type)
 {
     target_ulong *BATlt, *BATut, *BATu, *BATl;
     target_ulong BEPIl, BEPIu, bl;
     int i, valid, prot;
     int ret = -1;
+    bool ifetch = access_type == MMU_INST_FETCH;
 
     LOG_BATS("%s: %cBAT v " TARGET_FMT_lx "\n", __func__,
-             type == ACCESS_CODE ? 'I' : 'D', virtual);
-    switch (type) {
-    case ACCESS_CODE:
+             ifetch ? 'I' : 'D', virtual);
+    if (ifetch) {
         BATlt = env->IBAT[1];
         BATut = env->IBAT[0];
-        break;
-    default:
+    } else {
         BATlt = env->DBAT[1];
         BATut = env->DBAT[0];
-        break;
     }
     for (i = 0; i < env->nb_BATs; i++) {
         BATu = &BATut[i];
@@ -427,7 +404,7 @@
         bat_size_prot(env, &bl, &valid, &prot, BATu, BATl);
         LOG_BATS("%s: %cBAT%d v " TARGET_FMT_lx " BATu " TARGET_FMT_lx
                  " BATl " TARGET_FMT_lx "\n", __func__,
-                 type == ACCESS_CODE ? 'I' : 'D', i, virtual, *BATu, *BATl);
+                 ifetch ? 'I' : 'D', i, virtual, *BATu, *BATl);
         if ((virtual & 0xF0000000) == BEPIu &&
             ((virtual & 0x0FFE0000) & ~bl) == BEPIl) {
             /* BAT matches */
@@ -438,7 +415,7 @@
                     (virtual & 0x0001F000);
                 /* Compute access rights */
                 ctx->prot = prot;
-                ret = check_prot(ctx->prot, rw, type);
+                ret = check_prot(ctx->prot, access_type);
                 if (ret == 0) {
                     LOG_BATS("BAT %d match: r " TARGET_FMT_plx " prot=%c%c\n",
                              i, ctx->raddr, ctx->prot & PAGE_READ ? 'R' : '-',
@@ -461,7 +438,7 @@
                 LOG_BATS("%s: %cBAT%d v " TARGET_FMT_lx " BATu " TARGET_FMT_lx
                          " BATl " TARGET_FMT_lx "\n\t" TARGET_FMT_lx " "
                          TARGET_FMT_lx " " TARGET_FMT_lx "\n",
-                         __func__, type == ACCESS_CODE ? 'I' : 'D', i, virtual,
+                         __func__, ifetch ? 'I' : 'D', i, virtual,
                          *BATu, *BATl, BEPIu, BEPIl, bl);
             }
         }
@@ -472,8 +449,9 @@
 }
 
 /* Perform segment based translation */
-static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
-                                      target_ulong eaddr, int rw, int type)
+static int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
+                               target_ulong eaddr, MMUAccessType access_type,
+                               int type)
 {
     PowerPCCPU *cpu = env_archcpu(env);
     hwaddr hash;
@@ -497,7 +475,7 @@
             " nip=" TARGET_FMT_lx " lr=" TARGET_FMT_lx
             " ir=%d dr=%d pr=%d %d t=%d\n",
             eaddr, (int)(eaddr >> 28), sr, env->nip, env->lr, (int)msr_ir,
-            (int)msr_dr, pr != 0 ? 1 : 0, rw, type);
+            (int)msr_dr, pr != 0 ? 1 : 0, access_type == MMU_DATA_STORE, type);
     pgidx = (eaddr & ~SEGMENT_MASK_256M) >> target_page_bits;
     hash = vsid ^ pgidx;
     ctx->ptem = (vsid << 7) | (pgidx >> 10);
@@ -520,7 +498,7 @@
             /* Initialize real address with an invalid value */
             ctx->raddr = (hwaddr)-1ULL;
             /* Software TLB search */
-            ret = ppc6xx_tlb_check(env, ctx, eaddr, rw, type);
+            ret = ppc6xx_tlb_check(env, ctx, eaddr, access_type);
 #if defined(DUMP_PAGE_TABLES)
             if (qemu_loglevel_mask(CPU_LOG_MMU)) {
                 CPUState *cs = env_cpu(env);
@@ -603,7 +581,8 @@
                           "address translation\n");
             return -4;
         }
-        if ((rw == 1 || ctx->key != 1) && (rw == 0 || ctx->key != 0)) {
+        if ((access_type == MMU_DATA_STORE || ctx->key != 1) &&
+            (access_type == MMU_DATA_LOAD || ctx->key != 0)) {
             ctx->raddr = eaddr;
             ret = 2;
         } else {
@@ -682,8 +661,8 @@
 }
 
 static int mmu40x_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx,
-                                       target_ulong address, int rw,
-                                       int access_type)
+                                       target_ulong address,
+                                       MMUAccessType access_type)
 {
     ppcemb_tlb_t *tlb;
     hwaddr raddr;
@@ -700,8 +679,8 @@
         }
         zsel = (tlb->attr >> 4) & 0xF;
         zpr = (env->spr[SPR_40x_ZPR] >> (30 - (2 * zsel))) & 0x3;
-        LOG_SWTLB("%s: TLB %d zsel %d zpr %d rw %d attr %08x\n",
-                    __func__, i, zsel, zpr, rw, tlb->attr);
+        LOG_SWTLB("%s: TLB %d zsel %d zpr %d ty %d attr %08x\n",
+                    __func__, i, zsel, zpr, access_type, tlb->attr);
         /* Check execute enable bit */
         switch (zpr) {
         case 0x2:
@@ -727,7 +706,7 @@
         check_perms:
             /* Check from TLB entry */
             ctx->prot = tlb->prot;
-            ret = check_prot(ctx->prot, rw, access_type);
+            ret = check_prot(ctx->prot, access_type);
             if (ret == -2) {
                 env->spr[SPR_40x_ESR] = 0;
             }
@@ -757,12 +736,11 @@
     env->spr[SPR_405_SLER] = val;
 }
 
-static inline int mmubooke_check_tlb(CPUPPCState *env, ppcemb_tlb_t *tlb,
-                                     hwaddr *raddr, int *prot,
-                                     target_ulong address, int rw,
-                                     int access_type, int i)
+static int mmubooke_check_tlb(CPUPPCState *env, ppcemb_tlb_t *tlb,
+                              hwaddr *raddr, int *prot, target_ulong address,
+                              MMUAccessType access_type, int i)
 {
-    int ret, prot2;
+    int prot2;
 
     if (ppcemb_tlb_check(env, tlb, raddr, address,
                          env->spr[SPR_BOOKE_PID],
@@ -794,42 +772,24 @@
     }
 
     /* Check the address space */
-    if (access_type == ACCESS_CODE) {
-        if (msr_ir != (tlb->attr & 1)) {
-            LOG_SWTLB("%s: AS doesn't match\n", __func__);
-            return -1;
-        }
-
-        *prot = prot2;
-        if (prot2 & PAGE_EXEC) {
-            LOG_SWTLB("%s: good TLB!\n", __func__);
-            return 0;
-        }
-
-        LOG_SWTLB("%s: no PAGE_EXEC: %x\n", __func__, prot2);
-        ret = -3;
-    } else {
-        if (msr_dr != (tlb->attr & 1)) {
-            LOG_SWTLB("%s: AS doesn't match\n", __func__);
-            return -1;
-        }
-
-        *prot = prot2;
-        if ((!rw && prot2 & PAGE_READ) || (rw && (prot2 & PAGE_WRITE))) {
-            LOG_SWTLB("%s: found TLB!\n", __func__);
-            return 0;
-        }
-
-        LOG_SWTLB("%s: PAGE_READ/WRITE doesn't match: %x\n", __func__, prot2);
-        ret = -2;
+    if ((access_type == MMU_INST_FETCH ? msr_ir : msr_dr) != (tlb->attr & 1)) {
+        LOG_SWTLB("%s: AS doesn't match\n", __func__);
+        return -1;
     }
 
-    return ret;
+    *prot = prot2;
+    if (prot2 & prot_for_access_type(access_type)) {
+        LOG_SWTLB("%s: good TLB!\n", __func__);
+        return 0;
+    }
+
+    LOG_SWTLB("%s: no prot match: %x\n", __func__, prot2);
+    return access_type == MMU_INST_FETCH ? -3 : -2;
 }
 
 static int mmubooke_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx,
-                                         target_ulong address, int rw,
-                                         int access_type)
+                                         target_ulong address,
+                                         MMUAccessType access_type)
 {
     ppcemb_tlb_t *tlb;
     hwaddr raddr;
@@ -839,7 +799,7 @@
     raddr = (hwaddr)-1ULL;
     for (i = 0; i < env->nb_tlb; i++) {
         tlb = &env->tlb.tlbe[i];
-        ret = mmubooke_check_tlb(env, tlb, &raddr, &ctx->prot, address, rw,
+        ret = mmubooke_check_tlb(env, tlb, &raddr, &ctx->prot, address,
                                  access_type, i);
         if (ret != -1) {
             break;
@@ -938,10 +898,10 @@
     return mmu_idx == PPC_TLB_EPID_STORE || mmu_idx == PPC_TLB_EPID_LOAD;
 }
 
-static uint32_t mmubooke206_esr(int mmu_idx, bool rw)
+static uint32_t mmubooke206_esr(int mmu_idx, MMUAccessType access_type)
 {
     uint32_t esr = 0;
-    if (rw) {
+    if (access_type == MMU_DATA_STORE) {
         esr |= ESR_ST;
     }
     if (is_epid_mmu(mmu_idx)) {
@@ -983,10 +943,9 @@
 /* Check if the tlb found by hashing really matches */
 static int mmubooke206_check_tlb(CPUPPCState *env, ppcmas_tlb_t *tlb,
                                  hwaddr *raddr, int *prot,
-                                 target_ulong address, int rw,
-                                 int access_type, int mmu_idx)
+                                 target_ulong address,
+                                 MMUAccessType access_type, int mmu_idx)
 {
-    int ret;
     int prot2 = 0;
     uint32_t epid;
     bool as, pr;
@@ -1043,44 +1002,31 @@
     }
 
     /* Check the address space and permissions */
-    if (access_type == ACCESS_CODE) {
+    if (access_type == MMU_INST_FETCH) {
         /* There is no way to fetch code using epid load */
         assert(!use_epid);
-        if (msr_ir != ((tlb->mas1 & MAS1_TS) >> MAS1_TS_SHIFT)) {
-            LOG_SWTLB("%s: AS doesn't match\n", __func__);
-            return -1;
-        }
-
-        *prot = prot2;
-        if (prot2 & PAGE_EXEC) {
-            LOG_SWTLB("%s: good TLB!\n", __func__);
-            return 0;
-        }
-
-        LOG_SWTLB("%s: no PAGE_EXEC: %x\n", __func__, prot2);
-        ret = -3;
-    } else {
-        if (as != ((tlb->mas1 & MAS1_TS) >> MAS1_TS_SHIFT)) {
-            LOG_SWTLB("%s: AS doesn't match\n", __func__);
-            return -1;
-        }
-
-        *prot = prot2;
-        if ((!rw && prot2 & PAGE_READ) || (rw && (prot2 & PAGE_WRITE))) {
-            LOG_SWTLB("%s: found TLB!\n", __func__);
-            return 0;
-        }
-
-        LOG_SWTLB("%s: PAGE_READ/WRITE doesn't match: %x\n", __func__, prot2);
-        ret = -2;
+        as = msr_ir;
     }
 
-    return ret;
+    if (as != ((tlb->mas1 & MAS1_TS) >> MAS1_TS_SHIFT)) {
+        LOG_SWTLB("%s: AS doesn't match\n", __func__);
+        return -1;
+    }
+
+    *prot = prot2;
+    if (prot2 & prot_for_access_type(access_type)) {
+        LOG_SWTLB("%s: good TLB!\n", __func__);
+        return 0;
+    }
+
+    LOG_SWTLB("%s: no prot match: %x\n", __func__, prot2);
+    return access_type == MMU_INST_FETCH ? -3 : -2;
 }
 
 static int mmubooke206_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx,
-                                            target_ulong address, int rw,
-                                            int access_type, int mmu_idx)
+                                            target_ulong address,
+                                            MMUAccessType access_type,
+                                            int mmu_idx)
 {
     ppcmas_tlb_t *tlb;
     hwaddr raddr;
@@ -1098,7 +1044,7 @@
                 continue;
             }
             ret = mmubooke206_check_tlb(env, tlb, &raddr, &ctx->prot, address,
-                                        rw, access_type, mmu_idx);
+                                        access_type, mmu_idx);
             if (ret != -1) {
                 goto found_tlb;
             }
@@ -1361,8 +1307,8 @@
     }
 }
 
-static inline int check_physical(CPUPPCState *env, mmu_ctx_t *ctx,
-                                 target_ulong eaddr, int rw)
+static int check_physical(CPUPPCState *env, mmu_ctx_t *ctx, target_ulong eaddr,
+                          MMUAccessType access_type)
 {
     int in_plb, ret;
 
@@ -1393,7 +1339,7 @@
                  eaddr >= env->pb[2] && eaddr < env->pb[3]) ? 1 : 0;
             if (in_plb ^ msr_px) {
                 /* Access in protected area */
-                if (rw == 1) {
+                if (access_type == MMU_DATA_STORE) {
                     /* Access is not allowed */
                     ret = -2;
                 }
@@ -1413,28 +1359,28 @@
     return ret;
 }
 
-static int get_physical_address_wtlb(
-    CPUPPCState *env, mmu_ctx_t *ctx,
-    target_ulong eaddr, int rw, int access_type,
-    int mmu_idx)
+static int get_physical_address_wtlb(CPUPPCState *env, mmu_ctx_t *ctx,
+                                     target_ulong eaddr,
+                                     MMUAccessType access_type, int type,
+                                     int mmu_idx)
 {
     int ret = -1;
-    bool real_mode = (access_type == ACCESS_CODE && msr_ir == 0)
-        || (access_type != ACCESS_CODE && msr_dr == 0);
+    bool real_mode = (type == ACCESS_CODE && msr_ir == 0)
+        || (type != ACCESS_CODE && msr_dr == 0);
 
     switch (env->mmu_model) {
     case POWERPC_MMU_SOFT_6xx:
     case POWERPC_MMU_SOFT_74xx:
         if (real_mode) {
-            ret = check_physical(env, ctx, eaddr, rw);
+            ret = check_physical(env, ctx, eaddr, access_type);
         } else {
             /* Try to find a BAT */
             if (env->nb_BATs != 0) {
-                ret = get_bat_6xx_tlb(env, ctx, eaddr, rw, access_type);
+                ret = get_bat_6xx_tlb(env, ctx, eaddr, access_type);
             }
             if (ret < 0) {
                 /* We didn't match any BAT entry or don't have BATs */
-                ret = get_segment_6xx_tlb(env, ctx, eaddr, rw, access_type);
+                ret = get_segment_6xx_tlb(env, ctx, eaddr, access_type, type);
             }
         }
         break;
@@ -1442,19 +1388,17 @@
     case POWERPC_MMU_SOFT_4xx:
     case POWERPC_MMU_SOFT_4xx_Z:
         if (real_mode) {
-            ret = check_physical(env, ctx, eaddr, rw);
+            ret = check_physical(env, ctx, eaddr, access_type);
         } else {
-            ret = mmu40x_get_physical_address(env, ctx, eaddr,
-                                              rw, access_type);
+            ret = mmu40x_get_physical_address(env, ctx, eaddr, access_type);
         }
         break;
     case POWERPC_MMU_BOOKE:
-        ret = mmubooke_get_physical_address(env, ctx, eaddr,
-                                            rw, access_type);
+        ret = mmubooke_get_physical_address(env, ctx, eaddr, access_type);
         break;
     case POWERPC_MMU_BOOKE206:
-        ret = mmubooke206_get_physical_address(env, ctx, eaddr, rw,
-                                               access_type, mmu_idx);
+        ret = mmubooke206_get_physical_address(env, ctx, eaddr, access_type,
+                                               mmu_idx);
         break;
     case POWERPC_MMU_MPC8xx:
         /* XXX: TODO */
@@ -1462,7 +1406,7 @@
         break;
     case POWERPC_MMU_REAL:
         if (real_mode) {
-            ret = check_physical(env, ctx, eaddr, rw);
+            ret = check_physical(env, ctx, eaddr, access_type);
         } else {
             cpu_abort(env_cpu(env),
                       "PowerPC in real mode do not do any translation\n");
@@ -1476,11 +1420,11 @@
     return ret;
 }
 
-static int get_physical_address(
-    CPUPPCState *env, mmu_ctx_t *ctx,
-    target_ulong eaddr, int rw, int access_type)
+static int get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx,
+                                target_ulong eaddr, MMUAccessType access_type,
+                                int type)
 {
-    return get_physical_address_wtlb(env, ctx, eaddr, rw, access_type, 0);
+    return get_physical_address_wtlb(env, ctx, eaddr, access_type, type, 0);
 }
 
 hwaddr ppc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
@@ -1508,14 +1452,15 @@
         ;
     }
 
-    if (unlikely(get_physical_address(env, &ctx, addr, 0, ACCESS_INT) != 0)) {
+    if (unlikely(get_physical_address(env, &ctx, addr, MMU_DATA_LOAD,
+                                      ACCESS_INT) != 0)) {
 
         /*
          * Some MMUs have separate TLBs for code and data. If we only
          * try an ACCESS_INT, we may not be able to read instructions
          * mapped by code TLBs, so we also try a ACCESS_CODE.
          */
-        if (unlikely(get_physical_address(env, &ctx, addr, 0,
+        if (unlikely(get_physical_address(env, &ctx, addr, MMU_INST_FETCH,
                                           ACCESS_CODE) != 0)) {
             return -1;
         }
@@ -1525,13 +1470,14 @@
 }
 
 static void booke206_update_mas_tlb_miss(CPUPPCState *env, target_ulong address,
-                                     int rw, int mmu_idx)
+                                         MMUAccessType access_type, int mmu_idx)
 {
     uint32_t epid;
     bool as, pr;
     uint32_t missed_tid = 0;
     bool use_epid = mmubooke206_get_as(env, mmu_idx, &epid, &as, &pr);
-    if (rw == 2) {
+
+    if (access_type == MMU_INST_FETCH) {
         as = msr_ir;
     }
     env->spr[SPR_BOOKE_MAS0] = env->spr[SPR_BOOKE_MAS4] & MAS4_TLBSELD_MASK;
@@ -1579,24 +1525,23 @@
 
 /* Perform address translation */
 static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address,
-                                    int rw, int mmu_idx)
+                                    MMUAccessType access_type, int mmu_idx)
 {
     CPUState *cs = env_cpu(env);
     PowerPCCPU *cpu = POWERPC_CPU(cs);
     mmu_ctx_t ctx;
-    int access_type;
+    int type;
     int ret = 0;
 
-    if (rw == 2) {
+    if (access_type == MMU_INST_FETCH) {
         /* code access */
-        rw = 0;
-        access_type = ACCESS_CODE;
+        type = ACCESS_CODE;
     } else {
         /* data access */
-        access_type = env->access_type;
+        type = env->access_type;
     }
-    ret = get_physical_address_wtlb(env, &ctx, address, rw,
-                                    access_type, mmu_idx);
+    ret = get_physical_address_wtlb(env, &ctx, address, access_type,
+                                    type, mmu_idx);
     if (ret == 0) {
         tlb_set_page(cs, address & TARGET_PAGE_MASK,
                      ctx.raddr & TARGET_PAGE_MASK, ctx.prot,
@@ -1604,7 +1549,7 @@
         ret = 0;
     } else if (ret < 0) {
         LOG_MMU_STATE(cs);
-        if (access_type == ACCESS_CODE) {
+        if (type == ACCESS_CODE) {
             switch (ret) {
             case -1:
                 /* No matches in page tables or TLB */
@@ -1632,7 +1577,7 @@
                     cs->exception_index = POWERPC_EXCP_ITLB;
                     env->error_code = 0;
                     env->spr[SPR_BOOKE_DEAR] = address;
-                    env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, 0);
+                    env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, MMU_DATA_LOAD);
                     return -1;
                 case POWERPC_MMU_MPC8xx:
                     /* XXX: TODO */
@@ -1674,7 +1619,7 @@
                 /* No matches in page tables or TLB */
                 switch (env->mmu_model) {
                 case POWERPC_MMU_SOFT_6xx:
-                    if (rw == 1) {
+                    if (access_type == MMU_DATA_STORE) {
                         cs->exception_index = POWERPC_EXCP_DSTLB;
                         env->error_code = 1 << 16;
                     } else {
@@ -1691,7 +1636,7 @@
                         get_pteg_offset32(cpu, ctx.hash[1]);
                     break;
                 case POWERPC_MMU_SOFT_74xx:
-                    if (rw == 1) {
+                    if (access_type == MMU_DATA_STORE) {
                         cs->exception_index = POWERPC_EXCP_DSTLB;
                     } else {
                         cs->exception_index = POWERPC_EXCP_DLTLB;
@@ -1708,7 +1653,7 @@
                     cs->exception_index = POWERPC_EXCP_DTLB;
                     env->error_code = 0;
                     env->spr[SPR_40x_DEAR] = address;
-                    if (rw) {
+                    if (access_type == MMU_DATA_STORE) {
                         env->spr[SPR_40x_ESR] = 0x00800000;
                     } else {
                         env->spr[SPR_40x_ESR] = 0x00000000;
@@ -1719,13 +1664,13 @@
                     cpu_abort(cs, "MPC8xx MMU model is not implemented\n");
                     break;
                 case POWERPC_MMU_BOOKE206:
-                    booke206_update_mas_tlb_miss(env, address, rw, mmu_idx);
+                    booke206_update_mas_tlb_miss(env, address, access_type, mmu_idx);
                     /* fall through */
                 case POWERPC_MMU_BOOKE:
                     cs->exception_index = POWERPC_EXCP_DTLB;
                     env->error_code = 0;
                     env->spr[SPR_BOOKE_DEAR] = address;
-                    env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, rw);
+                    env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, access_type);
                     return -1;
                 case POWERPC_MMU_REAL:
                     cpu_abort(cs, "PowerPC in real mode should never raise "
@@ -1743,16 +1688,16 @@
                 if (env->mmu_model == POWERPC_MMU_SOFT_4xx
                     || env->mmu_model == POWERPC_MMU_SOFT_4xx_Z) {
                     env->spr[SPR_40x_DEAR] = address;
-                    if (rw) {
+                    if (access_type == MMU_DATA_STORE) {
                         env->spr[SPR_40x_ESR] |= 0x00800000;
                     }
                 } else if ((env->mmu_model == POWERPC_MMU_BOOKE) ||
                            (env->mmu_model == POWERPC_MMU_BOOKE206)) {
                     env->spr[SPR_BOOKE_DEAR] = address;
-                    env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, rw);
+                    env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, access_type);
                 } else {
                     env->spr[SPR_DAR] = address;
-                    if (rw == 1) {
+                    if (access_type == MMU_DATA_STORE) {
                         env->spr[SPR_DSISR] = 0x0A000000;
                     } else {
                         env->spr[SPR_DSISR] = 0x08000000;
@@ -1761,7 +1706,7 @@
                 break;
             case -4:
                 /* Direct store exception */
-                switch (access_type) {
+                switch (type) {
                 case ACCESS_FLOAT:
                     /* Floating point load/store */
                     cs->exception_index = POWERPC_EXCP_ALIGN;
@@ -1773,7 +1718,7 @@
                     cs->exception_index = POWERPC_EXCP_DSI;
                     env->error_code = 0;
                     env->spr[SPR_DAR] = address;
-                    if (rw == 1) {
+                    if (access_type == MMU_DATA_STORE) {
                         env->spr[SPR_DSISR] = 0x06000000;
                     } else {
                         env->spr[SPR_DSISR] = 0x04000000;
@@ -1784,7 +1729,7 @@
                     cs->exception_index = POWERPC_EXCP_DSI;
                     env->error_code = 0;
                     env->spr[SPR_DAR] = address;
-                    if (rw == 1) {
+                    if (access_type == MMU_DATA_STORE) {
                         env->spr[SPR_DSISR] = 0x06100000;
                     } else {
                         env->spr[SPR_DSISR] = 0x04100000;
@@ -2085,32 +2030,6 @@
 
 /*****************************************************************************/
 /* Special registers manipulation */
-void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
-{
-    PowerPCCPU *cpu = env_archcpu(env);
-    qemu_log_mask(CPU_LOG_MMU, "%s: " TARGET_FMT_lx "\n", __func__, value);
-    assert(!cpu->vhyp);
-#if defined(TARGET_PPC64)
-    if (mmu_is_64bit(env->mmu_model)) {
-        target_ulong sdr_mask = SDR_64_HTABORG | SDR_64_HTABSIZE;
-        target_ulong htabsize = value & SDR_64_HTABSIZE;
-
-        if (value & ~sdr_mask) {
-            error_report("Invalid bits 0x"TARGET_FMT_lx" set in SDR1",
-                         value & ~sdr_mask);
-            value &= sdr_mask;
-        }
-        if (htabsize > 28) {
-            error_report("Invalid HTABSIZE 0x" TARGET_FMT_lx" stored in SDR1",
-                         htabsize);
-            return;
-        }
-    }
-#endif /* defined(TARGET_PPC64) */
-    /* FIXME: Should check for valid HTABMASK values in 32-bit case */
-    env->spr[SPR_SDR1] = value;
-}
-
 #if defined(TARGET_PPC64)
 void ppc_store_ptcr(CPUPPCState *env, target_ulong value)
 {
diff --git a/target/ppc/spr_tcg.h b/target/ppc/spr_tcg.h
new file mode 100644
index 0000000..0be5f34
--- /dev/null
+++ b/target/ppc/spr_tcg.h
@@ -0,0 +1,136 @@
+/*
+ *  PowerPC emulation for qemu: read/write callbacks for SPRs
+ *
+ *  Copyright (C) 2021 Instituto de Pesquisas Eldorado
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef SPR_TCG_H
+#define SPR_TCG_H
+
+#define SPR_NOACCESS (&spr_noaccess)
+
+/* SPR callback prototypes: readers take (gprn, sprn), writers (sprn, gprn) */
+void spr_noaccess(DisasContext *ctx, int gprn, int sprn);
+void spr_read_generic(DisasContext *ctx, int gprn, int sprn);
+void spr_write_generic(DisasContext *ctx, int sprn, int gprn);
+void spr_read_xer(DisasContext *ctx, int gprn, int sprn);
+void spr_write_xer(DisasContext *ctx, int sprn, int gprn);
+void spr_read_lr(DisasContext *ctx, int gprn, int sprn);
+void spr_write_lr(DisasContext *ctx, int sprn, int gprn);
+void spr_read_ctr(DisasContext *ctx, int gprn, int sprn);
+void spr_write_ctr(DisasContext *ctx, int sprn, int gprn);
+void spr_read_ureg(DisasContext *ctx, int gprn, int sprn);
+void spr_read_tbl(DisasContext *ctx, int gprn, int sprn);
+void spr_read_tbu(DisasContext *ctx, int gprn, int sprn);
+void spr_read_atbl(DisasContext *ctx, int gprn, int sprn);
+void spr_read_atbu(DisasContext *ctx, int gprn, int sprn);
+void spr_read_601_rtcl(DisasContext *ctx, int gprn, int sprn);
+void spr_read_601_rtcu(DisasContext *ctx, int gprn, int sprn);
+void spr_read_spefscr(DisasContext *ctx, int gprn, int sprn);
+void spr_write_spefscr(DisasContext *ctx, int sprn, int gprn);
+
+#ifndef CONFIG_USER_ONLY
+void spr_write_generic32(DisasContext *ctx, int sprn, int gprn);
+void spr_write_clear(DisasContext *ctx, int sprn, int gprn);
+void spr_access_nop(DisasContext *ctx, int sprn, int gprn);
+void spr_read_decr(DisasContext *ctx, int gprn, int sprn);
+void spr_write_decr(DisasContext *ctx, int sprn, int gprn);
+void spr_write_tbl(DisasContext *ctx, int sprn, int gprn);
+void spr_write_tbu(DisasContext *ctx, int sprn, int gprn);
+void spr_write_atbl(DisasContext *ctx, int sprn, int gprn);
+void spr_write_atbu(DisasContext *ctx, int sprn, int gprn);
+void spr_read_ibat(DisasContext *ctx, int gprn, int sprn);
+void spr_read_ibat_h(DisasContext *ctx, int gprn, int sprn);
+void spr_write_ibatu(DisasContext *ctx, int sprn, int gprn);
+void spr_write_ibatu_h(DisasContext *ctx, int sprn, int gprn);
+void spr_write_ibatl(DisasContext *ctx, int sprn, int gprn);
+void spr_write_ibatl_h(DisasContext *ctx, int sprn, int gprn);
+void spr_read_dbat(DisasContext *ctx, int gprn, int sprn);
+void spr_read_dbat_h(DisasContext *ctx, int gprn, int sprn);
+void spr_write_dbatu(DisasContext *ctx, int sprn, int gprn);
+void spr_write_dbatu_h(DisasContext *ctx, int sprn, int gprn);
+void spr_write_dbatl(DisasContext *ctx, int sprn, int gprn);
+void spr_write_dbatl_h(DisasContext *ctx, int sprn, int gprn);
+void spr_write_sdr1(DisasContext *ctx, int sprn, int gprn);
+void spr_write_601_rtcu(DisasContext *ctx, int sprn, int gprn);
+void spr_write_601_rtcl(DisasContext *ctx, int sprn, int gprn);
+void spr_write_hid0_601(DisasContext *ctx, int sprn, int gprn);
+void spr_read_601_ubat(DisasContext *ctx, int gprn, int sprn);
+void spr_write_601_ubatu(DisasContext *ctx, int sprn, int gprn);
+void spr_write_601_ubatl(DisasContext *ctx, int sprn, int gprn);
+void spr_read_40x_pit(DisasContext *ctx, int gprn, int sprn);
+void spr_write_40x_pit(DisasContext *ctx, int sprn, int gprn);
+void spr_write_40x_dbcr0(DisasContext *ctx, int sprn, int gprn);
+void spr_write_40x_sler(DisasContext *ctx, int sprn, int gprn);
+void spr_write_booke_tcr(DisasContext *ctx, int sprn, int gprn);
+void spr_write_booke_tsr(DisasContext *ctx, int sprn, int gprn);
+void spr_read_403_pbr(DisasContext *ctx, int gprn, int sprn);
+void spr_write_403_pbr(DisasContext *ctx, int sprn, int gprn);
+void spr_write_pir(DisasContext *ctx, int sprn, int gprn);
+void spr_write_excp_prefix(DisasContext *ctx, int sprn, int gprn);
+void spr_write_excp_vector(DisasContext *ctx, int sprn, int gprn);
+void spr_read_thrm(DisasContext *ctx, int gprn, int sprn);
+void spr_write_e500_l1csr0(DisasContext *ctx, int sprn, int gprn);
+void spr_write_e500_l1csr1(DisasContext *ctx, int sprn, int gprn);
+void spr_write_e500_l2csr0(DisasContext *ctx, int sprn, int gprn);
+void spr_write_booke206_mmucsr0(DisasContext *ctx, int sprn, int gprn);
+void spr_write_booke_pid(DisasContext *ctx, int sprn, int gprn);
+void spr_write_eplc(DisasContext *ctx, int sprn, int gprn);
+void spr_write_epsc(DisasContext *ctx, int sprn, int gprn);
+void spr_write_mas73(DisasContext *ctx, int sprn, int gprn);
+void spr_read_mas73(DisasContext *ctx, int gprn, int sprn);
+#ifdef TARGET_PPC64
+void spr_read_cfar(DisasContext *ctx, int gprn, int sprn);
+void spr_write_cfar(DisasContext *ctx, int sprn, int gprn);
+void spr_write_ureg(DisasContext *ctx, int sprn, int gprn);
+void spr_read_purr(DisasContext *ctx, int gprn, int sprn);
+void spr_write_purr(DisasContext *ctx, int sprn, int gprn);
+void spr_read_hdecr(DisasContext *ctx, int gprn, int sprn);
+void spr_write_hdecr(DisasContext *ctx, int sprn, int gprn);
+void spr_read_vtb(DisasContext *ctx, int gprn, int sprn);
+void spr_write_vtb(DisasContext *ctx, int sprn, int gprn);
+void spr_write_tbu40(DisasContext *ctx, int sprn, int gprn);
+void spr_write_pidr(DisasContext *ctx, int sprn, int gprn);
+void spr_write_lpidr(DisasContext *ctx, int sprn, int gprn);
+void spr_read_hior(DisasContext *ctx, int gprn, int sprn);
+void spr_write_hior(DisasContext *ctx, int sprn, int gprn);
+void spr_write_ptcr(DisasContext *ctx, int sprn, int gprn);
+void spr_write_pcr(DisasContext *ctx, int sprn, int gprn);
+void spr_read_dpdes(DisasContext *ctx, int gprn, int sprn);
+void spr_write_dpdes(DisasContext *ctx, int sprn, int gprn);
+void spr_write_amr(DisasContext *ctx, int sprn, int gprn);
+void spr_write_uamor(DisasContext *ctx, int sprn, int gprn);
+void spr_write_iamr(DisasContext *ctx, int sprn, int gprn);
+#endif /* TARGET_PPC64 */
+#endif /* !CONFIG_USER_ONLY */
+
+#ifdef TARGET_PPC64
+void spr_read_prev_upper32(DisasContext *ctx, int gprn, int sprn);
+void spr_write_prev_upper32(DisasContext *ctx, int sprn, int gprn);
+void spr_read_tar(DisasContext *ctx, int gprn, int sprn);
+void spr_write_tar(DisasContext *ctx, int sprn, int gprn);
+void spr_read_tm(DisasContext *ctx, int gprn, int sprn);
+void spr_write_tm(DisasContext *ctx, int sprn, int gprn);
+void spr_read_tm_upper32(DisasContext *ctx, int gprn, int sprn);
+void spr_write_tm_upper32(DisasContext *ctx, int sprn, int gprn);
+void spr_read_ebb(DisasContext *ctx, int gprn, int sprn);
+void spr_write_ebb(DisasContext *ctx, int sprn, int gprn);
+void spr_read_ebb_upper32(DisasContext *ctx, int gprn, int sprn);
+void spr_write_ebb_upper32(DisasContext *ctx, int sprn, int gprn);
+void spr_write_hmer(DisasContext *ctx, int sprn, int gprn);
+void spr_write_lpcr(DisasContext *ctx, int sprn, int gprn);
+#endif /* TARGET_PPC64 */
+
+#endif /* SPR_TCG_H */
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index a638120..ea200f9 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -36,7 +36,10 @@
 #include "exec/translator.h"
 #include "exec/log.h"
 #include "qemu/atomic128.h"
+#include "spr_tcg.h"
 
+#include "qemu/qemu-print.h"
+#include "qapi/error.h"
 
 #define CPU_SINGLE_STEP 0x1
 #define CPU_BRANCH_STEP 0x2
@@ -154,8 +157,8 @@
 /* internal defines */
 struct DisasContext {
     DisasContextBase base;
+    target_ulong cia;  /* current instruction address */
     uint32_t opcode;
-    uint32_t exception;
     /* Routine used to access memory */
     bool pr, hv, dr, le_mode;
     bool lazy_tlb_flush;
@@ -181,6 +184,11 @@
     uint64_t insns_flags2;
 };
 
+#define DISAS_EXIT         DISAS_TARGET_0  /* exit to main loop, pc updated */
+#define DISAS_EXIT_UPDATE  DISAS_TARGET_1  /* exit to main loop, pc stale */
+#define DISAS_CHAIN        DISAS_TARGET_2  /* lookup next tb, pc updated */
+#define DISAS_CHAIN_UPDATE DISAS_TARGET_3  /* lookup next tb, pc stale */
+
 /* Return true iff byteswap is needed in a scalar memop */
 static inline bool need_byteswap(const DisasContext *ctx)
 {
@@ -252,15 +260,13 @@
      * These are all synchronous exceptions, we set the PC back to the
      * faulting instruction
      */
-    if (ctx->exception == POWERPC_EXCP_NONE) {
-        gen_update_nip(ctx, ctx->base.pc_next - 4);
-    }
+    gen_update_nip(ctx, ctx->cia);
     t0 = tcg_const_i32(excp);
     t1 = tcg_const_i32(error);
     gen_helper_raise_exception_err(cpu_env, t0, t1);
     tcg_temp_free_i32(t0);
     tcg_temp_free_i32(t1);
-    ctx->exception = (excp);
+    ctx->base.is_jmp = DISAS_NORETURN;
 }
 
 static void gen_exception(DisasContext *ctx, uint32_t excp)
@@ -271,13 +277,11 @@
      * These are all synchronous exceptions, we set the PC back to the
      * faulting instruction
      */
-    if (ctx->exception == POWERPC_EXCP_NONE) {
-        gen_update_nip(ctx, ctx->base.pc_next - 4);
-    }
+    gen_update_nip(ctx, ctx->cia);
     t0 = tcg_const_i32(excp);
     gen_helper_raise_exception(cpu_env, t0);
     tcg_temp_free_i32(t0);
-    ctx->exception = (excp);
+    ctx->base.is_jmp = DISAS_NORETURN;
 }
 
 static void gen_exception_nip(DisasContext *ctx, uint32_t excp,
@@ -289,7 +293,21 @@
     t0 = tcg_const_i32(excp);
     gen_helper_raise_exception(cpu_env, t0);
     tcg_temp_free_i32(t0);
-    ctx->exception = (excp);
+    ctx->base.is_jmp = DISAS_NORETURN;
+}
+
+static void gen_icount_io_start(DisasContext *ctx)
+{
+    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+        gen_io_start();
+        /*
+         * An I/O instruction must be last in the TB.
+         * Chain to the next TB, and let the code from gen_tb_start
+         * decide if we need to return to the main loop.
+         * Doing this first also allows this value to be overridden.
+         */
+        ctx->base.is_jmp = DISAS_TOO_MANY;
+    }
 }
 
 /*
@@ -322,19 +340,8 @@
 
 static void gen_debug_exception(DisasContext *ctx)
 {
-    TCGv_i32 t0;
-
-    /*
-     * These are all synchronous exceptions, we set the PC back to the
-     * faulting instruction
-     */
-    if ((ctx->exception != POWERPC_EXCP_BRANCH) &&
-        (ctx->exception != POWERPC_EXCP_SYNC)) {
-        gen_update_nip(ctx, ctx->base.pc_next);
-    }
-    t0 = tcg_const_i32(EXCP_DEBUG);
-    gen_helper_raise_exception(cpu_env, t0);
-    tcg_temp_free_i32(t0);
+    gen_helper_raise_exception(cpu_env, tcg_constant_i32(EXCP_DEBUG));
+    ctx->base.is_jmp = DISAS_NORETURN;
 }
 
 static inline void gen_inval_exception(DisasContext *ctx, uint32_t error)
@@ -354,18 +361,924 @@
     gen_exception_err(ctx, POWERPC_EXCP_HV_EMU, POWERPC_EXCP_PRIV | error);
 }
 
-/* Stop translation */
-static inline void gen_stop_exception(DisasContext *ctx)
+/*****************************************************************************/
+/* SPR READ/WRITE CALLBACKS */
+
+void spr_noaccess(DisasContext *ctx, int gprn, int sprn)
 {
-    gen_update_nip(ctx, ctx->base.pc_next);
-    ctx->exception = POWERPC_EXCP_STOP;
+#if 0
+    sprn = ((sprn >> 5) & 0x1F) | ((sprn & 0x1F) << 5);
+    printf("ERROR: try to access SPR %d !\n", sprn);
+#endif
 }
 
-#ifndef CONFIG_USER_ONLY
-/* No need to update nip here, as execution flow will change */
-static inline void gen_sync_exception(DisasContext *ctx)
+/* #define PPC_DUMP_SPR_ACCESSES */
+
+/*
+ * Generic callbacks:
+ * do nothing but store/retrieve spr value
+ */
+static void spr_load_dump_spr(int sprn)
 {
-    ctx->exception = POWERPC_EXCP_SYNC;
+#ifdef PPC_DUMP_SPR_ACCESSES
+    TCGv_i32 t0 = tcg_const_i32(sprn);
+    gen_helper_load_dump_spr(cpu_env, t0);
+    tcg_temp_free_i32(t0);
+#endif
+}
+
+void spr_read_generic(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_load_spr(cpu_gpr[gprn], sprn);
+    spr_load_dump_spr(sprn);
+}
+
+static void spr_store_dump_spr(int sprn)
+{
+#ifdef PPC_DUMP_SPR_ACCESSES
+    TCGv_i32 t0 = tcg_const_i32(sprn);
+    gen_helper_store_dump_spr(cpu_env, t0);
+    tcg_temp_free_i32(t0);
+#endif
+}
+
+void spr_write_generic(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_store_spr(sprn, cpu_gpr[gprn]);
+    spr_store_dump_spr(sprn);
+}
+
+#if !defined(CONFIG_USER_ONLY)
+void spr_write_generic32(DisasContext *ctx, int sprn, int gprn)
+{
+#ifdef TARGET_PPC64
+    TCGv t0 = tcg_temp_new();
+    tcg_gen_ext32u_tl(t0, cpu_gpr[gprn]);
+    gen_store_spr(sprn, t0);
+    tcg_temp_free(t0);
+    spr_store_dump_spr(sprn);
+#else
+    spr_write_generic(ctx, sprn, gprn);
+#endif
+}
+
+void spr_write_clear(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    gen_load_spr(t0, sprn);
+    tcg_gen_neg_tl(t1, cpu_gpr[gprn]);
+    tcg_gen_and_tl(t0, t0, t1);
+    gen_store_spr(sprn, t0);
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+
+void spr_access_nop(DisasContext *ctx, int sprn, int gprn)
+{
+}
+
+#endif
+
+/* SPR common to all PowerPC */
+/* XER */
+void spr_read_xer(DisasContext *ctx, int gprn, int sprn)
+{
+    TCGv dst = cpu_gpr[gprn];
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+    tcg_gen_mov_tl(dst, cpu_xer);
+    tcg_gen_shli_tl(t0, cpu_so, XER_SO);
+    tcg_gen_shli_tl(t1, cpu_ov, XER_OV);
+    tcg_gen_shli_tl(t2, cpu_ca, XER_CA);
+    tcg_gen_or_tl(t0, t0, t1);
+    tcg_gen_or_tl(dst, dst, t2);
+    tcg_gen_or_tl(dst, dst, t0);
+    if (is_isa300(ctx)) {
+        tcg_gen_shli_tl(t0, cpu_ov32, XER_OV32);
+        tcg_gen_or_tl(dst, dst, t0);
+        tcg_gen_shli_tl(t0, cpu_ca32, XER_CA32);
+        tcg_gen_or_tl(dst, dst, t0);
+    }
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+}
+
+void spr_write_xer(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv src = cpu_gpr[gprn];
+    /* Write all flags, while reading back check for isa300 */
+    tcg_gen_andi_tl(cpu_xer, src,
+                    ~((1u << XER_SO) |
+                      (1u << XER_OV) | (1u << XER_OV32) |
+                      (1u << XER_CA) | (1u << XER_CA32)));
+    tcg_gen_extract_tl(cpu_ov32, src, XER_OV32, 1);
+    tcg_gen_extract_tl(cpu_ca32, src, XER_CA32, 1);
+    tcg_gen_extract_tl(cpu_so, src, XER_SO, 1);
+    tcg_gen_extract_tl(cpu_ov, src, XER_OV, 1);
+    tcg_gen_extract_tl(cpu_ca, src, XER_CA, 1);
+}
+
+/* LR */
+void spr_read_lr(DisasContext *ctx, int gprn, int sprn)
+{
+    tcg_gen_mov_tl(cpu_gpr[gprn], cpu_lr);
+}
+
+void spr_write_lr(DisasContext *ctx, int sprn, int gprn)
+{
+    tcg_gen_mov_tl(cpu_lr, cpu_gpr[gprn]);
+}
+
+/* CFAR */
+#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
+void spr_read_cfar(DisasContext *ctx, int gprn, int sprn)
+{
+    tcg_gen_mov_tl(cpu_gpr[gprn], cpu_cfar);
+}
+
+void spr_write_cfar(DisasContext *ctx, int sprn, int gprn)
+{
+    tcg_gen_mov_tl(cpu_cfar, cpu_gpr[gprn]);
+}
+#endif /* defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) */
+
+/* CTR */
+void spr_read_ctr(DisasContext *ctx, int gprn, int sprn)
+{
+    tcg_gen_mov_tl(cpu_gpr[gprn], cpu_ctr);
+}
+
+void spr_write_ctr(DisasContext *ctx, int sprn, int gprn)
+{
+    tcg_gen_mov_tl(cpu_ctr, cpu_gpr[gprn]);
+}
+
+/* User read access to SPR */
+/* USPRx */
+/* UMMCRx */
+/* UPMCx */
+/* USIA */
+/* UDECR */
+void spr_read_ureg(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_load_spr(cpu_gpr[gprn], sprn + 0x10);
+}
+
+#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
+void spr_write_ureg(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_store_spr(sprn + 0x10, cpu_gpr[gprn]);
+}
+#endif
+
+/* SPR common to all non-embedded PowerPC */
+/* DECR */
+#if !defined(CONFIG_USER_ONLY)
+void spr_read_decr(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_load_decr(cpu_gpr[gprn], cpu_env);
+}
+
+void spr_write_decr(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_store_decr(cpu_env, cpu_gpr[gprn]);
+}
+#endif
+
+/* SPR common to all non-embedded PowerPC, except 601 */
+/* Time base */
+void spr_read_tbl(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_load_tbl(cpu_gpr[gprn], cpu_env);
+}
+
+void spr_read_tbu(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_load_tbu(cpu_gpr[gprn], cpu_env);
+}
+
+void spr_read_atbl(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_helper_load_atbl(cpu_gpr[gprn], cpu_env);
+}
+
+void spr_read_atbu(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_helper_load_atbu(cpu_gpr[gprn], cpu_env);
+}
+
+#if !defined(CONFIG_USER_ONLY)
+void spr_write_tbl(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_store_tbl(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_write_tbu(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_store_tbu(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_write_atbl(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_store_atbl(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_write_atbu(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_store_atbu(cpu_env, cpu_gpr[gprn]);
+}
+
+#if defined(TARGET_PPC64)
+void spr_read_purr(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_load_purr(cpu_gpr[gprn], cpu_env);
+}
+
+void spr_write_purr(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_store_purr(cpu_env, cpu_gpr[gprn]);
+}
+
+/* HDECR */
+void spr_read_hdecr(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_load_hdecr(cpu_gpr[gprn], cpu_env);
+}
+
+void spr_write_hdecr(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_store_hdecr(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_read_vtb(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_load_vtb(cpu_gpr[gprn], cpu_env);
+}
+
+void spr_write_vtb(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_store_vtb(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_write_tbu40(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_store_tbu40(cpu_env, cpu_gpr[gprn]);
+}
+
+#endif
+#endif
+
+#if !defined(CONFIG_USER_ONLY)
+/* IBAT0U...IBAT7U */
+/* IBAT0L...IBAT7L */
+void spr_read_ibat(DisasContext *ctx, int gprn, int sprn)
+{
+    tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env,
+                  offsetof(CPUPPCState,
+                           IBAT[sprn & 1][(sprn - SPR_IBAT0U) / 2]));
+}
+
+void spr_read_ibat_h(DisasContext *ctx, int gprn, int sprn)
+{
+    tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env,
+                  offsetof(CPUPPCState,
+                           IBAT[sprn & 1][((sprn - SPR_IBAT4U) / 2) + 4]));
+}
+
+void spr_write_ibatu(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv_i32 t0 = tcg_const_i32((sprn - SPR_IBAT0U) / 2);
+    gen_helper_store_ibatu(cpu_env, t0, cpu_gpr[gprn]);
+    tcg_temp_free_i32(t0);
+}
+
+void spr_write_ibatu_h(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv_i32 t0 = tcg_const_i32(((sprn - SPR_IBAT4U) / 2) + 4);
+    gen_helper_store_ibatu(cpu_env, t0, cpu_gpr[gprn]);
+    tcg_temp_free_i32(t0);
+}
+
+void spr_write_ibatl(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv_i32 t0 = tcg_const_i32((sprn - SPR_IBAT0L) / 2);
+    gen_helper_store_ibatl(cpu_env, t0, cpu_gpr[gprn]);
+    tcg_temp_free_i32(t0);
+}
+
+void spr_write_ibatl_h(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv_i32 t0 = tcg_const_i32(((sprn - SPR_IBAT4L) / 2) + 4);
+    gen_helper_store_ibatl(cpu_env, t0, cpu_gpr[gprn]);
+    tcg_temp_free_i32(t0);
+}
+
+/* DBAT0U...DBAT7U */
+/* DBAT0L...DBAT7L */
+void spr_read_dbat(DisasContext *ctx, int gprn, int sprn)
+{
+    tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env,
+                  offsetof(CPUPPCState,
+                           DBAT[sprn & 1][(sprn - SPR_DBAT0U) / 2]));
+}
+
+void spr_read_dbat_h(DisasContext *ctx, int gprn, int sprn)
+{
+    tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env,
+                  offsetof(CPUPPCState,
+                           DBAT[sprn & 1][((sprn - SPR_DBAT4U) / 2) + 4]));
+}
+
+void spr_write_dbatu(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv_i32 t0 = tcg_const_i32((sprn - SPR_DBAT0U) / 2);
+    gen_helper_store_dbatu(cpu_env, t0, cpu_gpr[gprn]);
+    tcg_temp_free_i32(t0);
+}
+
+void spr_write_dbatu_h(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv_i32 t0 = tcg_const_i32(((sprn - SPR_DBAT4U) / 2) + 4);
+    gen_helper_store_dbatu(cpu_env, t0, cpu_gpr[gprn]);
+    tcg_temp_free_i32(t0);
+}
+
+void spr_write_dbatl(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv_i32 t0 = tcg_const_i32((sprn - SPR_DBAT0L) / 2);
+    gen_helper_store_dbatl(cpu_env, t0, cpu_gpr[gprn]);
+    tcg_temp_free_i32(t0);
+}
+
+void spr_write_dbatl_h(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv_i32 t0 = tcg_const_i32(((sprn - SPR_DBAT4L) / 2) + 4);
+    gen_helper_store_dbatl(cpu_env, t0, cpu_gpr[gprn]);
+    tcg_temp_free_i32(t0);
+}
+
+/* SDR1 */
+void spr_write_sdr1(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_store_sdr1(cpu_env, cpu_gpr[gprn]);
+}
+
+#if defined(TARGET_PPC64)
+/* 64-bit PowerPC specific SPRs */
+/* PIDR */
+void spr_write_pidr(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_store_pidr(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_write_lpidr(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_store_lpidr(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_read_hior(DisasContext *ctx, int gprn, int sprn)
+{
+    tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env, offsetof(CPUPPCState, excp_prefix));
+}
+
+void spr_write_hior(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv t0 = tcg_temp_new();
+    tcg_gen_andi_tl(t0, cpu_gpr[gprn], 0x3FFFFF00000ULL);
+    tcg_gen_st_tl(t0, cpu_env, offsetof(CPUPPCState, excp_prefix));
+    tcg_temp_free(t0);
+}
+void spr_write_ptcr(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_store_ptcr(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_write_pcr(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_store_pcr(cpu_env, cpu_gpr[gprn]);
+}
+
+/* DPDES */
+void spr_read_dpdes(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_helper_load_dpdes(cpu_gpr[gprn], cpu_env);
+}
+
+void spr_write_dpdes(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_store_dpdes(cpu_env, cpu_gpr[gprn]);
+}
+#endif
+#endif
+
+/* PowerPC 601 specific registers */
+/* RTC */
+void spr_read_601_rtcl(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_helper_load_601_rtcl(cpu_gpr[gprn], cpu_env);
+}
+
+void spr_read_601_rtcu(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_helper_load_601_rtcu(cpu_gpr[gprn], cpu_env);
+}
+
+#if !defined(CONFIG_USER_ONLY)
+void spr_write_601_rtcu(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_store_601_rtcu(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_write_601_rtcl(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_store_601_rtcl(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_write_hid0_601(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_store_hid0_601(cpu_env, cpu_gpr[gprn]);
+    /* Must stop the translation as endianness may have changed */
+    ctx->base.is_jmp = DISAS_EXIT_UPDATE;
+}
+#endif
+
+/* Unified bats */
+#if !defined(CONFIG_USER_ONLY)
+void spr_read_601_ubat(DisasContext *ctx, int gprn, int sprn)
+{
+    tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env,
+                  offsetof(CPUPPCState,
+                           IBAT[sprn & 1][(sprn - SPR_IBAT0U) / 2]));
+}
+
+void spr_write_601_ubatu(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv_i32 t0 = tcg_const_i32((sprn - SPR_IBAT0U) / 2);
+    gen_helper_store_601_batl(cpu_env, t0, cpu_gpr[gprn]);
+    tcg_temp_free_i32(t0);
+}
+
+void spr_write_601_ubatl(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv_i32 t0 = tcg_const_i32((sprn - SPR_IBAT0U) / 2);
+    gen_helper_store_601_batu(cpu_env, t0, cpu_gpr[gprn]);
+    tcg_temp_free_i32(t0);
+}
+#endif
+
+/* PowerPC 40x specific registers */
+#if !defined(CONFIG_USER_ONLY)
+void spr_read_40x_pit(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_load_40x_pit(cpu_gpr[gprn], cpu_env);
+}
+
+void spr_write_40x_pit(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_store_40x_pit(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_write_40x_dbcr0(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_icount_io_start(ctx);
+    gen_store_spr(sprn, cpu_gpr[gprn]);
+    gen_helper_store_40x_dbcr0(cpu_env, cpu_gpr[gprn]);
+    /* We must stop translation as we may have rebooted */
+    ctx->base.is_jmp = DISAS_EXIT_UPDATE;
+}
+
+void spr_write_40x_sler(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_store_40x_sler(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_write_booke_tcr(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_store_booke_tcr(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_write_booke_tsr(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_icount_io_start(ctx);
+    gen_helper_store_booke_tsr(cpu_env, cpu_gpr[gprn]);
+}
+#endif
+
+/* PowerPC 403 specific registers */
+/* PBL1 / PBU1 / PBL2 / PBU2 */
+#if !defined(CONFIG_USER_ONLY)
+void spr_read_403_pbr(DisasContext *ctx, int gprn, int sprn)
+{
+    tcg_gen_ld_tl(cpu_gpr[gprn], cpu_env,
+                  offsetof(CPUPPCState, pb[sprn - SPR_403_PBL1]));
+}
+
+void spr_write_403_pbr(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv_i32 t0 = tcg_const_i32(sprn - SPR_403_PBL1);
+    gen_helper_store_403_pbr(cpu_env, t0, cpu_gpr[gprn]);
+    tcg_temp_free_i32(t0);
+}
+
+void spr_write_pir(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv t0 = tcg_temp_new();
+    tcg_gen_andi_tl(t0, cpu_gpr[gprn], 0xF);
+    gen_store_spr(SPR_PIR, t0);
+    tcg_temp_free(t0);
+}
+#endif
+
+/* SPE specific registers */
+void spr_read_spefscr(DisasContext *ctx, int gprn, int sprn)
+{
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    tcg_gen_ld_i32(t0, cpu_env, offsetof(CPUPPCState, spe_fscr));
+    tcg_gen_extu_i32_tl(cpu_gpr[gprn], t0);
+    tcg_temp_free_i32(t0);
+}
+
+void spr_write_spefscr(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    tcg_gen_trunc_tl_i32(t0, cpu_gpr[gprn]);
+    tcg_gen_st_i32(t0, cpu_env, offsetof(CPUPPCState, spe_fscr));
+    tcg_temp_free_i32(t0);
+}
+
+#if !defined(CONFIG_USER_ONLY)
+/* Callback used to write the exception vector base */
+void spr_write_excp_prefix(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv t0 = tcg_temp_new();
+    tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUPPCState, ivpr_mask));
+    tcg_gen_and_tl(t0, t0, cpu_gpr[gprn]);
+    tcg_gen_st_tl(t0, cpu_env, offsetof(CPUPPCState, excp_prefix));
+    gen_store_spr(sprn, t0);
+    tcg_temp_free(t0);
+}
+
+void spr_write_excp_vector(DisasContext *ctx, int sprn, int gprn)
+{
+    int sprn_offs;
+
+    if (sprn >= SPR_BOOKE_IVOR0 && sprn <= SPR_BOOKE_IVOR15) {
+        sprn_offs = sprn - SPR_BOOKE_IVOR0;
+    } else if (sprn >= SPR_BOOKE_IVOR32 && sprn <= SPR_BOOKE_IVOR37) {
+        sprn_offs = sprn - SPR_BOOKE_IVOR32 + 32;
+    } else if (sprn >= SPR_BOOKE_IVOR38 && sprn <= SPR_BOOKE_IVOR42) {
+        sprn_offs = sprn - SPR_BOOKE_IVOR38 + 38;
+    } else {
+        printf("Trying to write an unknown exception vector %d %03x\n",
+               sprn, sprn);
+        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+        return;
+    }
+
+    TCGv t0 = tcg_temp_new();
+    tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUPPCState, ivor_mask));
+    tcg_gen_and_tl(t0, t0, cpu_gpr[gprn]);
+    tcg_gen_st_tl(t0, cpu_env, offsetof(CPUPPCState, excp_vectors[sprn_offs]));
+    gen_store_spr(sprn, t0);
+    tcg_temp_free(t0);
+}
+#endif
+
+#ifdef TARGET_PPC64
+#ifndef CONFIG_USER_ONLY
+void spr_write_amr(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+
+    /*
+     * Note, the HV=1 PR=0 case is handled earlier by simply using
+     * spr_write_generic for HV mode in the SPR table
+     */
+
+    /* Build insertion mask into t1 based on context */
+    if (ctx->pr) {
+        gen_load_spr(t1, SPR_UAMOR);
+    } else {
+        gen_load_spr(t1, SPR_AMOR);
+    }
+
+    /* Mask new bits into t2 */
+    tcg_gen_and_tl(t2, t1, cpu_gpr[gprn]);
+
+    /* Load AMR and clear new bits in t0 */
+    gen_load_spr(t0, SPR_AMR);
+    tcg_gen_andc_tl(t0, t0, t1);
+
+    /* Or'in new bits and write it out */
+    tcg_gen_or_tl(t0, t0, t2);
+    gen_store_spr(SPR_AMR, t0);
+    spr_store_dump_spr(SPR_AMR);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+}
+
+void spr_write_uamor(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+
+    /*
+     * Masked update of UAMOR: only bits set in AMOR are changed. Note, the
+     * HV=1 case is handled earlier by spr_write_generic in the SPR table
+     */
+
+    /* Build insertion mask into t1 from AMOR */
+    gen_load_spr(t1, SPR_AMOR);
+
+    /* Mask new bits into t2 */
+    tcg_gen_and_tl(t2, t1, cpu_gpr[gprn]);
+
+    /* Load UAMOR and clear the bits covered by the mask in t0 */
+    gen_load_spr(t0, SPR_UAMOR);
+    tcg_gen_andc_tl(t0, t0, t1);
+
+    /* OR in the new bits and write the result back to UAMOR */
+    tcg_gen_or_tl(t0, t0, t2);
+    gen_store_spr(SPR_UAMOR, t0);
+    spr_store_dump_spr(SPR_UAMOR);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+}
+
+void spr_write_iamr(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv t2 = tcg_temp_new();
+
+    /*
+     * Masked update of IAMR: only bits set in AMOR are changed. Note, the
+     * HV=1 case is handled earlier by spr_write_generic in the SPR table
+     */
+
+    /* Build insertion mask into t1 from AMOR */
+    gen_load_spr(t1, SPR_AMOR);
+
+    /* Mask new bits into t2 */
+    tcg_gen_and_tl(t2, t1, cpu_gpr[gprn]);
+
+    /* Load IAMR and clear the bits covered by the mask in t0 */
+    gen_load_spr(t0, SPR_IAMR);
+    tcg_gen_andc_tl(t0, t0, t1);
+
+    /* OR in the new bits and write the result back to IAMR */
+    tcg_gen_or_tl(t0, t0, t2);
+    gen_store_spr(SPR_IAMR, t0);
+    spr_store_dump_spr(SPR_IAMR);
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+}
+#endif
+#endif
+
+#ifndef CONFIG_USER_ONLY
+void spr_read_thrm(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_helper_fixup_thrm(cpu_env);
+    gen_load_spr(cpu_gpr[gprn], sprn);
+    spr_load_dump_spr(sprn);
+}
+#endif /* !CONFIG_USER_ONLY */
+
+#if !defined(CONFIG_USER_ONLY)
+void spr_write_e500_l1csr0(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv t0 = tcg_temp_new();
+
+    tcg_gen_andi_tl(t0, cpu_gpr[gprn], L1CSR0_DCE | L1CSR0_CPE);
+    gen_store_spr(sprn, t0);
+    tcg_temp_free(t0);
+}
+
+void spr_write_e500_l1csr1(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv t0 = tcg_temp_new();
+
+    tcg_gen_andi_tl(t0, cpu_gpr[gprn], L1CSR1_ICE | L1CSR1_CPE);
+    gen_store_spr(sprn, t0);
+    tcg_temp_free(t0);
+}
+
+void spr_write_e500_l2csr0(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv t0 = tcg_temp_new();
+
+    tcg_gen_andi_tl(t0, cpu_gpr[gprn],
+                    ~(E500_L2CSR0_L2FI | E500_L2CSR0_L2FL | E500_L2CSR0_L2LFC));
+    gen_store_spr(sprn, t0);
+    tcg_temp_free(t0);
+}
+
+void spr_write_booke206_mmucsr0(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_booke206_tlbflush(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_write_booke_pid(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv_i32 t0 = tcg_const_i32(sprn);
+    gen_helper_booke_setpid(cpu_env, t0, cpu_gpr[gprn]);
+    tcg_temp_free_i32(t0);
+}
+void spr_write_eplc(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_booke_set_eplc(cpu_env, cpu_gpr[gprn]);
+}
+void spr_write_epsc(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_booke_set_epsc(cpu_env, cpu_gpr[gprn]);
+}
+
+#endif
+
+#if !defined(CONFIG_USER_ONLY)
+void spr_write_mas73(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv val = tcg_temp_new();
+    tcg_gen_ext32u_tl(val, cpu_gpr[gprn]);
+    gen_store_spr(SPR_BOOKE_MAS3, val);
+    tcg_gen_shri_tl(val, cpu_gpr[gprn], 32);
+    gen_store_spr(SPR_BOOKE_MAS7, val);
+    tcg_temp_free(val);
+}
+
+void spr_read_mas73(DisasContext *ctx, int gprn, int sprn)
+{
+    TCGv mas7 = tcg_temp_new();
+    TCGv mas3 = tcg_temp_new();
+    gen_load_spr(mas7, SPR_BOOKE_MAS7);
+    tcg_gen_shli_tl(mas7, mas7, 32);
+    gen_load_spr(mas3, SPR_BOOKE_MAS3);
+    tcg_gen_or_tl(cpu_gpr[gprn], mas3, mas7);
+    tcg_temp_free(mas3);
+    tcg_temp_free(mas7);
+}
+
+#endif
+
+#ifdef TARGET_PPC64
+static void gen_fscr_facility_check(DisasContext *ctx, int facility_sprn,
+                                    int bit, int sprn, int cause)
+{
+    TCGv_i32 t1 = tcg_const_i32(bit);
+    TCGv_i32 t2 = tcg_const_i32(sprn);
+    TCGv_i32 t3 = tcg_const_i32(cause);
+
+    gen_helper_fscr_facility_check(cpu_env, t1, t2, t3);
+
+    tcg_temp_free_i32(t3);
+    tcg_temp_free_i32(t2);
+    tcg_temp_free_i32(t1);
+}
+
+static void gen_msr_facility_check(DisasContext *ctx, int facility_sprn,
+                                   int bit, int sprn, int cause)
+{
+    TCGv_i32 t1 = tcg_const_i32(bit);
+    TCGv_i32 t2 = tcg_const_i32(sprn);
+    TCGv_i32 t3 = tcg_const_i32(cause);
+
+    gen_helper_msr_facility_check(cpu_env, t1, t2, t3);
+
+    tcg_temp_free_i32(t3);
+    tcg_temp_free_i32(t2);
+    tcg_temp_free_i32(t1);
+}
+
+void spr_read_prev_upper32(DisasContext *ctx, int gprn, int sprn)
+{
+    TCGv spr_up = tcg_temp_new();
+    TCGv spr = tcg_temp_new();
+
+    gen_load_spr(spr, sprn - 1);
+    tcg_gen_shri_tl(spr_up, spr, 32);
+    tcg_gen_ext32u_tl(cpu_gpr[gprn], spr_up);
+
+    tcg_temp_free(spr);
+    tcg_temp_free(spr_up);
+}
+
+void spr_write_prev_upper32(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv spr = tcg_temp_new();
+
+    gen_load_spr(spr, sprn - 1);
+    tcg_gen_deposit_tl(spr, spr, cpu_gpr[gprn], 32, 32);
+    gen_store_spr(sprn - 1, spr);
+
+    tcg_temp_free(spr);
+}
+
+#if !defined(CONFIG_USER_ONLY)
+void spr_write_hmer(DisasContext *ctx, int sprn, int gprn)
+{
+    TCGv hmer = tcg_temp_new();
+
+    gen_load_spr(hmer, sprn);
+    tcg_gen_and_tl(hmer, cpu_gpr[gprn], hmer);
+    gen_store_spr(sprn, hmer);
+    spr_store_dump_spr(sprn);
+    tcg_temp_free(hmer);
+}
+
+void spr_write_lpcr(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_helper_store_lpcr(cpu_env, cpu_gpr[gprn]);
+}
+#endif /* !defined(CONFIG_USER_ONLY) */
+
+void spr_read_tar(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_fscr_facility_check(ctx, SPR_FSCR, FSCR_TAR, sprn, FSCR_IC_TAR);
+    spr_read_generic(ctx, gprn, sprn);
+}
+
+void spr_write_tar(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_fscr_facility_check(ctx, SPR_FSCR, FSCR_TAR, sprn, FSCR_IC_TAR);
+    spr_write_generic(ctx, sprn, gprn);
+}
+
+void spr_read_tm(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_msr_facility_check(ctx, SPR_FSCR, MSR_TM, sprn, FSCR_IC_TM);
+    spr_read_generic(ctx, gprn, sprn);
+}
+
+void spr_write_tm(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_msr_facility_check(ctx, SPR_FSCR, MSR_TM, sprn, FSCR_IC_TM);
+    spr_write_generic(ctx, sprn, gprn);
+}
+
+void spr_read_tm_upper32(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_msr_facility_check(ctx, SPR_FSCR, MSR_TM, sprn, FSCR_IC_TM);
+    spr_read_prev_upper32(ctx, gprn, sprn);
+}
+
+void spr_write_tm_upper32(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_msr_facility_check(ctx, SPR_FSCR, MSR_TM, sprn, FSCR_IC_TM);
+    spr_write_prev_upper32(ctx, sprn, gprn);
+}
+
+void spr_read_ebb(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_fscr_facility_check(ctx, SPR_FSCR, FSCR_EBB, sprn, FSCR_IC_EBB);
+    spr_read_generic(ctx, gprn, sprn);
+}
+
+void spr_write_ebb(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_fscr_facility_check(ctx, SPR_FSCR, FSCR_EBB, sprn, FSCR_IC_EBB);
+    spr_write_generic(ctx, sprn, gprn);
+}
+
+void spr_read_ebb_upper32(DisasContext *ctx, int gprn, int sprn)
+{
+    gen_fscr_facility_check(ctx, SPR_FSCR, FSCR_EBB, sprn, FSCR_IC_EBB);
+    spr_read_prev_upper32(ctx, gprn, sprn);
+}
+
+void spr_write_ebb_upper32(DisasContext *ctx, int sprn, int gprn)
+{
+    gen_fscr_facility_check(ctx, SPR_FSCR, FSCR_EBB, sprn, FSCR_IC_EBB);
+    spr_write_prev_upper32(ctx, sprn, gprn);
 }
 #endif
 
@@ -1851,18 +2764,13 @@
     if (l > 2) {
         tcg_gen_movi_i64(cpu_gpr[rD(ctx->opcode)], -1);
     } else {
-        if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-            gen_io_start();
-        }
+        gen_icount_io_start(ctx);
         if (l == 0) {
             gen_helper_darn32(cpu_gpr[rD(ctx->opcode)]);
         } else {
             /* Return 64-bit random for both CRN and RRN */
             gen_helper_darn64(cpu_gpr[rD(ctx->opcode)]);
         }
-        if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-            gen_stop_exception(ctx);
-        }
     }
 }
 #endif
@@ -3112,7 +4020,7 @@
          */
         if (!(ctx->insns_flags2 & PPC2_ISA300)) {
             qemu_log_mask(LOG_GUEST_ERROR, "invalid eieio using bit 6 at @"
-                          TARGET_FMT_lx "\n", ctx->base.pc_next - 4);
+                          TARGET_FMT_lx "\n", ctx->cia);
         } else {
             bar = TCG_MO_ST_LD;
         }
@@ -3157,7 +4065,7 @@
         gen_check_tlb_flush(ctx, false);
     }
     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
-    gen_stop_exception(ctx);
+    ctx->base.is_jmp = DISAS_EXIT_UPDATE;
 }
 
 #define MEMOP_GET_SIZE(x)  (1 << ((x) & MO_SIZE))
@@ -3740,8 +4648,9 @@
         } else if (sse & (CPU_SINGLE_STEP | CPU_BRANCH_STEP)) {
             uint32_t excp = gen_prep_dbgex(ctx);
             gen_exception(ctx, excp);
+        } else {
+            tcg_gen_exit_tb(NULL, 0);
         }
-        tcg_gen_exit_tb(NULL, 0);
     } else {
         tcg_gen_lookup_and_goto_ptr();
     }
@@ -3776,20 +4685,20 @@
 {
     target_ulong li, target;
 
-    ctx->exception = POWERPC_EXCP_BRANCH;
     /* sign extend LI */
     li = LI(ctx->opcode);
     li = (li ^ 0x02000000) - 0x02000000;
     if (likely(AA(ctx->opcode) == 0)) {
-        target = ctx->base.pc_next + li - 4;
+        target = ctx->cia + li;
     } else {
         target = li;
     }
     if (LK(ctx->opcode)) {
         gen_setlr(ctx, ctx->base.pc_next);
     }
-    gen_update_cfar(ctx, ctx->base.pc_next - 4);
+    gen_update_cfar(ctx, ctx->cia);
     gen_goto_tb(ctx, 0, target);
+    ctx->base.is_jmp = DISAS_NORETURN;
 }
 
 #define BCOND_IM  0
@@ -3802,7 +4711,6 @@
     uint32_t bo = BO(ctx->opcode);
     TCGLabel *l1;
     TCGv target;
-    ctx->exception = POWERPC_EXCP_BRANCH;
 
     if (type == BCOND_LR || type == BCOND_CTR || type == BCOND_TAR) {
         target = tcg_temp_local_new();
@@ -3887,11 +4795,11 @@
         }
         tcg_temp_free_i32(temp);
     }
-    gen_update_cfar(ctx, ctx->base.pc_next - 4);
+    gen_update_cfar(ctx, ctx->cia);
     if (type == BCOND_IM) {
         target_ulong li = (target_long)((int16_t)(BD(ctx->opcode)));
         if (likely(AA(ctx->opcode) == 0)) {
-            gen_goto_tb(ctx, 0, ctx->base.pc_next + li - 4);
+            gen_goto_tb(ctx, 0, ctx->cia + li);
         } else {
             gen_goto_tb(ctx, 0, li);
         }
@@ -3909,6 +4817,7 @@
         gen_set_label(l1);
         gen_goto_tb(ctx, 1, ctx->base.pc_next);
     }
+    ctx->base.is_jmp = DISAS_NORETURN;
 }
 
 static void gen_bc(DisasContext *ctx)
@@ -4004,12 +4913,10 @@
     }
     /* Restore CPU state */
     CHK_SV;
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_update_cfar(ctx, ctx->base.pc_next - 4);
+    gen_icount_io_start(ctx);
+    gen_update_cfar(ctx, ctx->cia);
     gen_helper_rfi(cpu_env);
-    gen_sync_exception(ctx);
+    ctx->base.is_jmp = DISAS_EXIT;
 #endif
 }
 
@@ -4021,12 +4928,10 @@
 #else
     /* Restore CPU state */
     CHK_SV;
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_update_cfar(ctx, ctx->base.pc_next - 4);
+    gen_icount_io_start(ctx);
+    gen_update_cfar(ctx, ctx->cia);
     gen_helper_rfid(cpu_env);
-    gen_sync_exception(ctx);
+    ctx->base.is_jmp = DISAS_EXIT;
 #endif
 }
 
@@ -4038,12 +4943,10 @@
 #else
     /* Restore CPU state */
     CHK_SV;
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
-    gen_update_cfar(ctx, ctx->base.pc_next - 4);
+    gen_icount_io_start(ctx);
+    gen_update_cfar(ctx, ctx->cia);
     gen_helper_rfscv(cpu_env);
-    gen_sync_exception(ctx);
+    ctx->base.is_jmp = DISAS_EXIT;
 #endif
 }
 #endif
@@ -4056,7 +4959,7 @@
     /* Restore CPU state */
     CHK_HV;
     gen_helper_hrfid(cpu_env);
-    gen_sync_exception(ctx);
+    ctx->base.is_jmp = DISAS_EXIT;
 #endif
 }
 #endif
@@ -4083,13 +4986,10 @@
     uint32_t lev = (ctx->opcode >> 5) & 0x7F;
 
     /* Set the PC back to the faulting instruction. */
-    if (ctx->exception == POWERPC_EXCP_NONE) {
-        gen_update_nip(ctx, ctx->base.pc_next - 4);
-    }
+    gen_update_nip(ctx, ctx->cia);
     gen_helper_scv(cpu_env, tcg_constant_i32(lev));
 
-    /* This need not be exact, just not POWERPC_EXCP_NONE */
-    ctx->exception = POWERPC_SYSCALL_VECTORED;
+    ctx->base.is_jmp = DISAS_NORETURN;
 }
 #endif
 #endif
@@ -4175,43 +5075,6 @@
 
 /***                          Processor control                            ***/
 
-static void gen_read_xer(DisasContext *ctx, TCGv dst)
-{
-    TCGv t0 = tcg_temp_new();
-    TCGv t1 = tcg_temp_new();
-    TCGv t2 = tcg_temp_new();
-    tcg_gen_mov_tl(dst, cpu_xer);
-    tcg_gen_shli_tl(t0, cpu_so, XER_SO);
-    tcg_gen_shli_tl(t1, cpu_ov, XER_OV);
-    tcg_gen_shli_tl(t2, cpu_ca, XER_CA);
-    tcg_gen_or_tl(t0, t0, t1);
-    tcg_gen_or_tl(dst, dst, t2);
-    tcg_gen_or_tl(dst, dst, t0);
-    if (is_isa300(ctx)) {
-        tcg_gen_shli_tl(t0, cpu_ov32, XER_OV32);
-        tcg_gen_or_tl(dst, dst, t0);
-        tcg_gen_shli_tl(t0, cpu_ca32, XER_CA32);
-        tcg_gen_or_tl(dst, dst, t0);
-    }
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-    tcg_temp_free(t2);
-}
-
-static void gen_write_xer(TCGv src)
-{
-    /* Write all flags, while reading back check for isa300 */
-    tcg_gen_andi_tl(cpu_xer, src,
-                    ~((1u << XER_SO) |
-                      (1u << XER_OV) | (1u << XER_OV32) |
-                      (1u << XER_CA) | (1u << XER_CA32)));
-    tcg_gen_extract_tl(cpu_ov32, src, XER_OV32, 1);
-    tcg_gen_extract_tl(cpu_ca32, src, XER_CA32, 1);
-    tcg_gen_extract_tl(cpu_so, src, XER_SO, 1);
-    tcg_gen_extract_tl(cpu_ov, src, XER_OV, 1);
-    tcg_gen_extract_tl(cpu_ca, src, XER_CA, 1);
-}
-
 /* mcrxr */
 static void gen_mcrxr(DisasContext *ctx)
 {
@@ -4299,15 +5162,6 @@
     tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_msr);
 }
 
-static void spr_noaccess(DisasContext *ctx, int gprn, int sprn)
-{
-#if 0
-    sprn = ((sprn >> 5) & 0x1F) | ((sprn & 0x1F) << 5);
-    printf("ERROR: try to access SPR %d !\n", sprn);
-#endif
-}
-#define SPR_NOACCESS (&spr_noaccess)
-
 /* mfspr */
 static inline void gen_op_mfspr(DisasContext *ctx)
 {
@@ -4338,7 +5192,7 @@
             if (sprn != SPR_PVR) {
                 qemu_log_mask(LOG_GUEST_ERROR, "Trying to read privileged spr "
                               "%d (0x%03x) at " TARGET_FMT_lx "\n", sprn, sprn,
-                              ctx->base.pc_next - 4);
+                              ctx->cia);
             }
             gen_priv_exception(ctx, POWERPC_EXCP_PRIV_REG);
         }
@@ -4352,7 +5206,7 @@
         /* Not defined */
         qemu_log_mask(LOG_GUEST_ERROR,
                       "Trying to read invalid spr %d (0x%03x) at "
-                      TARGET_FMT_lx "\n", sprn, sprn, ctx->base.pc_next - 4);
+                      TARGET_FMT_lx "\n", sprn, sprn, ctx->cia);
 
         /*
          * The behaviour depends on MSR:PR and SPR# bit 0x10, it can
@@ -4416,9 +5270,7 @@
     CHK_SV;
 
 #if !defined(CONFIG_USER_ONLY)
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
+    gen_icount_io_start(ctx);
     if (ctx->opcode & 0x00010000) {
         /* L=1 form only updates EE and RI */
         TCGv t0 = tcg_temp_new();
@@ -4443,7 +5295,7 @@
         gen_helper_store_msr(cpu_env, cpu_gpr[rS(ctx->opcode)]);
     }
     /* Must stop the translation as machine state (may have) changed */
-    gen_stop_exception(ctx);
+    ctx->base.is_jmp = DISAS_EXIT_UPDATE;
 #endif /* !defined(CONFIG_USER_ONLY) */
 }
 #endif /* defined(TARGET_PPC64) */
@@ -4453,9 +5305,7 @@
     CHK_SV;
 
 #if !defined(CONFIG_USER_ONLY)
-    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-        gen_io_start();
-    }
+    gen_icount_io_start(ctx);
     if (ctx->opcode & 0x00010000) {
         /* L=1 form only updates EE and RI */
         TCGv t0 = tcg_temp_new();
@@ -4488,7 +5338,7 @@
         tcg_temp_free(msr);
     }
     /* Must stop the translation as machine state (may have) changed */
-    gen_stop_exception(ctx);
+    ctx->base.is_jmp = DISAS_EXIT_UPDATE;
 #endif
 }
 
@@ -4516,7 +5366,7 @@
             /* Privilege exception */
             qemu_log_mask(LOG_GUEST_ERROR, "Trying to write privileged spr "
                           "%d (0x%03x) at " TARGET_FMT_lx "\n", sprn, sprn,
-                          ctx->base.pc_next - 4);
+                          ctx->cia);
             gen_priv_exception(ctx, POWERPC_EXCP_PRIV_REG);
         }
     } else {
@@ -4530,7 +5380,7 @@
         /* Not defined */
         qemu_log_mask(LOG_GUEST_ERROR,
                       "Trying to write invalid spr %d (0x%03x) at "
-                      TARGET_FMT_lx "\n", sprn, sprn, ctx->base.pc_next - 4);
+                      TARGET_FMT_lx "\n", sprn, sprn, ctx->cia);
 
 
         /*
@@ -5943,7 +6793,7 @@
     CHK_SV;
 
     gen_helper_rfsvc(cpu_env);
-    gen_sync_exception(ctx);
+    ctx->base.is_jmp = DISAS_EXIT;
 #endif /* defined(CONFIG_USER_ONLY) */
 }
 
@@ -6323,7 +7173,7 @@
     CHK_SV;
     /* Restore CPU state */
     gen_helper_40x_rfci(cpu_env);
-    gen_sync_exception(ctx);
+    ctx->base.is_jmp = DISAS_EXIT;
 #endif /* defined(CONFIG_USER_ONLY) */
 }
 
@@ -6335,7 +7185,7 @@
     CHK_SV;
     /* Restore CPU state */
     gen_helper_rfci(cpu_env);
-    gen_sync_exception(ctx);
+    ctx->base.is_jmp = DISAS_EXIT;
 #endif /* defined(CONFIG_USER_ONLY) */
 }
 
@@ -6350,7 +7200,7 @@
     CHK_SV;
     /* Restore CPU state */
     gen_helper_rfdi(cpu_env);
-    gen_sync_exception(ctx);
+    ctx->base.is_jmp = DISAS_EXIT;
 #endif /* defined(CONFIG_USER_ONLY) */
 }
 
@@ -6363,7 +7213,7 @@
     CHK_SV;
     /* Restore CPU state */
     gen_helper_rfmci(cpu_env);
-    gen_sync_exception(ctx);
+    ctx->base.is_jmp = DISAS_EXIT;
 #endif /* defined(CONFIG_USER_ONLY) */
 }
 
@@ -6625,7 +7475,7 @@
      * Stop translation to have a chance to raise an exception if we
      * just set msr_ee to 1
      */
-    gen_stop_exception(ctx);
+    ctx->base.is_jmp = DISAS_EXIT_UPDATE;
 #endif /* defined(CONFIG_USER_ONLY) */
 }
 
@@ -6639,7 +7489,7 @@
     if (ctx->opcode & 0x00008000) {
         tcg_gen_ori_tl(cpu_msr, cpu_msr, (1 << MSR_EE));
         /* Stop translation to have a chance to raise an exception */
-        gen_stop_exception(ctx);
+        ctx->base.is_jmp = DISAS_EXIT_UPDATE;
     } else {
         tcg_gen_andi_tl(cpu_msr, cpu_msr, ~(1 << MSR_EE));
     }
@@ -7638,194 +8488,6 @@
 #include "translate/spe-ops.c.inc"
 };
 
-#include "helper_regs.h"
-#include "translate_init.c.inc"
-
-/*****************************************************************************/
-/* Misc PowerPC helpers */
-void ppc_cpu_dump_state(CPUState *cs, FILE *f, int flags)
-{
-#define RGPL  4
-#define RFPL  4
-
-    PowerPCCPU *cpu = POWERPC_CPU(cs);
-    CPUPPCState *env = &cpu->env;
-    int i;
-
-    qemu_fprintf(f, "NIP " TARGET_FMT_lx "   LR " TARGET_FMT_lx " CTR "
-                 TARGET_FMT_lx " XER " TARGET_FMT_lx " CPU#%d\n",
-                 env->nip, env->lr, env->ctr, cpu_read_xer(env),
-                 cs->cpu_index);
-    qemu_fprintf(f, "MSR " TARGET_FMT_lx " HID0 " TARGET_FMT_lx "  HF "
-                 "%08x iidx %d didx %d\n",
-                 env->msr, env->spr[SPR_HID0], env->hflags,
-                 cpu_mmu_index(env, true), cpu_mmu_index(env, false));
-#if !defined(NO_TIMER_DUMP)
-    qemu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
-#if !defined(CONFIG_USER_ONLY)
-                 " DECR " TARGET_FMT_lu
-#endif
-                 "\n",
-                 cpu_ppc_load_tbu(env), cpu_ppc_load_tbl(env)
-#if !defined(CONFIG_USER_ONLY)
-                 , cpu_ppc_load_decr(env)
-#endif
-        );
-#endif
-    for (i = 0; i < 32; i++) {
-        if ((i & (RGPL - 1)) == 0) {
-            qemu_fprintf(f, "GPR%02d", i);
-        }
-        qemu_fprintf(f, " %016" PRIx64, ppc_dump_gpr(env, i));
-        if ((i & (RGPL - 1)) == (RGPL - 1)) {
-            qemu_fprintf(f, "\n");
-        }
-    }
-    qemu_fprintf(f, "CR ");
-    for (i = 0; i < 8; i++)
-        qemu_fprintf(f, "%01x", env->crf[i]);
-    qemu_fprintf(f, "  [");
-    for (i = 0; i < 8; i++) {
-        char a = '-';
-        if (env->crf[i] & 0x08) {
-            a = 'L';
-        } else if (env->crf[i] & 0x04) {
-            a = 'G';
-        } else if (env->crf[i] & 0x02) {
-            a = 'E';
-        }
-        qemu_fprintf(f, " %c%c", a, env->crf[i] & 0x01 ? 'O' : ' ');
-    }
-    qemu_fprintf(f, " ]             RES " TARGET_FMT_lx "\n",
-                 env->reserve_addr);
-
-    if (flags & CPU_DUMP_FPU) {
-        for (i = 0; i < 32; i++) {
-            if ((i & (RFPL - 1)) == 0) {
-                qemu_fprintf(f, "FPR%02d", i);
-            }
-            qemu_fprintf(f, " %016" PRIx64, *cpu_fpr_ptr(env, i));
-            if ((i & (RFPL - 1)) == (RFPL - 1)) {
-                qemu_fprintf(f, "\n");
-            }
-        }
-        qemu_fprintf(f, "FPSCR " TARGET_FMT_lx "\n", env->fpscr);
-    }
-
-#if !defined(CONFIG_USER_ONLY)
-    qemu_fprintf(f, " SRR0 " TARGET_FMT_lx "  SRR1 " TARGET_FMT_lx
-                 "    PVR " TARGET_FMT_lx " VRSAVE " TARGET_FMT_lx "\n",
-                 env->spr[SPR_SRR0], env->spr[SPR_SRR1],
-                 env->spr[SPR_PVR], env->spr[SPR_VRSAVE]);
-
-    qemu_fprintf(f, "SPRG0 " TARGET_FMT_lx " SPRG1 " TARGET_FMT_lx
-                 "  SPRG2 " TARGET_FMT_lx "  SPRG3 " TARGET_FMT_lx "\n",
-                 env->spr[SPR_SPRG0], env->spr[SPR_SPRG1],
-                 env->spr[SPR_SPRG2], env->spr[SPR_SPRG3]);
-
-    qemu_fprintf(f, "SPRG4 " TARGET_FMT_lx " SPRG5 " TARGET_FMT_lx
-                 "  SPRG6 " TARGET_FMT_lx "  SPRG7 " TARGET_FMT_lx "\n",
-                 env->spr[SPR_SPRG4], env->spr[SPR_SPRG5],
-                 env->spr[SPR_SPRG6], env->spr[SPR_SPRG7]);
-
-#if defined(TARGET_PPC64)
-    if (env->excp_model == POWERPC_EXCP_POWER7 ||
-        env->excp_model == POWERPC_EXCP_POWER8 ||
-        env->excp_model == POWERPC_EXCP_POWER9 ||
-        env->excp_model == POWERPC_EXCP_POWER10)  {
-        qemu_fprintf(f, "HSRR0 " TARGET_FMT_lx " HSRR1 " TARGET_FMT_lx "\n",
-                     env->spr[SPR_HSRR0], env->spr[SPR_HSRR1]);
-    }
-#endif
-    if (env->excp_model == POWERPC_EXCP_BOOKE) {
-        qemu_fprintf(f, "CSRR0 " TARGET_FMT_lx " CSRR1 " TARGET_FMT_lx
-                     " MCSRR0 " TARGET_FMT_lx " MCSRR1 " TARGET_FMT_lx "\n",
-                     env->spr[SPR_BOOKE_CSRR0], env->spr[SPR_BOOKE_CSRR1],
-                     env->spr[SPR_BOOKE_MCSRR0], env->spr[SPR_BOOKE_MCSRR1]);
-
-        qemu_fprintf(f, "  TCR " TARGET_FMT_lx "   TSR " TARGET_FMT_lx
-                     "    ESR " TARGET_FMT_lx "   DEAR " TARGET_FMT_lx "\n",
-                     env->spr[SPR_BOOKE_TCR], env->spr[SPR_BOOKE_TSR],
-                     env->spr[SPR_BOOKE_ESR], env->spr[SPR_BOOKE_DEAR]);
-
-        qemu_fprintf(f, "  PIR " TARGET_FMT_lx " DECAR " TARGET_FMT_lx
-                     "   IVPR " TARGET_FMT_lx "   EPCR " TARGET_FMT_lx "\n",
-                     env->spr[SPR_BOOKE_PIR], env->spr[SPR_BOOKE_DECAR],
-                     env->spr[SPR_BOOKE_IVPR], env->spr[SPR_BOOKE_EPCR]);
-
-        qemu_fprintf(f, " MCSR " TARGET_FMT_lx " SPRG8 " TARGET_FMT_lx
-                     "    EPR " TARGET_FMT_lx "\n",
-                     env->spr[SPR_BOOKE_MCSR], env->spr[SPR_BOOKE_SPRG8],
-                     env->spr[SPR_BOOKE_EPR]);
-
-        /* FSL-specific */
-        qemu_fprintf(f, " MCAR " TARGET_FMT_lx "  PID1 " TARGET_FMT_lx
-                     "   PID2 " TARGET_FMT_lx "    SVR " TARGET_FMT_lx "\n",
-                     env->spr[SPR_Exxx_MCAR], env->spr[SPR_BOOKE_PID1],
-                     env->spr[SPR_BOOKE_PID2], env->spr[SPR_E500_SVR]);
-
-        /*
-         * IVORs are left out as they are large and do not change often --
-         * they can be read with "p $ivor0", "p $ivor1", etc.
-         */
-    }
-
-#if defined(TARGET_PPC64)
-    if (env->flags & POWERPC_FLAG_CFAR) {
-        qemu_fprintf(f, " CFAR " TARGET_FMT_lx"\n", env->cfar);
-    }
-#endif
-
-    if (env->spr_cb[SPR_LPCR].name) {
-        qemu_fprintf(f, " LPCR " TARGET_FMT_lx "\n", env->spr[SPR_LPCR]);
-    }
-
-    switch (env->mmu_model) {
-    case POWERPC_MMU_32B:
-    case POWERPC_MMU_601:
-    case POWERPC_MMU_SOFT_6xx:
-    case POWERPC_MMU_SOFT_74xx:
-#if defined(TARGET_PPC64)
-    case POWERPC_MMU_64B:
-    case POWERPC_MMU_2_03:
-    case POWERPC_MMU_2_06:
-    case POWERPC_MMU_2_07:
-    case POWERPC_MMU_3_00:
-#endif
-        if (env->spr_cb[SPR_SDR1].name) { /* SDR1 Exists */
-            qemu_fprintf(f, " SDR1 " TARGET_FMT_lx " ", env->spr[SPR_SDR1]);
-        }
-        if (env->spr_cb[SPR_PTCR].name) { /* PTCR Exists */
-            qemu_fprintf(f, " PTCR " TARGET_FMT_lx " ", env->spr[SPR_PTCR]);
-        }
-        qemu_fprintf(f, "  DAR " TARGET_FMT_lx "  DSISR " TARGET_FMT_lx "\n",
-                     env->spr[SPR_DAR], env->spr[SPR_DSISR]);
-        break;
-    case POWERPC_MMU_BOOKE206:
-        qemu_fprintf(f, " MAS0 " TARGET_FMT_lx "  MAS1 " TARGET_FMT_lx
-                     "   MAS2 " TARGET_FMT_lx "   MAS3 " TARGET_FMT_lx "\n",
-                     env->spr[SPR_BOOKE_MAS0], env->spr[SPR_BOOKE_MAS1],
-                     env->spr[SPR_BOOKE_MAS2], env->spr[SPR_BOOKE_MAS3]);
-
-        qemu_fprintf(f, " MAS4 " TARGET_FMT_lx "  MAS6 " TARGET_FMT_lx
-                     "   MAS7 " TARGET_FMT_lx "    PID " TARGET_FMT_lx "\n",
-                     env->spr[SPR_BOOKE_MAS4], env->spr[SPR_BOOKE_MAS6],
-                     env->spr[SPR_BOOKE_MAS7], env->spr[SPR_BOOKE_PID]);
-
-        qemu_fprintf(f, "MMUCFG " TARGET_FMT_lx " TLB0CFG " TARGET_FMT_lx
-                     " TLB1CFG " TARGET_FMT_lx "\n",
-                     env->spr[SPR_MMUCFG], env->spr[SPR_BOOKE_TLB0CFG],
-                     env->spr[SPR_BOOKE_TLB1CFG]);
-        break;
-    default:
-        break;
-    }
-#endif
-
-#undef RGPL
-#undef RFPL
-}
-
 /*****************************************************************************/
 /* Opcode types */
 enum {
@@ -8270,14 +8932,68 @@
 #endif
 }
 
+/*
+ * Legacy decoder: look @insn up in the cpu->opcodes tables and, if the
+ * encoding is accepted, run the matching handler.
+ *
+ * Returns true if a handler ran; false (after logging a guest error) if
+ * the opcode slot is gen_invalid or the encoding sets invalid bits.
+ */
+static bool decode_legacy(PowerPCCPU *cpu, DisasContext *ctx, uint32_t insn)
+{
+    const uint32_t sub_opc[3] = { opc2(insn), opc3(insn), opc4(insn) };
+    opc_handler_t **table, *handler;
+    uint32_t mask;
+    int level;
+
+    ctx->opcode = insn;
+
+    LOG_DISAS("translate opcode %08x (%02x %02x %02x %02x) (%s)\n",
+              insn, opc1(insn), opc2(insn), opc3(insn), opc4(insn),
+              ctx->le_mode ? "little" : "big");
+
+    /* opc1 picks the top-level slot; indirect slots chain to sub-tables. */
+    handler = cpu->opcodes[opc1(insn)];
+    for (level = 0; level < 3 && is_indirect_opcode(handler); level++) {
+        table = ind_table(handler);
+        handler = table[sub_opc[level]];
+    }
+
+    /* A gen_invalid slot marks an invalid or unsupported opcode. */
+    if (unlikely(handler->handler == &gen_invalid)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "invalid/unsupported opcode: "
+                      "%02x - %02x - %02x - %02x (%08x) "
+                      TARGET_FMT_lx "\n",
+                      opc1(insn), opc2(insn), opc3(insn), opc4(insn),
+                      insn, ctx->cia);
+        return false;
+    }
+
+    /* SPE handlers switch to their second mask, inval2, when Rc is set. */
+    mask = unlikely(handler->type & (PPC_SPE | PPC_SPE_SINGLE | PPC_SPE_DOUBLE)
+                    && Rc(insn)) ? handler->inval2 : handler->inval1;
+
+    /* Any bit of the mask set in @insn makes the encoding invalid. */
+    if (unlikely((insn & mask) != 0)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "invalid bits: %08x for opcode: "
+                      "%02x - %02x - %02x - %02x (%08x) "
+                      TARGET_FMT_lx "\n", insn & mask,
+                      opc1(insn), opc2(insn), opc3(insn), opc4(insn),
+                      insn, ctx->cia);
+        return false;
+    }
+
+    /* Encoding accepted: hand over to the opcode's handler. */
+    handler->handler(ctx);
+    return true;
+}
+
 static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
     CPUPPCState *env = cs->env_ptr;
     uint32_t hflags = ctx->base.tb->flags;
-    int bound;
 
-    ctx->exception = POWERPC_EXCP_NONE;
     ctx->spr_cb = env->spr_cb;
     ctx->pr = (hflags >> HFLAGS_PR) & 1;
     ctx->mem_idx = (hflags >> HFLAGS_DMMU_IDX) & 7;
@@ -8316,8 +9032,12 @@
         ctx->singlestep_enabled |= GDBSTUB_SINGLE_STEP;
     }
 
-    bound = -(ctx->base.pc_first | TARGET_PAGE_MASK) / 4;
-    ctx->base.max_insns = MIN(ctx->base.max_insns, bound);
+    if (ctx->singlestep_enabled & (CPU_SINGLE_STEP | GDBSTUB_SINGLE_STEP)) {
+        ctx->base.max_insns = 1;
+    } else {
+        int bound = -(ctx->base.pc_first | TARGET_PAGE_MASK) / 4;
+        ctx->base.max_insns = MIN(ctx->base.max_insns, bound);
+    }
 }
 
 static void ppc_tr_tb_start(DisasContextBase *db, CPUState *cs)
@@ -8334,8 +9054,8 @@
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
 
+    gen_update_nip(ctx, ctx->base.pc_next);
     gen_debug_exception(ctx);
-    dcbase->is_jmp = DISAS_NORETURN;
     /*
      * The address covered by the breakpoint must be included in
      * [tb->pc, tb->pc + tb->size) in order to for it to be properly
@@ -8351,100 +9071,93 @@
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
     PowerPCCPU *cpu = POWERPC_CPU(cs);
     CPUPPCState *env = cs->env_ptr;
-    opc_handler_t **table, *handler;
+    uint32_t insn;
+    bool ok;
 
     LOG_DISAS("----------------\n");
     LOG_DISAS("nip=" TARGET_FMT_lx " super=%d ir=%d\n",
               ctx->base.pc_next, ctx->mem_idx, (int)msr_ir);
 
-    ctx->opcode = translator_ldl_swap(env, ctx->base.pc_next,
-                                      need_byteswap(ctx));
-
-    LOG_DISAS("translate opcode %08x (%02x %02x %02x %02x) (%s)\n",
-              ctx->opcode, opc1(ctx->opcode), opc2(ctx->opcode),
-              opc3(ctx->opcode), opc4(ctx->opcode),
-              ctx->le_mode ? "little" : "big");
+    ctx->cia = ctx->base.pc_next;
+    insn = translator_ldl_swap(env, ctx->base.pc_next, need_byteswap(ctx));
     ctx->base.pc_next += 4;
-    table = cpu->opcodes;
-    handler = table[opc1(ctx->opcode)];
-    if (is_indirect_opcode(handler)) {
-        table = ind_table(handler);
-        handler = table[opc2(ctx->opcode)];
-        if (is_indirect_opcode(handler)) {
-            table = ind_table(handler);
-            handler = table[opc3(ctx->opcode)];
-            if (is_indirect_opcode(handler)) {
-                table = ind_table(handler);
-                handler = table[opc4(ctx->opcode)];
-            }
-        }
-    }
-    /* Is opcode *REALLY* valid ? */
-    if (unlikely(handler->handler == &gen_invalid)) {
-        qemu_log_mask(LOG_GUEST_ERROR, "invalid/unsupported opcode: "
-                      "%02x - %02x - %02x - %02x (%08x) "
-                      TARGET_FMT_lx " %d\n",
-                      opc1(ctx->opcode), opc2(ctx->opcode),
-                      opc3(ctx->opcode), opc4(ctx->opcode),
-                      ctx->opcode, ctx->base.pc_next - 4, (int)msr_ir);
-    } else {
-        uint32_t inval;
 
-        if (unlikely(handler->type & (PPC_SPE | PPC_SPE_SINGLE | PPC_SPE_DOUBLE)
-                     && Rc(ctx->opcode))) {
-            inval = handler->inval2;
-        } else {
-            inval = handler->inval1;
-        }
-
-        if (unlikely((ctx->opcode & inval) != 0)) {
-            qemu_log_mask(LOG_GUEST_ERROR, "invalid bits: %08x for opcode: "
-                          "%02x - %02x - %02x - %02x (%08x) "
-                          TARGET_FMT_lx "\n", ctx->opcode & inval,
-                          opc1(ctx->opcode), opc2(ctx->opcode),
-                          opc3(ctx->opcode), opc4(ctx->opcode),
-                          ctx->opcode, ctx->base.pc_next - 4);
-            gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
-            ctx->base.is_jmp = DISAS_NORETURN;
-            return;
-        }
+    ok = decode_legacy(cpu, ctx, insn);
+    if (!ok) {
+        gen_invalid(ctx);
     }
-    (*(handler->handler))(ctx);
+
 #if defined(DO_PPC_STATISTICS)
     handler->count++;
 #endif
-    /* Check trace mode exceptions */
-    if (unlikely(ctx->singlestep_enabled & CPU_SINGLE_STEP &&
-                 (ctx->base.pc_next <= 0x100 || ctx->base.pc_next > 0xF00) &&
-                 ctx->exception != POWERPC_SYSCALL &&
-                 ctx->exception != POWERPC_EXCP_TRAP &&
-                 ctx->exception != POWERPC_EXCP_BRANCH)) {
-        uint32_t excp = gen_prep_dbgex(ctx);
-        gen_exception_nip(ctx, excp, ctx->base.pc_next);
-    }
 
-    if (tcg_check_temp_count()) {
-        qemu_log("Opcode %02x %02x %02x %02x (%08x) leaked "
-                 "temporaries\n", opc1(ctx->opcode), opc2(ctx->opcode),
-                 opc3(ctx->opcode), opc4(ctx->opcode), ctx->opcode);
-    }
-
-    ctx->base.is_jmp = ctx->exception == POWERPC_EXCP_NONE ?
-        DISAS_NEXT : DISAS_NORETURN;
+    translator_loop_temp_check(&ctx->base);
 }
 
 static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
+    DisasJumpType is_jmp = ctx->base.is_jmp;
+    target_ulong nip = ctx->base.pc_next;
+    int sse;
 
-    if (ctx->exception == POWERPC_EXCP_NONE) {
-        gen_goto_tb(ctx, 0, ctx->base.pc_next);
-    } else if (ctx->exception != POWERPC_EXCP_BRANCH) {
-        if (unlikely(ctx->base.singlestep_enabled)) {
-            gen_debug_exception(ctx);
+    if (is_jmp == DISAS_NORETURN) {
+        /* The TB has already been exited; there is nothing left to emit. */
+        return;
+    }
+
+    /* Single-step: sync NIP if the exit kind asks for it, then maybe trap. */
+    sse = ctx->singlestep_enabled & (CPU_SINGLE_STEP | GDBSTUB_SINGLE_STEP);
+    if (unlikely(sse)) {
+        switch (is_jmp) {
+        case DISAS_TOO_MANY:
+        case DISAS_EXIT_UPDATE:
+        case DISAS_CHAIN_UPDATE:
+            gen_update_nip(ctx, nip);
+            break;
+        case DISAS_EXIT:
+        case DISAS_CHAIN:
+            break;
+        default:
+            g_assert_not_reached();
         }
-        /* Generate the return instruction */
+
+        if (sse & GDBSTUB_SINGLE_STEP) {
+            gen_debug_exception(ctx);
+            return;
+        }
+        /* CPU_SINGLE_STEP only: no trap while 0x100 < nip <= 0xf00. */
+        if (nip <= 0x100 || nip > 0xf00) {
+            gen_exception(ctx, gen_prep_dbgex(ctx));
+            return;
+        }
+    }
+
+    switch (is_jmp) {
+    case DISAS_TOO_MANY:
+        if (use_goto_tb(ctx, nip)) {
+            tcg_gen_goto_tb(0);
+            gen_update_nip(ctx, nip);
+            tcg_gen_exit_tb(ctx->base.tb, 0);
+            break;
+        }
+        /* fall through */
+    case DISAS_CHAIN_UPDATE:
+        gen_update_nip(ctx, nip);
+        /* fall through */
+    case DISAS_CHAIN:
+        tcg_gen_lookup_and_goto_ptr();
+        break;
+
+    case DISAS_EXIT_UPDATE:
+        gen_update_nip(ctx, nip);
+        /* fall through */
+    case DISAS_EXIT:
         tcg_gen_exit_tb(NULL, 0);
+        break;
+
+    default:
+        g_assert_not_reached();
     }
 }
 
diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc
index b817d31..57a7f73 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -139,7 +139,7 @@
     gen_addr_reg_index(ctx, EA);
 
     data = tcg_temp_new_i32();
-    tcg_gen_qemu_ld_i32(data, EA, ctx->mem_idx, MO_TEUL);
+    tcg_gen_qemu_ld_i32(data, EA, ctx->mem_idx, DEF_MEMOP(MO_UL));
     tcg_gen_gvec_dup_i32(MO_UL, vsr_full_offset(xT(ctx->opcode)), 16, 16, data);
 
     tcg_temp_free(EA);
@@ -162,7 +162,7 @@
     gen_addr_reg_index(ctx, EA);
 
     data = tcg_temp_new_i64();
-    tcg_gen_qemu_ld_i64(data, EA, ctx->mem_idx, MO_TEQ);
+    tcg_gen_qemu_ld_i64(data, EA, ctx->mem_idx, DEF_MEMOP(MO_Q));
     tcg_gen_gvec_dup_i64(MO_Q, vsr_full_offset(xT(ctx->opcode)), 16, 16, data);
 
     tcg_temp_free(EA);
diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c
index 4ba5e1d..c24baf8 100644
--- a/tests/fp/fp-bench.c
+++ b/tests/fp/fp-bench.c
@@ -14,6 +14,7 @@
 #include <math.h>
 #include <fenv.h>
 #include "qemu/timer.h"
+#include "qemu/int128.h"
 #include "fpu/softfloat.h"
 
 /* amortize the computation of random inputs */
@@ -50,8 +51,10 @@
 enum precision {
     PREC_SINGLE,
     PREC_DOUBLE,
+    PREC_QUAD,
     PREC_FLOAT32,
     PREC_FLOAT64,
+    PREC_FLOAT128,
     PREC_MAX_NR,
 };
 
@@ -89,6 +92,7 @@
     double d;
     float32 f32;
     float64 f64;
+    float128 f128;
     uint64_t u64;
 };
 
@@ -113,6 +117,10 @@
 static uint64_t random_ops[MAX_OPERANDS] = {
     SEED_A, SEED_B, SEED_C,
 };
+
+static float128 random_quad_ops[MAX_OPERANDS] = {
+    {SEED_A, SEED_B}, {SEED_B, SEED_C}, {SEED_C, SEED_A},
+};
 static float_status soft_status;
 static enum precision precision;
 static enum op operation;
@@ -141,25 +149,45 @@
     int i;
 
     for (i = 0; i < n_ops; i++) {
-        uint64_t r = random_ops[i];
 
         switch (prec) {
         case PREC_SINGLE:
         case PREC_FLOAT32:
+        {
+            uint64_t r = random_ops[i];
             do {
                 r = xorshift64star(r);
             } while (!float32_is_normal(r));
+            random_ops[i] = r;
             break;
+        }
         case PREC_DOUBLE:
         case PREC_FLOAT64:
+        {
+            uint64_t r = random_ops[i];
             do {
                 r = xorshift64star(r);
             } while (!float64_is_normal(r));
+            random_ops[i] = r;
             break;
+        }
+        case PREC_QUAD:
+        case PREC_FLOAT128:
+        {
+            float128 r = random_quad_ops[i];
+            uint64_t hi = r.high;
+            uint64_t lo = r.low;
+            do {
+                hi = xorshift64star(hi);
+                lo = xorshift64star(lo);
+                r = make_float128(hi, lo);
+            } while (!float128_is_normal(r));
+            random_quad_ops[i] = r;
+            break;
+        }
         default:
             g_assert_not_reached();
         }
-        random_ops[i] = r;
     }
 }
 
@@ -184,6 +212,13 @@
                 ops[i].f64 = float64_chs(ops[i].f64);
             }
             break;
+        case PREC_QUAD:
+        case PREC_FLOAT128:
+            ops[i].f128 = random_quad_ops[i];
+            if (no_neg && float128_is_neg(ops[i].f128)) {
+                ops[i].f128 = float128_chs(ops[i].f128);
+            }
+            break;
         default:
             g_assert_not_reached();
         }
@@ -345,6 +380,41 @@
                 }
             }
             break;
+        case PREC_FLOAT128:
+            fill_random(ops, n_ops, prec, no_neg);
+            t0 = get_clock();
+            for (i = 0; i < OPS_PER_ITER; i++) {
+                float128 a = ops[0].f128;
+                float128 b = ops[1].f128;
+                float128 c = ops[2].f128;
+
+                switch (op) {
+                case OP_ADD:
+                    res.f128 = float128_add(a, b, &soft_status);
+                    break;
+                case OP_SUB:
+                    res.f128 = float128_sub(a, b, &soft_status);
+                    break;
+                case OP_MUL:
+                    res.f128 = float128_mul(a, b, &soft_status);
+                    break;
+                case OP_DIV:
+                    res.f128 = float128_div(a, b, &soft_status);
+                    break;
+                case OP_FMA:
+                    res.f128 = float128_muladd(a, b, c, 0, &soft_status);
+                    break;
+                case OP_SQRT:
+                    res.f128 = float128_sqrt(a, &soft_status);
+                    break;
+                case OP_CMP:
+                    res.u64 = float128_compare_quiet(a, b, &soft_status);
+                    break;
+                default:
+                    g_assert_not_reached();
+                }
+            }
+            break;
         default:
             g_assert_not_reached();
         }
@@ -369,7 +439,8 @@
     GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
     GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
     GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
-    GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops)
+    GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops) \
+    GEN_BENCH(bench_ ## opname ## _float128, float128, PREC_FLOAT128, op, n_ops)
 
 GEN_BENCH_ALL_TYPES(add, OP_ADD, 2)
 GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2)
@@ -383,7 +454,8 @@
     GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
     GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
     GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
-    GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n)
+    GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n) \
+    GEN_BENCH_NO_NEG(bench_ ## name ## _float128, float128, PREC_FLOAT128, op, n)
 
 GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
 #undef GEN_BENCH_ALL_TYPES_NO_NEG
@@ -397,6 +469,7 @@
         [PREC_DOUBLE]    = bench_ ## opname ## _double,         \
         [PREC_FLOAT32]   = bench_ ## opname ## _float32,        \
         [PREC_FLOAT64]   = bench_ ## opname ## _float64,        \
+        [PREC_FLOAT128]   = bench_ ## opname ## _float128,      \
     }
 
 static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = {
@@ -445,7 +518,7 @@
     fprintf(stderr, " -h = show this help message.\n");
     fprintf(stderr, " -o = floating point operation (%s). Default: %s\n",
             op_list, op_names[0]);
-    fprintf(stderr, " -p = floating point precision (single, double). "
+    fprintf(stderr, " -p = floating point precision (single, double, quad[soft only]). "
             "Default: single\n");
     fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). "
             "Default: even\n");
@@ -565,6 +638,8 @@
                 precision = PREC_SINGLE;
             } else if (!strcmp(optarg, "double")) {
                 precision = PREC_DOUBLE;
+            } else if (!strcmp(optarg, "quad")) {
+                precision = PREC_QUAD;
             } else {
                 fprintf(stderr, "Unsupported precision '%s'\n", optarg);
                 exit(EXIT_FAILURE);
@@ -608,6 +683,9 @@
         case PREC_DOUBLE:
             precision = PREC_FLOAT64;
             break;
+        case PREC_QUAD:
+            precision = PREC_FLOAT128;
+            break;
         default:
             g_assert_not_reached();
         }
diff --git a/tests/fp/fp-test.c b/tests/fp/fp-test.c
index 5a4cad8..ff131af 100644
--- a/tests/fp/fp-test.c
+++ b/tests/fp/fp-test.c
@@ -717,7 +717,7 @@
         test_abz_f128(true_abz_f128M, subj_abz_f128M);
         break;
     case F128_MULADD:
-        not_implemented();
+        test_abcz_f128(slow_f128M_mulAdd, qemu_f128M_mulAdd);
         break;
     case F128_SQRT:
         test_az_f128(slow_f128M_sqrt, qemu_f128M_sqrt);
diff --git a/tests/fp/wrap.c.inc b/tests/fp/wrap.c.inc
index 0cbd200..cb1bb77 100644
--- a/tests/fp/wrap.c.inc
+++ b/tests/fp/wrap.c.inc
@@ -574,6 +574,18 @@
 WRAP_MULADD(qemu_f64_mulAdd, float64_muladd, float64)
 #undef WRAP_MULADD
 
+/* f128M-style wrapper: *res = float128_muladd(*ap, *bp, *cp) with flags 0. */
+static void qemu_f128M_mulAdd(const float128_t *ap, const float128_t *bp,
+                              const float128_t *cp, float128_t *res)
+{
+    /* Operands are converted to QEMU's float128 in a, b, c order. */
+    const float128 x = soft_to_qemu128(*ap);
+    const float128 y = soft_to_qemu128(*bp);
+    const float128 z = soft_to_qemu128(*cp);
+
+    *res = qemu_to_soft128(float128_muladd(x, y, z, 0, &qsf));
+}
+
 #define WRAP_CMP16(name, func, retcond)         \
     static bool name(float16_t a, float16_t b)  \
     {                                           \
diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build
index 1cddf5b..1f5c8f1 100644
--- a/tests/qtest/libqos/meson.build
+++ b/tests/qtest/libqos/meson.build
@@ -32,6 +32,7 @@
         'virtio-9p.c',
         'virtio-balloon.c',
         'virtio-blk.c',
+        'vhost-user-blk.c',
         'virtio-mmio.c',
         'virtio-net.c',
         'virtio-pci.c',
diff --git a/tests/qtest/libqos/vhost-user-blk.c b/tests/qtest/libqos/vhost-user-blk.c
new file mode 100644
index 0000000..568c342
--- /dev/null
+++ b/tests/qtest/libqos/vhost-user-blk.c
@@ -0,0 +1,130 @@
+/*
+ * libqos driver framework
+ *
+ * Based on tests/qtest/libqos/virtio-blk.c
+ *
+ * Copyright (c) 2020 Coiby Xu <coiby.xu@gmail.com>
+ *
+ * Copyright (c) 2018 Emanuele Giuseppe Esposito <e.emanuelegiuseppe@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "qemu/module.h"
+#include "standard-headers/linux/virtio_blk.h"
+#include "vhost-user-blk.h"
+
+#define PCI_SLOT                0x04
+#define PCI_FN                  0x00
+
+/* vhost-user-blk-device */
+/* Map an interface name to the matching object; aborts on unknown names. */
+static void *qvhost_user_blk_get_driver(QVhostUserBlk *v_blk,
+                                        const char *interface)
+{
+    if (g_strcmp0(interface, "vhost-user-blk") == 0) {
+        return v_blk;
+    } else if (g_strcmp0(interface, "virtio") == 0) {
+        return v_blk->vdev;
+    }
+
+    fprintf(stderr, "%s not present in vhost-user-blk-device\n", interface);
+    g_assert_not_reached();
+}
+
+static void *qvhost_user_blk_device_get_driver(void *object,
+                                               const char *interface)
+{
+    QVhostUserBlk *blk_if = &((QVhostUserBlkDevice *)object)->blk;
+    return qvhost_user_blk_get_driver(blk_if, interface); /* shared lookup */
+}
+
+/* Node constructor: wrap virtio_dev in a new QVhostUserBlkDevice object. */
+static void *vhost_user_blk_device_create(void *virtio_dev,
+                                          QGuestAllocator *t_alloc,
+                                          void *addr)
+{
+    QVhostUserBlkDevice *node = g_new0(QVhostUserBlkDevice, 1);
+
+    /* t_alloc and addr are not used by this constructor. */
+    node->blk.vdev = virtio_dev;
+    node->obj.get_driver = qvhost_user_blk_device_get_driver;
+
+    return &node->obj;
+}
+
+/* vhost-user-blk-pci */
+/* PCI get_driver hook: adds "pci-device" on top of the shared interfaces. */
+static void *qvhost_user_blk_pci_get_driver(void *object, const char *interface)
+{
+    QVhostUserBlkPCI *pci_node = object;
+    return g_strcmp0(interface, "pci-device") == 0
+        ? (void *)pci_node->pci_vdev.pdev
+        : qvhost_user_blk_get_driver(&pci_node->blk, interface);
+}
+
+/* Node constructor: set up the virtio-pci device at addr and check its type. */
+static void *vhost_user_blk_pci_create(void *pci_bus, QGuestAllocator *t_alloc,
+                                       void *addr)
+{
+    QVhostUserBlkPCI *node = g_new0(QVhostUserBlkPCI, 1);
+    QVirtioPCIDevice *pci_vdev = &node->pci_vdev;
+
+    virtio_pci_init(pci_vdev, pci_bus, addr);
+    node->blk.vdev = &pci_vdev->vdev;
+
+    g_assert_cmphex(node->blk.vdev->device_type, ==, VIRTIO_ID_BLOCK);
+
+    /* Assigned after virtio_pci_init(), which presumably sets up obj itself. */
+    pci_vdev->obj.get_driver = qvhost_user_blk_pci_get_driver;
+    return &pci_vdev->obj;
+}
+
+static void vhost_user_blk_register_nodes(void)
+{
+    /*
+     * FIXME: every test using these two nodes needs to setup a
+     * -drive,id=drive0 otherwise QEMU is not going to start.
+     * Therefore, we do not include "produces" edge for virtio
+     * and pci-device yet.
+     */
+    /* NOTE(review): the pci args below use chardev=char1, not a -drive. */
+    char *arg = g_strdup_printf("id=drv0,chardev=char1,addr=%x.%x",
+                                PCI_SLOT, PCI_FN);
+
+    QPCIAddress addr = {
+        .devfn = QPCI_DEVFN(PCI_SLOT, PCI_FN),
+    };
+
+    QOSGraphEdgeOptions opts = { };
+
+    /* vhost-user-blk-device: consumes virtio-bus with empty edge options */
+    /* (no extra_device_opts such as "drive=drive0" are set for this node) */
+    qos_node_create_driver("vhost-user-blk-device",
+                           vhost_user_blk_device_create);
+    qos_node_consumes("vhost-user-blk-device", "virtio-bus", &opts);
+    qos_node_produces("vhost-user-blk-device", "vhost-user-blk");
+
+    /* vhost-user-blk-pci: consumes pci-bus with the options built above */
+    opts.extra_device_opts = arg;
+    add_qpci_address(&opts, &addr);
+    qos_node_create_driver("vhost-user-blk-pci", vhost_user_blk_pci_create);
+    qos_node_consumes("vhost-user-blk-pci", "pci-bus", &opts);
+    qos_node_produces("vhost-user-blk-pci", "vhost-user-blk");
+
+    g_free(arg); /* NOTE(review): assumes the edge kept its own copy */
+}
+
+libqos_init(vhost_user_blk_register_nodes);
diff --git a/tests/qtest/libqos/vhost-user-blk.h b/tests/qtest/libqos/vhost-user-blk.h
new file mode 100644
index 0000000..2a03456
--- /dev/null
+++ b/tests/qtest/libqos/vhost-user-blk.h
@@ -0,0 +1,48 @@
+/*
+ * libqos driver framework
+ *
+ * Based on tests/qtest/libqos/virtio-blk.c
+ *
+ * Copyright (c) 2020 Coiby Xu <coiby.xu@gmail.com>
+ *
+ * Copyright (c) 2018 Emanuele Giuseppe Esposito <e.emanuelegiuseppe@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#ifndef TESTS_LIBQOS_VHOST_USER_BLK_H
+#define TESTS_LIBQOS_VHOST_USER_BLK_H
+
+#include "qgraph.h"
+#include "virtio.h"
+#include "virtio-pci.h"
+
+typedef struct QVhostUserBlk QVhostUserBlk;
+typedef struct QVhostUserBlkPCI QVhostUserBlkPCI;
+typedef struct QVhostUserBlkDevice QVhostUserBlkDevice;
+
+struct QVhostUserBlk {
+    QVirtioDevice *vdev; /* what get_driver returns for "virtio" */
+};
+
+struct QVhostUserBlkPCI {
+    QVirtioPCIDevice pci_vdev; /* NOTE(review): keep first? objects are cast */
+    QVhostUserBlk blk; /* blk.vdev points at pci_vdev.vdev */
+};
+
+struct QVhostUserBlkDevice {
+    QOSGraphObject obj; /* NOTE(review): keep first? objects are cast */
+    QVhostUserBlk blk; /* blk.vdev is the create callback's virtio_dev */
+};
+
+#endif
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 49de74f..c3a223a 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -231,6 +231,9 @@
   qos_test_ss.add(files('virtio-9p-test.c'))
 endif
 qos_test_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user-test.c'))
+if have_tools and have_vhost_user_blk_server
+  qos_test_ss.add(files('vhost-user-blk-test.c'))
+endif
 
 tpmemu_files = ['tpm-emu.c', 'tpm-util.c', 'tpm-tests.c']
 
@@ -269,6 +272,7 @@
   endif
   qtest_env.set('G_TEST_DBUS_DAEMON', meson.source_root() / 'tests/dbus-vmstate-daemon.sh')
   qtest_env.set('QTEST_QEMU_BINARY', './qemu-system-' + target_base)
+  qtest_env.set('QTEST_QEMU_STORAGE_DAEMON_BINARY', './storage-daemon/qemu-storage-daemon')
   
   foreach test : target_qtests
     # Executables are shared across targets, declare them only the first time we
diff --git a/tests/qtest/vhost-user-blk-test.c b/tests/qtest/vhost-user-blk-test.c
new file mode 100644
index 0000000..8796c74
--- /dev/null
+++ b/tests/qtest/vhost-user-blk-test.c
@@ -0,0 +1,989 @@
+/*
+ * QTest testcase for Vhost-user Block Device
+ *
+ * Based on tests/qtest/virtio-blk-test.c
+ *
+ * Copyright (c) 2014 SUSE LINUX Products GmbH
+ * Copyright (c) 2014 Marc Marí
+ * Copyright (c) 2020 Coiby Xu
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest-single.h"
+#include "qemu/bswap.h"
+#include "qemu/module.h"
+#include "standard-headers/linux/virtio_blk.h"
+#include "standard-headers/linux/virtio_pci.h"
+#include "libqos/qgraph.h"
+#include "libqos/vhost-user-blk.h"
+#include "libqos/libqos-pc.h"
+
+#define TEST_IMAGE_SIZE         (64 * 1024 * 1024)
+#define QVIRTIO_BLK_TIMEOUT_US  (30 * 1000 * 1000)
+#define PCI_SLOT_HP             0x06
+
+typedef struct {
+    pid_t pid; /* presumably the qemu-storage-daemon process - TODO confirm */
+} QemuStorageDaemonState;
+
+typedef struct QVirtioBlkReq {
+    uint32_t type; /* VIRTIO_BLK_T_* request type */
+    uint32_t ioprio;
+    uint64_t sector; /* type/ioprio/sector form the 16 bytes sent as header */
+    char *data; /* payload, copied to guest memory right after the header */
+    uint8_t status;
+} QVirtioBlkReq;
+
+#ifdef HOST_WORDS_BIGENDIAN
+static const bool host_is_big_endian = true;
+#else
+static const bool host_is_big_endian; /* false */
+#endif
+
+static inline void virtio_blk_fix_request(QVirtioDevice *d, QVirtioBlkReq *req)
+{
+    if (qvirtio_is_big_endian(d) != host_is_big_endian) { /* in-place swap */
+        req->type = bswap32(req->type);
+        req->ioprio = bswap32(req->ioprio);
+        req->sector = bswap64(req->sector); /* data/status are not touched */
+    }
+}
+
+static inline void virtio_blk_fix_dwz_hdr(QVirtioDevice *d,
+    struct virtio_blk_discard_write_zeroes *dwz_hdr)
+{
+    if (qvirtio_is_big_endian(d) != host_is_big_endian) { /* in-place swap */
+        dwz_hdr->sector = bswap64(dwz_hdr->sector);
+        dwz_hdr->num_sectors = bswap32(dwz_hdr->num_sectors);
+        dwz_hdr->flags = bswap32(dwz_hdr->flags);
+    }
+}
+
+static uint64_t virtio_blk_request(QGuestAllocator *alloc, QVirtioDevice *d,
+                                   QVirtioBlkReq *req, uint64_t data_size)
+{
+    uint64_t addr;
+    uint8_t status = 0xFF; /* initial value of the guest status byte */
+    QTestState *qts = global_qtest;
+
+    switch (req->type) { /* data_size must suit the request type */
+    case VIRTIO_BLK_T_IN:
+    case VIRTIO_BLK_T_OUT:
+        g_assert_cmpuint(data_size % 512, ==, 0);
+        break;
+    case VIRTIO_BLK_T_DISCARD:
+    case VIRTIO_BLK_T_WRITE_ZEROES:
+        g_assert_cmpuint(data_size %
+                         sizeof(struct virtio_blk_discard_write_zeroes), ==, 0);
+        break;
+    default:
+        g_assert_cmpuint(data_size, ==, 0);
+    }
+    /* Guest layout: 16-byte header, data_size payload, 1 status byte. */
+    addr = guest_alloc(alloc, sizeof(*req) + data_size);
+    /* Note: this converts the caller's req in place. */
+    virtio_blk_fix_request(d, req);
+
+    qtest_memwrite(qts, addr, req, 16);
+    qtest_memwrite(qts, addr + 16, req->data, data_size);
+    qtest_memwrite(qts, addr + 16 + data_size, &status, sizeof(status));
+    /* Returns the guest address; callers in this file guest_free() it. */
+    return addr;
+}
+
+/*
+ * Submit malformed DISCARD/WRITE_ZEROES requests (selected by type) on vq
+ * and check that each one completes with the expected error status.
+ */
+static void test_invalid_discard_write_zeroes(QVirtioDevice *dev,
+                                              QGuestAllocator *alloc,
+                                              QTestState *qts,
+                                              QVirtQueue *vq,
+                                              uint32_t type)
+{
+    QVirtioBlkReq req = { 0 }; /* whole header is sent: no stray bytes */
+    struct virtio_blk_discard_write_zeroes dwz_hdr;
+    struct virtio_blk_discard_write_zeroes dwz_hdr2[2];
+    uint64_t req_addr;
+    uint32_t free_head;
+    uint8_t status;
+
+    /* More than one dwz is not supported */
+    req.type = type;
+    req.data = (char *) dwz_hdr2;
+    dwz_hdr2[0].sector = 0;
+    dwz_hdr2[0].num_sectors = 1;
+    dwz_hdr2[0].flags = 0;
+    dwz_hdr2[1].sector = 1;
+    dwz_hdr2[1].num_sectors = 1;
+    dwz_hdr2[1].flags = 0;
+
+    virtio_blk_fix_dwz_hdr(dev, &dwz_hdr2[0]);
+    virtio_blk_fix_dwz_hdr(dev, &dwz_hdr2[1]);
+
+    req_addr = virtio_blk_request(alloc, dev, &req, sizeof(dwz_hdr2));
+
+    free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
+    qvirtqueue_add(qts, vq, req_addr + 16, sizeof(dwz_hdr2), false, true);
+    qvirtqueue_add(qts, vq, req_addr + 16 + sizeof(dwz_hdr2), 1, true, false);
+
+    qvirtqueue_kick(qts, dev, vq, free_head);
+
+    qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
+                           QVIRTIO_BLK_TIMEOUT_US);
+    status = readb(req_addr + 16 + sizeof(dwz_hdr2));
+    g_assert_cmpint(status, ==, VIRTIO_BLK_S_UNSUPP);
+
+    guest_free(alloc, req_addr);
+
+    /* num_sectors must be less than config->max_write_zeroes_sectors */
+    req.type = type;
+    req.data = (char *) &dwz_hdr;
+    dwz_hdr.sector = 0;
+    dwz_hdr.num_sectors = 0xffffffff;
+    dwz_hdr.flags = 0;
+
+    virtio_blk_fix_dwz_hdr(dev, &dwz_hdr);
+
+    req_addr = virtio_blk_request(alloc, dev, &req, sizeof(dwz_hdr));
+
+    free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
+    qvirtqueue_add(qts, vq, req_addr + 16, sizeof(dwz_hdr), false, true);
+    qvirtqueue_add(qts, vq, req_addr + 16 + sizeof(dwz_hdr), 1, true, false);
+
+    qvirtqueue_kick(qts, dev, vq, free_head);
+
+    qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
+                           QVIRTIO_BLK_TIMEOUT_US);
+    status = readb(req_addr + 16 + sizeof(dwz_hdr));
+    g_assert_cmpint(status, ==, VIRTIO_BLK_S_IOERR);
+
+    guest_free(alloc, req_addr);
+
+    /* sector must be less than the device capacity */
+    req.type = type;
+    req.data = (char *) &dwz_hdr;
+    dwz_hdr.sector = TEST_IMAGE_SIZE / 512 + 1;
+    dwz_hdr.num_sectors = 1;
+    dwz_hdr.flags = 0;
+
+    virtio_blk_fix_dwz_hdr(dev, &dwz_hdr);
+
+    req_addr = virtio_blk_request(alloc, dev, &req, sizeof(dwz_hdr));
+
+    free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
+    qvirtqueue_add(qts, vq, req_addr + 16, sizeof(dwz_hdr), false, true);
+    qvirtqueue_add(qts, vq, req_addr + 16 + sizeof(dwz_hdr), 1, true, false);
+
+    qvirtqueue_kick(qts, dev, vq, free_head);
+
+    qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
+                           QVIRTIO_BLK_TIMEOUT_US);
+    status = readb(req_addr + 16 + sizeof(dwz_hdr));
+    g_assert_cmpint(status, ==, VIRTIO_BLK_S_IOERR);
+
+    guest_free(alloc, req_addr);
+
+    /* reserved flag bits must be zero */
+    req.type = type;
+    req.data = (char *) &dwz_hdr;
+    dwz_hdr.sector = 0;
+    dwz_hdr.num_sectors = 1;
+    dwz_hdr.flags = ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;
+
+    virtio_blk_fix_dwz_hdr(dev, &dwz_hdr);
+
+    req_addr = virtio_blk_request(alloc, dev, &req, sizeof(dwz_hdr));
+
+    free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
+    qvirtqueue_add(qts, vq, req_addr + 16, sizeof(dwz_hdr), false, true);
+    qvirtqueue_add(qts, vq, req_addr + 16 + sizeof(dwz_hdr), 1, true, false);
+
+    qvirtqueue_kick(qts, dev, vq, free_head);
+
+    qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
+                           QVIRTIO_BLK_TIMEOUT_US);
+    status = readb(req_addr + 16 + sizeof(dwz_hdr));
+    g_assert_cmpint(status, ==, VIRTIO_BLK_S_UNSUPP);
+
+    guest_free(alloc, req_addr);
+}
+
+/* Returns the request virtqueue so the caller can perform further tests */
+static QVirtQueue *test_basic(QVirtioDevice *dev, QGuestAllocator *alloc)
+{
+    QVirtioBlkReq req;
+    uint64_t req_addr;
+    uint64_t capacity;
+    uint64_t features;
+    uint32_t free_head;
+    uint8_t status;
+    char *data;
+    QTestState *qts = global_qtest;
+    QVirtQueue *vq;
+
+    features = qvirtio_get_features(dev);
+    features = features & ~(QVIRTIO_F_BAD_FEATURE |
+                    (1u << VIRTIO_RING_F_INDIRECT_DESC) |
+                    (1u << VIRTIO_RING_F_EVENT_IDX) |
+                    (1u << VIRTIO_BLK_F_SCSI));
+    qvirtio_set_features(dev, features);
+
+    capacity = qvirtio_config_readq(dev, 0);
+    g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512);
+    /* All requests in this test go through virtqueue 0. */
+    vq = qvirtqueue_setup(dev, alloc, 0);
+
+    qvirtio_set_driver_ok(dev);
+
+    /* Write and read with 3 descriptor layout */
+    /* Write request: "TEST" into sector 0 */
+    req.type = VIRTIO_BLK_T_OUT;
+    req.ioprio = 1;
+    req.sector = 0;
+    req.data = g_malloc0(512);
+    strcpy(req.data, "TEST");
+
+    req_addr = virtio_blk_request(alloc, dev, &req, 512);
+
+    g_free(req.data);
+
+    free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
+    qvirtqueue_add(qts, vq, req_addr + 16, 512, false, true);
+    qvirtqueue_add(qts, vq, req_addr + 528, 1, true, false);
+
+    qvirtqueue_kick(qts, dev, vq, free_head);
+
+    qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
+                           QVIRTIO_BLK_TIMEOUT_US);
+    status = readb(req_addr + 528);
+    g_assert_cmpint(status, ==, 0);
+
+    guest_free(alloc, req_addr);
+
+    /* Read request: sector 0 into a zeroed buffer */
+    req.type = VIRTIO_BLK_T_IN;
+    req.ioprio = 1;
+    req.sector = 0;
+    req.data = g_malloc0(512);
+
+    req_addr = virtio_blk_request(alloc, dev, &req, 512);
+
+    g_free(req.data);
+
+    free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
+    qvirtqueue_add(qts, vq, req_addr + 16, 512, true, true);
+    qvirtqueue_add(qts, vq, req_addr + 528, 1, true, false);
+
+    qvirtqueue_kick(qts, dev, vq, free_head);
+
+    qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
+                           QVIRTIO_BLK_TIMEOUT_US);
+    status = readb(req_addr + 528);
+    g_assert_cmpint(status, ==, 0);
+
+    data = g_malloc0(512);
+    qtest_memread(qts, req_addr + 16, data, 512);
+    g_assert_cmpstr(data, ==, "TEST");
+    g_free(data);
+
+    guest_free(alloc, req_addr);
+
+    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
+        struct virtio_blk_discard_write_zeroes dwz_hdr;
+        void *expected;
+
+        /*
+         * WRITE_ZEROES request on the same sector of previous test where
+         * we wrote "TEST".
+         */
+        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
+        req.data = (char *) &dwz_hdr;
+        dwz_hdr.sector = 0;
+        dwz_hdr.num_sectors = 1;
+        dwz_hdr.flags = 0;
+
+        virtio_blk_fix_dwz_hdr(dev, &dwz_hdr);
+
+        req_addr = virtio_blk_request(alloc, dev, &req, sizeof(dwz_hdr));
+
+        free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
+        qvirtqueue_add(qts, vq, req_addr + 16, sizeof(dwz_hdr), false, true);
+        qvirtqueue_add(qts, vq, req_addr + 16 + sizeof(dwz_hdr), 1, true,
+                       false);
+
+        qvirtqueue_kick(qts, dev, vq, free_head);
+
+        qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
+                               QVIRTIO_BLK_TIMEOUT_US);
+        status = readb(req_addr + 16 + sizeof(dwz_hdr));
+        g_assert_cmpint(status, ==, 0);
+
+        guest_free(alloc, req_addr);
+
+        /* Read request to check if the sector contains all zeroes */
+        req.type = VIRTIO_BLK_T_IN;
+        req.ioprio = 1;
+        req.sector = 0;
+        req.data = g_malloc0(512);
+
+        req_addr = virtio_blk_request(alloc, dev, &req, 512);
+
+        g_free(req.data);
+
+        free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
+        qvirtqueue_add(qts, vq, req_addr + 16, 512, true, true);
+        qvirtqueue_add(qts, vq, req_addr + 528, 1, true, false);
+
+        qvirtqueue_kick(qts, dev, vq, free_head);
+
+        qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
+                               QVIRTIO_BLK_TIMEOUT_US);
+        status = readb(req_addr + 528);
+        g_assert_cmpint(status, ==, 0);
+
+        data = g_malloc(512);
+        expected = g_malloc0(512);
+        qtest_memread(qts, req_addr + 16, data, 512);
+        g_assert_cmpmem(data, 512, expected, 512);
+        g_free(expected);
+        g_free(data);
+
+        guest_free(alloc, req_addr);
+
+        test_invalid_discard_write_zeroes(dev, alloc, qts, vq,
+                                          VIRTIO_BLK_T_WRITE_ZEROES);
+    }
+
+    if (features & (1u << VIRTIO_BLK_F_DISCARD)) {
+        struct virtio_blk_discard_write_zeroes dwz_hdr;
+        /* DISCARD of sector 0; only the completion status is checked. */
+        req.type = VIRTIO_BLK_T_DISCARD;
+        req.data = (char *) &dwz_hdr;
+        dwz_hdr.sector = 0;
+        dwz_hdr.num_sectors = 1;
+        dwz_hdr.flags = 0;
+
+        virtio_blk_fix_dwz_hdr(dev, &dwz_hdr);
+
+        req_addr = virtio_blk_request(alloc, dev, &req, sizeof(dwz_hdr));
+
+        free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
+        qvirtqueue_add(qts, vq, req_addr + 16, sizeof(dwz_hdr), false, true);
+        qvirtqueue_add(qts, vq, req_addr + 16 + sizeof(dwz_hdr),
+                       1, true, false);
+
+        qvirtqueue_kick(qts, dev, vq, free_head);
+
+        qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
+                               QVIRTIO_BLK_TIMEOUT_US);
+        status = readb(req_addr + 16 + sizeof(dwz_hdr));
+        g_assert_cmpint(status, ==, 0);
+
+        guest_free(alloc, req_addr);
+
+        test_invalid_discard_write_zeroes(dev, alloc, qts, vq,
+                                          VIRTIO_BLK_T_DISCARD);
+    }
+
+    if (features & (1u << VIRTIO_F_ANY_LAYOUT)) {
+        /* Write and read with 2 descriptor layout */
+        /* Write request: header and payload share one 528-byte descriptor */
+        req.type = VIRTIO_BLK_T_OUT;
+        req.ioprio = 1;
+        req.sector = 1;
+        req.data = g_malloc0(512);
+        strcpy(req.data, "TEST");
+
+        req_addr = virtio_blk_request(alloc, dev, &req, 512);
+
+        g_free(req.data);
+
+        free_head = qvirtqueue_add(qts, vq, req_addr, 528, false, true);
+        qvirtqueue_add(qts, vq, req_addr + 528, 1, true, false);
+        qvirtqueue_kick(qts, dev, vq, free_head);
+
+        qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
+                               QVIRTIO_BLK_TIMEOUT_US);
+        status = readb(req_addr + 528);
+        g_assert_cmpint(status, ==, 0);
+
+        guest_free(alloc, req_addr);
+
+        /* Read request: payload and status share one 513-byte descriptor */
+        req.type = VIRTIO_BLK_T_IN;
+        req.ioprio = 1;
+        req.sector = 1;
+        req.data = g_malloc0(512);
+
+        req_addr = virtio_blk_request(alloc, dev, &req, 512);
+
+        g_free(req.data);
+
+        free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
+        qvirtqueue_add(qts, vq, req_addr + 16, 513, true, false);
+
+        qvirtqueue_kick(qts, dev, vq, free_head);
+
+        qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
+                               QVIRTIO_BLK_TIMEOUT_US);
+        status = readb(req_addr + 528);
+        g_assert_cmpint(status, ==, 0);
+
+        data = g_malloc0(512);
+        qtest_memread(qts, req_addr + 16, data, 512);
+        g_assert_cmpstr(data, ==, "TEST");
+        g_free(data);
+
+        guest_free(alloc, req_addr);
+    }
+
+    return vq;
+}
+
+/* qos test entry: run test_basic() and release the virtqueue it returns. */
+static void basic(void *obj, void *data, QGuestAllocator *t_alloc)
+{
+    QVhostUserBlk *blk_if = obj;
+    QVirtioDevice *vdev = blk_if->vdev;
+    QVirtQueue *queue = test_basic(vdev, t_alloc);
+
+    qvirtqueue_cleanup(vdev->bus, queue, t_alloc);
+}
+
+/* Write then read back sector 0 through indirect descriptor tables. */
+static void indirect(void *obj, void *u_data, QGuestAllocator *t_alloc)
+{
+    QVirtQueue *vq;
+    QVhostUserBlk *blk_if = obj;
+    QVirtioDevice *dev = blk_if->vdev;
+    QVirtioBlkReq req;
+    QVRingIndirectDesc *indirect;
+    uint64_t req_addr, capacity, features;
+    uint32_t free_head;
+    uint8_t status;
+    char *data;
+    QTestState *qts = global_qtest;
+
+    features = qvirtio_get_features(dev);
+    g_assert_cmphex(features & (1u << VIRTIO_RING_F_INDIRECT_DESC), !=, 0);
+    features = features & ~(QVIRTIO_F_BAD_FEATURE |
+                            (1u << VIRTIO_RING_F_EVENT_IDX) |
+                            (1u << VIRTIO_BLK_F_SCSI));
+    qvirtio_set_features(dev, features);
+
+    capacity = qvirtio_config_readq(dev, 0);
+    g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512);
+
+    vq = qvirtqueue_setup(dev, t_alloc, 0);
+    qvirtio_set_driver_ok(dev);
+
+    /* Write request */
+    req.type = VIRTIO_BLK_T_OUT;
+    req.ioprio = 1;
+    req.sector = 0;
+    req.data = g_malloc0(512);
+    strcpy(req.data, "TEST");
+
+    req_addr = virtio_blk_request(t_alloc, dev, &req, 512);
+
+    g_free(req.data);
+
+    indirect = qvring_indirect_desc_setup(qts, dev, t_alloc, 2);
+    qvring_indirect_desc_add(dev, qts, indirect, req_addr, 528, false);
+    qvring_indirect_desc_add(dev, qts, indirect, req_addr + 528, 1, true);
+    free_head = qvirtqueue_add_indirect(qts, vq, indirect);
+    qvirtqueue_kick(qts, dev, vq, free_head);
+
+    qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
+                           QVIRTIO_BLK_TIMEOUT_US);
+    status = readb(req_addr + 528);
+    g_assert_cmpint(status, ==, 0);
+
+    g_free(indirect);
+    guest_free(t_alloc, req_addr);
+
+    /*
+     * Read request: buffer stays zeroed so only the device can supply "TEST".
+     */
+    req.type = VIRTIO_BLK_T_IN;
+    req.ioprio = 1;
+    req.sector = 0;
+    req.data = g_malloc0(512);
+
+    req_addr = virtio_blk_request(t_alloc, dev, &req, 512);
+
+    g_free(req.data);
+
+    indirect = qvring_indirect_desc_setup(qts, dev, t_alloc, 2);
+    qvring_indirect_desc_add(dev, qts, indirect, req_addr, 16, false);
+    qvring_indirect_desc_add(dev, qts, indirect, req_addr + 16, 513, true);
+    free_head = qvirtqueue_add_indirect(qts, vq, indirect);
+    qvirtqueue_kick(qts, dev, vq, free_head);
+
+    qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
+                           QVIRTIO_BLK_TIMEOUT_US);
+    status = readb(req_addr + 528);
+    g_assert_cmpint(status, ==, 0);
+
+    data = g_malloc0(512);
+    qtest_memread(qts, req_addr + 16, data, 512);
+    g_assert_cmpstr(data, ==, "TEST");
+    g_free(data);
+
+    g_free(indirect);
+    guest_free(t_alloc, req_addr);
+    qvirtqueue_cleanup(dev->bus, vq, t_alloc);
+}
+
+/*
+ * Exercise used-ring event suppression over MSI-X: two writes and one read,
+ * with the used event index moved so the last two share one notification.
+ */
+static void idx(void *obj, void *u_data, QGuestAllocator *t_alloc)
+{
+    QVirtQueue *vq;
+    QVhostUserBlkPCI *blk = obj;
+    QVirtioPCIDevice *pdev = &blk->pci_vdev;
+    QVirtioDevice *dev = &pdev->vdev;
+    QVirtioBlkReq req;
+    uint64_t req_addr, capacity, features;
+    uint32_t free_head, write_head, desc_idx;
+    uint8_t status;
+    char *data;
+    QOSGraphObject *blk_object = obj;
+    QPCIDevice *pci_dev = blk_object->get_driver(blk_object, "pci-device");
+    QTestState *qts = global_qtest;
+
+    if (qpci_check_buggy_msi(pci_dev)) {
+        return;
+    }
+
+    qpci_msix_enable(pdev->pdev);
+    qvirtio_pci_set_msix_configuration_vector(pdev, t_alloc, 0);
+
+    features = qvirtio_get_features(dev);
+    features = features & ~(QVIRTIO_F_BAD_FEATURE |
+                            (1u << VIRTIO_RING_F_INDIRECT_DESC) |
+                            (1u << VIRTIO_F_NOTIFY_ON_EMPTY) |
+                            (1u << VIRTIO_BLK_F_SCSI));
+    qvirtio_set_features(dev, features);
+
+    capacity = qvirtio_config_readq(dev, 0);
+    g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512);
+
+    vq = qvirtqueue_setup(dev, t_alloc, 0);
+    qvirtqueue_pci_msix_setup(pdev, (QVirtQueuePCI *)vq, t_alloc, 1);
+
+    qvirtio_set_driver_ok(dev);
+
+    /*
+     * libvhost-user signals the call fd in VHOST_USER_SET_VRING_CALL, make
+     * sure to wait for the isr here so we don't race and confuse it later on.
+     */
+    qvirtio_wait_queue_isr(qts, dev, vq, QVIRTIO_BLK_TIMEOUT_US);
+
+    /* Write request */
+    req.type = VIRTIO_BLK_T_OUT;
+    req.ioprio = 1;
+    req.sector = 0;
+    req.data = g_malloc0(512);
+    strcpy(req.data, "TEST");
+
+    req_addr = virtio_blk_request(t_alloc, dev, &req, 512);
+
+    g_free(req.data);
+
+    free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
+    qvirtqueue_add(qts, vq, req_addr + 16, 512, false, true);
+    qvirtqueue_add(qts, vq, req_addr + 528, 1, true, false);
+    qvirtqueue_kick(qts, dev, vq, free_head);
+
+    qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
+                           QVIRTIO_BLK_TIMEOUT_US);
+    guest_free(t_alloc, req_addr);
+
+    /* Write request */
+    req.type = VIRTIO_BLK_T_OUT;
+    req.ioprio = 1;
+    req.sector = 1;
+    req.data = g_malloc0(512);
+    strcpy(req.data, "TEST");
+
+    req_addr = virtio_blk_request(t_alloc, dev, &req, 512);
+
+    g_free(req.data);
+
+    /* Notify after processing the third request */
+    qvirtqueue_set_used_event(qts, vq, 2);
+    free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
+    qvirtqueue_add(qts, vq, req_addr + 16, 512, false, true);
+    qvirtqueue_add(qts, vq, req_addr + 528, 1, true, false);
+    qvirtqueue_kick(qts, dev, vq, free_head);
+    write_head = free_head;
+
+    /* No notification expected */
+    status = qvirtio_wait_status_byte_no_isr(qts, dev, vq, req_addr + 528,
+                                             QVIRTIO_BLK_TIMEOUT_US);
+    g_assert_cmpint(status, ==, 0);
+
+    guest_free(t_alloc, req_addr);
+
+    /* Read request */
+    req.type = VIRTIO_BLK_T_IN;
+    req.ioprio = 1;
+    req.sector = 1;
+    req.data = g_malloc0(512);
+
+    req_addr = virtio_blk_request(t_alloc, dev, &req, 512);
+
+    g_free(req.data);
+
+    free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
+    qvirtqueue_add(qts, vq, req_addr + 16, 512, true, true);
+    qvirtqueue_add(qts, vq, req_addr + 528, 1, true, false);
+
+    qvirtqueue_kick(qts, dev, vq, free_head);
+
+    /* We get just one notification for both requests */
+    qvirtio_wait_used_elem(qts, dev, vq, write_head, NULL,
+                           QVIRTIO_BLK_TIMEOUT_US);
+    g_assert(qvirtqueue_get_buf(qts, vq, &desc_idx, NULL));
+    g_assert_cmpint(desc_idx, ==, free_head);
+
+    status = readb(req_addr + 528);
+    g_assert_cmpint(status, ==, 0);
+
+    data = g_malloc0(512);
+    qtest_memread(qts, req_addr + 16, data, 512);
+    g_assert_cmpstr(data, ==, "TEST");
+    g_free(data);
+
+    guest_free(t_alloc, req_addr);
+
+    /* End test */
+    qpci_msix_disable(pdev->pdev);
+
+    qvirtqueue_cleanup(dev->bus, vq, t_alloc);
+}
+
+/* Hot-plug a second vhost-user-blk-pci device, probe it, then unplug it. */
+static void pci_hotplug(void *obj, void *data, QGuestAllocator *t_alloc)
+{
+    QVirtioPCIDevice *primary = obj;
+    QTestState *qts = primary->pdev->bus->qts;
+    QPCIAddress hp_addr = { .devfn = QPCI_DEVFN(PCI_SLOT_HP, 0) };
+    QVirtioPCIDevice *hotplugged;
+
+    /* plug secondary disk */
+    qtest_qmp_device_add(qts, "vhost-user-blk-pci", "drv1",
+                         "{'addr': %s, 'chardev': 'char2'}",
+                         stringify(PCI_SLOT_HP) ".0");
+
+    hotplugged = virtio_pci_new(primary->pdev->bus, &hp_addr);
+    g_assert_nonnull(hotplugged);
+    g_assert_cmpint(hotplugged->vdev.device_type, ==, VIRTIO_ID_BLOCK);
+    qvirtio_pci_device_disable(hotplugged);
+    qos_object_destroy((QOSGraphObject *)hotplugged);
+
+    /* unplug secondary disk */
+    qpci_unplug_acpi_device_test(qts, "drv1", PCI_SLOT_HP);
+}
+
+/*
+ * Check multi-queue feature reporting: the primary device must not offer
+ * VIRTIO_BLK_F_MQ, while a device hotplugged with 'num-queues': 8 must
+ * offer it and report 8 in the num_queues field of its config space.
+ */
+static void multiqueue(void *obj, void *data, QGuestAllocator *t_alloc)
+{
+    QVirtioPCIDevice *pdev1 = obj;
+    QVirtioDevice *dev1 = &pdev1->vdev;
+    QTestState *qts = pdev1->pdev->bus->qts;
+    QPCIAddress hp_addr = { .devfn = QPCI_DEVFN(PCI_SLOT_HP, 0) };
+    QVirtioPCIDevice *pdev8;
+    QVirtioDevice *dev8;
+    uint64_t features;
+    uint16_t num_queues;
+
+    /*
+     * The primary device has 1 queue and VIRTIO_BLK_F_MQ is not enabled. The
+     * VIRTIO specification allows VIRTIO_BLK_F_MQ to be enabled when there is
+     * only 1 virtqueue, but --device vhost-user-blk-pci doesn't do this (which
+     * is also spec-compliant).
+     */
+    features = qvirtio_get_features(dev1);
+    g_assert_cmpint(features & (1u << VIRTIO_BLK_F_MQ), ==, 0);
+    features &= ~(QVIRTIO_F_BAD_FEATURE |
+                  (1u << VIRTIO_RING_F_INDIRECT_DESC) |
+                  (1u << VIRTIO_F_NOTIFY_ON_EMPTY) |
+                  (1u << VIRTIO_BLK_F_SCSI));
+    qvirtio_set_features(dev1, features);
+
+    /* Hotplug a secondary device with 8 queues */
+    qtest_qmp_device_add(qts, "vhost-user-blk-pci", "drv1",
+                         "{'addr': %s, 'chardev': 'char2', 'num-queues': 8}",
+                         stringify(PCI_SLOT_HP) ".0");
+
+    pdev8 = virtio_pci_new(pdev1->pdev->bus, &hp_addr);
+    g_assert_nonnull(pdev8);
+    g_assert_cmpint(pdev8->vdev.device_type, ==, VIRTIO_ID_BLOCK);
+
+    qos_object_start_hw(&pdev8->obj);
+
+    /* The 8-queue device must advertise VIRTIO_BLK_F_MQ */
+    dev8 = &pdev8->vdev;
+    features = qvirtio_get_features(dev8);
+    g_assert_cmpint(features & (1u << VIRTIO_BLK_F_MQ), ==,
+                    (1u << VIRTIO_BLK_F_MQ));
+    features &= ~(QVIRTIO_F_BAD_FEATURE |
+                  (1u << VIRTIO_RING_F_INDIRECT_DESC) |
+                  (1u << VIRTIO_F_NOTIFY_ON_EMPTY) |
+                  (1u << VIRTIO_BLK_F_SCSI) |
+                  (1u << VIRTIO_BLK_F_MQ));
+    qvirtio_set_features(dev8, features);
+
+    /* ... and expose the configured queue count in its config space */
+    num_queues = qvirtio_config_readw(dev8,
+            offsetof(struct virtio_blk_config, num_queues));
+    g_assert_cmpint(num_queues, ==, 8);
+
+    qvirtio_pci_device_disable(pdev8);
+    qos_object_destroy(&pdev8->obj);
+
+    /* Unplug the secondary disk */
+    qpci_unplug_acpi_device_test(qts, "drv1", PCI_SLOT_HP);
+}
+
+/*
+ * Select a virtqueue that does not exist (index 2) through
+ * VIRTIO_PCI_QUEUE_SEL and then set its vring address through
+ * VIRTIO_PCI_QUEUE_PFN. Nothing is read back: the test only checks that
+ * doing so does not crash.
+ */
+static void test_nonexistent_virtqueue(void *obj, void *data,
+                                       QGuestAllocator *t_alloc)
+{
+    QVhostUserBlkPCI *blk = obj;
+    QPCIDevice *dev = qpci_device_find(blk->pci_vdev.pdev->bus,
+                                       QPCI_DEVFN(4, 0));
+    QPCIBar bar0;
+
+    g_assert(dev != NULL);
+    qpci_device_enable(dev);
+
+    /* Both register writes below go through BAR 0 */
+    bar0 = qpci_iomap(dev, 0, NULL);
+
+    qpci_io_writeb(dev, bar0, VIRTIO_PCI_QUEUE_SEL, 2);
+    qpci_io_writel(dev, bar0, VIRTIO_PCI_QUEUE_PFN, 1);
+
+    g_free(dev);
+}
+
+/*
+ * Return the value of the QTEST_QEMU_STORAGE_DAEMON_BINARY environment
+ * variable.
+ *
+ * If the variable is not set, a message is printed on stderr and the
+ * process exits with status 0.
+ */
+static const char *qtest_qemu_storage_daemon_binary(void)
+{
+    const char *bin_path = getenv("QTEST_QEMU_STORAGE_DAEMON_BINARY");
+
+    if (bin_path) {
+        return bin_path;
+    }
+
+    fprintf(stderr, "Environment variable "
+                    "QTEST_QEMU_STORAGE_DAEMON_BINARY required\n");
+    exit(0);
+}
+
+/*
+ * g_test_queue_destroy() cleanup function for files.
+ *
+ * @path: heap-allocated file name; the file is unlinked and the string is
+ *        freed here, after which qos_invalidate_command_line() is called.
+ */
+static void destroy_file(void *path)
+{
+    char *filename = path;
+
+    unlink(filename);
+    g_free(filename);
+    qos_invalidate_command_line();
+}
+
+/*
+ * Create a temporary raw image of TEST_IMAGE_SIZE bytes, named from the
+ * relative template "qtest.XXXXXX", and return its path.
+ *
+ * destroy_file() is queued with g_test_queue_destroy() to unlink the file
+ * and free the returned string, so the caller must not free it.
+ */
+static char *drive_create(void)
+{
+    /* Relative on purpose: vhost-user-blk won't recognize drive in /tmp */
+    char *image_path = g_strdup("qtest.XXXXXX");
+    int fd;
+    int ret;
+
+    /* mkstemp() fills in the XXXXXX part of image_path in place */
+    fd = mkstemp(image_path);
+    g_assert_cmpint(fd, >=, 0);
+
+    /* Give the image its size, then drop our descriptor */
+    ret = ftruncate(fd, TEST_IMAGE_SIZE);
+    g_assert_cmpint(ret, ==, 0);
+    close(fd);
+
+    g_test_queue_destroy(destroy_file, image_path);
+    return image_path;
+}
+
+/*
+ * Pick a fresh path under /tmp and call qtest_socket_server() on it.
+ *
+ * @fd: receives the value returned by qtest_socket_server()
+ *
+ * Returns the socket path. destroy_file() is queued with
+ * g_test_queue_destroy() to unlink the path and free the string, so the
+ * caller must not free it.
+ */
+static char *create_listen_socket(int *fd)
+{
+    char *path = g_strdup_printf("/tmp/qtest-%d-sock.XXXXXX", getpid());
+    int tmp_fd;
+
+    /*
+     * mkstemp() is only used to choose a name; the placeholder file is
+     * removed again right away. No race because our pid makes the path
+     * unique.
+     */
+    tmp_fd = mkstemp(path);
+    g_assert_cmpint(tmp_fd, >=, 0);
+    close(tmp_fd);
+    unlink(path);
+
+    *fd = qtest_socket_server(path);
+    g_test_queue_destroy(destroy_file, path);
+    return path;
+}
+
+/*
+ * Stop qemu-storage-daemon and free its state.
+ *
+ * Used both as a g_test_queue_destroy() cleanup function and as a
+ * qtest_add_abrt_handler() handler.
+ *
+ * @data: the QemuStorageDaemonState of the daemon to stop; freed on return
+ *
+ * Aborts if the daemon does not exit normally with status 0 after SIGTERM.
+ */
+static void quit_storage_daemon(void *data)
+{
+    QemuStorageDaemonState *qsd = data;
+    pid_t pid;
+    int wstatus;
+
+    /*
+     * When running as the g_test_queue_destroy() callback the abrt handler
+     * is still installed. Remove it first so that an abort below does not
+     * call us a second time, and so that it does not outlive this cleanup.
+     */
+    qtest_remove_abrt_handler(data);
+
+    /* Before quitting storage-daemon, quit qemu to avoid dubious messages */
+    qtest_kill_qemu(global_qtest);
+
+    /* Ask the daemon to terminate and reap it */
+    kill(qsd->pid, SIGTERM);
+    pid = waitpid(qsd->pid, &wstatus, 0);
+    g_assert_cmpint(pid, ==, qsd->pid);
+
+    if (WIFEXITED(wstatus)) {
+        if (WEXITSTATUS(wstatus) != 0) {
+            fprintf(stderr, "%s: expected qemu-storage-daemon to exit "
+                    "successfully, got %d\n",
+                    __func__, WEXITSTATUS(wstatus));
+            abort();
+        }
+    } else {
+        fprintf(stderr, "%s: expected qemu-storage-daemon to exit\n",
+                __func__);
+        abort();
+    }
+
+    g_free(qsd);
+}
+
+/*
+ * Start one qemu-storage-daemon process exporting @vus_instances
+ * vhost-user-blk devices, and extend the QEMU command line so that QEMU can
+ * connect to them.
+ *
+ * @cmd_line: QEMU command line being built. A shared memfd memory backend
+ *            and one "-chardev socket,id=char<N>" option per export (N
+ *            counting from 1) are appended.
+ * @vus_instances: number of image + socket + export sets to create
+ * @num_queues: num-queues value given to every export
+ *
+ * The daemon is stopped by quit_storage_daemon(), which is registered both
+ * as an abrt handler and as a g_test_queue_destroy() callback.
+ */
+static void start_vhost_user_blk(GString *cmd_line, int vus_instances,
+                                 int num_queues)
+{
+    const char *vhost_user_blk_bin = qtest_qemu_storage_daemon_binary();
+    GString *storage_daemon_command = g_string_new(NULL);
+    QemuStorageDaemonState *qsd;
+    pid_t pid;
+    int i;
+
+    g_string_append_printf(storage_daemon_command,
+                           "exec %s ",
+                           vhost_user_blk_bin);
+
+    g_string_append_printf(cmd_line,
+            " -object memory-backend-memfd,id=mem,size=256M,share=on "
+            " -M memory-backend=mem -m 256M ");
+
+    for (i = 0; i < vus_instances; i++) {
+        int fd;
+        /* Both strings are freed later by the queued destroy_file() calls */
+        char *sock_path = create_listen_socket(&fd);
+        char *img_path = drive_create();
+
+        /*
+         * Pass the socket that create_listen_socket() already set up to the
+         * daemon by descriptor number instead of by path. Handing over the
+         * path would make the daemon bind it a second time, leaving a window
+         * in which QEMU can connect to a socket nobody accepts on. This
+         * relies on the descriptor being inherited across fork() and exec.
+         */
+        g_string_append_printf(storage_daemon_command,
+            "--blockdev driver=file,node-name=disk%d,filename=%s "
+            "--export type=vhost-user-blk,id=disk%d,addr.type=fd,addr.str=%d,"
+            "node-name=disk%i,writable=on,num-queues=%d ",
+            i, img_path, i, fd, i, num_queues);
+
+        g_string_append_printf(cmd_line, "-chardev socket,id=char%d,path=%s ",
+                               i + 1, sock_path);
+    }
+
+    g_test_message("starting vhost-user backend: %s",
+                   storage_daemon_command->str);
+
+    pid = fork();
+    /*
+     * A failed fork() must not be recorded: quit_storage_daemon() would end
+     * up calling kill(-1, SIGTERM), which signals every process we are
+     * allowed to signal.
+     */
+    g_assert_cmpint(pid, >=, 0);
+    if (pid == 0) {
+        /*
+         * Close standard file descriptors so tap-driver.pl pipe detects when
+         * our parent terminates.
+         */
+        close(0);
+        close(1);
+        open("/dev/null", O_RDONLY);
+        open("/dev/null", O_WRONLY);
+
+        execlp("/bin/sh", "sh", "-c", storage_daemon_command->str,
+               (char *)NULL);
+        /*
+         * Only reached if exec failed. Use _exit() so the child does not run
+         * the parent's atexit handlers or flush its stdio buffers.
+         */
+        _exit(1);
+    }
+    g_string_free(storage_daemon_command, true);
+
+    qsd = g_new(QemuStorageDaemonState, 1);
+    qsd->pid = pid;
+
+    /* Make sure qemu-storage-daemon is stopped */
+    qtest_add_abrt_handler(quit_storage_daemon, qsd);
+    g_test_queue_destroy(quit_storage_daemon, qsd);
+}
+
+/*
+ * Default setup: start a storage daemon with a single export that has a
+ * single queue. Returns @arg unchanged.
+ */
+static void *vhost_user_blk_test_setup(GString *cmd_line, void *arg)
+{
+    const int vus_instances = 1;
+    const int num_queues = 1;
+
+    start_vhost_user_blk(cmd_line, vus_instances, num_queues);
+    return arg;
+}
+
+/*
+ * Setup for hotplug.
+ *
+ * Since a vhost-user server only serves one vhost-user client at a time,
+ * another export is started for the device that gets hotplugged. Returns
+ * @arg unchanged.
+ */
+static void *vhost_user_blk_hotplug_test_setup(GString *cmd_line, void *arg)
+{
+    /* The second export's "-chardev socket,id=char2" is for pci_hotplug */
+    const int vus_instances = 2;
+    const int num_queues = 1;
+
+    start_vhost_user_blk(cmd_line, vus_instances, num_queues);
+    return arg;
+}
+
+/*
+ * Setup for the multiqueue test: start a storage daemon with two exports,
+ * each with num-queues=8. Returns @arg unchanged.
+ */
+static void *vhost_user_blk_multiqueue_test_setup(GString *cmd_line, void *arg)
+{
+    const int vus_instances = 2;
+    const int num_queues = 8;
+
+    start_vhost_user_blk(cmd_line, vus_instances, num_queues);
+    return arg;
+}
+
+static void register_vhost_user_blk_test(void)
+{
+    QOSGraphTestOptions opts = {
+        .before = vhost_user_blk_test_setup,
+    };
+
+    /*
+     * Register the tests for vhost-user-blk and vhost-user-blk-pci. Each
+     * test is added with a .before hook that starts the qemu-storage-daemon
+     * exports it needs; opts.before is switched to the hotplug and
+     * multiqueue setup functions before those two tests are added.
+     *
+     * The tests are borrowed from tests/virtio-blk-test.c. The vhost-user-blk
+     * device doesn't have -drive, so the tests that rely on block_resize
+     * don't work for it and are left out:
+     *  - config
+     *  - resize
+     */
+    qos_add_test("basic", "vhost-user-blk", basic, &opts);
+    qos_add_test("indirect", "vhost-user-blk", indirect, &opts);
+    qos_add_test("idx", "vhost-user-blk-pci", idx, &opts);
+    qos_add_test("nxvirtq", "vhost-user-blk-pci",
+                 test_nonexistent_virtqueue, &opts);
+
+    opts.before = vhost_user_blk_hotplug_test_setup;
+    qos_add_test("hotplug", "vhost-user-blk-pci", pci_hotplug, &opts);
+
+    opts.before = vhost_user_blk_multiqueue_test_setup;
+    qos_add_test("multiqueue", "vhost-user-blk-pci", multiqueue, &opts);
+}
+
+libqos_init(register_vhost_user_blk_test);
diff --git a/tests/tcg/sparc64/Makefile.target b/tests/tcg/sparc64/Makefile.target
index 5bd7f90..408dace 100644
--- a/tests/tcg/sparc64/Makefile.target
+++ b/tests/tcg/sparc64/Makefile.target
@@ -1,11 +1,6 @@
 # -*- Mode: makefile -*-
 #
-# sparc specific tweaks and masking out broken tests
-
-# different from the other hangs:
-# tests/tcg/multiarch/linux-test.c:264: Value too large for defined data type (ret=-1, errno=92/Value too large for defined data type)
-run-linux-test: linux-test
-	$(call skip-test, $<, "BROKEN")
+# sparc specific tweaks
 
 # On Sparc64 Linux support 8k pages
 EXTRA_RUNS+=run-test-mmap-8192