Merge tag 'pull-tcg-20240202-2' of https://gitlab.com/rth7680/qemu into staging

tests/tcg: Fix multiarch/gdbstub/prot-none.py
hw/core: Convert cpu_mmu_index to a CPUClass hook
tcg/loongarch64: Set vector registers call clobbered
target/sparc: floating-point cleanup
linux-user/aarch64: Add padding before __kernel_rt_sigreturn

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmW95WkdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV/p+Qf/eVmh5q0pZqcur7ft
# 8FO0wlIz55OfhaA9MIpH7LEIHRKY37Ybebw2K6SPnx4FmPhLkaj4KXPPjT2nzdXw
# J2nQM+TOyxOd18GG8P80qFQ1a72dj8VSIRVAl9T46KuPXS5B7luArImfBlUk/GwV
# Qr/XkOPwVTp05E/ccMJ8PMlcVZw9osHVLqsaFVbsUv/FylTmstzA9c5Gw7/FTfkG
# T2rk+7go+F4IXs/9uQuuFMOpQOZngXE621hnro+qle7j9oarEUVJloAgVn06o59O
# fUjuoKO0aMCr2iQqNJTH7Dnqp5OIzzxUoXiNTOj0EimwWfAcUKthoFO2LGcy1/ew
# wWNR/Q==
# =e3J3
# -----END PGP SIGNATURE-----
# gpg: Signature made Sat 03 Feb 2024 07:04:09 GMT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* tag 'pull-tcg-20240202-2' of https://gitlab.com/rth7680/qemu: (58 commits)
  linux-user/aarch64: Add padding before __kernel_rt_sigreturn
  target/sparc: Remove FSR_FTT_NMASK, FSR_FTT_CEXC_NMASK
  target/sparc: Split fcc out of env->fsr
  target/sparc: Remove cpu_fsr
  target/sparc: Split cexc and ftt from env->fsr
  target/sparc: Merge check_ieee_exceptions with FPop helpers
  target/sparc: Clear cexc and ftt in do_check_ieee_exceptions
  target/sparc: Split ver from env->fsr
  target/sparc: Introduce cpu_get_fsr, cpu_put_fsr
  target/sparc: Remove qt0, qt1 temporaries
  target/sparc: Use i128 for Fdmulq
  target/sparc: Use i128 for FdTOq, FxTOq
  target/sparc: Use i128 for FsTOq, FiTOq
  target/sparc: Use i128 for FCMPq, FCMPEq
  target/sparc: Use i128 for FqTOd, FqTOx
  target/sparc: Use i128 for FqTOs, FqTOi
  target/sparc: Use i128 for FADDq, FSUBq, FMULq, FDIVq
  target/sparc: Use i128 for FSQRTq
  target/sparc: Inline FNEG, FABS
  target/sparc: Introduce gen_{load,store}_fpr_Q
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 3facfcb..047cd2c 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1601,7 +1601,7 @@
     void *p;
 
     (void)probe_access_internal(env_cpu(env), addr, 1, MMU_INST_FETCH,
-                                cpu_mmu_index(env, true), false,
+                                cpu_mmu_index(env_cpu(env), true), false,
                                 &p, &full, 0, false);
     if (p == NULL) {
         return -1;
@@ -2959,26 +2959,30 @@
 
 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
 {
-    MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
-    return do_ld1_mmu(env_cpu(env), addr, oi, 0, MMU_INST_FETCH);
+    CPUState *cs = env_cpu(env);
+    MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(cs, true));
+    return do_ld1_mmu(cs, addr, oi, 0, MMU_INST_FETCH);
 }
 
 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
 {
-    MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
-    return do_ld2_mmu(env_cpu(env), addr, oi, 0, MMU_INST_FETCH);
+    CPUState *cs = env_cpu(env);
+    MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(cs, true));
+    return do_ld2_mmu(cs, addr, oi, 0, MMU_INST_FETCH);
 }
 
 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
 {
-    MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
-    return do_ld4_mmu(env_cpu(env), addr, oi, 0, MMU_INST_FETCH);
+    CPUState *cs = env_cpu(env);
+    MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(cs, true));
+    return do_ld4_mmu(cs, addr, oi, 0, MMU_INST_FETCH);
 }
 
 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
 {
-    MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
-    return do_ld8_mmu(env_cpu(env), addr, oi, 0, MMU_INST_FETCH);
+    CPUState *cs = env_cpu(env);
+    MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(cs, true));
+    return do_ld8_mmu(cs, addr, oi, 0, MMU_INST_FETCH);
 }
 
 uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
diff --git a/accel/tcg/ldst_common.c.inc b/accel/tcg/ldst_common.c.inc
index 4483351..c82048e 100644
--- a/accel/tcg/ldst_common.c.inc
+++ b/accel/tcg/ldst_common.c.inc
@@ -354,7 +354,8 @@
 
 uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
 {
-    return cpu_ldub_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
+    int mmu_index = cpu_mmu_index(env_cpu(env), false);
+    return cpu_ldub_mmuidx_ra(env, addr, mmu_index, ra);
 }
 
 int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
@@ -364,7 +365,8 @@
 
 uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
 {
-    return cpu_lduw_be_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
+    int mmu_index = cpu_mmu_index(env_cpu(env), false);
+    return cpu_lduw_be_mmuidx_ra(env, addr, mmu_index, ra);
 }
 
 int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
@@ -374,17 +376,20 @@
 
 uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
 {
-    return cpu_ldl_be_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
+    int mmu_index = cpu_mmu_index(env_cpu(env), false);
+    return cpu_ldl_be_mmuidx_ra(env, addr, mmu_index, ra);
 }
 
 uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
 {
-    return cpu_ldq_be_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
+    int mmu_index = cpu_mmu_index(env_cpu(env), false);
+    return cpu_ldq_be_mmuidx_ra(env, addr, mmu_index, ra);
 }
 
 uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
 {
-    return cpu_lduw_le_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
+    int mmu_index = cpu_mmu_index(env_cpu(env), false);
+    return cpu_lduw_le_mmuidx_ra(env, addr, mmu_index, ra);
 }
 
 int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
@@ -394,54 +399,63 @@
 
 uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
 {
-    return cpu_ldl_le_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
+    int mmu_index = cpu_mmu_index(env_cpu(env), false);
+    return cpu_ldl_le_mmuidx_ra(env, addr, mmu_index, ra);
 }
 
 uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
 {
-    return cpu_ldq_le_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
+    int mmu_index = cpu_mmu_index(env_cpu(env), false);
+    return cpu_ldq_le_mmuidx_ra(env, addr, mmu_index, ra);
 }
 
 void cpu_stb_data_ra(CPUArchState *env, abi_ptr addr,
                      uint32_t val, uintptr_t ra)
 {
-    cpu_stb_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
+    int mmu_index = cpu_mmu_index(env_cpu(env), false);
+    cpu_stb_mmuidx_ra(env, addr, val, mmu_index, ra);
 }
 
 void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr addr,
                         uint32_t val, uintptr_t ra)
 {
-    cpu_stw_be_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
+    int mmu_index = cpu_mmu_index(env_cpu(env), false);
+    cpu_stw_be_mmuidx_ra(env, addr, val, mmu_index, ra);
 }
 
 void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr addr,
                         uint32_t val, uintptr_t ra)
 {
-    cpu_stl_be_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
+    int mmu_index = cpu_mmu_index(env_cpu(env), false);
+    cpu_stl_be_mmuidx_ra(env, addr, val, mmu_index, ra);
 }
 
 void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr addr,
                         uint64_t val, uintptr_t ra)
 {
-    cpu_stq_be_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
+    int mmu_index = cpu_mmu_index(env_cpu(env), false);
+    cpu_stq_be_mmuidx_ra(env, addr, val, mmu_index, ra);
 }
 
 void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr addr,
                         uint32_t val, uintptr_t ra)
 {
-    cpu_stw_le_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
+    int mmu_index = cpu_mmu_index(env_cpu(env), false);
+    cpu_stw_le_mmuidx_ra(env, addr, val, mmu_index, ra);
 }
 
 void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr addr,
                         uint32_t val, uintptr_t ra)
 {
-    cpu_stl_le_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
+    int mmu_index = cpu_mmu_index(env_cpu(env), false);
+    cpu_stl_le_mmuidx_ra(env, addr, val, mmu_index, ra);
 }
 
 void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr addr,
                         uint64_t val, uintptr_t ra)
 {
-    cpu_stq_le_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
+    int mmu_index = cpu_mmu_index(env_cpu(env), false);
+    cpu_stq_le_mmuidx_ra(env, addr, val, mmu_index, ra);
 }
 
 /*--------------------------*/
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 8501a33..bc05dce 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -311,6 +311,10 @@
 #define TLB_MMIO            (1 << (TARGET_PAGE_BITS_MIN - 2))
 #define TLB_WATCHPOINT      0
 
+static inline int cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    return MMU_USER_IDX;
+}
 #else
 
 /*
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index dcbd5f5..9ead1be 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -8,6 +8,7 @@
 #include "exec/hwaddr.h"
 #endif
 #include "hw/core/cpu.h"
+#include "tcg/debug-assert.h"
 
 #define EXCP_INTERRUPT  0x10000 /* async interruption */
 #define EXCP_HLT        0x10001 /* hlt instruction reached */
@@ -262,4 +263,24 @@
     return (void *)env - sizeof(CPUState);
 }
 
+#ifndef CONFIG_USER_ONLY
+/**
+ * cpu_mmu_index:
+ * @env: The cpu environment
+ * @ifetch: True for code access, false for data access.
+ *
+ * Return the core mmu index for the current translation regime.
+ * This function is used by generic TCG code paths.
+ *
+ * The user-only version of this function is inline in cpu-all.h,
+ * where it always returns MMU_USER_IDX.
+ */
+static inline int cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    int ret = cs->cc->mmu_index(cs, ifetch);
+    tcg_debug_assert(ret >= 0 && ret < NB_MMU_MODES);
+    return ret;
+}
+#endif /* !CONFIG_USER_ONLY */
+
 #endif /* CPU_COMMON_H */
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 2c284d6..4385ce5 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -103,6 +103,8 @@
  * @parse_features: Callback to parse command line arguments.
  * @reset_dump_flags: #CPUDumpFlags to use for reset logging.
  * @has_work: Callback for checking if there is work to do.
+ * @mmu_index: Callback for choosing softmmu mmu index;
+ *       may be used internally by memory_rw_debug without TCG.
  * @memory_rw_debug: Callback for GDB memory access.
  * @dump_state: Callback for dumping state.
  * @query_cpu_fast:
@@ -150,6 +152,7 @@
     void (*parse_features)(const char *typename, char *str, Error **errp);
 
     bool (*has_work)(CPUState *cpu);
+    int (*mmu_index)(CPUState *cpu, bool ifetch);
     int (*memory_rw_debug)(CPUState *cpu, vaddr addr,
                            uint8_t *buf, int len, bool is_write);
     void (*dump_state)(CPUState *cpu, FILE *, int flags);
diff --git a/linux-user/aarch64/vdso-be.so b/linux-user/aarch64/vdso-be.so
index 6084f3d..808206a 100755
--- a/linux-user/aarch64/vdso-be.so
+++ b/linux-user/aarch64/vdso-be.so
Binary files differ
diff --git a/linux-user/aarch64/vdso-le.so b/linux-user/aarch64/vdso-le.so
index 947d534..941aaf2 100755
--- a/linux-user/aarch64/vdso-le.so
+++ b/linux-user/aarch64/vdso-le.so
Binary files differ
diff --git a/linux-user/aarch64/vdso.S b/linux-user/aarch64/vdso.S
index 34d3a9e..a0ac148 100644
--- a/linux-user/aarch64/vdso.S
+++ b/linux-user/aarch64/vdso.S
@@ -63,7 +63,11 @@
  * For now, elide the unwind info for __kernel_rt_sigreturn and rely on
  * the libgcc fallback routine as we have always done.  This requires
  * that the code sequence used be exact.
+ *
+ * Add a nop as a spacer to ensure that unwind does not pick up the
+ * unwind info from the preceding syscall.
  */
+	nop
 __kernel_rt_sigreturn:
 	/* No BTI C insn here -- we arrive via RET. */
 	mov	x8, #__NR_rt_sigreturn
diff --git a/linux-user/sparc/cpu_loop.c b/linux-user/sparc/cpu_loop.c
index 3c1bde0..50424a5 100644
--- a/linux-user/sparc/cpu_loop.c
+++ b/linux-user/sparc/cpu_loop.c
@@ -293,7 +293,7 @@
         case TT_FP_EXCP:
             {
                 int code = TARGET_FPE_FLTUNK;
-                target_ulong fsr = env->fsr;
+                target_ulong fsr = cpu_get_fsr(env);
 
                 if ((fsr & FSR_FTT_MASK) == FSR_FTT_IEEE_EXCP) {
                     if (fsr & FSR_NVC) {
diff --git a/linux-user/sparc/signal.c b/linux-user/sparc/signal.c
index dfcae70..c2dc100 100644
--- a/linux-user/sparc/signal.c
+++ b/linux-user/sparc/signal.c
@@ -199,20 +199,21 @@
     for (i = 0; i < 32; ++i) {
         __put_user(env->fpr[i].ll, &fpu->si_double_regs[i]);
     }
-    __put_user(env->fsr, &fpu->si_fsr);
+    __put_user(cpu_get_fsr(env), &fpu->si_fsr);
     __put_user(env->gsr, &fpu->si_gsr);
     __put_user(env->fprs, &fpu->si_fprs);
 #else
     for (i = 0; i < 16; ++i) {
         __put_user(env->fpr[i].ll, &fpu->si_double_regs[i]);
     }
-    __put_user(env->fsr, &fpu->si_fsr);
+    __put_user(cpu_get_fsr(env), &fpu->si_fsr);
     __put_user(0, &fpu->si_fpqdepth);
 #endif
 }
 
 static void restore_fpu(struct target_siginfo_fpu *fpu, CPUSPARCState *env)
 {
+    target_ulong fsr;
     int i;
 
 #ifdef TARGET_SPARC64
@@ -230,15 +231,16 @@
             __get_user(env->fpr[i].ll, &fpu->si_double_regs[i]);
         }
     }
-    __get_user(env->fsr, &fpu->si_fsr);
     __get_user(env->gsr, &fpu->si_gsr);
     env->fprs |= fprs;
 #else
     for (i = 0; i < 16; ++i) {
         __get_user(env->fpr[i].ll, &fpu->si_double_regs[i]);
     }
-    __get_user(env->fsr, &fpu->si_fsr);
 #endif
+
+    __get_user(fsr, &fpu->si_fsr);
+    cpu_put_fsr(env, fsr);
 }
 
 #ifdef TARGET_ARCH_HAS_SETUP_FRAME
@@ -662,6 +664,7 @@
     __get_user(fenab, &(fpup->mcfpu_enab));
     if (fenab) {
         abi_ulong fprs;
+        abi_ulong fsr;
 
         /*
          * We use the FPRS from the guest only in deciding whether
@@ -690,7 +693,8 @@
                 __get_user(env->fpr[i].ll, &(fpup->mcfpu_fregs.dregs[i]));
             }
         }
-        __get_user(env->fsr, &(fpup->mcfpu_fsr));
+        __get_user(fsr, &(fpup->mcfpu_fsr));
+        cpu_put_fsr(env, fsr);
         __get_user(env->gsr, &(fpup->mcfpu_gsr));
     }
     unlock_user_struct(ucp, ucp_addr, 0);
diff --git a/semihosting/uaccess.c b/semihosting/uaccess.c
index 5d889f9..dc587d7 100644
--- a/semihosting/uaccess.c
+++ b/semihosting/uaccess.c
@@ -26,7 +26,7 @@
 
 ssize_t uaccess_strlen_user(CPUArchState *env, target_ulong addr)
 {
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = cpu_mmu_index(env_cpu(env), false);
     size_t len = 0;
 
     while (1) {
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
index de705c3..bf70173 100644
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@@ -64,6 +64,11 @@
                                     | CPU_INTERRUPT_MCHK);
 }
 
+static int alpha_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    return alpha_env_mmu_index(cpu_env(cs));
+}
+
 static void alpha_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
 {
     info->mach = bfd_mach_alpha_ev6;
@@ -230,6 +235,7 @@
 
     cc->class_by_name = alpha_cpu_class_by_name;
     cc->has_work = alpha_cpu_has_work;
+    cc->mmu_index = alpha_cpu_mmu_index;
     cc->dump_state = alpha_cpu_dump_state;
     cc->set_pc = alpha_cpu_set_pc;
     cc->get_pc = alpha_cpu_get_pc;
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
index ce80658..7188a40 100644
--- a/target/alpha/cpu.h
+++ b/target/alpha/cpu.h
@@ -389,7 +389,7 @@
 
 #define TB_FLAG_UNALIGN       (1u << 1)
 
-static inline int cpu_mmu_index(CPUAlphaState *env, bool ifetch)
+static inline int alpha_env_mmu_index(CPUAlphaState *env)
 {
     int ret = env->flags & ENV_FLAG_PS_USER ? MMU_USER_IDX : MMU_KERNEL_IDX;
     if (env->flags & ENV_FLAG_PAL_MODE) {
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index 134eb72..4b464f8 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -2875,7 +2875,7 @@
     int64_t bound;
 
     ctx->tbflags = ctx->base.tb->flags;
-    ctx->mem_idx = cpu_mmu_index(env, false);
+    ctx->mem_idx = alpha_env_mmu_index(env);
     ctx->implver = env->implver;
     ctx->amask = env->amask;
 
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 1ce26e5..5fa86bc 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -133,6 +133,11 @@
          | CPU_INTERRUPT_EXITTB);
 }
 
+static int arm_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    return arm_env_mmu_index(cpu_env(cs));
+}
+
 void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
                                  void *opaque)
 {
@@ -2501,6 +2506,7 @@
 
     cc->class_by_name = arm_cpu_class_by_name;
     cc->has_work = arm_cpu_has_work;
+    cc->mmu_index = arm_cpu_mmu_index;
     cc->dump_state = arm_cpu_dump_state;
     cc->set_pc = arm_cpu_set_pc;
     cc->get_pc = arm_cpu_get_pc;
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index d3477b1..63f31e0 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3241,19 +3241,6 @@
 #define EX_TBFLAG_AM32(IN, WHICH)  FIELD_EX32(IN.flags2, TBFLAG_AM32, WHICH)
 
 /**
- * cpu_mmu_index:
- * @env: The cpu environment
- * @ifetch: True for code access, false for data access.
- *
- * Return the core mmu index for the current translation regime.
- * This function is used by generic TCG code paths.
- */
-static inline int cpu_mmu_index(CPUARMState *env, bool ifetch)
-{
-    return EX_TBFLAG_ANY(env->hflags, MMUIDX);
-}
-
-/**
  * sve_vq
  * @env: the cpu context
  *
diff --git a/target/arm/helper.c b/target/arm/helper.c
index d51093a..8c1ff16 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -7841,7 +7841,7 @@
     uint64_t vaddr_in = (uint64_t) value;
     uint64_t vaddr = vaddr_in & ~(dline_size - 1);
     void *haddr;
-    int mem_idx = cpu_mmu_index(env, false);
+    int mem_idx = arm_env_mmu_index(env);
 
     /* This won't be crossing page boundaries */
     haddr = probe_read(env, vaddr, dline_size, mem_idx, GETPC());
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 71d6c70..fc337fe 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -40,6 +40,11 @@
 #define BANK_HYP    6
 #define BANK_MON    7
 
+static inline int arm_env_mmu_index(CPUARMState *env)
+{
+    return EX_TBFLAG_ANY(env->hflags, MMUIDX);
+}
+
 static inline bool excp_is_internal(int excp)
 {
     /* Return true if this exception number represents a QEMU-internal
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index 198b975..ebaa7f0 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -856,7 +856,7 @@
         tbii = EX_TBFLAG_A64(env->hflags, TBII);
         if ((tbii >> extract64(new_pc, 55, 1)) & 1) {
             /* TBI is enabled. */
-            int core_mmu_idx = cpu_mmu_index(env, false);
+            int core_mmu_idx = arm_env_mmu_index(env);
             if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) {
                 new_pc = sextract64(new_pc, 0, 56);
             } else {
@@ -925,7 +925,7 @@
      */
     int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
     uint64_t vaddr = vaddr_in & ~(blocklen - 1);
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = arm_env_mmu_index(env);
     void *mem;
 
     /*
diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c
index ffb8ea1..d971b81 100644
--- a/target/arm/tcg/mte_helper.c
+++ b/target/arm/tcg/mte_helper.c
@@ -291,7 +291,7 @@
 
 uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
 {
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = arm_env_mmu_index(env);
     uint8_t *mem;
     int rtag = 0;
 
@@ -311,7 +311,7 @@
 {
     if (unlikely(!QEMU_IS_ALIGNED(ptr, TAG_GRANULE))) {
         arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE,
-                                    cpu_mmu_index(env, false), ra);
+                                    arm_env_mmu_index(env), ra);
         g_assert_not_reached();
     }
 }
@@ -344,7 +344,7 @@
 static inline void do_stg(CPUARMState *env, uint64_t ptr, uint64_t xt,
                           uintptr_t ra, stg_store1 store1)
 {
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = arm_env_mmu_index(env);
     uint8_t *mem;
 
     check_tag_aligned(env, ptr, ra);
@@ -371,7 +371,7 @@
 
 void HELPER(stg_stub)(CPUARMState *env, uint64_t ptr)
 {
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = arm_env_mmu_index(env);
     uintptr_t ra = GETPC();
 
     check_tag_aligned(env, ptr, ra);
@@ -381,7 +381,7 @@
 static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt,
                            uintptr_t ra, stg_store1 store1)
 {
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = arm_env_mmu_index(env);
     int tag = allocation_tag_from_addr(xt);
     uint8_t *mem1, *mem2;
 
@@ -429,7 +429,7 @@
 
 void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr)
 {
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = arm_env_mmu_index(env);
     uintptr_t ra = GETPC();
     int in_page = -(ptr | TARGET_PAGE_MASK);
 
@@ -445,7 +445,7 @@
 
 uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr)
 {
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = arm_env_mmu_index(env);
     uintptr_t ra = GETPC();
     int gm_bs = env_archcpu(env)->gm_blocksize;
     int gm_bs_bytes = 4 << gm_bs;
@@ -505,7 +505,7 @@
 
 void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
 {
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = arm_env_mmu_index(env);
     uintptr_t ra = GETPC();
     int gm_bs = env_archcpu(env)->gm_blocksize;
     int gm_bs_bytes = 4 << gm_bs;
@@ -555,7 +555,7 @@
 void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val)
 {
     uintptr_t ra = GETPC();
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = arm_env_mmu_index(env);
     int log2_dcz_bytes, log2_tag_bytes;
     intptr_t dcz_bytes, tag_bytes;
     uint8_t *mem;
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index f006d15..bce4295 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -5481,7 +5481,7 @@
                          CPUARMState *env, target_ulong addr,
                          MMUAccessType access_type, uintptr_t retaddr)
 {
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = arm_env_mmu_index(env);
     int mem_off = info->mem_off_first[0];
     bool nofault = fault == FAULT_NO;
     bool have_work = true;
@@ -6529,7 +6529,7 @@
                sve_ldst1_host_fn *host_fn,
                sve_ldst1_tlb_fn *tlb_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = arm_env_mmu_index(env);
     const intptr_t reg_max = simd_oprsz(desc);
     const int scale = simd_data(desc);
     ARMVectorReg scratch;
@@ -6715,7 +6715,7 @@
                  sve_ldst1_host_fn *host_fn,
                  sve_ldst1_tlb_fn *tlb_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = arm_env_mmu_index(env);
     const intptr_t reg_max = simd_oprsz(desc);
     const int scale = simd_data(desc);
     const int esize = 1 << esz;
@@ -6920,7 +6920,7 @@
                sve_ldst1_host_fn *host_fn,
                sve_ldst1_tlb_fn *tlb_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = arm_env_mmu_index(env);
     const intptr_t reg_max = simd_oprsz(desc);
     const int scale = simd_data(desc);
     void *host[ARM_MAX_VQ * 4];
diff --git a/target/arm/tcg/tlb_helper.c b/target/arm/tcg/tlb_helper.c
index 5477c7f..885bf4e 100644
--- a/target/arm/tcg/tlb_helper.c
+++ b/target/arm/tcg/tlb_helper.c
@@ -281,7 +281,7 @@
 {
     ARMMMUFaultInfo fi = { .type = ARMFault_Alignment };
     int target_el = exception_target_el(env);
-    int mmu_idx = cpu_mmu_index(env, true);
+    int mmu_idx = arm_env_mmu_index(env);
     uint32_t fsc;
 
     env->exception.vaddress = pc;
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
index 1c68748..a40f445 100644
--- a/target/avr/cpu.c
+++ b/target/avr/cpu.c
@@ -50,6 +50,11 @@
             && cpu_interrupts_enabled(env);
 }
 
+static int avr_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    return ifetch ? MMU_CODE_IDX : MMU_DATA_IDX;
+}
+
 static void avr_cpu_synchronize_from_tb(CPUState *cs,
                                         const TranslationBlock *tb)
 {
@@ -236,6 +241,7 @@
     cc->class_by_name = avr_cpu_class_by_name;
 
     cc->has_work = avr_cpu_has_work;
+    cc->mmu_index = avr_cpu_mmu_index;
     cc->dump_state = avr_cpu_dump_state;
     cc->set_pc = avr_cpu_set_pc;
     cc->get_pc = avr_cpu_get_pc;
diff --git a/target/avr/cpu.h b/target/avr/cpu.h
index 7d5dd42..d185d20 100644
--- a/target/avr/cpu.h
+++ b/target/avr/cpu.h
@@ -184,13 +184,6 @@
     env->features |= (1U << feature);
 }
 
-#define cpu_mmu_index avr_cpu_mmu_index
-
-static inline int avr_cpu_mmu_index(CPUAVRState *env, bool ifetch)
-{
-    return ifetch ? MMU_CODE_IDX : MMU_DATA_IDX;
-}
-
 void avr_cpu_tcg_init(void);
 
 int cpu_avr_exec(CPUState *cpu);
diff --git a/target/cris/cpu.c b/target/cris/cpu.c
index 6349148..163fb05 100644
--- a/target/cris/cpu.c
+++ b/target/cris/cpu.c
@@ -56,6 +56,11 @@
     return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI);
 }
 
+static int cris_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    return !!(cpu_env(cs)->pregs[PR_CCS] & U_FLAG);
+}
+
 static void cris_cpu_reset_hold(Object *obj)
 {
     CPUState *s = CPU(obj);
@@ -274,6 +279,7 @@
 
     cc->class_by_name = cris_cpu_class_by_name;
     cc->has_work = cris_cpu_has_work;
+    cc->mmu_index = cris_cpu_mmu_index;
     cc->dump_state = cris_cpu_dump_state;
     cc->set_pc = cris_cpu_set_pc;
     cc->get_pc = cris_cpu_get_pc;
diff --git a/target/cris/cpu.h b/target/cris/cpu.h
index d830dca..3904e54 100644
--- a/target/cris/cpu.h
+++ b/target/cris/cpu.h
@@ -260,10 +260,6 @@
 
 /* MMU modes definitions */
 #define MMU_USER_IDX 1
-static inline int cpu_mmu_index (CPUCRISState *env, bool ifetch)
-{
-	return !!(env->pregs[PR_CCS] & U_FLAG);
-}
 
 /* Support function regs.  */
 #define SFR_RW_GC_CFG      0][0
diff --git a/target/cris/translate.c b/target/cris/translate.c
index ee1402a..8f74b6c 100644
--- a/target/cris/translate.c
+++ b/target/cris/translate.c
@@ -94,6 +94,7 @@
 
     CRISCPU *cpu;
     target_ulong pc, ppc;
+    int mem_index;
 
     /* Decoder.  */
         unsigned int (*decoder)(CPUCRISState *env, struct DisasContext *dc);
@@ -1008,37 +1009,31 @@
 
 static void gen_load64(DisasContext *dc, TCGv_i64 dst, TCGv addr)
 {
-    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
-
     /* If we get a fault on a delayslot we must keep the jmp state in
        the cpu-state to be able to re-execute the jmp.  */
     if (dc->delayed_branch == 1) {
         cris_store_direct_jmp(dc);
     }
 
-    tcg_gen_qemu_ld_i64(dst, addr, mem_index, MO_TEUQ);
+    tcg_gen_qemu_ld_i64(dst, addr, dc->mem_index, MO_TEUQ);
 }
 
 static void gen_load(DisasContext *dc, TCGv dst, TCGv addr, 
              unsigned int size, int sign)
 {
-    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
-
     /* If we get a fault on a delayslot we must keep the jmp state in
        the cpu-state to be able to re-execute the jmp.  */
     if (dc->delayed_branch == 1) {
         cris_store_direct_jmp(dc);
     }
 
-    tcg_gen_qemu_ld_tl(dst, addr, mem_index,
+    tcg_gen_qemu_ld_tl(dst, addr, dc->mem_index,
                        MO_TE + ctz32(size) + (sign ? MO_SIGN : 0));
 }
 
 static void gen_store (DisasContext *dc, TCGv addr, TCGv val,
                unsigned int size)
 {
-    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
-
     /* If we get a fault on a delayslot we must keep the jmp state in
        the cpu-state to be able to re-execute the jmp.  */
     if (dc->delayed_branch == 1) {
@@ -1055,7 +1050,7 @@
         return;
     }
 
-    tcg_gen_qemu_st_tl(val, addr, mem_index, MO_TE + ctz32(size));
+    tcg_gen_qemu_st_tl(val, addr, dc->mem_index, MO_TE + ctz32(size));
 
     if (dc->flags_x) {
         cris_evaluate_flags(dc);
@@ -2971,6 +2966,7 @@
     dc->cpu = env_archcpu(env);
     dc->ppc = pc_start;
     dc->pc = pc_start;
+    dc->mem_index = cpu_mmu_index(cs, false);
     dc->flags_uptodate = 1;
     dc->flags_x = tb_flags & X_FLAG;
     dc->cc_x_uptodate = 0;
diff --git a/target/cris/translate_v10.c.inc b/target/cris/translate_v10.c.inc
index 6df599f..73fc27c 100644
--- a/target/cris/translate_v10.c.inc
+++ b/target/cris/translate_v10.c.inc
@@ -91,8 +91,6 @@
 static void gen_store_v10(DisasContext *dc, TCGv addr, TCGv val,
                        unsigned int size)
 {
-    int mem_index = cpu_mmu_index(&dc->cpu->env, false);
-
     /* If we get a fault on a delayslot we must keep the jmp state in
        the cpu-state to be able to re-execute the jmp.  */
     if (dc->delayed_branch == 1) {
@@ -101,11 +99,11 @@
 
     /* Conditional writes. */
     if (dc->flags_x) {
-        gen_store_v10_conditional(dc, addr, val, size, mem_index);
+        gen_store_v10_conditional(dc, addr, val, size, dc->mem_index);
         return;
     }
 
-    tcg_gen_qemu_st_tl(val, addr, mem_index, ctz32(size) | MO_TE);
+    tcg_gen_qemu_st_tl(val, addr, dc->mem_index, ctz32(size) | MO_TE);
 }
 
 
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index 5c11ae3..3eef58f 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -146,15 +146,6 @@
     *flags = hex_flags;
 }
 
-static inline int cpu_mmu_index(CPUHexagonState *env, bool ifetch)
-{
-#ifdef CONFIG_USER_ONLY
-    return MMU_USER_IDX;
-#else
-#error System mode not supported on Hexagon yet
-#endif
-}
-
 typedef HexagonCPU ArchCPU;
 
 void hexagon_translate_init(void);
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
index 3c01985..5f87c1b 100644
--- a/target/hppa/cpu.c
+++ b/target/hppa/cpu.c
@@ -94,6 +94,17 @@
     return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI);
 }
 
+static int hppa_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    CPUHPPAState *env = cpu_env(cs);
+
+    if (env->psw & (ifetch ? PSW_C : PSW_D)) {
+        return PRIV_P_TO_MMU_IDX(env->iaoq_f & 3, env->psw & PSW_P);
+    }
+    /* mmu disabled */
+    return env->psw & PSW_W ? MMU_ABS_W_IDX : MMU_ABS_IDX;
+}
+
 static void hppa_cpu_disas_set_info(CPUState *cs, disassemble_info *info)
 {
     info->mach = bfd_mach_hppa20;
@@ -194,6 +205,7 @@
 
     cc->class_by_name = hppa_cpu_class_by_name;
     cc->has_work = hppa_cpu_has_work;
+    cc->mmu_index = hppa_cpu_mmu_index;
     cc->dump_state = hppa_cpu_dump_state;
     cc->set_pc = hppa_cpu_set_pc;
     cc->get_pc = hppa_cpu_get_pc;
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
index 6a15340..7a181e8 100644
--- a/target/hppa/cpu.h
+++ b/target/hppa/cpu.h
@@ -281,19 +281,6 @@
     return hppa_is_pa20(env) ? 0 : PA10_BTLB_FIXED + PA10_BTLB_VARIABLE;
 }
 
-static inline int cpu_mmu_index(CPUHPPAState *env, bool ifetch)
-{
-#ifdef CONFIG_USER_ONLY
-    return MMU_USER_IDX;
-#else
-    if (env->psw & (ifetch ? PSW_C : PSW_D)) {
-        return PRIV_P_TO_MMU_IDX(env->iaoq_f & 3, env->psw & PSW_P);
-    }
-    /* mmu disabled */
-    return env->psw & PSW_W ? MMU_ABS_W_IDX : MMU_ABS_IDX;
-#endif
-}
-
 void hppa_translate_init(void);
 
 #define CPU_RESOLVING_TYPE TYPE_HPPA_CPU
diff --git a/target/hppa/mem_helper.c b/target/hppa/mem_helper.c
index 4fcc612..629a9d9 100644
--- a/target/hppa/mem_helper.c
+++ b/target/hppa/mem_helper.c
@@ -646,7 +646,7 @@
 void HELPER(diag_btlb)(CPUHPPAState *env)
 {
     unsigned int phys_page, len, slot;
-    int mmu_idx = cpu_mmu_index(env, 0);
+    int mmu_idx = cpu_mmu_index(env_cpu(env), 0);
     uintptr_t ra = GETPC();
     HPPATLBEntry *btlb;
     uint64_t virt_page;
diff --git a/target/hppa/op_helper.c b/target/hppa/op_helper.c
index ce15469..b1f24a5 100644
--- a/target/hppa/op_helper.c
+++ b/target/hppa/op_helper.c
@@ -59,7 +59,7 @@
 static void atomic_store_mask32(CPUHPPAState *env, target_ulong addr,
                                 uint32_t val, uint32_t mask, uintptr_t ra)
 {
-    int mmu_idx = cpu_mmu_index(env, 0);
+    int mmu_idx = cpu_mmu_index(env_cpu(env), 0);
     uint32_t old, new, cmp, *haddr;
     void *vaddr;
 
@@ -86,7 +86,7 @@
                                 int size, uintptr_t ra)
 {
 #ifdef CONFIG_ATOMIC64
-    int mmu_idx = cpu_mmu_index(env, 0);
+    int mmu_idx = cpu_mmu_index(env_cpu(env), 0);
     uint64_t old, new, cmp, *haddr;
     void *vaddr;
 
@@ -235,7 +235,7 @@
     default:
         /* Nothing is stored, but protection is checked and the
            cacheline is marked dirty.  */
-        probe_write(env, addr, 0, cpu_mmu_index(env, 0), ra);
+        probe_write(env, addr, 0, cpu_mmu_index(env_cpu(env), 0), ra);
         break;
     }
 }
@@ -296,7 +296,7 @@
     default:
         /* Nothing is stored, but protection is checked and the
            cacheline is marked dirty.  */
-        probe_write(env, addr, 0, cpu_mmu_index(env, 0), ra);
+        probe_write(env, addr, 0, cpu_mmu_index(env_cpu(env), 0), ra);
         break;
     }
 }
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 03822d9..ef46755 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -7720,6 +7720,15 @@
     return x86_cpu_pending_interrupt(cs, cs->interrupt_request) != 0;
 }
 
+static int x86_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    CPUX86State *env = cpu_env(cs);
+
+    return (env->hflags & HF_CPL_MASK) == 3 ? MMU_USER_IDX :
+        (!(env->hflags & HF_SMAP_MASK) || (env->eflags & AC_MASK))
+        ? MMU_KNOSMAP_IDX : MMU_KSMAP_IDX;
+}
+
 static void x86_disas_set_info(CPUState *cs, disassemble_info *info)
 {
     X86CPU *cpu = X86_CPU(cs);
@@ -7954,6 +7963,7 @@
     cc->class_by_name = x86_cpu_class_by_name;
     cc->parse_features = x86_cpu_parse_featurestr;
     cc->has_work = x86_cpu_has_work;
+    cc->mmu_index = x86_cpu_mmu_index;
     cc->dump_state = x86_cpu_dump_state;
     cc->set_pc = x86_cpu_set_pc;
     cc->get_pc = x86_cpu_get_pc;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 7f0786e..6a5b180 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2296,13 +2296,6 @@
 #define MMU_NESTED_IDX  3
 #define MMU_PHYS_IDX    4
 
-static inline int cpu_mmu_index(CPUX86State *env, bool ifetch)
-{
-    return (env->hflags & HF_CPL_MASK) == 3 ? MMU_USER_IDX :
-        (!(env->hflags & HF_SMAP_MASK) || (env->eflags & AC_MASK))
-        ? MMU_KNOSMAP_IDX : MMU_KSMAP_IDX;
-}
-
 static inline int cpu_mmu_index_kernel(CPUX86State *env)
 {
     return !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP_IDX :
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 2808903..10cba16 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -6955,7 +6955,7 @@
     dc->cc_op_dirty = false;
     dc->popl_esp_hack = 0;
     /* select memory access functions */
-    dc->mem_index = cpu_mmu_index(env, false);
+    dc->mem_index = cpu_mmu_index(cpu, false);
     dc->cpuid_features = env->features[FEAT_1_EDX];
     dc->cpuid_ext_features = env->features[FEAT_1_ECX];
     dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 76eb496..7dc50bf 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -375,6 +375,16 @@
 #endif
 }
 
+static int loongarch_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    CPULoongArchState *env = cpu_env(cs);
+
+    if (FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG)) {
+        return FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV);
+    }
+    return MMU_DA_IDX;
+}
+
 static void loongarch_la464_initfn(Object *obj)
 {
     LoongArchCPU *cpu = LOONGARCH_CPU(obj);
@@ -777,6 +787,7 @@
 
     cc->class_by_name = loongarch_cpu_class_by_name;
     cc->has_work = loongarch_cpu_has_work;
+    cc->mmu_index = loongarch_cpu_mmu_index;
     cc->dump_state = loongarch_cpu_dump_state;
     cc->set_pc = loongarch_cpu_set_pc;
     cc->get_pc = loongarch_cpu_get_pc;
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 0fa5e0c..ec37579 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -404,21 +404,9 @@
  */
 #define MMU_PLV_KERNEL   0
 #define MMU_PLV_USER     3
-#define MMU_IDX_KERNEL   MMU_PLV_KERNEL
-#define MMU_IDX_USER     MMU_PLV_USER
-#define MMU_IDX_DA       4
-
-static inline int cpu_mmu_index(CPULoongArchState *env, bool ifetch)
-{
-#ifdef CONFIG_USER_ONLY
-    return MMU_IDX_USER;
-#else
-    if (FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG)) {
-        return FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV);
-    }
-    return MMU_IDX_DA;
-#endif
-}
+#define MMU_KERNEL_IDX   MMU_PLV_KERNEL
+#define MMU_USER_IDX     MMU_PLV_USER
+#define MMU_DA_IDX       4
 
 static inline bool is_la64(CPULoongArchState *env)
 {
diff --git a/target/loongarch/cpu_helper.c b/target/loongarch/cpu_helper.c
index f68d63f..45f821d 100644
--- a/target/loongarch/cpu_helper.c
+++ b/target/loongarch/cpu_helper.c
@@ -171,8 +171,8 @@
                          int *prot, target_ulong address,
                          MMUAccessType access_type, int mmu_idx)
 {
-    int user_mode = mmu_idx == MMU_IDX_USER;
-    int kernel_mode = mmu_idx == MMU_IDX_KERNEL;
+    int user_mode = mmu_idx == MMU_USER_IDX;
+    int kernel_mode = mmu_idx == MMU_KERNEL_IDX;
     uint32_t plv, base_c, base_v;
     int64_t addr_high;
     uint8_t da = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, DA);
@@ -224,7 +224,7 @@
     int prot;
 
     if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD,
-                             cpu_mmu_index(env, false)) != 0) {
+                             cpu_mmu_index(cs, false)) != 0) {
         return -1;
     }
     return phys_addr;
diff --git a/target/loongarch/tcg/insn_trans/trans_privileged.c.inc b/target/loongarch/tcg/insn_trans/trans_privileged.c.inc
index 01d4572..7e4ec93 100644
--- a/target/loongarch/tcg/insn_trans/trans_privileged.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_privileged.c.inc
@@ -323,7 +323,7 @@
 
 static void check_mmu_idx(DisasContext *ctx)
 {
-    if (ctx->mem_idx != MMU_IDX_DA) {
+    if (ctx->mem_idx != MMU_DA_IDX) {
         tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next + 4);
         ctx->base.is_jmp = DISAS_EXIT;
     }
diff --git a/target/loongarch/tcg/tlb_helper.c b/target/loongarch/tcg/tlb_helper.c
index 804ab7a..a08c08b 100644
--- a/target/loongarch/tcg/tlb_helper.c
+++ b/target/loongarch/tcg/tlb_helper.c
@@ -90,7 +90,7 @@
     uint8_t tlb_ps;
     LoongArchTLB *tlb = &env->tlb[index];
 
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = cpu_mmu_index(env_cpu(env), false);
     uint8_t tlb_v0 = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, V);
     uint8_t tlb_v1 = FIELD_EX64(tlb->tlb_entry1, TLBENTRY, V);
     uint64_t tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN);
diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
index 235515c..58674cb 100644
--- a/target/loongarch/tcg/translate.c
+++ b/target/loongarch/tcg/translate.c
@@ -125,7 +125,7 @@
     if (ctx->base.tb->flags & HW_FLAGS_CRMD_PG) {
         ctx->mem_idx = ctx->plv;
     } else {
-        ctx->mem_idx = MMU_IDX_DA;
+        ctx->mem_idx = MMU_DA_IDX;
     }
 
     /* Bound the number of insns to execute to those left on the page.  */
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
index 44000f5..8a8392e 100644
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@@ -56,6 +56,11 @@
     return cs->interrupt_request & CPU_INTERRUPT_HARD;
 }
 
+static int m68k_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    return cpu_env(cs)->sr & SR_S ? MMU_KERNEL_IDX : MMU_USER_IDX;
+}
+
 static void m68k_set_feature(CPUM68KState *env, int feature)
 {
     env->features |= BIT_ULL(feature);
@@ -551,6 +556,7 @@
 
     cc->class_by_name = m68k_cpu_class_by_name;
     cc->has_work = m68k_cpu_has_work;
+    cc->mmu_index = m68k_cpu_mmu_index;
     cc->dump_state = m68k_cpu_dump_state;
     cc->set_pc = m68k_cpu_set_pc;
     cc->get_pc = m68k_cpu_get_pc;
diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h
index d13427b..aca4aa6 100644
--- a/target/m68k/cpu.h
+++ b/target/m68k/cpu.h
@@ -577,10 +577,6 @@
 /* MMU modes definitions */
 #define MMU_KERNEL_IDX 0
 #define MMU_USER_IDX 1
-static inline int cpu_mmu_index (CPUM68KState *env, bool ifetch)
-{
-    return (env->sr & SR_S) == 0 ? 1 : 0;
-}
 
 bool m68k_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
                        MMUAccessType access_type, int mmu_idx,
diff --git a/target/m68k/op_helper.c b/target/m68k/op_helper.c
index 1ce850b..47b4173 100644
--- a/target/m68k/op_helper.c
+++ b/target/m68k/op_helper.c
@@ -811,7 +811,7 @@
     uint32_t l1, l2;
     uintptr_t ra = GETPC();
 #if defined(CONFIG_ATOMIC64)
-    int mmu_idx = cpu_mmu_index(env, 0);
+    int mmu_idx = cpu_mmu_index(env_cpu(env), 0);
     MemOpIdx oi = make_memop_idx(MO_BEUQ, mmu_idx);
 #endif
 
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
index 2318ad7..2002231 100644
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@@ -118,6 +118,22 @@
     return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI);
 }
 
+static int mb_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    CPUMBState *env = cpu_env(cs);
+    MicroBlazeCPU *cpu = env_archcpu(env);
+
+    /* Are we in nommu mode?.  */
+    if (!(env->msr & MSR_VM) || !cpu->cfg.use_mmu) {
+        return MMU_NOMMU_IDX;
+    }
+
+    if (env->msr & MSR_UM) {
+        return MMU_USER_IDX;
+    }
+    return MMU_KERNEL_IDX;
+}
+
 #ifndef CONFIG_USER_ONLY
 static void mb_cpu_ns_axi_dp(void *opaque, int irq, int level)
 {
@@ -415,7 +431,7 @@
 
     cc->class_by_name = mb_cpu_class_by_name;
     cc->has_work = mb_cpu_has_work;
-
+    cc->mmu_index = mb_cpu_mmu_index;
     cc->dump_state = mb_cpu_dump_state;
     cc->set_pc = mb_cpu_set_pc;
     cc->get_pc = mb_cpu_get_pc;
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
index b537436..446af5d 100644
--- a/target/microblaze/cpu.h
+++ b/target/microblaze/cpu.h
@@ -434,21 +434,6 @@
                                MemTxResult response, uintptr_t retaddr);
 #endif
 
-static inline int cpu_mmu_index(CPUMBState *env, bool ifetch)
-{
-    MicroBlazeCPU *cpu = env_archcpu(env);
-
-    /* Are we in nommu mode?.  */
-    if (!(env->msr & MSR_VM) || !cpu->cfg.use_mmu) {
-        return MMU_NOMMU_IDX;
-    }
-
-    if (env->msr & MSR_UM) {
-        return MMU_USER_IDX;
-    }
-    return MMU_KERNEL_IDX;
-}
-
 #ifndef CONFIG_USER_ONLY
 extern const VMStateDescription vmstate_mb_cpu;
 #endif
diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c
index 98bdb82..460eee0 100644
--- a/target/microblaze/helper.c
+++ b/target/microblaze/helper.c
@@ -228,10 +228,9 @@
                                         MemTxAttrs *attrs)
 {
     MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
-    CPUMBState *env = &cpu->env;
     target_ulong vaddr, paddr = 0;
     MicroBlazeMMULookup lu;
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = cpu_mmu_index(cs, false);
     unsigned int hit;
 
     /* Caller doesn't initialize */
diff --git a/target/microblaze/mmu.c b/target/microblaze/mmu.c
index 7565197..2340066 100644
--- a/target/microblaze/mmu.c
+++ b/target/microblaze/mmu.c
@@ -305,7 +305,7 @@
             }
 
             hit = mmu_translate(cpu, &lu, v & TLB_EPN_MASK,
-                                0, cpu_mmu_index(env, false));
+                                0, cpu_mmu_index(env_cpu(env), false));
             if (hit) {
                 env->mmu.regs[MMU_R_TLBX] = lu.idx;
             } else {
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
index 2e62864..a465c2d 100644
--- a/target/microblaze/translate.c
+++ b/target/microblaze/translate.c
@@ -1607,7 +1607,7 @@
     dc->ext_imm = dc->base.tb->cs_base;
     dc->r0 = NULL;
     dc->r0_set = false;
-    dc->mem_index = cpu_mmu_index(&cpu->env, false);
+    dc->mem_index = cpu_mmu_index(cs, false);
     dc->jmp_cond = dc->tb_flags & D_FLAG ? TCG_COND_ALWAYS : TCG_COND_NEVER;
     dc->jmp_dest = -1;
 
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index df544ab..d644adb 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -182,6 +182,11 @@
     return has_work;
 }
 
+static int mips_cpu_mmu_index(CPUState *cs, bool ifunc)
+{
+    return mips_env_mmu_index(cpu_env(cs));
+}
+
 #include "cpu-defs.c.inc"
 
 static void mips_cpu_reset_hold(Object *obj)
@@ -579,6 +584,7 @@
 
     cc->class_by_name = mips_cpu_class_by_name;
     cc->has_work = mips_cpu_has_work;
+    cc->mmu_index = mips_cpu_mmu_index;
     cc->dump_state = mips_cpu_dump_state;
     cc->set_pc = mips_cpu_set_pc;
     cc->get_pc = mips_cpu_get_pc;
diff --git a/target/mips/cpu.h b/target/mips/cpu.h
index 1163a71..ef26fe0 100644
--- a/target/mips/cpu.h
+++ b/target/mips/cpu.h
@@ -1242,18 +1242,20 @@
  * MMU modes definitions. We carefully match the indices with our
  * hflags layout.
  */
+#define MMU_KERNEL_IDX 0
 #define MMU_USER_IDX 2
+#define MMU_ERL_IDX 3
 
 static inline int hflags_mmu_index(uint32_t hflags)
 {
     if (hflags & MIPS_HFLAG_ERL) {
-        return 3; /* ERL */
+        return MMU_ERL_IDX;
     } else {
         return hflags & MIPS_HFLAG_KSU;
     }
 }
 
-static inline int cpu_mmu_index(CPUMIPSState *env, bool ifetch)
+static inline int mips_env_mmu_index(CPUMIPSState *env)
 {
     return hflags_mmu_index(env->hflags);
 }
diff --git a/target/mips/sysemu/physaddr.c b/target/mips/sysemu/physaddr.c
index 05990aa..13c8bc8f 100644
--- a/target/mips/sysemu/physaddr.c
+++ b/target/mips/sysemu/physaddr.c
@@ -236,7 +236,7 @@
     int prot;
 
     if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD,
-                             cpu_mmu_index(env, false)) != 0) {
+                             mips_env_mmu_index(env)) != 0) {
         return -1;
     }
     return phys_addr;
diff --git a/target/mips/tcg/msa_helper.c b/target/mips/tcg/msa_helper.c
index 7a8dbad..d218176 100644
--- a/target/mips/tcg/msa_helper.c
+++ b/target/mips/tcg/msa_helper.c
@@ -8214,7 +8214,7 @@
 #if !defined(CONFIG_USER_ONLY)
 #define MEMOP_IDX(DF)                                                   \
     MemOpIdx oi = make_memop_idx(MO_TE | DF | MO_UNALN,                 \
-                                 cpu_mmu_index(env, false));
+                                 mips_env_mmu_index(env));
 #else
 #define MEMOP_IDX(DF)
 #endif
@@ -8323,7 +8323,7 @@
                      target_ulong addr)
 {
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = mips_env_mmu_index(env);
     uintptr_t ra = GETPC();
 
     ensure_writable_pages(env, addr, mmu_idx, ra);
@@ -8337,7 +8337,7 @@
                      target_ulong addr)
 {
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = mips_env_mmu_index(env);
     uintptr_t ra = GETPC();
     uint64_t d0, d1;
 
@@ -8358,7 +8358,7 @@
                      target_ulong addr)
 {
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = mips_env_mmu_index(env);
     uintptr_t ra = GETPC();
     uint64_t d0, d1;
 
@@ -8379,7 +8379,7 @@
                      target_ulong addr)
 {
     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = mips_env_mmu_index(env);
     uintptr_t ra = GETPC();
 
     ensure_writable_pages(env, addr, mmu_idx, GETPC());
diff --git a/target/mips/tcg/sysemu/cp0_helper.c b/target/mips/tcg/sysemu/cp0_helper.c
index cc545ae..62f6fb4 100644
--- a/target/mips/tcg/sysemu/cp0_helper.c
+++ b/target/mips/tcg/sysemu/cp0_helper.c
@@ -1202,7 +1202,7 @@
                 old, old & env->CP0_Cause & CP0Ca_IP_mask,
                 val, val & env->CP0_Cause & CP0Ca_IP_mask,
                 env->CP0_Cause);
-        switch (cpu_mmu_index(env, false)) {
+        switch (mips_env_mmu_index(env)) {
         case 3:
             qemu_log(", ERL\n");
             break;
diff --git a/target/mips/tcg/sysemu/special_helper.c b/target/mips/tcg/sysemu/special_helper.c
index 93276f7..518d3fb 100644
--- a/target/mips/tcg/sysemu/special_helper.c
+++ b/target/mips/tcg/sysemu/special_helper.c
@@ -68,7 +68,7 @@
         if (env->hflags & MIPS_HFLAG_DM) {
             qemu_log(" DEPC " TARGET_FMT_lx, env->CP0_DEPC);
         }
-        switch (cpu_mmu_index(env, false)) {
+        switch (mips_env_mmu_index(env)) {
         case 3:
             qemu_log(", ERL\n");
             break;
diff --git a/target/mips/tcg/sysemu/tlb_helper.c b/target/mips/tcg/sysemu/tlb_helper.c
index 4ede904..cdae42f 100644
--- a/target/mips/tcg/sysemu/tlb_helper.c
+++ b/target/mips/tcg/sysemu/tlb_helper.c
@@ -623,7 +623,7 @@
 static int walk_directory(CPUMIPSState *env, uint64_t *vaddr,
         int directory_index, bool *huge_page, bool *hgpg_directory_hit,
         uint64_t *pw_entrylo0, uint64_t *pw_entrylo1,
-        unsigned directory_shift, unsigned leaf_shift)
+        unsigned directory_shift, unsigned leaf_shift, int ptw_mmu_idx)
 {
     int dph = (env->CP0_PWCtl >> CP0PC_DPH) & 0x1;
     int psn = (env->CP0_PWCtl >> CP0PC_PSN) & 0x3F;
@@ -638,8 +638,7 @@
     uint64_t w = 0;
 
     if (get_physical_address(env, &paddr, &prot, *vaddr, MMU_DATA_LOAD,
-                             cpu_mmu_index(env, false)) !=
-                             TLBRET_MATCH) {
+                             ptw_mmu_idx) != TLBRET_MATCH) {
         /* wrong base address */
         return 0;
     }
@@ -666,8 +665,7 @@
                 *pw_entrylo0 = entry;
             }
             if (get_physical_address(env, &paddr, &prot, vaddr2, MMU_DATA_LOAD,
-                                     cpu_mmu_index(env, false)) !=
-                                     TLBRET_MATCH) {
+                                     ptw_mmu_idx) != TLBRET_MATCH) {
                 return 0;
             }
             if (!get_pte(env, vaddr2, leafentry_size, &entry)) {
@@ -690,7 +688,7 @@
 }
 
 static bool page_table_walk_refill(CPUMIPSState *env, vaddr address,
-                                   int mmu_idx)
+                                   int ptw_mmu_idx)
 {
     int gdw = (env->CP0_PWSize >> CP0PS_GDW) & 0x3F;
     int udw = (env->CP0_PWSize >> CP0PS_UDW) & 0x3F;
@@ -776,7 +774,7 @@
         vaddr |= goffset;
         switch (walk_directory(env, &vaddr, pf_gdw, &huge_page, &hgpg_gdhit,
                                &pw_entrylo0, &pw_entrylo1,
-                               directory_shift, leaf_shift))
+                               directory_shift, leaf_shift, ptw_mmu_idx))
         {
         case 0:
             return false;
@@ -793,7 +791,7 @@
         vaddr |= uoffset;
         switch (walk_directory(env, &vaddr, pf_udw, &huge_page, &hgpg_udhit,
                                &pw_entrylo0, &pw_entrylo1,
-                               directory_shift, leaf_shift))
+                               directory_shift, leaf_shift, ptw_mmu_idx))
         {
         case 0:
             return false;
@@ -810,7 +808,7 @@
         vaddr |= moffset;
         switch (walk_directory(env, &vaddr, pf_mdw, &huge_page, &hgpg_mdhit,
                                &pw_entrylo0, &pw_entrylo1,
-                               directory_shift, leaf_shift))
+                               directory_shift, leaf_shift, ptw_mmu_idx))
         {
         case 0:
             return false;
@@ -825,8 +823,7 @@
     /* Leaf Level Page Table - First half of PTE pair */
     vaddr |= ptoffset0;
     if (get_physical_address(env, &paddr, &prot, vaddr, MMU_DATA_LOAD,
-                             cpu_mmu_index(env, false)) !=
-                             TLBRET_MATCH) {
+                             ptw_mmu_idx) != TLBRET_MATCH) {
         return false;
     }
     if (!get_pte(env, vaddr, leafentry_size, &dir_entry)) {
@@ -838,8 +835,7 @@
     /* Leaf Level Page Table - Second half of PTE pair */
     vaddr |= ptoffset1;
     if (get_physical_address(env, &paddr, &prot, vaddr, MMU_DATA_LOAD,
-                             cpu_mmu_index(env, false)) !=
-                             TLBRET_MATCH) {
+                             ptw_mmu_idx) != TLBRET_MATCH) {
         return false;
     }
     if (!get_pte(env, vaddr, leafentry_size, &dir_entry)) {
@@ -944,12 +940,10 @@
          * Memory reads during hardware page table walking are performed
          * as if they were kernel-mode load instructions.
          */
-        int mode = (env->hflags & MIPS_HFLAG_KSU);
-        bool ret_walker;
-        env->hflags &= ~MIPS_HFLAG_KSU;
-        ret_walker = page_table_walk_refill(env, address, mmu_idx);
-        env->hflags |= mode;
-        if (ret_walker) {
+        int ptw_mmu_idx = (env->hflags & MIPS_HFLAG_ERL ?
+                           MMU_ERL_IDX : MMU_KERNEL_IDX);
+
+        if (page_table_walk_refill(env, address, ptw_mmu_idx)) {
             ret = get_physical_address(env, &physical, &prot, address,
                                        access_type, mmu_idx);
             if (ret == TLBRET_MATCH) {
@@ -979,7 +973,7 @@
 
     /* data access */
     ret = get_physical_address(env, &physical, &prot, address, access_type,
-                               cpu_mmu_index(env, false));
+                               mips_env_mmu_index(env));
     if (ret == TLBRET_MATCH) {
         return physical;
     }
diff --git a/target/nios2/cpu.c b/target/nios2/cpu.c
index 596c0c5..0760bf6 100644
--- a/target/nios2/cpu.c
+++ b/target/nios2/cpu.c
@@ -57,6 +57,12 @@
     return cs->interrupt_request & CPU_INTERRUPT_HARD;
 }
 
+static int nios2_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    return (cpu_env(cs)->ctrl[CR_STATUS] & CR_STATUS_U
+            ? MMU_USER_IDX : MMU_SUPERVISOR_IDX);
+}
+
 static void nios2_cpu_reset_hold(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -381,6 +387,7 @@
 
     cc->class_by_name = nios2_cpu_class_by_name;
     cc->has_work = nios2_cpu_has_work;
+    cc->mmu_index = nios2_cpu_mmu_index;
     cc->dump_state = nios2_cpu_dump_state;
     cc->set_pc = nios2_cpu_set_pc;
     cc->get_pc = nios2_cpu_get_pc;
diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h
index 2d79b5b..4164a34 100644
--- a/target/nios2/cpu.h
+++ b/target/nios2/cpu.h
@@ -270,12 +270,6 @@
 #define MMU_SUPERVISOR_IDX  0
 #define MMU_USER_IDX        1
 
-static inline int cpu_mmu_index(CPUNios2State *env, bool ifetch)
-{
-    return (env->ctrl[CR_STATUS] & CR_STATUS_U) ? MMU_USER_IDX :
-                                                  MMU_SUPERVISOR_IDX;
-}
-
 #ifndef CONFIG_USER_ONLY
 hwaddr nios2_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
 bool nios2_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
index 3078372..612556b 100644
--- a/target/nios2/translate.c
+++ b/target/nios2/translate.c
@@ -948,7 +948,7 @@
     Nios2CPU *cpu = env_archcpu(env);
     int page_insns;
 
-    dc->mem_idx = cpu_mmu_index(env, false);
+    dc->mem_idx = cpu_mmu_index(cs, false);
     dc->cr_state = cpu->cr_state;
     dc->tb_flags = dc->base.tb->flags;
     dc->eic_present = cpu->eic_present;
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index 477d49d..a3cb80c 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -68,6 +68,18 @@
                                     CPU_INTERRUPT_TIMER);
 }
 
+static int openrisc_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    CPUOpenRISCState *env = cpu_env(cs);
+
+    if (env->sr & (ifetch ? SR_IME : SR_DME)) {
+        /* The mmu is enabled; test supervisor state.  */
+        return env->sr & SR_SM ? MMU_SUPERVISOR_IDX : MMU_USER_IDX;
+    }
+
+    return MMU_NOMMU_IDX;  /* mmu is disabled */
+}
+
 static void openrisc_disas_set_info(CPUState *cpu, disassemble_info *info)
 {
     info->print_insn = print_insn_or1k;
@@ -239,6 +251,7 @@
 
     cc->class_by_name = openrisc_cpu_class_by_name;
     cc->has_work = openrisc_cpu_has_work;
+    cc->mmu_index = openrisc_cpu_mmu_index;
     cc->dump_state = openrisc_cpu_dump_state;
     cc->set_pc = openrisc_cpu_set_pc;
     cc->get_pc = openrisc_cpu_get_pc;
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
index b454014..b1b7db5 100644
--- a/target/openrisc/cpu.h
+++ b/target/openrisc/cpu.h
@@ -361,18 +361,6 @@
            | (env->sr & (SR_SM | SR_DME | SR_IME | SR_OVE));
 }
 
-static inline int cpu_mmu_index(CPUOpenRISCState *env, bool ifetch)
-{
-    int ret = MMU_NOMMU_IDX;  /* mmu is disabled */
-
-    if (env->sr & (ifetch ? SR_IME : SR_DME)) {
-        /* The mmu is enabled; test supervisor state.  */
-        ret = env->sr & SR_SM ? MMU_SUPERVISOR_IDX : MMU_USER_IDX;
-    }
-
-    return ret;
-}
-
 static inline uint32_t cpu_get_sr(const CPUOpenRISCState *env)
 {
     return (env->sr
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
index d4cbc5e..785bcb6 100644
--- a/target/openrisc/translate.c
+++ b/target/openrisc/translate.c
@@ -1528,7 +1528,7 @@
     CPUOpenRISCState *env = cpu_env(cs);
     int bound;
 
-    dc->mem_idx = cpu_mmu_index(env, false);
+    dc->mem_idx = cpu_mmu_index(cs, false);
     dc->tb_flags = dc->base.tb->flags;
     dc->delayed_branch = (dc->tb_flags & TB_FLAGS_DFLAG) != 0;
     dc->cpucfgr = env->cpucfgr;
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index f8101ff..a44de22 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1624,7 +1624,7 @@
 
 /* MMU modes definitions */
 #define MMU_USER_IDX 0
-static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch)
+static inline int ppc_env_mmu_index(CPUPPCState *env, bool ifetch)
 {
 #ifdef CONFIG_USER_ONLY
     return MMU_USER_IDX;
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 23eb552..9931372 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7105,6 +7105,11 @@
     return cs->interrupt_request & CPU_INTERRUPT_HARD;
 }
 
+static int ppc_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    return ppc_env_mmu_index(cpu_env(cs), ifetch);
+}
+
 static void ppc_cpu_reset_hold(Object *obj)
 {
     CPUState *s = CPU(obj);
@@ -7372,6 +7377,7 @@
 
     cc->class_by_name = ppc_cpu_class_by_name;
     cc->has_work = ppc_cpu_has_work;
+    cc->mmu_index = ppc_cpu_mmu_index;
     cc->dump_state = ppc_cpu_dump_state;
     cc->set_pc = ppc_cpu_set_pc;
     cc->get_pc = ppc_cpu_get_pc;
@@ -7457,7 +7463,7 @@
     qemu_fprintf(f, "MSR " TARGET_FMT_lx " HID0 " TARGET_FMT_lx "  HF "
                  "%08x iidx %d didx %d\n",
                  env->msr, env->spr[SPR_HID0], env->hflags,
-                 cpu_mmu_index(env, true), cpu_mmu_index(env, false));
+                 ppc_env_mmu_index(env, true), ppc_env_mmu_index(env, false));
 #if !defined(CONFIG_USER_ONLY)
     if (env->tb_env) {
         qemu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index c753548..ea7e844 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -83,7 +83,7 @@
 void helper_lmw(CPUPPCState *env, target_ulong addr, uint32_t reg)
 {
     uintptr_t raddr = GETPC();
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = ppc_env_mmu_index(env, false);
     void *host = probe_contiguous(env, addr, (32 - reg) * 4,
                                   MMU_DATA_LOAD, mmu_idx, raddr);
 
@@ -105,7 +105,7 @@
 void helper_stmw(CPUPPCState *env, target_ulong addr, uint32_t reg)
 {
     uintptr_t raddr = GETPC();
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = ppc_env_mmu_index(env, false);
     void *host = probe_contiguous(env, addr, (32 - reg) * 4,
                                   MMU_DATA_STORE, mmu_idx, raddr);
 
@@ -135,7 +135,7 @@
         return;
     }
 
-    mmu_idx = cpu_mmu_index(env, false);
+    mmu_idx = ppc_env_mmu_index(env, false);
     host = probe_contiguous(env, addr, nb, MMU_DATA_LOAD, mmu_idx, raddr);
 
     if (likely(host)) {
@@ -224,7 +224,7 @@
         return;
     }
 
-    mmu_idx = cpu_mmu_index(env, false);
+    mmu_idx = ppc_env_mmu_index(env, false);
     host = probe_contiguous(env, addr, nb, MMU_DATA_STORE, mmu_idx, raddr);
 
     if (likely(host)) {
@@ -276,7 +276,7 @@
     target_ulong mask, dcbz_size = env->dcache_line_size;
     uint32_t i;
     void *haddr;
-    int mmu_idx = epid ? PPC_TLB_EPID_STORE : cpu_mmu_index(env, false);
+    int mmu_idx = epid ? PPC_TLB_EPID_STORE : ppc_env_mmu_index(env, false);
 
 #if defined(TARGET_PPC64)
     /* Check for dcbz vs dcbzl on 970 */
diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 6ca5d12..751403f 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -1561,9 +1561,9 @@
      * mapped by code TLBs, so we also try a MMU_INST_FETCH.
      */
     if (ppc_xlate(cpu, addr, MMU_DATA_LOAD, &raddr, &s, &p,
-                  cpu_mmu_index(&cpu->env, false), false) ||
+                  ppc_env_mmu_index(&cpu->env, false), false) ||
         ppc_xlate(cpu, addr, MMU_INST_FETCH, &raddr, &s, &p,
-                  cpu_mmu_index(&cpu->env, true), false)) {
+                  ppc_env_mmu_index(&cpu->env, true), false)) {
         return raddr & TARGET_PAGE_MASK;
     }
     return -1;
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 8cbfc7e..be21fa0 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -867,6 +867,11 @@
 #endif
 }
 
+static int riscv_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    return riscv_env_mmu_index(cpu_env(cs), ifetch);
+}
+
 static void riscv_cpu_reset_hold(Object *obj)
 {
 #ifndef CONFIG_USER_ONLY
@@ -1810,6 +1815,7 @@
 
     cc->class_by_name = riscv_cpu_class_by_name;
     cc->has_work = riscv_cpu_has_work;
+    cc->mmu_index = riscv_cpu_mmu_index;
     cc->dump_state = riscv_cpu_dump_state;
     cc->set_pc = riscv_cpu_set_pc;
     cc->get_pc = riscv_cpu_get_pc;
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 5f3955c..f63ee9c 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -498,7 +498,7 @@
 void riscv_cpu_set_geilen(CPURISCVState *env, target_ulong geilen);
 bool riscv_cpu_vector_enabled(CPURISCVState *env);
 void riscv_cpu_set_virt_enabled(CPURISCVState *env, bool enable);
-int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch);
+int riscv_env_mmu_index(CPURISCVState *env, bool ifetch);
 G_NORETURN void  riscv_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
                                                MMUAccessType access_type,
                                                int mmu_idx, uintptr_t retaddr);
@@ -507,8 +507,6 @@
                         bool probe, uintptr_t retaddr);
 char *riscv_isa_string(RISCVCPU *cpu);
 
-#define cpu_mmu_index riscv_cpu_mmu_index
-
 #ifndef CONFIG_USER_ONLY
 void riscv_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
                                      vaddr addr, unsigned size,
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index c7cc7eb..b6b23b7 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -33,7 +33,7 @@
 #include "debug.h"
 #include "tcg/oversized-guest.h"
 
-int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
+int riscv_env_mmu_index(CPURISCVState *env, bool ifetch)
 {
 #ifdef CONFIG_USER_ONLY
     return 0;
@@ -106,7 +106,7 @@
 #else
     flags = FIELD_DP32(flags, TB_FLAGS, PRIV, env->priv);
 
-    flags |= cpu_mmu_index(env, 0);
+    flags |= riscv_env_mmu_index(env, 0);
     fs = get_field(env->mstatus, MSTATUS_FS);
     vs = get_field(env->mstatus, MSTATUS_VS);
 
@@ -1200,7 +1200,7 @@
     CPURISCVState *env = &cpu->env;
     hwaddr phys_addr;
     int prot;
-    int mmu_idx = cpu_mmu_index(&cpu->env, false);
+    int mmu_idx = riscv_env_mmu_index(&cpu->env, false);
 
     if (get_physical_address(env, &phys_addr, &prot, addr, NULL, 0, mmu_idx,
                              true, env->virt_enabled, true)) {
diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
index 5355225..f414aae 100644
--- a/target/riscv/op_helper.c
+++ b/target/riscv/op_helper.c
@@ -157,7 +157,7 @@
 {
     RISCVCPU *cpu = env_archcpu(env);
     uint16_t cbozlen = cpu->cfg.cboz_blocksize;
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = riscv_env_mmu_index(env, false);
     uintptr_t ra = GETPC();
     void *mem;
 
@@ -205,7 +205,7 @@
                                 uintptr_t ra)
 {
     RISCVCPU *cpu = env_archcpu(env);
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = riscv_env_mmu_index(env, false);
     uint16_t cbomlen = cpu->cfg.cbom_blocksize;
     void *phost;
     int ret;
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index c1c3a4d..fe0d5d0 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -113,14 +113,15 @@
 {
     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
     target_ulong curlen = MIN(pagelen, len);
+    int mmu_index = riscv_env_mmu_index(env, false);
 
     probe_access(env, adjust_addr(env, addr), curlen, access_type,
-                 cpu_mmu_index(env, false), ra);
+                 mmu_index, ra);
     if (len > curlen) {
         addr += curlen;
         curlen = len - curlen;
         probe_access(env, adjust_addr(env, addr), curlen, access_type,
-                     cpu_mmu_index(env, false), ra);
+                     mmu_index, ra);
     }
 }
 
@@ -464,6 +465,7 @@
     uint32_t esz = 1 << log2_esz;
     uint32_t vma = vext_vma(desc);
     target_ulong addr, offset, remain;
+    int mmu_index = riscv_env_mmu_index(env, false);
 
     /* probe every access */
     for (i = env->vstart; i < env->vl; i++) {
@@ -478,8 +480,7 @@
             remain = nf << log2_esz;
             while (remain > 0) {
                 offset = -(addr | TARGET_PAGE_MASK);
-                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
-                                         cpu_mmu_index(env, false));
+                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_index);
                 if (host) {
 #ifdef CONFIG_USER_ONLY
                     if (!page_check_range(addr, offset, PAGE_READ)) {
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
index 353132d..5205167 100644
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -64,6 +64,11 @@
         (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIR);
 }
 
+static int riscv_cpu_mmu_index(CPUState *cs, bool ifunc)
+{
+    return 0;
+}
+
 static void rx_cpu_reset_hold(Object *obj)
 {
     RXCPU *cpu = RX_CPU(obj);
@@ -204,6 +209,7 @@
 
     cc->class_by_name = rx_cpu_class_by_name;
     cc->has_work = rx_cpu_has_work;
+    cc->mmu_index = riscv_cpu_mmu_index;
     cc->dump_state = rx_cpu_dump_state;
     cc->set_pc = rx_cpu_set_pc;
     cc->get_pc = rx_cpu_get_pc;
diff --git a/target/rx/cpu.h b/target/rx/cpu.h
index 65f9cd2..c53593d 100644
--- a/target/rx/cpu.h
+++ b/target/rx/cpu.h
@@ -158,11 +158,6 @@
     *flags = FIELD_DP32(*flags, PSW, U, env->psw_u);
 }
 
-static inline int cpu_mmu_index(CPURXState *env, bool ifetch)
-{
-    return 0;
-}
-
 static inline uint32_t rx_cpu_pack_psw(CPURXState *env)
 {
     uint32_t psw = 0;
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index 7f12386..49a2341 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -142,6 +142,11 @@
     return s390_cpu_has_int(cpu);
 }
 
+static int s390x_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    return s390x_env_mmu_index(cpu_env(cs), ifetch);
+}
+
 static void s390_query_cpu_fast(CPUState *cpu, CpuInfoFast *value)
 {
     S390CPU *s390_cpu = S390_CPU(cpu);
@@ -352,6 +357,7 @@
     scc->reset = s390_cpu_reset;
     cc->class_by_name = s390_cpu_class_by_name,
     cc->has_work = s390_cpu_has_work;
+    cc->mmu_index = s390x_cpu_mmu_index;
     cc->dump_state = s390_cpu_dump_state;
     cc->query_cpu_fast = s390_query_cpu_fast;
     cc->set_pc = s390_cpu_set_pc;
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index fa3aac4..d37a49b 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -381,7 +381,7 @@
 #define MMU_HOME_IDX            2
 #define MMU_REAL_IDX            3
 
-static inline int cpu_mmu_index(CPUS390XState *env, bool ifetch)
+static inline int s390x_env_mmu_index(CPUS390XState *env, bool ifetch)
 {
 #ifdef CONFIG_USER_ONLY
     return MMU_USER_IDX;
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index 8410325..557831d 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -358,7 +358,7 @@
 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = s390x_env_mmu_index(env, false);
     S390Access srca1, srca2, desta;
     uint32_t i;
     uint8_t c = 0;
@@ -392,7 +392,7 @@
 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = s390x_env_mmu_index(env, false);
     S390Access srca1, srca2, desta;
     uint32_t i;
     uint8_t c = 0;
@@ -433,7 +433,7 @@
 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = s390x_env_mmu_index(env, false);
     S390Access srca1, srca2, desta;
     uint32_t i;
     uint8_t c = 0;
@@ -467,7 +467,7 @@
 static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                               uint64_t src, uintptr_t ra)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = s390x_env_mmu_index(env, false);
     S390Access srca, desta;
     uint32_t i;
 
@@ -508,7 +508,7 @@
 /* move right to left */
 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = s390x_env_mmu_index(env, false);
     const uint64_t ra = GETPC();
     S390Access srca, desta;
     int32_t i;
@@ -529,7 +529,7 @@
 /* move inverse  */
 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = s390x_env_mmu_index(env, false);
     S390Access srca, desta;
     uintptr_t ra = GETPC();
     int i;
@@ -550,7 +550,7 @@
 /* move numerics  */
 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = s390x_env_mmu_index(env, false);
     S390Access srca1, srca2, desta;
     uintptr_t ra = GETPC();
     int i;
@@ -572,7 +572,7 @@
 /* move with offset  */
 void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = s390x_env_mmu_index(env, false);
     /* MVO always processes one more byte than specified - maximum is 16 */
     const int len_dest = (l >> 4) + 1;
     const int len_src = (l & 0xf) + 1;
@@ -606,7 +606,7 @@
 /* move zones  */
 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = s390x_env_mmu_index(env, false);
     S390Access srca1, srca2, desta;
     uintptr_t ra = GETPC();
     int i;
@@ -669,7 +669,7 @@
 
     if (!mask) {
         /* Recognize access exceptions for the first byte */
-        probe_read(env, addr, 1, cpu_mmu_index(env, false), ra);
+        probe_read(env, addr, 1, s390x_env_mmu_index(env, false), ra);
     }
 
     while (mask) {
@@ -893,7 +893,7 @@
 {
     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = s390x_env_mmu_index(env, false);
     const bool f = extract64(r0, 11, 1);
     const bool s = extract64(r0, 10, 1);
     const bool cco = extract64(r0, 8, 1);
@@ -946,7 +946,7 @@
 /* string copy */
 uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = s390x_env_mmu_index(env, false);
     const uint64_t d = get_address(env, r1);
     const uint64_t s = get_address(env, r2);
     const uint8_t c = env->regs[0];
@@ -1027,7 +1027,7 @@
                                uint64_t *src, uint64_t *srclen,
                                uint16_t pad, int wordsize, uintptr_t ra)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = s390x_env_mmu_index(env, false);
     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
     S390Access srca, desta;
     int i, cc;
@@ -1084,7 +1084,7 @@
 /* move long */
 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const int mmu_idx = s390x_env_mmu_index(env, false);
     uintptr_t ra = GETPC();
     uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
     uint64_t dest = get_address(env, r1);
@@ -1742,7 +1742,7 @@
 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
                         uint64_t a2, bool parallel)
 {
-    uint32_t mem_idx = cpu_mmu_index(env, false);
+    uint32_t mem_idx = s390x_env_mmu_index(env, false);
     MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
     MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
     MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
@@ -2867,12 +2867,14 @@
 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
                         uintptr_t ra)
 {
+    const int mmu_idx = s390x_env_mmu_index(env, false);
+
     /* test the actual access, not just any access to the page due to LAP */
     while (len) {
         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
         const uint64_t curlen = MIN(pagelen, len);
 
-        probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
+        probe_write(env, addr, curlen, mmu_idx, ra);
         addr = wrap_address(env, addr + curlen);
         len -= curlen;
     }
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index 3977295..2031168 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -89,6 +89,21 @@
     return cs->interrupt_request & CPU_INTERRUPT_HARD;
 }
 
+static int sh4_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    CPUSH4State *env = cpu_env(cs);
+
+    /*
+     * The instruction in a RTE delay slot is fetched in privileged mode,
+     * but executed in user mode.
+     */
+    if (ifetch && (env->flags & TB_FLAG_DELAY_SLOT_RTE)) {
+        return 0;
+    } else {
+        return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
+    }
+}
+
 static void superh_cpu_reset_hold(Object *obj)
 {
     CPUState *s = CPU(obj);
@@ -266,6 +281,7 @@
 
     cc->class_by_name = superh_cpu_class_by_name;
     cc->has_work = superh_cpu_has_work;
+    cc->mmu_index = sh4_cpu_mmu_index;
     cc->dump_state = superh_cpu_dump_state;
     cc->set_pc = superh_cpu_set_pc;
     cc->get_pc = superh_cpu_get_pc;
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index 0e6fa65..9211da6 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -273,16 +273,6 @@
 
 /* MMU modes definitions */
 #define MMU_USER_IDX 1
-static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch)
-{
-    /* The instruction in a RTE delay slot is fetched in privileged
-       mode, but executed in user mode.  */
-    if (ifetch && (env->flags & TB_FLAG_DELAY_SLOT_RTE)) {
-        return 0;
-    } else {
-        return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
-    }
-}
 
 #include "exec/cpu-all.h"
 
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index 7d0d629..313ebc4 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -368,7 +368,7 @@
     {
         .name = "Fujitsu MB86904",
         .iu_version = 0x04 << 24, /* Impl 0, ver 4 */
-        .fpu_version = 4 << 17, /* FPU version 4 (Meiko) */
+        .fpu_version = 4 << FSR_VER_SHIFT, /* FPU version 4 (Meiko) */
         .mmu_version = 0x04 << 24, /* Impl 0, ver 4 */
         .mmu_bm = 0x00004000,
         .mmu_ctpr_mask = 0x00ffffc0,
@@ -381,7 +381,7 @@
     {
         .name = "Fujitsu MB86907",
         .iu_version = 0x05 << 24, /* Impl 0, ver 5 */
-        .fpu_version = 4 << 17, /* FPU version 4 (Meiko) */
+        .fpu_version = 4 << FSR_VER_SHIFT, /* FPU version 4 (Meiko) */
         .mmu_version = 0x05 << 24, /* Impl 0, ver 5 */
         .mmu_bm = 0x00004000,
         .mmu_ctpr_mask = 0xffffffc0,
@@ -394,7 +394,7 @@
     {
         .name = "TI MicroSparc I",
         .iu_version = 0x41000000,
-        .fpu_version = 4 << 17,
+        .fpu_version = 4 << FSR_VER_SHIFT,
         .mmu_version = 0x41000000,
         .mmu_bm = 0x00004000,
         .mmu_ctpr_mask = 0x007ffff0,
@@ -407,7 +407,7 @@
     {
         .name = "TI MicroSparc II",
         .iu_version = 0x42000000,
-        .fpu_version = 4 << 17,
+        .fpu_version = 4 << FSR_VER_SHIFT,
         .mmu_version = 0x02000000,
         .mmu_bm = 0x00004000,
         .mmu_ctpr_mask = 0x00ffffc0,
@@ -420,7 +420,7 @@
     {
         .name = "TI MicroSparc IIep",
         .iu_version = 0x42000000,
-        .fpu_version = 4 << 17,
+        .fpu_version = 4 << FSR_VER_SHIFT,
         .mmu_version = 0x04000000,
         .mmu_bm = 0x00004000,
         .mmu_ctpr_mask = 0x00ffffc0,
@@ -433,7 +433,7 @@
     {
         .name = "TI SuperSparc 40", /* STP1020NPGA */
         .iu_version = 0x41000000, /* SuperSPARC 2.x */
-        .fpu_version = 0 << 17,
+        .fpu_version = 0 << FSR_VER_SHIFT,
         .mmu_version = 0x00000800, /* SuperSPARC 2.x, no MXCC */
         .mmu_bm = 0x00002000,
         .mmu_ctpr_mask = 0xffffffc0,
@@ -446,7 +446,7 @@
     {
         .name = "TI SuperSparc 50", /* STP1020PGA */
         .iu_version = 0x40000000, /* SuperSPARC 3.x */
-        .fpu_version = 0 << 17,
+        .fpu_version = 0 << FSR_VER_SHIFT,
         .mmu_version = 0x01000800, /* SuperSPARC 3.x, no MXCC */
         .mmu_bm = 0x00002000,
         .mmu_ctpr_mask = 0xffffffc0,
@@ -459,7 +459,7 @@
     {
         .name = "TI SuperSparc 51",
         .iu_version = 0x40000000, /* SuperSPARC 3.x */
-        .fpu_version = 0 << 17,
+        .fpu_version = 0 << FSR_VER_SHIFT,
         .mmu_version = 0x01000000, /* SuperSPARC 3.x, MXCC */
         .mmu_bm = 0x00002000,
         .mmu_ctpr_mask = 0xffffffc0,
@@ -473,7 +473,7 @@
     {
         .name = "TI SuperSparc 60", /* STP1020APGA */
         .iu_version = 0x40000000, /* SuperSPARC 3.x */
-        .fpu_version = 0 << 17,
+        .fpu_version = 0 << FSR_VER_SHIFT,
         .mmu_version = 0x01000800, /* SuperSPARC 3.x, no MXCC */
         .mmu_bm = 0x00002000,
         .mmu_ctpr_mask = 0xffffffc0,
@@ -486,7 +486,7 @@
     {
         .name = "TI SuperSparc 61",
         .iu_version = 0x44000000, /* SuperSPARC 3.x */
-        .fpu_version = 0 << 17,
+        .fpu_version = 0 << FSR_VER_SHIFT,
         .mmu_version = 0x01000000, /* SuperSPARC 3.x, MXCC */
         .mmu_bm = 0x00002000,
         .mmu_ctpr_mask = 0xffffffc0,
@@ -500,7 +500,7 @@
     {
         .name = "TI SuperSparc II",
         .iu_version = 0x40000000, /* SuperSPARC II 1.x */
-        .fpu_version = 0 << 17,
+        .fpu_version = 0 << FSR_VER_SHIFT,
         .mmu_version = 0x08000000, /* SuperSPARC II 1.x, MXCC */
         .mmu_bm = 0x00002000,
         .mmu_ctpr_mask = 0xffffffc0,
@@ -514,7 +514,7 @@
     {
         .name = "LEON2",
         .iu_version = 0xf2000000,
-        .fpu_version = 4 << 17, /* FPU version 4 (Meiko) */
+        .fpu_version = 4 << FSR_VER_SHIFT, /* FPU version 4 (Meiko) */
         .mmu_version = 0xf2000000,
         .mmu_bm = 0x00004000,
         .mmu_ctpr_mask = 0x007ffff0,
@@ -527,7 +527,7 @@
     {
         .name = "LEON3",
         .iu_version = 0xf3000000,
-        .fpu_version = 4 << 17, /* FPU version 4 (Meiko) */
+        .fpu_version = 4 << FSR_VER_SHIFT, /* FPU version 4 (Meiko) */
         .mmu_version = 0xf3000000,
         .mmu_bm = 0x00000000,
         .mmu_ctpr_mask = 0xfffffffc,
@@ -670,7 +670,7 @@
                  env->cansave, env->canrestore, env->otherwin, env->wstate,
                  env->cleanwin, env->nwindows - 1 - env->cwp);
     qemu_fprintf(f, "fsr: " TARGET_FMT_lx " y: " TARGET_FMT_lx " fprs: %016x\n",
-                 env->fsr, env->y, env->fprs);
+                 cpu_get_fsr(env), env->y, env->fprs);
 
 #else
     qemu_fprintf(f, "psr: %08x (icc: ", cpu_get_psr(env));
@@ -679,7 +679,7 @@
                  env->psrps ? 'P' : '-', env->psret ? 'E' : '-',
                  env->wim);
     qemu_fprintf(f, "fsr: " TARGET_FMT_lx " y: " TARGET_FMT_lx "\n",
-                 env->fsr, env->y);
+                 cpu_get_fsr(env), env->y);
 #endif
     qemu_fprintf(f, "\n");
 }
@@ -718,6 +718,34 @@
            cpu_interrupts_enabled(env);
 }
 
+static int sparc_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    CPUSPARCState *env = cpu_env(cs);
+
+#ifndef TARGET_SPARC64
+    if ((env->mmuregs[0] & MMU_E) == 0) { /* MMU disabled */
+        return MMU_PHYS_IDX;
+    } else {
+        return env->psrs;
+    }
+#else
+    /* IMMU or DMMU disabled.  */
+    if (ifetch
+        ? (env->lsu & IMMU_E) == 0 || (env->pstate & PS_RED) != 0
+        : (env->lsu & DMMU_E) == 0) {
+        return MMU_PHYS_IDX;
+    } else if (cpu_hypervisor_mode(env)) {
+        return MMU_PHYS_IDX;
+    } else if (env->tl > 0) {
+        return MMU_NUCLEUS_IDX;
+    } else if (cpu_supervisor_mode(env)) {
+        return MMU_KERNEL_IDX;
+    } else {
+        return MMU_USER_IDX;
+    }
+#endif
+}
+
 static char *sparc_cpu_type_name(const char *cpu_model)
 {
     char *name = g_strdup_printf(SPARC_CPU_TYPE_NAME("%s"), cpu_model);
@@ -758,7 +786,6 @@
 #endif
 
     env->version = env->def.iu_version;
-    env->fsr = env->def.fpu_version;
     env->nwindows = env->def.nwindows;
 #if !defined(TARGET_SPARC64)
     env->mmuregs[0] |= env->def.mmu_version;
@@ -770,6 +797,7 @@
     env->version |= env->def.maxtl << 8;
     env->version |= env->def.nwindows - 1;
 #endif
+    cpu_put_fsr(env, 0);
 
     cpu_exec_realizefn(cs, &local_err);
     if (local_err != NULL) {
@@ -906,6 +934,7 @@
     cc->class_by_name = sparc_cpu_class_by_name;
     cc->parse_features = sparc_cpu_parse_features;
     cc->has_work = sparc_cpu_has_work;
+    cc->mmu_index = sparc_cpu_mmu_index;
     cc->dump_state = sparc_cpu_dump_state;
 #if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY)
     cc->memory_rw_debug = sparc_cpu_memory_rw_debug;
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
index 12a11ec..edf46b3 100644
--- a/target/sparc/cpu.h
+++ b/target/sparc/cpu.h
@@ -31,8 +31,10 @@
 
 #if !defined(TARGET_SPARC64)
 #define TARGET_DPREGS 16
+#define TARGET_FCCREGS 1
 #else
 #define TARGET_DPREGS 32
+#define TARGET_FCCREGS 4
 #endif
 
 /*#define EXCP_INTERRUPT 0x100*/
@@ -176,6 +178,7 @@
 #define FSR_DZM   (1ULL << 24)
 #define FSR_NXM   (1ULL << 23)
 #define FSR_TEM_MASK (FSR_NVM | FSR_OFM | FSR_UFM | FSR_DZM | FSR_NXM)
+#define FSR_TEM_SHIFT  23
 
 #define FSR_NVA   (1ULL << 9)
 #define FSR_OFA   (1ULL << 8)
@@ -183,6 +186,7 @@
 #define FSR_DZA   (1ULL << 6)
 #define FSR_NXA   (1ULL << 5)
 #define FSR_AEXC_MASK (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA)
+#define FSR_AEXC_SHIFT 5
 
 #define FSR_NVC   (1ULL << 4)
 #define FSR_OFC   (1ULL << 3)
@@ -191,31 +195,22 @@
 #define FSR_NXC   (1ULL << 0)
 #define FSR_CEXC_MASK (FSR_NVC | FSR_OFC | FSR_UFC | FSR_DZC | FSR_NXC)
 
+#define FSR_VER_SHIFT  17
+#define FSR_VER_MASK   (7 << FSR_VER_SHIFT)
+
 #define FSR_FTT2   (1ULL << 16)
 #define FSR_FTT1   (1ULL << 15)
 #define FSR_FTT0   (1ULL << 14)
 #define FSR_FTT_MASK (FSR_FTT2 | FSR_FTT1 | FSR_FTT0)
-#ifdef TARGET_SPARC64
-#define FSR_FTT_NMASK      0xfffffffffffe3fffULL
-#define FSR_FTT_CEXC_NMASK 0xfffffffffffe3fe0ULL
-#define FSR_LDFSR_OLDMASK  0x0000003f000fc000ULL
-#define FSR_LDXFSR_MASK    0x0000003fcfc00fffULL
-#define FSR_LDXFSR_OLDMASK 0x00000000000fc000ULL
-#else
-#define FSR_FTT_NMASK      0xfffe3fffULL
-#define FSR_FTT_CEXC_NMASK 0xfffe3fe0ULL
-#define FSR_LDFSR_OLDMASK  0x000fc000ULL
-#endif
-#define FSR_LDFSR_MASK     0xcfc00fffULL
 #define FSR_FTT_IEEE_EXCP (1ULL << 14)
 #define FSR_FTT_UNIMPFPOP (3ULL << 14)
 #define FSR_FTT_SEQ_ERROR (4ULL << 14)
 #define FSR_FTT_INVAL_FPR (6ULL << 14)
 
-#define FSR_FCC1_SHIFT 11
-#define FSR_FCC1  (1ULL << FSR_FCC1_SHIFT)
-#define FSR_FCC0_SHIFT 10
-#define FSR_FCC0  (1ULL << FSR_FCC0_SHIFT)
+#define FSR_FCC0_SHIFT    10
+#define FSR_FCC1_SHIFT    32
+#define FSR_FCC2_SHIFT    34
+#define FSR_FCC3_SHIFT    36
 
 /* MMU */
 #define MMU_E     (1<<0)
@@ -461,7 +456,11 @@
     target_ulong cond; /* conditional branch result (XXX: save it in a
                           temporary register when possible) */
 
-    target_ulong fsr;      /* FPU state register */
+    /* FPU State Register, in parts */
+    uint32_t fsr;                    /* rm, tem, aexc */
+    uint32_t fsr_cexc_ftt;           /* cexc, ftt */
+    uint32_t fcc[TARGET_FCCREGS];    /* fcc* */
+
     CPU_DoubleU fpr[TARGET_DPREGS];  /* floating point registers */
     uint32_t cwp;      /* index of current register window (extracted
                           from PSR) */
@@ -509,8 +508,6 @@
     uint64_t mmubpregs[4];
     uint64_t prom_addr;
 #endif
-    /* temporary float registers */
-    float128 qt0, qt1;
     float_status fp_status;
 #if defined(TARGET_SPARC64)
 #define MAXTL_MAX 8
@@ -619,7 +616,9 @@
                                 const TranslationBlock *tb,
                                 const uint64_t *data);
 
-/* cpu-exec.c */
+/* fop_helper.c */
+target_ulong cpu_get_fsr(CPUSPARCState *);
+void cpu_put_fsr(CPUSPARCState *, target_ulong);
 
 /* win_helper.c */
 target_ulong cpu_get_psr(CPUSPARCState *env1);
@@ -708,34 +707,6 @@
 }
 #endif
 
-static inline int cpu_mmu_index(CPUSPARCState *env, bool ifetch)
-{
-#if defined(CONFIG_USER_ONLY)
-    return MMU_USER_IDX;
-#elif !defined(TARGET_SPARC64)
-    if ((env->mmuregs[0] & MMU_E) == 0) { /* MMU disabled */
-        return MMU_PHYS_IDX;
-    } else {
-        return env->psrs;
-    }
-#else
-    /* IMMU or DMMU disabled.  */
-    if (ifetch
-        ? (env->lsu & IMMU_E) == 0 || (env->pstate & PS_RED) != 0
-        : (env->lsu & DMMU_E) == 0) {
-        return MMU_PHYS_IDX;
-    } else if (cpu_hypervisor_mode(env)) {
-        return MMU_PHYS_IDX;
-    } else if (env->tl > 0) {
-        return MMU_NUCLEUS_IDX;
-    } else if (cpu_supervisor_mode(env)) {
-        return MMU_KERNEL_IDX;
-    } else {
-        return MMU_USER_IDX;
-    }
-#endif
-}
-
 static inline int cpu_interrupts_enabled(CPUSPARCState *env1)
 {
 #if !defined (TARGET_SPARC64)
@@ -783,7 +754,7 @@
     uint32_t flags;
     *pc = env->pc;
     *cs_base = env->npc;
-    flags = cpu_mmu_index(env, false);
+    flags = cpu_mmu_index(env_cpu(env), false);
 #ifndef CONFIG_USER_ONLY
     if (cpu_supervisor_mode(env)) {
         flags |= TB_FLAG_SUPER;
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
index 0f8aa3a..1205a59 100644
--- a/target/sparc/fop_helper.c
+++ b/target/sparc/fop_helper.c
@@ -23,13 +23,32 @@
 #include "exec/helper-proto.h"
 #include "fpu/softfloat.h"
 
-#define QT0 (env->qt0)
-#define QT1 (env->qt1)
+static inline float128 f128_in(Int128 i)
+{
+    union {
+        Int128 i;
+        float128 f;
+    } u;
 
-static target_ulong do_check_ieee_exceptions(CPUSPARCState *env, uintptr_t ra)
+    u.i = i;
+    return u.f;
+}
+
+static inline Int128 f128_ret(float128 f)
+{
+    union {
+        Int128 i;
+        float128 f;
+    } u;
+
+    u.f = f;
+    return u.i;
+}
+
+static void check_ieee_exceptions(CPUSPARCState *env, uintptr_t ra)
 {
     target_ulong status = get_float_exception_flags(&env->fp_status);
-    target_ulong fsr = env->fsr;
+    uint32_t cexc = 0;
 
     if (unlikely(status)) {
         /* Keep exception flags clear for next time.  */
@@ -37,333 +56,384 @@
 
         /* Copy IEEE 754 flags into FSR */
         if (status & float_flag_invalid) {
-            fsr |= FSR_NVC;
+            cexc |= FSR_NVC;
         }
         if (status & float_flag_overflow) {
-            fsr |= FSR_OFC;
+            cexc |= FSR_OFC;
         }
         if (status & float_flag_underflow) {
-            fsr |= FSR_UFC;
+            cexc |= FSR_UFC;
         }
         if (status & float_flag_divbyzero) {
-            fsr |= FSR_DZC;
+            cexc |= FSR_DZC;
         }
         if (status & float_flag_inexact) {
-            fsr |= FSR_NXC;
+            cexc |= FSR_NXC;
         }
 
-        if ((fsr & FSR_CEXC_MASK) & ((fsr & FSR_TEM_MASK) >> 23)) {
-            CPUState *cs = env_cpu(env);
-
-            /* Unmasked exception, generate a trap.  Note that while
-               the helper is marked as NO_WG, we can get away with
-               writing to cpu state along the exception path, since
-               TCG generated code will never see the write.  */
-            env->fsr = fsr | FSR_FTT_IEEE_EXCP;
-            cs->exception_index = TT_FP_EXCP;
-            cpu_loop_exit_restore(cs, ra);
-        } else {
-            /* Accumulate exceptions */
-            fsr |= (fsr & FSR_CEXC_MASK) << 5;
+        if (cexc & (env->fsr >> FSR_TEM_SHIFT)) {
+            /* Unmasked exception, generate an IEEE trap. */
+            env->fsr_cexc_ftt = cexc | FSR_FTT_IEEE_EXCP;
+            cpu_raise_exception_ra(env, TT_FP_EXCP, ra);
         }
+
+        /* Accumulate exceptions */
+        env->fsr |= cexc << FSR_AEXC_SHIFT;
     }
 
-    return fsr;
+    /* No trap, so FTT is cleared. */
+    env->fsr_cexc_ftt = cexc;
 }
 
-target_ulong helper_check_ieee_exceptions(CPUSPARCState *env)
+float32 helper_fadds(CPUSPARCState *env, float32 src1, float32 src2)
 {
-    return do_check_ieee_exceptions(env, GETPC());
+    float32 ret = float32_add(src1, src2, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
-#define F_HELPER(name, p) void helper_f##name##p(CPUSPARCState *env)
+float32 helper_fsubs(CPUSPARCState *env, float32 src1, float32 src2)
+{
+    float32 ret = float32_sub(src1, src2, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
+}
 
-#define F_BINOP(name)                                           \
-    float32 helper_f ## name ## s (CPUSPARCState *env, float32 src1, \
-                                   float32 src2)                \
-    {                                                           \
-        return float32_ ## name (src1, src2, &env->fp_status);  \
-    }                                                           \
-    float64 helper_f ## name ## d (CPUSPARCState * env, float64 src1,\
-                                   float64 src2)                \
-    {                                                           \
-        return float64_ ## name (src1, src2, &env->fp_status);  \
-    }                                                           \
-    F_HELPER(name, q)                                           \
-    {                                                           \
-        QT0 = float128_ ## name (QT0, QT1, &env->fp_status);    \
-    }
+float32 helper_fmuls(CPUSPARCState *env, float32 src1, float32 src2)
+{
+    float32 ret = float32_mul(src1, src2, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
+}
 
-F_BINOP(add);
-F_BINOP(sub);
-F_BINOP(mul);
-F_BINOP(div);
-#undef F_BINOP
+float32 helper_fdivs(CPUSPARCState *env, float32 src1, float32 src2)
+{
+    float32 ret = float32_div(src1, src2, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
+}
+
+float64 helper_faddd(CPUSPARCState *env, float64 src1, float64 src2)
+{
+    float64 ret = float64_add(src1, src2, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
+}
+
+float64 helper_fsubd(CPUSPARCState *env, float64 src1, float64 src2)
+{
+    float64 ret = float64_sub(src1, src2, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
+}
+
+float64 helper_fmuld(CPUSPARCState *env, float64 src1, float64 src2)
+{
+    float64 ret = float64_mul(src1, src2, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
+}
+
+float64 helper_fdivd(CPUSPARCState *env, float64 src1, float64 src2)
+{
+    float64 ret = float64_div(src1, src2, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
+}
+
+Int128 helper_faddq(CPUSPARCState *env, Int128 src1, Int128 src2)
+{
+    float128 ret = float128_add(f128_in(src1), f128_in(src2), &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return f128_ret(ret);
+}
+
+Int128 helper_fsubq(CPUSPARCState *env, Int128 src1, Int128 src2)
+{
+    float128 ret = float128_sub(f128_in(src1), f128_in(src2), &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return f128_ret(ret);
+}
+
+Int128 helper_fmulq(CPUSPARCState *env, Int128 src1, Int128 src2)
+{
+    float128 ret = float128_mul(f128_in(src1), f128_in(src2), &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return f128_ret(ret);
+}
+
+Int128 helper_fdivq(CPUSPARCState *env, Int128 src1, Int128 src2)
+{
+    float128 ret = float128_div(f128_in(src1), f128_in(src2), &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return f128_ret(ret);
+}
 
 float64 helper_fsmuld(CPUSPARCState *env, float32 src1, float32 src2)
 {
-    return float64_mul(float32_to_float64(src1, &env->fp_status),
-                       float32_to_float64(src2, &env->fp_status),
-                       &env->fp_status);
+    float64 ret = float64_mul(float32_to_float64(src1, &env->fp_status),
+                              float32_to_float64(src2, &env->fp_status),
+                              &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
-void helper_fdmulq(CPUSPARCState *env, float64 src1, float64 src2)
+Int128 helper_fdmulq(CPUSPARCState *env, float64 src1, float64 src2)
 {
-    QT0 = float128_mul(float64_to_float128(src1, &env->fp_status),
-                       float64_to_float128(src2, &env->fp_status),
-                       &env->fp_status);
+    float128 ret = float128_mul(float64_to_float128(src1, &env->fp_status),
+                                float64_to_float128(src2, &env->fp_status),
+                                &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return f128_ret(ret);
 }
 
-float32 helper_fnegs(float32 src)
-{
-    return float32_chs(src);
-}
-
-#ifdef TARGET_SPARC64
-float64 helper_fnegd(float64 src)
-{
-    return float64_chs(src);
-}
-
-F_HELPER(neg, q)
-{
-    QT0 = float128_chs(QT1);
-}
-#endif
-
 /* Integer to float conversion.  */
 float32 helper_fitos(CPUSPARCState *env, int32_t src)
 {
-    return int32_to_float32(src, &env->fp_status);
+    float32 ret = int32_to_float32(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
 float64 helper_fitod(CPUSPARCState *env, int32_t src)
 {
-    return int32_to_float64(src, &env->fp_status);
+    float64 ret = int32_to_float64(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
-void helper_fitoq(CPUSPARCState *env, int32_t src)
+Int128 helper_fitoq(CPUSPARCState *env, int32_t src)
 {
-    QT0 = int32_to_float128(src, &env->fp_status);
+    float128 ret = int32_to_float128(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return f128_ret(ret);
 }
 
 #ifdef TARGET_SPARC64
 float32 helper_fxtos(CPUSPARCState *env, int64_t src)
 {
-    return int64_to_float32(src, &env->fp_status);
+    float32 ret = int64_to_float32(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
 float64 helper_fxtod(CPUSPARCState *env, int64_t src)
 {
-    return int64_to_float64(src, &env->fp_status);
+    float64 ret = int64_to_float64(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
-void helper_fxtoq(CPUSPARCState *env, int64_t src)
+Int128 helper_fxtoq(CPUSPARCState *env, int64_t src)
 {
-    QT0 = int64_to_float128(src, &env->fp_status);
+    float128 ret = int64_to_float128(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return f128_ret(ret);
 }
 #endif
-#undef F_HELPER
 
 /* floating point conversion */
 float32 helper_fdtos(CPUSPARCState *env, float64 src)
 {
-    return float64_to_float32(src, &env->fp_status);
+    float32 ret = float64_to_float32(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
 float64 helper_fstod(CPUSPARCState *env, float32 src)
 {
-    return float32_to_float64(src, &env->fp_status);
+    float64 ret = float32_to_float64(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
-float32 helper_fqtos(CPUSPARCState *env)
+float32 helper_fqtos(CPUSPARCState *env, Int128 src)
 {
-    return float128_to_float32(QT1, &env->fp_status);
+    float32 ret = float128_to_float32(f128_in(src), &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
-void helper_fstoq(CPUSPARCState *env, float32 src)
+Int128 helper_fstoq(CPUSPARCState *env, float32 src)
 {
-    QT0 = float32_to_float128(src, &env->fp_status);
+    float128 ret = float32_to_float128(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return f128_ret(ret);
 }
 
-float64 helper_fqtod(CPUSPARCState *env)
+float64 helper_fqtod(CPUSPARCState *env, Int128 src)
 {
-    return float128_to_float64(QT1, &env->fp_status);
+    float64 ret = float128_to_float64(f128_in(src), &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
-void helper_fdtoq(CPUSPARCState *env, float64 src)
+Int128 helper_fdtoq(CPUSPARCState *env, float64 src)
 {
-    QT0 = float64_to_float128(src, &env->fp_status);
+    float128 ret = float64_to_float128(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return f128_ret(ret);
 }
 
 /* Float to integer conversion.  */
 int32_t helper_fstoi(CPUSPARCState *env, float32 src)
 {
-    return float32_to_int32_round_to_zero(src, &env->fp_status);
+    int32_t ret = float32_to_int32_round_to_zero(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
 int32_t helper_fdtoi(CPUSPARCState *env, float64 src)
 {
-    return float64_to_int32_round_to_zero(src, &env->fp_status);
+    int32_t ret = float64_to_int32_round_to_zero(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
-int32_t helper_fqtoi(CPUSPARCState *env)
+int32_t helper_fqtoi(CPUSPARCState *env, Int128 src)
 {
-    return float128_to_int32_round_to_zero(QT1, &env->fp_status);
+    int32_t ret = float128_to_int32_round_to_zero(f128_in(src),
+                                                  &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
 #ifdef TARGET_SPARC64
 int64_t helper_fstox(CPUSPARCState *env, float32 src)
 {
-    return float32_to_int64_round_to_zero(src, &env->fp_status);
+    int64_t ret = float32_to_int64_round_to_zero(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
 int64_t helper_fdtox(CPUSPARCState *env, float64 src)
 {
-    return float64_to_int64_round_to_zero(src, &env->fp_status);
+    int64_t ret = float64_to_int64_round_to_zero(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
-int64_t helper_fqtox(CPUSPARCState *env)
+int64_t helper_fqtox(CPUSPARCState *env, Int128 src)
 {
-    return float128_to_int64_round_to_zero(QT1, &env->fp_status);
-}
-#endif
-
-float32 helper_fabss(float32 src)
-{
-    return float32_abs(src);
-}
-
-#ifdef TARGET_SPARC64
-float64 helper_fabsd(float64 src)
-{
-    return float64_abs(src);
-}
-
-void helper_fabsq(CPUSPARCState *env)
-{
-    QT0 = float128_abs(QT1);
+    int64_t ret = float128_to_int64_round_to_zero(f128_in(src),
+                                                  &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 #endif
 
 float32 helper_fsqrts(CPUSPARCState *env, float32 src)
 {
-    return float32_sqrt(src, &env->fp_status);
+    float32 ret = float32_sqrt(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
 float64 helper_fsqrtd(CPUSPARCState *env, float64 src)
 {
-    return float64_sqrt(src, &env->fp_status);
+    float64 ret = float64_sqrt(src, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
 }
 
-void helper_fsqrtq(CPUSPARCState *env)
+Int128 helper_fsqrtq(CPUSPARCState *env, Int128 src)
 {
-    QT0 = float128_sqrt(QT1, &env->fp_status);
+    float128 ret = float128_sqrt(f128_in(src), &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return f128_ret(ret);
 }
 
-#define GEN_FCMP(name, size, reg1, reg2, FS, E)                         \
-    target_ulong glue(helper_, name) (CPUSPARCState *env)               \
-    {                                                                   \
-        FloatRelation ret;                                              \
-        target_ulong fsr;                                               \
-        if (E) {                                                        \
-            ret = glue(size, _compare)(reg1, reg2, &env->fp_status);    \
-        } else {                                                        \
-            ret = glue(size, _compare_quiet)(reg1, reg2,                \
-                                             &env->fp_status);          \
-        }                                                               \
-        fsr = do_check_ieee_exceptions(env, GETPC());                   \
-        switch (ret) {                                                  \
-        case float_relation_unordered:                                  \
-            fsr |= (FSR_FCC1 | FSR_FCC0) << FS;                         \
-            fsr |= FSR_NVA;                                             \
-            break;                                                      \
-        case float_relation_less:                                       \
-            fsr &= ~(FSR_FCC1) << FS;                                   \
-            fsr |= FSR_FCC0 << FS;                                      \
-            break;                                                      \
-        case float_relation_greater:                                    \
-            fsr &= ~(FSR_FCC0) << FS;                                   \
-            fsr |= FSR_FCC1 << FS;                                      \
-            break;                                                      \
-        default:                                                        \
-            fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                      \
-            break;                                                      \
-        }                                                               \
-        return fsr;                                                     \
+static uint32_t finish_fcmp(CPUSPARCState *env, FloatRelation r, uintptr_t ra)
+{
+    check_ieee_exceptions(env, ra);
+
+    /*
+     * FCC values:
+     * 0 =
+     * 1 <
+     * 2 >
+     * 3 unordered
+     */
+    switch (r) {
+    case float_relation_equal:
+        return 0;
+    case float_relation_less:
+        return 1;
+    case float_relation_greater:
+        return 2;
+    case float_relation_unordered:
+        env->fsr |= FSR_NVA;
+        return 3;
     }
-#define GEN_FCMP_T(name, size, FS, E)                                   \
-    target_ulong glue(helper_, name)(CPUSPARCState *env, size src1, size src2)\
-    {                                                                   \
-        FloatRelation ret;                                              \
-        target_ulong fsr;                                               \
-        if (E) {                                                        \
-            ret = glue(size, _compare)(src1, src2, &env->fp_status);    \
-        } else {                                                        \
-            ret = glue(size, _compare_quiet)(src1, src2,                \
-                                             &env->fp_status);          \
-        }                                                               \
-        fsr = do_check_ieee_exceptions(env, GETPC());                   \
-        switch (ret) {                                                  \
-        case float_relation_unordered:                                  \
-            fsr |= (FSR_FCC1 | FSR_FCC0) << FS;                         \
-            break;                                                      \
-        case float_relation_less:                                       \
-            fsr &= ~(FSR_FCC1 << FS);                                   \
-            fsr |= FSR_FCC0 << FS;                                      \
-            break;                                                      \
-        case float_relation_greater:                                    \
-            fsr &= ~(FSR_FCC0 << FS);                                   \
-            fsr |= FSR_FCC1 << FS;                                      \
-            break;                                                      \
-        default:                                                        \
-            fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                      \
-            break;                                                      \
-        }                                                               \
-        return fsr;                                                     \
-    }
+    g_assert_not_reached();
+}
 
-GEN_FCMP_T(fcmps, float32, 0, 0);
-GEN_FCMP_T(fcmpd, float64, 0, 0);
+uint32_t helper_fcmps(CPUSPARCState *env, float32 src1, float32 src2)
+{
+    FloatRelation r = float32_compare_quiet(src1, src2, &env->fp_status);
+    return finish_fcmp(env, r, GETPC());
+}
 
-GEN_FCMP_T(fcmpes, float32, 0, 1);
-GEN_FCMP_T(fcmped, float64, 0, 1);
+uint32_t helper_fcmpes(CPUSPARCState *env, float32 src1, float32 src2)
+{
+    FloatRelation r = float32_compare(src1, src2, &env->fp_status);
+    return finish_fcmp(env, r, GETPC());
+}
 
-GEN_FCMP(fcmpq, float128, QT0, QT1, 0, 0);
-GEN_FCMP(fcmpeq, float128, QT0, QT1, 0, 1);
+uint32_t helper_fcmpd(CPUSPARCState *env, float64 src1, float64 src2)
+{
+    FloatRelation r = float64_compare_quiet(src1, src2, &env->fp_status);
+    return finish_fcmp(env, r, GETPC());
+}
 
+uint32_t helper_fcmped(CPUSPARCState *env, float64 src1, float64 src2)
+{
+    FloatRelation r = float64_compare(src1, src2, &env->fp_status);
+    return finish_fcmp(env, r, GETPC());
+}
+
+uint32_t helper_fcmpq(CPUSPARCState *env, Int128 src1, Int128 src2)
+{
+    FloatRelation r = float128_compare_quiet(f128_in(src1), f128_in(src2),
+                                             &env->fp_status);
+    return finish_fcmp(env, r, GETPC());
+}
+
+uint32_t helper_fcmpeq(CPUSPARCState *env, Int128 src1, Int128 src2)
+{
+    FloatRelation r = float128_compare(f128_in(src1), f128_in(src2),
+                                       &env->fp_status);
+    return finish_fcmp(env, r, GETPC());
+}
+
+target_ulong cpu_get_fsr(CPUSPARCState *env)
+{
+    target_ulong fsr = env->fsr | env->fsr_cexc_ftt;
+
+    fsr |= env->fcc[0] << FSR_FCC0_SHIFT;
 #ifdef TARGET_SPARC64
-GEN_FCMP_T(fcmps_fcc1, float32, 22, 0);
-GEN_FCMP_T(fcmpd_fcc1, float64, 22, 0);
-GEN_FCMP(fcmpq_fcc1, float128, QT0, QT1, 22, 0);
-
-GEN_FCMP_T(fcmps_fcc2, float32, 24, 0);
-GEN_FCMP_T(fcmpd_fcc2, float64, 24, 0);
-GEN_FCMP(fcmpq_fcc2, float128, QT0, QT1, 24, 0);
-
-GEN_FCMP_T(fcmps_fcc3, float32, 26, 0);
-GEN_FCMP_T(fcmpd_fcc3, float64, 26, 0);
-GEN_FCMP(fcmpq_fcc3, float128, QT0, QT1, 26, 0);
-
-GEN_FCMP_T(fcmpes_fcc1, float32, 22, 1);
-GEN_FCMP_T(fcmped_fcc1, float64, 22, 1);
-GEN_FCMP(fcmpeq_fcc1, float128, QT0, QT1, 22, 1);
-
-GEN_FCMP_T(fcmpes_fcc2, float32, 24, 1);
-GEN_FCMP_T(fcmped_fcc2, float64, 24, 1);
-GEN_FCMP(fcmpeq_fcc2, float128, QT0, QT1, 24, 1);
-
-GEN_FCMP_T(fcmpes_fcc3, float32, 26, 1);
-GEN_FCMP_T(fcmped_fcc3, float64, 26, 1);
-GEN_FCMP(fcmpeq_fcc3, float128, QT0, QT1, 26, 1);
+    fsr |= (uint64_t)env->fcc[1] << FSR_FCC1_SHIFT;
+    fsr |= (uint64_t)env->fcc[2] << FSR_FCC2_SHIFT;
+    fsr |= (uint64_t)env->fcc[3] << FSR_FCC3_SHIFT;
 #endif
-#undef GEN_FCMP_T
-#undef GEN_FCMP
 
-static void set_fsr(CPUSPARCState *env, target_ulong fsr)
+    /* VER is kept completely separate until re-assembly. */
+    fsr |= env->def.fpu_version;
+
+    return fsr;
+}
+
+target_ulong helper_get_fsr(CPUSPARCState *env)
+{
+    return cpu_get_fsr(env);
+}
+
+static void set_fsr_nonsplit(CPUSPARCState *env, target_ulong fsr)
 {
     int rnd_mode;
 
+    env->fsr = fsr & (FSR_RD_MASK | FSR_TEM_MASK | FSR_AEXC_MASK);
+
     switch (fsr & FSR_RD_MASK) {
     case FSR_RD_NEAREST:
         rnd_mode = float_round_nearest_even;
@@ -382,7 +452,23 @@
     set_float_rounding_mode(rnd_mode, &env->fp_status);
 }
 
-void helper_set_fsr(CPUSPARCState *env, target_ulong fsr)
+void cpu_put_fsr(CPUSPARCState *env, target_ulong fsr)
 {
-    set_fsr(env, fsr);
+    env->fsr_cexc_ftt = fsr & (FSR_CEXC_MASK | FSR_FTT_MASK);
+
+    env->fcc[0] = extract32(fsr, FSR_FCC0_SHIFT, 2);
+#ifdef TARGET_SPARC64
+    env->fcc[1] = extract64(fsr, FSR_FCC1_SHIFT, 2);
+    env->fcc[2] = extract64(fsr, FSR_FCC2_SHIFT, 2);
+    env->fcc[3] = extract64(fsr, FSR_FCC3_SHIFT, 2);
+#endif
+
+    set_fsr_nonsplit(env, fsr);
+}
+
+void helper_set_fsr_nofcc_noftt(CPUSPARCState *env, uint32_t fsr)
+{
+    env->fsr_cexc_ftt &= FSR_FTT_MASK;
+    env->fsr_cexc_ftt |= fsr & FSR_CEXC_MASK;
+    set_fsr_nonsplit(env, fsr);
 }
diff --git a/target/sparc/gdbstub.c b/target/sparc/gdbstub.c
index a1c8fdc..d1586b2 100644
--- a/target/sparc/gdbstub.c
+++ b/target/sparc/gdbstub.c
@@ -64,7 +64,7 @@
     case 69:
         return gdb_get_rega(mem_buf, env->npc);
     case 70:
-        return gdb_get_rega(mem_buf, env->fsr);
+        return gdb_get_rega(mem_buf, cpu_get_fsr(env));
     case 71:
         return gdb_get_rega(mem_buf, 0); /* csr */
     default:
@@ -94,7 +94,7 @@
                                      ((env->pstate & 0xfff) << 8) |
                                      cpu_get_cwp64(env));
     case 83:
-        return gdb_get_regl(mem_buf, env->fsr);
+        return gdb_get_regl(mem_buf, cpu_get_fsr(env));
     case 84:
         return gdb_get_regl(mem_buf, env->fprs);
     case 85:
@@ -156,7 +156,7 @@
             env->npc = tmp;
             break;
         case 70:
-            env->fsr = tmp;
+            cpu_put_fsr(env, tmp);
             break;
         default:
             return 0;
@@ -191,7 +191,7 @@
             cpu_put_cwp64(env, tmp & 0xff);
             break;
         case 83:
-            env->fsr = tmp;
+            cpu_put_fsr(env, tmp);
             break;
         case 84:
             env->fprs = tmp;
diff --git a/target/sparc/helper.h b/target/sparc/helper.h
index 55eff66..6a42ba4 100644
--- a/target/sparc/helper.h
+++ b/target/sparc/helper.h
@@ -35,87 +35,60 @@
 DEF_HELPER_FLAGS_4(ld_asi, TCG_CALL_NO_WG, i64, env, tl, int, i32)
 DEF_HELPER_FLAGS_5(st_asi, TCG_CALL_NO_WG, void, env, tl, i64, int, i32)
 #endif
-DEF_HELPER_FLAGS_1(check_ieee_exceptions, TCG_CALL_NO_WG, tl, env)
-DEF_HELPER_FLAGS_2(set_fsr, TCG_CALL_NO_RWG, void, env, tl)
-DEF_HELPER_FLAGS_1(fabss, TCG_CALL_NO_RWG_SE, f32, f32)
-DEF_HELPER_FLAGS_2(fsqrts, TCG_CALL_NO_RWG, f32, env, f32)
-DEF_HELPER_FLAGS_2(fsqrtd, TCG_CALL_NO_RWG, f64, env, f64)
-DEF_HELPER_FLAGS_3(fcmps, TCG_CALL_NO_WG, tl, env, f32, f32)
-DEF_HELPER_FLAGS_3(fcmpd, TCG_CALL_NO_WG, tl, env, f64, f64)
-DEF_HELPER_FLAGS_3(fcmpes, TCG_CALL_NO_WG, tl, env, f32, f32)
-DEF_HELPER_FLAGS_3(fcmped, TCG_CALL_NO_WG, tl, env, f64, f64)
-DEF_HELPER_FLAGS_1(fsqrtq, TCG_CALL_NO_RWG, void, env)
-DEF_HELPER_FLAGS_1(fcmpq, TCG_CALL_NO_WG, tl, env)
-DEF_HELPER_FLAGS_1(fcmpeq, TCG_CALL_NO_WG, tl, env)
-#ifdef TARGET_SPARC64
-DEF_HELPER_FLAGS_1(fabsd, TCG_CALL_NO_RWG_SE, f64, f64)
-DEF_HELPER_FLAGS_3(fcmps_fcc1, TCG_CALL_NO_WG, tl, env, f32, f32)
-DEF_HELPER_FLAGS_3(fcmps_fcc2, TCG_CALL_NO_WG, tl, env, f32, f32)
-DEF_HELPER_FLAGS_3(fcmps_fcc3, TCG_CALL_NO_WG, tl, env, f32, f32)
-DEF_HELPER_FLAGS_3(fcmpd_fcc1, TCG_CALL_NO_WG, tl, env, f64, f64)
-DEF_HELPER_FLAGS_3(fcmpd_fcc2, TCG_CALL_NO_WG, tl, env, f64, f64)
-DEF_HELPER_FLAGS_3(fcmpd_fcc3, TCG_CALL_NO_WG, tl, env, f64, f64)
-DEF_HELPER_FLAGS_3(fcmpes_fcc1, TCG_CALL_NO_WG, tl, env, f32, f32)
-DEF_HELPER_FLAGS_3(fcmpes_fcc2, TCG_CALL_NO_WG, tl, env, f32, f32)
-DEF_HELPER_FLAGS_3(fcmpes_fcc3, TCG_CALL_NO_WG, tl, env, f32, f32)
-DEF_HELPER_FLAGS_3(fcmped_fcc1, TCG_CALL_NO_WG, tl, env, f64, f64)
-DEF_HELPER_FLAGS_3(fcmped_fcc2, TCG_CALL_NO_WG, tl, env, f64, f64)
-DEF_HELPER_FLAGS_3(fcmped_fcc3, TCG_CALL_NO_WG, tl, env, f64, f64)
-DEF_HELPER_FLAGS_1(fabsq, TCG_CALL_NO_RWG, void, env)
-DEF_HELPER_FLAGS_1(fcmpq_fcc1, TCG_CALL_NO_WG, tl, env)
-DEF_HELPER_FLAGS_1(fcmpq_fcc2, TCG_CALL_NO_WG, tl, env)
-DEF_HELPER_FLAGS_1(fcmpq_fcc3, TCG_CALL_NO_WG, tl, env)
-DEF_HELPER_FLAGS_1(fcmpeq_fcc1, TCG_CALL_NO_WG, tl, env)
-DEF_HELPER_FLAGS_1(fcmpeq_fcc2, TCG_CALL_NO_WG, tl, env)
-DEF_HELPER_FLAGS_1(fcmpeq_fcc3, TCG_CALL_NO_WG, tl, env)
-#endif
+DEF_HELPER_FLAGS_1(get_fsr, TCG_CALL_NO_WG_SE, tl, env)
+DEF_HELPER_FLAGS_2(set_fsr_nofcc_noftt, TCG_CALL_NO_RWG, void, env, i32)
+DEF_HELPER_FLAGS_2(fsqrts, TCG_CALL_NO_WG, f32, env, f32)
+DEF_HELPER_FLAGS_2(fsqrtd, TCG_CALL_NO_WG, f64, env, f64)
+DEF_HELPER_FLAGS_2(fsqrtq, TCG_CALL_NO_WG, i128, env, i128)
+DEF_HELPER_FLAGS_3(fcmps, TCG_CALL_NO_WG, i32, env, f32, f32)
+DEF_HELPER_FLAGS_3(fcmpes, TCG_CALL_NO_WG, i32, env, f32, f32)
+DEF_HELPER_FLAGS_3(fcmpd, TCG_CALL_NO_WG, i32, env, f64, f64)
+DEF_HELPER_FLAGS_3(fcmped, TCG_CALL_NO_WG, i32, env, f64, f64)
+DEF_HELPER_FLAGS_3(fcmpq, TCG_CALL_NO_WG, i32, env, i128, i128)
+DEF_HELPER_FLAGS_3(fcmpeq, TCG_CALL_NO_WG, i32, env, i128, i128)
 DEF_HELPER_2(raise_exception, noreturn, env, int)
-#define F_HELPER_0_1(name) \
-  DEF_HELPER_FLAGS_1(f ## name, TCG_CALL_NO_RWG, void, env)
 
-DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_RWG, f64, env, f64, f64)
-DEF_HELPER_FLAGS_3(fsubd, TCG_CALL_NO_RWG, f64, env, f64, f64)
-DEF_HELPER_FLAGS_3(fmuld, TCG_CALL_NO_RWG, f64, env, f64, f64)
-DEF_HELPER_FLAGS_3(fdivd, TCG_CALL_NO_RWG, f64, env, f64, f64)
-F_HELPER_0_1(addq)
-F_HELPER_0_1(subq)
-F_HELPER_0_1(mulq)
-F_HELPER_0_1(divq)
+DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_WG, f64, env, f64, f64)
+DEF_HELPER_FLAGS_3(fsubd, TCG_CALL_NO_WG, f64, env, f64, f64)
+DEF_HELPER_FLAGS_3(fmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
+DEF_HELPER_FLAGS_3(fdivd, TCG_CALL_NO_WG, f64, env, f64, f64)
 
-DEF_HELPER_FLAGS_3(fadds, TCG_CALL_NO_RWG, f32, env, f32, f32)
-DEF_HELPER_FLAGS_3(fsubs, TCG_CALL_NO_RWG, f32, env, f32, f32)
-DEF_HELPER_FLAGS_3(fmuls, TCG_CALL_NO_RWG, f32, env, f32, f32)
-DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_RWG, f32, env, f32, f32)
+DEF_HELPER_FLAGS_3(faddq, TCG_CALL_NO_WG, i128, env, i128, i128)
+DEF_HELPER_FLAGS_3(fsubq, TCG_CALL_NO_WG, i128, env, i128, i128)
+DEF_HELPER_FLAGS_3(fmulq, TCG_CALL_NO_WG, i128, env, i128, i128)
+DEF_HELPER_FLAGS_3(fdivq, TCG_CALL_NO_WG, i128, env, i128, i128)
 
-DEF_HELPER_FLAGS_3(fsmuld, TCG_CALL_NO_RWG, f64, env, f32, f32)
-DEF_HELPER_FLAGS_3(fdmulq, TCG_CALL_NO_RWG, void, env, f64, f64)
+DEF_HELPER_FLAGS_3(fadds, TCG_CALL_NO_WG, f32, env, f32, f32)
+DEF_HELPER_FLAGS_3(fsubs, TCG_CALL_NO_WG, f32, env, f32, f32)
+DEF_HELPER_FLAGS_3(fmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
+DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_WG, f32, env, f32, f32)
 
-DEF_HELPER_FLAGS_1(fnegs, TCG_CALL_NO_RWG_SE, f32, f32)
-DEF_HELPER_FLAGS_2(fitod, TCG_CALL_NO_RWG_SE, f64, env, s32)
-DEF_HELPER_FLAGS_2(fitoq, TCG_CALL_NO_RWG, void, env, s32)
+DEF_HELPER_FLAGS_3(fsmuld, TCG_CALL_NO_WG, f64, env, f32, f32)
+DEF_HELPER_FLAGS_3(fdmulq, TCG_CALL_NO_WG, i128, env, f64, f64)
 
-DEF_HELPER_FLAGS_2(fitos, TCG_CALL_NO_RWG, f32, env, s32)
+DEF_HELPER_FLAGS_2(fitod, TCG_CALL_NO_WG, f64, env, s32)
+DEF_HELPER_FLAGS_2(fitoq, TCG_CALL_NO_WG, i128, env, s32)
+
+DEF_HELPER_FLAGS_2(fitos, TCG_CALL_NO_WG, f32, env, s32)
 
 #ifdef TARGET_SPARC64
-DEF_HELPER_FLAGS_1(fnegd, TCG_CALL_NO_RWG_SE, f64, f64)
-DEF_HELPER_FLAGS_1(fnegq, TCG_CALL_NO_RWG, void, env)
-DEF_HELPER_FLAGS_2(fxtos, TCG_CALL_NO_RWG, f32, env, s64)
-DEF_HELPER_FLAGS_2(fxtod, TCG_CALL_NO_RWG, f64, env, s64)
-DEF_HELPER_FLAGS_2(fxtoq, TCG_CALL_NO_RWG, void, env, s64)
+DEF_HELPER_FLAGS_2(fxtos, TCG_CALL_NO_WG, f32, env, s64)
+DEF_HELPER_FLAGS_2(fxtod, TCG_CALL_NO_WG, f64, env, s64)
+DEF_HELPER_FLAGS_2(fxtoq, TCG_CALL_NO_WG, i128, env, s64)
 #endif
-DEF_HELPER_FLAGS_2(fdtos, TCG_CALL_NO_RWG, f32, env, f64)
-DEF_HELPER_FLAGS_2(fstod, TCG_CALL_NO_RWG, f64, env, f32)
-DEF_HELPER_FLAGS_1(fqtos, TCG_CALL_NO_RWG, f32, env)
-DEF_HELPER_FLAGS_2(fstoq, TCG_CALL_NO_RWG, void, env, f32)
-DEF_HELPER_FLAGS_1(fqtod, TCG_CALL_NO_RWG, f64, env)
-DEF_HELPER_FLAGS_2(fdtoq, TCG_CALL_NO_RWG, void, env, f64)
-DEF_HELPER_FLAGS_2(fstoi, TCG_CALL_NO_RWG, s32, env, f32)
-DEF_HELPER_FLAGS_2(fdtoi, TCG_CALL_NO_RWG, s32, env, f64)
-DEF_HELPER_FLAGS_1(fqtoi, TCG_CALL_NO_RWG, s32, env)
+DEF_HELPER_FLAGS_2(fdtos, TCG_CALL_NO_WG, f32, env, f64)
+DEF_HELPER_FLAGS_2(fstod, TCG_CALL_NO_WG, f64, env, f32)
+DEF_HELPER_FLAGS_2(fqtos, TCG_CALL_NO_WG, f32, env, i128)
+DEF_HELPER_FLAGS_2(fstoq, TCG_CALL_NO_WG, i128, env, f32)
+DEF_HELPER_FLAGS_2(fqtod, TCG_CALL_NO_WG, f64, env, i128)
+DEF_HELPER_FLAGS_2(fdtoq, TCG_CALL_NO_WG, i128, env, f64)
+DEF_HELPER_FLAGS_2(fstoi, TCG_CALL_NO_WG, s32, env, f32)
+DEF_HELPER_FLAGS_2(fdtoi, TCG_CALL_NO_WG, s32, env, f64)
+DEF_HELPER_FLAGS_2(fqtoi, TCG_CALL_NO_WG, s32, env, i128)
 #ifdef TARGET_SPARC64
-DEF_HELPER_FLAGS_2(fstox, TCG_CALL_NO_RWG, s64, env, f32)
-DEF_HELPER_FLAGS_2(fdtox, TCG_CALL_NO_RWG, s64, env, f64)
-DEF_HELPER_FLAGS_1(fqtox, TCG_CALL_NO_RWG, s64, env)
+DEF_HELPER_FLAGS_2(fstox, TCG_CALL_NO_WG, s64, env, f32)
+DEF_HELPER_FLAGS_2(fdtox, TCG_CALL_NO_WG, s64, env, f64)
+DEF_HELPER_FLAGS_2(fqtox, TCG_CALL_NO_WG, s64, env, i128)
 
 DEF_HELPER_FLAGS_2(fpmerge, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(fmul8x16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
@@ -141,6 +114,5 @@
 VIS_CMPHELPER(cmple)
 VIS_CMPHELPER(cmpne)
 #endif
-#undef F_HELPER_0_1
 #undef VIS_HELPER
 #undef VIS_CMPHELPER
diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c
index 09066d5..1ecd58e 100644
--- a/target/sparc/ldst_helper.c
+++ b/target/sparc/ldst_helper.c
@@ -66,9 +66,6 @@
 #endif
 #endif
 
-#define QT0 (env->qt0)
-#define QT1 (env->qt1)
-
 #if defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY)
 /* Calculates TSB pointer value for fault page size
  * UltraSPARC IIi has fixed sizes (8k or 64k) for the page pointers
@@ -690,7 +687,7 @@
     case ASI_M_IODIAG:  /* Turbosparc IOTLB Diagnostic */
         break;
     case ASI_KERNELTXT: /* Supervisor code access */
-        oi = make_memop_idx(memop, cpu_mmu_index(env, true));
+        oi = make_memop_idx(memop, cpu_mmu_index(env_cpu(env), true));
         switch (size) {
         case 1:
             ret = cpu_ldb_code_mmu(env, addr, oi, GETPC());
diff --git a/target/sparc/machine.c b/target/sparc/machine.c
index 2b5686c..48e0cf2 100644
--- a/target/sparc/machine.c
+++ b/target/sparc/machine.c
@@ -83,6 +83,32 @@
     .put = put_psr,
 };
 
+static int get_fsr(QEMUFile *f, void *opaque, size_t size,
+                   const VMStateField *field)
+{
+    SPARCCPU *cpu = opaque;
+    target_ulong val = qemu_get_betl(f);
+
+    cpu_put_fsr(&cpu->env, val);
+    return 0;
+}
+
+static int put_fsr(QEMUFile *f, void *opaque, size_t size,
+                   const VMStateField *field, JSONWriter *vmdesc)
+{
+    SPARCCPU *cpu = opaque;
+    target_ulong val = cpu_get_fsr(&cpu->env);
+
+    qemu_put_betl(f, val);
+    return 0;
+}
+
+static const VMStateInfo vmstate_fsr = {
+    .name = "fsr",
+    .get = get_fsr,
+    .put = put_fsr,
+};
+
 #ifdef TARGET_SPARC64
 static int get_xcc(QEMUFile *f, void *opaque, size_t size,
                    const VMStateField *field)
@@ -157,7 +183,6 @@
         VMSTATE_UINTTL(env.npc, SPARCCPU),
         VMSTATE_UINTTL(env.y, SPARCCPU),
         {
-
             .name = "psr",
             .version_id = 0,
             .size = sizeof(uint32_t),
@@ -165,7 +190,14 @@
             .flags = VMS_SINGLE,
             .offset = 0,
         },
-        VMSTATE_UINTTL(env.fsr, SPARCCPU),
+        {
+            .name = "fsr",
+            .version_id = 0,
+            .size = sizeof(target_ulong),
+            .info = &vmstate_fsr,
+            .flags = VMS_SINGLE,
+            .offset = 0,
+        },
         VMSTATE_UINTTL(env.tbr, SPARCCPU),
         VMSTATE_INT32(env.interrupt_index, SPARCCPU),
         VMSTATE_UINT32(env.pil_in, SPARCCPU),
diff --git a/target/sparc/mmu_helper.c b/target/sparc/mmu_helper.c
index 453498c..5170a66 100644
--- a/target/sparc/mmu_helper.c
+++ b/target/sparc/mmu_helper.c
@@ -901,7 +901,7 @@
     SPARCCPU *cpu = SPARC_CPU(cs);
     CPUSPARCState *env = &cpu->env;
     hwaddr phys_addr;
-    int mmu_idx = cpu_mmu_index(env, false);
+    int mmu_idx = cpu_mmu_index(cs, false);
 
     if (cpu_sparc_get_phys_page(env, &phys_addr, addr, 2, mmu_idx) != 0) {
         if (cpu_sparc_get_phys_page(env, &phys_addr, addr, 0, mmu_idx) != 0) {
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index 97184fa..7df6f83 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -43,9 +43,7 @@
 #else
 # define gen_helper_clear_softint(E, S)         qemu_build_not_reached()
 # define gen_helper_done(E)                     qemu_build_not_reached()
-# define gen_helper_fabsd(D, S)                 qemu_build_not_reached()
 # define gen_helper_flushw(E)                   qemu_build_not_reached()
-# define gen_helper_fnegd(D, S)                 qemu_build_not_reached()
 # define gen_helper_rdccr(D, E)                 qemu_build_not_reached()
 # define gen_helper_rdcwp(D, E)                 qemu_build_not_reached()
 # define gen_helper_restored(E)                 qemu_build_not_reached()
@@ -61,7 +59,6 @@
 # define gen_helper_write_softint(E, S)         qemu_build_not_reached()
 # define gen_helper_wrpil(E, S)                 qemu_build_not_reached()
 # define gen_helper_wrpstate(E, S)              qemu_build_not_reached()
-# define gen_helper_fabsq                ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fcmpeq16             ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fcmpeq32             ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fcmpgt16             ({ qemu_build_not_reached(); NULL; })
@@ -79,7 +76,6 @@
 # define gen_helper_fmul8x16             ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fmuld8sux16          ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fmuld8ulx16          ({ qemu_build_not_reached(); NULL; })
-# define gen_helper_fnegq                ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fpmerge              ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fqtox                ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fstox                ({ qemu_build_not_reached(); NULL; })
@@ -87,8 +83,6 @@
 # define gen_helper_fxtoq                ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fxtos                ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_pdist                ({ qemu_build_not_reached(); NULL; })
-# define FSR_LDXFSR_MASK                        0
-# define FSR_LDXFSR_OLDMASK                     0
 # define MAXTL_MASK                             0
 #endif
 
@@ -103,7 +97,7 @@
 
 /* global register indexes */
 static TCGv_ptr cpu_regwptr;
-static TCGv cpu_fsr, cpu_pc, cpu_npc;
+static TCGv cpu_pc, cpu_npc;
 static TCGv cpu_regs[32];
 static TCGv cpu_y;
 static TCGv cpu_tbr;
@@ -134,6 +128,7 @@
 
 /* Floating point registers */
 static TCGv_i64 cpu_fpr[TARGET_DPREGS];
+static TCGv_i32 cpu_fcc[TARGET_FCCREGS];
 
 #define env_field_offsetof(X)     offsetof(CPUSPARCState, X)
 #ifdef TARGET_SPARC64
@@ -246,11 +241,6 @@
     gen_update_fprs_dirty(dc, dst);
 }
 
-static TCGv_i32 gen_dest_fpr_F(DisasContext *dc)
-{
-    return tcg_temp_new_i32();
-}
-
 static TCGv_i64 gen_load_fpr_D(DisasContext *dc, unsigned int src)
 {
     src = DFPREG(src);
@@ -269,28 +259,20 @@
     return cpu_fpr[DFPREG(dst) / 2];
 }
 
-static void gen_op_load_fpr_QT0(unsigned int src)
+static TCGv_i128 gen_load_fpr_Q(DisasContext *dc, unsigned int src)
 {
-    tcg_gen_st_i64(cpu_fpr[src / 2], tcg_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, ll.upper));
-    tcg_gen_st_i64(cpu_fpr[src/2 + 1], tcg_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, ll.lower));
+    TCGv_i128 ret = tcg_temp_new_i128();
+
+    src = QFPREG(src);
+    tcg_gen_concat_i64_i128(ret, cpu_fpr[src / 2 + 1], cpu_fpr[src / 2]);
+    return ret;
 }
 
-static void gen_op_load_fpr_QT1(unsigned int src)
+static void gen_store_fpr_Q(DisasContext *dc, unsigned int dst, TCGv_i128 v)
 {
-    tcg_gen_st_i64(cpu_fpr[src / 2], tcg_env, offsetof(CPUSPARCState, qt1) +
-                   offsetof(CPU_QuadU, ll.upper));
-    tcg_gen_st_i64(cpu_fpr[src/2 + 1], tcg_env, offsetof(CPUSPARCState, qt1) +
-                   offsetof(CPU_QuadU, ll.lower));
-}
-
-static void gen_op_store_QT0_fpr(unsigned int dst)
-{
-    tcg_gen_ld_i64(cpu_fpr[dst / 2], tcg_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, ll.upper));
-    tcg_gen_ld_i64(cpu_fpr[dst/2 + 1], tcg_env, offsetof(CPUSPARCState, qt0) +
-                   offsetof(CPU_QuadU, ll.lower));
+    dst = DFPREG(dst);
+    tcg_gen_extr_i128_i64(cpu_fpr[dst / 2 + 1], cpu_fpr[dst / 2], v);
+    gen_update_fprs_dirty(dc, dst);
 }
 
 /* moves */
@@ -736,159 +718,6 @@
 #endif
 }
 
-// 1
-static void gen_op_eval_ba(TCGv dst)
-{
-    tcg_gen_movi_tl(dst, 1);
-}
-
-// 0
-static void gen_op_eval_bn(TCGv dst)
-{
-    tcg_gen_movi_tl(dst, 0);
-}
-
-/*
-  FPSR bit field FCC1 | FCC0:
-   0 =
-   1 <
-   2 >
-   3 unordered
-*/
-static void gen_mov_reg_FCC0(TCGv reg, TCGv src,
-                                    unsigned int fcc_offset)
-{
-    tcg_gen_shri_tl(reg, src, FSR_FCC0_SHIFT + fcc_offset);
-    tcg_gen_andi_tl(reg, reg, 0x1);
-}
-
-static void gen_mov_reg_FCC1(TCGv reg, TCGv src, unsigned int fcc_offset)
-{
-    tcg_gen_shri_tl(reg, src, FSR_FCC1_SHIFT + fcc_offset);
-    tcg_gen_andi_tl(reg, reg, 0x1);
-}
-
-// !0: FCC0 | FCC1
-static void gen_op_eval_fbne(TCGv dst, TCGv src, unsigned int fcc_offset)
-{
-    TCGv t0 = tcg_temp_new();
-    gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(t0, src, fcc_offset);
-    tcg_gen_or_tl(dst, dst, t0);
-}
-
-// 1 or 2: FCC0 ^ FCC1
-static void gen_op_eval_fblg(TCGv dst, TCGv src, unsigned int fcc_offset)
-{
-    TCGv t0 = tcg_temp_new();
-    gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(t0, src, fcc_offset);
-    tcg_gen_xor_tl(dst, dst, t0);
-}
-
-// 1 or 3: FCC0
-static void gen_op_eval_fbul(TCGv dst, TCGv src, unsigned int fcc_offset)
-{
-    gen_mov_reg_FCC0(dst, src, fcc_offset);
-}
-
-// 1: FCC0 & !FCC1
-static void gen_op_eval_fbl(TCGv dst, TCGv src, unsigned int fcc_offset)
-{
-    TCGv t0 = tcg_temp_new();
-    gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(t0, src, fcc_offset);
-    tcg_gen_andc_tl(dst, dst, t0);
-}
-
-// 2 or 3: FCC1
-static void gen_op_eval_fbug(TCGv dst, TCGv src, unsigned int fcc_offset)
-{
-    gen_mov_reg_FCC1(dst, src, fcc_offset);
-}
-
-// 2: !FCC0 & FCC1
-static void gen_op_eval_fbg(TCGv dst, TCGv src, unsigned int fcc_offset)
-{
-    TCGv t0 = tcg_temp_new();
-    gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(t0, src, fcc_offset);
-    tcg_gen_andc_tl(dst, t0, dst);
-}
-
-// 3: FCC0 & FCC1
-static void gen_op_eval_fbu(TCGv dst, TCGv src, unsigned int fcc_offset)
-{
-    TCGv t0 = tcg_temp_new();
-    gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(t0, src, fcc_offset);
-    tcg_gen_and_tl(dst, dst, t0);
-}
-
-// 0: !(FCC0 | FCC1)
-static void gen_op_eval_fbe(TCGv dst, TCGv src, unsigned int fcc_offset)
-{
-    TCGv t0 = tcg_temp_new();
-    gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(t0, src, fcc_offset);
-    tcg_gen_or_tl(dst, dst, t0);
-    tcg_gen_xori_tl(dst, dst, 0x1);
-}
-
-// 0 or 3: !(FCC0 ^ FCC1)
-static void gen_op_eval_fbue(TCGv dst, TCGv src, unsigned int fcc_offset)
-{
-    TCGv t0 = tcg_temp_new();
-    gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(t0, src, fcc_offset);
-    tcg_gen_xor_tl(dst, dst, t0);
-    tcg_gen_xori_tl(dst, dst, 0x1);
-}
-
-// 0 or 2: !FCC0
-static void gen_op_eval_fbge(TCGv dst, TCGv src, unsigned int fcc_offset)
-{
-    gen_mov_reg_FCC0(dst, src, fcc_offset);
-    tcg_gen_xori_tl(dst, dst, 0x1);
-}
-
-// !1: !(FCC0 & !FCC1)
-static void gen_op_eval_fbuge(TCGv dst, TCGv src, unsigned int fcc_offset)
-{
-    TCGv t0 = tcg_temp_new();
-    gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(t0, src, fcc_offset);
-    tcg_gen_andc_tl(dst, dst, t0);
-    tcg_gen_xori_tl(dst, dst, 0x1);
-}
-
-// 0 or 1: !FCC1
-static void gen_op_eval_fble(TCGv dst, TCGv src, unsigned int fcc_offset)
-{
-    gen_mov_reg_FCC1(dst, src, fcc_offset);
-    tcg_gen_xori_tl(dst, dst, 0x1);
-}
-
-// !2: !(!FCC0 & FCC1)
-static void gen_op_eval_fbule(TCGv dst, TCGv src, unsigned int fcc_offset)
-{
-    TCGv t0 = tcg_temp_new();
-    gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(t0, src, fcc_offset);
-    tcg_gen_andc_tl(dst, t0, dst);
-    tcg_gen_xori_tl(dst, dst, 0x1);
-}
-
-// !3: !(FCC0 & FCC1)
-static void gen_op_eval_fbo(TCGv dst, TCGv src, unsigned int fcc_offset)
-{
-    TCGv t0 = tcg_temp_new();
-    gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(t0, src, fcc_offset);
-    tcg_gen_and_tl(dst, dst, t0);
-    tcg_gen_xori_tl(dst, dst, 0x1);
-}
-
 static void finishing_insn(DisasContext *dc)
 {
     /*
@@ -1113,80 +942,62 @@
 
 static void gen_fcompare(DisasCompare *cmp, unsigned int cc, unsigned int cond)
 {
-    unsigned int offset;
-    TCGv r_dst;
+    TCGv_i32 fcc = cpu_fcc[cc];
+    TCGv_i32 c1 = fcc;
+    int c2 = 0;
+    TCGCond tcond;
 
-    /* For now we still generate a straight boolean result.  */
-    cmp->cond = TCG_COND_NE;
-    cmp->c1 = r_dst = tcg_temp_new();
-    cmp->c2 = 0;
-
-    switch (cc) {
-    default:
-    case 0x0:
-        offset = 0;
+    /*
+     * FCC values:
+     * 0 =
+     * 1 <
+     * 2 >
+     * 3 unordered
+     */
+    switch (cond & 7) {
+    case 0x0: /* fbn */
+        tcond = TCG_COND_NEVER;
         break;
-    case 0x1:
-        offset = 32 - 10;
+    case 0x1: /* fbne : !0 */
+        tcond = TCG_COND_NE;
         break;
-    case 0x2:
-        offset = 34 - 10;
+    case 0x2: /* fblg : 1 or 2 */
+        /* fcc in {1,2} - 1 -> fcc in {0,1} */
+        c1 = tcg_temp_new_i32();
+        tcg_gen_addi_i32(c1, fcc, -1);
+        c2 = 1;
+        tcond = TCG_COND_LEU;
         break;
-    case 0x3:
-        offset = 36 - 10;
+    case 0x3: /* fbul : 1 or 3 */
+        c1 = tcg_temp_new_i32();
+        tcg_gen_andi_i32(c1, fcc, 1);
+        tcond = TCG_COND_NE;
+        break;
+    case 0x4: /* fbl  : 1 */
+        c2 = 1;
+        tcond = TCG_COND_EQ;
+        break;
+    case 0x5: /* fbug : 2 or 3 */
+        c2 = 2;
+        tcond = TCG_COND_GEU;
+        break;
+    case 0x6: /* fbg  : 2 */
+        c2 = 2;
+        tcond = TCG_COND_EQ;
+        break;
+    case 0x7: /* fbu  : 3 */
+        c2 = 3;
+        tcond = TCG_COND_EQ;
         break;
     }
-
-    switch (cond) {
-    case 0x0:
-        gen_op_eval_bn(r_dst);
-        break;
-    case 0x1:
-        gen_op_eval_fbne(r_dst, cpu_fsr, offset);
-        break;
-    case 0x2:
-        gen_op_eval_fblg(r_dst, cpu_fsr, offset);
-        break;
-    case 0x3:
-        gen_op_eval_fbul(r_dst, cpu_fsr, offset);
-        break;
-    case 0x4:
-        gen_op_eval_fbl(r_dst, cpu_fsr, offset);
-        break;
-    case 0x5:
-        gen_op_eval_fbug(r_dst, cpu_fsr, offset);
-        break;
-    case 0x6:
-        gen_op_eval_fbg(r_dst, cpu_fsr, offset);
-        break;
-    case 0x7:
-        gen_op_eval_fbu(r_dst, cpu_fsr, offset);
-        break;
-    case 0x8:
-        gen_op_eval_ba(r_dst);
-        break;
-    case 0x9:
-        gen_op_eval_fbe(r_dst, cpu_fsr, offset);
-        break;
-    case 0xa:
-        gen_op_eval_fbue(r_dst, cpu_fsr, offset);
-        break;
-    case 0xb:
-        gen_op_eval_fbge(r_dst, cpu_fsr, offset);
-        break;
-    case 0xc:
-        gen_op_eval_fbuge(r_dst, cpu_fsr, offset);
-        break;
-    case 0xd:
-        gen_op_eval_fble(r_dst, cpu_fsr, offset);
-        break;
-    case 0xe:
-        gen_op_eval_fbule(r_dst, cpu_fsr, offset);
-        break;
-    case 0xf:
-        gen_op_eval_fbo(r_dst, cpu_fsr, offset);
-        break;
+    if (cond & 8) {
+        tcond = tcg_invert_cond(tcond);
     }
+
+    cmp->cond = tcond;
+    cmp->c2 = c2;
+    cmp->c1 = tcg_temp_new();
+    tcg_gen_extu_i32_tl(cmp->c1, c1);
 }
 
 static bool gen_compare_reg(DisasCompare *cmp, int cond, TCGv r_src)
@@ -1216,7 +1027,8 @@
 
 static void gen_op_clear_ieee_excp_and_FTT(void)
 {
-    tcg_gen_andi_tl(cpu_fsr, cpu_fsr, FSR_FTT_CEXC_NMASK);
+    tcg_gen_st_i32(tcg_constant_i32(0), tcg_env,
+                   offsetof(CPUSPARCState, fsr_cexc_ftt));
 }
 
 static void gen_op_fmovs(TCGv_i32 dst, TCGv_i32 src)
@@ -1228,13 +1040,13 @@
 static void gen_op_fnegs(TCGv_i32 dst, TCGv_i32 src)
 {
     gen_op_clear_ieee_excp_and_FTT();
-    gen_helper_fnegs(dst, src);
+    tcg_gen_xori_i32(dst, src, 1u << 31);
 }
 
 static void gen_op_fabss(TCGv_i32 dst, TCGv_i32 src)
 {
     gen_op_clear_ieee_excp_and_FTT();
-    gen_helper_fabss(dst, src);
+    tcg_gen_andi_i32(dst, src, ~(1u << 31));
 }
 
 static void gen_op_fmovd(TCGv_i64 dst, TCGv_i64 src)
@@ -1246,161 +1058,44 @@
 static void gen_op_fnegd(TCGv_i64 dst, TCGv_i64 src)
 {
     gen_op_clear_ieee_excp_and_FTT();
-    gen_helper_fnegd(dst, src);
+    tcg_gen_xori_i64(dst, src, 1ull << 63);
 }
 
 static void gen_op_fabsd(TCGv_i64 dst, TCGv_i64 src)
 {
     gen_op_clear_ieee_excp_and_FTT();
-    gen_helper_fabsd(dst, src);
+    tcg_gen_andi_i64(dst, src, ~(1ull << 63));
 }
 
-#ifdef TARGET_SPARC64
-static void gen_op_fcmps(int fccno, TCGv_i32 r_rs1, TCGv_i32 r_rs2)
+static void gen_op_fnegq(TCGv_i128 dst, TCGv_i128 src)
 {
-    switch (fccno) {
-    case 0:
-        gen_helper_fcmps(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    case 1:
-        gen_helper_fcmps_fcc1(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    case 2:
-        gen_helper_fcmps_fcc2(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    case 3:
-        gen_helper_fcmps_fcc3(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    }
+    TCGv_i64 l = tcg_temp_new_i64();
+    TCGv_i64 h = tcg_temp_new_i64();
+
+    tcg_gen_extr_i128_i64(l, h, src);
+    tcg_gen_xori_i64(h, h, 1ull << 63);
+    tcg_gen_concat_i64_i128(dst, l, h);
 }
 
-static void gen_op_fcmpd(int fccno, TCGv_i64 r_rs1, TCGv_i64 r_rs2)
+static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src)
 {
-    switch (fccno) {
-    case 0:
-        gen_helper_fcmpd(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    case 1:
-        gen_helper_fcmpd_fcc1(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    case 2:
-        gen_helper_fcmpd_fcc2(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    case 3:
-        gen_helper_fcmpd_fcc3(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    }
+    TCGv_i64 l = tcg_temp_new_i64();
+    TCGv_i64 h = tcg_temp_new_i64();
+
+    tcg_gen_extr_i128_i64(l, h, src);
+    tcg_gen_andi_i64(h, h, ~(1ull << 63));
+    tcg_gen_concat_i64_i128(dst, l, h);
 }
 
-static void gen_op_fcmpq(int fccno)
+static void gen_op_fpexception_im(DisasContext *dc, int ftt)
 {
-    switch (fccno) {
-    case 0:
-        gen_helper_fcmpq(cpu_fsr, tcg_env);
-        break;
-    case 1:
-        gen_helper_fcmpq_fcc1(cpu_fsr, tcg_env);
-        break;
-    case 2:
-        gen_helper_fcmpq_fcc2(cpu_fsr, tcg_env);
-        break;
-    case 3:
-        gen_helper_fcmpq_fcc3(cpu_fsr, tcg_env);
-        break;
-    }
-}
-
-static void gen_op_fcmpes(int fccno, TCGv_i32 r_rs1, TCGv_i32 r_rs2)
-{
-    switch (fccno) {
-    case 0:
-        gen_helper_fcmpes(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    case 1:
-        gen_helper_fcmpes_fcc1(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    case 2:
-        gen_helper_fcmpes_fcc2(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    case 3:
-        gen_helper_fcmpes_fcc3(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    }
-}
-
-static void gen_op_fcmped(int fccno, TCGv_i64 r_rs1, TCGv_i64 r_rs2)
-{
-    switch (fccno) {
-    case 0:
-        gen_helper_fcmped(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    case 1:
-        gen_helper_fcmped_fcc1(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    case 2:
-        gen_helper_fcmped_fcc2(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    case 3:
-        gen_helper_fcmped_fcc3(cpu_fsr, tcg_env, r_rs1, r_rs2);
-        break;
-    }
-}
-
-static void gen_op_fcmpeq(int fccno)
-{
-    switch (fccno) {
-    case 0:
-        gen_helper_fcmpeq(cpu_fsr, tcg_env);
-        break;
-    case 1:
-        gen_helper_fcmpeq_fcc1(cpu_fsr, tcg_env);
-        break;
-    case 2:
-        gen_helper_fcmpeq_fcc2(cpu_fsr, tcg_env);
-        break;
-    case 3:
-        gen_helper_fcmpeq_fcc3(cpu_fsr, tcg_env);
-        break;
-    }
-}
-
-#else
-
-static void gen_op_fcmps(int fccno, TCGv r_rs1, TCGv r_rs2)
-{
-    gen_helper_fcmps(cpu_fsr, tcg_env, r_rs1, r_rs2);
-}
-
-static void gen_op_fcmpd(int fccno, TCGv_i64 r_rs1, TCGv_i64 r_rs2)
-{
-    gen_helper_fcmpd(cpu_fsr, tcg_env, r_rs1, r_rs2);
-}
-
-static void gen_op_fcmpq(int fccno)
-{
-    gen_helper_fcmpq(cpu_fsr, tcg_env);
-}
-
-static void gen_op_fcmpes(int fccno, TCGv r_rs1, TCGv r_rs2)
-{
-    gen_helper_fcmpes(cpu_fsr, tcg_env, r_rs1, r_rs2);
-}
-
-static void gen_op_fcmped(int fccno, TCGv_i64 r_rs1, TCGv_i64 r_rs2)
-{
-    gen_helper_fcmped(cpu_fsr, tcg_env, r_rs1, r_rs2);
-}
-
-static void gen_op_fcmpeq(int fccno)
-{
-    gen_helper_fcmpeq(cpu_fsr, tcg_env);
-}
-#endif
-
-static void gen_op_fpexception_im(DisasContext *dc, int fsr_flags)
-{
-    tcg_gen_andi_tl(cpu_fsr, cpu_fsr, FSR_FTT_NMASK);
-    tcg_gen_ori_tl(cpu_fsr, cpu_fsr, fsr_flags);
+    /*
+     * CEXC is only set when succesfully completing an FPop,
+     * or when raising FSR_FTT_IEEE_EXCP, i.e. check_ieee_exception.
+     * Thus we can simply store FTT into this field.
+     */
+    tcg_gen_st_i32(tcg_constant_i32(ftt), tcg_env,
+                   offsetof(CPUSPARCState, fsr_cexc_ftt));
     gen_exception(dc, TT_FP_EXCP);
 }
 
@@ -1727,28 +1422,35 @@
 
     case GET_ASI_BCOPY:
         assert(TARGET_LONG_BITS == 32);
-        /* Copy 32 bytes from the address in SRC to ADDR.  */
-        /* ??? The original qemu code suggests 4-byte alignment, dropping
-           the low bits, but the only place I can see this used is in the
-           Linux kernel with 32 byte alignment, which would make more sense
-           as a cacheline-style operation.  */
+        /*
+         * Copy 32 bytes from the address in SRC to ADDR.
+         *
+         * From Ross RT625 hyperSPARC manual, section 4.6:
+         * "Block Copy and Block Fill will work only on cache line boundaries."
+         *
+         * It does not specify if an unaliged address is truncated or trapped.
+         * Previous qemu behaviour was to truncate to 4 byte alignment, which
+         * is obviously wrong.  The only place I can see this used is in the
+         * Linux kernel which begins with page alignment, advancing by 32,
+         * so is always aligned.  Assume truncation as the simpler option.
+         *
+         * Since the loads and stores are paired, allow the copy to happen
+         * in the host endianness.  The copy need not be atomic.
+         */
         {
+            MemOp mop = MO_128 | MO_ATOM_IFALIGN_PAIR;
             TCGv saddr = tcg_temp_new();
             TCGv daddr = tcg_temp_new();
-            TCGv four = tcg_constant_tl(4);
-            TCGv_i32 tmp = tcg_temp_new_i32();
-            int i;
+            TCGv_i128 tmp = tcg_temp_new_i128();
 
-            tcg_gen_andi_tl(saddr, src, -4);
-            tcg_gen_andi_tl(daddr, addr, -4);
-            for (i = 0; i < 32; i += 4) {
-                /* Since the loads and stores are paired, allow the
-                   copy to happen in the host endianness.  */
-                tcg_gen_qemu_ld_i32(tmp, saddr, da->mem_idx, MO_UL);
-                tcg_gen_qemu_st_i32(tmp, daddr, da->mem_idx, MO_UL);
-                tcg_gen_add_tl(saddr, saddr, four);
-                tcg_gen_add_tl(daddr, daddr, four);
-            }
+            tcg_gen_andi_tl(saddr, src, -32);
+            tcg_gen_andi_tl(daddr, addr, -32);
+            tcg_gen_qemu_ld_i128(tmp, saddr, da->mem_idx, mop);
+            tcg_gen_qemu_st_i128(tmp, daddr, da->mem_idx, mop);
+            tcg_gen_addi_tl(saddr, saddr, 16);
+            tcg_gen_addi_tl(daddr, daddr, 16);
+            tcg_gen_qemu_ld_i128(tmp, saddr, da->mem_idx, mop);
+            tcg_gen_qemu_st_i128(tmp, daddr, da->mem_idx, mop);
         }
         break;
 
@@ -1866,7 +1568,7 @@
         memop |= MO_ALIGN_4;
         switch (size) {
         case MO_32:
-            d32 = gen_dest_fpr_F(dc);
+            d32 = tcg_temp_new_i32();
             tcg_gen_qemu_ld_i32(d32, addr, da->mem_idx, memop);
             gen_store_fpr_F(dc, rd, d32);
             break;
@@ -1931,7 +1633,7 @@
             case MO_32:
                 d64 = tcg_temp_new_i64();
                 gen_helper_ld_asi(d64, tcg_env, addr, r_asi, r_mop);
-                d32 = gen_dest_fpr_F(dc);
+                d32 = tcg_temp_new_i32();
                 tcg_gen_extrl_i64_i32(d32, d64);
                 gen_store_fpr_F(dc, rd, d32);
                 break;
@@ -2165,23 +1867,22 @@
 
     case GET_ASI_BFILL:
         assert(TARGET_LONG_BITS == 32);
-        /* Store 32 bytes of T64 to ADDR.  */
-        /* ??? The original qemu code suggests 8-byte alignment, dropping
-           the low bits, but the only place I can see this used is in the
-           Linux kernel with 32 byte alignment, which would make more sense
-           as a cacheline-style operation.  */
+        /*
+         * Store 32 bytes of [rd:rd+1] to ADDR.
+         * See comments for GET_ASI_COPY above.
+         */
         {
-            TCGv_i64 t64 = tcg_temp_new_i64();
-            TCGv d_addr = tcg_temp_new();
-            TCGv eight = tcg_constant_tl(8);
-            int i;
+            MemOp mop = MO_TE | MO_128 | MO_ATOM_IFALIGN_PAIR;
+            TCGv_i64 t8 = tcg_temp_new_i64();
+            TCGv_i128 t16 = tcg_temp_new_i128();
+            TCGv daddr = tcg_temp_new();
 
-            tcg_gen_concat_tl_i64(t64, lo, hi);
-            tcg_gen_andi_tl(d_addr, addr, -8);
-            for (i = 0; i < 32; i += 8) {
-                tcg_gen_qemu_st_i64(t64, d_addr, da->mem_idx, da->memop);
-                tcg_gen_add_tl(d_addr, d_addr, eight);
-            }
+            tcg_gen_concat_tl_i64(t8, lo, hi);
+            tcg_gen_concat_i64_i128(t16, t8, t8);
+            tcg_gen_andi_tl(daddr, addr, -32);
+            tcg_gen_qemu_st_i128(t16, daddr, da->mem_idx, mop);
+            tcg_gen_addi_tl(daddr, daddr, 16);
+            tcg_gen_qemu_st_i128(t16, daddr, da->mem_idx, mop);
         }
         break;
 
@@ -2222,7 +1923,7 @@
 
     s1 = gen_load_fpr_F(dc, rs);
     s2 = gen_load_fpr_F(dc, rd);
-    dst = gen_dest_fpr_F(dc);
+    dst = tcg_temp_new_i32();
     zero = tcg_constant_i32(0);
 
     tcg_gen_movcond_i32(TCG_COND_NE, dst, c32, zero, s1, s2);
@@ -4383,38 +4084,75 @@
     return true;
 }
 
-static bool do_ldfsr(DisasContext *dc, arg_r_r_ri *a, MemOp mop,
-                     target_ulong new_mask, target_ulong old_mask)
+static bool trans_LDFSR(DisasContext *dc, arg_r_r_ri *a)
 {
-    TCGv tmp, addr = gen_ldst_addr(dc, a->rs1, a->imm, a->rs2_or_imm);
+    TCGv addr = gen_ldst_addr(dc, a->rs1, a->imm, a->rs2_or_imm);
+    TCGv_i32 tmp;
+
     if (addr == NULL) {
         return false;
     }
     if (gen_trap_ifnofpu(dc)) {
         return true;
     }
-    tmp = tcg_temp_new();
-    tcg_gen_qemu_ld_tl(tmp, addr, dc->mem_idx, mop | MO_ALIGN);
-    tcg_gen_andi_tl(tmp, tmp, new_mask);
-    tcg_gen_andi_tl(cpu_fsr, cpu_fsr, old_mask);
-    tcg_gen_or_tl(cpu_fsr, cpu_fsr, tmp);
-    gen_helper_set_fsr(tcg_env, cpu_fsr);
+
+    tmp = tcg_temp_new_i32();
+    tcg_gen_qemu_ld_i32(tmp, addr, dc->mem_idx, MO_TEUL | MO_ALIGN);
+
+    tcg_gen_extract_i32(cpu_fcc[0], tmp, FSR_FCC0_SHIFT, 2);
+    /* LDFSR does not change FCC[1-3]. */
+
+    gen_helper_set_fsr_nofcc_noftt(tcg_env, tmp);
     return advance_pc(dc);
 }
 
-TRANS(LDFSR, ALL, do_ldfsr, a, MO_TEUL, FSR_LDFSR_MASK, FSR_LDFSR_OLDMASK)
-TRANS(LDXFSR, 64, do_ldfsr, a, MO_TEUQ, FSR_LDXFSR_MASK, FSR_LDXFSR_OLDMASK)
+static bool trans_LDXFSR(DisasContext *dc, arg_r_r_ri *a)
+{
+#ifdef TARGET_SPARC64
+    TCGv addr = gen_ldst_addr(dc, a->rs1, a->imm, a->rs2_or_imm);
+    TCGv_i64 t64;
+    TCGv_i32 lo, hi;
+
+    if (addr == NULL) {
+        return false;
+    }
+    if (gen_trap_ifnofpu(dc)) {
+        return true;
+    }
+
+    t64 = tcg_temp_new_i64();
+    tcg_gen_qemu_ld_i64(t64, addr, dc->mem_idx, MO_TEUQ | MO_ALIGN);
+
+    lo = tcg_temp_new_i32();
+    hi = cpu_fcc[3];
+    tcg_gen_extr_i64_i32(lo, hi, t64);
+    tcg_gen_extract_i32(cpu_fcc[0], lo, FSR_FCC0_SHIFT, 2);
+    tcg_gen_extract_i32(cpu_fcc[1], hi, FSR_FCC1_SHIFT - 32, 2);
+    tcg_gen_extract_i32(cpu_fcc[2], hi, FSR_FCC2_SHIFT - 32, 2);
+    tcg_gen_extract_i32(cpu_fcc[3], hi, FSR_FCC3_SHIFT - 32, 2);
+
+    gen_helper_set_fsr_nofcc_noftt(tcg_env, lo);
+    return advance_pc(dc);
+#else
+    return false;
+#endif
+}
 
 static bool do_stfsr(DisasContext *dc, arg_r_r_ri *a, MemOp mop)
 {
     TCGv addr = gen_ldst_addr(dc, a->rs1, a->imm, a->rs2_or_imm);
+    TCGv fsr;
+
     if (addr == NULL) {
         return false;
     }
     if (gen_trap_ifnofpu(dc)) {
         return true;
     }
-    tcg_gen_qemu_st_tl(cpu_fsr, addr, dc->mem_idx, mop | MO_ALIGN);
+
+    fsr = tcg_temp_new();
+    gen_helper_get_fsr(fsr, tcg_env);
+    tcg_gen_qemu_st_tl(fsr, addr, dc->mem_idx, mop | MO_ALIGN);
     return advance_pc(dc);
 }
 
@@ -4491,7 +4229,7 @@
         return true;
     }
 
-    dst = gen_dest_fpr_F(dc);
+    dst = tcg_temp_new_i32();
     src = gen_load_fpr_D(dc, a->rs);
     func(dst, src);
     gen_store_fpr_F(dc, a->rd, dst);
@@ -4510,10 +4248,8 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
     tmp = gen_load_fpr_F(dc, a->rs);
     func(tmp, tcg_env, tmp);
-    gen_helper_check_ieee_exceptions(cpu_fsr, tcg_env);
     gen_store_fpr_F(dc, a->rd, tmp);
     return advance_pc(dc);
 }
@@ -4532,11 +4268,9 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
-    dst = gen_dest_fpr_F(dc);
+    dst = tcg_temp_new_i32();
     src = gen_load_fpr_D(dc, a->rs);
     func(dst, tcg_env, src);
-    gen_helper_check_ieee_exceptions(cpu_fsr, tcg_env);
     gen_store_fpr_F(dc, a->rd, dst);
     return advance_pc(dc);
 }
@@ -4576,11 +4310,9 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
     dst = gen_dest_fpr_D(dc, a->rd);
     src = gen_load_fpr_D(dc, a->rs);
     func(dst, tcg_env, src);
-    gen_helper_check_ieee_exceptions(cpu_fsr, tcg_env);
     gen_store_fpr_D(dc, a->rd, dst);
     return advance_pc(dc);
 }
@@ -4599,11 +4331,9 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
     dst = gen_dest_fpr_D(dc, a->rd);
     src = gen_load_fpr_F(dc, a->rs);
     func(dst, tcg_env, src);
-    gen_helper_check_ieee_exceptions(cpu_fsr, tcg_env);
     gen_store_fpr_D(dc, a->rd, dst);
     return advance_pc(dc);
 }
@@ -4612,32 +4342,11 @@
 TRANS(FsTOd, ALL, do_env_df, a, gen_helper_fstod)
 TRANS(FsTOx, 64, do_env_df, a, gen_helper_fstox)
 
-static bool trans_FMOVq(DisasContext *dc, arg_FMOVq *a)
-{
-    int rd, rs;
-
-    if (!avail_64(dc)) {
-        return false;
-    }
-    if (gen_trap_ifnofpu(dc)) {
-        return true;
-    }
-    if (gen_trap_float128(dc)) {
-        return true;
-    }
-
-    gen_op_clear_ieee_excp_and_FTT();
-    rd = QFPREG(a->rd);
-    rs = QFPREG(a->rs);
-    tcg_gen_mov_i64(cpu_fpr[rd / 2], cpu_fpr[rs / 2]);
-    tcg_gen_mov_i64(cpu_fpr[rd / 2 + 1], cpu_fpr[rs / 2 + 1]);
-    gen_update_fprs_dirty(dc, rd);
-    return advance_pc(dc);
-}
-
 static bool do_qq(DisasContext *dc, arg_r_r *a,
-                  void (*func)(TCGv_env))
+                  void (*func)(TCGv_i128, TCGv_i128))
 {
+    TCGv_i128 t;
+
     if (gen_trap_ifnofpu(dc)) {
         return true;
     }
@@ -4646,19 +4355,21 @@
     }
 
     gen_op_clear_ieee_excp_and_FTT();
-    gen_op_load_fpr_QT1(QFPREG(a->rs));
-    func(tcg_env);
-    gen_op_store_QT0_fpr(QFPREG(a->rd));
-    gen_update_fprs_dirty(dc, QFPREG(a->rd));
+    t = gen_load_fpr_Q(dc, a->rs);
+    func(t, t);
+    gen_store_fpr_Q(dc, a->rd, t);
     return advance_pc(dc);
 }
 
-TRANS(FNEGq, 64, do_qq, a, gen_helper_fnegq)
-TRANS(FABSq, 64, do_qq, a, gen_helper_fabsq)
+TRANS(FMOVq, 64, do_qq, a, tcg_gen_mov_i128)
+TRANS(FNEGq, 64, do_qq, a, gen_op_fnegq)
+TRANS(FABSq, 64, do_qq, a, gen_op_fabsq)
 
 static bool do_env_qq(DisasContext *dc, arg_r_r *a,
-                       void (*func)(TCGv_env))
+                      void (*func)(TCGv_i128, TCGv_env, TCGv_i128))
 {
+    TCGv_i128 t;
+
     if (gen_trap_ifnofpu(dc)) {
         return true;
     }
@@ -4666,20 +4377,18 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
-    gen_op_load_fpr_QT1(QFPREG(a->rs));
-    func(tcg_env);
-    gen_helper_check_ieee_exceptions(cpu_fsr, tcg_env);
-    gen_op_store_QT0_fpr(QFPREG(a->rd));
-    gen_update_fprs_dirty(dc, QFPREG(a->rd));
+    t = gen_load_fpr_Q(dc, a->rs);
+    func(t, tcg_env, t);
+    gen_store_fpr_Q(dc, a->rd, t);
     return advance_pc(dc);
 }
 
 TRANS(FSQRTq, ALL, do_env_qq, a, gen_helper_fsqrtq)
 
 static bool do_env_fq(DisasContext *dc, arg_r_r *a,
-                      void (*func)(TCGv_i32, TCGv_env))
+                      void (*func)(TCGv_i32, TCGv_env, TCGv_i128))
 {
+    TCGv_i128 src;
     TCGv_i32 dst;
 
     if (gen_trap_ifnofpu(dc)) {
@@ -4689,11 +4398,9 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
-    gen_op_load_fpr_QT1(QFPREG(a->rs));
-    dst = gen_dest_fpr_F(dc);
-    func(dst, tcg_env);
-    gen_helper_check_ieee_exceptions(cpu_fsr, tcg_env);
+    src = gen_load_fpr_Q(dc, a->rs);
+    dst = tcg_temp_new_i32();
+    func(dst, tcg_env, src);
     gen_store_fpr_F(dc, a->rd, dst);
     return advance_pc(dc);
 }
@@ -4702,8 +4409,9 @@
 TRANS(FqTOi, ALL, do_env_fq, a, gen_helper_fqtoi)
 
 static bool do_env_dq(DisasContext *dc, arg_r_r *a,
-                      void (*func)(TCGv_i64, TCGv_env))
+                      void (*func)(TCGv_i64, TCGv_env, TCGv_i128))
 {
+    TCGv_i128 src;
     TCGv_i64 dst;
 
     if (gen_trap_ifnofpu(dc)) {
@@ -4713,11 +4421,9 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
-    gen_op_load_fpr_QT1(QFPREG(a->rs));
+    src = gen_load_fpr_Q(dc, a->rs);
     dst = gen_dest_fpr_D(dc, a->rd);
-    func(dst, tcg_env);
-    gen_helper_check_ieee_exceptions(cpu_fsr, tcg_env);
+    func(dst, tcg_env, src);
     gen_store_fpr_D(dc, a->rd, dst);
     return advance_pc(dc);
 }
@@ -4726,9 +4432,10 @@
 TRANS(FqTOx, 64, do_env_dq, a, gen_helper_fqtox)
 
 static bool do_env_qf(DisasContext *dc, arg_r_r *a,
-                      void (*func)(TCGv_env, TCGv_i32))
+                      void (*func)(TCGv_i128, TCGv_env, TCGv_i32))
 {
     TCGv_i32 src;
+    TCGv_i128 dst;
 
     if (gen_trap_ifnofpu(dc)) {
         return true;
@@ -4737,11 +4444,10 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
     src = gen_load_fpr_F(dc, a->rs);
-    func(tcg_env, src);
-    gen_op_store_QT0_fpr(QFPREG(a->rd));
-    gen_update_fprs_dirty(dc, QFPREG(a->rd));
+    dst = tcg_temp_new_i128();
+    func(dst, tcg_env, src);
+    gen_store_fpr_Q(dc, a->rd, dst);
     return advance_pc(dc);
 }
 
@@ -4749,9 +4455,10 @@
 TRANS(FsTOq, ALL, do_env_qf, a, gen_helper_fstoq)
 
 static bool do_env_qd(DisasContext *dc, arg_r_r *a,
-                      void (*func)(TCGv_env, TCGv_i64))
+                      void (*func)(TCGv_i128, TCGv_env, TCGv_i64))
 {
     TCGv_i64 src;
+    TCGv_i128 dst;
 
     if (gen_trap_ifnofpu(dc)) {
         return true;
@@ -4760,11 +4467,10 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
     src = gen_load_fpr_D(dc, a->rs);
-    func(tcg_env, src);
-    gen_op_store_QT0_fpr(QFPREG(a->rd));
-    gen_update_fprs_dirty(dc, QFPREG(a->rd));
+    dst = tcg_temp_new_i128();
+    func(dst, tcg_env, src);
+    gen_store_fpr_Q(dc, a->rd, dst);
     return advance_pc(dc);
 }
 
@@ -4809,11 +4515,9 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
     src1 = gen_load_fpr_F(dc, a->rs1);
     src2 = gen_load_fpr_F(dc, a->rs2);
     func(src1, tcg_env, src1, src2);
-    gen_helper_check_ieee_exceptions(cpu_fsr, tcg_env);
     gen_store_fpr_F(dc, a->rd, src1);
     return advance_pc(dc);
 }
@@ -4904,12 +4608,10 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
     dst = gen_dest_fpr_D(dc, a->rd);
     src1 = gen_load_fpr_D(dc, a->rs1);
     src2 = gen_load_fpr_D(dc, a->rs2);
     func(dst, tcg_env, src1, src2);
-    gen_helper_check_ieee_exceptions(cpu_fsr, tcg_env);
     gen_store_fpr_D(dc, a->rd, dst);
     return advance_pc(dc);
 }
@@ -4931,12 +4633,10 @@
         return raise_unimpfpop(dc);
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
     dst = gen_dest_fpr_D(dc, a->rd);
     src1 = gen_load_fpr_F(dc, a->rs1);
     src2 = gen_load_fpr_F(dc, a->rs2);
     gen_helper_fsmuld(dst, tcg_env, src1, src2);
-    gen_helper_check_ieee_exceptions(cpu_fsr, tcg_env);
     gen_store_fpr_D(dc, a->rd, dst);
     return advance_pc(dc);
 }
@@ -4962,8 +4662,10 @@
 TRANS(PDIST, VIS1, do_dddd, a, gen_helper_pdist)
 
 static bool do_env_qqq(DisasContext *dc, arg_r_r_r *a,
-                       void (*func)(TCGv_env))
+                       void (*func)(TCGv_i128, TCGv_env, TCGv_i128, TCGv_i128))
 {
+    TCGv_i128 src1, src2;
+
     if (gen_trap_ifnofpu(dc)) {
         return true;
     }
@@ -4971,13 +4673,10 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
-    gen_op_load_fpr_QT0(QFPREG(a->rs1));
-    gen_op_load_fpr_QT1(QFPREG(a->rs2));
-    func(tcg_env);
-    gen_helper_check_ieee_exceptions(cpu_fsr, tcg_env);
-    gen_op_store_QT0_fpr(QFPREG(a->rd));
-    gen_update_fprs_dirty(dc, QFPREG(a->rd));
+    src1 = gen_load_fpr_Q(dc, a->rs1);
+    src2 = gen_load_fpr_Q(dc, a->rs2);
+    func(src1, tcg_env, src1, src2);
+    gen_store_fpr_Q(dc, a->rd, src1);
     return advance_pc(dc);
 }
 
@@ -4989,6 +4688,7 @@
 static bool trans_FdMULq(DisasContext *dc, arg_r_r_r *a)
 {
     TCGv_i64 src1, src2;
+    TCGv_i128 dst;
 
     if (gen_trap_ifnofpu(dc)) {
         return true;
@@ -4997,13 +4697,11 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
     src1 = gen_load_fpr_D(dc, a->rs1);
     src2 = gen_load_fpr_D(dc, a->rs2);
-    gen_helper_fdmulq(tcg_env, src1, src2);
-    gen_helper_check_ieee_exceptions(cpu_fsr, tcg_env);
-    gen_op_store_QT0_fpr(QFPREG(a->rd));
-    gen_update_fprs_dirty(dc, QFPREG(a->rd));
+    dst = tcg_temp_new_i128();
+    gen_helper_fdmulq(dst, tcg_env, src1, src2);
+    gen_store_fpr_Q(dc, a->rd, dst);
     return advance_pc(dc);
 }
 
@@ -5086,13 +4784,12 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
     src1 = gen_load_fpr_F(dc, a->rs1);
     src2 = gen_load_fpr_F(dc, a->rs2);
     if (e) {
-        gen_op_fcmpes(a->cc, src1, src2);
+        gen_helper_fcmpes(cpu_fcc[a->cc], tcg_env, src1, src2);
     } else {
-        gen_op_fcmps(a->cc, src1, src2);
+        gen_helper_fcmps(cpu_fcc[a->cc], tcg_env, src1, src2);
     }
     return advance_pc(dc);
 }
@@ -5111,13 +4808,12 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
     src1 = gen_load_fpr_D(dc, a->rs1);
     src2 = gen_load_fpr_D(dc, a->rs2);
     if (e) {
-        gen_op_fcmped(a->cc, src1, src2);
+        gen_helper_fcmped(cpu_fcc[a->cc], tcg_env, src1, src2);
     } else {
-        gen_op_fcmpd(a->cc, src1, src2);
+        gen_helper_fcmpd(cpu_fcc[a->cc], tcg_env, src1, src2);
     }
     return advance_pc(dc);
 }
@@ -5127,6 +4823,8 @@
 
 static bool do_fcmpq(DisasContext *dc, arg_FCMPq *a, bool e)
 {
+    TCGv_i128 src1, src2;
+
     if (avail_32(dc) && a->cc != 0) {
         return false;
     }
@@ -5137,13 +4835,12 @@
         return true;
     }
 
-    gen_op_clear_ieee_excp_and_FTT();
-    gen_op_load_fpr_QT0(QFPREG(a->rs1));
-    gen_op_load_fpr_QT1(QFPREG(a->rs2));
+    src1 = gen_load_fpr_Q(dc, a->rs1);
+    src2 = gen_load_fpr_Q(dc, a->rs2);
     if (e) {
-        gen_op_fcmpeq(a->cc);
+        gen_helper_fcmpeq(cpu_fcc[a->cc], tcg_env, src1, src2);
     } else {
-        gen_op_fcmpq(a->cc);
+        gen_helper_fcmpq(cpu_fcc[a->cc], tcg_env, src1, src2);
     }
     return advance_pc(dc);
 }
@@ -5349,6 +5046,18 @@
         "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62",
     };
 
+    static const struct { TCGv_i32 *ptr; int off; const char *name; } r32[] = {
+#ifdef TARGET_SPARC64
+        { &cpu_fprs, offsetof(CPUSPARCState, fprs), "fprs" },
+        { &cpu_fcc[0], offsetof(CPUSPARCState, fcc[0]), "fcc0" },
+        { &cpu_fcc[1], offsetof(CPUSPARCState, fcc[1]), "fcc1" },
+        { &cpu_fcc[2], offsetof(CPUSPARCState, fcc[2]), "fcc2" },
+        { &cpu_fcc[3], offsetof(CPUSPARCState, fcc[3]), "fcc3" },
+#else
+        { &cpu_fcc[0], offsetof(CPUSPARCState, fcc[0]), "fcc" },
+#endif
+    };
+
     static const struct { TCGv *ptr; int off; const char *name; } rtl[] = {
 #ifdef TARGET_SPARC64
         { &cpu_gsr, offsetof(CPUSPARCState, gsr), "gsr" },
@@ -5360,7 +5069,6 @@
         { &cpu_icc_Z, offsetof(CPUSPARCState, icc_Z), "icc_Z" },
         { &cpu_icc_C, offsetof(CPUSPARCState, icc_C), "icc_C" },
         { &cpu_cond, offsetof(CPUSPARCState, cond), "cond" },
-        { &cpu_fsr, offsetof(CPUSPARCState, fsr), "fsr" },
         { &cpu_pc, offsetof(CPUSPARCState, pc), "pc" },
         { &cpu_npc, offsetof(CPUSPARCState, npc), "npc" },
         { &cpu_y, offsetof(CPUSPARCState, y), "y" },
@@ -5373,6 +5081,10 @@
                                          offsetof(CPUSPARCState, regwptr),
                                          "regwptr");
 
+    for (i = 0; i < ARRAY_SIZE(r32); ++i) {
+        *r32[i].ptr = tcg_global_mem_new_i32(tcg_env, r32[i].off, r32[i].name);
+    }
+
     for (i = 0; i < ARRAY_SIZE(rtl); ++i) {
         *rtl[i].ptr = tcg_global_mem_new(tcg_env, rtl[i].off, rtl[i].name);
     }
@@ -5395,11 +5107,6 @@
                                             offsetof(CPUSPARCState, fpr[i]),
                                             fregnames[i]);
     }
-
-#ifdef TARGET_SPARC64
-    cpu_fprs = tcg_global_mem_new_i32(tcg_env,
-                                      offsetof(CPUSPARCState, fprs), "fprs");
-#endif
 }
 
 void sparc_restore_state_to_opc(CPUState *cs,
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
index e6d91c7..74e8a22 100644
--- a/target/tricore/cpu.c
+++ b/target/tricore/cpu.c
@@ -89,6 +89,11 @@
     return true;
 }
 
+static int tricore_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    return 0;
+}
+
 static void tricore_cpu_realizefn(DeviceState *dev, Error **errp)
 {
     CPUState *cs = CPU(dev);
@@ -194,6 +199,7 @@
                                        &mcc->parent_phases);
     cc->class_by_name = tricore_cpu_class_by_name;
     cc->has_work = tricore_cpu_has_work;
+    cc->mmu_index = tricore_cpu_mmu_index;
 
     cc->gdb_read_register = tricore_cpu_gdb_read_register;
     cc->gdb_write_register = tricore_cpu_gdb_write_register;
diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h
index 2d4446c..220af69 100644
--- a/target/tricore/cpu.h
+++ b/target/tricore/cpu.h
@@ -246,11 +246,6 @@
 
 #define MMU_USER_IDX 2
 
-static inline int cpu_mmu_index(CPUTriCoreState *env, bool ifetch)
-{
-    return 0;
-}
-
 #include "exec/cpu-all.h"
 
 FIELD(TB_FLAGS, PRIV, 0, 2)
diff --git a/target/tricore/helper.c b/target/tricore/helper.c
index 174f666..649373a 100644
--- a/target/tricore/helper.c
+++ b/target/tricore/helper.c
@@ -48,7 +48,7 @@
     TriCoreCPU *cpu = TRICORE_CPU(cs);
     hwaddr phys_addr;
     int prot;
-    int mmu_idx = cpu_mmu_index(&cpu->env, false);
+    int mmu_idx = cpu_mmu_index(cs, false);
 
     if (get_physical_address(&cpu->env, &phys_addr, &prot, addr,
                              MMU_DATA_LOAD, mmu_idx)) {
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
index f1156c3..278c514 100644
--- a/target/tricore/translate.c
+++ b/target/tricore/translate.c
@@ -8355,7 +8355,7 @@
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
     CPUTriCoreState *env = cpu_env(cs);
-    ctx->mem_idx = cpu_mmu_index(env, false);
+    ctx->mem_idx = cpu_mmu_index(cs, false);
 
     uint32_t tb_flags = (uint32_t)ctx->base.tb->flags;
     ctx->priv = FIELD_EX32(tb_flags, TB_FLAGS, PRIV);
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
index 62020b1..79f9181 100644
--- a/target/xtensa/cpu.c
+++ b/target/xtensa/cpu.c
@@ -74,6 +74,11 @@
 #endif
 }
 
+static int xtensa_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+    return xtensa_get_cring(cpu_env(cs));
+}
+
 #ifdef CONFIG_USER_ONLY
 static bool abi_call0;
 
@@ -252,6 +257,7 @@
 
     cc->class_by_name = xtensa_cpu_class_by_name;
     cc->has_work = xtensa_cpu_has_work;
+    cc->mmu_index = xtensa_cpu_mmu_index;
     cc->dump_state = xtensa_cpu_dump_state;
     cc->set_pc = xtensa_cpu_set_pc;
     cc->get_pc = xtensa_cpu_get_pc;
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index 4b033ee..6b8d063 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -713,11 +713,6 @@
 /* MMU modes definitions */
 #define MMU_USER_IDX 3
 
-static inline int cpu_mmu_index(CPUXtensaState *env, bool ifetch)
-{
-    return xtensa_get_cring(env);
-}
-
 #define XTENSA_TBFLAG_RING_MASK 0x3
 #define XTENSA_TBFLAG_EXCM 0x4
 #define XTENSA_TBFLAG_LITBASE 0x8
diff --git a/target/xtensa/mmu_helper.c b/target/xtensa/mmu_helper.c
index 2fda4e8..47063b0 100644
--- a/target/xtensa/mmu_helper.c
+++ b/target/xtensa/mmu_helper.c
@@ -66,7 +66,7 @@
      * only the side-effects (ie any MMU or other exception)
      */
     probe_access(env, vaddr, 1, MMU_INST_FETCH,
-                 cpu_mmu_index(env, true), GETPC());
+                 cpu_mmu_index(env_cpu(env), true), GETPC());
 }
 
 void HELPER(wsr_rasid)(CPUXtensaState *env, uint32_t v)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index bab0a17..dcf0205 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -2327,7 +2327,7 @@
     tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
     tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
 
-    tcg_target_call_clobber_regs = ALL_GENERAL_REGS;
+    tcg_target_call_clobber_regs = ALL_GENERAL_REGS | ALL_VECTOR_REGS;
     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
diff --git a/tests/tcg/multiarch/gdbstub/prot-none.py b/tests/tcg/multiarch/gdbstub/prot-none.py
index e829d3e..7e26458 100644
--- a/tests/tcg/multiarch/gdbstub/prot-none.py
+++ b/tests/tcg/multiarch/gdbstub/prot-none.py
@@ -20,7 +20,7 @@
 
 def run_test():
     """Run through the tests one by one"""
-    if not probe_proc_self_mem:
+    if not probe_proc_self_mem():
         print("SKIP: /proc/self/mem is not usable")
         exit(0)
     gdb.Breakpoint("break_here")