softmmu templates: optionally pass CPUState to memory access functions

Optionally, make memory access helpers take a parameter for CPUState
instead of relying on global env.

On most targets, perform simple moves to reorder registers. On i386,
switch from regparm(3) calling convention to standard stack-based
version.

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 5af21b3..4d59a63 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -929,6 +929,27 @@
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -936,6 +957,8 @@
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -943,6 +966,7 @@
     __stq_mmu,
 };
 #endif
+#endif
 
 #define TLB_SHIFT	(CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
 
@@ -1075,6 +1099,19 @@
                     TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0));
     tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index);
 # endif
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 bit */
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[2], 0,
+                    tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[1], 0,
+                    tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0));
+
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[0], 0, TCG_AREG0,
+                    SHIFT_IMM_LSL(0));
+#endif
     tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);
 
     switch (opc) {
@@ -1341,6 +1378,22 @@
     }
 # endif
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 bit */
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[3], 0,
+                    tcg_target_call_iarg_regs[2], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[2], 0,
+                    tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[1], 0,
+                    tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0));
+
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[0], 0, TCG_AREG0,
+                    SHIFT_IMM_LSL(0));
+#endif
     tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]);
     if (opc == 3)
         tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10);
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index c5a3730..e579ef0 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -882,6 +882,27 @@
 #if defined(CONFIG_SOFTMMU)
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -889,12 +910,15 @@
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 /* Load and compare a TLB entry, and branch if TLB miss.  OFFSET is set to
    the offset of the first ADDR_READ or ADDR_WRITE member of the appropriate
@@ -1061,6 +1085,15 @@
     }
     tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call(s, qemu_ld_helpers[opc & 3]);
 
     switch (opc) {
@@ -1212,6 +1245,17 @@
         tcg_abort();
     }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call(s, qemu_st_helpers[opc]);
 
     /* label2: */
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index c4e940d..43a51a1 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -178,6 +178,9 @@
             tcg_regset_set32(ct->u.regs, 0, 0xffff);
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
+#ifdef CONFIG_TCG_PASS_AREG0
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDX);
+#endif
         } else {
             tcg_regset_set32(ct->u.regs, 0, 0xff);
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
@@ -957,6 +960,27 @@
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void *qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void *qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -964,12 +988,15 @@
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 /* Perform the TLB load and compare.
 
@@ -1188,11 +1215,26 @@
     }
     tcg_out_push(s, args[addrlo_idx]);
     stack_adjust += 4;
+#ifdef CONFIG_TCG_PASS_AREG0
+    tcg_out_push(s, TCG_AREG0);
+    stack_adjust += 4;
+#endif
 #else
     /* The first argument is already loaded with addrlo.  */
     arg_idx = 1;
     tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                  mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
 #endif
 
     tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
@@ -1386,11 +1428,26 @@
     }
     tcg_out_push(s, args[addrlo_idx]);
     stack_adjust += 4;
+#ifdef CONFIG_TCG_PASS_AREG0
+    tcg_out_push(s, TCG_AREG0);
+    stack_adjust += 4;
+#endif
 #else
     tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                 TCG_REG_RSI, data_reg);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
     stack_adjust = 0;
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
 #endif
 
     tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index f90252a..e02dacc 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1452,12 +1452,25 @@
                                TCG_REG_P7, TCG_REG_R3, TCG_REG_R57));
 }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
     __ldl_mmu,
     __ldq_mmu,
 };
+#endif
 
 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1517,6 +1530,15 @@
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     if (!bswap || s_bits == 0) {
         tcg_out_bundle(s, miB,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
@@ -1547,12 +1569,25 @@
     }
 }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1622,6 +1657,17 @@
         data_reg = TCG_REG_R2;
     }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
                                data_reg, TCG_REG_R3),
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index c6aa5bc..393ba07 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -750,6 +750,27 @@
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -757,6 +778,8 @@
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -764,6 +787,7 @@
     __stq_mmu,
 };
 #endif
+#endif
 
 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                             int opc)
@@ -858,6 +882,15 @@
 # endif
     tcg_out_movi(s, TCG_TYPE_I32, sp_args++, mem_index);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_ld_helpers[s_bits]);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 on 32 bit */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0);
     tcg_out_nop(s);
 
@@ -1069,6 +1102,17 @@
     }
 
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_st_helpers[s_bits]);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 on 32 bit */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0);
     tcg_out_nop(s);
 
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 6a34cab..b0aa914 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -508,6 +508,27 @@
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -515,6 +536,8 @@
     __ldq_mmu,
 };
 
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -522,6 +545,7 @@
     __stq_mmu,
 };
 #endif
+#endif
 
 static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
 {
@@ -598,6 +622,16 @@
     tcg_out_movi (s, TCG_TYPE_I32, 5, mem_index);
 #endif
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
+
     tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
     switch (opc) {
     case 0|4:
@@ -829,6 +863,17 @@
     ir++;
 
     tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);
     label2_ptr = s->code_ptr;
     tcg_out32 (s, B);
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 7f723b5..409a1ac 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -552,6 +552,27 @@
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -559,12 +580,15 @@
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 static void tcg_out_tlb_read (TCGContext *s, int r0, int r1, int r2,
                               int addr_reg, int s_bits, int offset)
@@ -648,6 +672,15 @@
     tcg_out_mov (s, TCG_TYPE_I64, 3, addr_reg);
     tcg_out_movi (s, TCG_TYPE_I64, 4, mem_index);
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
 
     switch (opc) {
@@ -796,6 +829,17 @@
     tcg_out_rld (s, RLDICL, 4, data_reg, 0, 64 - (1 << (3 + opc)));
     tcg_out_movi (s, TCG_TYPE_I64, 5, mem_index);
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);
 
     label2_ptr = s->code_ptr;
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 47ffcc1..04662c1 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -301,6 +301,27 @@
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -308,6 +329,8 @@
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -315,6 +338,7 @@
     __stq_mmu,
 };
 #endif
+#endif
 
 static uint8_t *tb_ret_addr;
 
@@ -1483,9 +1507,29 @@
             tcg_abort();
         }
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+        /* XXX/FIXME: suboptimal */
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                    tcg_target_call_iarg_regs[1]);
+        tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                    tcg_target_call_iarg_regs[0]);
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                    TCG_AREG0);
+#endif
         tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]);
     } else {
         tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+        /* XXX/FIXME: suboptimal */
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                    tcg_target_call_iarg_regs[2]);
+        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                    tcg_target_call_iarg_regs[1]);
+        tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                    tcg_target_call_iarg_regs[0]);
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                    TCG_AREG0);
+#endif
         tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]);
 
         /* sign extension */
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index b287122..80f0818 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -59,6 +59,12 @@
 };
 #endif
 
+#ifdef CONFIG_TCG_PASS_AREG0
+#define ARG_OFFSET 1
+#else
+#define ARG_OFFSET 0
+#endif
+
 static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_L0,
     TCG_REG_L1,
@@ -86,9 +92,9 @@
 
 static const int tcg_target_call_oarg_regs[] = {
     TCG_REG_O0,
-#if TCG_TARGET_REG_BITS == 32
-    TCG_REG_O1
-#endif
+    TCG_REG_O1,
+    TCG_REG_O2,
+    TCG_REG_O3,
 };
 
 static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
@@ -155,6 +161,9 @@
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O3);
+#endif
         break;
     case 'I':
         ct->ct |= TCG_CT_CONST_S11;
@@ -706,6 +715,27 @@
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr, int
+   mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -713,6 +743,8 @@
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static const void * const qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -720,6 +752,7 @@
     __stq_mmu,
 };
 #endif
+#endif
 
 #if TARGET_LONG_BITS == 32
 #define TARGET_LD_OP LDUW
@@ -801,6 +834,17 @@
 
     /* mov */
     tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
 
     /* XXX: move that code at the end of the TB */
     /* qemu_ld_helper[s_bits](arg0, arg1) */
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index bd85073..453f187 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -798,6 +798,9 @@
     case INDEX_op_qemu_st8:
     case INDEX_op_qemu_st16:
     case INDEX_op_qemu_st32:
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_out_r(s, TCG_AREG0);
+#endif
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@@ -808,6 +811,9 @@
 #endif
         break;
     case INDEX_op_qemu_st64:
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_out_r(s, TCG_AREG0);
+#endif
         tcg_out_r(s, *args++);
 #if TCG_TARGET_REG_BITS == 32
         tcg_out_r(s, *args++);