ARM TCG conversion 1/16.


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4138 c046a42c-6fe2-441c-8c8c-71466251a162
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index b284a21..275733e 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -89,7 +89,7 @@
     uint32_t NZF; /* N is bit 31. Z is computed from NZF */
     uint32_t QF; /* 0 or 1 */
     uint32_t GE; /* cpsr[19:16] */
-    int thumb; /* cprs[5]. 0 = arm mode, 1 = thumb mode. */
+    uint32_t thumb; /* cpsr[5]. 0 = arm mode, 1 = thumb mode. */
     uint32_t condexec_bits; /* IT bits.  cpsr[15:10,26:25].  */
 
     /* System control coprocessor (cp15) */
@@ -207,6 +207,7 @@
 } CPUARMState;
 
 CPUARMState *cpu_arm_init(const char *cpu_model);
+void arm_translate_init(void);
 int cpu_arm_exec(CPUARMState *s);
 void cpu_arm_close(CPUARMState *s);
 void do_interrupt(CPUARMState *);
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 86470db..df09778 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -5,6 +5,7 @@
 #include "cpu.h"
 #include "exec-all.h"
 #include "gdbstub.h"
+#include "helpers.h"
 
 static uint32_t cortexa8_cp15_c0_c1[8] =
 { 0x1031, 0x11, 0x400, 0, 0x31100003, 0x20000000, 0x01202000, 0x11 };
@@ -174,6 +175,7 @@
 {
     CPUARMState *env;
     uint32_t id;
+    static int inited = 0;
 
     id = cpu_arm_find_by_name(cpu_model);
     if (id == 0)
@@ -182,6 +184,11 @@
     if (!env)
         return NULL;
     cpu_exec_init(env);
+    if (!inited) {
+        inited = 1;
+        arm_translate_init();
+    }
+
     env->cpu_model_str = cpu_model;
     env->cp15.c0_cpuid = id;
     cpu_reset(env);
@@ -315,6 +322,24 @@
     env->uncached_cpsr = (env->uncached_cpsr & ~mask) | (val & mask);
 }
 
+#define HELPER(x) helper_##x
+/* Sign/zero extend: bytes 0 and 2 of x are widened into two halfwords.  */
+uint32_t HELPER(sxtb16)(uint32_t x)
+{
+    uint32_t lo = (uint16_t)(int8_t)x;                 /* byte 0 -> bits 15:0 */
+    uint32_t hi = (uint32_t)(int8_t)(x >> 16) << 16;   /* byte 2 -> bits 31:16 */
+
+    return lo | hi;
+}
+
+uint32_t HELPER(uxtb16)(uint32_t x)
+{
+    /* Zero-extend byte 0 into bits 15:0 and byte 2 into bits 31:16.  */
+    uint32_t lo = (uint8_t)x;
+    uint32_t hi = (uint32_t)(uint8_t)(x >> 16) << 16;
+    return lo | hi;
+}
+
 #if defined(CONFIG_USER_ONLY)
 
 void do_interrupt (CPUState *env)
@@ -1861,3 +1886,4 @@
 }
 
 #endif
+
diff --git a/target-arm/helpers.h b/target-arm/helpers.h
new file mode 100644
index 0000000..577f5ee
--- /dev/null
+++ b/target-arm/helpers.h
@@ -0,0 +1,6 @@
+#ifndef DEF_HELPER /* Default expansion: one prototype per helper.  */
+#define DEF_HELPER(name, ret, args) ret helper_##name args;
+#endif
+
+DEF_HELPER(sxtb16, uint32_t, (uint32_t)) /* uint32_t helper_sxtb16(uint32_t); */
+DEF_HELPER(uxtb16, uint32_t, (uint32_t)) /* uint32_t helper_uxtb16(uint32_t); */
diff --git a/target-arm/op.c b/target-arm/op.c
index c2d33ca..9a01675 100644
--- a/target-arm/op.c
+++ b/target-arm/op.c
@@ -20,122 +20,6 @@
  */
 #include "exec.h"
 
-#define REGNAME r0
-#define REG (env->regs[0])
-#include "op_template.h"
-
-#define REGNAME r1
-#define REG (env->regs[1])
-#include "op_template.h"
-
-#define REGNAME r2
-#define REG (env->regs[2])
-#include "op_template.h"
-
-#define REGNAME r3
-#define REG (env->regs[3])
-#include "op_template.h"
-
-#define REGNAME r4
-#define REG (env->regs[4])
-#include "op_template.h"
-
-#define REGNAME r5
-#define REG (env->regs[5])
-#include "op_template.h"
-
-#define REGNAME r6
-#define REG (env->regs[6])
-#include "op_template.h"
-
-#define REGNAME r7
-#define REG (env->regs[7])
-#include "op_template.h"
-
-#define REGNAME r8
-#define REG (env->regs[8])
-#include "op_template.h"
-
-#define REGNAME r9
-#define REG (env->regs[9])
-#include "op_template.h"
-
-#define REGNAME r10
-#define REG (env->regs[10])
-#include "op_template.h"
-
-#define REGNAME r11
-#define REG (env->regs[11])
-#include "op_template.h"
-
-#define REGNAME r12
-#define REG (env->regs[12])
-#include "op_template.h"
-
-#define REGNAME r13
-#define REG (env->regs[13])
-#include "op_template.h"
-
-#define REGNAME r14
-#define REG (env->regs[14])
-#include "op_template.h"
-
-#define REGNAME r15
-#define REG (env->regs[15])
-#define SET_REG(x) REG = x & ~(uint32_t)1
-#include "op_template.h"
-
-void OPPROTO op_bx_T0(void)
-{
-  env->regs[15] = T0 & ~(uint32_t)1;
-  env->thumb = (T0 & 1) != 0;
-}
-
-void OPPROTO op_movl_T0_0(void)
-{
-    T0 = 0;
-}
-
-void OPPROTO op_movl_T0_im(void)
-{
-    T0 = PARAM1;
-}
-
-void OPPROTO op_movl_T1_im(void)
-{
-    T1 = PARAM1;
-}
-
-void OPPROTO op_mov_CF_T1(void)
-{
-    env->CF = ((uint32_t)T1) >> 31;
-}
-
-void OPPROTO op_movl_T2_im(void)
-{
-    T2 = PARAM1;
-}
-
-void OPPROTO op_addl_T1_im(void)
-{
-    T1 += PARAM1;
-}
-
-void OPPROTO op_addl_T1_T2(void)
-{
-    T1 += T2;
-}
-
-void OPPROTO op_subl_T1_T2(void)
-{
-    T1 -= T2;
-}
-
-void OPPROTO op_addl_T0_T1(void)
-{
-    T0 += T1;
-}
-
 void OPPROTO op_addl_T0_T1_cc(void)
 {
     unsigned int src1;
@@ -146,11 +30,6 @@
     env->VF = (src1 ^ T1 ^ -1) & (src1 ^ T0);
 }
 
-void OPPROTO op_adcl_T0_T1(void)
-{
-    T0 += T1 + env->CF;
-}
-
 void OPPROTO op_adcl_T0_T1_cc(void)
 {
     unsigned int src1;
@@ -169,11 +48,6 @@
 
 #define OPSUB(sub, sbc, res, T0, T1)            \
                                                 \
-void OPPROTO op_ ## sub ## l_T0_T1(void)        \
-{                                               \
-    res = T0 - T1;                              \
-}                                               \
-                                                \
 void OPPROTO op_ ## sub ## l_T0_T1_cc(void)     \
 {                                               \
     unsigned int src1;                          \
@@ -211,46 +85,6 @@
 
 OPSUB(rsb, rsc, T0, T1, T0)
 
-void OPPROTO op_andl_T0_T1(void)
-{
-    T0 &= T1;
-}
-
-void OPPROTO op_xorl_T0_T1(void)
-{
-    T0 ^= T1;
-}
-
-void OPPROTO op_orl_T0_T1(void)
-{
-    T0 |= T1;
-}
-
-void OPPROTO op_bicl_T0_T1(void)
-{
-    T0 &= ~T1;
-}
-
-void OPPROTO op_notl_T0(void)
-{
-    T0 = ~T0;
-}
-
-void OPPROTO op_notl_T1(void)
-{
-    T1 = ~T1;
-}
-
-void OPPROTO op_logic_T0_cc(void)
-{
-    env->NZF = T0;
-}
-
-void OPPROTO op_logic_T1_cc(void)
-{
-    env->NZF = T1;
-}
-
 #define EIP (env->regs[15])
 
 void OPPROTO op_test_eq(void)
@@ -485,51 +319,6 @@
 
 /* shifts */
 
-/* Used by NEON.  */
-void OPPROTO op_shll_T0_im(void)
-{
-    T1 = T1 << PARAM1;
-}
-
-/* T1 based */
-
-void OPPROTO op_shll_T1_im(void)
-{
-    T1 = T1 << PARAM1;
-}
-
-void OPPROTO op_shrl_T1_im(void)
-{
-    T1 = (uint32_t)T1 >> PARAM1;
-}
-
-void OPPROTO op_shrl_T1_0(void)
-{
-    T1 = 0;
-}
-
-void OPPROTO op_sarl_T1_im(void)
-{
-    T1 = (int32_t)T1 >> PARAM1;
-}
-
-void OPPROTO op_sarl_T1_0(void)
-{
-    T1 = (int32_t)T1 >> 31;
-}
-
-void OPPROTO op_rorl_T1_im(void)
-{
-    int shift;
-    shift = PARAM1;
-    T1 = ((uint32_t)T1 >> shift) | (T1 << (32 - shift));
-}
-
-void OPPROTO op_rrxl_T1(void)
-{
-    T1 = ((uint32_t)T1 >> 1) | ((uint32_t)env->CF << 31);
-}
-
 /* T1 based, set C flag */
 void OPPROTO op_shll_T1_im_cc(void)
 {
@@ -577,44 +366,6 @@
     env->CF = c;
 }
 
-/* T2 based */
-void OPPROTO op_shll_T2_im(void)
-{
-    T2 = T2 << PARAM1;
-}
-
-void OPPROTO op_shrl_T2_im(void)
-{
-    T2 = (uint32_t)T2 >> PARAM1;
-}
-
-void OPPROTO op_shrl_T2_0(void)
-{
-    T2 = 0;
-}
-
-void OPPROTO op_sarl_T2_im(void)
-{
-    T2 = (int32_t)T2 >> PARAM1;
-}
-
-void OPPROTO op_sarl_T2_0(void)
-{
-    T2 = (int32_t)T2 >> 31;
-}
-
-void OPPROTO op_rorl_T2_im(void)
-{
-    int shift;
-    shift = PARAM1;
-    T2 = ((uint32_t)T2 >> shift) | (T2 << (32 - shift));
-}
-
-void OPPROTO op_rrxl_T2(void)
-{
-    T2 = ((uint32_t)T2 >> 1) | ((uint32_t)env->CF << 31);
-}
-
 /* T1 based, use T0 as shift count */
 
 void OPPROTO op_shll_T1_T0(void)
@@ -733,53 +484,6 @@
     FORCE_RET();
 }
 
-void OPPROTO op_sarl_T0_im(void)
-{
-    T0 = (int32_t)T0 >> PARAM1;
-}
-
-/* Sign/zero extend */
-void OPPROTO op_sxth_T0(void)
-{
-  T0 = (int16_t)T0;
-}
-
-void OPPROTO op_sxth_T1(void)
-{
-  T1 = (int16_t)T1;
-}
-
-void OPPROTO op_sxtb_T1(void)
-{
-    T1 = (int8_t)T1;
-}
-
-void OPPROTO op_uxtb_T1(void)
-{
-    T1 = (uint8_t)T1;
-}
-
-void OPPROTO op_uxth_T1(void)
-{
-    T1 = (uint16_t)T1;
-}
-
-void OPPROTO op_sxtb16_T1(void)
-{
-    uint32_t res;
-    res = (uint16_t)(int8_t)T1;
-    res |= (uint32_t)(int8_t)(T1 >> 16) << 16;
-    T1 = res;
-}
-
-void OPPROTO op_uxtb16_T1(void)
-{
-    uint32_t res;
-    res = (uint16_t)(uint8_t)T1;
-    res |= (uint32_t)(uint8_t)(T1 >> 16) << 16;
-    T1 = res;
-}
-
 #define SIGNBIT (uint32_t)0x80000000
 /* saturating arithmetic  */
 void OPPROTO op_addl_T0_T1_setq(void)
@@ -1369,31 +1073,6 @@
     FORCE_RET();
 }
 
-void OPPROTO op_movl_T0_T1(void)
-{
-    T0 = T1;
-}
-
-void OPPROTO op_movl_T0_T2(void)
-{
-    T0 = T2;
-}
-
-void OPPROTO op_movl_T1_T0(void)
-{
-    T1 = T0;
-}
-
-void OPPROTO op_movl_T1_T2(void)
-{
-    T1 = T2;
-}
-
-void OPPROTO op_movl_T2_T0(void)
-{
-    T2 = T0;
-}
-
 /* ARMv6 Media instructions.  */
 
 /* Note that signed overflow is undefined in C.  The following routines are
@@ -1769,15 +1448,6 @@
 }
 
 /* Dual 16-bit add.  */
-void OPPROTO op_add16_T1_T2(void)
-{
-    uint32_t mask;
-    mask = (T0 & T1) & 0x8000;
-    T0 ^= ~0x8000;
-    T1 ^= ~0x8000;
-    T0 = (T0 + T1) ^ mask;
-}
-
 static inline uint8_t do_usad(uint8_t a, uint8_t b)
 {
     if (a > b)
diff --git a/target-arm/op_template.h b/target-arm/op_template.h
deleted file mode 100644
index 33d53c0..0000000
--- a/target-arm/op_template.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- *  ARM micro operations (templates for various register related
- *  operations)
- *
- *  Copyright (c) 2003 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef SET_REG
-#define SET_REG(x) REG = x
-#endif
-
-void OPPROTO glue(op_movl_T0_, REGNAME)(void)
-{
-    T0 = REG;
-}
-
-void OPPROTO glue(op_movl_T1_, REGNAME)(void)
-{
-    T1 = REG;
-}
-
-void OPPROTO glue(op_movl_T2_, REGNAME)(void)
-{
-    T2 = REG;
-}
-
-void OPPROTO glue(glue(op_movl_, REGNAME), _T0)(void)
-{
-    SET_REG (T0);
-}
-
-void OPPROTO glue(glue(op_movl_, REGNAME), _T1)(void)
-{
-    SET_REG (T1);
-}
-
-#undef REG
-#undef REGNAME
-#undef SET_REG
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 6de78f8..ef529eb 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -29,6 +29,7 @@
 #include "exec-all.h"
 #include "disas.h"
 #include "tcg-op.h"
+#include "helpers.h"
 
 #define ENABLE_ARCH_5J    0
 #define ENABLE_ARCH_6     arm_feature(env, ARM_FEATURE_V6)
@@ -73,6 +74,240 @@
 extern FILE *logfile;
 extern int loglevel;
 
+static TCGv cpu_env;
+/* FIXME:  These should be removed.  */
+static TCGv cpu_T[3];
+
+/* Initialize TCG globals.  cpu_arm_init() calls this once, on first use.  */
+void arm_translate_init(void)
+{
+    cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");
+
+    cpu_T[0] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG1, "T0");
+    cpu_T[1] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG2, "T1");
+    cpu_T[2] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG3, "T2");
+}
+
+/* The code generator doesn't like lots of temporaries, so maintain our own
+   cache for reuse within a function.  */
+#define MAX_TEMPS 8
+static int num_temps;
+static TCGv temps[MAX_TEMPS];
+
+/* Hand out the next cached temporary, creating it on first use.  */
+static TCGv new_tmp(void)
+{
+    int slot = num_temps;
+
+    /* All MAX_TEMPS slots are live: the translator has run out of temps.  */
+    if (slot == MAX_TEMPS)
+        abort();
+    /* An empty slot (GET_TCGV == 0) gets a freshly created i32 temporary.  */
+    if (!GET_TCGV(temps[slot]))
+        temps[slot] = tcg_temp_new(TCG_TYPE_I32);
+    num_temps = slot + 1;
+    return temps[slot];
+}
+
+/* Release a temporary variable so that new_tmp() can reuse it.  */
+static void dead_tmp(TCGv tmp)
+{
+    int i;
+    num_temps--;
+    i = num_temps;
+    if (GET_TCGV(temps[i]) == GET_TCGV(tmp))
+        return; /* tmp already sits in the slot new_tmp() hands out next */
+
+    /* Out-of-order release: slide later temps down, park tmp last.  */
+    while (GET_TCGV(temps[i]) != GET_TCGV(tmp))
+        i--; /* NOTE(review): no lower bound; assumes tmp is a live temp */
+    while (i < num_temps) {
+        temps[i] = temps[i + 1];
+        i++;
+    }
+    temps[i] = tmp;
+}
+
+/* Set var to CPU register reg; r15 becomes a translate-time immediate.  */
+static void load_reg_var(DisasContext *s, TCGv var, int reg)
+{
+    if (reg == 15) {
+        uint32_t addr;
+        /* Normally, since we updated PC, we need only to add one insn.  */
+        if (s->thumb)
+            addr = (long)s->pc + 2;
+        else
+            addr = (long)s->pc + 4;
+        tcg_gen_movi_i32(var, addr);
+    } else {
+        tcg_gen_ld_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
+    }
+}
+
+/* Load CPU register reg into a fresh temporary (see new_tmp/dead_tmp).  */
+static inline TCGv load_reg(DisasContext *s, int reg)
+{
+    TCGv tmp = new_tmp();
+    load_reg_var(s, tmp, reg);
+    return tmp;
+}
+
+/* Set a CPU register.  The source must be a temporary and will be marked
+   as dead.  A write to r15 clears bit 0 and sets is_jmp to DISAS_JUMP.  */
+static void store_reg(DisasContext *s, int reg, TCGv var)
+{
+    if (reg == 15) {
+        tcg_gen_andi_i32(var, var, ~1);
+        s->is_jmp = DISAS_JUMP;
+    }
+    tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
+    dead_tmp(var);
+}
+
+
+/* Basic operations on T0-T2.  Each expands to a call without a final ';'.  */
+#define gen_op_movl_T0_T1() tcg_gen_mov_i32(cpu_T[0], cpu_T[1])
+#define gen_op_movl_T0_T2() tcg_gen_mov_i32(cpu_T[0], cpu_T[2])
+#define gen_op_movl_T1_T0() tcg_gen_mov_i32(cpu_T[1], cpu_T[0])
+#define gen_op_movl_T1_T2() tcg_gen_mov_i32(cpu_T[1], cpu_T[2])
+#define gen_op_movl_T2_T0() tcg_gen_mov_i32(cpu_T[2], cpu_T[0])
+#define gen_op_movl_T0_im(im) tcg_gen_movi_i32(cpu_T[0], im)
+#define gen_op_movl_T1_im(im) tcg_gen_movi_i32(cpu_T[1], im)
+#define gen_op_movl_T2_im(im) tcg_gen_movi_i32(cpu_T[2], im)
+
+#define gen_op_addl_T1_im(im) tcg_gen_addi_i32(cpu_T[1], cpu_T[1], im)
+#define gen_op_addl_T0_T1() tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1])
+#define gen_op_subl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[0], cpu_T[1])
+#define gen_op_rsbl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[1], cpu_T[0])
+
+#define gen_op_andl_T0_T1() tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1])
+#define gen_op_xorl_T0_T1() tcg_gen_xor_i32(cpu_T[0], cpu_T[0], cpu_T[1])
+#define gen_op_orl_T0_T1() tcg_gen_or_i32(cpu_T[0], cpu_T[0], cpu_T[1])
+#define gen_op_notl_T0() tcg_gen_not_i32(cpu_T[0], cpu_T[0])
+#define gen_op_notl_T1() tcg_gen_not_i32(cpu_T[1], cpu_T[1])
+#define gen_op_logic_T0_cc() gen_logic_CC(cpu_T[0])
+#define gen_op_logic_T1_cc() gen_logic_CC(cpu_T[1])
+
+#define gen_op_shll_T0_im(im) tcg_gen_shli_i32(cpu_T[0], cpu_T[0], im)
+#define gen_op_shll_T1_im(im) tcg_gen_shli_i32(cpu_T[1], cpu_T[1], im)
+#define gen_op_shrl_T1_im(im) tcg_gen_shri_i32(cpu_T[1], cpu_T[1], im)
+#define gen_op_sarl_T1_im(im) tcg_gen_sari_i32(cpu_T[1], cpu_T[1], im)
+#define gen_op_rorl_T1_im(im) tcg_gen_rori_i32(cpu_T[1], cpu_T[1], im)
+
+/* Value extensions, applied to var in place.  */
+#define gen_uxtb(var) tcg_gen_andi_i32(var, var, 0xff)
+#define gen_uxth(var) tcg_gen_andi_i32(var, var, 0xffff)
+#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
+#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
+
+#define HELPER_ADDR(x) helper_##x /* C function for helper x (helpers.h) */
+
+#define gen_sxtb16(var) tcg_gen_helper_1_1(HELPER_ADDR(sxtb16), var, var)
+#define gen_uxtb16(var) tcg_gen_helper_1_1(HELPER_ADDR(uxtb16), var, var)
+
+/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
+   Bit 15 of both inputs is cleared before the add so that the low halfword
+   cannot carry into the high one; the xor then puts the bit 15 sum back:
+    tmp = (t0 ^ t1) & 0x8000;
+    t0 = ((t0 & ~0x8000) + (t1 & ~0x8000)) ^ tmp;
+ */
+
+static void gen_add16(TCGv t0, TCGv t1)
+{
+    TCGv tmp = new_tmp();
+    tcg_gen_xor_i32(tmp, t0, t1);
+    tcg_gen_andi_i32(tmp, tmp, 0x8000);
+    tcg_gen_andi_i32(t0, t0, ~0x8000);
+    tcg_gen_andi_i32(t1, t1, ~0x8000);
+    tcg_gen_add_i32(t0, t0, t1);
+    tcg_gen_xor_i32(t0, t0, tmp);
+    dead_tmp(tmp);
+    dead_tmp(t1);
+}
+
+/* Set CF to the top bit of var.  var itself is not modified.  */
+static void gen_set_CF_bit31(TCGv var)
+{
+    TCGv tmp = new_tmp();
+    tcg_gen_shri_i32(tmp, var, 31);
+    tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, CF));
+    dead_tmp(tmp);
+}
+
+/* Set N and Z flags from var (stored to NZF, which encodes both).  */
+static inline void gen_logic_CC(TCGv var)
+{
+    tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, NZF));
+}
+
+/* T0 += T1 + CF.  No flags are written; T1 is unchanged.  */
+static void gen_adc_T0_T1(void)
+{
+    TCGv tmp = new_tmp();
+    gen_op_addl_T0_T1();
+    tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUState, CF));
+    tcg_gen_add_i32(cpu_T[0], cpu_T[0], tmp);
+    dead_tmp(tmp);
+}
+
+/* t0 = ~t1, via xor with all ones.  FIXME:  Implement this natively.  */
+static inline void tcg_gen_not_i32(TCGv t0, TCGv t1)
+{
+    tcg_gen_xori_i32(t0, t1, ~0);
+}
+
+/* T0 &= ~T1.  Clobbers T1 (it is left holding ~T1).  */
+/* FIXME: Implement bic natively.  */
+static inline void gen_op_bicl_T0_T1(void)
+{
+    gen_op_notl_T1();
+    gen_op_andl_T0_T1();
+}
+
+/* t0 = t1 rotated right by i (meant for 0 <= i < 32); t1 is left intact.
+   For i == 0 nothing is emitted.  FIXME:  Implement this natively.  */
+static void tcg_gen_rori_i32(TCGv t0, TCGv t1, int i)
+{
+    TCGv tmp;
+
+    if (i == 0)
+        return;
+    tmp = new_tmp();
+    tcg_gen_shri_i32(tmp, t1, i);
+    tcg_gen_shli_i32(t0, t1, 32 - i);
+    tcg_gen_or_i32(t0, t0, tmp);
+    dead_tmp(tmp);
+}
+
+/* Shift var in place by an immediate.  shiftop: 0 lsl, 1 lsr, 2 asr, 3 ror.
+   shift == 0 means: lsl unchanged, lsr gives 0, asr by 31, ror is rrx.  */
+static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift)
+{
+    if (shift != 0) {
+        switch (shiftop) {
+        case 0: tcg_gen_shli_i32(var, var, shift); break;
+        case 1: tcg_gen_shri_i32(var, var, shift); break;
+        case 2: tcg_gen_sari_i32(var, var, shift); break;
+        case 3: tcg_gen_rori_i32(var, var, shift); break;
+        }
+    } else {
+        TCGv tmp;
+        switch (shiftop) {
+        case 0: break;
+        case 1: tcg_gen_movi_i32(var, 0); break;
+        case 2: tcg_gen_sari_i32(var, var, 31); break;
+        case 3: /* rrx */
+            tcg_gen_shri_i32(var, var, 1);
+            tmp = new_tmp();
+            tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUState, CF));
+            tcg_gen_shli_i32(tmp, tmp, 31);
+            tcg_gen_or_i32(var, var, tmp);
+            dead_tmp(tmp);
+            break;
+        }
+    }
+}
+
 #define PAS_OP(pfx) {  \
     gen_op_ ## pfx ## add16_T0_T1, \
     gen_op_ ## pfx ## addsubx_T0_T1, \
@@ -154,34 +389,6 @@
     1, /* mvn */
 };
 
-static GenOpFunc1 *gen_shift_T1_im[4] = {
-    gen_op_shll_T1_im,
-    gen_op_shrl_T1_im,
-    gen_op_sarl_T1_im,
-    gen_op_rorl_T1_im,
-};
-
-static GenOpFunc *gen_shift_T1_0[4] = {
-    NULL,
-    gen_op_shrl_T1_0,
-    gen_op_sarl_T1_0,
-    gen_op_rrxl_T1,
-};
-
-static GenOpFunc1 *gen_shift_T2_im[4] = {
-    gen_op_shll_T2_im,
-    gen_op_shrl_T2_im,
-    gen_op_sarl_T2_im,
-    gen_op_rorl_T2_im,
-};
-
-static GenOpFunc *gen_shift_T2_0[4] = {
-    NULL,
-    gen_op_shrl_T2_0,
-    gen_op_sarl_T2_0,
-    gen_op_rrxl_T2,
-};
-
 static GenOpFunc1 *gen_shift_T1_im_cc[4] = {
     gen_op_shll_T1_im_cc,
     gen_op_shrl_T1_im_cc,
@@ -210,108 +417,6 @@
     gen_op_rorl_T1_T0_cc,
 };
 
-static GenOpFunc *gen_op_movl_TN_reg[3][16] = {
-    {
-        gen_op_movl_T0_r0,
-        gen_op_movl_T0_r1,
-        gen_op_movl_T0_r2,
-        gen_op_movl_T0_r3,
-        gen_op_movl_T0_r4,
-        gen_op_movl_T0_r5,
-        gen_op_movl_T0_r6,
-        gen_op_movl_T0_r7,
-        gen_op_movl_T0_r8,
-        gen_op_movl_T0_r9,
-        gen_op_movl_T0_r10,
-        gen_op_movl_T0_r11,
-        gen_op_movl_T0_r12,
-        gen_op_movl_T0_r13,
-        gen_op_movl_T0_r14,
-        gen_op_movl_T0_r15,
-    },
-    {
-        gen_op_movl_T1_r0,
-        gen_op_movl_T1_r1,
-        gen_op_movl_T1_r2,
-        gen_op_movl_T1_r3,
-        gen_op_movl_T1_r4,
-        gen_op_movl_T1_r5,
-        gen_op_movl_T1_r6,
-        gen_op_movl_T1_r7,
-        gen_op_movl_T1_r8,
-        gen_op_movl_T1_r9,
-        gen_op_movl_T1_r10,
-        gen_op_movl_T1_r11,
-        gen_op_movl_T1_r12,
-        gen_op_movl_T1_r13,
-        gen_op_movl_T1_r14,
-        gen_op_movl_T1_r15,
-    },
-    {
-        gen_op_movl_T2_r0,
-        gen_op_movl_T2_r1,
-        gen_op_movl_T2_r2,
-        gen_op_movl_T2_r3,
-        gen_op_movl_T2_r4,
-        gen_op_movl_T2_r5,
-        gen_op_movl_T2_r6,
-        gen_op_movl_T2_r7,
-        gen_op_movl_T2_r8,
-        gen_op_movl_T2_r9,
-        gen_op_movl_T2_r10,
-        gen_op_movl_T2_r11,
-        gen_op_movl_T2_r12,
-        gen_op_movl_T2_r13,
-        gen_op_movl_T2_r14,
-        gen_op_movl_T2_r15,
-    },
-};
-
-static GenOpFunc *gen_op_movl_reg_TN[2][16] = {
-    {
-        gen_op_movl_r0_T0,
-        gen_op_movl_r1_T0,
-        gen_op_movl_r2_T0,
-        gen_op_movl_r3_T0,
-        gen_op_movl_r4_T0,
-        gen_op_movl_r5_T0,
-        gen_op_movl_r6_T0,
-        gen_op_movl_r7_T0,
-        gen_op_movl_r8_T0,
-        gen_op_movl_r9_T0,
-        gen_op_movl_r10_T0,
-        gen_op_movl_r11_T0,
-        gen_op_movl_r12_T0,
-        gen_op_movl_r13_T0,
-        gen_op_movl_r14_T0,
-        gen_op_movl_r15_T0,
-    },
-    {
-        gen_op_movl_r0_T1,
-        gen_op_movl_r1_T1,
-        gen_op_movl_r2_T1,
-        gen_op_movl_r3_T1,
-        gen_op_movl_r4_T1,
-        gen_op_movl_r5_T1,
-        gen_op_movl_r6_T1,
-        gen_op_movl_r7_T1,
-        gen_op_movl_r8_T1,
-        gen_op_movl_r9_T1,
-        gen_op_movl_r10_T1,
-        gen_op_movl_r11_T1,
-        gen_op_movl_r12_T1,
-        gen_op_movl_r13_T1,
-        gen_op_movl_r14_T1,
-        gen_op_movl_r15_T1,
-    },
-};
-
-static GenOpFunc1 *gen_op_movl_TN_im[3] = {
-    gen_op_movl_T0_im,
-    gen_op_movl_T1_im,
-    gen_op_movl_T2_im,
-};
-
 static GenOpFunc1 *gen_shift_T0_im_thumb_cc[3] = {
     gen_op_shll_T0_im_thumb_cc,
     gen_op_shrl_T0_im_thumb_cc,
@@ -324,12 +429,19 @@
     gen_op_sarl_T0_im_thumb,
 };
 
+/* Set PC and thumb state from T0.  Clobbers T0.  */
 static inline void gen_bx(DisasContext *s)
 {
-  s->is_jmp = DISAS_UPDATE;
-  gen_op_bx_T0();
-}
+    TCGv tmp;
 
+    s->is_jmp = DISAS_UPDATE;
+    tmp = new_tmp();
+    tcg_gen_andi_i32(tmp, cpu_T[0], 1);
+    tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, thumb));
+    dead_tmp(tmp);
+    tcg_gen_andi_i32(cpu_T[0], cpu_T[0], ~1);
+    tcg_gen_st_i32(cpu_T[0], cpu_env, offsetof(CPUState, regs[15]));
+}
 
 #if defined(CONFIG_USER_ONLY)
 #define gen_ldst(name, s) gen_op_##name##_raw()
@@ -343,41 +455,38 @@
     } while (0)
 #endif
 
-static inline void gen_movl_TN_reg(DisasContext *s, int reg, int t)
-{
-    int val;
-
-    if (reg == 15) {
-        /* normaly, since we updated PC, we need only to add one insn */
-        if (s->thumb)
-            val = (long)s->pc + 2;
-        else
-            val = (long)s->pc + 4;
-        gen_op_movl_TN_im[t](val);
-    } else {
-        gen_op_movl_TN_reg[t][reg]();
-    }
-}
-
 static inline void gen_movl_T0_reg(DisasContext *s, int reg)
 {
-    gen_movl_TN_reg(s, reg, 0);
+    load_reg_var(s, cpu_T[0], reg);
 }
 
 static inline void gen_movl_T1_reg(DisasContext *s, int reg)
 {
-    gen_movl_TN_reg(s, reg, 1);
+    load_reg_var(s, cpu_T[1], reg);
 }
 
 static inline void gen_movl_T2_reg(DisasContext *s, int reg)
 {
-    gen_movl_TN_reg(s, reg, 2);
+    load_reg_var(s, cpu_T[2], reg);
+}
+
+static inline void gen_set_pc_T0(void)
+{
+    tcg_gen_st_i32(cpu_T[0], cpu_env, offsetof(CPUState, regs[15]));
 }
 
 static inline void gen_movl_reg_TN(DisasContext *s, int reg, int t)
 {
-    gen_op_movl_reg_TN[t][reg]();
+    TCGv tmp;
     if (reg == 15) {
+        tmp = new_tmp();
+        tcg_gen_andi_i32(tmp, cpu_T[t], ~1);
+    } else {
+        tmp = cpu_T[t];
+    }
+    tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, regs[reg]));
+    if (reg == 15) {
+        dead_tmp(tmp);
         s->is_jmp = DISAS_JUMP;
     }
 }
@@ -403,6 +512,7 @@
 static inline void gen_add_data_offset(DisasContext *s, unsigned int insn)
 {
     int val, rm, shift, shiftop;
+    TCGv offset;
 
     if (!(insn & (1 << 25))) {
         /* immediate */
@@ -415,17 +525,14 @@
         /* shift/register */
         rm = (insn) & 0xf;
         shift = (insn >> 7) & 0x1f;
-        gen_movl_T2_reg(s, rm);
         shiftop = (insn >> 5) & 3;
-        if (shift != 0) {
-            gen_shift_T2_im[shiftop](shift);
-        } else if (shiftop != 0) {
-            gen_shift_T2_0[shiftop]();
-        }
+        offset = load_reg(s, rm);
+        gen_arm_shift_im(offset, shiftop, shift);
         if (!(insn & (1 << 23)))
-            gen_op_subl_T1_T2();
+            tcg_gen_sub_i32(cpu_T[1], cpu_T[1], offset);
         else
-            gen_op_addl_T1_T2();
+            tcg_gen_add_i32(cpu_T[1], cpu_T[1], offset);
+        dead_tmp(offset);
     }
 }
 
@@ -433,6 +540,7 @@
                                         int extra)
 {
     int val, rm;
+    TCGv offset;
 
     if (insn & (1 << 22)) {
         /* immediate */
@@ -447,11 +555,12 @@
         if (extra)
             gen_op_addl_T1_im(extra);
         rm = (insn) & 0xf;
-        gen_movl_T2_reg(s, rm);
+        offset = load_reg(s, rm);
         if (!(insn & (1 << 23)))
-            gen_op_subl_T1_T2();
+            tcg_gen_sub_i32(cpu_T[1], cpu_T[1], offset);
         else
-            gen_op_addl_T1_T2();
+            tcg_gen_add_i32(cpu_T[1], cpu_T[1], offset);
+        dead_tmp(offset);
     }
 }
 
@@ -979,7 +1088,7 @@
         case 3:
             return 1;
         }
-        gen_op_movl_reg_TN[0][rd]();
+        gen_movl_reg_T0(s, rd);
         break;
     case 0x117: case 0x517: case 0x917: case 0xd17:	/* TEXTRC */
         if ((insn & 0x000ff008) != 0x0003f000)
@@ -1531,21 +1640,21 @@
         gen_op_iwmmxt_movq_M0_wRn(wrd);
         switch ((insn >> 16) & 0xf) {
         case 0x0:					/* TMIA */
-            gen_op_movl_TN_reg[0][rd0]();
-            gen_op_movl_TN_reg[1][rd1]();
+            gen_movl_T0_reg(s, rd0);
+            gen_movl_T1_reg(s, rd1);
             gen_op_iwmmxt_muladdsl_M0_T0_T1();
             break;
         case 0x8:					/* TMIAPH */
-            gen_op_movl_TN_reg[0][rd0]();
-            gen_op_movl_TN_reg[1][rd1]();
+            gen_movl_T0_reg(s, rd0);
+            gen_movl_T1_reg(s, rd1);
             gen_op_iwmmxt_muladdsw_M0_T0_T1();
             break;
         case 0xc: case 0xd: case 0xe: case 0xf:		/* TMIAxy */
-            gen_op_movl_TN_reg[1][rd0]();
+            gen_movl_T1_reg(s, rd0);
             if (insn & (1 << 16))
                 gen_op_shrl_T1_im(16);
             gen_op_movl_T0_T1();
-            gen_op_movl_TN_reg[1][rd1]();
+            gen_movl_T1_reg(s, rd1);
             if (insn & (1 << 17))
                 gen_op_shrl_T1_im(16);
             gen_op_iwmmxt_muladdswl_M0_T0_T1();
@@ -1580,24 +1689,24 @@
 
         switch ((insn >> 16) & 0xf) {
         case 0x0:					/* MIA */
-            gen_op_movl_TN_reg[0][rd0]();
-            gen_op_movl_TN_reg[1][rd1]();
+            gen_movl_T0_reg(s, rd0);
+            gen_movl_T1_reg(s, rd1);
             gen_op_iwmmxt_muladdsl_M0_T0_T1();
             break;
         case 0x8:					/* MIAPH */
-            gen_op_movl_TN_reg[0][rd0]();
-            gen_op_movl_TN_reg[1][rd1]();
+            gen_movl_T0_reg(s, rd0);
+            gen_movl_T1_reg(s, rd1);
             gen_op_iwmmxt_muladdsw_M0_T0_T1();
             break;
         case 0xc:					/* MIABB */
         case 0xd:					/* MIABT */
         case 0xe:					/* MIATB */
         case 0xf:					/* MIATT */
-            gen_op_movl_TN_reg[1][rd0]();
+            gen_movl_T1_reg(s, rd0);
             if (insn & (1 << 16))
                 gen_op_shrl_T1_im(16);
             gen_op_movl_T0_T1();
-            gen_op_movl_TN_reg[1][rd1]();
+            gen_movl_T1_reg(s, rd1);
             if (insn & (1 << 17))
                 gen_op_shrl_T1_im(16);
             gen_op_iwmmxt_muladdswl_M0_T0_T1();
@@ -1621,13 +1730,13 @@
 
         if (insn & ARM_CP_RW_BIT) {			/* MRA */
             gen_op_iwmmxt_movl_T0_T1_wRn(acc);
-            gen_op_movl_reg_TN[0][rdlo]();
+            gen_movl_reg_T0(s, rdlo);
             gen_op_movl_T0_im((1 << (40 - 32)) - 1);
             gen_op_andl_T0_T1();
-            gen_op_movl_reg_TN[0][rdhi]();
+            gen_movl_reg_T0(s, rdhi);
         } else {					/* MAR */
-            gen_op_movl_TN_reg[0][rdlo]();
-            gen_op_movl_TN_reg[1][rdhi]();
+            gen_movl_T0_reg(s, rdlo);
+            gen_movl_T1_reg(s, rdhi);
             gen_op_iwmmxt_movl_wRn_T0_T1(acc);
         }
         return 0;
@@ -1650,14 +1759,14 @@
         if (!env->cp[cp].cp_read)
             return 1;
         gen_op_movl_T0_im((uint32_t) s->pc);
-        gen_op_movl_reg_TN[0][15]();
+        gen_set_pc_T0();
         gen_op_movl_T0_cp(insn);
         gen_movl_reg_T0(s, rd);
     } else {
         if (!env->cp[cp].cp_write)
             return 1;
         gen_op_movl_T0_im((uint32_t) s->pc);
-        gen_op_movl_reg_TN[0][15]();
+        gen_set_pc_T0();
         gen_movl_T0_reg(s, rd);
         gen_op_movl_cp_T0(insn);
     }
@@ -1713,7 +1822,7 @@
         || (insn & 0x0fff0fff) == 0x0e070f58) {
         /* Wait for interrupt.  */
         gen_op_movl_T0_im((long)s->pc);
-        gen_op_movl_reg_TN[0][15]();
+        gen_set_pc_T0();
         s->is_jmp = DISAS_WFI;
         return 0;
     }
@@ -1817,9 +1926,9 @@
                         if (offset)
                             gen_op_shrl_T1_im(offset);
                         if (insn & (1 << 23))
-                            gen_op_uxtb_T1();
+                            gen_uxtb(cpu_T[1]);
                         else
-                            gen_op_sxtb_T1();
+                            gen_sxtb(cpu_T[1]);
                         break;
                     case 1:
                         NEON_GET_REG(T1, rn, pass);
@@ -1827,13 +1936,13 @@
                             if (offset) {
                                 gen_op_shrl_T1_im(16);
                             } else {
-                                gen_op_uxth_T1();
+                                gen_uxth(cpu_T[1]);
                             }
                         } else {
                             if (offset) {
                                 gen_op_sarl_T1_im(16);
                             } else {
-                                gen_op_sxth_T1();
+                                gen_sxth(cpu_T[1]);
                             }
                         }
                         break;
@@ -2418,11 +2527,11 @@
     if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
         tcg_gen_goto_tb(n);
         gen_op_movl_T0_im(dest);
-        gen_op_movl_r15_T0();
+        gen_set_pc_T0();
         tcg_gen_exit_tb((long)tb + n);
     } else {
         gen_op_movl_T0_im(dest);
-        gen_op_movl_r15_T0();
+        gen_set_pc_T0();
         tcg_gen_exit_tb(0);
     }
 }
@@ -2444,13 +2553,13 @@
 static inline void gen_mulxy(int x, int y)
 {
     if (x)
-        gen_op_sarl_T0_im(16);
+        tcg_gen_sari_i32(cpu_T[0], cpu_T[0], 16);
     else
-        gen_op_sxth_T0();
+        gen_sxth(cpu_T[0]);
     if (y)
         gen_op_sarl_T1_im(16);
     else
-        gen_op_sxth_T1();
+        gen_sxth(cpu_T[1]);
     gen_op_mul_T0_T1();
 }
 
@@ -2501,7 +2610,7 @@
 /* Generate an old-style exception return.  */
 static void gen_exception_return(DisasContext *s)
 {
-    gen_op_movl_reg_TN[0][15]();
+    gen_set_pc_T0();
     gen_op_movl_T0_spsr();
     gen_op_movl_cpsr_T0(0xffffffff);
     s->is_jmp = DISAS_UPDATE;
@@ -2512,7 +2621,7 @@
 {
     gen_op_movl_cpsr_T0(0xffffffff);
     gen_op_movl_T0_T2();
-    gen_op_movl_reg_TN[0][15]();
+    gen_set_pc_T0();
     s->is_jmp = DISAS_UPDATE;
 }
 
@@ -2529,7 +2638,7 @@
     switch (val) {
     case 3: /* wfi */
         gen_op_movl_T0_im((long)s->pc);
-        gen_op_movl_reg_TN[0][15]();
+        gen_set_pc_T0();
         s->is_jmp = DISAS_WFI;
         break;
     case 2: /* wfe */
@@ -3011,14 +3120,18 @@
         }
     }
     if (rm != 15) {
-        gen_movl_T1_reg(s, rn);
+        TCGv base;
+
+        base = load_reg(s, rn);
         if (rm == 13) {
-            gen_op_addl_T1_im(stride);
+            tcg_gen_addi_i32(base, base, stride);
         } else {
-            gen_movl_T2_reg(s, rm);
-            gen_op_addl_T1_T2();
+            TCGv index;
+            index = load_reg(s, rm);
+            tcg_gen_add_i32(base, base, index);
+            dead_tmp(index);
         }
-        gen_movl_reg_T1(s, rn);
+        store_reg(s, rn, base);
     }
     return 0;
 }
@@ -4626,6 +4739,7 @@
 static void disas_arm_insn(CPUState * env, DisasContext *s)
 {
     unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
+    TCGv tmp;
 
     insn = ldl_code(s->pc);
     s->pc += 4;
@@ -4936,7 +5050,7 @@
         case 7: /* bkpt */
             gen_set_condexec(s);
             gen_op_movl_T0_im((long)s->pc - 4);
-            gen_op_movl_reg_TN[0][15]();
+            gen_set_pc_T0();
             gen_op_bkpt();
             s->is_jmp = DISAS_JUMP;
             break;
@@ -4954,7 +5068,7 @@
                 if (sh & 4)
                     gen_op_sarl_T1_im(16);
                 else
-                    gen_op_sxth_T1();
+                    gen_sxth(cpu_T[1]);
                 gen_op_imulw_T0_T1();
                 if ((sh & 2) == 0) {
                     gen_movl_T1_reg(s, rn);
@@ -5001,7 +5115,7 @@
                 val = (val >> shift) | (val << (32 - shift));
             gen_op_movl_T1_im(val);
             if (logic_cc && shift)
-                gen_op_mov_CF_T1();
+                gen_set_CF_bit31(cpu_T[1]);
         } else {
             /* register */
             rm = (insn) & 0xf;
@@ -5009,18 +5123,14 @@
             shiftop = (insn >> 5) & 3;
             if (!(insn & (1 << 4))) {
                 shift = (insn >> 7) & 0x1f;
-                if (shift != 0) {
-                    if (logic_cc) {
+                if (logic_cc) {
+                    if (shift != 0) {
                         gen_shift_T1_im_cc[shiftop](shift);
-                    } else {
-                        gen_shift_T1_im[shiftop](shift);
-                    }
-                } else if (shiftop != 0) {
-                    if (logic_cc) {
+                    } else if (shiftop != 0) {
                         gen_shift_T1_0_cc[shiftop]();
-                    } else {
-                        gen_shift_T1_0[shiftop]();
                     }
+                } else {
+                    gen_arm_shift_im(cpu_T[1], shiftop, shift);
                 }
             } else {
                 rs = (insn >> 8) & 0xf;
@@ -5083,7 +5193,7 @@
             if (set_cc)
                 gen_op_adcl_T0_T1_cc();
             else
-                gen_op_adcl_T0_T1();
+                gen_adc_T0_T1();
             gen_movl_reg_T0(s, rd);
             break;
         case 0x06:
@@ -5389,20 +5499,21 @@
                             gen_op_rorl_T1_im(shift * 8);
                         op1 = (insn >> 20) & 7;
                         switch (op1) {
-                        case 0: gen_op_sxtb16_T1(); break;
-                        case 2: gen_op_sxtb_T1();   break;
-                        case 3: gen_op_sxth_T1();   break;
-                        case 4: gen_op_uxtb16_T1(); break;
-                        case 6: gen_op_uxtb_T1();   break;
-                        case 7: gen_op_uxth_T1();   break;
+                        case 0: gen_sxtb16(cpu_T[1]); break;
+                        case 2: gen_sxtb(cpu_T[1]);   break;
+                        case 3: gen_sxth(cpu_T[1]);   break;
+                        case 4: gen_uxtb16(cpu_T[1]); break;
+                        case 6: gen_uxtb(cpu_T[1]);   break;
+                        case 7: gen_uxth(cpu_T[1]);   break;
                         default: goto illegal_op;
                         }
                         if (rn != 15) {
-                            gen_movl_T2_reg(s, rn);
+                            tmp = load_reg(s, rn);
                             if ((op1 & 3) == 0) {
-                                gen_op_add16_T1_T2();
+                                gen_add16(cpu_T[1], tmp);
                             } else {
-                                gen_op_addl_T1_T2();
+                                tcg_gen_add_i32(cpu_T[1], cpu_T[1], tmp);
+                                dead_tmp(tmp);
                             }
                         }
                         gen_movl_reg_T1(s, rd);
@@ -5667,7 +5778,7 @@
                             if (i == 15) {
                                 /* special case: r15 = PC + 8 */
                                 val = (long)s->pc + 4;
-                                gen_op_movl_TN_im[0](val);
+                                gen_op_movl_T0_im(val);
                             } else if (user) {
                                 gen_op_movl_T0_user(i);
                             } else {
@@ -5723,7 +5834,7 @@
                 val = (int32_t)s->pc;
                 if (insn & (1 << 24)) {
                     gen_op_movl_T0_im(val);
-                    gen_op_movl_reg_TN[0][14]();
+                    gen_movl_reg_T0(s, 14);
                 }
                 offset = (((int32_t)insn << 8) >> 8);
                 val += (offset << 2) + 4;
@@ -5740,14 +5851,14 @@
         case 0xf:
             /* swi */
             gen_op_movl_T0_im((long)s->pc);
-            gen_op_movl_reg_TN[0][15]();
+            gen_set_pc_T0();
             s->is_jmp = DISAS_SWI;
             break;
         default:
         illegal_op:
             gen_set_condexec(s);
             gen_op_movl_T0_im((long)s->pc - 4);
-            gen_op_movl_reg_TN[0][15]();
+            gen_set_pc_T0();
             gen_op_undef_insn();
             s->is_jmp = DISAS_JUMP;
             break;
@@ -5806,7 +5917,7 @@
         if (conds)
             gen_op_adcl_T0_T1_cc();
         else
-            gen_op_adcl_T0_T1();
+            gen_adc_T0_T1();
         break;
     case 11: /* sbc */
         if (conds)
@@ -5832,7 +5943,7 @@
     if (logic_cc) {
         gen_op_logic_T0_cc();
         if (shifter_out)
-            gen_op_mov_CF_T1();
+            gen_set_CF_bit31(cpu_T[1]);
     }
     return 0;
 }
@@ -5843,6 +5954,7 @@
 {
     uint32_t insn, imm, shift, offset, addr;
     uint32_t rd, rn, rm, rs;
+    TCGv tmp;
     int op;
     int shiftop;
     int conds;
@@ -5966,13 +6078,15 @@
                 } else {
                     gen_movl_T1_reg(s, rn);
                 }
-                gen_movl_T2_reg(s, rm);
-                gen_op_addl_T1_T2();
+                tmp = load_reg(s, rm);
+                tcg_gen_add_i32(cpu_T[1], cpu_T[1], tmp);
                 if (insn & (1 << 4)) {
                     /* tbh */
-                    gen_op_addl_T1_T2();
+                    tcg_gen_add_i32(cpu_T[1], cpu_T[1], tmp);
+                    dead_tmp(tmp);
                     gen_ldst(lduw, s);
                 } else { /* tbb */
+                    dead_tmp(tmp);
                     gen_ldst(ldub, s);
                 }
                 gen_op_jmp_T0_im(s->pc);
@@ -6126,18 +6240,14 @@
         shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
         conds = (insn & (1 << 20)) != 0;
         logic_cc = (conds && thumb2_logic_op(op));
-        if (shift != 0) {
-            if (logic_cc) {
+        if (logic_cc) {
+            if (shift != 0) {
                 gen_shift_T1_im_cc[shiftop](shift);
-            } else {
-                gen_shift_T1_im[shiftop](shift);
-            }
-        } else if (shiftop != 0) {
-            if (logic_cc) {
+            } else if (shiftop != 0) {
                 gen_shift_T1_0_cc[shiftop]();
-            } else {
-                gen_shift_T1_0[shiftop]();
             }
+        } else {
+            gen_arm_shift_im(cpu_T[1], shiftop, shift);
         }
         if (gen_thumb2_data_op(s, op, conds, 0))
             goto illegal_op;
@@ -6172,20 +6282,21 @@
                 gen_op_rorl_T1_im(shift * 8);
             op = (insn >> 20) & 7;
             switch (op) {
-            case 0: gen_op_sxth_T1();   break;
-            case 1: gen_op_uxth_T1();   break;
-            case 2: gen_op_sxtb16_T1(); break;
-            case 3: gen_op_uxtb16_T1(); break;
-            case 4: gen_op_sxtb_T1();   break;
-            case 5: gen_op_uxtb_T1();   break;
+            case 0: gen_sxth(cpu_T[1]);   break;
+            case 1: gen_uxth(cpu_T[1]);   break;
+            case 2: gen_sxtb16(cpu_T[1]); break;
+            case 3: gen_uxtb16(cpu_T[1]); break;
+            case 4: gen_sxtb(cpu_T[1]);   break;
+            case 5: gen_uxtb(cpu_T[1]);   break;
             default: goto illegal_op;
             }
             if (rn != 15) {
-                gen_movl_T2_reg(s, rn);
+                tmp = load_reg(s, rn);
                 if ((op >> 1) == 1) {
-                    gen_op_add16_T1_T2();
+                    gen_add16(cpu_T[1], tmp);
                 } else {
-                    gen_op_addl_T1_T2();
+                    tcg_gen_add_i32(cpu_T[1], cpu_T[1], tmp);
+                    dead_tmp(tmp);
                 }
             }
             gen_movl_reg_T1(s, rd);
@@ -6286,7 +6397,7 @@
                 if (op)
                     gen_op_sarl_T1_im(16);
                 else
-                    gen_op_sxth_T1();
+                    gen_sxth(cpu_T[1]);
                 gen_op_imulw_T0_T1();
                 if (rs != 15)
                   {
@@ -6718,10 +6829,11 @@
                     shift = (insn >> 4) & 0xf;
                     if (shift > 3)
                         goto illegal_op;
-                    gen_movl_T2_reg(s, rm);
+                    tmp = load_reg(s, rm);
                     if (shift)
-                        gen_op_shll_T2_im(shift);
-                    gen_op_addl_T1_T2();
+                        tcg_gen_shli_i32(tmp, tmp, shift);
+                    tcg_gen_add_i32(cpu_T[1], cpu_T[1], tmp);
+                    dead_tmp(tmp);
                     break;
                 case 4: /* Negative offset.  */
                     gen_op_addl_T1_im(-imm);
@@ -6733,7 +6845,6 @@
                     imm = -imm;
                     /* Fall through.  */
                 case 3: /* Post-increment.  */
-                    gen_op_movl_T2_im(imm);
                     postinc = 1;
                     writeback = 1;
                     break;
@@ -6802,6 +6913,7 @@
     uint32_t val, insn, op, rm, rn, rd, shift, cond;
     int32_t offset;
     int i;
+    TCGv tmp;
 
     if (s->condexec_mask) {
         cond = s->condexec_cond;
@@ -6989,7 +7101,7 @@
             break;
         case 0x5: /* adc */
             if (s->condexec_mask)
-                gen_op_adcl_T0_T1();
+                gen_adc_T0_T1();
             else
                 gen_op_adcl_T0_T1_cc();
             break;
@@ -7064,8 +7176,9 @@
         rm = (insn >> 6) & 7;
         op = (insn >> 9) & 7;
         gen_movl_T1_reg(s, rn);
-        gen_movl_T2_reg(s, rm);
-        gen_op_addl_T1_T2();
+        tmp = load_reg(s, rm);
+        tcg_gen_add_i32(cpu_T[1], cpu_T[1], tmp);
+        dead_tmp(tmp);
 
         if (op < 3) /* store */
             gen_movl_T0_reg(s, rd);
@@ -7106,8 +7219,7 @@
         rn = (insn >> 3) & 7;
         gen_movl_T1_reg(s, rn);
         val = (insn >> 4) & 0x7c;
-        gen_op_movl_T2_im(val);
-        gen_op_addl_T1_T2();
+        tcg_gen_addi_i32(cpu_T[1], cpu_T[1], val);
 
         if (insn & (1 << 11)) {
             /* load */
@@ -7126,8 +7238,7 @@
         rn = (insn >> 3) & 7;
         gen_movl_T1_reg(s, rn);
         val = (insn >> 6) & 0x1f;
-        gen_op_movl_T2_im(val);
-        gen_op_addl_T1_T2();
+        tcg_gen_addi_i32(cpu_T[1], cpu_T[1], val);
 
         if (insn & (1 << 11)) {
             /* load */
@@ -7146,8 +7257,7 @@
         rn = (insn >> 3) & 7;
         gen_movl_T1_reg(s, rn);
         val = (insn >> 5) & 0x3e;
-        gen_op_movl_T2_im(val);
-        gen_op_addl_T1_T2();
+        tcg_gen_addi_i32(cpu_T[1], cpu_T[1], val);
 
         if (insn & (1 << 11)) {
             /* load */
@@ -7165,8 +7275,7 @@
         rd = (insn >> 8) & 7;
         gen_movl_T1_reg(s, 13);
         val = (insn & 0xff) * 4;
-        gen_op_movl_T2_im(val);
-        gen_op_addl_T1_T2();
+        tcg_gen_addi_i32(cpu_T[1], cpu_T[1], val);
 
         if (insn & (1 << 11)) {
             /* load */
@@ -7201,13 +7310,12 @@
         switch (op) {
         case 0:
             /* adjust stack pointer */
-            gen_movl_T1_reg(s, 13);
+            tmp = load_reg(s, 13);
             val = (insn & 0x7f) * 4;
             if (insn & (1 << 7))
               val = -(int32_t)val;
-            gen_op_movl_T2_im(val);
-            gen_op_addl_T1_T2();
-            gen_movl_reg_T1(s, 13);
+            tcg_gen_addi_i32(tmp, tmp, val);
+            store_reg(s, 13, tmp);
             break;
 
         case 2: /* sign/zero extend.  */
@@ -7216,10 +7324,10 @@
             rm = (insn >> 3) & 7;
             gen_movl_T1_reg(s, rm);
             switch ((insn >> 6) & 3) {
-            case 0: gen_op_sxth_T1(); break;
-            case 1: gen_op_sxtb_T1(); break;
-            case 2: gen_op_uxth_T1(); break;
-            case 3: gen_op_uxtb_T1(); break;
+            case 0: gen_sxth(cpu_T[1]); break;
+            case 1: gen_sxtb(cpu_T[1]); break;
+            case 2: gen_uxth(cpu_T[1]); break;
+            case 3: gen_uxtb(cpu_T[1]); break;
             }
             gen_movl_reg_T1(s, rd);
             break;
@@ -7235,10 +7343,8 @@
                     offset += 4;
             }
             if ((insn & (1 << 11)) == 0) {
-                gen_op_movl_T2_im(-offset);
-                gen_op_addl_T1_T2();
+                gen_op_addl_T1_im(-offset);
             }
-            gen_op_movl_T2_im(4);
             for (i = 0; i < 8; i++) {
                 if (insn & (1 << i)) {
                     if (insn & (1 << 11)) {
@@ -7251,7 +7357,7 @@
                         gen_ldst(stl, s);
                     }
                     /* advance to the next address.  */
-                    gen_op_addl_T1_T2();
+                    gen_op_addl_T1_im(4);
                 }
             }
             if (insn & (1 << 8)) {
@@ -7265,11 +7371,10 @@
                     gen_movl_T0_reg(s, 14);
                     gen_ldst(stl, s);
                 }
-                gen_op_addl_T1_T2();
+                gen_op_addl_T1_im(4);
             }
             if ((insn & (1 << 11)) == 0) {
-                gen_op_movl_T2_im(-offset);
-                gen_op_addl_T1_T2();
+                gen_op_addl_T1_im(-offset);
             }
             /* write back the new stack pointer */
             gen_movl_reg_T1(s, 13);
@@ -7308,7 +7413,7 @@
         case 0xe: /* bkpt */
             gen_set_condexec(s);
             gen_op_movl_T0_im((long)s->pc - 2);
-            gen_op_movl_reg_TN[0][15]();
+            gen_set_pc_T0();
             gen_op_bkpt();
             s->is_jmp = DISAS_JUMP;
             break;
@@ -7363,7 +7468,6 @@
         /* load/store multiple */
         rn = (insn >> 8) & 0x7;
         gen_movl_T1_reg(s, rn);
-        gen_op_movl_T2_im(4);
         for (i = 0; i < 8; i++) {
             if (insn & (1 << i)) {
                 if (insn & (1 << 11)) {
@@ -7376,7 +7480,7 @@
                     gen_ldst(stl, s);
                 }
                 /* advance to the next address */
-                gen_op_addl_T1_T2();
+                gen_op_addl_T1_im(4);
             }
         }
         /* Base register writeback.  */
@@ -7395,7 +7499,7 @@
             gen_set_condexec(s);
             gen_op_movl_T0_im((long)s->pc | 1);
             /* Don't set r15.  */
-            gen_op_movl_reg_TN[0][15]();
+            gen_set_pc_T0();
             s->is_jmp = DISAS_SWI;
             break;
         }
@@ -7434,7 +7538,7 @@
 undef32:
     gen_set_condexec(s);
     gen_op_movl_T0_im((long)s->pc - 4);
-    gen_op_movl_reg_TN[0][15]();
+    gen_set_pc_T0();
     gen_op_undef_insn();
     s->is_jmp = DISAS_JUMP;
     return;
@@ -7442,7 +7546,7 @@
 undef:
     gen_set_condexec(s);
     gen_op_movl_T0_im((long)s->pc - 2);
-    gen_op_movl_reg_TN[0][15]();
+    gen_set_pc_T0();
     gen_op_undef_insn();
     s->is_jmp = DISAS_JUMP;
 }
@@ -7461,6 +7565,9 @@
     uint32_t next_page_start;
 
     /* generate intermediate code */
+    num_temps = 0;
+    memset(temps, 0, sizeof(temps));
+
     pc_start = tb->pc;
 
     dc->tb = tb;
@@ -7502,7 +7609,7 @@
                 if (env->breakpoints[j] == dc->pc) {
                     gen_set_condexec(dc);
                     gen_op_movl_T0_im((long)dc->pc);
-                    gen_op_movl_reg_TN[0][15]();
+                    gen_set_pc_T0();
                     gen_op_debug();
                     dc->is_jmp = DISAS_JUMP;
                     /* Advance PC so that clearing the breakpoint will
@@ -7537,6 +7644,10 @@
         } else {
             disas_arm_insn(env, dc);
         }
+        if (num_temps) {
+            fprintf(stderr, "Internal resource leak before %08x\n", dc->pc);
+            num_temps = 0;
+        }
 
         if (dc->condjmp && !dc->is_jmp) {
             gen_set_label(dc->condlabel);
@@ -7572,7 +7683,7 @@
         }
         if (dc->condjmp || !dc->is_jmp) {
             gen_op_movl_T0_im((long)dc->pc);
-            gen_op_movl_reg_TN[0][15]();
+            gen_set_pc_T0();
             dc->condjmp = 0;
         }
         gen_set_condexec(dc);