use the TCG code generator


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@3944 c046a42c-6fe2-441c-8c8c-71466251a162
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 9723e76..aa397f6 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -28,10 +28,8 @@
 #include "cpu.h"
 #include "exec-all.h"
 #include "disas.h"
-
-/* XXX: move that elsewhere */
-static uint16_t *gen_opc_ptr;
-static uint32_t *gen_opparam_ptr;
+#include "helper.h"
+#include "tcg-op.h"
 
 #define PREFIX_REPZ   0x01
 #define PREFIX_REPNZ  0x02
@@ -57,14 +55,79 @@
 #define REX_B(s) 0
 #endif
 
+//#define MACRO_TEST   1
+
 #ifdef TARGET_X86_64
-static int x86_64_hregs;
+#define TCG_TYPE_TL TCG_TYPE_I64
+#define tcg_gen_movi_tl tcg_gen_movi_i64
+#define tcg_gen_mov_tl tcg_gen_mov_i64
+#define tcg_gen_ld8u_tl tcg_gen_ld8u_i64
+#define tcg_gen_ld8s_tl tcg_gen_ld8s_i64
+#define tcg_gen_ld16u_tl tcg_gen_ld16u_i64
+#define tcg_gen_ld16s_tl tcg_gen_ld16s_i64
+#define tcg_gen_ld32u_tl tcg_gen_ld32u_i64
+#define tcg_gen_ld32s_tl tcg_gen_ld32s_i64
+#define tcg_gen_ld_tl tcg_gen_ld_i64
+#define tcg_gen_st8_tl tcg_gen_st8_i64
+#define tcg_gen_st16_tl tcg_gen_st16_i64
+#define tcg_gen_st32_tl tcg_gen_st32_i64
+#define tcg_gen_st_tl tcg_gen_st_i64
+#define tcg_gen_add_tl tcg_gen_add_i64
+#define tcg_gen_addi_tl tcg_gen_addi_i64
+#define tcg_gen_sub_tl tcg_gen_sub_i64
+#define tcg_gen_subi_tl tcg_gen_subi_i64
+#define tcg_gen_and_tl tcg_gen_and_i64
+#define tcg_gen_andi_tl tcg_gen_andi_i64
+#define tcg_gen_or_tl tcg_gen_or_i64
+#define tcg_gen_ori_tl tcg_gen_ori_i64
+#define tcg_gen_xor_tl tcg_gen_xor_i64
+#define tcg_gen_xori_tl tcg_gen_xori_i64
+#define tcg_gen_shl_tl tcg_gen_shl_i64
+#define tcg_gen_shli_tl tcg_gen_shli_i64
+#define tcg_gen_shr_tl tcg_gen_shr_i64
+#define tcg_gen_shri_tl tcg_gen_shri_i64
+#define tcg_gen_sar_tl tcg_gen_sar_i64
+#define tcg_gen_sari_tl tcg_gen_sari_i64
+#else
+#define TCG_TYPE_TL TCG_TYPE_I32
+#define tcg_gen_movi_tl tcg_gen_movi_i32
+#define tcg_gen_mov_tl tcg_gen_mov_i32
+#define tcg_gen_ld8u_tl tcg_gen_ld8u_i32
+#define tcg_gen_ld8s_tl tcg_gen_ld8s_i32
+#define tcg_gen_ld16u_tl tcg_gen_ld16u_i32
+#define tcg_gen_ld16s_tl tcg_gen_ld16s_i32
+#define tcg_gen_ld32u_tl tcg_gen_ld_i32
+#define tcg_gen_ld32s_tl tcg_gen_ld_i32
+#define tcg_gen_ld_tl tcg_gen_ld_i32
+#define tcg_gen_st8_tl tcg_gen_st8_i32
+#define tcg_gen_st16_tl tcg_gen_st16_i32
+#define tcg_gen_st32_tl tcg_gen_st_i32
+#define tcg_gen_st_tl tcg_gen_st_i32
+#define tcg_gen_add_tl tcg_gen_add_i32
+#define tcg_gen_addi_tl tcg_gen_addi_i32
+#define tcg_gen_sub_tl tcg_gen_sub_i32
+#define tcg_gen_subi_tl tcg_gen_subi_i32
+#define tcg_gen_and_tl tcg_gen_and_i32
+#define tcg_gen_andi_tl tcg_gen_andi_i32
+#define tcg_gen_or_tl tcg_gen_or_i32
+#define tcg_gen_ori_tl tcg_gen_ori_i32
+#define tcg_gen_xor_tl tcg_gen_xor_i32
+#define tcg_gen_xori_tl tcg_gen_xori_i32
+#define tcg_gen_shl_tl tcg_gen_shl_i32
+#define tcg_gen_shli_tl tcg_gen_shli_i32
+#define tcg_gen_shr_tl tcg_gen_shr_i32
+#define tcg_gen_shri_tl tcg_gen_shri_i32
+#define tcg_gen_sar_tl tcg_gen_sar_i32
+#define tcg_gen_sari_tl tcg_gen_sari_i32
 #endif
 
-#ifdef USE_DIRECT_JUMP
-#define TBPARAM(x)
-#else
-#define TBPARAM(x) (long)(x)
+/* global register indexes */
+static int cpu_env, cpu_T[2], cpu_A0;
+/* local register indexes (only used inside old micro ops) */
+static int cpu_tmp0;
+
+#ifdef TARGET_X86_64
+static int x86_64_hregs;
 #endif
 
 typedef struct DisasContext {
@@ -131,15 +194,6 @@
     OP_SAR = 7,
 };
 
-enum {
-#define DEF(s, n, copy_size) INDEX_op_ ## s,
-#include "opc.h"
-#undef DEF
-    NB_OPS,
-};
-
-#include "gen-op.h"
-
 /* operand size */
 enum {
     OT_BYTE = 0,
@@ -164,6 +218,73 @@
     OR_A0, /* temporary register used when doing address evaluation */
 };
 
+static inline void gen_op_movl_T0_0(void)
+{
+    tcg_gen_movi_tl(cpu_T[0], 0);
+}
+
+static inline void gen_op_movl_T0_im(int32_t val)
+{
+    tcg_gen_movi_tl(cpu_T[0], val);
+}
+
+static inline void gen_op_movl_T0_imu(uint32_t val)
+{
+    tcg_gen_movi_tl(cpu_T[0], val);
+}
+
+static inline void gen_op_movl_T1_im(int32_t val)
+{
+    tcg_gen_movi_tl(cpu_T[1], val);
+}
+
+static inline void gen_op_movl_T1_imu(uint32_t val)
+{
+    tcg_gen_movi_tl(cpu_T[1], val);
+}
+
+static inline void gen_op_movl_A0_im(uint32_t val)
+{
+    tcg_gen_movi_tl(cpu_A0, val);
+}
+
+#ifdef TARGET_X86_64
+static inline void gen_op_movq_A0_im(int64_t val)
+{
+    tcg_gen_movi_tl(cpu_A0, val);
+}
+#endif
+
+static inline void gen_movtl_T0_im(target_ulong val)
+{
+    tcg_gen_movi_tl(cpu_T[0], val);
+}
+
+static inline void gen_movtl_T1_im(target_ulong val)
+{
+    tcg_gen_movi_tl(cpu_T[1], val);
+}
+
+static inline void gen_op_andl_T0_ffff(void)
+{
+    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
+}
+
+static inline void gen_op_andl_T0_im(uint32_t val)
+{
+    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], val);
+}
+
+static inline void gen_op_movl_T0_T1(void)
+{
+    tcg_gen_mov_tl(cpu_T[0], cpu_T[1]);
+}
+
+static inline void gen_op_andl_A0_ffff(void)
+{
+    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffff);
+}
+
 #ifdef TARGET_X86_64
 
 #define NB_OP_SIZES 4
@@ -186,45 +307,6 @@
   prefix ## R14 ## suffix,\
   prefix ## R15 ## suffix,
 
-#define DEF_BREGS(prefixb, prefixh, suffix)             \
-                                                        \
-static void prefixb ## ESP ## suffix ## _wrapper(void)  \
-{                                                       \
-    if (x86_64_hregs)                                 \
-        prefixb ## ESP ## suffix ();                    \
-    else                                                \
-        prefixh ## EAX ## suffix ();                    \
-}                                                       \
-                                                        \
-static void prefixb ## EBP ## suffix ## _wrapper(void)  \
-{                                                       \
-    if (x86_64_hregs)                                 \
-        prefixb ## EBP ## suffix ();                    \
-    else                                                \
-        prefixh ## ECX ## suffix ();                    \
-}                                                       \
-                                                        \
-static void prefixb ## ESI ## suffix ## _wrapper(void)  \
-{                                                       \
-    if (x86_64_hregs)                                 \
-        prefixb ## ESI ## suffix ();                    \
-    else                                                \
-        prefixh ## EDX ## suffix ();                    \
-}                                                       \
-                                                        \
-static void prefixb ## EDI ## suffix ## _wrapper(void)  \
-{                                                       \
-    if (x86_64_hregs)                                 \
-        prefixb ## EDI ## suffix ();                    \
-    else                                                \
-        prefixh ## EBX ## suffix ();                    \
-}
-
-DEF_BREGS(gen_op_movb_, gen_op_movh_, _T0)
-DEF_BREGS(gen_op_movb_, gen_op_movh_, _T1)
-DEF_BREGS(gen_op_movl_T0_, gen_op_movh_T0_, )
-DEF_BREGS(gen_op_movl_T1_, gen_op_movh_T1_, )
-
 #else /* !TARGET_X86_64 */
 
 #define NB_OP_SIZES 3
@@ -241,218 +323,227 @@
 
 #endif /* !TARGET_X86_64 */
 
-static GenOpFunc *gen_op_mov_reg_T0[NB_OP_SIZES][CPU_NB_REGS] = {
-    [OT_BYTE] = {
-        gen_op_movb_EAX_T0,
-        gen_op_movb_ECX_T0,
-        gen_op_movb_EDX_T0,
-        gen_op_movb_EBX_T0,
-#ifdef TARGET_X86_64
-        gen_op_movb_ESP_T0_wrapper,
-        gen_op_movb_EBP_T0_wrapper,
-        gen_op_movb_ESI_T0_wrapper,
-        gen_op_movb_EDI_T0_wrapper,
-        gen_op_movb_R8_T0,
-        gen_op_movb_R9_T0,
-        gen_op_movb_R10_T0,
-        gen_op_movb_R11_T0,
-        gen_op_movb_R12_T0,
-        gen_op_movb_R13_T0,
-        gen_op_movb_R14_T0,
-        gen_op_movb_R15_T0,
+#if defined(WORDS_BIGENDIAN)
+#define REG_B_OFFSET (sizeof(target_ulong) - 1)
+#define REG_H_OFFSET (sizeof(target_ulong) - 2)
+#define REG_W_OFFSET (sizeof(target_ulong) - 2)
+#define REG_L_OFFSET (sizeof(target_ulong) - 4)
+#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
 #else
-        gen_op_movh_EAX_T0,
-        gen_op_movh_ECX_T0,
-        gen_op_movh_EDX_T0,
-        gen_op_movh_EBX_T0,
+#define REG_B_OFFSET 0
+#define REG_H_OFFSET 1
+#define REG_W_OFFSET 0
+#define REG_L_OFFSET 0
+#define REG_LH_OFFSET 4
 #endif
-    },
-    [OT_WORD] = {
-        DEF_REGS(gen_op_movw_, _T0)
-    },
-    [OT_LONG] = {
-        DEF_REGS(gen_op_movl_, _T0)
-    },
-#ifdef TARGET_X86_64
-    [OT_QUAD] = {
-        DEF_REGS(gen_op_movq_, _T0)
-    },
-#endif
-};
 
-static GenOpFunc *gen_op_mov_reg_T1[NB_OP_SIZES][CPU_NB_REGS] = {
-    [OT_BYTE] = {
-        gen_op_movb_EAX_T1,
-        gen_op_movb_ECX_T1,
-        gen_op_movb_EDX_T1,
-        gen_op_movb_EBX_T1,
-#ifdef TARGET_X86_64
-        gen_op_movb_ESP_T1_wrapper,
-        gen_op_movb_EBP_T1_wrapper,
-        gen_op_movb_ESI_T1_wrapper,
-        gen_op_movb_EDI_T1_wrapper,
-        gen_op_movb_R8_T1,
-        gen_op_movb_R9_T1,
-        gen_op_movb_R10_T1,
-        gen_op_movb_R11_T1,
-        gen_op_movb_R12_T1,
-        gen_op_movb_R13_T1,
-        gen_op_movb_R14_T1,
-        gen_op_movb_R15_T1,
-#else
-        gen_op_movh_EAX_T1,
-        gen_op_movh_ECX_T1,
-        gen_op_movh_EDX_T1,
-        gen_op_movh_EBX_T1,
-#endif
-    },
-    [OT_WORD] = {
-        DEF_REGS(gen_op_movw_, _T1)
-    },
-    [OT_LONG] = {
-        DEF_REGS(gen_op_movl_, _T1)
-    },
-#ifdef TARGET_X86_64
-    [OT_QUAD] = {
-        DEF_REGS(gen_op_movq_, _T1)
-    },
-#endif
-};
-
-static GenOpFunc *gen_op_mov_reg_A0[NB_OP_SIZES - 1][CPU_NB_REGS] = {
-    [0] = {
-        DEF_REGS(gen_op_movw_, _A0)
-    },
-    [1] = {
-        DEF_REGS(gen_op_movl_, _A0)
-    },
-#ifdef TARGET_X86_64
-    [2] = {
-        DEF_REGS(gen_op_movq_, _A0)
-    },
-#endif
-};
-
-static GenOpFunc *gen_op_mov_TN_reg[NB_OP_SIZES][2][CPU_NB_REGS] =
+static inline void gen_op_mov_reg_TN(int ot, int t_index, int reg)
 {
-    [OT_BYTE] = {
-        {
-            gen_op_movl_T0_EAX,
-            gen_op_movl_T0_ECX,
-            gen_op_movl_T0_EDX,
-            gen_op_movl_T0_EBX,
+    switch(ot) {
+    case OT_BYTE:
+        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
+            tcg_gen_st8_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
+        } else {
+            tcg_gen_st8_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
+        }
+        break;
+    case OT_WORD:
+        tcg_gen_st16_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+        break;
 #ifdef TARGET_X86_64
-            gen_op_movl_T0_ESP_wrapper,
-            gen_op_movl_T0_EBP_wrapper,
-            gen_op_movl_T0_ESI_wrapper,
-            gen_op_movl_T0_EDI_wrapper,
-            gen_op_movl_T0_R8,
-            gen_op_movl_T0_R9,
-            gen_op_movl_T0_R10,
-            gen_op_movl_T0_R11,
-            gen_op_movl_T0_R12,
-            gen_op_movl_T0_R13,
-            gen_op_movl_T0_R14,
-            gen_op_movl_T0_R15,
+    case OT_LONG:
+        tcg_gen_st32_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        /* high part of register set to zero */
+        tcg_gen_movi_tl(cpu_tmp0, 0);
+        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
+        break;
+    default:
+    case OT_QUAD:
+        tcg_gen_st_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]));
+        break;
 #else
-            gen_op_movh_T0_EAX,
-            gen_op_movh_T0_ECX,
-            gen_op_movh_T0_EDX,
-            gen_op_movh_T0_EBX,
+    default:
+    case OT_LONG:
+        tcg_gen_st32_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        break;
 #endif
-        },
-        {
-            gen_op_movl_T1_EAX,
-            gen_op_movl_T1_ECX,
-            gen_op_movl_T1_EDX,
-            gen_op_movl_T1_EBX,
+    }
+}
+
+static inline void gen_op_mov_reg_T0(int ot, int reg)
+{
+    gen_op_mov_reg_TN(ot, 0, reg);
+}
+
+static inline void gen_op_mov_reg_T1(int ot, int reg)
+{
+    gen_op_mov_reg_TN(ot, 1, reg);
+}
+
+static inline void gen_op_mov_reg_A0(int size, int reg)
+{
+    switch(size) {
+    case 0:
+        tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+        break;
 #ifdef TARGET_X86_64
-            gen_op_movl_T1_ESP_wrapper,
-            gen_op_movl_T1_EBP_wrapper,
-            gen_op_movl_T1_ESI_wrapper,
-            gen_op_movl_T1_EDI_wrapper,
-            gen_op_movl_T1_R8,
-            gen_op_movl_T1_R9,
-            gen_op_movl_T1_R10,
-            gen_op_movl_T1_R11,
-            gen_op_movl_T1_R12,
-            gen_op_movl_T1_R13,
-            gen_op_movl_T1_R14,
-            gen_op_movl_T1_R15,
+    case 1:
+        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        /* high part of register set to zero */
+        tcg_gen_movi_tl(cpu_tmp0, 0);
+        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
+        break;
+    default:
+    case 2:
+        tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
+        break;
 #else
-            gen_op_movh_T1_EAX,
-            gen_op_movh_T1_ECX,
-            gen_op_movh_T1_EDX,
-            gen_op_movh_T1_EBX,
+    default:
+    case 1:
+        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        break;
 #endif
-        },
-    },
-    [OT_WORD] = {
-        {
-            DEF_REGS(gen_op_movl_T0_, )
-        },
-        {
-            DEF_REGS(gen_op_movl_T1_, )
-        },
-    },
-    [OT_LONG] = {
-        {
-            DEF_REGS(gen_op_movl_T0_, )
-        },
-        {
-            DEF_REGS(gen_op_movl_T1_, )
-        },
-    },
+    }
+}
+
+static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
+{
+    switch(ot) {
+    case OT_BYTE:
+        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
+            goto std_case;
+        } else {
+            tcg_gen_ld8u_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
+        }
+        break;
+    default:
+    std_case:
+        tcg_gen_ld_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]));
+        break;
+    }
+}
+
+static inline void gen_op_movl_A0_reg(int reg)
+{
+    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+}
+
+static inline void gen_op_addl_A0_im(int32_t val)
+{
+    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
 #ifdef TARGET_X86_64
-    [OT_QUAD] = {
-        {
-            DEF_REGS(gen_op_movl_T0_, )
-        },
-        {
-            DEF_REGS(gen_op_movl_T1_, )
-        },
-    },
+    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
 #endif
-};
-
-static GenOpFunc *gen_op_movl_A0_reg[CPU_NB_REGS] = {
-    DEF_REGS(gen_op_movl_A0_, )
-};
-
-static GenOpFunc *gen_op_addl_A0_reg_sN[4][CPU_NB_REGS] = {
-    [0] = {
-        DEF_REGS(gen_op_addl_A0_, )
-    },
-    [1] = {
-        DEF_REGS(gen_op_addl_A0_, _s1)
-    },
-    [2] = {
-        DEF_REGS(gen_op_addl_A0_, _s2)
-    },
-    [3] = {
-        DEF_REGS(gen_op_addl_A0_, _s3)
-    },
-};
+}
 
 #ifdef TARGET_X86_64
-static GenOpFunc *gen_op_movq_A0_reg[CPU_NB_REGS] = {
-    DEF_REGS(gen_op_movq_A0_, )
-};
+static inline void gen_op_addq_A0_im(int64_t val)
+{
+    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+}
+#endif
+
+static void gen_add_A0_im(DisasContext *s, int val)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s))
+        gen_op_addq_A0_im(val);
+    else
+#endif
+        gen_op_addl_A0_im(val);
+}
 
-static GenOpFunc *gen_op_addq_A0_reg_sN[4][CPU_NB_REGS] = {
-    [0] = {
-        DEF_REGS(gen_op_addq_A0_, )
-    },
-    [1] = {
-        DEF_REGS(gen_op_addq_A0_, _s1)
-    },
-    [2] = {
-        DEF_REGS(gen_op_addq_A0_, _s2)
-    },
-    [3] = {
-        DEF_REGS(gen_op_addq_A0_, _s3)
-    },
-};
+static inline void gen_op_addl_T0_T1(void)
+{
+    tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+}
+
+static inline void gen_op_jmp_T0(void)
+{
+    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
+}
+
+static inline void gen_op_addw_ESP_im(int32_t val)
+{
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ESP]));
+    tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+    tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ESP]) + REG_W_OFFSET);
+}
+
+static inline void gen_op_addl_ESP_im(int32_t val)
+{
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ESP]));
+    tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+#ifdef TARGET_X86_64
+    tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
+#endif
+    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ESP]));
+}
+
+#ifdef TARGET_X86_64
+static inline void gen_op_addq_ESP_im(int32_t val)
+{
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ESP]));
+    tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
+    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ESP]));
+}
+#endif
+
+static inline void gen_op_set_cc_op(int32_t val)
+{
+    tcg_gen_movi_tl(cpu_tmp0, val);
+    tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, cc_op));
+}
+
+static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
+{
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+    if (shift != 0)
+        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
+    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+#ifdef TARGET_X86_64
+    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+#endif
+}
+
+static inline void gen_op_movl_A0_seg(int reg)
+{
+    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + REG_L_OFFSET);
+}
+
+static inline void gen_op_addl_A0_seg(int reg)
+{
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
+    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+#ifdef TARGET_X86_64
+    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+#endif
+}
+
+#ifdef TARGET_X86_64
+static inline void gen_op_movq_A0_seg(int reg)
+{
+    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base));
+}
+
+static inline void gen_op_addq_A0_seg(int reg)
+{
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
+    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+}
+
+static inline void gen_op_movq_A0_reg(int reg)
+{
+    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
+}
+
+static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
+{
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
+    if (shift != 0)
+        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
+    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+}
 #endif
 
 static GenOpFunc *gen_op_cmov_reg_T1_T0[NB_OP_SIZES - 1][CPU_NB_REGS] = {
@@ -469,17 +560,6 @@
 #endif
 };
 
-static GenOpFunc *gen_op_arith_T0_T1_cc[8] = {
-    NULL,
-    gen_op_orl_T0_T1,
-    NULL,
-    NULL,
-    gen_op_andl_T0_T1,
-    NULL,
-    gen_op_xorl_T0_T1,
-    NULL,
-};
-
 #define DEF_ARITHC(SUFFIX)\
     {\
         gen_op_adcb ## SUFFIX ## _T0_T1_cc,\
@@ -681,133 +761,113 @@
 #endif
 };
 
-static GenOpFunc *gen_op_lds_T0_A0[3 * 4] = {
-    gen_op_ldsb_raw_T0_A0,
-    gen_op_ldsw_raw_T0_A0,
-    X86_64_ONLY(gen_op_ldsl_raw_T0_A0),
-    NULL,
-#ifndef CONFIG_USER_ONLY
-    gen_op_ldsb_kernel_T0_A0,
-    gen_op_ldsw_kernel_T0_A0,
-    X86_64_ONLY(gen_op_ldsl_kernel_T0_A0),
-    NULL,
-
-    gen_op_ldsb_user_T0_A0,
-    gen_op_ldsw_user_T0_A0,
-    X86_64_ONLY(gen_op_ldsl_user_T0_A0),
-    NULL,
-#endif
-};
-
-static GenOpFunc *gen_op_ldu_T0_A0[3 * 4] = {
-    gen_op_ldub_raw_T0_A0,
-    gen_op_lduw_raw_T0_A0,
-    NULL,
-    NULL,
-
-#ifndef CONFIG_USER_ONLY
-    gen_op_ldub_kernel_T0_A0,
-    gen_op_lduw_kernel_T0_A0,
-    NULL,
-    NULL,
-
-    gen_op_ldub_user_T0_A0,
-    gen_op_lduw_user_T0_A0,
-    NULL,
-    NULL,
-#endif
-};
+static inline void gen_op_lds_T0_A0(int idx)
+{
+    int mem_index = (idx >> 2) - 1;
+    switch(idx & 3) {
+    case 0:
+        tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index);
+        break;
+    case 1:
+        tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index);
+        break;
+    default:
+    case 2:
+        tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index);
+        break;
+    }
+}
 
 /* sign does not matter, except for lidt/lgdt call (TODO: fix it) */
-static GenOpFunc *gen_op_ld_T0_A0[3 * 4] = {
-    gen_op_ldub_raw_T0_A0,
-    gen_op_lduw_raw_T0_A0,
-    gen_op_ldl_raw_T0_A0,
-    X86_64_ONLY(gen_op_ldq_raw_T0_A0),
+static inline void gen_op_ld_T0_A0(int idx)
+{
+    int mem_index = (idx >> 2) - 1;
+    switch(idx & 3) {
+    case 0:
+        tcg_gen_qemu_ld8u(cpu_T[0], cpu_A0, mem_index);
+        break;
+    case 1:
+        tcg_gen_qemu_ld16u(cpu_T[0], cpu_A0, mem_index);
+        break;
+    case 2:
+        tcg_gen_qemu_ld32u(cpu_T[0], cpu_A0, mem_index);
+        break;
+    default:
+    case 3:
+        tcg_gen_qemu_ld64(cpu_T[0], cpu_A0, mem_index);
+        break;
+    }
+}
 
-#ifndef CONFIG_USER_ONLY
-    gen_op_ldub_kernel_T0_A0,
-    gen_op_lduw_kernel_T0_A0,
-    gen_op_ldl_kernel_T0_A0,
-    X86_64_ONLY(gen_op_ldq_kernel_T0_A0),
+static inline void gen_op_ldu_T0_A0(int idx)
+{
+    gen_op_ld_T0_A0(idx);
+}
 
-    gen_op_ldub_user_T0_A0,
-    gen_op_lduw_user_T0_A0,
-    gen_op_ldl_user_T0_A0,
-    X86_64_ONLY(gen_op_ldq_user_T0_A0),
-#endif
-};
+static inline void gen_op_ld_T1_A0(int idx)
+{
+    int mem_index = (idx >> 2) - 1;
+    switch(idx & 3) {
+    case 0:
+        tcg_gen_qemu_ld8u(cpu_T[1], cpu_A0, mem_index);
+        break;
+    case 1:
+        tcg_gen_qemu_ld16u(cpu_T[1], cpu_A0, mem_index);
+        break;
+    case 2:
+        tcg_gen_qemu_ld32u(cpu_T[1], cpu_A0, mem_index);
+        break;
+    default:
+    case 3:
+        tcg_gen_qemu_ld64(cpu_T[1], cpu_A0, mem_index);
+        break;
+    }
+}
 
-static GenOpFunc *gen_op_ld_T1_A0[3 * 4] = {
-    gen_op_ldub_raw_T1_A0,
-    gen_op_lduw_raw_T1_A0,
-    gen_op_ldl_raw_T1_A0,
-    X86_64_ONLY(gen_op_ldq_raw_T1_A0),
+static inline void gen_op_st_T0_A0(int idx)
+{
+    int mem_index = (idx >> 2) - 1;
+    switch(idx & 3) {
+    case 0:
+        tcg_gen_qemu_st8(cpu_T[0], cpu_A0, mem_index);
+        break;
+    case 1:
+        tcg_gen_qemu_st16(cpu_T[0], cpu_A0, mem_index);
+        break;
+    case 2:
+        tcg_gen_qemu_st32(cpu_T[0], cpu_A0, mem_index);
+        break;
+    default:
+    case 3:
+        tcg_gen_qemu_st64(cpu_T[0], cpu_A0, mem_index);
+        break;
+    }
+}
 
-#ifndef CONFIG_USER_ONLY
-    gen_op_ldub_kernel_T1_A0,
-    gen_op_lduw_kernel_T1_A0,
-    gen_op_ldl_kernel_T1_A0,
-    X86_64_ONLY(gen_op_ldq_kernel_T1_A0),
-
-    gen_op_ldub_user_T1_A0,
-    gen_op_lduw_user_T1_A0,
-    gen_op_ldl_user_T1_A0,
-    X86_64_ONLY(gen_op_ldq_user_T1_A0),
-#endif
-};
-
-static GenOpFunc *gen_op_st_T0_A0[3 * 4] = {
-    gen_op_stb_raw_T0_A0,
-    gen_op_stw_raw_T0_A0,
-    gen_op_stl_raw_T0_A0,
-    X86_64_ONLY(gen_op_stq_raw_T0_A0),
-
-#ifndef CONFIG_USER_ONLY
-    gen_op_stb_kernel_T0_A0,
-    gen_op_stw_kernel_T0_A0,
-    gen_op_stl_kernel_T0_A0,
-    X86_64_ONLY(gen_op_stq_kernel_T0_A0),
-
-    gen_op_stb_user_T0_A0,
-    gen_op_stw_user_T0_A0,
-    gen_op_stl_user_T0_A0,
-    X86_64_ONLY(gen_op_stq_user_T0_A0),
-#endif
-};
-
-static GenOpFunc *gen_op_st_T1_A0[3 * 4] = {
-    NULL,
-    gen_op_stw_raw_T1_A0,
-    gen_op_stl_raw_T1_A0,
-    X86_64_ONLY(gen_op_stq_raw_T1_A0),
-
-#ifndef CONFIG_USER_ONLY
-    NULL,
-    gen_op_stw_kernel_T1_A0,
-    gen_op_stl_kernel_T1_A0,
-    X86_64_ONLY(gen_op_stq_kernel_T1_A0),
-
-    NULL,
-    gen_op_stw_user_T1_A0,
-    gen_op_stl_user_T1_A0,
-    X86_64_ONLY(gen_op_stq_user_T1_A0),
-#endif
-};
+static inline void gen_op_st_T1_A0(int idx)
+{
+    int mem_index = (idx >> 2) - 1;
+    switch(idx & 3) {
+    case 0:
+        tcg_gen_qemu_st8(cpu_T[1], cpu_A0, mem_index);
+        break;
+    case 1:
+        tcg_gen_qemu_st16(cpu_T[1], cpu_A0, mem_index);
+        break;
+    case 2:
+        tcg_gen_qemu_st32(cpu_T[1], cpu_A0, mem_index);
+        break;
+    default:
+    case 3:
+        tcg_gen_qemu_st64(cpu_T[1], cpu_A0, mem_index);
+        break;
+    }
+}
 
 static inline void gen_jmp_im(target_ulong pc)
 {
-#ifdef TARGET_X86_64
-    if (pc == (uint32_t)pc) {
-        gen_op_movl_eip_im(pc);
-    } else if (pc == (int32_t)pc) {
-        gen_op_movq_eip_im(pc);
-    } else {
-        gen_op_movq_eip_im64(pc >> 32, pc);
-    }
-#else
-    gen_op_movl_eip_im(pc);
-#endif
+    tcg_gen_movi_tl(cpu_tmp0, pc);
+    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, eip));
 }
 
 static inline void gen_string_movl_A0_ESI(DisasContext *s)
@@ -818,10 +878,10 @@
 #ifdef TARGET_X86_64
     if (s->aflag == 2) {
         if (override >= 0) {
-            gen_op_movq_A0_seg(offsetof(CPUX86State,segs[override].base));
-            gen_op_addq_A0_reg_sN[0][R_ESI]();
+            gen_op_movq_A0_seg(override);
+            gen_op_addq_A0_reg_sN(0, R_ESI);
         } else {
-            gen_op_movq_A0_reg[R_ESI]();
+            gen_op_movq_A0_reg(R_ESI);
         }
     } else
 #endif
@@ -830,18 +890,18 @@
         if (s->addseg && override < 0)
             override = R_DS;
         if (override >= 0) {
-            gen_op_movl_A0_seg(offsetof(CPUX86State,segs[override].base));
-            gen_op_addl_A0_reg_sN[0][R_ESI]();
+            gen_op_movl_A0_seg(override);
+            gen_op_addl_A0_reg_sN(0, R_ESI);
         } else {
-            gen_op_movl_A0_reg[R_ESI]();
+            gen_op_movl_A0_reg(R_ESI);
         }
     } else {
         /* 16 address, always override */
         if (override < 0)
             override = R_DS;
-        gen_op_movl_A0_reg[R_ESI]();
+        gen_op_movl_A0_reg(R_ESI);
         gen_op_andl_A0_ffff();
-        gen_op_addl_A0_seg(offsetof(CPUX86State,segs[override].base));
+        gen_op_addl_A0_seg(override);
     }
 }
 
@@ -849,20 +909,20 @@
 {
 #ifdef TARGET_X86_64
     if (s->aflag == 2) {
-        gen_op_movq_A0_reg[R_EDI]();
+        gen_op_movq_A0_reg(R_EDI);
     } else
 #endif
     if (s->aflag) {
         if (s->addseg) {
-            gen_op_movl_A0_seg(offsetof(CPUX86State,segs[R_ES].base));
-            gen_op_addl_A0_reg_sN[0][R_EDI]();
+            gen_op_movl_A0_seg(R_ES);
+            gen_op_addl_A0_reg_sN(0, R_EDI);
         } else {
-            gen_op_movl_A0_reg[R_EDI]();
+            gen_op_movl_A0_reg(R_EDI);
         }
     } else {
-        gen_op_movl_A0_reg[R_EDI]();
+        gen_op_movl_A0_reg(R_EDI);
         gen_op_andl_A0_ffff();
-        gen_op_addl_A0_seg(offsetof(CPUX86State,segs[R_ES].base));
+        gen_op_addl_A0_seg(R_ES);
     }
 }
 
@@ -958,9 +1018,9 @@
 static inline void gen_movs(DisasContext *s, int ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_T0_A0[ot + s->mem_index]();
+    gen_op_ld_T0_A0(ot + s->mem_index);
     gen_string_movl_A0_EDI(s);
-    gen_op_st_T0_A0[ot + s->mem_index]();
+    gen_op_st_T0_A0(ot + s->mem_index);
     gen_op_movl_T0_Dshift[ot]();
 #ifdef TARGET_X86_64
     if (s->aflag == 2) {
@@ -1002,9 +1062,9 @@
 
 static inline void gen_stos(DisasContext *s, int ot)
 {
-    gen_op_mov_TN_reg[OT_LONG][0][R_EAX]();
+    gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
     gen_string_movl_A0_EDI(s);
-    gen_op_st_T0_A0[ot + s->mem_index]();
+    gen_op_st_T0_A0(ot + s->mem_index);
     gen_op_movl_T0_Dshift[ot]();
 #ifdef TARGET_X86_64
     if (s->aflag == 2) {
@@ -1021,8 +1081,8 @@
 static inline void gen_lods(DisasContext *s, int ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_T0_A0[ot + s->mem_index]();
-    gen_op_mov_reg_T0[ot][R_EAX]();
+    gen_op_ld_T0_A0(ot + s->mem_index);
+    gen_op_mov_reg_T0(ot, R_EAX);
     gen_op_movl_T0_Dshift[ot]();
 #ifdef TARGET_X86_64
     if (s->aflag == 2) {
@@ -1038,9 +1098,9 @@
 
 static inline void gen_scas(DisasContext *s, int ot)
 {
-    gen_op_mov_TN_reg[OT_LONG][0][R_EAX]();
+    gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_T1_A0[ot + s->mem_index]();
+    gen_op_ld_T1_A0(ot + s->mem_index);
     gen_op_cmpl_T0_T1_cc();
     gen_op_movl_T0_Dshift[ot]();
 #ifdef TARGET_X86_64
@@ -1058,9 +1118,9 @@
 static inline void gen_cmps(DisasContext *s, int ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_T0_A0[ot + s->mem_index]();
+    gen_op_ld_T0_A0(ot + s->mem_index);
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_T1_A0[ot + s->mem_index]();
+    gen_op_ld_T1_A0(ot + s->mem_index);
     gen_op_cmpl_T0_T1_cc();
     gen_op_movl_T0_Dshift[ot]();
 #ifdef TARGET_X86_64
@@ -1082,9 +1142,9 @@
 {
     gen_string_movl_A0_EDI(s);
     gen_op_movl_T0_0();
-    gen_op_st_T0_A0[ot + s->mem_index]();
+    gen_op_st_T0_A0(ot + s->mem_index);
     gen_op_in_DX_T0[ot]();
-    gen_op_st_T0_A0[ot + s->mem_index]();
+    gen_op_st_T0_A0(ot + s->mem_index);
     gen_op_movl_T0_Dshift[ot]();
 #ifdef TARGET_X86_64
     if (s->aflag == 2) {
@@ -1101,7 +1161,7 @@
 static inline void gen_outs(DisasContext *s, int ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_T0_A0[ot + s->mem_index]();
+    gen_op_ld_T0_A0(ot + s->mem_index);
     gen_op_out_DX_T0[ot]();
     gen_op_movl_T0_Dshift[ot]();
 #ifdef TARGET_X86_64
@@ -1320,9 +1380,9 @@
     GenOpFunc *gen_update_cc;
 
     if (d != OR_TMP0) {
-        gen_op_mov_TN_reg[ot][0][d]();
+        gen_op_mov_TN_reg(ot, 0, d);
     } else {
-        gen_op_ld_T0_A0[ot + s1->mem_index]();
+        gen_op_ld_T0_A0(ot + s1->mem_index);
     }
     switch(op) {
     case OP_ADCL:
@@ -1331,7 +1391,7 @@
             gen_op_set_cc_op(s1->cc_op);
         if (d != OR_TMP0) {
             gen_op_arithc_T0_T1_cc[ot][op - OP_ADCL]();
-            gen_op_mov_reg_T0[ot][d]();
+            gen_op_mov_reg_T0(ot, d);
         } else {
             gen_op_arithc_mem_T0_T1_cc[ot + s1->mem_index][op - OP_ADCL]();
         }
@@ -1343,15 +1403,23 @@
         gen_update_cc = gen_op_update2_cc;
         break;
     case OP_SUBL:
-        gen_op_subl_T0_T1();
+        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         s1->cc_op = CC_OP_SUBB + ot;
         gen_update_cc = gen_op_update2_cc;
         break;
     default:
     case OP_ANDL:
+        tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        s1->cc_op = CC_OP_LOGICB + ot;
+        gen_update_cc = gen_op_update1_cc;
+        break;
     case OP_ORL:
+        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        s1->cc_op = CC_OP_LOGICB + ot;
+        gen_update_cc = gen_op_update1_cc;
+        break;
     case OP_XORL:
-        gen_op_arith_T0_T1_cc[op]();
+        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         s1->cc_op = CC_OP_LOGICB + ot;
         gen_update_cc = gen_op_update1_cc;
         break;
@@ -1363,9 +1431,9 @@
     }
     if (op != OP_CMPL) {
         if (d != OR_TMP0)
-            gen_op_mov_reg_T0[ot][d]();
+            gen_op_mov_reg_T0(ot, d);
         else
-            gen_op_st_T0_A0[ot + s1->mem_index]();
+            gen_op_st_T0_A0(ot + s1->mem_index);
     }
     /* the flags update must happen after the memory write (precise
        exception support) */
@@ -1378,9 +1446,9 @@
 static void gen_inc(DisasContext *s1, int ot, int d, int c)
 {
     if (d != OR_TMP0)
-        gen_op_mov_TN_reg[ot][0][d]();
+        gen_op_mov_TN_reg(ot, 0, d);
     else
-        gen_op_ld_T0_A0[ot + s1->mem_index]();
+        gen_op_ld_T0_A0(ot + s1->mem_index);
     if (s1->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s1->cc_op);
     if (c > 0) {
@@ -1391,20 +1459,20 @@
         s1->cc_op = CC_OP_DECB + ot;
     }
     if (d != OR_TMP0)
-        gen_op_mov_reg_T0[ot][d]();
+        gen_op_mov_reg_T0(ot, d);
     else
-        gen_op_st_T0_A0[ot + s1->mem_index]();
+        gen_op_st_T0_A0(ot + s1->mem_index);
     gen_op_update_inc_cc();
 }
 
 static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
 {
     if (d != OR_TMP0)
-        gen_op_mov_TN_reg[ot][0][d]();
+        gen_op_mov_TN_reg(ot, 0, d);
     else
-        gen_op_ld_T0_A0[ot + s1->mem_index]();
+        gen_op_ld_T0_A0(ot + s1->mem_index);
     if (s != OR_TMP1)
-        gen_op_mov_TN_reg[ot][1][s]();
+        gen_op_mov_TN_reg(ot, 1, s);
     /* for zero counts, flags are not updated, so must do it dynamically */
     if (s1->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s1->cc_op);
@@ -1414,7 +1482,7 @@
     else
         gen_op_shift_mem_T0_T1_cc[ot + s1->mem_index][op]();
     if (d != OR_TMP0)
-        gen_op_mov_reg_T0[ot][d]();
+        gen_op_mov_reg_T0(ot, d);
     s1->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
 }
 
@@ -1487,27 +1555,21 @@
                 disp += s->popl_esp_hack;
 #ifdef TARGET_X86_64
             if (s->aflag == 2) {
-                gen_op_movq_A0_reg[base]();
+                gen_op_movq_A0_reg(base);
                 if (disp != 0) {
-                    if ((int32_t)disp == disp)
-                        gen_op_addq_A0_im(disp);
-                    else
-                        gen_op_addq_A0_im64(disp >> 32, disp);
+                    gen_op_addq_A0_im(disp);
                 }
             } else
 #endif
             {
-                gen_op_movl_A0_reg[base]();
+                gen_op_movl_A0_reg(base);
                 if (disp != 0)
                     gen_op_addl_A0_im(disp);
             }
         } else {
 #ifdef TARGET_X86_64
             if (s->aflag == 2) {
-                if ((int32_t)disp == disp)
-                    gen_op_movq_A0_im(disp);
-                else
-                    gen_op_movq_A0_im64(disp >> 32, disp);
+                gen_op_movq_A0_im(disp);
             } else
 #endif
             {
@@ -1518,11 +1580,11 @@
         if (havesib && (index != 4 || scale != 0)) {
 #ifdef TARGET_X86_64
             if (s->aflag == 2) {
-                gen_op_addq_A0_reg_sN[scale][index]();
+                gen_op_addq_A0_reg_sN(scale, index);
             } else
 #endif
             {
-                gen_op_addl_A0_reg_sN[scale][index]();
+                gen_op_addl_A0_reg_sN(scale, index);
             }
         }
         if (must_add_seg) {
@@ -1534,11 +1596,11 @@
             }
 #ifdef TARGET_X86_64
             if (s->aflag == 2) {
-                gen_op_addq_A0_seg(offsetof(CPUX86State,segs[override].base));
+                gen_op_addq_A0_seg(override);
             } else
 #endif
             {
-                gen_op_addl_A0_seg(offsetof(CPUX86State,segs[override].base));
+                gen_op_addl_A0_seg(override);
             }
         }
     } else {
@@ -1565,33 +1627,33 @@
         }
         switch(rm) {
         case 0:
-            gen_op_movl_A0_reg[R_EBX]();
-            gen_op_addl_A0_reg_sN[0][R_ESI]();
+            gen_op_movl_A0_reg(R_EBX);
+            gen_op_addl_A0_reg_sN(0, R_ESI);
             break;
         case 1:
-            gen_op_movl_A0_reg[R_EBX]();
-            gen_op_addl_A0_reg_sN[0][R_EDI]();
+            gen_op_movl_A0_reg(R_EBX);
+            gen_op_addl_A0_reg_sN(0, R_EDI);
             break;
         case 2:
-            gen_op_movl_A0_reg[R_EBP]();
-            gen_op_addl_A0_reg_sN[0][R_ESI]();
+            gen_op_movl_A0_reg(R_EBP);
+            gen_op_addl_A0_reg_sN(0, R_ESI);
             break;
         case 3:
-            gen_op_movl_A0_reg[R_EBP]();
-            gen_op_addl_A0_reg_sN[0][R_EDI]();
+            gen_op_movl_A0_reg(R_EBP);
+            gen_op_addl_A0_reg_sN(0, R_EDI);
             break;
         case 4:
-            gen_op_movl_A0_reg[R_ESI]();
+            gen_op_movl_A0_reg(R_ESI);
             break;
         case 5:
-            gen_op_movl_A0_reg[R_EDI]();
+            gen_op_movl_A0_reg(R_EDI);
             break;
         case 6:
-            gen_op_movl_A0_reg[R_EBP]();
+            gen_op_movl_A0_reg(R_EBP);
             break;
         default:
         case 7:
-            gen_op_movl_A0_reg[R_EBX]();
+            gen_op_movl_A0_reg(R_EBX);
             break;
         }
         if (disp != 0)
@@ -1605,7 +1667,7 @@
                 else
                     override = R_DS;
             }
-            gen_op_addl_A0_seg(offsetof(CPUX86State,segs[override].base));
+            gen_op_addl_A0_seg(override);
         }
     }
 
@@ -1680,11 +1742,11 @@
     if (must_add_seg) {
 #ifdef TARGET_X86_64
         if (CODE64(s)) {
-            gen_op_addq_A0_seg(offsetof(CPUX86State,segs[override].base));
+            gen_op_addq_A0_seg(override);
         } else
 #endif
         {
-            gen_op_addl_A0_seg(offsetof(CPUX86State,segs[override].base));
+            gen_op_addl_A0_seg(override);
         }
     }
 }
@@ -1700,23 +1762,23 @@
     if (mod == 3) {
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_TN_reg[ot][0][reg]();
-            gen_op_mov_reg_T0[ot][rm]();
+                gen_op_mov_TN_reg(ot, 0, reg);
+            gen_op_mov_reg_T0(ot, rm);
         } else {
-            gen_op_mov_TN_reg[ot][0][rm]();
+            gen_op_mov_TN_reg(ot, 0, rm);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_T0[ot][reg]();
+                gen_op_mov_reg_T0(ot, reg);
         }
     } else {
         gen_lea_modrm(s, modrm, &opreg, &disp);
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_TN_reg[ot][0][reg]();
-            gen_op_st_T0_A0[ot + s->mem_index]();
+                gen_op_mov_TN_reg(ot, 0, reg);
+            gen_op_st_T0_A0(ot + s->mem_index);
         } else {
-            gen_op_ld_T0_A0[ot + s->mem_index]();
+            gen_op_ld_T0_A0(ot + s->mem_index);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_T0[ot][reg]();
+                gen_op_mov_reg_T0(ot, reg);
         }
     }
 }
@@ -1762,13 +1824,9 @@
     if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
         (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK))  {
         /* jump to same page: we can use a direct jump */
-        if (tb_num == 0)
-            gen_op_goto_tb0(TBPARAM(tb));
-        else
-            gen_op_goto_tb1(TBPARAM(tb));
+        tcg_gen_goto_tb(tb_num);
         gen_jmp_im(eip);
-        gen_op_movl_T0_im((long)tb + tb_num);
-        gen_op_exit_tb();
+        tcg_gen_exit_tb((long)tb + tb_num);
     } else {
         /* jump to another page: currently not optimized */
         gen_jmp_im(eip);
@@ -1995,11 +2053,7 @@
     }
 }
 
-#ifdef TARGET_X86_64
-#define SVM_movq_T1_im(x) gen_op_movq_T1_im64((x) >> 32, x)
-#else
-#define SVM_movq_T1_im(x) gen_op_movl_T1_im(x)
-#endif
+#define SVM_movq_T1_im(x) gen_movtl_T1_im(x)
 
 static inline int
 gen_svm_check_io(DisasContext *s, target_ulong pc_start, uint64_t type)
@@ -2091,26 +2145,13 @@
 {
 #ifdef TARGET_X86_64
     if (CODE64(s)) {
-        if (addend == 8)
-            gen_op_addq_ESP_8();
-        else
-            gen_op_addq_ESP_im(addend);
+        gen_op_addq_ESP_im(addend);
     } else
 #endif
     if (s->ss32) {
-        if (addend == 2)
-            gen_op_addl_ESP_2();
-        else if (addend == 4)
-            gen_op_addl_ESP_4();
-        else
-            gen_op_addl_ESP_im(addend);
+        gen_op_addl_ESP_im(addend);
     } else {
-        if (addend == 2)
-            gen_op_addw_ESP_2();
-        else if (addend == 4)
-            gen_op_addw_ESP_4();
-        else
-            gen_op_addw_ESP_im(addend);
+        gen_op_addw_ESP_im(addend);
     }
 }
 
@@ -2119,38 +2160,38 @@
 {
 #ifdef TARGET_X86_64
     if (CODE64(s)) {
-        gen_op_movq_A0_reg[R_ESP]();
+        gen_op_movq_A0_reg(R_ESP);
         if (s->dflag) {
-            gen_op_subq_A0_8();
-            gen_op_st_T0_A0[OT_QUAD + s->mem_index]();
+            gen_op_addq_A0_im(-8);
+            gen_op_st_T0_A0(OT_QUAD + s->mem_index);
         } else {
-            gen_op_subq_A0_2();
-            gen_op_st_T0_A0[OT_WORD + s->mem_index]();
+            gen_op_addq_A0_im(-2);
+            gen_op_st_T0_A0(OT_WORD + s->mem_index);
         }
-        gen_op_movq_ESP_A0();
+        gen_op_mov_reg_A0(2, R_ESP);
     } else
 #endif
     {
-        gen_op_movl_A0_reg[R_ESP]();
+        gen_op_movl_A0_reg(R_ESP);
         if (!s->dflag)
-            gen_op_subl_A0_2();
+            gen_op_addl_A0_im(-2);
         else
-            gen_op_subl_A0_4();
+            gen_op_addl_A0_im(-4);
         if (s->ss32) {
             if (s->addseg) {
                 gen_op_movl_T1_A0();
-                gen_op_addl_A0_SS();
+                gen_op_addl_A0_seg(R_SS);
             }
         } else {
             gen_op_andl_A0_ffff();
             gen_op_movl_T1_A0();
-            gen_op_addl_A0_SS();
+            gen_op_addl_A0_seg(R_SS);
         }
-        gen_op_st_T0_A0[s->dflag + 1 + s->mem_index]();
+        gen_op_st_T0_A0(s->dflag + 1 + s->mem_index);
         if (s->ss32 && !s->addseg)
-            gen_op_movl_ESP_A0();
+            gen_op_mov_reg_A0(1, R_ESP);
         else
-            gen_op_mov_reg_T1[s->ss32 + 1][R_ESP]();
+            gen_op_mov_reg_T1(s->ss32 + 1, R_ESP);
     }
 }
 
@@ -2160,35 +2201,35 @@
 {
 #ifdef TARGET_X86_64
     if (CODE64(s)) {
-        gen_op_movq_A0_reg[R_ESP]();
+        gen_op_movq_A0_reg(R_ESP);
         if (s->dflag) {
-            gen_op_subq_A0_8();
-            gen_op_st_T1_A0[OT_QUAD + s->mem_index]();
+            gen_op_addq_A0_im(-8);
+            gen_op_st_T1_A0(OT_QUAD + s->mem_index);
         } else {
-            gen_op_subq_A0_2();
-            gen_op_st_T0_A0[OT_WORD + s->mem_index]();
+            gen_op_addq_A0_im(-2);
+            gen_op_st_T0_A0(OT_WORD + s->mem_index);
         }
-        gen_op_movq_ESP_A0();
+        gen_op_mov_reg_A0(2, R_ESP);
     } else
 #endif
     {
-        gen_op_movl_A0_reg[R_ESP]();
+        gen_op_movl_A0_reg(R_ESP);
         if (!s->dflag)
-            gen_op_subl_A0_2();
+            gen_op_addl_A0_im(-2);
         else
-            gen_op_subl_A0_4();
+            gen_op_addl_A0_im(-4);
         if (s->ss32) {
             if (s->addseg) {
-                gen_op_addl_A0_SS();
+                gen_op_addl_A0_seg(R_SS);
             }
         } else {
             gen_op_andl_A0_ffff();
-            gen_op_addl_A0_SS();
+            gen_op_addl_A0_seg(R_SS);
         }
-        gen_op_st_T1_A0[s->dflag + 1 + s->mem_index]();
+        gen_op_st_T1_A0(s->dflag + 1 + s->mem_index);
 
         if (s->ss32 && !s->addseg)
-            gen_op_movl_ESP_A0();
+            gen_op_mov_reg_A0(1, R_ESP);
         else
             gen_stack_update(s, (-2) << s->dflag);
     }
@@ -2199,20 +2240,20 @@
 {
 #ifdef TARGET_X86_64
     if (CODE64(s)) {
-        gen_op_movq_A0_reg[R_ESP]();
-        gen_op_ld_T0_A0[(s->dflag ? OT_QUAD : OT_WORD) + s->mem_index]();
+        gen_op_movq_A0_reg(R_ESP);
+        gen_op_ld_T0_A0((s->dflag ? OT_QUAD : OT_WORD) + s->mem_index);
     } else
 #endif
     {
-        gen_op_movl_A0_reg[R_ESP]();
+        gen_op_movl_A0_reg(R_ESP);
         if (s->ss32) {
             if (s->addseg)
-                gen_op_addl_A0_SS();
+                gen_op_addl_A0_seg(R_SS);
         } else {
             gen_op_andl_A0_ffff();
-            gen_op_addl_A0_SS();
+            gen_op_addl_A0_seg(R_SS);
         }
-        gen_op_ld_T0_A0[s->dflag + 1 + s->mem_index]();
+        gen_op_ld_T0_A0(s->dflag + 1 + s->mem_index);
     }
 }
 
@@ -2230,53 +2271,53 @@
 
 static void gen_stack_A0(DisasContext *s)
 {
+    /* Load the current stack pointer into A0.  Presumably A0 is the
+       translator's address temporary and T1 its value temporary — the
+       gen_op_* helpers are defined elsewhere in this file.  */
-    gen_op_movl_A0_ESP();
+    gen_op_movl_A0_reg(R_ESP);
+    /* In a 16-bit stack segment (!ss32) only the low 16 bits of SP
+       are significant.  */
     if (!s->ss32)
         gen_op_andl_A0_ffff();
+    /* Keep the raw (segment-relative) offset in T1 for the caller.  */
     gen_op_movl_T1_A0();
+    /* Add the SS segment base only when segment bases can be non-zero
+       (s->addseg); the helper now takes the segment index directly.  */
     if (s->addseg)
-        gen_op_addl_A0_seg(offsetof(CPUX86State,segs[R_SS].base));
+        gen_op_addl_A0_seg(R_SS);
 }
 
 /* NOTE: wrap around in 16 bit not fully handled */
 static void gen_pusha(DisasContext *s)
 {
     int i;
+    /* PUSHA/PUSHAD: pre-decrement the stack by 8 registers worth
+       (8 * 2 bytes when dflag == 0, 8 * 4 bytes when dflag == 1)
+       and compute the store address in A0.  */
-    gen_op_movl_A0_ESP();
+    gen_op_movl_A0_reg(R_ESP);
     gen_op_addl_A0_im(-16 <<  s->dflag);
+    /* 16-bit stack segment: only the low 16 bits of SP are used.  */
     if (!s->ss32)
         gen_op_andl_A0_ffff();
+    /* T1 holds the final SP offset, written back at the end.  */
     gen_op_movl_T1_A0();
     if (s->addseg)
-        gen_op_addl_A0_seg(offsetof(CPUX86State,segs[R_SS].base));
+        gen_op_addl_A0_seg(R_SS);
+    /* Store EAX..EDI (regs 7-i walks 7..0, i.e. EDI first at the
+       lowest address, matching the architectural push order).  */
     for(i = 0;i < 8; i++) {
-        gen_op_mov_TN_reg[OT_LONG][0][7 - i]();
-        gen_op_st_T0_A0[OT_WORD + s->dflag + s->mem_index]();
+        gen_op_mov_TN_reg(OT_LONG, 0, 7 - i);
+        gen_op_st_T0_A0(OT_WORD + s->dflag + s->mem_index);
         gen_op_addl_A0_im(2 <<  s->dflag);
     }
+    /* Commit the new stack pointer (16 or 32 bit, from T1).  */
-    gen_op_mov_reg_T1[OT_WORD + s->ss32][R_ESP]();
+    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
 }
 
 /* NOTE: wrap around in 16 bit not fully handled */
 static void gen_popa(DisasContext *s)
 {
     int i;
+    /* POPA/POPAD: load the registers back from the stack in reverse
+       push order (EDI first), then advance SP past all 8 slots.  */
-    gen_op_movl_A0_ESP();
+    gen_op_movl_A0_reg(R_ESP);
+    /* 16-bit stack segment: only the low 16 bits of SP are used.  */
     if (!s->ss32)
         gen_op_andl_A0_ffff();
+    /* T1 = SP + 8 slots; this becomes the new stack pointer below.  */
     gen_op_movl_T1_A0();
     gen_op_addl_T1_im(16 <<  s->dflag);
     if (s->addseg)
-        gen_op_addl_A0_seg(offsetof(CPUX86State,segs[R_SS].base));
+        gen_op_addl_A0_seg(R_SS);
     for(i = 0;i < 8; i++) {
         /* ESP is not reloaded */
+        /* i == 3 corresponds to the saved SP slot, which POPA skips
+           per the architecture; the address still advances.  */
         if (i != 3) {
-            gen_op_ld_T0_A0[OT_WORD + s->dflag + s->mem_index]();
-            gen_op_mov_reg_T0[OT_WORD + s->dflag][7 - i]();
+            gen_op_ld_T0_A0(OT_WORD + s->dflag + s->mem_index);
+            gen_op_mov_reg_T0(OT_WORD + s->dflag, 7 - i);
         }
         gen_op_addl_A0_im(2 <<  s->dflag);
     }
+    /* Commit the new stack pointer (16 or 32 bit, from T1).  */
-    gen_op_mov_reg_T1[OT_WORD + s->ss32][R_ESP]();
+    gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
 }
 
 static void gen_enter(DisasContext *s, int esp_addend, int level)
@@ -2289,41 +2330,41 @@
         ot = s->dflag ? OT_QUAD : OT_WORD;
         opsize = 1 << ot;
 
-        gen_op_movl_A0_ESP();
+        gen_op_movl_A0_reg(R_ESP);
         gen_op_addq_A0_im(-opsize);
         gen_op_movl_T1_A0();
 
         /* push bp */
-        gen_op_mov_TN_reg[OT_LONG][0][R_EBP]();
-        gen_op_st_T0_A0[ot + s->mem_index]();
+        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
+        gen_op_st_T0_A0(ot + s->mem_index);
         if (level) {
             gen_op_enter64_level(level, (ot == OT_QUAD));
         }
-        gen_op_mov_reg_T1[ot][R_EBP]();
+        gen_op_mov_reg_T1(ot, R_EBP);
         gen_op_addl_T1_im( -esp_addend + (-opsize * level) );
-        gen_op_mov_reg_T1[OT_QUAD][R_ESP]();
+        gen_op_mov_reg_T1(OT_QUAD, R_ESP);
     } else
 #endif
     {
         ot = s->dflag + OT_WORD;
         opsize = 2 << s->dflag;
 
-        gen_op_movl_A0_ESP();
+        gen_op_movl_A0_reg(R_ESP);
         gen_op_addl_A0_im(-opsize);
         if (!s->ss32)
             gen_op_andl_A0_ffff();
         gen_op_movl_T1_A0();
         if (s->addseg)
-            gen_op_addl_A0_seg(offsetof(CPUX86State,segs[R_SS].base));
+            gen_op_addl_A0_seg(R_SS);
         /* push bp */
-        gen_op_mov_TN_reg[OT_LONG][0][R_EBP]();
-        gen_op_st_T0_A0[ot + s->mem_index]();
+        gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
+        gen_op_st_T0_A0(ot + s->mem_index);
         if (level) {
             gen_op_enter_level(level, s->dflag);
         }
-        gen_op_mov_reg_T1[ot][R_EBP]();
+        gen_op_mov_reg_T1(ot, R_EBP);
         gen_op_addl_T1_im( -esp_addend + (-opsize * level) );
-        gen_op_mov_reg_T1[OT_WORD + s->ss32][R_ESP]();
+        gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
     }
 }
 
@@ -2371,8 +2412,7 @@
     } else if (s->tf) {
 	gen_op_single_step();
     } else {
-        gen_op_movl_T0_0();
-        gen_op_exit_tb();
+        tcg_gen_exit_tb(0);
     }
     s->is_jmp = 3;
 }
@@ -2399,42 +2439,6 @@
     gen_jmp_tb(s, eip, 0);
 }
 
-static void gen_movtl_T0_im(target_ulong val)
-{
-#ifdef TARGET_X86_64
-    if ((int32_t)val == val) {
-        gen_op_movl_T0_im(val);
-    } else {
-        gen_op_movq_T0_im64(val >> 32, val);
-    }
-#else
-    gen_op_movl_T0_im(val);
-#endif
-}
-
-static void gen_movtl_T1_im(target_ulong val)
-{
-#ifdef TARGET_X86_64
-    if ((int32_t)val == val) {
-        gen_op_movl_T1_im(val);
-    } else {
-        gen_op_movq_T1_im64(val >> 32, val);
-    }
-#else
-    gen_op_movl_T1_im(val);
-#endif
-}
-
-static void gen_add_A0_im(DisasContext *s, int val)
-{
-#ifdef TARGET_X86_64
-    if (CODE64(s))
-        gen_op_addq_A0_im(val);
-    else
-#endif
-        gen_op_addl_A0_im(val);
-}
-
 static GenOpFunc1 *gen_ldq_env_A0[3] = {
     gen_op_ldq_raw_env_A0,
 #ifndef CONFIG_USER_ONLY
@@ -2764,7 +2768,7 @@
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-                gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
+                gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                 gen_op_movl_env_T0(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 gen_op_movl_T0_0();
                 gen_op_movl_env_T0(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
@@ -2921,7 +2925,7 @@
             if (mod != 3) {
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                 gen_op_movl_T0_env(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-                gen_op_st_T0_A0[OT_LONG + s->mem_index]();
+                gen_op_st_T0_A0(OT_LONG + s->mem_index);
             } else {
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)),
@@ -2991,12 +2995,12 @@
         case 0x050: /* movmskps */
             rm = (modrm & 7) | REX_B(s);
             gen_op_movmskps(offsetof(CPUX86State,xmm_regs[rm]));
-            gen_op_mov_reg_T0[OT_LONG][reg]();
+            gen_op_mov_reg_T0(OT_LONG, reg);
             break;
         case 0x150: /* movmskpd */
             rm = (modrm & 7) | REX_B(s);
             gen_op_movmskpd(offsetof(CPUX86State,xmm_regs[rm]));
-            gen_op_mov_reg_T0[OT_LONG][reg]();
+            gen_op_mov_reg_T0(OT_LONG, reg);
             break;
         case 0x02a: /* cvtpi2ps */
         case 0x12a: /* cvtpi2pd */
@@ -3066,7 +3070,7 @@
                 if ((b >> 8) & 1) {
                     gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_t0.XMM_Q(0)));
                 } else {
-                    gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
+                    gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                     gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0)));
                 }
                 op2_offset = offsetof(CPUX86State,xmm_t0);
@@ -3076,7 +3080,7 @@
             }
             sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
                           (b & 1) * 4](op2_offset);
-            gen_op_mov_reg_T0[ot][reg]();
+            gen_op_mov_reg_T0(ot, reg);
             break;
         case 0xc4: /* pinsrw */
         case 0x1c4:
@@ -3106,7 +3110,7 @@
                 gen_op_pextrw_mmx(offsetof(CPUX86State,fpregs[rm].mmx), val);
             }
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_op_mov_reg_T0[OT_LONG][reg]();
+            gen_op_mov_reg_T0(OT_LONG, reg);
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
@@ -3144,7 +3148,7 @@
                 gen_op_pmovmskb_mmx(offsetof(CPUX86State,fpregs[rm].mmx));
             }
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_op_mov_reg_T0[OT_LONG][reg]();
+            gen_op_mov_reg_T0(OT_LONG, reg);
             break;
         default:
             goto illegal_op;
@@ -3158,11 +3162,11 @@
                 goto illegal_op;
 #ifdef TARGET_X86_64
             if (s->aflag == 2) {
-                gen_op_movq_A0_reg[R_EDI]();
+                gen_op_movq_A0_reg(R_EDI);
             } else
 #endif
             {
-                gen_op_movl_A0_reg[R_EDI]();
+                gen_op_movl_A0_reg(R_EDI);
                 if (s->aflag == 0)
                     gen_op_andl_A0_ffff();
             }
@@ -3186,7 +3190,7 @@
                     /* specific case for SSE single instructions */
                     if (b1 == 2) {
                         /* 32 bit access */
-                        gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
+                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                         gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0)));
                     } else {
                         /* 64 bit access */
@@ -3412,13 +3416,13 @@
                     /* xor reg, reg optimisation */
                     gen_op_movl_T0_0();
                     s->cc_op = CC_OP_LOGICB + ot;
-                    gen_op_mov_reg_T0[ot][reg]();
+                    gen_op_mov_reg_T0(ot, reg);
                     gen_op_update1_cc();
                     break;
                 } else {
                     opreg = rm;
                 }
-                gen_op_mov_TN_reg[ot][1][reg]();
+                gen_op_mov_TN_reg(ot, 1, reg);
                 gen_op(s, op, ot, opreg);
                 break;
             case 1: /* OP Gv, Ev */
@@ -3428,11 +3432,11 @@
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
                     gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-                    gen_op_ld_T1_A0[ot + s->mem_index]();
+                    gen_op_ld_T1_A0(ot + s->mem_index);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
                 } else {
-                    gen_op_mov_TN_reg[ot][1][rm]();
+                    gen_op_mov_TN_reg(ot, 1, rm);
                 }
                 gen_op(s, op, ot, reg);
                 break;
@@ -3514,9 +3518,9 @@
             if (op == 0)
                 s->rip_offset = insn_const_size(ot);
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-            gen_op_ld_T0_A0[ot + s->mem_index]();
+            gen_op_ld_T0_A0(ot + s->mem_index);
         } else {
-            gen_op_mov_TN_reg[ot][0][rm]();
+            gen_op_mov_TN_reg(ot, 0, rm);
         }
 
         switch(op) {
@@ -3529,17 +3533,17 @@
         case 2: /* not */
             gen_op_notl_T0();
             if (mod != 3) {
-                gen_op_st_T0_A0[ot + s->mem_index]();
+                gen_op_st_T0_A0(ot + s->mem_index);
             } else {
-                gen_op_mov_reg_T0[ot][rm]();
+                gen_op_mov_reg_T0(ot, rm);
             }
             break;
         case 3: /* neg */
             gen_op_negl_T0();
             if (mod != 3) {
-                gen_op_st_T0_A0[ot + s->mem_index]();
+                gen_op_st_T0_A0(ot + s->mem_index);
             } else {
-                gen_op_mov_reg_T0[ot][rm]();
+                gen_op_mov_reg_T0(ot, rm);
             }
             gen_op_update_neg_cc();
             s->cc_op = CC_OP_SUBB + ot;
@@ -3603,7 +3607,12 @@
             default:
             case OT_LONG:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_op_divl_EAX_T0();
+#ifdef MACRO_TEST
+                /* XXX: this is just a test */
+                tcg_gen_macro_2(cpu_T[0], cpu_T[0], MACRO_TEST);
+#else
+                tcg_gen_helper_0_1(helper_divl_EAX_T0, cpu_T[0]);
+#endif
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
@@ -3626,7 +3635,7 @@
             default:
             case OT_LONG:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_op_idivl_EAX_T0();
+                tcg_gen_helper_0_1(helper_idivl_EAX_T0, cpu_T[0]);
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
@@ -3671,9 +3680,9 @@
         if (mod != 3) {
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
             if (op >= 2 && op != 3 && op != 5)
-                gen_op_ld_T0_A0[ot + s->mem_index]();
+                gen_op_ld_T0_A0(ot + s->mem_index);
         } else {
-            gen_op_mov_TN_reg[ot][0][rm]();
+            gen_op_mov_TN_reg(ot, 0, rm);
         }
 
         switch(op) {
@@ -3702,9 +3711,9 @@
             gen_eob(s);
             break;
         case 3: /* lcall Ev */
-            gen_op_ld_T1_A0[ot + s->mem_index]();
+            gen_op_ld_T1_A0(ot + s->mem_index);
             gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
-            gen_op_ldu_T0_A0[OT_WORD + s->mem_index]();
+            gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
         do_lcall:
             if (s->pe && !s->vm86) {
                 if (s->cc_op != CC_OP_DYNAMIC)
@@ -3723,9 +3732,9 @@
             gen_eob(s);
             break;
         case 5: /* ljmp Ev */
-            gen_op_ld_T1_A0[ot + s->mem_index]();
+            gen_op_ld_T1_A0(ot + s->mem_index);
             gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
-            gen_op_ldu_T0_A0[OT_WORD + s->mem_index]();
+            gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
         do_ljmp:
             if (s->pe && !s->vm86) {
                 if (s->cc_op != CC_OP_DYNAMIC)
@@ -3760,7 +3769,7 @@
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_TN_reg[ot][1][reg]();
+        gen_op_mov_TN_reg(ot, 1, reg);
         gen_op_testl_T0_T1_cc();
         s->cc_op = CC_OP_LOGICB + ot;
         break;
@@ -3773,7 +3782,7 @@
             ot = dflag + OT_WORD;
         val = insn_get(s, ot);
 
-        gen_op_mov_TN_reg[ot][0][OR_EAX]();
+        gen_op_mov_TN_reg(ot, 0, OR_EAX);
         gen_op_movl_T1_im(val);
         gen_op_testl_T0_T1_cc();
         s->cc_op = CC_OP_LOGICB + ot;
@@ -3819,7 +3828,7 @@
             val = (int8_t)insn_get(s, OT_BYTE);
             gen_op_movl_T1_im(val);
         } else {
-            gen_op_mov_TN_reg[ot][1][reg]();
+            gen_op_mov_TN_reg(ot, 1, reg);
         }
 
 #ifdef TARGET_X86_64
@@ -3832,7 +3841,7 @@
         } else {
             gen_op_imulw_T0_T1();
         }
-        gen_op_mov_reg_T0[ot][reg]();
+        gen_op_mov_reg_T0(ot, reg);
         s->cc_op = CC_OP_MULB + ot;
         break;
     case 0x1c0:
@@ -3846,18 +3855,18 @@
         mod = (modrm >> 6) & 3;
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_TN_reg[ot][0][reg]();
-            gen_op_mov_TN_reg[ot][1][rm]();
+            gen_op_mov_TN_reg(ot, 0, reg);
+            gen_op_mov_TN_reg(ot, 1, rm);
             gen_op_addl_T0_T1();
-            gen_op_mov_reg_T1[ot][reg]();
-            gen_op_mov_reg_T0[ot][rm]();
+            gen_op_mov_reg_T1(ot, reg);
+            gen_op_mov_reg_T0(ot, rm);
         } else {
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-            gen_op_mov_TN_reg[ot][0][reg]();
-            gen_op_ld_T1_A0[ot + s->mem_index]();
+            gen_op_mov_TN_reg(ot, 0, reg);
+            gen_op_ld_T1_A0(ot + s->mem_index);
             gen_op_addl_T0_T1();
-            gen_op_st_T0_A0[ot + s->mem_index]();
-            gen_op_mov_reg_T1[ot][reg]();
+            gen_op_st_T0_A0(ot + s->mem_index);
+            gen_op_mov_reg_T1(ot, reg);
         }
         gen_op_update2_cc();
         s->cc_op = CC_OP_ADDB + ot;
@@ -3871,15 +3880,15 @@
         modrm = ldub_code(s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
-        gen_op_mov_TN_reg[ot][1][reg]();
+        gen_op_mov_TN_reg(ot, 1, reg);
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_TN_reg[ot][0][rm]();
+            gen_op_mov_TN_reg(ot, 0, rm);
             gen_op_cmpxchg_T0_T1_EAX_cc[ot]();
-            gen_op_mov_reg_T0[ot][rm]();
+            gen_op_mov_reg_T0(ot, rm);
         } else {
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-            gen_op_ld_T0_A0[ot + s->mem_index]();
+            gen_op_ld_T0_A0(ot + s->mem_index);
             gen_op_cmpxchg_mem_T0_T1_EAX_cc[ot + s->mem_index]();
         }
         s->cc_op = CC_OP_SUBB + ot;
@@ -3900,7 +3909,7 @@
         /**************************/
         /* push/pop */
     case 0x50 ... 0x57: /* push */
-        gen_op_mov_TN_reg[OT_LONG][0][(b & 7) | REX_B(s)]();
+        gen_op_mov_TN_reg(OT_LONG, 0, (b & 7) | REX_B(s));
         gen_push_T0(s);
         break;
     case 0x58 ... 0x5f: /* pop */
@@ -3912,7 +3921,7 @@
         gen_pop_T0(s);
         /* NOTE: order is important for pop %sp */
         gen_pop_update(s);
-        gen_op_mov_reg_T0[ot][(b & 7) | REX_B(s)]();
+        gen_op_mov_reg_T0(ot, (b & 7) | REX_B(s));
         break;
     case 0x60: /* pusha */
         if (CODE64(s))
@@ -3951,7 +3960,7 @@
             /* NOTE: order is important for pop %sp */
             gen_pop_update(s);
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_reg_T0[ot][rm]();
+            gen_op_mov_reg_T0(ot, rm);
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
@@ -3972,14 +3981,14 @@
     case 0xc9: /* leave */
         /* XXX: exception not precise (ESP is updated before potential exception) */
         if (CODE64(s)) {
-            gen_op_mov_TN_reg[OT_QUAD][0][R_EBP]();
-            gen_op_mov_reg_T0[OT_QUAD][R_ESP]();
+            gen_op_mov_TN_reg(OT_QUAD, 0, R_EBP);
+            gen_op_mov_reg_T0(OT_QUAD, R_ESP);
         } else if (s->ss32) {
-            gen_op_mov_TN_reg[OT_LONG][0][R_EBP]();
-            gen_op_mov_reg_T0[OT_LONG][R_ESP]();
+            gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
+            gen_op_mov_reg_T0(OT_LONG, R_ESP);
         } else {
-            gen_op_mov_TN_reg[OT_WORD][0][R_EBP]();
-            gen_op_mov_reg_T0[OT_WORD][R_ESP]();
+            gen_op_mov_TN_reg(OT_WORD, 0, R_EBP);
+            gen_op_mov_reg_T0(OT_WORD, R_ESP);
         }
         gen_pop_T0(s);
         if (CODE64(s)) {
@@ -3987,7 +3996,7 @@
         } else {
             ot = dflag + OT_WORD;
         }
-        gen_op_mov_reg_T0[ot][R_EBP]();
+        gen_op_mov_reg_T0(ot, R_EBP);
         gen_pop_update(s);
         break;
     case 0x06: /* push es */
@@ -4066,9 +4075,9 @@
         val = insn_get(s, ot);
         gen_op_movl_T0_im(val);
         if (mod != 3)
-            gen_op_st_T0_A0[ot + s->mem_index]();
+            gen_op_st_T0_A0(ot + s->mem_index);
         else
-            gen_op_mov_reg_T0[ot][(modrm & 7) | REX_B(s)]();
+            gen_op_mov_reg_T0(ot, (modrm & 7) | REX_B(s));
         break;
     case 0x8a:
     case 0x8b: /* mov Ev, Gv */
@@ -4080,7 +4089,7 @@
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_reg_T0[ot][reg]();
+        gen_op_mov_reg_T0(ot, reg);
         break;
     case 0x8e: /* mov seg, Gv */
         modrm = ldub_code(s->pc++);
@@ -4132,7 +4141,7 @@
             rm = (modrm & 7) | REX_B(s);
 
             if (mod == 3) {
-                gen_op_mov_TN_reg[ot][0][rm]();
+                gen_op_mov_TN_reg(ot, 0, rm);
                 switch(ot | (b & 8)) {
                 case OT_BYTE:
                     gen_op_movzbl_T0_T0();
@@ -4148,15 +4157,15 @@
                     gen_op_movswl_T0_T0();
                     break;
                 }
-                gen_op_mov_reg_T0[d_ot][reg]();
+                gen_op_mov_reg_T0(d_ot, reg);
             } else {
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                 if (b & 8) {
-                    gen_op_lds_T0_A0[ot + s->mem_index]();
+                    gen_op_lds_T0_A0(ot + s->mem_index);
                 } else {
-                    gen_op_ldu_T0_A0[ot + s->mem_index]();
+                    gen_op_ldu_T0_A0(ot + s->mem_index);
                 }
-                gen_op_mov_reg_T0[d_ot][reg]();
+                gen_op_mov_reg_T0(d_ot, reg);
             }
         }
         break;
@@ -4174,7 +4183,7 @@
         s->addseg = 0;
         gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
         s->addseg = val;
-        gen_op_mov_reg_A0[ot - OT_WORD][reg]();
+        gen_op_mov_reg_A0(ot - OT_WORD, reg);
         break;
 
     case 0xa0: /* mov EAX, Ov */
@@ -4192,10 +4201,7 @@
             if (s->aflag == 2) {
                 offset_addr = ldq_code(s->pc);
                 s->pc += 8;
-                if (offset_addr == (int32_t)offset_addr)
-                    gen_op_movq_A0_im(offset_addr);
-                else
-                    gen_op_movq_A0_im64(offset_addr >> 32, offset_addr);
+                gen_op_movq_A0_im(offset_addr);
             } else
 #endif
             {
@@ -4208,35 +4214,35 @@
             }
             gen_add_A0_ds_seg(s);
             if ((b & 2) == 0) {
-                gen_op_ld_T0_A0[ot + s->mem_index]();
-                gen_op_mov_reg_T0[ot][R_EAX]();
+                gen_op_ld_T0_A0(ot + s->mem_index);
+                gen_op_mov_reg_T0(ot, R_EAX);
             } else {
-                gen_op_mov_TN_reg[ot][0][R_EAX]();
-                gen_op_st_T0_A0[ot + s->mem_index]();
+                gen_op_mov_TN_reg(ot, 0, R_EAX);
+                gen_op_st_T0_A0(ot + s->mem_index);
             }
         }
         break;
     case 0xd7: /* xlat */
 #ifdef TARGET_X86_64
         if (s->aflag == 2) {
-            gen_op_movq_A0_reg[R_EBX]();
+            gen_op_movq_A0_reg(R_EBX);
             gen_op_addq_A0_AL();
         } else
 #endif
         {
-            gen_op_movl_A0_reg[R_EBX]();
+            gen_op_movl_A0_reg(R_EBX);
             gen_op_addl_A0_AL();
             if (s->aflag == 0)
                 gen_op_andl_A0_ffff();
         }
         gen_add_A0_ds_seg(s);
-        gen_op_ldu_T0_A0[OT_BYTE + s->mem_index]();
-        gen_op_mov_reg_T0[OT_BYTE][R_EAX]();
+        gen_op_ldu_T0_A0(OT_BYTE + s->mem_index);
+        gen_op_mov_reg_T0(OT_BYTE, R_EAX);
         break;
     case 0xb0 ... 0xb7: /* mov R, Ib */
         val = insn_get(s, OT_BYTE);
         gen_op_movl_T0_im(val);
-        gen_op_mov_reg_T0[OT_BYTE][(b & 7) | REX_B(s)]();
+        gen_op_mov_reg_T0(OT_BYTE, (b & 7) | REX_B(s));
         break;
     case 0xb8 ... 0xbf: /* mov R, Iv */
 #ifdef TARGET_X86_64
@@ -4247,7 +4253,7 @@
             s->pc += 8;
             reg = (b & 7) | REX_B(s);
             gen_movtl_T0_im(tmp);
-            gen_op_mov_reg_T0[OT_QUAD][reg]();
+            gen_op_mov_reg_T0(OT_QUAD, reg);
         } else
 #endif
         {
@@ -4255,7 +4261,7 @@
             val = insn_get(s, ot);
             reg = (b & 7) | REX_B(s);
             gen_op_movl_T0_im(val);
-            gen_op_mov_reg_T0[ot][reg]();
+            gen_op_mov_reg_T0(ot, reg);
         }
         break;
 
@@ -4276,21 +4282,21 @@
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
         do_xchg_reg:
-            gen_op_mov_TN_reg[ot][0][reg]();
-            gen_op_mov_TN_reg[ot][1][rm]();
-            gen_op_mov_reg_T0[ot][rm]();
-            gen_op_mov_reg_T1[ot][reg]();
+            gen_op_mov_TN_reg(ot, 0, reg);
+            gen_op_mov_TN_reg(ot, 1, rm);
+            gen_op_mov_reg_T0(ot, rm);
+            gen_op_mov_reg_T1(ot, reg);
         } else {
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-            gen_op_mov_TN_reg[ot][0][reg]();
+            gen_op_mov_TN_reg(ot, 0, reg);
             /* for xchg, lock is implicit */
             if (!(prefixes & PREFIX_LOCK))
                 gen_op_lock();
-            gen_op_ld_T1_A0[ot + s->mem_index]();
-            gen_op_st_T0_A0[ot + s->mem_index]();
+            gen_op_ld_T1_A0(ot + s->mem_index);
+            gen_op_st_T0_A0(ot + s->mem_index);
             if (!(prefixes & PREFIX_LOCK))
                 gen_op_unlock();
-            gen_op_mov_reg_T1[ot][reg]();
+            gen_op_mov_reg_T1(ot, reg);
         }
         break;
     case 0xc4: /* les Gv */
@@ -4319,13 +4325,13 @@
         if (mod == 3)
             goto illegal_op;
         gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-        gen_op_ld_T1_A0[ot + s->mem_index]();
+        gen_op_ld_T1_A0(ot + s->mem_index);
         gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
         /* load the segment first to handle exceptions properly */
-        gen_op_ldu_T0_A0[OT_WORD + s->mem_index]();
+        gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
         gen_movl_seg_T0(s, op, pc_start - s->cs_base);
         /* then put the data */
-        gen_op_mov_reg_T1[ot][reg]();
+        gen_op_mov_reg_T1(ot, reg);
         if (s->is_jmp) {
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
@@ -4405,11 +4411,11 @@
 
         if (mod != 3) {
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-            gen_op_ld_T0_A0[ot + s->mem_index]();
+            gen_op_ld_T0_A0(ot + s->mem_index);
         } else {
-            gen_op_mov_TN_reg[ot][0][rm]();
+            gen_op_mov_TN_reg(ot, 0, rm);
         }
-        gen_op_mov_TN_reg[ot][1][reg]();
+        gen_op_mov_TN_reg(ot, 1, reg);
 
         if (shift) {
             val = ldub_code(s->pc++);
@@ -4437,7 +4443,7 @@
             s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
         }
         if (mod == 3) {
-            gen_op_mov_reg_T0[ot][rm]();
+            gen_op_mov_reg_T0(ot, rm);
         }
         break;
 
@@ -4969,7 +4975,7 @@
         else
             ot = dflag ? OT_LONG : OT_WORD;
         gen_check_io(s, ot, 1, pc_start - s->cs_base);
-        gen_op_mov_TN_reg[OT_WORD][0][R_EDX]();
+        gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
         gen_op_andl_T0_ffff();
         if (gen_svm_check_io(s, pc_start,
                              SVM_IOIO_TYPE_MASK | (1 << (4+ot)) |
@@ -4988,7 +4994,7 @@
         else
             ot = dflag ? OT_LONG : OT_WORD;
         gen_check_io(s, ot, 1, pc_start - s->cs_base);
-        gen_op_mov_TN_reg[OT_WORD][0][R_EDX]();
+        gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
         gen_op_andl_T0_ffff();
         if (gen_svm_check_io(s, pc_start,
                              (1 << (4+ot)) | svm_is_rep(prefixes) |
@@ -5018,7 +5024,7 @@
                              (1 << (4+ot))))
             break;
         gen_op_in[ot]();
-        gen_op_mov_reg_T1[ot][R_EAX]();
+        gen_op_mov_reg_T1(ot, R_EAX);
         break;
     case 0xe6:
     case 0xe7:
@@ -5032,7 +5038,7 @@
         if (gen_svm_check_io(s, pc_start, svm_is_rep(prefixes) |
                              (1 << (4+ot))))
             break;
-        gen_op_mov_TN_reg[ot][1][R_EAX]();
+        gen_op_mov_TN_reg(ot, 1, R_EAX);
         gen_op_out[ot]();
         break;
     case 0xec:
@@ -5041,7 +5047,7 @@
             ot = OT_BYTE;
         else
             ot = dflag ? OT_LONG : OT_WORD;
-        gen_op_mov_TN_reg[OT_WORD][0][R_EDX]();
+        gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
         gen_op_andl_T0_ffff();
         gen_check_io(s, ot, 0, pc_start - s->cs_base);
         if (gen_svm_check_io(s, pc_start,
@@ -5049,7 +5055,7 @@
                              (1 << (4+ot))))
             break;
         gen_op_in[ot]();
-        gen_op_mov_reg_T1[ot][R_EAX]();
+        gen_op_mov_reg_T1(ot, R_EAX);
         break;
     case 0xee:
     case 0xef:
@@ -5057,13 +5063,13 @@
             ot = OT_BYTE;
         else
             ot = dflag ? OT_LONG : OT_WORD;
-        gen_op_mov_TN_reg[OT_WORD][0][R_EDX]();
+        gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
         gen_op_andl_T0_ffff();
         gen_check_io(s, ot, 0, pc_start - s->cs_base);
         if (gen_svm_check_io(s, pc_start,
                              svm_is_rep(prefixes) | (1 << (4+ot))))
             break;
-        gen_op_mov_TN_reg[ot][1][R_EAX]();
+        gen_op_mov_TN_reg(ot, 1, R_EAX);
         gen_op_out[ot]();
         break;
 
@@ -5101,7 +5107,7 @@
         } else {
             gen_stack_A0(s);
             /* pop offset */
-            gen_op_ld_T0_A0[1 + s->dflag + s->mem_index]();
+            gen_op_ld_T0_A0(1 + s->dflag + s->mem_index);
             if (s->dflag == 0)
                 gen_op_andl_T0_ffff();
             /* NOTE: keeping EIP updated is not a problem in case of
@@ -5109,7 +5115,7 @@
             gen_op_jmp_T0();
             /* pop selector */
             gen_op_addl_A0_im(2 << s->dflag);
-            gen_op_ld_T0_A0[1 + s->dflag + s->mem_index]();
+            gen_op_ld_T0_A0(1 + s->dflag + s->mem_index);
             gen_op_movl_seg_T0_vm(offsetof(CPUX86State,segs[R_CS]));
             /* add stack offset */
             gen_stack_update(s, val + (4 << s->dflag));
@@ -5232,10 +5238,10 @@
         gen_setcc(s, b);
         if (mod != 3) {
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-            gen_op_ld_T1_A0[ot + s->mem_index]();
+            gen_op_ld_T1_A0(ot + s->mem_index);
         } else {
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_TN_reg[ot][1][rm]();
+            gen_op_mov_TN_reg(ot, 1, rm);
         }
         gen_op_cmov_reg_T1_T0[ot - OT_WORD][reg]();
         break;
@@ -5292,7 +5298,7 @@
     case 0x9e: /* sahf */
         if (CODE64(s))
             goto illegal_op;
-        gen_op_mov_TN_reg[OT_BYTE][0][R_AH]();
+        gen_op_mov_TN_reg(OT_BYTE, 0, R_AH);
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
         gen_op_movb_eflags_T0();
@@ -5304,7 +5310,7 @@
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
         gen_op_movl_T0_eflags();
-        gen_op_mov_reg_T0[OT_BYTE][R_AH]();
+        gen_op_mov_reg_T0(OT_BYTE, R_AH);
         break;
     case 0xf5: /* cmc */
         if (s->cc_op != CC_OP_DYNAMIC)
@@ -5342,9 +5348,9 @@
         if (mod != 3) {
             s->rip_offset = 1;
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-            gen_op_ld_T0_A0[ot + s->mem_index]();
+            gen_op_ld_T0_A0(ot + s->mem_index);
         } else {
-            gen_op_mov_TN_reg[ot][0][rm]();
+            gen_op_mov_TN_reg(ot, 0, rm);
         }
         /* load shift */
         val = ldub_code(s->pc++);
@@ -5356,9 +5362,9 @@
         s->cc_op = CC_OP_SARB + ot;
         if (op != 0) {
             if (mod != 3)
-                gen_op_st_T0_A0[ot + s->mem_index]();
+                gen_op_st_T0_A0(ot + s->mem_index);
             else
-                gen_op_mov_reg_T0[ot][rm]();
+                gen_op_mov_reg_T0(ot, rm);
             gen_op_update_bt_cc();
         }
         break;
@@ -5379,22 +5385,22 @@
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
-        gen_op_mov_TN_reg[OT_LONG][1][reg]();
+        gen_op_mov_TN_reg(OT_LONG, 1, reg);
         if (mod != 3) {
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
             /* specific case: we need to add a displacement */
             gen_op_add_bit_A0_T1[ot - OT_WORD]();
-            gen_op_ld_T0_A0[ot + s->mem_index]();
+            gen_op_ld_T0_A0(ot + s->mem_index);
         } else {
-            gen_op_mov_TN_reg[ot][0][rm]();
+            gen_op_mov_TN_reg(ot, 0, rm);
         }
         gen_op_btx_T0_T1_cc[ot - OT_WORD][op]();
         s->cc_op = CC_OP_SARB + ot;
         if (op != 0) {
             if (mod != 3)
-                gen_op_st_T0_A0[ot + s->mem_index]();
+                gen_op_st_T0_A0(ot + s->mem_index);
             else
-                gen_op_mov_reg_T0[ot][rm]();
+                gen_op_mov_reg_T0(ot, rm);
             gen_op_update_bt_cc();
         }
         break;
@@ -5406,9 +5412,9 @@
         gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
         /* NOTE: in order to handle the 0 case, we must load the
            result. It could be optimized with a generated jump */
-        gen_op_mov_TN_reg[ot][1][reg]();
+        gen_op_mov_TN_reg(ot, 1, reg);
         gen_op_bsx_T0_cc[ot - OT_WORD][b & 1]();
-        gen_op_mov_reg_T1[ot][reg]();
+        gen_op_mov_reg_T1(ot, reg);
         s->cc_op = CC_OP_LOGICB + ot;
         break;
         /************************/
@@ -5569,7 +5575,7 @@
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_op_mov_TN_reg[ot][0][reg]();
+        gen_op_mov_TN_reg(ot, 0, reg);
         gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
         gen_jmp_im(pc_start - s->cs_base);
         if (ot == OT_WORD)
@@ -5581,16 +5587,27 @@
         reg = (b & 7) | REX_B(s);
 #ifdef TARGET_X86_64
         if (dflag == 2) {
-            gen_op_mov_TN_reg[OT_QUAD][0][reg]();
-            gen_op_bswapq_T0();
-            gen_op_mov_reg_T0[OT_QUAD][reg]();
+            gen_op_mov_TN_reg(OT_QUAD, 0, reg);
+            tcg_gen_bswap_i64(cpu_T[0], cpu_T[0]);
+            gen_op_mov_reg_T0(OT_QUAD, reg);
         } else
-#endif
         {
-            gen_op_mov_TN_reg[OT_LONG][0][reg]();
-            gen_op_bswapl_T0();
-            gen_op_mov_reg_T0[OT_LONG][reg]();
+            int tmp0;
+            gen_op_mov_TN_reg(OT_LONG, 0, reg);
+
+            tmp0 = tcg_temp_new(TCG_TYPE_I32);
+            tcg_gen_trunc_i64_i32(tmp0, cpu_T[0]);
+            tcg_gen_bswap_i32(tmp0, tmp0);
+            tcg_gen_extu_i32_i64(cpu_T[0], tmp0);
+            gen_op_mov_reg_T0(OT_LONG, reg);
         }
+#else
+        {
+            gen_op_mov_TN_reg(OT_LONG, 0, reg);
+            tcg_gen_bswap_i32(cpu_T[0], cpu_T[0]);
+            gen_op_mov_reg_T0(OT_LONG, reg);
+        }
+#endif
         break;
     case 0xd6: /* salc */
         if (CODE64(s))
@@ -5821,12 +5838,12 @@
                 break;
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
             gen_op_movl_T0_env(offsetof(CPUX86State, gdt.limit));
-            gen_op_st_T0_A0[OT_WORD + s->mem_index]();
+            gen_op_st_T0_A0(OT_WORD + s->mem_index);
             gen_add_A0_im(s, 2);
             gen_op_movtl_T0_env(offsetof(CPUX86State, gdt.base));
             if (!s->dflag)
                 gen_op_andl_T0_im(0xffffff);
-            gen_op_st_T0_A0[CODE64(s) + OT_LONG + s->mem_index]();
+            gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
             break;
         case 1:
             if (mod == 3) {
@@ -5840,12 +5857,12 @@
                     gen_jmp_im(pc_start - s->cs_base);
 #ifdef TARGET_X86_64
                     if (s->aflag == 2) {
-                        gen_op_movq_A0_reg[R_EBX]();
+                        gen_op_movq_A0_reg(R_EBX);
                         gen_op_addq_A0_AL();
                     } else
 #endif
                     {
-                        gen_op_movl_A0_reg[R_EBX]();
+                        gen_op_movl_A0_reg(R_EBX);
                         gen_op_addl_A0_AL();
                         if (s->aflag == 0)
                             gen_op_andl_A0_ffff();
@@ -5875,12 +5892,12 @@
                     break;
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                 gen_op_movl_T0_env(offsetof(CPUX86State, idt.limit));
-                gen_op_st_T0_A0[OT_WORD + s->mem_index]();
+                gen_op_st_T0_A0(OT_WORD + s->mem_index);
                 gen_add_A0_im(s, 2);
                 gen_op_movtl_T0_env(offsetof(CPUX86State, idt.base));
                 if (!s->dflag)
                     gen_op_andl_T0_im(0xffffff);
-                gen_op_st_T0_A0[CODE64(s) + OT_LONG + s->mem_index]();
+                gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
             }
             break;
         case 2: /* lgdt */
@@ -5943,9 +5960,9 @@
                                             op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE))
                     break;
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-                gen_op_ld_T1_A0[OT_WORD + s->mem_index]();
+                gen_op_ld_T1_A0(OT_WORD + s->mem_index);
                 gen_add_A0_im(s, 2);
-                gen_op_ld_T0_A0[CODE64(s) + OT_LONG + s->mem_index]();
+                gen_op_ld_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
                 if (!s->dflag)
                     gen_op_andl_T0_im(0xffffff);
                 if (op == 2) {
@@ -6029,19 +6046,19 @@
             rm = (modrm & 7) | REX_B(s);
 
             if (mod == 3) {
-                gen_op_mov_TN_reg[OT_LONG][0][rm]();
+                gen_op_mov_TN_reg(OT_LONG, 0, rm);
                 /* sign extend */
                 if (d_ot == OT_QUAD)
                     gen_op_movslq_T0_T0();
-                gen_op_mov_reg_T0[d_ot][reg]();
+                gen_op_mov_reg_T0(d_ot, reg);
             } else {
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                 if (d_ot == OT_QUAD) {
-                    gen_op_lds_T0_A0[OT_LONG + s->mem_index]();
+                    gen_op_lds_T0_A0(OT_LONG + s->mem_index);
                 } else {
-                    gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
+                    gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                 }
-                gen_op_mov_reg_T0[d_ot][reg]();
+                gen_op_mov_reg_T0(d_ot, reg);
             }
         } else
 #endif
@@ -6055,18 +6072,18 @@
             rm = modrm & 7;
             if (mod != 3) {
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-                gen_op_ld_T0_A0[ot + s->mem_index]();
+                gen_op_ld_T0_A0(ot + s->mem_index);
             } else {
-                gen_op_mov_TN_reg[ot][0][rm]();
+                gen_op_mov_TN_reg(ot, 0, rm);
             }
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
             gen_op_arpl();
             s->cc_op = CC_OP_EFLAGS;
             if (mod != 3) {
-                gen_op_st_T0_A0[ot + s->mem_index]();
+                gen_op_st_T0_A0(ot + s->mem_index);
             } else {
-                gen_op_mov_reg_T0[ot][rm]();
+                gen_op_mov_reg_T0(ot, rm);
             }
             gen_op_arpl_update();
         }
@@ -6079,7 +6096,7 @@
         modrm = ldub_code(s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_TN_reg[ot][1][reg]();
+        gen_op_mov_TN_reg(ot, 1, reg);
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
         if (b == 0x102)
@@ -6087,7 +6104,7 @@
         else
             gen_op_lsl();
         s->cc_op = CC_OP_EFLAGS;
-        gen_op_mov_reg_T1[ot][reg]();
+        gen_op_mov_reg_T1(ot, reg);
         break;
     case 0x118:
         modrm = ldub_code(s->pc++);
@@ -6134,7 +6151,7 @@
             case 8:
                 if (b & 2) {
                     gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0 + reg);
-                    gen_op_mov_TN_reg[ot][0][rm]();
+                    gen_op_mov_TN_reg(ot, 0, rm);
                     gen_op_movl_crN_T0(reg);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
@@ -6146,7 +6163,7 @@
                     else
 #endif
                         gen_op_movtl_T0_env(offsetof(CPUX86State,cr[reg]));
-                    gen_op_mov_reg_T0[ot][rm]();
+                    gen_op_mov_reg_T0(ot, rm);
                 }
                 break;
             default:
@@ -6173,14 +6190,14 @@
                 goto illegal_op;
             if (b & 2) {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
-                gen_op_mov_TN_reg[ot][0][rm]();
+                gen_op_mov_TN_reg(ot, 0, rm);
                 gen_op_movl_drN_T0(reg);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
                 gen_op_movtl_T0_env(offsetof(CPUX86State,dr[reg]));
-                gen_op_mov_reg_T0[ot][rm]();
+                gen_op_mov_reg_T0(ot, rm);
             }
         }
         break;
@@ -6246,11 +6263,11 @@
                 goto illegal_op;
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
             if (op == 2) {
-                gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
+                gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                 gen_op_movl_env_T0(offsetof(CPUX86State, mxcsr));
             } else {
                 gen_op_movl_T0_env(offsetof(CPUX86State, mxcsr));
-                gen_op_st_T0_A0[OT_LONG + s->mem_index]();
+                gen_op_st_T0_A0(OT_LONG + s->mem_index);
             }
             break;
         case 5: /* lfence */
@@ -6647,6 +6664,17 @@
 #endif
 };
 
+static void tcg_macro_func(TCGContext *s, int macro_id, const int *dead_args)
+{
+    switch(macro_id) {
+#ifdef MACRO_TEST
+    case MACRO_TEST:
+        tcg_gen_helper_0_1(helper_divl_EAX_T0, cpu_T[0]);
+        break;
+#endif
+    }
+}
+
 void optimize_flags_init(void)
 {
     int i;
@@ -6655,6 +6683,25 @@
         if (opc_simpler[i] == 0)
             opc_simpler[i] = i;
     }
+
+    tcg_set_macro_func(&tcg_ctx, tcg_macro_func);
+
+    cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");
+#if TARGET_LONG_BITS > HOST_LONG_BITS
+    cpu_T[0] = tcg_global_mem_new(TCG_TYPE_TL, 
+                                  TCG_AREG0, offsetof(CPUState, t0), "T0");
+    cpu_T[1] = tcg_global_mem_new(TCG_TYPE_TL,
+                                  TCG_AREG0, offsetof(CPUState, t1), "T1");
+    cpu_A0 = tcg_global_mem_new(TCG_TYPE_TL,
+                                TCG_AREG0, offsetof(CPUState, t2), "A0");
+#else
+    cpu_T[0] = tcg_global_reg_new(TCG_TYPE_TL, TCG_AREG1, "T0");
+    cpu_T[1] = tcg_global_reg_new(TCG_TYPE_TL, TCG_AREG2, "T1");
+    cpu_A0 = tcg_global_reg_new(TCG_TYPE_TL, TCG_AREG3, "A0");
+#endif
+    /* the helpers are only registered to print debug info */
+    TCG_HELPER(helper_divl_EAX_T0);
+    TCG_HELPER(helper_idivl_EAX_T0);
 }
 
 /* CPU flags computation optimization: we move backward thru the
@@ -6746,10 +6793,9 @@
         printf("ERROR addseg\n");
 #endif
 
-    gen_opc_ptr = gen_opc_buf;
+    cpu_tmp0 = tcg_temp_new(TCG_TYPE_TL);
+
     gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
-    gen_opparam_ptr = gen_opparam_buf;
-    nb_gen_labels = 0;
 
     dc->is_jmp = DISAS_NEXT;
     pc_ptr = pc_start;
@@ -6824,9 +6870,9 @@
             disas_flags = !dc->code32;
 	target_disas(logfile, pc_start, pc_ptr - pc_start, disas_flags);
         fprintf(logfile, "\n");
-        if (loglevel & CPU_LOG_TB_OP) {
-            fprintf(logfile, "OP:\n");
-            dump_ops(gen_opc_buf, gen_opparam_buf);
+        if (loglevel & CPU_LOG_TB_OP_OPT) {
+            fprintf(logfile, "OP before opt:\n");
+            tcg_dump_ops(&tcg_ctx, logfile);
             fprintf(logfile, "\n");
         }
     }
@@ -6835,13 +6881,6 @@
     /* optimize flag computations */
     optimize_flags(gen_opc_buf, gen_opc_ptr - gen_opc_buf);
 
-#ifdef DEBUG_DISAS
-    if (loglevel & CPU_LOG_TB_OP_OPT) {
-        fprintf(logfile, "AFTER FLAGS OPT:\n");
-        dump_ops(gen_opc_buf, gen_opparam_buf);
-        fprintf(logfile, "\n");
-    }
-#endif
     if (!search_pc)
         tb->size = pc_ptr - pc_start;
     return 0;