Convert remaining MIPS FP instructions to TCG.


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4753 c046a42c-6fe2-441c-8c8c-71466251a162
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 1092f87..cba9cd7 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -154,6 +154,7 @@
 #define FOP_PROTO(op)                            \
 DEF_HELPER(void, do_float_ ## op ## _s, (void))  \
 DEF_HELPER(void, do_float_ ## op ## _d, (void))
+FOP_PROTO(sqrt)
 FOP_PROTO(roundl)
 FOP_PROTO(roundw)
 FOP_PROTO(truncl)
@@ -174,6 +175,12 @@
 FOP_PROTO(sub)
 FOP_PROTO(mul)
 FOP_PROTO(div)
+FOP_PROTO(abs)
+FOP_PROTO(chs)
+FOP_PROTO(muladd)
+FOP_PROTO(mulsub)
+FOP_PROTO(nmuladd)
+FOP_PROTO(nmulsub)
 FOP_PROTO(recip1)
 FOP_PROTO(recip2)
 FOP_PROTO(rsqrt1)
diff --git a/target-mips/op.c b/target-mips/op.c
index 2c99ad7..5e778f2 100644
--- a/target-mips/op.c
+++ b/target-mips/op.c
@@ -295,295 +295,3 @@
 #else
 # define DEBUG_FPU_STATE() do { } while(0)
 #endif
-
-/* Float support.
-   Single precition routines have a "s" suffix, double precision a
-   "d" suffix, 32bit integer "w", 64bit integer "l", paired singe "ps",
-   paired single lowwer "pl", paired single upper "pu".  */
-
-#define FLOAT_OP(name, p) void OPPROTO op_float_##name##_##p(void)
-
-FLOAT_OP(pll, ps)
-{
-    DT2 = ((uint64_t)WT0 << 32) | WT1;
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-FLOAT_OP(plu, ps)
-{
-    DT2 = ((uint64_t)WT0 << 32) | WTH1;
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-FLOAT_OP(pul, ps)
-{
-    DT2 = ((uint64_t)WTH0 << 32) | WT1;
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-FLOAT_OP(puu, ps)
-{
-    DT2 = ((uint64_t)WTH0 << 32) | WTH1;
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-
-FLOAT_OP(movf, d)
-{
-    if (!(env->fpu->fcr31 & PARAM1))
-        DT2 = DT0;
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-FLOAT_OP(movf, s)
-{
-    if (!(env->fpu->fcr31 & PARAM1))
-        WT2 = WT0;
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-FLOAT_OP(movf, ps)
-{
-    unsigned int mask = GET_FP_COND (env->fpu) >> PARAM1;
-    if (!(mask & 1))
-        WT2 = WT0;
-    if (!(mask & 2))
-        WTH2 = WTH0;
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-FLOAT_OP(movt, d)
-{
-    if (env->fpu->fcr31 & PARAM1)
-        DT2 = DT0;
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-FLOAT_OP(movt, s)
-{
-    if (env->fpu->fcr31 & PARAM1)
-        WT2 = WT0;
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-FLOAT_OP(movt, ps)
-{
-    unsigned int mask = GET_FP_COND (env->fpu) >> PARAM1;
-    if (mask & 1)
-        WT2 = WT0;
-    if (mask & 2)
-        WTH2 = WTH0;
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-FLOAT_OP(movz, d)
-{
-    if (!T0)
-        DT2 = DT0;
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-FLOAT_OP(movz, s)
-{
-    if (!T0)
-        WT2 = WT0;
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-FLOAT_OP(movz, ps)
-{
-    if (!T0) {
-        WT2 = WT0;
-        WTH2 = WTH0;
-    }
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-FLOAT_OP(movn, d)
-{
-    if (T0)
-        DT2 = DT0;
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-FLOAT_OP(movn, s)
-{
-    if (T0)
-        WT2 = WT0;
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-FLOAT_OP(movn, ps)
-{
-    if (T0) {
-        WT2 = WT0;
-        WTH2 = WTH0;
-    }
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-
-/* ternary operations */
-#define FLOAT_TERNOP(name1, name2) \
-FLOAT_OP(name1 ## name2, d)        \
-{                                  \
-    FDT0 = float64_ ## name1 (FDT0, FDT1, &env->fpu->fp_status);    \
-    FDT2 = float64_ ## name2 (FDT0, FDT2, &env->fpu->fp_status);    \
-    DEBUG_FPU_STATE();             \
-    FORCE_RET();                   \
-}                                  \
-FLOAT_OP(name1 ## name2, s)        \
-{                                  \
-    FST0 = float32_ ## name1 (FST0, FST1, &env->fpu->fp_status);    \
-    FST2 = float32_ ## name2 (FST0, FST2, &env->fpu->fp_status);    \
-    DEBUG_FPU_STATE();             \
-    FORCE_RET();                   \
-}                                  \
-FLOAT_OP(name1 ## name2, ps)       \
-{                                  \
-    FST0 = float32_ ## name1 (FST0, FST1, &env->fpu->fp_status);    \
-    FSTH0 = float32_ ## name1 (FSTH0, FSTH1, &env->fpu->fp_status); \
-    FST2 = float32_ ## name2 (FST0, FST2, &env->fpu->fp_status);    \
-    FSTH2 = float32_ ## name2 (FSTH0, FSTH2, &env->fpu->fp_status); \
-    DEBUG_FPU_STATE();             \
-    FORCE_RET();                   \
-}
-FLOAT_TERNOP(mul, add)
-FLOAT_TERNOP(mul, sub)
-#undef FLOAT_TERNOP
-
-/* negated ternary operations */
-#define FLOAT_NTERNOP(name1, name2) \
-FLOAT_OP(n ## name1 ## name2, d)    \
-{                                   \
-    FDT0 = float64_ ## name1 (FDT0, FDT1, &env->fpu->fp_status);    \
-    FDT2 = float64_ ## name2 (FDT0, FDT2, &env->fpu->fp_status);    \
-    FDT2 = float64_chs(FDT2);       \
-    DEBUG_FPU_STATE();              \
-    FORCE_RET();                    \
-}                                   \
-FLOAT_OP(n ## name1 ## name2, s)    \
-{                                   \
-    FST0 = float32_ ## name1 (FST0, FST1, &env->fpu->fp_status);    \
-    FST2 = float32_ ## name2 (FST0, FST2, &env->fpu->fp_status);    \
-    FST2 = float32_chs(FST2);       \
-    DEBUG_FPU_STATE();              \
-    FORCE_RET();                    \
-}                                   \
-FLOAT_OP(n ## name1 ## name2, ps)   \
-{                                   \
-    FST0 = float32_ ## name1 (FST0, FST1, &env->fpu->fp_status);    \
-    FSTH0 = float32_ ## name1 (FSTH0, FSTH1, &env->fpu->fp_status); \
-    FST2 = float32_ ## name2 (FST0, FST2, &env->fpu->fp_status);    \
-    FSTH2 = float32_ ## name2 (FSTH0, FSTH2, &env->fpu->fp_status); \
-    FST2 = float32_chs(FST2);       \
-    FSTH2 = float32_chs(FSTH2);     \
-    DEBUG_FPU_STATE();              \
-    FORCE_RET();                    \
-}
-FLOAT_NTERNOP(mul, add)
-FLOAT_NTERNOP(mul, sub)
-#undef FLOAT_NTERNOP
-
-/* unary operations, modifying fp status  */
-#define FLOAT_UNOP(name)  \
-FLOAT_OP(name, d)         \
-{                         \
-    FDT2 = float64_ ## name(FDT0, &env->fpu->fp_status); \
-    DEBUG_FPU_STATE();    \
-    FORCE_RET();          \
-}                         \
-FLOAT_OP(name, s)         \
-{                         \
-    FST2 = float32_ ## name(FST0, &env->fpu->fp_status); \
-    DEBUG_FPU_STATE();    \
-    FORCE_RET();          \
-}
-FLOAT_UNOP(sqrt)
-#undef FLOAT_UNOP
-
-/* unary operations, not modifying fp status  */
-#define FLOAT_UNOP(name)  \
-FLOAT_OP(name, d)         \
-{                         \
-    FDT2 = float64_ ## name(FDT0);   \
-    DEBUG_FPU_STATE();    \
-    FORCE_RET();          \
-}                         \
-FLOAT_OP(name, s)         \
-{                         \
-    FST2 = float32_ ## name(FST0);   \
-    DEBUG_FPU_STATE();    \
-    FORCE_RET();          \
-}                         \
-FLOAT_OP(name, ps)        \
-{                         \
-    FST2 = float32_ ## name(FST0);   \
-    FSTH2 = float32_ ## name(FSTH0); \
-    DEBUG_FPU_STATE();    \
-    FORCE_RET();          \
-}
-FLOAT_UNOP(abs)
-FLOAT_UNOP(chs)
-#undef FLOAT_UNOP
-
-FLOAT_OP(alnv, ps)
-{
-    switch (T0 & 0x7) {
-    case 0:
-        FST2 = FST0;
-        FSTH2 = FSTH0;
-        break;
-    case 4:
-#ifdef TARGET_WORDS_BIGENDIAN
-        FSTH2 = FST0;
-        FST2 = FSTH1;
-#else
-        FSTH2 = FST1;
-        FST2 = FSTH0;
-#endif
-        break;
-    default: /* unpredictable */
-        break;
-    }
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-
-void op_bc1f (void)
-{
-    T0 = !!(~GET_FP_COND(env->fpu) & (0x1 << PARAM1));
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-void op_bc1any2f (void)
-{
-    T0 = !!(~GET_FP_COND(env->fpu) & (0x3 << PARAM1));
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-void op_bc1any4f (void)
-{
-    T0 = !!(~GET_FP_COND(env->fpu) & (0xf << PARAM1));
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-
-void op_bc1t (void)
-{
-    T0 = !!(GET_FP_COND(env->fpu) & (0x1 << PARAM1));
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-void op_bc1any2t (void)
-{
-    T0 = !!(GET_FP_COND(env->fpu) & (0x3 << PARAM1));
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
-void op_bc1any4t (void)
-{
-    T0 = !!(GET_FP_COND(env->fpu) & (0xf << PARAM1));
-    DEBUG_FPU_STATE();
-    FORCE_RET();
-}
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index c5a4c93..696295b 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -1703,8 +1703,26 @@
         UPDATE_FP_FLAGS(env->fpu->fcr31, tmp);
 }
 
+/* Float support.
+   Single precision routines have a "s" suffix, double precision a
+   "d" suffix, 32bit integer "w", 64bit integer "l", paired single "ps",
+   paired single lower "pl", paired single upper "pu".  */
+
 #define FLOAT_OP(name, p) void do_float_##name##_##p(void)
 
+/* unary operations passed the fp status: FxT2 = float_op(FxT0, &fp_status)  */
+#define FLOAT_UNOP(name)  \
+FLOAT_OP(name, d)         \
+{                         \
+    FDT2 = float64_ ## name(FDT0, &env->fpu->fp_status); \
+}                         \
+FLOAT_OP(name, s)         \
+{                         \
+    FST2 = float32_ ## name(FST0, &env->fpu->fp_status); \
+}
+FLOAT_UNOP(sqrt)
+#undef FLOAT_UNOP
+
 FLOAT_OP(cvtd, s)
 {
     set_float_exception_flags(0, &env->fpu->fp_status);
@@ -1943,6 +1961,25 @@
         WT2 = FLOAT_SNAN32;
 }
 
+/* unary operations, not modifying fp status: FxT2 = float_op(FxT0); d, s, ps  */
+#define FLOAT_UNOP(name)  \
+FLOAT_OP(name, d)         \
+{                         \
+    FDT2 = float64_ ## name(FDT0);   \
+}                         \
+FLOAT_OP(name, s)         \
+{                         \
+    FST2 = float32_ ## name(FST0);   \
+}                         \
+FLOAT_OP(name, ps)        \
+{                         \
+    FST2 = float32_ ## name(FST0);   \
+    FSTH2 = float32_ ## name(FSTH0); \
+}
+FLOAT_UNOP(abs)
+FLOAT_UNOP(chs)
+#undef FLOAT_UNOP
+
 /* MIPS specific unary operations */
 FLOAT_OP(recip, d)
 {
@@ -2051,6 +2088,56 @@
 FLOAT_BINOP(div)
 #undef FLOAT_BINOP
 
+/* ternary operations: FxT2 = name2(name1(FxT0, FxT1), FxT2); clobbers FxT0 */
+#define FLOAT_TERNOP(name1, name2) \
+FLOAT_OP(name1 ## name2, d)        \
+{                                  \
+    FDT0 = float64_ ## name1 (FDT0, FDT1, &env->fpu->fp_status);    \
+    FDT2 = float64_ ## name2 (FDT0, FDT2, &env->fpu->fp_status);    \
+}                                  \
+FLOAT_OP(name1 ## name2, s)        \
+{                                  \
+    FST0 = float32_ ## name1 (FST0, FST1, &env->fpu->fp_status);    \
+    FST2 = float32_ ## name2 (FST0, FST2, &env->fpu->fp_status);    \
+}                                  \
+FLOAT_OP(name1 ## name2, ps)       \
+{                                  \
+    FST0 = float32_ ## name1 (FST0, FST1, &env->fpu->fp_status);    \
+    FSTH0 = float32_ ## name1 (FSTH0, FSTH1, &env->fpu->fp_status); \
+    FST2 = float32_ ## name2 (FST0, FST2, &env->fpu->fp_status);    \
+    FSTH2 = float32_ ## name2 (FSTH0, FSTH2, &env->fpu->fp_status); \
+}
+FLOAT_TERNOP(mul, add)
+FLOAT_TERNOP(mul, sub)
+#undef FLOAT_TERNOP
+
+/* negated ternary ops: FxT2 = chs(name2(name1(FxT0, FxT1), FxT2)); clobbers FxT0 */
+#define FLOAT_NTERNOP(name1, name2) \
+FLOAT_OP(n ## name1 ## name2, d)    \
+{                                   \
+    FDT0 = float64_ ## name1 (FDT0, FDT1, &env->fpu->fp_status);    \
+    FDT2 = float64_ ## name2 (FDT0, FDT2, &env->fpu->fp_status);    \
+    FDT2 = float64_chs(FDT2);       \
+}                                   \
+FLOAT_OP(n ## name1 ## name2, s)    \
+{                                   \
+    FST0 = float32_ ## name1 (FST0, FST1, &env->fpu->fp_status);    \
+    FST2 = float32_ ## name2 (FST0, FST2, &env->fpu->fp_status);    \
+    FST2 = float32_chs(FST2);       \
+}                                   \
+FLOAT_OP(n ## name1 ## name2, ps)   \
+{                                   \
+    FST0 = float32_ ## name1 (FST0, FST1, &env->fpu->fp_status);    \
+    FSTH0 = float32_ ## name1 (FSTH0, FSTH1, &env->fpu->fp_status); \
+    FST2 = float32_ ## name2 (FST0, FST2, &env->fpu->fp_status);    \
+    FSTH2 = float32_ ## name2 (FSTH0, FSTH2, &env->fpu->fp_status); \
+    FST2 = float32_chs(FST2);       \
+    FSTH2 = float32_chs(FSTH2);     \
+}
+FLOAT_NTERNOP(mul, add)
+FLOAT_NTERNOP(mul, sub)
+#undef FLOAT_NTERNOP
+
 /* MIPS specific binary operations */
 FLOAT_OP(recip2, d)
 {
diff --git a/target-mips/translate.c b/target-mips/translate.c
index b7e3967..5698414 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -630,6 +630,21 @@
     tcg_gen_st_i32(t, current_fpu, 8 * reg + 4 * !FP_ENDIAN_IDX);
 }
 
+static inline void get_fp_cond (TCGv t)
+{
+    TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
+    TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I32);
+    /* Pack fcr31 bit 23 into bit 0 and bits 25..31 into bits 1..7 of t. */
+    tcg_gen_ld_i32(r_tmp1, current_fpu, offsetof(CPUMIPSFPUContext, fcr31));
+    tcg_gen_shri_i32(r_tmp2, r_tmp1, 24);
+    tcg_gen_andi_i32(r_tmp2, r_tmp2, 0xfe);
+    tcg_gen_shri_i32(r_tmp1, r_tmp1, 23);
+    tcg_gen_andi_i32(r_tmp1, r_tmp1, 0x1);
+    tcg_gen_or_i32(t, r_tmp1, r_tmp2);
+    tcg_temp_free(r_tmp1);
+    tcg_temp_free(r_tmp2);
+}
+
 #define FOP_CONDS(type, fmt)                                              \
 static GenOpFunc1 * fcmp ## type ## _ ## fmt ## _table[16] = {            \
     do_cmp ## type ## _ ## fmt ## _f,                                     \
@@ -5541,38 +5556,170 @@
 
     switch (op) {
     case OPC_BC1F:
-        gen_op_bc1f(cc);
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            get_fp_cond(r_tmp1);
+            tcg_gen_ext_i32_tl(cpu_T[0], r_tmp1);
+            tcg_temp_free(r_tmp1);
+            tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
+            tcg_gen_movi_tl(cpu_T[1], 0x1 << cc);
+            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1);
+            tcg_gen_movi_tl(cpu_T[0], 0);
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_movi_tl(cpu_T[0], 1);
+            gen_set_label(l2);
+        }
         opn = "bc1f";
         goto not_likely;
     case OPC_BC1FL:
-        gen_op_bc1f(cc);
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            get_fp_cond(r_tmp1);
+            tcg_gen_ext_i32_tl(cpu_T[0], r_tmp1);
+            tcg_temp_free(r_tmp1);
+            tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
+            tcg_gen_movi_tl(cpu_T[1], 0x1 << cc);
+            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1);
+            tcg_gen_movi_tl(cpu_T[0], 0);
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_movi_tl(cpu_T[0], 1);
+            gen_set_label(l2);
+        }
         opn = "bc1fl";
         goto likely;
     case OPC_BC1T:
-        gen_op_bc1t(cc);
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            get_fp_cond(r_tmp1);
+            tcg_gen_ext_i32_tl(cpu_T[0], r_tmp1);
+            tcg_temp_free(r_tmp1);
+            tcg_gen_movi_tl(cpu_T[1], 0x1 << cc);
+            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1);
+            tcg_gen_movi_tl(cpu_T[0], 0);
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_movi_tl(cpu_T[0], 1);
+            gen_set_label(l2);
+        }
         opn = "bc1t";
         goto not_likely;
     case OPC_BC1TL:
-        gen_op_bc1t(cc);
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            get_fp_cond(r_tmp1);
+            tcg_gen_ext_i32_tl(cpu_T[0], r_tmp1);
+            tcg_temp_free(r_tmp1);
+            tcg_gen_movi_tl(cpu_T[1], 0x1 << cc);
+            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1);
+            tcg_gen_movi_tl(cpu_T[0], 0);
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_movi_tl(cpu_T[0], 1);
+            gen_set_label(l2);
+        }
         opn = "bc1tl";
     likely:
         ctx->hflags |= MIPS_HFLAG_BL;
         tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, bcond));
         break;
     case OPC_BC1FANY2:
-        gen_op_bc1any2f(cc);
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            get_fp_cond(r_tmp1);
+            tcg_gen_ext_i32_tl(cpu_T[0], r_tmp1);
+            tcg_temp_free(r_tmp1);
+            tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
+            tcg_gen_movi_tl(cpu_T[1], 0x3 << cc);
+            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1);
+            tcg_gen_movi_tl(cpu_T[0], 0);
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_movi_tl(cpu_T[0], 1);
+            gen_set_label(l2);
+        }
         opn = "bc1any2f";
         goto not_likely;
     case OPC_BC1TANY2:
-        gen_op_bc1any2t(cc);
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            get_fp_cond(r_tmp1);
+            tcg_gen_ext_i32_tl(cpu_T[0], r_tmp1);
+            tcg_temp_free(r_tmp1);
+            tcg_gen_movi_tl(cpu_T[1], 0x3 << cc);
+            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1);
+            tcg_gen_movi_tl(cpu_T[0], 0);
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_movi_tl(cpu_T[0], 1);
+            gen_set_label(l2);
+        }
         opn = "bc1any2t";
         goto not_likely;
     case OPC_BC1FANY4:
-        gen_op_bc1any4f(cc);
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            get_fp_cond(r_tmp1);
+            tcg_gen_ext_i32_tl(cpu_T[0], r_tmp1);
+            tcg_temp_free(r_tmp1);
+            tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
+            tcg_gen_movi_tl(cpu_T[1], 0xf << cc);
+            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1);
+            tcg_gen_movi_tl(cpu_T[0], 0);
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_movi_tl(cpu_T[0], 1);
+            gen_set_label(l2);
+        }
         opn = "bc1any4f";
         goto not_likely;
     case OPC_BC1TANY4:
-        gen_op_bc1any4t(cc);
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+            TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
+
+            get_fp_cond(r_tmp1);
+            tcg_gen_ext_i32_tl(cpu_T[0], r_tmp1);
+            tcg_temp_free(r_tmp1);
+            tcg_gen_movi_tl(cpu_T[1], 0xf << cc);
+            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1);
+            tcg_gen_movi_tl(cpu_T[0], 0);
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_movi_tl(cpu_T[0], 1);
+            gen_set_label(l2);
+        }
         opn = "bc1any4t";
     not_likely:
         ctx->hflags |= MIPS_HFLAG_BC;
@@ -5685,23 +5832,83 @@
     gen_store_gpr(cpu_T[0], rd);
 }
 
-#define GEN_MOVCF(fmt)                                                \
-static void glue(gen_movcf_, fmt) (DisasContext *ctx, int cc, int tf) \
-{                                                                     \
-    uint32_t ccbit;                                                   \
-                                                                      \
-    if (cc) {                                                         \
-        ccbit = 1 << (24 + cc);                                       \
-    } else                                                            \
-        ccbit = 1 << 23;                                              \
-    if (!tf)                                                          \
-        glue(gen_op_float_movf_, fmt)(ccbit);                         \
-    else                                                              \
-        glue(gen_op_float_movt_, fmt)(ccbit);                         \
+/* Emit a conditional move of fpu32_T[0] into fpu32_T[2], guarded by one
+   fcr31 condition bit (used for "movcf.s").
+   cc: condition code; code 0 is fcr31 bit 23, codes 1..7 are bits 25..31.
+   tf: non-zero moves when the bit is set, zero moves when it is clear.
+   fpu32_T[2] keeps its previous value when the move is skipped.  */
+static inline void gen_movcf_s (int cc, int tf)
+{
+    uint32_t ccbit = cc ? 1U << (24 + cc) : 1U << 23;
+    /* The generated branch skips the move, so test the inverse condition. */
+    int cond = tf ? TCG_COND_EQ : TCG_COND_NE;
+    TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
+    int l1 = gen_new_label();
+
+    /* Isolate the selected condition bit of fcr31. */
+    tcg_gen_ld_i32(r_tmp1, current_fpu, offsetof(CPUMIPSFPUContext, fcr31));
+    tcg_gen_andi_i32(r_tmp1, r_tmp1, ccbit);
+    tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
+    /* Copy between TCG values with mov; movi takes an immediate and would
+       store the handle of fpu32_T[0] as a constant instead of its
+       contents. */
+    tcg_gen_mov_i32(fpu32_T[2], fpu32_T[0]);
+    gen_set_label(l1);
+    tcg_temp_free(r_tmp1);
 }
-GEN_MOVCF(d);
-GEN_MOVCF(s);
-#undef GEN_MOVCF
+
+/* Emit a conditional move of fpu64_T[0] into fpu64_T[2], guarded by one
+   fcr31 condition bit (used for "movcf.d").
+   cc: condition code; code 0 is fcr31 bit 23, codes 1..7 are bits 25..31.
+   tf: non-zero moves when the bit is set, zero moves when it is clear.
+   fpu64_T[2] keeps its previous value when the move is skipped.  */
+static inline void gen_movcf_d (int cc, int tf)
+{
+    uint32_t ccbit = cc ? 1U << (24 + cc) : 1U << 23;
+    /* The generated branch skips the move, so test the inverse condition. */
+    int cond = tf ? TCG_COND_EQ : TCG_COND_NE;
+    TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
+    int l1 = gen_new_label();
+
+    /* Isolate the selected condition bit of fcr31. */
+    tcg_gen_ld_i32(r_tmp1, current_fpu, offsetof(CPUMIPSFPUContext, fcr31));
+    tcg_gen_andi_i32(r_tmp1, r_tmp1, ccbit);
+    tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
+    /* Copy between TCG values with mov; movi takes an immediate and would
+       store the handle of fpu64_T[0] as a constant instead of its
+       contents. */
+    tcg_gen_mov_i64(fpu64_T[2], fpu64_T[0]);
+    gen_set_label(l1);
+    tcg_temp_free(r_tmp1);
+}
+
+/* Emit the paired-single conditional move (used for "movcf.ps").
+   With the get_fp_cond() result shifted right by cc, bit 0 guards the copy
+   fpu32_T[0] -> fpu32_T[2] and bit 1 guards fpu32h_T[0] -> fpu32h_T[2].
+   tf: non-zero moves when the bit is set, zero moves when it is clear.  */
+static inline void gen_movcf_ps (int cc, int tf)
+{
+    int cond = tf ? TCG_COND_EQ : TCG_COND_NE;
+    TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_I32);
+    TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_I32);
+    int l1 = gen_new_label();
+    int l2 = gen_new_label();
+
+    get_fp_cond(r_tmp1);
+    tcg_gen_shri_i32(r_tmp1, r_tmp1, cc);
+    tcg_gen_andi_i32(r_tmp2, r_tmp1, 0x1);
+    tcg_gen_brcondi_i32(cond, r_tmp2, 0, l1);
+    /* mov, not movi: the source is a TCG value, not an immediate. */
+    tcg_gen_mov_i32(fpu32_T[2], fpu32_T[0]);
+    gen_set_label(l1);
+    tcg_gen_andi_i32(r_tmp2, r_tmp1, 0x2);
+    tcg_gen_brcondi_i32(cond, r_tmp2, 0, l2);
+    tcg_gen_mov_i32(fpu32h_T[2], fpu32h_T[0]);
+    gen_set_label(l2);
+    tcg_temp_free(r_tmp1);
+    tcg_temp_free(r_tmp2);
+}
+
 
 static void gen_farith (DisasContext *ctx, uint32_t op1,
                         int ft, int fs, int fd, int cc)
@@ -5781,13 +5988,13 @@
         break;
     case FOP(4, 16):
         gen_load_fpr32(fpu32_T[0], fs);
-        gen_op_float_sqrt_s();
+        tcg_gen_helper_0_0(do_float_sqrt_s);
         gen_store_fpr32(fpu32_T[2], fd);
         opn = "sqrt.s";
         break;
     case FOP(5, 16):
         gen_load_fpr32(fpu32_T[0], fs);
-        gen_op_float_abs_s();
+        tcg_gen_helper_0_0(do_float_abs_s);
         gen_store_fpr32(fpu32_T[2], fd);
         opn = "abs.s";
         break;
@@ -5798,7 +6005,7 @@
         break;
     case FOP(7, 16):
         gen_load_fpr32(fpu32_T[0], fs);
-        gen_op_float_chs_s();
+        tcg_gen_helper_0_0(do_float_chs_s);
         gen_store_fpr32(fpu32_T[2], fd);
         opn = "neg.s";
         break;
@@ -5855,10 +6062,9 @@
         opn = "floor.w.s";
         break;
     case FOP(17, 16):
-        gen_load_gpr(cpu_T[0], ft);
         gen_load_fpr32(fpu32_T[0], fs);
         gen_load_fpr32(fpu32_T[2], fd);
-        gen_movcf_s(ctx, (ft >> 2) & 0x7, ft & 0x1);
+        gen_movcf_s((ft >> 2) & 0x7, ft & 0x1);
         gen_store_fpr32(fpu32_T[2], fd);
         opn = "movcf.s";
         break;
@@ -5866,7 +6072,13 @@
         gen_load_gpr(cpu_T[0], ft);
         gen_load_fpr32(fpu32_T[0], fs);
         gen_load_fpr32(fpu32_T[2], fd);
-        gen_op_float_movz_s();
+        {
+            int l1 = gen_new_label();
+
+            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1);
+            tcg_gen_mov_i32(fpu32_T[2], fpu32_T[0]);
+            gen_set_label(l1);
+        }
         gen_store_fpr32(fpu32_T[2], fd);
         opn = "movz.s";
         break;
@@ -5874,7 +6086,13 @@
         gen_load_gpr(cpu_T[0], ft);
         gen_load_fpr32(fpu32_T[0], fs);
         gen_load_fpr32(fpu32_T[2], fd);
-        gen_op_float_movn_s();
+        {
+            int l1 = gen_new_label();
+
+            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[0], 0, l1);
+            tcg_gen_mov_i32(fpu32_T[2], fpu32_T[0]);
+            gen_set_label(l1);
+        }
         gen_store_fpr32(fpu32_T[2], fd);
         opn = "movn.s";
         break;
@@ -6019,14 +6237,14 @@
     case FOP(4, 17):
         check_cp1_registers(ctx, fs | fd);
         gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_op_float_sqrt_d();
+        tcg_gen_helper_0_0(do_float_sqrt_d);
         gen_store_fpr64(ctx, fpu64_T[2], fd);
         opn = "sqrt.d";
         break;
     case FOP(5, 17):
         check_cp1_registers(ctx, fs | fd);
         gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_op_float_abs_d();
+        tcg_gen_helper_0_0(do_float_abs_d);
         gen_store_fpr64(ctx, fpu64_T[2], fd);
         opn = "abs.d";
         break;
@@ -6039,7 +6257,7 @@
     case FOP(7, 17):
         check_cp1_registers(ctx, fs | fd);
         gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_op_float_chs_d();
+        tcg_gen_helper_0_0(do_float_chs_d);
         gen_store_fpr64(ctx, fpu64_T[2], fd);
         opn = "neg.d";
         break;
@@ -6100,10 +6318,9 @@
         opn = "floor.w.d";
         break;
     case FOP(17, 17):
-        gen_load_gpr(cpu_T[0], ft);
         gen_load_fpr64(ctx, fpu64_T[0], fs);
         gen_load_fpr64(ctx, fpu64_T[2], fd);
-        gen_movcf_d(ctx, (ft >> 2) & 0x7, ft & 0x1);
+        gen_movcf_d((ft >> 2) & 0x7, ft & 0x1);
         gen_store_fpr64(ctx, fpu64_T[2], fd);
         opn = "movcf.d";
         break;
@@ -6111,7 +6328,13 @@
         gen_load_gpr(cpu_T[0], ft);
         gen_load_fpr64(ctx, fpu64_T[0], fs);
         gen_load_fpr64(ctx, fpu64_T[2], fd);
-        gen_op_float_movz_d();
+        {
+            int l1 = gen_new_label();
+
+            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1);
+            tcg_gen_mov_i64(fpu64_T[2], fpu64_T[0]);
+            gen_set_label(l1);
+        }
         gen_store_fpr64(ctx, fpu64_T[2], fd);
         opn = "movz.d";
         break;
@@ -6119,7 +6342,13 @@
         gen_load_gpr(cpu_T[0], ft);
         gen_load_fpr64(ctx, fpu64_T[0], fs);
         gen_load_fpr64(ctx, fpu64_T[2], fd);
-        gen_op_float_movn_d();
+        {
+            int l1 = gen_new_label();
+
+            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[0], 0, l1);
+            tcg_gen_mov_i64(fpu64_T[2], fpu64_T[0]);
+            gen_set_label(l1);
+        }
         gen_store_fpr64(ctx, fpu64_T[2], fd);
         opn = "movn.d";
         break;
@@ -6290,7 +6519,7 @@
         check_cp1_64bitmode(ctx);
         gen_load_fpr32(fpu32_T[0], fs);
         gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_op_float_abs_ps();
+        tcg_gen_helper_0_0(do_float_abs_ps);
         gen_store_fpr32(fpu32_T[2], fd);
         gen_store_fpr32h(fpu32h_T[2], fd);
         opn = "abs.ps";
@@ -6307,22 +6536,18 @@
         check_cp1_64bitmode(ctx);
         gen_load_fpr32(fpu32_T[0], fs);
         gen_load_fpr32h(fpu32h_T[0], fs);
-        gen_op_float_chs_ps();
+        tcg_gen_helper_0_0(do_float_chs_ps);
         gen_store_fpr32(fpu32_T[2], fd);
         gen_store_fpr32h(fpu32h_T[2], fd);
         opn = "neg.ps";
         break;
     case FOP(17, 22):
         check_cp1_64bitmode(ctx);
-        gen_load_gpr(cpu_T[0], ft);
         gen_load_fpr32(fpu32_T[0], fs);
         gen_load_fpr32h(fpu32h_T[0], fs);
         gen_load_fpr32(fpu32_T[2], fd);
         gen_load_fpr32h(fpu32h_T[2], fd);
-        if (ft & 0x1)
-            gen_op_float_movt_ps ((ft >> 2) & 0x7);
-        else
-            gen_op_float_movf_ps ((ft >> 2) & 0x7);
+        gen_movcf_ps((ft >> 2) & 0x7, ft & 0x1);
         gen_store_fpr32(fpu32_T[2], fd);
         gen_store_fpr32h(fpu32h_T[2], fd);
         opn = "movcf.ps";
@@ -6334,7 +6559,14 @@
         gen_load_fpr32h(fpu32h_T[0], fs);
         gen_load_fpr32(fpu32_T[2], fd);
         gen_load_fpr32h(fpu32h_T[2], fd);
-        gen_op_float_movz_ps();
+        {
+            int l1 = gen_new_label();
+
+            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1);
+            tcg_gen_mov_i32(fpu32_T[2], fpu32_T[0]);
+            tcg_gen_mov_i32(fpu32h_T[2], fpu32h_T[0]);
+            gen_set_label(l1);
+        }
         gen_store_fpr32(fpu32_T[2], fd);
         gen_store_fpr32h(fpu32h_T[2], fd);
         opn = "movz.ps";
@@ -6346,7 +6578,14 @@
         gen_load_fpr32h(fpu32h_T[0], fs);
         gen_load_fpr32(fpu32_T[2], fd);
         gen_load_fpr32h(fpu32h_T[2], fd);
-        gen_op_float_movn_ps();
+        {
+            int l1 = gen_new_label();
+
+            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[0], 0, l1);
+            tcg_gen_mov_i32(fpu32_T[2], fpu32_T[0]);
+            tcg_gen_mov_i32(fpu32h_T[2], fpu32h_T[0]);
+            gen_set_label(l1);
+        }
         gen_store_fpr32(fpu32_T[2], fd);
         gen_store_fpr32h(fpu32h_T[2], fd);
         opn = "movn.ps";
@@ -6440,32 +6679,32 @@
         check_cp1_64bitmode(ctx);
         gen_load_fpr32(fpu32_T[0], fs);
         gen_load_fpr32(fpu32_T[1], ft);
-        gen_op_float_pll_ps();
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        gen_store_fpr32h(fpu32_T[0], fd);
+        gen_store_fpr32(fpu32_T[1], fd);
         opn = "pll.ps";
         break;
     case FOP(45, 22):
         check_cp1_64bitmode(ctx);
         gen_load_fpr32(fpu32_T[0], fs);
         gen_load_fpr32h(fpu32h_T[1], ft);
-        gen_op_float_plu_ps();
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        gen_store_fpr32(fpu32h_T[1], fd);
+        gen_store_fpr32h(fpu32_T[0], fd);
         opn = "plu.ps";
         break;
     case FOP(46, 22):
         check_cp1_64bitmode(ctx);
         gen_load_fpr32h(fpu32h_T[0], fs);
         gen_load_fpr32(fpu32_T[1], ft);
-        gen_op_float_pul_ps();
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        gen_store_fpr32(fpu32_T[1], fd);
+        gen_store_fpr32h(fpu32h_T[0], fd);
         opn = "pul.ps";
         break;
     case FOP(47, 22):
         check_cp1_64bitmode(ctx);
         gen_load_fpr32h(fpu32h_T[0], fs);
         gen_load_fpr32h(fpu32h_T[1], ft);
-        gen_op_float_puu_ps();
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        gen_store_fpr32(fpu32h_T[1], fd);
+        gen_store_fpr32h(fpu32h_T[0], fd);
         opn = "puu.ps";
         break;
     case FOP(48, 22):
@@ -6595,10 +6834,32 @@
     case OPC_ALNV_PS:
         check_cp1_64bitmode(ctx);
         gen_load_gpr(cpu_T[0], fr);
-        gen_load_fpr64(ctx, fpu64_T[0], fs);
-        gen_load_fpr64(ctx, fpu64_T[1], ft);
-        gen_op_float_alnv_ps();
-        gen_store_fpr64(ctx, fpu64_T[2], fd);
+        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0x7);
+        gen_load_fpr32(fpu32_T[0], fs);
+        gen_load_fpr32h(fpu32h_T[0], fs);
+        gen_load_fpr32(fpu32_T[1], ft);
+        gen_load_fpr32h(fpu32h_T[1], ft);
+        {
+            int l1 = gen_new_label();
+            int l2 = gen_new_label();
+
+            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1);
+            tcg_gen_mov_i32(fpu32_T[2], fpu32_T[0]);
+            tcg_gen_mov_i32(fpu32h_T[2], fpu32h_T[0]);
+            tcg_gen_br(l2);
+            gen_set_label(l1);
+            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 4, l2);
+#ifdef TARGET_WORDS_BIGENDIAN
+            tcg_gen_mov_i32(fpu32h_T[2], fpu32_T[0]);
+            tcg_gen_mov_i32(fpu32_T[2], fpu32h_T[1]);
+#else
+            tcg_gen_mov_i32(fpu32h_T[2], fpu32_T[1]);
+            tcg_gen_mov_i32(fpu32_T[2], fpu32h_T[0]);
+#endif
+            gen_set_label(l2);
+        }
+        gen_store_fpr32(fpu32_T[2], fd);
+        gen_store_fpr32h(fpu32h_T[2], fd);
         opn = "alnv.ps";
         break;
     case OPC_MADD_S:
@@ -6606,7 +6867,7 @@
         gen_load_fpr32(fpu32_T[0], fs);
         gen_load_fpr32(fpu32_T[1], ft);
         gen_load_fpr32(fpu32_T[2], fr);
-        gen_op_float_muladd_s();
+        tcg_gen_helper_0_0(do_float_muladd_s);
         gen_store_fpr32(fpu32_T[2], fd);
         opn = "madd.s";
         break;
@@ -6616,7 +6877,7 @@
         gen_load_fpr64(ctx, fpu64_T[0], fs);
         gen_load_fpr64(ctx, fpu64_T[1], ft);
         gen_load_fpr64(ctx, fpu64_T[2], fr);
-        gen_op_float_muladd_d();
+        tcg_gen_helper_0_0(do_float_muladd_d);
         gen_store_fpr64(ctx, fpu64_T[2], fd);
         opn = "madd.d";
         break;
@@ -6628,7 +6889,7 @@
         gen_load_fpr32h(fpu32h_T[1], ft);
         gen_load_fpr32(fpu32_T[2], fr);
         gen_load_fpr32h(fpu32h_T[2], fr);
-        gen_op_float_muladd_ps();
+        tcg_gen_helper_0_0(do_float_muladd_ps);
         gen_store_fpr32(fpu32_T[2], fd);
         gen_store_fpr32h(fpu32h_T[2], fd);
         opn = "madd.ps";
@@ -6638,7 +6899,7 @@
         gen_load_fpr32(fpu32_T[0], fs);
         gen_load_fpr32(fpu32_T[1], ft);
         gen_load_fpr32(fpu32_T[2], fr);
-        gen_op_float_mulsub_s();
+        tcg_gen_helper_0_0(do_float_mulsub_s);
         gen_store_fpr32(fpu32_T[2], fd);
         opn = "msub.s";
         break;
@@ -6648,7 +6909,7 @@
         gen_load_fpr64(ctx, fpu64_T[0], fs);
         gen_load_fpr64(ctx, fpu64_T[1], ft);
         gen_load_fpr64(ctx, fpu64_T[2], fr);
-        gen_op_float_mulsub_d();
+        tcg_gen_helper_0_0(do_float_mulsub_d);
         gen_store_fpr64(ctx, fpu64_T[2], fd);
         opn = "msub.d";
         break;
@@ -6660,7 +6921,7 @@
         gen_load_fpr32h(fpu32h_T[1], ft);
         gen_load_fpr32(fpu32_T[2], fr);
         gen_load_fpr32h(fpu32h_T[2], fr);
-        gen_op_float_mulsub_ps();
+        tcg_gen_helper_0_0(do_float_mulsub_ps);
         gen_store_fpr32(fpu32_T[2], fd);
         gen_store_fpr32h(fpu32h_T[2], fd);
         opn = "msub.ps";
@@ -6670,7 +6931,7 @@
         gen_load_fpr32(fpu32_T[0], fs);
         gen_load_fpr32(fpu32_T[1], ft);
         gen_load_fpr32(fpu32_T[2], fr);
-        gen_op_float_nmuladd_s();
+        tcg_gen_helper_0_0(do_float_nmuladd_s);
         gen_store_fpr32(fpu32_T[2], fd);
         opn = "nmadd.s";
         break;
@@ -6680,7 +6941,7 @@
         gen_load_fpr64(ctx, fpu64_T[0], fs);
         gen_load_fpr64(ctx, fpu64_T[1], ft);
         gen_load_fpr64(ctx, fpu64_T[2], fr);
-        gen_op_float_nmuladd_d();
+        tcg_gen_helper_0_0(do_float_nmuladd_d);
         gen_store_fpr64(ctx, fpu64_T[2], fd);
         opn = "nmadd.d";
         break;
@@ -6692,7 +6953,7 @@
         gen_load_fpr32h(fpu32h_T[1], ft);
         gen_load_fpr32(fpu32_T[2], fr);
         gen_load_fpr32h(fpu32h_T[2], fr);
-        gen_op_float_nmuladd_ps();
+        tcg_gen_helper_0_0(do_float_nmuladd_ps);
         gen_store_fpr32(fpu32_T[2], fd);
         gen_store_fpr32h(fpu32h_T[2], fd);
         opn = "nmadd.ps";
@@ -6702,7 +6963,7 @@
         gen_load_fpr32(fpu32_T[0], fs);
         gen_load_fpr32(fpu32_T[1], ft);
         gen_load_fpr32(fpu32_T[2], fr);
-        gen_op_float_nmulsub_s();
+        tcg_gen_helper_0_0(do_float_nmulsub_s);
         gen_store_fpr32(fpu32_T[2], fd);
         opn = "nmsub.s";
         break;
@@ -6712,7 +6973,7 @@
         gen_load_fpr64(ctx, fpu64_T[0], fs);
         gen_load_fpr64(ctx, fpu64_T[1], ft);
         gen_load_fpr64(ctx, fpu64_T[2], fr);
-        gen_op_float_nmulsub_d();
+        tcg_gen_helper_0_0(do_float_nmulsub_d);
         gen_store_fpr64(ctx, fpu64_T[2], fd);
         opn = "nmsub.d";
         break;
@@ -6724,7 +6985,7 @@
         gen_load_fpr32h(fpu32h_T[1], ft);
         gen_load_fpr32(fpu32_T[2], fr);
         gen_load_fpr32h(fpu32h_T[2], fr);
-        gen_op_float_nmulsub_ps();
+        tcg_gen_helper_0_0(do_float_nmulsub_ps);
         gen_store_fpr32(fpu32_T[2], fd);
         gen_store_fpr32h(fpu32h_T[2], fd);
         opn = "nmsub.ps";