SH4 delay slot code update, by Magnus Damm.


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@3761 c046a42c-6fe2-441c-8c8c-71466251a162
diff --git a/cpu-exec.c b/cpu-exec.c
index bab71ee..e8b93a8 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -202,8 +202,8 @@
     cs_base = 0;
     pc = env->pc;
 #elif defined(TARGET_SH4)
-    flags = env->sr & (SR_MD | SR_RB);
-    cs_base = 0;         /* XXXXX */
+    flags = env->flags;
+    cs_base = 0;
     pc = env->pc;
 #elif defined(TARGET_ALPHA)
     flags = env->ps;
diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h
index 7970130..f9ecbb2 100644
--- a/target-sh4/cpu.h
+++ b/target-sh4/cpu.h
@@ -46,16 +46,16 @@
 #define FPSCR_SZ (1 << 20)
 #define FPSCR_PR (1 << 19)
 #define FPSCR_DN (1 << 18)
-
-#define DELAY_SLOT             (1 << 0) /* Must be the same as SR_T.  */
-/* This flag is set if the next insn is a delay slot for a conditional jump.
-   The dynamic value of the DELAY_SLOT determines whether the jup is taken. */
+#define DELAY_SLOT             (1 << 0)
 #define DELAY_SLOT_CONDITIONAL (1 << 1)
-/* Those are used in contexts only */
-#define BRANCH                 (1 << 2)
-#define BRANCH_CONDITIONAL     (1 << 3)
-#define MODE_CHANGE            (1 << 4)	/* Potential MD|RB change */
-#define BRANCH_EXCEPTION       (1 << 5)	/* Branch after exception */
+#define DELAY_SLOT_TRUE        (1 << 2)
+#define DELAY_SLOT_CLEARME     (1 << 3)
+/* The dynamic value of the DELAY_SLOT_TRUE flag determines whether the jump
+ * after the delay slot is taken. It is latched from SR_T by bt/s and bf/s.
+ *
+ * It is unclear whether a delay slot instruction may modify SR_T; latching
+ * the condition in DELAY_SLOT_TRUE means such a modification is tolerated.
+ */
 
 /* XXXXX The structure could be made more compact */
 typedef struct tlb_t {
diff --git a/target-sh4/op.c b/target-sh4/op.c
index 0902fca..0dcec49 100644
--- a/target-sh4/op.c
+++ b/target-sh4/op.c
@@ -19,16 +19,6 @@
  */
 #include "exec.h"
 
-static inline void set_flag(uint32_t flag)
-{
-    env->flags |= flag;
-}
-
-static inline void clr_flag(uint32_t flag)
-{
-    env->flags &= ~flag;
-}
-
 static inline void set_t(void)
 {
     env->sr |= SR_T;
@@ -110,28 +100,37 @@
 void OPPROTO op_bf_s(void)
 {
     env->delayed_pc = PARAM1;
-    set_flag(DELAY_SLOT_CONDITIONAL | ((~env->sr) & SR_T));
+    if (!(env->sr & SR_T)) {
+        env->flags |= DELAY_SLOT_TRUE;
+    }
     RETURN();
 }
 
 void OPPROTO op_bt_s(void)
 {
     env->delayed_pc = PARAM1;
-    set_flag(DELAY_SLOT_CONDITIONAL | (env->sr & SR_T));
+    if (env->sr & SR_T) {
+        env->flags |= DELAY_SLOT_TRUE;
+    }
+    RETURN();
+}
+
+void OPPROTO op_store_flags(void)
+{
+    env->flags &= DELAY_SLOT_TRUE;
+    env->flags |= PARAM1;
     RETURN();
 }
 
 void OPPROTO op_bra(void)
 {
     env->delayed_pc = PARAM1;
-    set_flag(DELAY_SLOT);
     RETURN();
 }
 
 void OPPROTO op_braf_T0(void)
 {
     env->delayed_pc = PARAM1 + T0;
-    set_flag(DELAY_SLOT);
     RETURN();
 }
 
@@ -139,7 +138,6 @@
 {
     env->pr = PARAM1;
     env->delayed_pc = PARAM2;
-    set_flag(DELAY_SLOT);
     RETURN();
 }
 
@@ -147,7 +145,6 @@
 {
     env->pr = PARAM1;
     env->delayed_pc = PARAM1 + T0;
-    set_flag(DELAY_SLOT);
     RETURN();
 }
 
@@ -155,26 +152,12 @@
 {
     env->pr = PARAM1;
     env->delayed_pc = T0;
-    set_flag(DELAY_SLOT);
     RETURN();
 }
 
 void OPPROTO op_rts(void)
 {
     env->delayed_pc = env->pr;
-    set_flag(DELAY_SLOT);
-    RETURN();
-}
-
-void OPPROTO op_clr_delay_slot(void)
-{
-    clr_flag(DELAY_SLOT);
-    RETURN();
-}
-
-void OPPROTO op_clr_delay_slot_conditional(void)
-{
-    clr_flag(DELAY_SLOT_CONDITIONAL);
     RETURN();
 }
 
@@ -242,7 +225,6 @@
 {
     env->sr = env->ssr;
     env->delayed_pc = env->spc;
-    set_flag(DELAY_SLOT);
     RETURN();
 }
 
@@ -458,7 +440,6 @@
 void OPPROTO op_jmp_T0(void)
 {
     env->delayed_pc = T0;
-    set_flag(DELAY_SLOT);
     RETURN();
 }
 
@@ -993,11 +974,10 @@
 
 void OPPROTO op_jdelayed(void)
 {
-    uint32_t flags;
-    flags = env->flags;
-    env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
-    if (flags & DELAY_SLOT)
-	GOTO_LABEL_PARAM(1);
+    if (env->flags & DELAY_SLOT_TRUE) {
+        env->flags &= ~DELAY_SLOT_TRUE;
+        GOTO_LABEL_PARAM(1);
+    }
     RETURN();
 }
 
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 70b4837..802f926 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -57,11 +57,21 @@
     uint32_t fpscr;
     uint16_t opcode;
     uint32_t flags;
+    int bstate;
     int memidx;
     uint32_t delayed_pc;
     int singlestep_enabled;
 } DisasContext;
 
+enum {
+    BS_NONE     = 0, /* We go out of the TB without reaching a branch or an
+                      * exception condition
+                      */
+    BS_STOP     = 1, /* We want to stop translation for any reason */
+    BS_BRANCH   = 2, /* We reached a branch condition     */
+    BS_EXCP     = 3, /* We reached an exception condition */
+};
+
 #ifdef CONFIG_USER_ONLY
 
 #define GEN_OP_LD(width, reg) \
@@ -176,15 +186,6 @@
     gen_op_exit_tb();
 }
 
-/* Jump to pc after an exception */
-static void gen_jump_exception(DisasContext * ctx)
-{
-    gen_op_movl_imm_T0(0);
-    if (ctx->singlestep_enabled)
-	gen_op_debug();
-    gen_op_exit_tb();
-}
-
 static void gen_jump(DisasContext * ctx)
 {
     if (ctx->delayed_pc == (uint32_t) - 1) {
@@ -220,7 +221,7 @@
 
     l1 = gen_new_label();
     gen_op_jdelayed(l1);
-    gen_goto_tb(ctx, 1, ctx->pc);
+    gen_goto_tb(ctx, 1, ctx->pc + 2);
     gen_set_label(l1);
     gen_jump(ctx);
 }
@@ -248,10 +249,10 @@
 
 #define CHECK_NOT_DELAY_SLOT \
   if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) \
-  {gen_op_raise_slot_illegal_instruction (); ctx->flags |= BRANCH_EXCEPTION; \
+  {gen_op_raise_slot_illegal_instruction (); ctx->bstate = BS_EXCP; \
    return;}
 
-void decode_opc(DisasContext * ctx)
+void _decode_opc(DisasContext * ctx)
 {
 #if 0
     fprintf(stderr, "Translating opcode 0x%04x\n", ctx->opcode);
@@ -290,11 +291,11 @@
 	return;
     case 0xfbfb:		/* frchg */
 	gen_op_frchg();
-	ctx->flags |= MODE_CHANGE;
+	ctx->bstate = BS_STOP;
 	return;
     case 0xf3fb:		/* fschg */
 	gen_op_fschg();
-	ctx->flags |= MODE_CHANGE;
+	ctx->bstate = BS_STOP;
 	return;
     case 0x0009:		/* nop */
 	return;
@@ -805,7 +806,7 @@
 	CHECK_NOT_DELAY_SLOT
 	    gen_conditional_jump(ctx, ctx->pc + 2,
 				 ctx->pc + 4 + B7_0s * 2);
-	ctx->flags |= BRANCH_CONDITIONAL;
+	ctx->bstate = BS_BRANCH;
 	return;
     case 0x8f00:		/* bf/s label */
 	CHECK_NOT_DELAY_SLOT
@@ -816,7 +817,7 @@
 	CHECK_NOT_DELAY_SLOT
 	    gen_conditional_jump(ctx, ctx->pc + 4 + B7_0s * 2,
 				 ctx->pc + 2);
-	ctx->flags |= BRANCH_CONDITIONAL;
+	ctx->bstate = BS_BRANCH;
 	return;
     case 0x8d00:		/* bt/s label */
 	CHECK_NOT_DELAY_SLOT
@@ -908,7 +909,7 @@
     case 0xc300:		/* trapa #imm */
 	CHECK_NOT_DELAY_SLOT gen_op_movl_imm_PC(ctx->pc);
 	gen_op_trapa(B7_0);
-	ctx->flags |= BRANCH;
+	ctx->bstate = BS_BRANCH;
 	return;
     case 0xc800:		/* tst #imm,R0 */
 	gen_op_tst_imm_rN(B7_0, REG(0));
@@ -1012,8 +1013,8 @@
     gen_op_movl_rN_T1 (REG(B11_8));				\
     gen_op_stl_T0_T1 (ctx);					\
     return;
-	LDST(sr, 0x400e, 0x4007, ldc, 0x0002, 0x4003, stc, ctx->flags |=
-	     MODE_CHANGE;)
+	LDST(sr, 0x400e, 0x4007, ldc, 0x0002, 0x4003, stc, ctx->bstate =
+	     BS_STOP;)
 	LDST(gbr, 0x401e, 0x4017, ldc, 0x0012, 0x4013, stc,)
 	LDST(vbr, 0x402e, 0x4027, ldc, 0x0022, 0x4023, stc,)
 	LDST(ssr, 0x403e, 0x4037, ldc, 0x0032, 0x4033, stc,)
@@ -1023,8 +1024,8 @@
 	LDST(macl, 0x401a, 0x4016, lds, 0x001a, 0x4012, sts,)
 	LDST(pr, 0x402a, 0x4026, lds, 0x002a, 0x4022, sts,)
 	LDST(fpul, 0x405a, 0x4056, lds, 0x005a, 0x4052, sts,)
-	LDST(fpscr, 0x406a, 0x4066, lds, 0x006a, 0x4062, sts, ctx->flags |=
-	     MODE_CHANGE;)
+	LDST(fpscr, 0x406a, 0x4066, lds, 0x006a, 0x4062, sts, ctx->bstate =
+	     BS_STOP;)
     case 0x00c3:		/* movca.l R0,@Rm */
 	gen_op_movl_rN_T0(REG(0));
 	gen_op_movl_rN_T1(REG(B11_8));
@@ -1141,7 +1142,28 @@
     fprintf(stderr, "unknown instruction 0x%04x at pc 0x%08x\n",
 	    ctx->opcode, ctx->pc);
     gen_op_raise_illegal_instruction();
-    ctx->flags |= BRANCH_EXCEPTION;
+    ctx->bstate = BS_EXCP;
+}
+
+void decode_opc(DisasContext * ctx)
+{
+    uint32_t old_flags = ctx->flags;
+
+    _decode_opc(ctx);
+
+    if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
+        if (ctx->flags & DELAY_SLOT_CLEARME) {
+            gen_op_store_flags(0);
+        }
+        ctx->flags = 0;
+        ctx->bstate = BS_BRANCH;
+        if (old_flags & DELAY_SLOT_CONDITIONAL) {
+	    gen_delayed_conditional_jump(ctx);
+        } else if (old_flags & DELAY_SLOT) {
+            gen_jump(ctx);
+	}
+
+    }
 }
 
 static inline int
@@ -1151,7 +1173,6 @@
     DisasContext ctx;
     target_ulong pc_start;
     static uint16_t *gen_opc_end;
-    uint32_t old_flags;
     int i, ii;
 
     pc_start = tb->pc;
@@ -1159,14 +1180,14 @@
     gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
     gen_opparam_ptr = gen_opparam_buf;
     ctx.pc = pc_start;
-    ctx.flags = env->flags;
-    old_flags = 0;
+    ctx.flags = (uint32_t)tb->flags;
+    ctx.bstate = BS_NONE;
     ctx.sr = env->sr;
     ctx.fpscr = env->fpscr;
     ctx.memidx = (env->sr & SR_MD) ? 1 : 0;
     /* We don't know if the delayed pc came from a dynamic or static branch,
        so assume it is a dynamic branch.  */
-    ctx.delayed_pc = -1;
+    ctx.delayed_pc = -1; /* use delayed pc from env pointer */
     ctx.tb = tb;
     ctx.singlestep_enabled = env->singlestep_enabled;
     nb_gen_labels = 0;
@@ -1180,18 +1201,14 @@
 #endif
 
     ii = -1;
-    while ((old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) == 0 &&
-	   (ctx.flags & (BRANCH | BRANCH_CONDITIONAL | MODE_CHANGE |
-			 BRANCH_EXCEPTION)) == 0 &&
-	   gen_opc_ptr < gen_opc_end && ctx.sr == env->sr) {
-	old_flags = ctx.flags;
+    while (ctx.bstate == BS_NONE && gen_opc_ptr < gen_opc_end) {
 	if (env->nb_breakpoints > 0) {
 	    for (i = 0; i < env->nb_breakpoints; i++) {
 		if (ctx.pc == env->breakpoints[i]) {
 		    /* We have hit a breakpoint - make sure PC is up-to-date */
 		    gen_op_movl_imm_PC(ctx.pc);
 		    gen_op_debug();
-		    ctx.flags |= BRANCH_EXCEPTION;
+		    ctx.bstate = BS_EXCP;
 		    break;
 		}
 	    }
@@ -1204,6 +1221,7 @@
                     gen_opc_instr_start[ii++] = 0;
             }
             gen_opc_pc[ii] = ctx.pc;
+            gen_opc_hflags[ii] = ctx.flags;
             gen_opc_instr_start[ii] = 1;
         }
 #if 0
@@ -1221,21 +1239,30 @@
 	break;
 #endif
     }
-
-    if (old_flags & DELAY_SLOT_CONDITIONAL) {
-	gen_delayed_conditional_jump(&ctx);
-    } else if (old_flags & DELAY_SLOT) {
-	gen_op_clr_delay_slot();
-	gen_jump(&ctx);
-    } else if (ctx.flags & BRANCH_EXCEPTION) {
-        gen_jump_exception(&ctx);
-    } else if ((ctx.flags & (BRANCH | BRANCH_CONDITIONAL)) == 0) {
-        gen_goto_tb(&ctx, 0, ctx.pc);
-    }
-
     if (env->singlestep_enabled) {
-	gen_op_debug();
+        gen_op_debug();
+    } else {
+	switch (ctx.bstate) {
+        case BS_STOP:
+            /* gen_op_interrupt_restart(); */
+            /* fall through */
+        case BS_NONE:
+            if (ctx.flags) {
+                gen_op_store_flags(ctx.flags | DELAY_SLOT_CLEARME);
+	    }
+            gen_goto_tb(&ctx, 0, ctx.pc);
+            break;
+        case BS_EXCP:
+            /* gen_op_interrupt_restart(); */
+            gen_op_movl_imm_T0(0);
+            gen_op_exit_tb();
+            break;
+        case BS_BRANCH:
+        default:
+            break;
+	}
     }
+
     *gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
         i = gen_opc_ptr - gen_opc_buf;
diff --git a/translate-all.c b/translate-all.c
index e9eef65..f4944c0 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -53,7 +53,7 @@
 #elif defined(TARGET_SPARC)
 target_ulong gen_opc_npc[OPC_BUF_SIZE];
 target_ulong gen_opc_jump_pc[2];
-#elif defined(TARGET_MIPS)
+#elif defined(TARGET_MIPS) || defined(TARGET_SH4)
 uint32_t gen_opc_hflags[OPC_BUF_SIZE];
 #endif
 
@@ -298,6 +298,9 @@
     env->hflags |= gen_opc_hflags[j];
 #elif defined(TARGET_ALPHA)
     env->pc = gen_opc_pc[j];
+#elif defined(TARGET_SH4)
+    env->pc = gen_opc_pc[j];
+    env->flags = gen_opc_hflags[j];
 #endif
     return 0;
 }