diff --git a/target-cris/cpu.h b/target-cris/cpu.h
index ac042d5..b62c537 100644
--- a/target-cris/cpu.h
+++ b/target-cris/cpu.h
@@ -38,6 +38,28 @@
 #define EXCP_MMU_FAULT   4
 #define EXCP_BREAK      16 /* trap.  */
 
+/* Register aliases. R0 - R15 */
+#define R_FP  8
+#define R_SP  14
+#define R_ACR 15
+
+/* Special registers, P0 - P15  */
+#define PR_BZ  0
+#define PR_VR  1
+#define PR_PID 2
+#define PR_SRS 3
+#define PR_WZ  4
+#define PR_EXS 5
+#define PR_EDA 6
+#define PR_MOF 7
+#define PR_DZ  8
+#define PR_EBP 9
+#define PR_ERP 10
+#define PR_SRP 11
+#define PR_CCS 13
+#define PR_USP 14
+#define PR_SPC 15
+
 /* CPU flags.  */
 #define S_FLAG 0x200
 #define R_FLAG 0x100
@@ -77,27 +99,16 @@
 #define NB_MMU_MODES 2
 
 typedef struct CPUCRISState {
-	uint32_t debug1;
-	uint32_t debug2;
-	uint32_t debug3;
-
-	/*
-	 * We just store the stores to the tlbset here for later evaluation
-	 * when the hw needs access to them.
-	 *
-	 * One for I and another for D.
-	 */
-	struct
-	{
-		uint32_t hi;
-		uint32_t lo;
-	} tlbsets[2][4][16];
-
-	uint32_t sregs[256][16]; /* grrr why so many??  */
 	uint32_t regs[16];
+	/* P0 - P15 are referred to as special registers in the docs.  */
 	uint32_t pregs[16];
+
+	/* Pseudo register for the PC. Not directly accessible on CRIS.  */
 	uint32_t pc;
 
+	/* Pseudo register for the kernel stack.  */
+	uint32_t ksp;
+
 	/* These are setup up by the guest code just before transfering the
 	   control back to the host.  */
 	int jmp;
@@ -114,20 +125,19 @@
 	/* size of the operation, 1 = byte, 2 = word, 4 = dword.  */
 	int cc_size;
 
-	/* extended arithmetics.  */
+	/* Extended arithmetics.  */
 	int cc_x_live;
 	int cc_x;
 
-	int features;
-
 	int exception_index;
 	int interrupt_request;
 	int interrupt_vector;
 	int fault_vector;
 	int trap_vector;
 
-	int user_mode_only;
-	int halted;
+	uint32_t debug1;
+	uint32_t debug2;
+	uint32_t debug3;
 
 	struct
 	{
@@ -136,6 +146,31 @@
 		int exec_stores;
 	} stats;
 
+	/* FIXME: add a check in the translator to avoid writing to support
+	   register sets beyond the 4th. The ISA allows up to 256! but in
+	   practice there is no core that implements more than 4.
+
+	   Support function registers are used to control units close to the
+	   core. Accesses do not pass down the normal hierarchy.
+	*/
+	uint32_t sregs[4][16];
+
+	/*
+	 * We just store the stores to the tlbset here for later evaluation
+	 * when the hw needs access to them.
+	 *
+	 * One for I and another for D.
+	 */
+	struct
+	{
+		uint32_t hi;
+		uint32_t lo;
+	} tlbsets[2][4][16];
+
+	int features;
+	int user_mode_only;
+	int halted;
+
 	jmp_buf jmp_env;
 	CPU_COMMON
 } CPUCRISState;
@@ -225,40 +260,20 @@
 #define MMU_MODE0_SUFFIX _kernel
 #define MMU_MODE1_SUFFIX _user
 #define MMU_USER_IDX 1
-/* CRIS FIXME: I guess we want to validate supervisor mode acceses here.  */
 static inline int cpu_mmu_index (CPUState *env)
 {
-    return 0;
+	return !!(env->pregs[PR_CCS] & U_FLAG);
 }
 
-#include "cpu-all.h"
-
-/* Register aliases. R0 - R15 */
-#define R_FP  8
-#define R_SP  14
-#define R_ACR 15
-
-/* Support regs, P0 - P15  */
-#define PR_BZ  0
-#define PR_VR  1
-#define PR_PID 2
-#define PR_SRS 3
-#define PR_WZ  4
-#define PR_MOF 7
-#define PR_DZ  8
-#define PR_EBP 9
-#define PR_ERP 10
-#define PR_SRP 11
-#define PR_CCS 13
-
 /* Support function regs.  */
 #define SFR_RW_GC_CFG      0][0
-#define SFR_RW_MM_CFG      2][0
-#define SFR_RW_MM_KBASE_LO 2][1
-#define SFR_RW_MM_KBASE_HI 2][2
-#define SFR_R_MM_CAUSE     2][3
-#define SFR_RW_MM_TLB_SEL  2][4
-#define SFR_RW_MM_TLB_LO   2][5
-#define SFR_RW_MM_TLB_HI   2][6
+#define SFR_RW_MM_CFG      env->pregs[PR_SRS]][0
+#define SFR_RW_MM_KBASE_LO env->pregs[PR_SRS]][1
+#define SFR_RW_MM_KBASE_HI env->pregs[PR_SRS]][2
+#define SFR_R_MM_CAUSE     env->pregs[PR_SRS]][3
+#define SFR_RW_MM_TLB_SEL  env->pregs[PR_SRS]][4
+#define SFR_RW_MM_TLB_LO   env->pregs[PR_SRS]][5
+#define SFR_RW_MM_TLB_HI   env->pregs[PR_SRS]][6
 
+#include "cpu-all.h"
 #endif
diff --git a/target-cris/helper.c b/target-cris/helper.c
index 8872138..1bd35e6 100644
--- a/target-cris/helper.c
+++ b/target-cris/helper.c
@@ -61,7 +61,7 @@
 	uint32_t ccs;
 	/* Apply the ccs shift.  */
 	ccs = env->pregs[PR_CCS];
-	ccs = (ccs & 0xc0000000) | ((ccs << 12) >> 2);
+	ccs = ((ccs & 0xc0000000) | ((ccs << 12) >> 2)) & ~0x3ff;
 	env->pregs[PR_CCS] = ccs;
 }
 
@@ -73,7 +73,7 @@
 	int r = -1;
 	target_ulong phy;
 
-	D(printf ("%s addr=%x pc=%x\n", __func__, address, env->pc));
+	D(printf ("%s addr=%x pc=%x rw=%x\n", __func__, address, env->pc, rw));
 	address &= TARGET_PAGE_MASK;
 	prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
 	miss = cris_mmu_translate(&res, env, address, rw, mmu_idx);
@@ -86,12 +86,14 @@
 	else
 	{
 		phy = res.phy;
-		prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+		prot = res.prot;
+		address &= TARGET_PAGE_MASK;
 		r = tlb_set_page(env, address, phy, prot, mmu_idx, is_softmmu);
 	}
-	D(printf("%s returns %d irqreq=%x addr=%x ismmu=%d\n", 
-			__func__, r, env->interrupt_request, 
-			address, is_softmmu));
+	if (r > 0)
+		D(fprintf(logfile, "%s returns %d irqreq=%x addr=%x ismmu=%d vec=%x\n", 
+			 __func__, r, env->interrupt_request, 
+			 address, is_softmmu, res.bf_vec));
 	return r;
 }
 
@@ -100,8 +102,8 @@
 	int ex_vec = -1;
 
 	D(fprintf (stderr, "exception index=%d interrupt_req=%d\n",
-		 env->exception_index,
-		 env->interrupt_request));
+		   env->exception_index,
+		   env->interrupt_request));
 
 	switch (env->exception_index)
 	{
@@ -113,40 +115,46 @@
 			break;
 
 		case EXCP_MMU_FAULT:
-			/* ERP is already setup by translate-all.c through
-			   re-translation of the aborted TB combined with 
-			   pc searching.  */
 			ex_vec = env->fault_vector;
+			env->pregs[PR_ERP] = env->pc;
 			break;
 
 		default:
-		{
-			/* Maybe the irq was acked by sw before we got a
-			   change to take it.  */
-			if (env->interrupt_request & CPU_INTERRUPT_HARD) {
-				/* Vectors below 0x30 are internal
-				   exceptions, i.e not interrupt requests
-				   from the interrupt controller.  */
-				if (env->interrupt_vector < 0x30)
-					return;
-				/* Is the core accepting interrupts?  */
-				if (!(env->pregs[PR_CCS] & I_FLAG)) {
-					return;
-				}
-				/* The interrupt controller gives us the
-				   vector.  */
-				ex_vec = env->interrupt_vector;
-				/* Normal interrupts are taken between
-				   TB's.  env->pc is valid here.  */
-				env->pregs[PR_ERP] = env->pc;
-			}
-		}
-		break;
+			/* Is the core accepting interrupts?  */
+			if (!(env->pregs[PR_CCS] & I_FLAG))
+				return;
+			/* The interrupt controller gives us the
+			   vector.  */
+			ex_vec = env->interrupt_vector;
+			/* Normal interrupts are taken between
+			   TB's.  env->pc is valid here.  */
+			env->pregs[PR_ERP] = env->pc;
+			break;
 	}
+
+	if ((env->pregs[PR_CCS] & U_FLAG)) {
+		D(fprintf(logfile, "excp isr=%x PC=%x ERP=%x pid=%x ccs=%x cc=%d %x\n",
+			  ex_vec, env->pc,
+			  env->pregs[PR_ERP], env->pregs[PR_PID],
+			  env->pregs[PR_CCS],
+			  env->cc_op, env->cc_mask));
+	}
+	
 	env->pc = ldl_code(env->pregs[PR_EBP] + ex_vec * 4);
-	/* Apply the CRIS CCS shift.  */
+
+	if (env->pregs[PR_CCS] & U_FLAG) {
+		/* Swap stack pointers.  */
+		env->pregs[PR_USP] = env->regs[R_SP];
+		env->regs[R_SP] = env->ksp;
+	}
+
+	/* Apply the CRIS CCS shift. Clears U if set.  */
 	cris_shift_ccs(env);
-	D(printf ("%s ebp=%x isr=%x vec=%x\n", __func__, ebp, isr, ex_vec));
+	D(fprintf (logfile, "%s isr=%x vec=%x ccs=%x pid=%d erp=%x\n", 
+		   __func__, env->pc, ex_vec, 
+		   env->pregs[PR_CCS],
+		   env->pregs[PR_PID], 
+		   env->pregs[PR_ERP]));
 }
 
 target_phys_addr_t cpu_get_phys_page_debug(CPUState * env, target_ulong addr)
diff --git a/target-cris/helper.h b/target-cris/helper.h
index c2af326..d20b9f7 100644
--- a/target-cris/helper.h
+++ b/target-cris/helper.h
@@ -1,3 +1,15 @@
 #define TCG_HELPER_PROTO
 void TCG_HELPER_PROTO helper_tlb_update(uint32_t T0);
+void TCG_HELPER_PROTO helper_tlb_flush(void);
+void TCG_HELPER_PROTO helper_dump(uint32_t a0, uint32_t a1);
+void TCG_HELPER_PROTO helper_dummy(void);
+void TCG_HELPER_PROTO helper_rfe(void);
+void TCG_HELPER_PROTO helper_store(uint32_t a0);
 
+void TCG_HELPER_PROTO helper_evaluate_flags_muls(void);
+void TCG_HELPER_PROTO helper_evaluate_flags_mulu(void);
+void TCG_HELPER_PROTO helper_evaluate_flags_mcp(void);
+void TCG_HELPER_PROTO helper_evaluate_flags_alu_4(void);
+void TCG_HELPER_PROTO helper_evaluate_flags_move_4 (void);
+void TCG_HELPER_PROTO helper_evaluate_flags_move_2 (void);
+void TCG_HELPER_PROTO helper_evaluate_flags (void);
diff --git a/target-cris/mmu.c b/target-cris/mmu.c
index 84a1747..ac711fb 100644
--- a/target-cris/mmu.c
+++ b/target-cris/mmu.c
@@ -73,11 +73,30 @@
 	val <<= offset;
 
 	val &= mask;
-	D(printf ("val=%x mask=%x dst=%x\n", val, mask, *dst));
 	*dst &= ~(mask);
 	*dst |= val;
 }
 
+static void dump_tlb(CPUState *env, int mmu)
+{
+	int set;
+	int idx;
+	uint32_t hi, lo, tlb_vpn, tlb_pfn;
+
+	for (set = 0; set < 4; set++) {
+		for (idx = 0; idx < 16; idx++) {
+			lo = env->tlbsets[mmu][set][idx].lo;
+			hi = env->tlbsets[mmu][set][idx].hi;
+			tlb_vpn = EXTRACT_FIELD(hi, 13, 31);
+			tlb_pfn = EXTRACT_FIELD(lo, 13, 31);
+
+			printf ("TLB: [%d][%d] hi=%x lo=%x v=%x p=%x\n", 
+					set, idx, hi, lo, tlb_vpn, tlb_pfn);
+		}
+	}
+}
+
+/* rw 0 = read, 1 = write, 2 = exec.  */
 static int cris_mmu_translate_page(struct cris_mmu_result_t *res,
 				   CPUState *env, uint32_t vaddr,
 				   int rw, int usermode)
@@ -88,53 +107,63 @@
 	uint32_t tlb_vpn, tlb_pfn = 0;
 	int tlb_pid, tlb_g, tlb_v, tlb_k, tlb_w, tlb_x;
 	int cfg_v, cfg_k, cfg_w, cfg_x;	
-	int i, match = 0;
+	int set, match = 0;
 	uint32_t r_cause;
 	uint32_t r_cfg;
 	int rwcause;
-	int update_sel = 0;
+	int mmu = 1; /* Data mmu is default.  */
+	int vect_base;
 
 	r_cause = env->sregs[SFR_R_MM_CAUSE];
 	r_cfg = env->sregs[SFR_RW_MM_CFG];
-	rwcause = rw ? CRIS_MMU_ERR_WRITE : CRIS_MMU_ERR_READ;
+
+	switch (rw) {
+		case 2: rwcause = CRIS_MMU_ERR_EXEC; mmu = 0; break;
+		case 1: rwcause = CRIS_MMU_ERR_WRITE; break;
+		default:
+		case 0: rwcause = CRIS_MMU_ERR_READ; break;
+	}
+
+	/* I exception vectors 4 - 7, D 8 - 11.  */
+	vect_base = (mmu + 1) * 4;
 
 	vpage = vaddr >> 13;
-	idx = vpage & 15;
 
 	/* We know the index which to check on each set.
 	   Scan both I and D.  */
 #if 0
-	for (i = 0; i < 4; i++) {
-		int j;
-		for (j = 0; j < 16; j++) {
-			lo = env->tlbsets[1][i][j].lo;
-			hi = env->tlbsets[1][i][j].hi;
+	for (set = 0; set < 4; set++) {
+		for (idx = 0; idx < 16; idx++) {
+			lo = env->tlbsets[mmu][set][idx].lo;
+			hi = env->tlbsets[mmu][set][idx].hi;
 			tlb_vpn = EXTRACT_FIELD(hi, 13, 31);
 			tlb_pfn = EXTRACT_FIELD(lo, 13, 31);
 
 			printf ("TLB: [%d][%d] hi=%x lo=%x v=%x p=%x\n", 
-					i, j, hi, lo, tlb_vpn, tlb_pfn);
+					set, idx, hi, lo, tlb_vpn, tlb_pfn);
 		}
 	}
 #endif
-	for (i = 0; i < 4; i++)
+
+	idx = vpage & 15;
+	for (set = 0; set < 4; set++)
 	{
-		lo = env->tlbsets[1][i][idx].lo;
-		hi = env->tlbsets[1][i][idx].hi;
+		lo = env->tlbsets[mmu][set][idx].lo;
+		hi = env->tlbsets[mmu][set][idx].hi;
 
 		tlb_vpn = EXTRACT_FIELD(hi, 13, 31);
 		tlb_pfn = EXTRACT_FIELD(lo, 13, 31);
 
-		D(printf ("TLB[%d][%d] tlbv=%x vpage=%x -> pfn=%x\n", 
-				i, idx, tlb_vpn, vpage, tlb_pfn));
+		D(printf("TLB[%d][%d] v=%x vpage=%x -> pfn=%x lo=%x hi=%x\n",
+				set, idx, tlb_vpn, vpage, tlb_pfn, lo, hi));
 		if (tlb_vpn == vpage) {
 			match = 1;
 			break;
 		}
 	}
 
+	res->bf_vec = vect_base;
 	if (match) {
-
 		cfg_w  = EXTRACT_FIELD(r_cfg, 19, 19);
 		cfg_k  = EXTRACT_FIELD(r_cfg, 18, 18);
 		cfg_x  = EXTRACT_FIELD(r_cfg, 17, 17);
@@ -158,54 +187,67 @@
 		set_exception_vector(0x0a, d_mmu_access);
 		set_exception_vector(0x0b, d_mmu_write);
 		*/
-		if (cfg_v && !tlb_v) {
-			printf ("tlb: invalid\n");
+		if (!tlb_g 
+		    && tlb_pid != (env->pregs[PR_PID] & 0xff)) {
+			D(printf ("tlb: wrong pid %x %x pc=%x\n", 
+				 tlb_pid, env->pregs[PR_PID], env->pc));
+			match = 0;
+			res->bf_vec = vect_base;
+		} else if (rw == 1 && cfg_w && !tlb_w) {
+			D(printf ("tlb: write protected %x lo=%x\n", 
+				vaddr, lo));
+			match = 0;
+			res->bf_vec = vect_base + 3;
+		} else if (cfg_v && !tlb_v) {
+			D(printf ("tlb: invalid %x\n", vaddr));
 			set_field(&r_cause, rwcause, 8, 9);
 			match = 0;
-			res->bf_vec = 0x9;
-			update_sel = 1;
+			res->bf_vec = vect_base + 1;
 		}
-		else if (!tlb_g 
-			 && tlb_pid != 0xff
-			 && tlb_pid != env->pregs[PR_PID]
-			 && cfg_w && !tlb_w) {
-			printf ("tlb: wrong pid\n");
-			match = 0;
-			res->bf_vec = 0xa;
-		}
-		else if (rw && cfg_w && !tlb_w) {
-			printf ("tlb: write protected\n");
-			match = 0;
-			res->bf_vec = 0xb;
-		}
-	} else
-		update_sel = 1;
 
-	if (update_sel) {
-		/* miss.  */
-		env->sregs[SFR_RW_MM_TLB_SEL] = 0;
-		D(printf ("tlb: miss %x vp=%x\n", 
-			env->sregs[SFR_RW_MM_TLB_SEL], vpage & 15));
-		set_field(&env->sregs[SFR_RW_MM_TLB_SEL], vpage & 15, 0, 4);
-		set_field(&env->sregs[SFR_RW_MM_TLB_SEL], 0, 4, 5);
-		res->bf_vec = 0x8;
+		res->prot = 0;
+		if (match) {
+			res->prot |= PAGE_READ;
+			if (tlb_w)
+				res->prot |= PAGE_WRITE;
+			if (tlb_x)
+				res->prot |= PAGE_EXEC;
+		}
+		else
+			D(dump_tlb(env, mmu));
+
+		env->sregs[SFR_RW_MM_TLB_HI] = hi;
+		env->sregs[SFR_RW_MM_TLB_LO] = lo;
 	}
 
 	if (!match) {
-		set_field(&r_cause, rwcause, 8, 9);
+		/* miss.  */
+		idx = vpage & 15;
+		set = 0;
+
+		/* Update RW_MM_TLB_SEL.  */
+		env->sregs[SFR_RW_MM_TLB_SEL] = 0;
+		set_field(&env->sregs[SFR_RW_MM_TLB_SEL], idx, 0, 4);
+		set_field(&env->sregs[SFR_RW_MM_TLB_SEL], set, 4, 5);
+
+		/* Update RW_MM_CAUSE.  */
+		set_field(&r_cause, rwcause, 8, 2);
 		set_field(&r_cause, vpage, 13, 19);
 		set_field(&r_cause, env->pregs[PR_PID], 0, 8);
 		env->sregs[SFR_R_MM_CAUSE] = r_cause;
+		D(printf("refill vaddr=%x pc=%x\n", vaddr, env->pc));
 	}
-	D(printf ("%s mtch=%d pc=%x va=%x vpn=%x tlbvpn=%x pfn=%x pid=%x"
-		  " %x cause=%x sel=%x r13=%x\n",
-		  __func__, match, env->pc,
+
+
+	D(printf ("%s rw=%d mtch=%d pc=%x va=%x vpn=%x tlbvpn=%x pfn=%x pid=%x"
+		  " %x cause=%x sel=%x sp=%x %x %x\n",
+		  __func__, rw, match, env->pc,
 		  vaddr, vpage,
 		  tlb_vpn, tlb_pfn, tlb_pid, 
 		  env->pregs[PR_PID],
 		  r_cause,
 		  env->sregs[SFR_RW_MM_TLB_SEL],
-		  env->regs[13]));
+		  env->regs[R_SP], env->pregs[PR_USP], env->ksp));
 
 	res->pfn = tlb_pfn;
 	return !match;
@@ -236,10 +278,17 @@
 	int seg;
 	int miss = 0;
 	int is_user = mmu_idx == MMU_USER_IDX;
+	uint32_t old_srs;
+
+	old_srs= env->pregs[PR_SRS];
+
+	/* rw == 2 means exec, map the access to the insn mmu.  */
+	env->pregs[PR_SRS] = rw == 2 ? 1 : 2;
 
 	if (!cris_mmu_enabled(env->sregs[SFR_RW_GC_CFG])) {
 		res->phy = vaddr;
-		return 0;
+		res->prot = PAGE_BITS;		
+		goto done;
 	}
 
 	seg = vaddr >> 28;
@@ -251,17 +300,16 @@
 		base = cris_mmu_translate_seg(env, seg);
 		phy = base | (0x0fffffff & vaddr);
 		res->phy = phy;
+		res->prot = PAGE_BITS;		
 	}
 	else
 	{
 		miss = cris_mmu_translate_page(res, env, vaddr, rw, is_user);
-		if (!miss) {
-			phy &= 8191;
-			phy |= (res->pfn << 13);
-			res->phy = phy;
-		}
+		phy = (res->pfn << 13);
+		res->phy = phy;
 	}
-	D(printf ("miss=%d v=%x -> p=%x\n", miss, vaddr, phy));
+  done:
+	env->pregs[PR_SRS] = old_srs;
 	return miss;
 }
 #endif
diff --git a/target-cris/mmu.h b/target-cris/mmu.h
index aef8c1b..32c2b68 100644
--- a/target-cris/mmu.h
+++ b/target-cris/mmu.h
@@ -7,6 +7,7 @@
 {
 	uint32_t phy;
 	uint32_t pfn;
+	int prot;
 	int bf_vec;
 };
 
diff --git a/target-cris/op.c b/target-cris/op.c
index d44185c..a446e20 100644
--- a/target-cris/op.c
+++ b/target-cris/op.c
@@ -192,17 +192,32 @@
 }
 void OPPROTO op_ccs_rshift (void)
 {
-	uint32_t ccs;
+	register uint32_t ccs;
 
 	/* Apply the ccs shift.  */
 	ccs = env->pregs[PR_CCS];
 	ccs = (ccs & 0xc0000000) | ((ccs & 0x0fffffff) >> 10);
+	if (ccs & U_FLAG)
+	{
+		/* Enter user mode.  */
+		env->ksp = env->regs[R_SP];
+		env->regs[R_SP] = env->pregs[PR_USP];
+	}
+
 	env->pregs[PR_CCS] = ccs;
+
 	RETURN();
 }
 
 void OPPROTO op_setf (void)
 {
+	if (!(env->pregs[PR_CCS] & U_FLAG) && (PARAM1 & U_FLAG))
+	{
+		/* Enter user mode.  */
+		env->ksp = env->regs[R_SP];
+		env->regs[R_SP] = env->pregs[PR_USP];
+	}
+
 	env->pregs[PR_CCS] |= PARAM1;
 	RETURN();
 }
@@ -265,7 +280,11 @@
 
 void OPPROTO op_movl_sreg_T0 (void)
 {
-	env->sregs[env->pregs[PR_SRS]][PARAM1] = T0;
+	uint32_t srs;
+	srs = env->pregs[PR_SRS];
+	srs &= 3;
+
+	env->sregs[srs][PARAM1] = T0;
 	RETURN();
 }
 
@@ -285,7 +304,10 @@
 void OPPROTO op_movl_tlb_lo_T0 (void)
 {
 	uint32_t srs;
+
+	env->pregs[PR_SRS] &= 3;
 	srs = env->pregs[PR_SRS];
+
 	if (srs == 1 || srs == 2)
 	{
 		uint32_t set;
@@ -309,7 +331,28 @@
 
 void OPPROTO op_movl_T0_sreg (void)
 {
-	T0 = env->sregs[env->pregs[PR_SRS]][PARAM1];
+	uint32_t srs;
+	env->pregs[PR_SRS] &= 3;
+	srs = env->pregs[PR_SRS];
+	
+	if (srs == 1 || srs == 2)
+	{
+		uint32_t set;
+		uint32_t idx;
+		uint32_t lo, hi;
+
+		idx = set = env->sregs[SFR_RW_MM_TLB_SEL];
+		set >>= 4;
+		set &= 3;
+		idx &= 15;
+
+		/* Update the mirror regs.  */
+		hi = env->tlbsets[srs - 1][set][idx].hi;
+		lo = env->tlbsets[srs - 1][set][idx].lo;
+		env->sregs[SFR_RW_MM_TLB_HI] = hi;
+		env->sregs[SFR_RW_MM_TLB_LO] = lo;
+	}
+	T0 = env->sregs[srs][PARAM1];
 	RETURN();
 }
 
@@ -363,340 +406,6 @@
 	RETURN();
 }
 
-/* FIXME: is this allowed?  */
-extern inline void evaluate_flags_writeback(uint32_t flags)
-{
-	int x;
-
-	/* Extended arithmetics, leave the z flag alone.  */
-	env->debug3 = env->pregs[PR_CCS];
-
-	if (env->cc_x_live)
-		x = env->cc_x;
-	else
-		x = env->pregs[PR_CCS] & X_FLAG;
-
-	if ((x || env->cc_op == CC_OP_ADDC)
-	    && flags & Z_FLAG)
-		env->cc_mask &= ~Z_FLAG;
-
-	/* all insn clear the x-flag except setf or clrf.  */
-	env->pregs[PR_CCS] &= ~(env->cc_mask | X_FLAG);
-	flags &= env->cc_mask;
-	env->pregs[PR_CCS] |= flags;
-	RETURN();
-}
-
-void OPPROTO op_evaluate_flags_muls(void)
-{
-	uint32_t src;
-	uint32_t dst;
-	uint32_t res;
-	uint32_t flags = 0;
-	/* were gonna have to redo the muls.  */
-	int64_t tmp, t0 ,t1;
-	int32_t mof;
-	int dneg;
-
-	src = env->cc_src;
-	dst = env->cc_dest;
-	res = env->cc_result;
-
-
-	/* cast into signed values to make GCC sign extend.  */
-	t0 = (int32_t)src;
-	t1 = (int32_t)dst;
-	dneg = ((int32_t)res) < 0;
-
-	tmp = t0 * t1;
-	mof = tmp >> 32;
-	if (tmp == 0)
-		flags |= Z_FLAG;
-	else if (tmp < 0)
-		flags |= N_FLAG;
-	if ((dneg && mof != -1)
-	    || (!dneg && mof != 0))
-		flags |= V_FLAG;
-	evaluate_flags_writeback(flags);
-	RETURN();
-}
-
-void OPPROTO op_evaluate_flags_mulu(void)
-{
-	uint32_t src;
-	uint32_t dst;
-	uint32_t res;
-	uint32_t flags = 0;
-	/* were gonna have to redo the muls.  */
-	uint64_t tmp, t0 ,t1;
-	uint32_t mof;
-
-	src = env->cc_src;
-	dst = env->cc_dest;
-	res = env->cc_result;
-
-
-	/* cast into signed values to make GCC sign extend.  */
-	t0 = src;
-	t1 = dst;
-
-	tmp = t0 * t1;
-	mof = tmp >> 32;
-	if (tmp == 0)
-		flags |= Z_FLAG;
-	else if (tmp >> 63)
-		flags |= N_FLAG;
-	if (mof)
-		flags |= V_FLAG;
-
-	evaluate_flags_writeback(flags);
-	RETURN();
-}
-
-void OPPROTO op_evaluate_flags_mcp(void)
-{
-	uint32_t src;
-	uint32_t dst;
-	uint32_t res;
-	uint32_t flags = 0;
-
-	src = env->cc_src;
-	dst = env->cc_dest;
-	res = env->cc_result;
-
-	if ((res & 0x80000000L) != 0L)
-	{
-		flags |= N_FLAG;
-		if (((src & 0x80000000L) == 0L)
-		    && ((dst & 0x80000000L) == 0L))
-		{
-			flags |= V_FLAG;
-		}
-		else if (((src & 0x80000000L) != 0L) &&
-			 ((dst & 0x80000000L) != 0L))
-		{
-			flags |= R_FLAG;
-		}
-	}
-	else
-	{
-		if (res == 0L)
-			flags |= Z_FLAG;
-		if (((src & 0x80000000L) != 0L)
-		    && ((dst & 0x80000000L) != 0L))
-			flags |= V_FLAG;
-		if ((dst & 0x80000000L) != 0L
-		    || (src & 0x80000000L) != 0L)
-			flags |= R_FLAG;
-	}
-
-	evaluate_flags_writeback(flags);
-	RETURN();
-}
-
-void OPPROTO op_evaluate_flags_alu_4(void)
-{
-	uint32_t src;
-	uint32_t dst;
-	uint32_t res;
-	uint32_t flags = 0;
-
-	src = env->cc_src;
-	dst = env->cc_dest;
-	res = env->cc_result;
-
-	if ((res & 0x80000000L) != 0L)
-	{
-		flags |= N_FLAG;
-		if (((src & 0x80000000L) == 0L)
-		    && ((dst & 0x80000000L) == 0L))
-		{
-			flags |= V_FLAG;
-		}
-		else if (((src & 0x80000000L) != 0L) &&
-			 ((dst & 0x80000000L) != 0L))
-		{
-			flags |= C_FLAG;
-		}
-	}
-	else
-	{
-		if (res == 0L)
-			flags |= Z_FLAG;
-		if (((src & 0x80000000L) != 0L)
-		    && ((dst & 0x80000000L) != 0L))
-			flags |= V_FLAG;
-		if ((dst & 0x80000000L) != 0L
-		    || (src & 0x80000000L) != 0L)
-			flags |= C_FLAG;
-	}
-
-	if (env->cc_op == CC_OP_SUB
-	    || env->cc_op == CC_OP_CMP) {
-		flags ^= C_FLAG;
-	}
-	evaluate_flags_writeback(flags);
-	RETURN();
-}
-
-void OPPROTO op_evaluate_flags_move_4 (void)
-{
-	uint32_t src;
-	uint32_t res;
-	uint32_t flags = 0;
-
-	src = env->cc_src;
-	res = env->cc_result;
-
-	if ((int32_t)res < 0)
-		flags |= N_FLAG;
-	else if (res == 0L)
-		flags |= Z_FLAG;
-
-	evaluate_flags_writeback(flags);
-	RETURN();
-}
-void OPPROTO op_evaluate_flags_move_2 (void)
-{
-	uint32_t src;
-	uint32_t flags = 0;
-	uint16_t res;
-
-	src = env->cc_src;
-	res = env->cc_result;
-
-	if ((int16_t)res < 0L)
-		flags |= N_FLAG;
-	else if (res == 0)
-		flags |= Z_FLAG;
-
-	evaluate_flags_writeback(flags);
-	RETURN();
-}
-
-/* TODO: This is expensive. We could split things up and only evaluate part of
-   CCR on a need to know basis. For now, we simply re-evaluate everything.  */
-void OPPROTO op_evaluate_flags (void)
-{
-	uint32_t src;
-	uint32_t dst;
-	uint32_t res;
-	uint32_t flags = 0;
-
-	src = env->cc_src;
-	dst = env->cc_dest;
-	res = env->cc_result;
-
-
-	/* Now, evaluate the flags. This stuff is based on
-	   Per Zander's CRISv10 simulator.  */
-	switch (env->cc_size)
-	{
-		case 1:
-			if ((res & 0x80L) != 0L)
-			{
-				flags |= N_FLAG;
-				if (((src & 0x80L) == 0L)
-				    && ((dst & 0x80L) == 0L))
-				{
-					flags |= V_FLAG;
-				}
-				else if (((src & 0x80L) != 0L)
-					 && ((dst & 0x80L) != 0L))
-				{
-					flags |= C_FLAG;
-				}
-			}
-			else
-			{
-				if ((res & 0xFFL) == 0L)
-				{
-					flags |= Z_FLAG;
-				}
-				if (((src & 0x80L) != 0L)
-				    && ((dst & 0x80L) != 0L))
-				{
-					flags |= V_FLAG;
-				}
-				if ((dst & 0x80L) != 0L
-				    || (src & 0x80L) != 0L)
-				{
-					flags |= C_FLAG;
-				}
-			}
-			break;
-		case 2:
-			if ((res & 0x8000L) != 0L)
-			{
-				flags |= N_FLAG;
-				if (((src & 0x8000L) == 0L)
-				    && ((dst & 0x8000L) == 0L))
-				{
-					flags |= V_FLAG;
-				}
-				else if (((src & 0x8000L) != 0L)
-					 && ((dst & 0x8000L) != 0L))
-				{
-					flags |= C_FLAG;
-				}
-			}
-			else
-			{
-				if ((res & 0xFFFFL) == 0L)
-				{
-					flags |= Z_FLAG;
-				}
-				if (((src & 0x8000L) != 0L)
-				    && ((dst & 0x8000L) != 0L))
-				{
-					flags |= V_FLAG;
-				}
-				if ((dst & 0x8000L) != 0L
-				    || (src & 0x8000L) != 0L)
-				{
-					flags |= C_FLAG;
-				}
-			}
-			break;
-		case 4:
-			if ((res & 0x80000000L) != 0L)
-			{
-				flags |= N_FLAG;
-				if (((src & 0x80000000L) == 0L)
-				    && ((dst & 0x80000000L) == 0L))
-				{
-					flags |= V_FLAG;
-				}
-				else if (((src & 0x80000000L) != 0L) &&
-					 ((dst & 0x80000000L) != 0L))
-				{
-					flags |= C_FLAG;
-				}
-			}
-			else
-			{
-				if (res == 0L)
-					flags |= Z_FLAG;
-				if (((src & 0x80000000L) != 0L)
-				    && ((dst & 0x80000000L) != 0L))
-					flags |= V_FLAG;
-				if ((dst & 0x80000000L) != 0L
-				    || (src & 0x80000000L) != 0L)
-					flags |= C_FLAG;
-			}
-			break;
-		default:
-			break;
-	}
-
-	if (env->cc_op == CC_OP_SUB
-	    || env->cc_op == CC_OP_CMP) {
-		flags ^= C_FLAG;
-	}
-	evaluate_flags_writeback(flags);
-	RETURN();
-}
-
 void OPPROTO op_extb_T0_T0 (void)
 {
 	T0 = ((int8_t)T0);
@@ -1274,17 +983,3 @@
 	env->pc = env->btarget;
 	RETURN();
 }
-
-/* Load and store */
-#define MEMSUFFIX _raw
-#include "op_mem.c"
-#undef MEMSUFFIX
-#if !defined(CONFIG_USER_ONLY)
-#define MEMSUFFIX _user
-#include "op_mem.c"
-#undef MEMSUFFIX
-
-#define MEMSUFFIX _kernel
-#include "op_mem.c"
-#undef MEMSUFFIX
-#endif
diff --git a/target-cris/op_helper.c b/target-cris/op_helper.c
index 701c835..7c629c7 100644
--- a/target-cris/op_helper.c
+++ b/target-cris/op_helper.c
@@ -59,6 +59,9 @@
        generated code */
     saved_env = env;
     env = cpu_single_env;
+
+    D(fprintf(logfile, "%s ra=%x acr=%x %x\n", __func__, retaddr,
+	    env->regs[R_ACR], saved_env->regs[R_ACR]));
     ret = cpu_cris_handle_mmu_fault(env, addr, is_write, mmu_idx, 1);
     if (__builtin_expect(ret, 0)) {
         if (retaddr) {
@@ -80,16 +83,380 @@
 {
 #if !defined(CONFIG_USER_ONLY)
 	uint32_t vaddr;
+	uint32_t srs = env->pregs[PR_SRS];
+
+	if (srs != 1 && srs != 2)
+		return;
 
 	vaddr = cris_mmu_tlb_latest_update(env, T0);
-	D(printf("flush vaddr %x\n", vaddr));
+	D(printf("flush old_vaddr=%x vaddr=%x T0=%x\n", vaddr, 
+		 env->sregs[SFR_R_MM_CAUSE] & TARGET_PAGE_MASK, T0));
 	tlb_flush_page(env, vaddr);
 #endif
 }
 
+void helper_tlb_flush(void)
+{
+	tlb_flush(env, 1);
+}
+
+void helper_dump(uint32_t a0, uint32_t a1)
+{
+	(fprintf(logfile, "%s: a0=%x a1=%x\n", __func__, a0, a1)); 
+}
+
+void helper_dummy(void)
+{
+
+}
+
+/* Only used for debugging at the moment.  */
+void helper_rfe(void)
+{
+	D(fprintf(logfile, "rfe: erp=%x pid=%x ccs=%x btarget=%x\n", 
+		 env->pregs[PR_ERP], env->pregs[PR_PID],
+		 env->pregs[PR_CCS],
+		 env->btarget));
+}
+
+void helper_store(uint32_t a0)
+{
+	if (env->pregs[PR_CCS] & P_FLAG )
+	{
+		cpu_abort(env, "cond_store_failed! pc=%x a0=%x\n",
+			  env->pc, a0);
+	}
+}
+
 void do_unassigned_access(target_phys_addr_t addr, int is_write, int is_exec,
                           int is_asi)
 {
 	D(printf("%s addr=%x w=%d ex=%d asi=%d\n", 
 		__func__, addr, is_write, is_exec, is_asi));
 }
+
+static void evaluate_flags_writeback(uint32_t flags)
+{
+	int x;
+
+	/* Extended arithmetics, leave the z flag alone.  */
+	env->debug3 = env->pregs[PR_CCS];
+
+	if (env->cc_x_live)
+		x = env->cc_x;
+	else
+		x = env->pregs[PR_CCS] & X_FLAG;
+
+	if ((x || env->cc_op == CC_OP_ADDC)
+	    && flags & Z_FLAG)
+		env->cc_mask &= ~Z_FLAG;
+
+	/* all insn clear the x-flag except setf or clrf.  */
+	env->pregs[PR_CCS] &= ~(env->cc_mask | X_FLAG);
+	flags &= env->cc_mask;
+	env->pregs[PR_CCS] |= flags;
+	RETURN();
+}
+
+void helper_evaluate_flags_muls(void)
+{
+	uint32_t src;
+	uint32_t dst;
+	uint32_t res;
+	uint32_t flags = 0;
+	/* We have to redo the signed multiplication here.  */
+	int64_t tmp, t0 ,t1;
+	int32_t mof;
+	int dneg;
+
+	src = env->cc_src;
+	dst = env->cc_dest;
+	res = env->cc_result;
+
+
+	/* cast into signed values to make GCC sign extend.  */
+	t0 = (int32_t)src;
+	t1 = (int32_t)dst;
+	dneg = ((int32_t)res) < 0;
+
+	tmp = t0 * t1;
+	mof = tmp >> 32;
+	if (tmp == 0)
+		flags |= Z_FLAG;
+	else if (tmp < 0)
+		flags |= N_FLAG;
+	if ((dneg && mof != -1)
+	    || (!dneg && mof != 0))
+		flags |= V_FLAG;
+	evaluate_flags_writeback(flags);
+}
+
+void  helper_evaluate_flags_mulu(void)
+{
+	uint32_t src;
+	uint32_t dst;
+	uint32_t res;
+	uint32_t flags = 0;
+	/* We have to redo the unsigned multiplication here.  */
+	uint64_t tmp, t0 ,t1;
+	uint32_t mof;
+
+	src = env->cc_src;
+	dst = env->cc_dest;
+	res = env->cc_result;
+
+
+	/* Widen to 64 bits; the unsigned operands are zero extended.  */
+	t0 = src;
+	t1 = dst;
+
+	tmp = t0 * t1;
+	mof = tmp >> 32;
+	if (tmp == 0)
+		flags |= Z_FLAG;
+	else if (tmp >> 63)
+		flags |= N_FLAG;
+	if (mof)
+		flags |= V_FLAG;
+
+	evaluate_flags_writeback(flags);
+}
+
+void  helper_evaluate_flags_mcp(void)
+{
+	uint32_t src;
+	uint32_t dst;
+	uint32_t res;
+	uint32_t flags = 0;
+
+	src = env->cc_src;
+	dst = env->cc_dest;
+	res = env->cc_result;
+
+	if ((res & 0x80000000L) != 0L)
+	{
+		flags |= N_FLAG;
+		if (((src & 0x80000000L) == 0L)
+		    && ((dst & 0x80000000L) == 0L))
+		{
+			flags |= V_FLAG;
+		}
+		else if (((src & 0x80000000L) != 0L) &&
+			 ((dst & 0x80000000L) != 0L))
+		{
+			flags |= R_FLAG;
+		}
+	}
+	else
+	{
+		if (res == 0L)
+			flags |= Z_FLAG;
+		if (((src & 0x80000000L) != 0L)
+		    && ((dst & 0x80000000L) != 0L))
+			flags |= V_FLAG;
+		if ((dst & 0x80000000L) != 0L
+		    || (src & 0x80000000L) != 0L)
+			flags |= R_FLAG;
+	}
+
+	evaluate_flags_writeback(flags);
+}
+
+void  helper_evaluate_flags_alu_4(void)
+{
+	uint32_t src;
+	uint32_t dst;
+	uint32_t res;
+	uint32_t flags = 0;
+
+	src = env->cc_src;
+	dst = env->cc_dest;
+	res = env->cc_result;
+
+	if ((res & 0x80000000L) != 0L)
+	{
+		flags |= N_FLAG;
+		if (((src & 0x80000000L) == 0L)
+		    && ((dst & 0x80000000L) == 0L))
+		{
+			flags |= V_FLAG;
+		}
+		else if (((src & 0x80000000L) != 0L) &&
+			 ((dst & 0x80000000L) != 0L))
+		{
+			flags |= C_FLAG;
+		}
+	}
+	else
+	{
+		if (res == 0L)
+			flags |= Z_FLAG;
+		if (((src & 0x80000000L) != 0L)
+		    && ((dst & 0x80000000L) != 0L))
+			flags |= V_FLAG;
+		if ((dst & 0x80000000L) != 0L
+		    || (src & 0x80000000L) != 0L)
+			flags |= C_FLAG;
+	}
+
+	if (env->cc_op == CC_OP_SUB
+	    || env->cc_op == CC_OP_CMP) {
+		flags ^= C_FLAG;
+	}
+	evaluate_flags_writeback(flags);
+}
+
+void  helper_evaluate_flags_move_4 (void)
+{
+	uint32_t src;
+	uint32_t res;
+	uint32_t flags = 0;
+
+	src = env->cc_src;
+	res = env->cc_result;
+
+	if ((int32_t)res < 0)
+		flags |= N_FLAG;
+	else if (res == 0L)
+		flags |= Z_FLAG;
+
+	evaluate_flags_writeback(flags);
+}
+void  helper_evaluate_flags_move_2 (void)
+{
+	uint32_t src;
+	uint32_t flags = 0;
+	uint16_t res;
+
+	src = env->cc_src;
+	res = env->cc_result;
+
+	if ((int16_t)res < 0L)
+		flags |= N_FLAG;
+	else if (res == 0)
+		flags |= Z_FLAG;
+
+	evaluate_flags_writeback(flags);
+}
+
+/* TODO: This is expensive. We could split things up and only evaluate part of
+   CCR on a need to know basis. For now, we simply re-evaluate everything.  */
+void helper_evaluate_flags (void)
+{
+	uint32_t src;
+	uint32_t dst;
+	uint32_t res;
+	uint32_t flags = 0;
+
+	src = env->cc_src;
+	dst = env->cc_dest;
+	res = env->cc_result;
+
+
+	/* Now, evaluate the flags. This stuff is based on
+	   Per Zander's CRISv10 simulator.  */
+	switch (env->cc_size)
+	{
+		case 1:
+			if ((res & 0x80L) != 0L)
+			{
+				flags |= N_FLAG;
+				if (((src & 0x80L) == 0L)
+				    && ((dst & 0x80L) == 0L))
+				{
+					flags |= V_FLAG;
+				}
+				else if (((src & 0x80L) != 0L)
+					 && ((dst & 0x80L) != 0L))
+				{
+					flags |= C_FLAG;
+				}
+			}
+			else
+			{
+				if ((res & 0xFFL) == 0L)
+				{
+					flags |= Z_FLAG;
+				}
+				if (((src & 0x80L) != 0L)
+				    && ((dst & 0x80L) != 0L))
+				{
+					flags |= V_FLAG;
+				}
+				if ((dst & 0x80L) != 0L
+				    || (src & 0x80L) != 0L)
+				{
+					flags |= C_FLAG;
+				}
+			}
+			break;
+		case 2:
+			if ((res & 0x8000L) != 0L)
+			{
+				flags |= N_FLAG;
+				if (((src & 0x8000L) == 0L)
+				    && ((dst & 0x8000L) == 0L))
+				{
+					flags |= V_FLAG;
+				}
+				else if (((src & 0x8000L) != 0L)
+					 && ((dst & 0x8000L) != 0L))
+				{
+					flags |= C_FLAG;
+				}
+			}
+			else
+			{
+				if ((res & 0xFFFFL) == 0L)
+				{
+					flags |= Z_FLAG;
+				}
+				if (((src & 0x8000L) != 0L)
+				    && ((dst & 0x8000L) != 0L))
+				{
+					flags |= V_FLAG;
+				}
+				if ((dst & 0x8000L) != 0L
+				    || (src & 0x8000L) != 0L)
+				{
+					flags |= C_FLAG;
+				}
+			}
+			break;
+		case 4:
+			if ((res & 0x80000000L) != 0L)
+			{
+				flags |= N_FLAG;
+				if (((src & 0x80000000L) == 0L)
+				    && ((dst & 0x80000000L) == 0L))
+				{
+					flags |= V_FLAG;
+				}
+				else if (((src & 0x80000000L) != 0L) &&
+					 ((dst & 0x80000000L) != 0L))
+				{
+					flags |= C_FLAG;
+				}
+			}
+			else
+			{
+				if (res == 0L)
+					flags |= Z_FLAG;
+				if (((src & 0x80000000L) != 0L)
+				    && ((dst & 0x80000000L) != 0L))
+					flags |= V_FLAG;
+				if ((dst & 0x80000000L) != 0L
+				    || (src & 0x80000000L) != 0L)
+					flags |= C_FLAG;
+			}
+			break;
+		default:
+			break;
+	}
+
+	if (env->cc_op == CC_OP_SUB
+	    || env->cc_op == CC_OP_CMP) {
+		flags ^= C_FLAG;
+	}
+	evaluate_flags_writeback(flags);
+}
diff --git a/target-cris/translate.c b/target-cris/translate.c
index 108fb7a..43c861d 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -19,6 +19,12 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+/*
+ * FIXME:
+ * The condition code translation is in desperate need of attention. It's slow
+ * and for system simulation it seems buggy. It sucks.
+ */
+
 #include <stdarg.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -48,6 +54,7 @@
 #define DIS(x)
 #endif
 
+#define D(x)
 #define BUG() (gen_BUG(dc, __FILE__, __LINE__))
 #define BUG_ON(x) ({if (x) BUG();})
 
@@ -73,10 +80,13 @@
 TCGv cc_size;
 TCGv cc_mask;
 
+TCGv env_btarget;
+TCGv env_pc;
+
 /* This is the state at translation time.  */
 typedef struct DisasContext {
 	CPUState *env;
-	target_ulong pc, insn_pc;
+	target_ulong pc, ppc;
 
 	/* Decoder.  */
 	uint32_t ir;
@@ -91,12 +101,13 @@
 	int cc_op;
 	int cc_size;
 	uint32_t cc_mask;
-	int flags_live;
-	int flagx_live;
+	int flags_live; /* Whether or not $ccs is up to date.  */
+	int flagx_live; /* Whether or not flags_x has the x flag known at
+			   translation time.  */
 	int flags_x;
-	uint32_t tb_entry_flags;
+	int clear_x; /* Clear x after this insn?  */
 
-	int memidx; /* user or kernel mode.  */
+	int user; /* user or kernel mode.  */
 	int is_jmp;
 	int dyn_jmp;
 
@@ -119,37 +130,6 @@
 	cris_prepare_jmp (dc, 0x70000000 + line);
 }
 
-#ifdef CONFIG_USER_ONLY
-#define GEN_OP_LD(width, reg) \
-  void gen_op_ld##width##_T0_##reg (DisasContext *dc) { \
-    gen_op_ld##width##_T0_##reg##_raw(); \
-  }
-#define GEN_OP_ST(width, reg) \
-  void gen_op_st##width##_##reg##_T1 (DisasContext *dc) { \
-    gen_op_st##width##_##reg##_T1_raw(); \
-  }
-#else
-#define GEN_OP_LD(width, reg) \
-  void gen_op_ld##width##_T0_##reg (DisasContext *dc) { \
-    if (dc->memidx) gen_op_ld##width##_T0_##reg##_kernel(); \
-    else gen_op_ld##width##_T0_##reg##_user();\
-  }
-#define GEN_OP_ST(width, reg) \
-  void gen_op_st##width##_##reg##_T1 (DisasContext *dc) { \
-    if (dc->memidx) gen_op_st##width##_##reg##_T1_kernel(); \
-    else gen_op_st##width##_##reg##_T1_user();\
-  }
-#endif
-
-GEN_OP_LD(ub, T0)
-GEN_OP_LD(b, T0)
-GEN_OP_ST(b, T0)
-GEN_OP_LD(uw, T0)
-GEN_OP_LD(w, T0)
-GEN_OP_ST(w, T0)
-GEN_OP_LD(l, T0)
-GEN_OP_ST(l, T0)
-
 const char *regnames[] =
 {
 	"$r0", "$r1", "$r2", "$r3",
@@ -182,35 +162,65 @@
 #define t_gen_mov_env_TN(member, tn) \
  _t_gen_mov_env_TN(offsetof(CPUState, member), (tn))
 
-#define t_gen_mov_TN_reg(tn, regno) \
- tcg_gen_mov_tl(tn, cpu_R[regno])
-#define t_gen_mov_reg_TN(regno, tn) \
- tcg_gen_mov_tl(cpu_R[regno], tn)
+static inline void t_gen_mov_TN_reg(TCGv tn, int r)
+{
+	if (r < 0 || r > 15)
+		fprintf(stderr, "wrong register read $r%d\n", r);
+	tcg_gen_mov_tl(tn, cpu_R[r]);
+}
+static inline void t_gen_mov_reg_TN(int r, TCGv tn)
+{
+	if (r < 0 || r > 15)
+		fprintf(stderr, "wrong register write $r%d\n", r);
+	tcg_gen_mov_tl(cpu_R[r], tn);
+}
 
 static inline void _t_gen_mov_TN_env(TCGv tn, int offset)
 {
+	if (offset > sizeof (CPUState))
+		fprintf(stderr, "wrong load from env from off=%d\n", offset);
 	tcg_gen_ld_tl(tn, cpu_env, offset);
 }
 static inline void _t_gen_mov_env_TN(int offset, TCGv tn)
 {
+	if (offset > sizeof (CPUState))
+		fprintf(stderr, "wrong store to env at off=%d\n", offset);
 	tcg_gen_st_tl(tn, cpu_env, offset);
 }
 
 static inline void t_gen_mov_TN_preg(TCGv tn, int r)
 {
+	if (r < 0 || r > 15)
+		fprintf(stderr, "wrong register read $p%d\n", r);
 	if (r == PR_BZ || r == PR_WZ || r == PR_DZ)
 		tcg_gen_mov_tl(tn, tcg_const_tl(0));
 	else if (r == PR_VR)
 		tcg_gen_mov_tl(tn, tcg_const_tl(32));
+	else if (r == PR_EXS) {
+		printf("read from EXS!\n");
+		tcg_gen_mov_tl(tn, cpu_PR[r]);
+	}
+	else if (r == PR_EDA) {
+		printf("read from EDA!\n");
+		tcg_gen_mov_tl(tn, cpu_PR[r]);
+	}
 	else
 		tcg_gen_mov_tl(tn, cpu_PR[r]);
 }
 static inline void t_gen_mov_preg_TN(int r, TCGv tn)
 {
+	if (r < 0 || r > 15)
+		fprintf(stderr, "wrong register write $p%d\n", r);
 	if (r == PR_BZ || r == PR_WZ || r == PR_DZ)
 		return;
-	else
+	else if (r == PR_SRS)
+		tcg_gen_andi_tl(cpu_PR[r], tn, 3);
+	else {
+		if (r == PR_PID) {
+			tcg_gen_helper_0_0(helper_tlb_flush);
+		}
 		tcg_gen_mov_tl(cpu_PR[r], tn);
+	}
 }
 
 static inline void t_gen_mov_TN_im(TCGv tn, int32_t val)
@@ -253,9 +263,7 @@
 	tcg_gen_sar_tl(d, a, b);
 	tcg_gen_brcond_tl(TCG_COND_LE, b, tcg_const_tl(31), l1);
 	/* Clear dst if shift operands were to large.  */
-	tcg_gen_movi_tl(d, 0);
-	tcg_gen_brcond_tl(TCG_COND_LT, b, tcg_const_tl(0x80000000), l1);
-	tcg_gen_movi_tl(d, 0xffffffff);
+	tcg_gen_sar_tl(d, a, tcg_const_tl(30));
 	gen_set_label(l1);
 }
 
@@ -274,6 +282,9 @@
 	tcg_gen_trunc_i64_i32(d, t0);
 	tcg_gen_shri_i64(t0, t0, 32);
 	tcg_gen_trunc_i64_i32(d2, t0);
+
+	tcg_gen_discard_i64(t0);
+	tcg_gen_discard_i64(t1);
 }
 
 /* 64-bit unsigned muls, lower result in d and upper in d2.  */
@@ -291,6 +302,9 @@
 	tcg_gen_trunc_i64_i32(d, t0);
 	tcg_gen_shri_i64(t0, t0, 32);
 	tcg_gen_trunc_i64_i32(d2, t0);
+
+	tcg_gen_discard_i64(t0);
+	tcg_gen_discard_i64(t1);
 }
 
 /* Extended arithmetics on CRIS.  */
@@ -305,6 +319,7 @@
 	if (flag)
 		tcg_gen_shri_tl(c, c, flag);
 	tcg_gen_add_tl(d, d, c);
+	tcg_gen_discard_tl(c);
 }
 
 static inline void t_gen_addx_carry(TCGv d)
@@ -323,6 +338,8 @@
 
 	tcg_gen_and_tl(x, x, c);
 	tcg_gen_add_tl(d, d, x);        
+	tcg_gen_discard_tl(x);
+	tcg_gen_discard_tl(c);
 }
 
 static inline void t_gen_subx_carry(TCGv d)
@@ -341,6 +358,8 @@
 
 	tcg_gen_and_tl(x, x, c);
 	tcg_gen_sub_tl(d, d, x);
+	tcg_gen_discard_tl(x);
+	tcg_gen_discard_tl(c);
 }
 
 /* Swap the two bytes within each half word of the s operand.
@@ -359,6 +378,8 @@
 	tcg_gen_shri_tl(t, org_s, 8);
 	tcg_gen_andi_tl(t, t, 0x00ff00ff);
 	tcg_gen_or_tl(d, d, t);
+	tcg_gen_discard_tl(t);
+	tcg_gen_discard_tl(org_s);
 }
 
 /* Swap the halfwords of the s operand.  */
@@ -371,6 +392,7 @@
 	tcg_gen_shli_tl(d, t, 16);
 	tcg_gen_shri_tl(t, t, 16);
 	tcg_gen_or_tl(d, d, t);
+	tcg_gen_discard_tl(t);
 }
 
 /* Reverse the within each byte.
@@ -417,6 +439,8 @@
 		tcg_gen_andi_tl(t, t,  bitrev[i].mask);
 		tcg_gen_or_tl(d, d, t);
 	}
+	tcg_gen_discard_tl(t);
+	tcg_gen_discard_tl(org_s);
 }
 
 static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest)
@@ -449,53 +473,58 @@
 
 static inline void cris_clear_x_flag(DisasContext *dc)
 {
-	if (!dc->flagx_live || dc->cc_op != CC_OP_FLAGS) {
-		t_gen_mov_TN_preg(cpu_T[0], PR_CCS);
-		tcg_gen_andi_i32(cpu_T[0], cpu_T[0], ~X_FLAG);
-		t_gen_mov_preg_TN(PR_CCS, cpu_T[0]);
-		dc->flagx_live = 1;
-		dc->flags_x = 0;
-	}
+	if (!dc->flagx_live 
+	    || (dc->flagx_live && dc->flags_x)
+	    || dc->cc_op != CC_OP_FLAGS)
+		tcg_gen_andi_i32(cpu_PR[PR_CCS], cpu_PR[PR_CCS], ~X_FLAG);
+	dc->flagx_live = 1;
+	dc->flags_x = 0;
 }
 
 static void cris_evaluate_flags(DisasContext *dc)
 {
 	if (!dc->flags_live) {
+		tcg_gen_movi_tl(cc_op, dc->cc_op);
+		tcg_gen_movi_tl(cc_size, dc->cc_size);
+		tcg_gen_movi_tl(cc_mask, dc->cc_mask);
+
 		switch (dc->cc_op)
 		{
 			case CC_OP_MCP:
-				gen_op_evaluate_flags_mcp ();
+				tcg_gen_helper_0_0(helper_evaluate_flags_mcp);
 				break;
 			case CC_OP_MULS:
-				gen_op_evaluate_flags_muls ();
+				tcg_gen_helper_0_0(helper_evaluate_flags_muls);
 				break;
 			case CC_OP_MULU:
-				gen_op_evaluate_flags_mulu ();
+				tcg_gen_helper_0_0(helper_evaluate_flags_mulu);
 				break;
 			case CC_OP_MOVE:
 				switch (dc->cc_size)
 				{
 					case 4:
-						gen_op_evaluate_flags_move_4();
+						tcg_gen_helper_0_0(helper_evaluate_flags_move_4);
 						break;
 					case 2:
-						gen_op_evaluate_flags_move_2();
+						tcg_gen_helper_0_0(helper_evaluate_flags_move_2);
 						break;
 					default:
-						gen_op_evaluate_flags ();
+						tcg_gen_helper_0_0(helper_evaluate_flags);
 						break;
 				}
 				break;
-
+			case CC_OP_FLAGS:
+				/* live.  */
+				break;
 			default:
 			{
 				switch (dc->cc_size)
 				{
 					case 4:
-						gen_op_evaluate_flags_alu_4 ();
+						tcg_gen_helper_0_0(helper_evaluate_flags_alu_4);
 						break;
 					default:
-						gen_op_evaluate_flags ();
+						tcg_gen_helper_0_0(helper_evaluate_flags);
 						break;
 				}
 			}
@@ -525,16 +554,11 @@
 		dc->flags_live = 0;
 }
 
-static void cris_update_cc_op(DisasContext *dc, int op)
+static void cris_update_cc_op(DisasContext *dc, int op, int size)
 {
 	dc->cc_op = op;
-	dc->flags_live = 0;
-	tcg_gen_movi_tl(cc_op, op);
-}
-static void cris_update_cc_size(DisasContext *dc, int size)
-{
 	dc->cc_size = size;
-	tcg_gen_movi_tl(cc_size, size);
+	dc->flags_live = 0;
 }
 
 /* op is the operation.
@@ -545,10 +569,8 @@
 {
 	int writeback = 1;
 	if (dc->update_cc) {
-		cris_update_cc_op(dc, op);
-		cris_update_cc_size(dc, size);
+		cris_update_cc_op(dc, op, size);
 		tcg_gen_mov_tl(cc_dest, cpu_T[0]);
-		tcg_gen_movi_tl(cc_mask, dc->cc_mask);
 
 		/* FIXME: This shouldn't be needed. But we don't pass the
 		 tests without it. Investigate.  */
@@ -623,6 +645,7 @@
 			mof = tcg_temp_new(TCG_TYPE_TL);
 			t_gen_muls(cpu_T[0], mof, cpu_T[0], cpu_T[1]);
 			t_gen_mov_preg_TN(PR_MOF, mof);
+			tcg_gen_discard_tl(mof);
 		}
 		break;
 		case CC_OP_MULU:
@@ -631,6 +654,7 @@
 			mof = tcg_temp_new(TCG_TYPE_TL);
 			t_gen_mulu(cpu_T[0], mof, cpu_T[0], cpu_T[1]);
 			t_gen_mov_preg_TN(PR_MOF, mof);
+			tcg_gen_discard_tl(mof);
 		}
 		break;
 		case CC_OP_DSTEP:
@@ -820,10 +844,10 @@
 		gen_tst_cc (dc, cond);
 		gen_op_evaluate_bcc ();
 	}
-	tcg_gen_movi_tl(cpu_T[0], dc->delayed_pc);
-	t_gen_mov_env_TN(btarget, cpu_T[0]);
+	tcg_gen_movi_tl(env_btarget, dc->delayed_pc);
 }
 
+
 /* Dynamic jumps, when the dest is in a live reg for example.  */
 void cris_prepare_dyn_jmp (DisasContext *dc)
 {
@@ -844,36 +868,46 @@
 	dc->bcc = CC_A;
 }
 
-void gen_load_T0_T0 (DisasContext *dc, unsigned int size, int sign)
+void gen_load(DisasContext *dc, TCGv dst, TCGv addr, 
+	      unsigned int size, int sign)
 {
+	int mem_index = cpu_mmu_index(dc->env);
+
+	/* FIXME: qemu_ld does not act as a barrier?  */
+	tcg_gen_helper_0_0(helper_dummy);
+	cris_evaluate_flags(dc);
 	if (size == 1) {
 		if (sign)
-			gen_op_ldb_T0_T0(dc);
+			tcg_gen_qemu_ld8s(dst, addr, mem_index);
 		else
-			gen_op_ldub_T0_T0(dc);
+			tcg_gen_qemu_ld8u(dst, addr, mem_index);
 	}
 	else if (size == 2) {
 		if (sign)
-			gen_op_ldw_T0_T0(dc);
+			tcg_gen_qemu_ld16s(dst, addr, mem_index);
 		else
-			gen_op_lduw_T0_T0(dc);
+			tcg_gen_qemu_ld16u(dst, addr, mem_index);
 	}
 	else {
-		gen_op_ldl_T0_T0(dc);
+		tcg_gen_qemu_ld32s(dst, addr, mem_index);
 	}
 }
 
 void gen_store_T0_T1 (DisasContext *dc, unsigned int size)
 {
+	int mem_index = cpu_mmu_index(dc->env);
+
+	/* FIXME: qemu_st does not act as a barrier?  */
+	tcg_gen_helper_0_0(helper_dummy);
+	cris_evaluate_flags(dc);
+
 	/* Remember, operands are flipped. CRIS has reversed order.  */
-	if (size == 1) {
-		gen_op_stb_T0_T1(dc);
-	}
-	else if (size == 2) {
-		gen_op_stw_T0_T1(dc);
-	}
+	if (size == 1)
+		tcg_gen_qemu_st8(cpu_T[1], cpu_T[0], mem_index);
+	else if (size == 2)
+		tcg_gen_qemu_st16(cpu_T[1], cpu_T[0], mem_index);
 	else
-		gen_op_stl_T0_T1(dc);
+		tcg_gen_qemu_st32(cpu_T[1], cpu_T[0], mem_index);
 }
 
 static inline void t_gen_sext(TCGv d, TCGv s, int size)
@@ -995,9 +1029,7 @@
 		tcg_gen_movi_tl(cpu_T[1], imm);
 		dc->postinc = 0;
 	} else {
-		t_gen_mov_TN_reg(cpu_T[0], rs);
-		gen_load_T0_T0(dc, memsize, 0);
-		tcg_gen_mov_tl(cpu_T[1], cpu_T[0]);
+		gen_load(dc, cpu_T[1], cpu_R[rs], memsize, 0);
 		if (s_ext)
 			t_gen_sext(cpu_T[1], cpu_T[1], memsize);
 		else
@@ -1021,6 +1053,8 @@
 }
 #endif
 
+/* Start of insn decoders.  */
+
 static unsigned int dec_bccq(DisasContext *dc)
 {
 	int32_t offset;
@@ -1043,7 +1077,7 @@
 }
 static unsigned int dec_addoq(DisasContext *dc)
 {
-	uint32_t imm;
+	int32_t imm;
 
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 7);
 	imm = sign_extend(dc->op1, 7);
@@ -1051,9 +1085,7 @@
 	DIS(fprintf (logfile, "addoq %d, $r%u\n", imm, dc->op2));
 	cris_cc_mask(dc, 0);
 	/* Fetch register operand,  */
-	t_gen_mov_TN_reg(cpu_T[0], dc->op2);
-	tcg_gen_movi_tl(cpu_T[1], imm);
-	crisv32_alu_op(dc, CC_OP_ADD, R_ACR, 4);
+	tcg_gen_addi_tl(cpu_R[R_ACR], cpu_R[dc->op2], imm);
 	return 2;
 }
 static unsigned int dec_addq(DisasContext *dc)
@@ -1140,7 +1172,7 @@
 	t_gen_mov_TN_im(cpu_T[1], dc->op1);
 	crisv32_alu_op(dc, CC_OP_BTST, dc->op2, 4);
 
-	cris_update_cc_op(dc, CC_OP_FLAGS);
+	cris_update_cc_op(dc, CC_OP_FLAGS, 4);
 	t_gen_mov_preg_TN(PR_CCS, cpu_T[0]);
 	dc->flags_live = 1;
 	return 2;
@@ -1461,11 +1493,11 @@
 static unsigned int dec_addi_acr(DisasContext *dc)
 {
 	DIS(fprintf (logfile, "addi.%c $r%u, $r%u, $acr\n",
-		    memsize_char(memsize_zz(dc)), dc->op2, dc->op1));
+		  memsize_char(memsize_zz(dc)), dc->op2, dc->op1));
 	cris_cc_mask(dc, 0);
 	dec_prep_alu_r(dc, dc->op1, dc->op2, 4, 0);
 	t_gen_lsl(cpu_T[0], cpu_T[0], tcg_const_tl(dc->zzsize));
-
+	
 	tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
 	t_gen_mov_reg_TN(R_ACR, cpu_T[0]);
 	return 2;
@@ -1490,7 +1522,7 @@
 	dec_prep_alu_r(dc, dc->op1, dc->op2, 4, 0);
 	crisv32_alu_op(dc, CC_OP_BTST, dc->op2, 4);
 
-	cris_update_cc_op(dc, CC_OP_FLAGS);
+	cris_update_cc_op(dc, CC_OP_FLAGS, 4);
 	t_gen_mov_preg_TN(PR_CCS, cpu_T[0]);
 	dc->flags_live = 1;
 	return 2;
@@ -1630,12 +1662,15 @@
 
 	/* Simply decode the flags.  */
 	cris_evaluate_flags (dc);
-	cris_update_cc_op(dc, CC_OP_FLAGS);
+	cris_update_cc_op(dc, CC_OP_FLAGS, 4);
+	tcg_gen_movi_tl(cc_op, dc->cc_op);
+
 	if (set)
 		gen_op_setf(flags);
 	else
 		gen_op_clrf(flags);
 	dc->flags_live = 1;
+	dc->clear_x = 0;
 	return 2;
 }
 
@@ -1669,8 +1704,25 @@
 {
 	DIS(fprintf (logfile, "move $r%u, $p%u\n", dc->op1, dc->op2));
 	cris_cc_mask(dc, 0);
-	t_gen_mov_TN_reg(cpu_T[0], dc->op1);
+
+	if (dc->op2 == PR_CCS) {
+		cris_evaluate_flags(dc);
+		t_gen_mov_TN_reg(cpu_T[0], dc->op1);
+		if (dc->user) {
+			/* User space is not allowed to touch all flags.  */
+			tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0x39f);
+			tcg_gen_andi_tl(cpu_T[1], cpu_PR[PR_CCS], ~0x39f);
+			tcg_gen_or_tl(cpu_T[0], cpu_T[1], cpu_T[0]);
+		}
+	}
+	else
+		t_gen_mov_TN_reg(cpu_T[0], dc->op1);
+
 	t_gen_mov_preg_TN(dc->op2, cpu_T[0]);
+	if (dc->op2 == PR_CCS) {
+		cris_update_cc_op(dc, CC_OP_FLAGS, 4);
+		dc->flags_live = 1;
+	}
 	return 2;
 }
 static unsigned int dec_move_pr(DisasContext *dc)
@@ -1681,7 +1733,10 @@
 	   Treat it specially. */
 	if (dc->op2 == 0)
 		tcg_gen_movi_tl(cpu_T[1], 0);
-	else
+	else if (dc->op2 == PR_CCS) {
+		cris_evaluate_flags(dc);
+		t_gen_mov_TN_preg(cpu_T[1], dc->op2);
+	} else
 		t_gen_mov_TN_preg(cpu_T[1], dc->op2);
 	crisv32_alu_op(dc, CC_OP_MOVE, dc->op1, preg_sizes[dc->op2]);
 	return 2;
@@ -1696,8 +1751,8 @@
 		    dc->op1, dc->postinc ? "+]" : "]",
 		    dc->op2));
 
-	cris_cc_mask(dc, CC_MASK_NZ);
 	insn_len = dec_prep_alu_m(dc, 0, memsize);
+	cris_cc_mask(dc, CC_MASK_NZ);
 	crisv32_alu_op(dc, CC_OP_MOVE, dc->op2, memsize);
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1713,8 +1768,8 @@
 		    dc->op2));
 
 	/* sign extend.  */
-	cris_cc_mask(dc, CC_MASK_NZ);
 	insn_len = dec_prep_alu_m(dc, 1, memsize);
+	cris_cc_mask(dc, CC_MASK_NZ);
 	crisv32_alu_op(dc, CC_OP_MOVE, dc->op2, 4);
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1730,8 +1785,8 @@
 		    dc->op2));
 
 	/* sign extend.  */
-	cris_cc_mask(dc, CC_MASK_NZVC);
 	insn_len = dec_prep_alu_m(dc, 0, memsize);
+	cris_cc_mask(dc, CC_MASK_NZVC);
 	crisv32_alu_op(dc, CC_OP_ADD, dc->op2, 4);
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1747,8 +1802,8 @@
 		    dc->op2));
 
 	/* sign extend.  */
-	cris_cc_mask(dc, CC_MASK_NZVC);
 	insn_len = dec_prep_alu_m(dc, 1, memsize);
+	cris_cc_mask(dc, CC_MASK_NZVC);
 	crisv32_alu_op(dc, CC_OP_ADD, dc->op2, 4);
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1764,8 +1819,8 @@
 		    dc->op2));
 
 	/* sign extend.  */
-	cris_cc_mask(dc, CC_MASK_NZVC);
 	insn_len = dec_prep_alu_m(dc, 0, memsize);
+	cris_cc_mask(dc, CC_MASK_NZVC);
 	crisv32_alu_op(dc, CC_OP_SUB, dc->op2, 4);
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1781,8 +1836,8 @@
 		    dc->op2));
 
 	/* sign extend.  */
-	cris_cc_mask(dc, CC_MASK_NZVC);
 	insn_len = dec_prep_alu_m(dc, 1, memsize);
+	cris_cc_mask(dc, CC_MASK_NZVC);
 	crisv32_alu_op(dc, CC_OP_SUB, dc->op2, 4);
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1798,8 +1853,8 @@
 		    dc->op1, dc->postinc ? "+]" : "]",
 		    dc->op2));
 
-	cris_cc_mask(dc, CC_MASK_NZ);
 	insn_len = dec_prep_alu_m(dc, 0, memsize);
+	cris_cc_mask(dc, CC_MASK_NZ);
 	crisv32_alu_op(dc, CC_OP_MOVE, dc->op2, 4);
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1814,8 +1869,8 @@
 		    dc->op1, dc->postinc ? "+]" : "]",
 		    dc->op2));
 
-	cris_cc_mask(dc, CC_MASK_NZVC);
 	insn_len = dec_prep_alu_m(dc, 0, memsize);
+	cris_cc_mask(dc, CC_MASK_NZVC);
 	crisv32_alu_op(dc, CC_OP_CMP, dc->op2, 4);
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1830,8 +1885,8 @@
 		    dc->op1, dc->postinc ? "+]" : "]",
 		    dc->op2));
 
-	cris_cc_mask(dc, CC_MASK_NZVC);
 	insn_len = dec_prep_alu_m(dc, 1, memsize);
+	cris_cc_mask(dc, CC_MASK_NZVC);
 	crisv32_alu_op(dc, CC_OP_CMP, dc->op2, memsize_zz(dc));
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1846,8 +1901,8 @@
 		    dc->op1, dc->postinc ? "+]" : "]",
 		    dc->op2));
 
-	cris_cc_mask(dc, CC_MASK_NZVC);
 	insn_len = dec_prep_alu_m(dc, 0, memsize);
+	cris_cc_mask(dc, CC_MASK_NZVC);
 	crisv32_alu_op(dc, CC_OP_CMP, dc->op2, memsize_zz(dc));
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1862,9 +1917,10 @@
 		    dc->op1, dc->postinc ? "+]" : "]",
 		    dc->op2));
 
+	insn_len = dec_prep_alu_m(dc, 0, memsize);
 	cris_cc_mask(dc, CC_MASK_NZ);
 	gen_op_clrf(3);
-	insn_len = dec_prep_alu_m(dc, 0, memsize);
+
 	tcg_gen_mov_tl(cpu_T[0], cpu_T[1]);
 	tcg_gen_movi_tl(cpu_T[1], 0);
 	crisv32_alu_op(dc, CC_OP_CMP, dc->op2, memsize_zz(dc));
@@ -1881,8 +1937,8 @@
 		    dc->op1, dc->postinc ? "+]" : "]",
 		    dc->op2));
 
-	cris_cc_mask(dc, CC_MASK_NZ);
 	insn_len = dec_prep_alu_m(dc, 0, memsize);
+	cris_cc_mask(dc, CC_MASK_NZ);
 	crisv32_alu_op(dc, CC_OP_AND, dc->op2, memsize_zz(dc));
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1897,8 +1953,8 @@
 		    dc->op1, dc->postinc ? "+]" : "]",
 		    dc->op2));
 
-	cris_cc_mask(dc, CC_MASK_NZVC);
 	insn_len = dec_prep_alu_m(dc, 0, memsize);
+	cris_cc_mask(dc, CC_MASK_NZVC);
 	crisv32_alu_op(dc, CC_OP_ADD, dc->op2, memsize_zz(dc));
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1913,8 +1969,8 @@
 		    dc->op1, dc->postinc ? "+]" : "]",
 		    dc->op2));
 
-	cris_cc_mask(dc, 0);
 	insn_len = dec_prep_alu_m(dc, 1, memsize);
+	cris_cc_mask(dc, 0);
 	crisv32_alu_op(dc, CC_OP_ADD, R_ACR, 4);
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1929,8 +1985,8 @@
 		    dc->op1, dc->postinc ? "+]" : "]",
 		    dc->op2));
 
-	cris_cc_mask(dc, CC_MASK_NZ);
 	insn_len = dec_prep_alu_m(dc, 0, memsize);
+	cris_cc_mask(dc, CC_MASK_NZ);
 	crisv32_alu_op(dc, CC_OP_BOUND, dc->op2, 4);
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1944,8 +2000,8 @@
 		    dc->op2));
 
 	cris_evaluate_flags(dc);
-	cris_cc_mask(dc, CC_MASK_NZVC);
 	insn_len = dec_prep_alu_m(dc, 0, 4);
+	cris_cc_mask(dc, CC_MASK_NZVC);
 	crisv32_alu_op(dc, CC_OP_ADDC, dc->op2, 4);
 	do_postinc(dc, 4);
 	return insn_len;
@@ -1960,8 +2016,8 @@
 		    dc->op1, dc->postinc ? "+]" : "]",
 		    dc->op2, dc->ir, dc->zzsize));
 
-	cris_cc_mask(dc, CC_MASK_NZVC);
 	insn_len = dec_prep_alu_m(dc, 0, memsize);
+	cris_cc_mask(dc, CC_MASK_NZVC);
 	crisv32_alu_op(dc, CC_OP_SUB, dc->op2, memsize);
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1976,8 +2032,8 @@
 		    dc->op1, dc->postinc ? "+]" : "]",
 		    dc->op2, dc->pc));
 
-	cris_cc_mask(dc, CC_MASK_NZ);
 	insn_len = dec_prep_alu_m(dc, 0, memsize);
+	cris_cc_mask(dc, CC_MASK_NZ);
 	crisv32_alu_op(dc, CC_OP_OR, dc->op2, memsize_zz(dc));
 	do_postinc(dc, memsize);
 	return insn_len;
@@ -1994,8 +2050,18 @@
 		    dc->postinc ? "+]" : "]",
 		    dc->op2));
 
-	cris_cc_mask(dc, 0);
 	insn_len = dec_prep_alu_m(dc, 0, memsize);
+	cris_cc_mask(dc, 0);
+	if (dc->op2 == PR_CCS) {
+		cris_evaluate_flags(dc);
+		if (dc->user) {
+			/* User space is not allowed to touch all flags.  */
+			tcg_gen_andi_tl(cpu_T[1], cpu_T[1], 0x39f);
+			tcg_gen_andi_tl(cpu_T[0], cpu_PR[PR_CCS], ~0x39f);
+			tcg_gen_or_tl(cpu_T[1], cpu_T[0], cpu_T[1]);
+		}
+	}
+
 	t_gen_mov_preg_TN(dc->op2, cpu_T[1]);
 
 	do_postinc(dc, memsize);
@@ -2012,11 +2078,11 @@
 		     memsize_char(memsize), 
 		     dc->op2, dc->op1, dc->postinc ? "+]" : "]"));
 
-	cris_cc_mask(dc, 0);
 	/* prepare store. Address in T0, value in T1.  */
 	t_gen_mov_TN_preg(cpu_T[1], dc->op2);
 	t_gen_mov_TN_reg(cpu_T[0], dc->op1);
 	gen_store_T0_T1(dc, memsize);
+	cris_cc_mask(dc, 0);
 	if (dc->postinc)
 	{
 		tcg_gen_addi_tl(cpu_T[0], cpu_T[0], memsize);
@@ -2032,19 +2098,20 @@
 	DIS(fprintf (logfile, "movem [$r%u%s, $r%u\n", dc->op1,
 		    dc->postinc ? "+]" : "]", dc->op2));
 
-	cris_cc_mask(dc, 0);
 	/* fetch the address into T0 and T1.  */
 	t_gen_mov_TN_reg(cpu_T[1], dc->op1);
 	for (i = 0; i <= dc->op2; i++) {
 		/* Perform the load onto regnum i. Always dword wide.  */
 		tcg_gen_mov_tl(cpu_T[0], cpu_T[1]);
-		gen_load_T0_T0(dc, 4, 0);
-		t_gen_mov_reg_TN(i, cpu_T[0]);
+		gen_load(dc, cpu_R[i], cpu_T[1], 4, 0);
 		tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 4);
 	}
 	/* writeback the updated pointer value.  */
 	if (dc->postinc)
 		t_gen_mov_reg_TN(dc->op1, cpu_T[1]);
+
+	/* gen_load might want to evaluate the previous insn's flags.  */
+	cris_cc_mask(dc, 0);
 	return 2;
 }
 
@@ -2055,7 +2122,6 @@
 	DIS(fprintf (logfile, "movem $r%u, [$r%u%s\n", dc->op2, dc->op1,
 		     dc->postinc ? "+]" : "]"));
 
-	cris_cc_mask(dc, 0);
 	for (i = 0; i <= dc->op2; i++) {
 		/* Fetch register i into T1.  */
 		t_gen_mov_TN_reg(cpu_T[1], i);
@@ -2073,6 +2139,7 @@
 		/* writeback the updated pointer value.  */
 		t_gen_mov_reg_TN(dc->op1, cpu_T[0]);
 	}
+	cris_cc_mask(dc, 0);
 	return 2;
 }
 
@@ -2085,7 +2152,6 @@
 	DIS(fprintf (logfile, "move.%d $r%u, [$r%u]\n",
 		     memsize, dc->op2, dc->op1));
 
-	cris_cc_mask(dc, 0);
 	/* prepare store.  */
 	t_gen_mov_TN_reg(cpu_T[0], dc->op1);
 	t_gen_mov_TN_reg(cpu_T[1], dc->op2);
@@ -2095,6 +2161,7 @@
 		tcg_gen_addi_tl(cpu_T[0], cpu_T[0], memsize);
 		t_gen_mov_reg_TN(dc->op1, cpu_T[0]);
 	}
+	cris_cc_mask(dc, 0);
 	return 2;
 }
 
@@ -2112,13 +2179,17 @@
 {
 	unsigned int rd;
 	int32_t imm;
+	int32_t pc;
 
 	rd = dc->op2;
 
 	cris_cc_mask(dc, 0);
 	imm = ldl_code(dc->pc + 2);
 	DIS(fprintf (logfile, "lapc 0x%x, $r%u\n", imm + dc->pc, dc->op2));
-	t_gen_mov_reg_TN(rd, tcg_const_tl(dc->pc + imm));
+
+	pc = dc->pc;
+	pc += imm;
+	t_gen_mov_reg_TN(rd, tcg_const_tl(pc));
 	return 6;
 }
 
@@ -2127,9 +2198,10 @@
 {
 	DIS(fprintf (logfile, "jump $p%u\n", dc->op2));
 	cris_cc_mask(dc, 0);
-	/* Store the return address in Pd.  */
+
 	t_gen_mov_TN_preg(cpu_T[0], dc->op2);
-	t_gen_mov_env_TN(btarget, cpu_T[0]);
+	/* rete will often have the low bit set to indicate a delay slot.  */
+	tcg_gen_andi_tl(env_btarget, cpu_T[0], ~1);
 	cris_prepare_dyn_jmp(dc);
 	return 2;
 }
@@ -2139,11 +2211,12 @@
 {
 	DIS(fprintf (logfile, "jas $r%u, $p%u\n", dc->op1, dc->op2));
 	cris_cc_mask(dc, 0);
-	/* Stor the return address in Pd.  */
-	t_gen_mov_TN_reg(cpu_T[0], dc->op1);
-	t_gen_mov_env_TN(btarget, cpu_T[0]);
-	tcg_gen_movi_tl(cpu_T[0], dc->pc + 4);
-	t_gen_mov_preg_TN(dc->op2, cpu_T[0]);
+	/* Store the return address in Pd.  */
+	tcg_gen_mov_tl(env_btarget, cpu_R[dc->op1]);
+	if (dc->op2 > 15)
+		abort();
+	tcg_gen_movi_tl(cpu_PR[dc->op2], dc->pc + 4);
+
 	cris_prepare_dyn_jmp(dc);
 	return 2;
 }
@@ -2157,7 +2230,7 @@
 	DIS(fprintf (logfile, "jas 0x%x\n", imm));
 	cris_cc_mask(dc, 0);
 	/* Stor the return address in Pd.  */
-	t_gen_mov_env_TN(btarget, tcg_const_tl(imm));
+	tcg_gen_movi_tl(env_btarget, imm);
 	t_gen_mov_preg_TN(dc->op2, tcg_const_tl(dc->pc + 8));
 	cris_prepare_dyn_jmp(dc);
 	return 6;
@@ -2260,6 +2333,11 @@
 			/* rfe.  */
 			cris_evaluate_flags(dc);
 			gen_op_ccs_rshift();
+			/* FIXME: don't set the P-FLAG if R is set.  */
+			tcg_gen_ori_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], P_FLAG);
+			/* Debug helper.  */
+			tcg_gen_helper_0_0(helper_rfe);
+			dc->is_jmp = DISAS_UPDATE;
 			break;
 		case 5:
 			/* rfn.  */
@@ -2271,7 +2349,7 @@
 			t_gen_mov_env_TN(pc, cpu_T[0]);
 			/* Breaks start at 16 in the exception vector.  */
 			gen_op_break_im(dc->op1 + 16);
-			dc->is_jmp = DISAS_SWI;
+			dc->is_jmp = DISAS_UPDATE;
 			break;
 		default:
 			printf ("op2=%x\n", dc->op2);
@@ -2477,6 +2555,9 @@
 	if (!logfile)
 		logfile = stderr;
 
+	if (tb->pc & 1)
+		cpu_abort(env, "unaligned pc=%x erp=%x\n",
+			  env->pc, env->pregs[PR_ERP]);
 	pc_start = tb->pc;
 	dc->env = env;
 	dc->tb = tb;
@@ -2484,10 +2565,35 @@
 	gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
 
 	dc->is_jmp = DISAS_NEXT;
+	dc->ppc = pc_start;
 	dc->pc = pc_start;
 	dc->singlestep_enabled = env->singlestep_enabled;
+	dc->flags_live = 1;
 	dc->flagx_live = 0;
 	dc->flags_x = 0;
+	dc->cc_mask = 0;
+	cris_update_cc_op(dc, CC_OP_FLAGS, 4);
+
+	dc->user = env->pregs[PR_CCS] & U_FLAG;
+	dc->delayed_branch = 0;
+
+	if (loglevel & CPU_LOG_TB_IN_ASM) {
+		fprintf(logfile,
+			"search=%d pc=%x ccs=%x pid=%x usp=%x\n"
+			"%x.%x.%x.%x\n"
+			"%x.%x.%x.%x\n"
+			"%x.%x.%x.%x\n"
+			"%x.%x.%x.%x\n",
+			search_pc, env->pc, env->pregs[PR_CCS], 
+			env->pregs[PR_PID], env->pregs[PR_USP],
+			env->regs[0], env->regs[1], env->regs[2], env->regs[3],
+			env->regs[4], env->regs[5], env->regs[6], env->regs[7],
+			env->regs[8], env->regs[9],
+			env->regs[10], env->regs[11],
+			env->regs[12], env->regs[13],
+			env->regs[14], env->regs[15]);
+		
+	}
 
 	next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
 	lj = -1;
@@ -2505,14 +2611,23 @@
 				while (lj < j)
 					gen_opc_instr_start[lj++] = 0;
 			}
-			gen_opc_pc[lj] = dc->pc;
-			gen_opc_instr_start[lj] = 1;
+			if (dc->delayed_branch == 1) {
+				gen_opc_pc[lj] = dc->ppc | 1;
+				gen_opc_instr_start[lj] = 0;
+			}
+			else {
+				gen_opc_pc[lj] = dc->pc;
+				gen_opc_instr_start[lj] = 1;
+			}
 		}
 
+		dc->clear_x = 1;
 		insn_len = cris_decoder(dc);
 		STATS(gen_op_exec_insn());
+		dc->ppc = dc->pc;
 		dc->pc += insn_len;
-		cris_clear_x_flag(dc);
+		if (dc->clear_x)
+			cris_clear_x_flag(dc);
 
 		/* Check for delayed branches here. If we do it before
 		   actually genereating any host code, the simulator will just
@@ -2523,12 +2638,12 @@
 			{
 				if (dc->bcc == CC_A) {
 					gen_op_jmp1 ();
-					dc->is_jmp = DISAS_UPDATE;
+					dc->is_jmp = DISAS_JUMP;
 				}
 				else {
 					/* Conditional jmp.  */
 					gen_op_cc_jmp (dc->delayed_pc, dc->pc);
-					dc->is_jmp = DISAS_UPDATE;
+					dc->is_jmp = DISAS_JUMP;
 				}
 			}
 		}
@@ -2536,11 +2651,19 @@
 		if (env->singlestep_enabled)
 			break;
 	} while (!dc->is_jmp && gen_opc_ptr < gen_opc_end
-		 && dc->pc < next_page_start);
+		 && ((dc->pc < next_page_start) || dc->delayed_branch));
+
+	if (dc->delayed_branch == 1) {
+		/* Reexecute the last insn.  */
+		dc->pc = dc->ppc;
+	}
 
 	if (!dc->is_jmp) {
+		D(printf("!jmp pc=%x jmp=%d db=%d\n", dc->pc, 
+			 dc->is_jmp, dc->delayed_branch));
+		/* T0 and env_pc should hold the new pc.  */
 		tcg_gen_movi_tl(cpu_T[0], dc->pc);
-		t_gen_mov_env_TN(pc, cpu_T[0]);
+		tcg_gen_mov_tl(env_pc, cpu_T[0]);
 	}
 
 	cris_evaluate_flags (dc);
@@ -2580,7 +2703,8 @@
 		fprintf(logfile, "--------------\n");
 		fprintf(logfile, "IN: %s\n", lookup_symbol(pc_start));
 		target_disas(logfile, pc_start, dc->pc + 4 - pc_start, 0);
-		fprintf(logfile, "\n");
+		fprintf(logfile, "\nisize=%d osize=%d\n", 
+			dc->pc - pc_start, gen_opc_ptr - gen_opc_buf);
 	}
 #endif
 	return 0;
@@ -2626,7 +2750,7 @@
 			cpu_fprintf(f, "\n");
 	}
 	srs = env->pregs[PR_SRS];
-	cpu_fprintf(f, "\nsupport function regs bank %d:\n", srs);
+	cpu_fprintf(f, "\nsupport function regs bank %x:\n", srs);
 	if (srs < 256) {
 		for (i = 0; i < 16; i++) {
 			cpu_fprintf(f, "s%2.2d=%8.8x ",
@@ -2682,6 +2806,13 @@
 				     offsetof(CPUState, cc_mask),
 				     "cc_mask");
 
+	env_pc = tcg_global_mem_new(TCG_TYPE_PTR, TCG_AREG0, 
+				     offsetof(CPUState, pc),
+				     "pc");
+	env_btarget = tcg_global_mem_new(TCG_TYPE_PTR, TCG_AREG0, 
+				     offsetof(CPUState, btarget),
+				     "btarget");
+
 	for (i = 0; i < 16; i++) {
 		cpu_R[i] = tcg_global_mem_new(TCG_TYPE_PTR, TCG_AREG0, 
 					      offsetof(CPUState, regs[i]), 
@@ -2693,6 +2824,21 @@
 					       pregnames[i]);
 	}
 
+	TCG_HELPER(helper_tlb_update);
+	TCG_HELPER(helper_tlb_flush);
+	TCG_HELPER(helper_rfe);
+	TCG_HELPER(helper_store);
+	TCG_HELPER(helper_dump);
+	TCG_HELPER(helper_dummy);
+
+	TCG_HELPER(helper_evaluate_flags_muls);
+	TCG_HELPER(helper_evaluate_flags_mulu);
+	TCG_HELPER(helper_evaluate_flags_mcp);
+	TCG_HELPER(helper_evaluate_flags_alu_4);
+	TCG_HELPER(helper_evaluate_flags_move_4);
+	TCG_HELPER(helper_evaluate_flags_move_2);
+	TCG_HELPER(helper_evaluate_flags);
+
 	cpu_reset(env);
 	return env;
 }
@@ -2701,10 +2847,17 @@
 {
 	memset(env, 0, offsetof(CPUCRISState, breakpoints));
 	tlb_flush(env, 1);
+
+#if defined(CONFIG_USER_ONLY)
+	/* start in user mode with interrupts enabled.  */
+	env->pregs[PR_CCS] |= U_FLAG | I_FLAG;
+#else
+	env->pregs[PR_CCS] = 0;
+#endif
 }
 
 void gen_pc_load(CPUState *env, struct TranslationBlock *tb,
                  unsigned long searched_pc, int pc_pos, void *puc)
 {
-    env->pregs[PR_ERP] = gen_opc_pc[pc_pos];
+    env->pc = gen_opc_pc[pc_pos];
 }
