target/xtensa: move WINDOW_BASE SR update to postprocessing

Opcodes that modify WINDOW_BASE SR don't have dependency on opcodes that
use windowed registers. If such opcodes are combined in a single
instruction they may not be correctly ordered. Instead of adding said
dependency use temporary register to store changed WINDOW_BASE value and
do actual register window rotation as a postprocessing step.
Not all opcodes that change WINDOW_BASE need this: retw, rfwo and rfwu
are also jump opcodes, so they are guaranteed to be translated last and
thus will not affect other opcodes in the same instruction.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index e7ef6a8..cbb8d87 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -82,6 +82,7 @@
 static TCGv_i32 cpu_FR[16];
 static TCGv_i32 cpu_SR[256];
 static TCGv_i32 cpu_UR[256];
+static TCGv_i32 cpu_windowbase_next;
 
 #include "exec/gen-icount.h"
 
@@ -253,6 +254,11 @@
                     uregnames[i].name);
         }
     }
+
+    cpu_windowbase_next =
+        tcg_global_mem_new_i32(cpu_env,
+                               offsetof(CPUXtensaState, windowbase_next),
+                               "windowbase_next");
 }
 
 static inline bool option_enabled(DisasContext *dc, int opt)
@@ -557,7 +563,7 @@
 #ifndef CONFIG_USER_ONLY
 static void gen_wsr_windowbase(DisasContext *dc, uint32_t sr, TCGv_i32 v)
 {
-    gen_helper_wsr_windowbase(cpu_env, v);
+    tcg_gen_mov_i32(cpu_windowbase_next, v);
 }
 
 static void gen_wsr_windowstart(DisasContext *dc, uint32_t sr, TCGv_i32 v)
@@ -859,6 +865,9 @@
     if (op_flags & XTENSA_OP_CHECK_INTERRUPTS) {
         gen_check_interrupts(dc);
     }
+    if (op_flags & XTENSA_OP_SYNC_REGISTER_WINDOW) {
+        gen_helper_sync_windowbase(cpu_env);
+    }
     if (op_flags & XTENSA_OP_EXIT_TB_M1) {
         slot = -1;
     }
@@ -2268,9 +2277,7 @@
 static void translate_rotw(DisasContext *dc, const uint32_t arg[],
                            const uint32_t par[])
 {
-    TCGv_i32 tmp = tcg_const_i32(arg[0]);
-    gen_helper_rotw(cpu_env, tmp);
-    tcg_temp_free(tmp);
+    tcg_gen_addi_i32(cpu_windowbase_next, cpu_SR[WINDOW_BASE], arg[0]);
 }
 
 static void translate_rsil(DisasContext *dc, const uint32_t arg[],
@@ -2972,7 +2979,8 @@
         .translate = translate_entry,
         .test_ill = test_ill_entry,
         .test_overflow = test_overflow_entry,
-        .op_flags = XTENSA_OP_EXIT_TB_M1,
+        .op_flags = XTENSA_OP_EXIT_TB_M1 |
+            XTENSA_OP_SYNC_REGISTER_WINDOW,
     }, {
         .name = "esync",
         .translate = translate_nop,
@@ -3554,7 +3562,9 @@
     }, {
         .name = "rotw",
         .translate = translate_rotw,
-        .op_flags = XTENSA_OP_PRIVILEGED | XTENSA_OP_EXIT_TB_M1,
+        .op_flags = XTENSA_OP_PRIVILEGED |
+            XTENSA_OP_EXIT_TB_M1 |
+            XTENSA_OP_SYNC_REGISTER_WINDOW,
     }, {
         .name = "rsil",
         .translate = translate_rsil,
@@ -4622,7 +4632,9 @@
         .translate = translate_wsr,
         .test_ill = test_ill_wsr,
         .par = (const uint32_t[]){WINDOW_BASE},
-        .op_flags = XTENSA_OP_PRIVILEGED | XTENSA_OP_EXIT_TB_M1,
+        .op_flags = XTENSA_OP_PRIVILEGED |
+            XTENSA_OP_EXIT_TB_M1 |
+            XTENSA_OP_SYNC_REGISTER_WINDOW,
     }, {
         .name = "wsr.windowstart",
         .translate = translate_wsr,
@@ -5108,7 +5120,9 @@
         .translate = translate_xsr,
         .test_ill = test_ill_xsr,
         .par = (const uint32_t[]){WINDOW_BASE},
-        .op_flags = XTENSA_OP_PRIVILEGED | XTENSA_OP_EXIT_TB_M1,
+        .op_flags = XTENSA_OP_PRIVILEGED |
+            XTENSA_OP_EXIT_TB_M1 |
+            XTENSA_OP_SYNC_REGISTER_WINDOW,
     }, {
         .name = "xsr.windowstart",
         .translate = translate_xsr,