tcg: Optimize qemu_ld/st by generating slow paths at the end of a block

Add optimized TCG qemu_ld/st generation which locates the code of TLB miss
cases at the end of a block after generating the other IRs.
Currently, this optimization supports only i386 and x86_64 hosts.

Signed-off-by: Yeongkyoon Lee <yeongkyoon.lee@samsung.com>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 1133438..42052db 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -299,6 +299,14 @@
 
     gen_opc_ptr = gen_opc_buf;
     gen_opparam_ptr = gen_opparam_buf;
+
+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+    /* Initialize qemu_ld/st labels to assist code generation at the end of TB
+       for TLB miss cases at the end of TB */
+    s->qemu_ldst_labels = tcg_malloc(sizeof(TCGLabelQemuLdst) *
+                                     TCG_MAX_QEMU_LDST);
+    s->nb_qemu_ldst_labels = 0;
+#endif
 }
 
 static inline void tcg_temp_alloc(TCGContext *s, int n)
@@ -2314,6 +2322,10 @@
 #endif
     }
  the_end:
+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+    /* Generate TB finalization at the end of block */
+    tcg_out_tb_finalize(s);
+#endif
     return -1;
 }