tcg: Lower indirect registers in a separate pass

Rather than rely on recursion during the middle of register allocation,
lower indirect registers to loads and stores off the indirect base into
plain temps.

For an x86_64 host, with sufficient registers, this results in identical
code, modulo the actual register assignments.

For an i686 host, with insufficient registers, this means that temps can
be (temporarily) spilled to the stack in order to satisfy an allocation.
This as opposed to the possibility of not being able to spill, to allocate
a register for the indirect base, in order to perform a spill.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 8df7fc7..cffe89b 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -82,33 +82,6 @@
     }
 }
 
-static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
-                                TCGOpcode opc, int nargs)
-{
-    int oi = s->gen_next_op_idx;
-    int pi = s->gen_next_parm_idx;
-    int prev = old_op->prev;
-    int next = old_op - s->gen_op_buf;
-    TCGOp *new_op;
-
-    tcg_debug_assert(oi < OPC_BUF_SIZE);
-    tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
-    s->gen_next_op_idx = oi + 1;
-    s->gen_next_parm_idx = pi + nargs;
-
-    new_op = &s->gen_op_buf[oi];
-    *new_op = (TCGOp){
-        .opc = opc,
-        .args = pi,
-        .prev = prev,
-        .next = next
-    };
-    s->gen_op_buf[prev].next = oi;
-    old_op->prev = oi;
-
-    return new_op;
-}
-
 static int op_bits(TCGOpcode op)
 {
     const TCGOpDef *def = &tcg_op_defs[op];
@@ -1116,7 +1089,7 @@
                 uint64_t a = ((uint64_t)ah << 32) | al;
                 uint64_t b = ((uint64_t)bh << 32) | bl;
                 TCGArg rl, rh;
-                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
                 TCGArg *args2 = &s->gen_opparam_buf[op2->args];
 
                 if (opc == INDEX_op_add2_i32) {
@@ -1142,7 +1115,7 @@
                 uint32_t b = temps[args[3]].val;
                 uint64_t r = (uint64_t)a * b;
                 TCGArg rl, rh;
-                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
                 TCGArg *args2 = &s->gen_opparam_buf[op2->args];
 
                 rl = args[0];