tcg: Add muluh and mulsh opcodes
Use them in places where mulu2 and muls2 are used.
Optimize mulx2 with dead low part to mulxh.
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 51e5092..26ee28b 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -61,6 +61,8 @@
#define TCG_TARGET_HAS_sub2_i32 0
#define TCG_TARGET_HAS_mulu2_i32 0
#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_div_i64 0
#define TCG_TARGET_HAS_rem_i64 0
@@ -87,6 +89,8 @@
#define TCG_TARGET_HAS_sub2_i64 0
#define TCG_TARGET_HAS_mulu2_i64 0
#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
enum {
TCG_AREG0 = TCG_REG_X19,
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 5cd9d6a..ed48092 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -80,6 +80,8 @@
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_div_i32 use_idiv_instructions
#define TCG_TARGET_HAS_rem_i32 0
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
index 25467bd..0f6f2ff 100644
--- a/tcg/hppa/tcg-target.h
+++ b/tcg/hppa/tcg-target.h
@@ -100,6 +100,8 @@
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
/* optional instructions automatically implemented */
#define TCG_TARGET_HAS_neg_i32 0 /* sub rd, 0, rs */
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index e3f6bb9..b7d1a55 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -96,6 +96,8 @@
#define TCG_TARGET_HAS_sub2_i32 1
#define TCG_TARGET_HAS_mulu2_i32 1
#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div2_i64 1
@@ -122,6 +124,8 @@
#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_mulu2_i64 1
#define TCG_TARGET_HAS_muls2_i64 1
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
#endif
#define TCG_TARGET_deposit_i32_valid(ofs, len) \
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index f32d519..ee6b2c8 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -146,6 +146,10 @@
#define TCG_TARGET_HAS_mulu2_i64 0
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_mulsh_i64 0
#define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
#define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index a438950..6cb7c2f 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -89,6 +89,8 @@
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
/* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */
#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
diff --git a/tcg/optimize.c b/tcg/optimize.c
index b35868a..e8dedf3 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -198,6 +198,8 @@
static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
+ uint64_t l64, h64;
+
switch (op) {
CASE_OP_32_64(add):
return x + y;
@@ -290,6 +292,18 @@
case INDEX_op_ext32u_i64:
return (uint32_t)x;
+ case INDEX_op_muluh_i32:
+ return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
+ case INDEX_op_mulsh_i32:
+ return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
+
+ case INDEX_op_muluh_i64:
+ mulu64(&l64, &h64, x, y);
+ return h64;
+ case INDEX_op_mulsh_i64:
+ muls64(&l64, &h64, x, y);
+ return h64;
+
default:
fprintf(stderr,
"Unrecognized operation %d in do_constant_folding.\n", op);
@@ -531,6 +545,8 @@
CASE_OP_32_64(eqv):
CASE_OP_32_64(nand):
CASE_OP_32_64(nor):
+ CASE_OP_32_64(muluh):
+ CASE_OP_32_64(mulsh):
swap_commutative(args[0], &args[1], &args[2]);
break;
CASE_OP_32_64(brcond):
@@ -771,6 +787,8 @@
switch (op) {
CASE_OP_32_64(and):
CASE_OP_32_64(mul):
+ CASE_OP_32_64(muluh):
+ CASE_OP_32_64(mulsh):
if ((temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == 0)) {
s->gen_opc_buf[op_index] = op_to_movi(op);
@@ -882,6 +900,8 @@
CASE_OP_32_64(eqv):
CASE_OP_32_64(nand):
CASE_OP_32_64(nor):
+ CASE_OP_32_64(muluh):
+ CASE_OP_32_64(mulsh):
if (temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[2]].state == TCG_TEMP_CONST) {
s->gen_opc_buf[op_index] = op_to_movi(op);
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index b42d97c..613c5ff 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -96,6 +96,8 @@
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_AREG0 TCG_REG_R27
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 48fc6e2..0789daf 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -95,6 +95,8 @@
#define TCG_TARGET_HAS_sub2_i32 0
#define TCG_TARGET_HAS_mulu2_i32 0
#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_div_i64 1
#define TCG_TARGET_HAS_rem_i64 0
@@ -118,6 +120,8 @@
#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_mulu2_i64 1
#define TCG_TARGET_HAS_muls2_i64 1
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
#define TCG_AREG0 TCG_REG_R27
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 42ca36c..b02f170 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -69,6 +69,8 @@
#define TCG_TARGET_HAS_sub2_i32 1
#define TCG_TARGET_HAS_mulu2_i32 0
#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_div2_i64 1
#define TCG_TARGET_HAS_rot_i64 1
@@ -94,6 +96,8 @@
#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_mulu2_i64 1
#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
extern bool tcg_target_deposit_valid(int ofs, int len);
#define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index dab52d7..1a696bc 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -107,6 +107,8 @@
#define TCG_TARGET_HAS_sub2_i32 1
#define TCG_TARGET_HAS_mulu2_i32 1
#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div_i64 1
@@ -134,6 +136,8 @@
#define TCG_TARGET_HAS_sub2_i64 0
#define TCG_TARGET_HAS_mulu2_i64 0
#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
#endif
#define TCG_AREG0 TCG_REG_I0
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 364964d..3de7545 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -1039,10 +1039,18 @@
t0 = tcg_temp_new_i64();
t1 = tcg_temp_new_i32();
- tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
- TCGV_LOW(arg1), TCGV_LOW(arg2));
- /* Allow the optimizer room to replace mulu2 with two moves. */
- tcg_gen_op0(INDEX_op_nop);
+ if (TCG_TARGET_HAS_mulu2_i32) {
+ tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
+ TCGV_LOW(arg1), TCGV_LOW(arg2));
+ /* Allow the optimizer room to replace mulu2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
+ } else {
+ tcg_debug_assert(TCG_TARGET_HAS_muluh_i32);
+ tcg_gen_op3_i32(INDEX_op_mul_i32, TCGV_LOW(t0),
+ TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_op3_i32(INDEX_op_muluh_i32, TCGV_HIGH(t0),
+ TCGV_LOW(arg1), TCGV_LOW(arg2));
+ }
tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
@@ -2401,6 +2409,12 @@
tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
/* Allow the optimizer room to replace mulu2 with two moves. */
tcg_gen_op0(INDEX_op_nop);
+ } else if (TCG_TARGET_HAS_muluh_i32) {
+ TCGv_i32 t = tcg_temp_new_i32();
+ tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
+ tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
+ tcg_gen_mov_i32(rl, t);
+ tcg_temp_free_i32(t);
} else {
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_i64 t1 = tcg_temp_new_i64();
@@ -2420,6 +2434,12 @@
tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
/* Allow the optimizer room to replace muls2 with two moves. */
tcg_gen_op0(INDEX_op_nop);
+ } else if (TCG_TARGET_HAS_mulsh_i32) {
+ TCGv_i32 t = tcg_temp_new_i32();
+ tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
+ tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
+ tcg_gen_mov_i32(rl, t);
+ tcg_temp_free_i32(t);
} else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_mulu2_i32) {
TCGv_i32 t0 = tcg_temp_new_i32();
TCGv_i32 t1 = tcg_temp_new_i32();
@@ -2499,6 +2519,12 @@
tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
/* Allow the optimizer room to replace mulu2 with two moves. */
tcg_gen_op0(INDEX_op_nop);
+ } else if (TCG_TARGET_HAS_muluh_i64) {
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
+ tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
+ tcg_gen_mov_i64(rl, t);
+ tcg_temp_free_i64(t);
} else if (TCG_TARGET_HAS_mulu2_i64) {
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_i64 t1 = tcg_temp_new_i64();
@@ -2540,6 +2566,12 @@
tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
/* Allow the optimizer room to replace muls2 with two moves. */
tcg_gen_op0(INDEX_op_nop);
+ } else if (TCG_TARGET_HAS_mulsh_i64) {
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
+ tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
+ tcg_gen_mov_i64(rl, t);
+ tcg_temp_free_i64(t);
} else {
TCGv_i64 t0 = tcg_temp_new_i64();
int sizemask = 0;
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index a8af5b9..a75c29d 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -91,6 +91,8 @@
DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32))
+DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32))
+DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32))
DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32))
DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
@@ -167,6 +169,8 @@
DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64))
DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
+DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64))
+DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64))
/* QEMU specific */
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 19bd5a3..541a442 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1252,12 +1252,13 @@
static void tcg_liveness_analysis(TCGContext *s)
{
int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops;
- TCGOpcode op, op_new;
+ TCGOpcode op, op_new, op_new2;
TCGArg *args;
const TCGOpDef *def;
uint8_t *dead_temps, *mem_temps;
uint16_t dead_args;
uint8_t sync_args;
+ bool have_op_new2;
s->gen_opc_ptr++; /* skip end */
@@ -1394,29 +1395,52 @@
goto do_not_remove;
case INDEX_op_mulu2_i32:
+ op_new = INDEX_op_mul_i32;
+ op_new2 = INDEX_op_muluh_i32;
+ have_op_new2 = TCG_TARGET_HAS_muluh_i32;
+ goto do_mul2;
case INDEX_op_muls2_i32:
op_new = INDEX_op_mul_i32;
+ op_new2 = INDEX_op_mulsh_i32;
+ have_op_new2 = TCG_TARGET_HAS_mulsh_i32;
goto do_mul2;
case INDEX_op_mulu2_i64:
+ op_new = INDEX_op_mul_i64;
+ op_new2 = INDEX_op_muluh_i64;
+ have_op_new2 = TCG_TARGET_HAS_muluh_i64;
+ goto do_mul2;
case INDEX_op_muls2_i64:
op_new = INDEX_op_mul_i64;
+ op_new2 = INDEX_op_mulsh_i64;
+ have_op_new2 = TCG_TARGET_HAS_mulsh_i64;
+ goto do_mul2;
do_mul2:
args -= 4;
nb_iargs = 2;
nb_oargs = 2;
- /* Likewise, test for the high part of the operation dead. */
if (dead_temps[args[1]] && !mem_temps[args[1]]) {
if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+ /* Both parts of the operation are dead. */
goto do_remove;
}
+ /* The high part of the operation is dead; generate the low. */
s->gen_opc_buf[op_index] = op = op_new;
args[1] = args[2];
args[2] = args[3];
- assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
- tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
- /* Fall through and mark the single-word operation live. */
- nb_oargs = 1;
+ } else if (have_op_new2 && dead_temps[args[0]]
+ && !mem_temps[args[0]]) {
+ /* The low part of the operation is dead; generate the high. */
+ s->gen_opc_buf[op_index] = op = op_new2;
+ args[0] = args[1];
+ args[1] = args[2];
+ args[2] = args[3];
+ } else {
+ goto do_not_remove;
}
+ assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+ tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
+ /* Mark the single-word operation live. */
+ nb_oargs = 1;
goto do_not_remove;
default:
diff --git a/tcg/tcg.h b/tcg/tcg.h
index f3f9889..3f869dd 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -85,6 +85,8 @@
#define TCG_TARGET_HAS_sub2_i64 0
#define TCG_TARGET_HAS_mulu2_i64 0
#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
/* Turn some undef macros into true macros. */
#define TCG_TARGET_HAS_add2_i32 1
#define TCG_TARGET_HAS_sub2_i32 1
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index d7fc14e..ff12b4b 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -76,6 +76,8 @@
#define TCG_TARGET_HAS_rot_i32 1
#define TCG_TARGET_HAS_movcond_i32 0
#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_bswap16_i64 1
@@ -100,13 +102,14 @@
#define TCG_TARGET_HAS_rot_i64 1
#define TCG_TARGET_HAS_movcond_i64 0
#define TCG_TARGET_HAS_muls2_i64 0
-
#define TCG_TARGET_HAS_add2_i32 0
#define TCG_TARGET_HAS_sub2_i32 0
#define TCG_TARGET_HAS_mulu2_i32 0
#define TCG_TARGET_HAS_add2_i64 0
#define TCG_TARGET_HAS_sub2_i64 0
#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
#endif /* TCG_TARGET_REG_BITS == 64 */
/* Number of registers available.