target-arm: optimize thumb 32-bit multiply
Current implementation of thumb mul instruction is implemented as a
32x32->64 multiply which then uses only 32 least significant bits of
the result. Replace that with a simple 32x32->32 multiply.
Signed-off-by: Juha Riihimäki <juha.riihimaki@nokia.com>
Acked-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 813f661..9d13d42 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -310,22 +310,6 @@
return tmp1;
}
-/* Unsigned 32x32->64 multiply. */
-static void gen_mull(TCGv a, TCGv b)
-{
- TCGv_i64 tmp1 = tcg_temp_new_i64();
- TCGv_i64 tmp2 = tcg_temp_new_i64();
-
- tcg_gen_extu_i32_i64(tmp1, a);
- tcg_gen_extu_i32_i64(tmp2, b);
- tcg_gen_mul_i64(tmp1, tmp1, tmp2);
- tcg_temp_free_i64(tmp2);
- tcg_gen_trunc_i64_i32(a, tmp1);
- tcg_gen_shri_i64(tmp1, tmp1, 32);
- tcg_gen_trunc_i64_i32(b, tmp1);
- tcg_temp_free_i64(tmp1);
-}
-
/* Signed 32x32->64 multiply. */
static void gen_imull(TCGv a, TCGv b)
{
@@ -8363,7 +8347,7 @@
gen_logic_CC(tmp);
break;
case 0xd: /* mul */
- gen_mull(tmp, tmp2);
+ tcg_gen_mul_i32(tmp, tmp, tmp2);
if (!s->condexec_mask)
gen_logic_CC(tmp);
break;