target/arm: Optimize MVE VDUP
Optimize the MVE VDUP instructions by using TCG vector ops when no predication is in effect; otherwise fall back to the existing out-of-line helper.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210913095440.13462-8-peter.maydell@linaro.org
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index d30c7e5..13de552 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -500,11 +500,15 @@
return true;
}
- qd = mve_qreg_ptr(a->qd);
rt = load_reg(s, a->rt);
- tcg_gen_dup_i32(a->size, rt, rt);
- gen_helper_mve_vdup(cpu_env, qd, rt);
- tcg_temp_free_ptr(qd);
+ if (mve_no_predication(s)) {
+ tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt);
+ } else {
+ qd = mve_qreg_ptr(a->qd);
+ tcg_gen_dup_i32(a->size, rt, rt);
+ gen_helper_mve_vdup(cpu_env, qd, rt);
+ tcg_temp_free_ptr(qd);
+ }
tcg_temp_free_i32(rt);
mve_update_eci(s);
return true;