target/arm: Implement VFP fp16 VRINT*
Implement the fp16 version of the VFP VRINT* insns.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200828183354.27913-19-peter.maydell@linaro.org
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 0319372..f5ad508 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -242,8 +242,10 @@
DEF_HELPER_3(sar_cc, i32, env, i32, i32)
DEF_HELPER_3(ror_cc, i32, env, i32, i32)
+DEF_HELPER_FLAGS_2(rinth_exact, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, ptr)
+DEF_HELPER_FLAGS_2(rinth, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr)
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
index 869b67b..7ce044f 100644
--- a/target/arm/translate-vfp.c.inc
+++ b/target/arm/translate-vfp.c.inc
@@ -341,7 +341,7 @@
static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
{
uint32_t rd, rm;
- bool dp = a->dp;
+ int sz = a->sz;
TCGv_ptr fpst;
TCGv_i32 tcg_rmode;
int rounding = fp_decode_rm[a->rm];
@@ -350,12 +350,16 @@
return false;
}
- if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
+ if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist */
- if (dp && !dc_isar_feature(aa32_simd_r32, s) &&
+ if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
((a->vm | a->vd) & 0x10)) {
return false;
}
@@ -367,12 +371,16 @@
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR);
+ if (sz == 1) {
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ } else {
+ fpst = fpstatus_ptr(FPST_FPCR);
+ }
tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- if (dp) {
+ if (sz == 3) {
TCGv_i64 tcg_op;
TCGv_i64 tcg_res;
tcg_op = tcg_temp_new_i64();
@@ -388,7 +396,11 @@
tcg_op = tcg_temp_new_i32();
tcg_res = tcg_temp_new_i32();
neon_load_reg32(tcg_op, rm);
- gen_helper_rints(tcg_res, tcg_op, fpst);
+ if (sz == 1) {
+ gen_helper_rinth(tcg_res, tcg_op, fpst);
+ } else {
+ gen_helper_rints(tcg_res, tcg_op, fpst);
+ }
neon_store_reg32(tcg_res, rd);
tcg_temp_free_i32(tcg_op);
tcg_temp_free_i32(tcg_res);
@@ -2638,6 +2650,29 @@
return true;
}
+static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ neon_load_reg32(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ gen_helper_rinth(tmp, tmp, fpst);
+ neon_store_reg32(tmp, a->vd);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
+
static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
{
TCGv_ptr fpst;
@@ -2693,6 +2728,34 @@
return true;
}
+static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+ TCGv_i32 tcg_rmode;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ neon_load_reg32(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ tcg_rmode = tcg_const_i32(float_round_to_zero);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+ gen_helper_rinth(tmp, tmp, fpst);
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+ neon_store_reg32(tmp, a->vd);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tcg_rmode);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
+
static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
{
TCGv_ptr fpst;
@@ -2758,6 +2821,29 @@
return true;
}
+static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ neon_load_reg32(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ gen_helper_rinth_exact(tmp, tmp, fpst);
+ neon_store_reg32(tmp, a->vd);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
+
static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
{
TCGv_ptr fpst;
diff --git a/target/arm/vfp-uncond.decode b/target/arm/vfp-uncond.decode
index 8ba7b17..9615544 100644
--- a/target/arm/vfp-uncond.decode
+++ b/target/arm/vfp-uncond.decode
@@ -60,10 +60,12 @@
VMAXNM_dp 1111 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d
VMINNM_dp 1111 1110 1.00 .... .... 1011 .1.0 .... @vfp_dnm_d
+VRINT 1111 1110 1.11 10 rm:2 .... 1001 01.0 .... \
+ vm=%vm_sp vd=%vd_sp sz=1
VRINT 1111 1110 1.11 10 rm:2 .... 1010 01.0 .... \
- vm=%vm_sp vd=%vd_sp dp=0
+ vm=%vm_sp vd=%vd_sp sz=2
VRINT 1111 1110 1.11 10 rm:2 .... 1011 01.0 .... \
- vm=%vm_dp vd=%vd_dp dp=1
+ vm=%vm_dp vd=%vd_dp sz=3
# VCVT float to int with specified rounding mode; Vd is always single-precision
VCVT 1111 1110 1.11 11 rm:2 .... 1001 op:1 1.0 .... \
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index a8f1137..9a79e99 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -195,12 +195,15 @@
VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \
vd=%vd_sp vm=%vm_dp
+VRINTR_hp ---- 1110 1.11 0110 .... 1001 01.0 .... @vfp_dm_ss
VRINTR_sp ---- 1110 1.11 0110 .... 1010 01.0 .... @vfp_dm_ss
VRINTR_dp ---- 1110 1.11 0110 .... 1011 01.0 .... @vfp_dm_dd
+VRINTZ_hp ---- 1110 1.11 0110 .... 1001 11.0 .... @vfp_dm_ss
VRINTZ_sp ---- 1110 1.11 0110 .... 1010 11.0 .... @vfp_dm_ss
VRINTZ_dp ---- 1110 1.11 0110 .... 1011 11.0 .... @vfp_dm_dd
+VRINTX_hp ---- 1110 1.11 0111 .... 1001 01.0 .... @vfp_dm_ss
VRINTX_sp ---- 1110 1.11 0111 .... 1010 01.0 .... @vfp_dm_ss
VRINTX_dp ---- 1110 1.11 0111 .... 1011 01.0 .... @vfp_dm_dd
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index ab3f0b1..586dfd2 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -1019,6 +1019,11 @@
}
/* ARMv8 round to integral */
+dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, void *fp_status)
+{
+ return float16_round_to_int(x, fp_status);
+}
+
float32 HELPER(rints_exact)(float32 x, void *fp_status)
{
return float32_round_to_int(x, fp_status);
@@ -1029,6 +1034,22 @@
return float64_round_to_int(x, fp_status);
}
+dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, void *fp_status)
+{
+ int old_flags = get_float_exception_flags(fp_status), new_flags;
+ float16 ret;
+
+ ret = float16_round_to_int(x, fp_status);
+
+ /* Suppress any inexact exceptions the conversion produced */
+ if (!(old_flags & float_flag_inexact)) {
+ new_flags = get_float_exception_flags(fp_status);
+ set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
+ }
+
+ return ret;
+}
+
float32 HELPER(rints)(float32 x, void *fp_status)
{
int old_flags = get_float_exception_flags(fp_status), new_flags;