Merge tag 'pull-misc-20240605' of https://gitlab.com/rth7680/qemu into staging
util/hexdump: Use a GString for qemu_hexdump_line
system/qtest: Replace sprintf by qemu_hexdump_line
hw/scsi/scsi-disk: Use qemu_hexdump_line to avoid sprintf
hw/ide/atapi: Use qemu_hexdump_line to avoid sprintf
hw/dma/pl330: Use qemu_hexdump_line to avoid sprintf
disas/microblaze: Reorg to avoid intermediate sprintf
disas/riscv: Use GString in format_inst
# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmZg1RMdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV+6mgf6AjEdU91vBXAUxabs
# kmVl5HaAD3NHU1VCM+ruPQkm6xv4kLlMsTibmkiS7+WZYvHfPlGfozjRJxtvZj8K
# 8J2Qp9iHjny8NQPkMCValDvmzkxaIT7ZzYCBdS4jfTdIThuYNJnXsI3NNP7ghnl6
# xv8O62dQbc5gjWF8G+q6PKWSxY6BEuFJ3Pt82cJ/Fj/8bhsjd48pgiLv66F/+q1z
# U9Gy8fWqmkKEzTqBigSYU98yae5CA89T6JBKtgFV07pkYa4A7BUyCR5EBirARyhM
# P0OAqR1GCAbSXWFaJ1sSpU8ATq33FoSQYwWwcmEET7FZYZqvbd6Jd4HtpOPqmu9W
# Fc4taw==
# =VgLB
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 05 Jun 2024 02:13:55 PM PDT
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]
* tag 'pull-misc-20240605' of https://gitlab.com/rth7680/qemu:
disas/riscv: Use GString in format_inst
disas/microblaze: Split get_field_special
disas/microblaze: Print registers directly with PRIrfsl
disas/microblaze: Print immediates directly with PRIimm
disas/microblaze: Print registers directly with PRIreg
disas/microblaze: Merge op->name output into each fprintf
disas/microblaze: Re-indent print_insn_microblaze
disas/microblaze: Split out print_immval_addr
hw/dma/pl330: Use qemu_hexdump_line to avoid sprintf
hw/ide/atapi: Use qemu_hexdump_line to avoid sprintf
hw/scsi/scsi-disk: Use qemu_hexdump_line to avoid sprintf
system/qtest: Replace sprintf by qemu_hexdump_line
hw/mips/malta: Add re-usable rng_seed_hex_new() method
util/hexdump: Inline g_string_append_printf "%02x"
util/hexdump: Add unit_len and block_len to qemu_hexdump_line
util/hexdump: Use a GString for qemu_hexdump_line
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index f573014..8f3b97d 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -447,6 +447,17 @@
} else {
return 1;
}
+#elif defined(TARGET_SPARC)
+ /* Prefer SNaN over QNaN, order B then A. */
+ if (is_snan(b_cls)) {
+ return 1;
+ } else if (is_snan(a_cls)) {
+ return 0;
+ } else if (is_qnan(b_cls)) {
+ return 1;
+ } else {
+ return 0;
+ }
#elif defined(TARGET_XTENSA)
/*
* Xtensa has two NaN propagation modes.
@@ -624,6 +635,26 @@
float_raise(float_flag_invalid | float_flag_invalid_imz, status);
}
return 3; /* default NaN */
+#elif defined(TARGET_SPARC)
+ /* For (inf,0,nan) return c. */
+ if (infzero) {
+ float_raise(float_flag_invalid | float_flag_invalid_imz, status);
+ return 2;
+ }
+ /* Prefer SNaN over QNaN, order C, B, A. */
+ if (is_snan(c_cls)) {
+ return 2;
+ } else if (is_snan(b_cls)) {
+ return 1;
+ } else if (is_snan(a_cls)) {
+ return 0;
+ } else if (is_qnan(c_cls)) {
+ return 2;
+ } else if (is_qnan(b_cls)) {
+ return 1;
+ } else {
+ return 0;
+ }
#elif defined(TARGET_XTENSA)
/*
* For Xtensa, the (inf,zero,nan) case sets InvalidOp and returns
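As a reading aid: pickNaN returns the index of the operand whose NaN is
propagated (0 = a, 1 = b; the muladd variant adds 2 = c and 3 = default
NaN). A minimal standalone sketch of the two-operand SPARC rule added
above, with the softfloat class type stubbed out for illustration:

    /* Sketch only, not QEMU API: SNaN beats QNaN, and B beats A on ties. */
    enum fake_cls { CLS_NORMAL, CLS_QNAN, CLS_SNAN };

    static int sparc_pick_nan_2op(enum fake_cls a_cls, enum fake_cls b_cls)
    {
        if (b_cls == CLS_SNAN) {
            return 1;           /* propagate b */
        } else if (a_cls == CLS_SNAN) {
            return 0;           /* propagate a */
        } else if (b_cls == CLS_QNAN) {
            return 1;
        }
        return 0;
    }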
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index c1e1511..0d4dc1f 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1003,6 +1003,9 @@
r |= features & CPU_FEATURE_FSMULD ? HWCAP_SPARC_FSMULD : 0;
r |= features & CPU_FEATURE_VIS1 ? HWCAP_SPARC_VIS : 0;
r |= features & CPU_FEATURE_VIS2 ? HWCAP_SPARC_VIS2 : 0;
+ r |= features & CPU_FEATURE_FMAF ? HWCAP_SPARC_FMAF : 0;
+ r |= features & CPU_FEATURE_VIS3 ? HWCAP_SPARC_VIS3 : 0;
+ r |= features & CPU_FEATURE_IMA ? HWCAP_SPARC_IMA : 0;
#endif
return r;
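Guest programs observe these bits through the ELF auxiliary vector. A
hedged guest-side sketch (the HWCAP_SPARC_VIS3 value is assumed here;
confirm against the kernel's asm/elf.h):

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP_SPARC_VIS3
    #define HWCAP_SPARC_VIS3 0x00020000  /* assumed value, for illustration */
    #endif

    int main(void)
    {
        unsigned long hwcap = getauxval(AT_HWCAP);
        printf("vis3: %s\n", (hwcap & HWCAP_SPARC_VIS3) ? "yes" : "no");
        return 0;
    }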
diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h
index d508d0c..3b41128 100644
--- a/linux-user/ioctls.h
+++ b/linux-user/ioctls.h
@@ -102,6 +102,7 @@
IOCTL(BLKRAGET, IOC_R, MK_PTR(TYPE_LONG))
IOCTL(BLKSSZGET, IOC_R, MK_PTR(TYPE_INT))
IOCTL(BLKBSZGET, IOC_R, MK_PTR(TYPE_INT))
+ IOCTL(BLKBSZSET, IOC_W, MK_PTR(TYPE_INT))
IOCTL_SPECIAL(BLKPG, IOC_W, do_ioctl_blkpg,
MK_PTR(MK_STRUCT(STRUCT_blkpg_ioctl_arg)))
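BLKBSZSET writes an int through its pointer argument (the soft block
size), mirroring the BLKBSZGET entry above it. A sketch of the guest
call this now forwards -- the device path is illustrative:

    #include <fcntl.h>
    #include <linux/fs.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    int main(void)
    {
        int blksz = 4096;
        int fd = open("/dev/loop0", O_RDWR);

        if (fd < 0 || ioctl(fd, BLKBSZSET, &blksz) < 0) {
            perror("BLKBSZSET");
            return 1;
        }
        close(fd);
        return 0;
    }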
diff --git a/target/sparc/asi.h b/target/sparc/asi.h
index a668296..14ffaa3 100644
--- a/target/sparc/asi.h
+++ b/target/sparc/asi.h
@@ -144,6 +144,8 @@
* ASIs, "(4V)" designates SUN4V specific ASIs. "(NG4)" designates SPARC-T4
* and later ASIs.
*/
+#define ASI_MON_AIUP 0x12 /* (VIS4) Primary, user, monitor */
+#define ASI_MON_AIUS 0x13 /* (VIS4) Secondary, user, monitor */
#define ASI_REAL 0x14 /* Real address, cacheable */
#define ASI_PHYS_USE_EC 0x14 /* PADDR, E-cacheable */
#define ASI_REAL_IO 0x15 /* Real address, non-cacheable */
@@ -257,6 +259,8 @@
#define ASI_UDBL_CONTROL_R 0x7f /* External UDB control regs rd low*/
#define ASI_INTR_R 0x7f /* IRQ vector dispatch read */
#define ASI_INTR_DATAN_R 0x7f /* (III) In irq vector data reg N */
+#define ASI_MON_P 0x84 /* (VIS4) Primary, monitor */
+#define ASI_MON_S 0x85 /* (VIS4) Secondary, monitor */
#define ASI_PIC 0xb0 /* (NG4) PIC registers */
#define ASI_PST8_P 0xc0 /* Primary, 8 8-bit, partial */
#define ASI_PST8_S 0xc1 /* Secondary, 8 8-bit, partial */
diff --git a/target/sparc/cpu-feature.h.inc b/target/sparc/cpu-feature.h.inc
index d800f18..be81005 100644
--- a/target/sparc/cpu-feature.h.inc
+++ b/target/sparc/cpu-feature.h.inc
@@ -12,3 +12,7 @@
FEATURE(CACHE_CTRL)
FEATURE(POWERDOWN)
FEATURE(CASA)
+FEATURE(FMAF)
+FEATURE(VIS3)
+FEATURE(IMA)
+FEATURE(VIS4)
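These FEATURE() entries are consumed X-macro style to generate both the
bit indexes and the mask values used elsewhere in this series. A sketch
of the typical expansion (the real consumer lives in target/sparc/cpu.h
and may differ in detail):

    enum {
    #define FEATURE(X) CPU_FEATURE_BIT_##X,
    #include "cpu-feature.h.inc"
    #undef FEATURE
    };

    enum {
    #define FEATURE(X) CPU_FEATURE_##X = 1u << CPU_FEATURE_BIT_##X,
    #include "cpu-feature.h.inc"
    #undef FEATURE
    };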
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index 5be1592..9bacfb6 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -549,6 +549,10 @@
[CPU_FEATURE_BIT_HYPV] = "hypv",
[CPU_FEATURE_BIT_VIS1] = "vis1",
[CPU_FEATURE_BIT_VIS2] = "vis2",
+ [CPU_FEATURE_BIT_FMAF] = "fmaf",
+ [CPU_FEATURE_BIT_VIS3] = "vis3",
+ [CPU_FEATURE_BIT_IMA] = "ima",
+ [CPU_FEATURE_BIT_VIS4] = "vis4",
#else
[CPU_FEATURE_BIT_MUL] = "mul",
[CPU_FEATURE_BIT_DIV] = "div",
@@ -877,6 +881,14 @@
CPU_FEATURE_BIT_VIS1, false),
DEFINE_PROP_BIT("vis2", SPARCCPU, env.def.features,
CPU_FEATURE_BIT_VIS2, false),
+ DEFINE_PROP_BIT("fmaf", SPARCCPU, env.def.features,
+ CPU_FEATURE_BIT_FMAF, false),
+ DEFINE_PROP_BIT("vis3", SPARCCPU, env.def.features,
+ CPU_FEATURE_BIT_VIS3, false),
+ DEFINE_PROP_BIT("ima", SPARCCPU, env.def.features,
+ CPU_FEATURE_BIT_IMA, false),
+ DEFINE_PROP_BIT("vis4", SPARCCPU, env.def.features,
+ CPU_FEATURE_BIT_VIS4, false),
#else
DEFINE_PROP_BIT("mul", SPARCCPU, env.def.features,
CPU_FEATURE_BIT_MUL, false),
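With the properties registered, the new features toggle from the command
line like any other CPU flag, e.g. (CPU model name illustrative):

    qemu-system-sparc64 -cpu TI-UltraSparc-IIi,fmaf=on,vis3=on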
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
index 1205a59..0b30665 100644
--- a/target/sparc/fop_helper.c
+++ b/target/sparc/fop_helper.c
@@ -343,6 +343,90 @@
return f128_ret(ret);
}
+float32 helper_fmadds(CPUSPARCState *env, float32 s1,
+ float32 s2, float32 s3, uint32_t op)
+{
+ float32 ret = float32_muladd(s1, s2, s3, op, &env->fp_status);
+ check_ieee_exceptions(env, GETPC());
+ return ret;
+}
+
+float64 helper_fmaddd(CPUSPARCState *env, float64 s1,
+ float64 s2, float64 s3, uint32_t op)
+{
+ float64 ret = float64_muladd(s1, s2, s3, op, &env->fp_status);
+ check_ieee_exceptions(env, GETPC());
+ return ret;
+}
+
+float32 helper_fnadds(CPUSPARCState *env, float32 src1, float32 src2)
+{
+ float32 ret = float32_add(src1, src2, &env->fp_status);
+
+ /*
+ * NaN inputs or result do not get a sign change.
+ * Nor, apparently, does zero: on hardware, -(x + -x) yields +0.
+ */
+ if (!float32_is_any_nan(ret) && !float32_is_zero(ret)) {
+ ret = float32_chs(ret);
+ }
+ check_ieee_exceptions(env, GETPC());
+ return ret;
+}
+
+float32 helper_fnmuls(CPUSPARCState *env, float32 src1, float32 src2)
+{
+ float32 ret = float32_mul(src1, src2, &env->fp_status);
+
+ /* NaN inputs or result do not get a sign change. */
+ if (!float32_is_any_nan(ret)) {
+ ret = float32_chs(ret);
+ }
+ check_ieee_exceptions(env, GETPC());
+ return ret;
+}
+
+float64 helper_fnaddd(CPUSPARCState *env, float64 src1, float64 src2)
+{
+ float64 ret = float64_add(src1, src2, &env->fp_status);
+
+ /*
+ * NaN inputs or result do not get a sign change.
+ * Nor, apparently, does zero: on hardware, -(x + -x) yields +0.
+ */
+ if (!float64_is_any_nan(ret) && !float64_is_zero(ret)) {
+ ret = float64_chs(ret);
+ }
+ check_ieee_exceptions(env, GETPC());
+ return ret;
+}
+
+float64 helper_fnmuld(CPUSPARCState *env, float64 src1, float64 src2)
+{
+ float64 ret = float64_mul(src1, src2, &env->fp_status);
+
+ /* NaN inputs or result do not get a sign change. */
+ if (!float64_is_any_nan(ret)) {
+ ret = float64_chs(ret);
+ }
+ check_ieee_exceptions(env, GETPC());
+ return ret;
+}
+
+float64 helper_fnsmuld(CPUSPARCState *env, float32 src1, float32 src2)
+{
+ float64 ret = float64_mul(float32_to_float64(src1, &env->fp_status),
+ float32_to_float64(src2, &env->fp_status),
+ &env->fp_status);
+
+ /* NaN inputs or result do not get a sign change. */
+ if (!float64_is_any_nan(ret)) {
+ ret = float64_chs(ret);
+ }
+ check_ieee_exceptions(env, GETPC());
+ return ret;
+}
+
static uint32_t finish_fcmp(CPUSPARCState *env, FloatRelation r, uintptr_t ra)
{
check_ieee_exceptions(env, ra);
@@ -406,6 +490,52 @@
return finish_fcmp(env, r, GETPC());
}
+uint32_t helper_flcmps(float32 src1, float32 src2)
+{
+ /*
+ * FLCMP never raises an exception nor modifies any FSR fields.
+ * Perform the comparison with a dummy fp environment.
+ */
+ float_status discard = { };
+ FloatRelation r = float32_compare_quiet(src1, src2, &discard);
+
+ switch (r) {
+ case float_relation_equal:
+ if (src2 == float32_zero && src1 != float32_zero) {
+ return 1; /* -0.0 < +0.0 */
+ }
+ return 0;
+ case float_relation_less:
+ return 1;
+ case float_relation_greater:
+ return 0;
+ case float_relation_unordered:
+ return float32_is_any_nan(src2) ? 3 : 2;
+ }
+ g_assert_not_reached();
+}
+
+uint32_t helper_flcmpd(float64 src1, float64 src2)
+{
+ float_status discard = { };
+ FloatRelation r = float64_compare_quiet(src1, src2, &discard);
+
+ switch (r) {
+ case float_relation_equal:
+ if (src2 == float64_zero && src1 != float64_zero) {
+ return 1; /* -0.0 < +0.0 */
+ }
+ return 0;
+ case float_relation_less:
+ return 1;
+ case float_relation_greater:
+ return 0;
+ case float_relation_unordered:
+ return float64_is_any_nan(src2) ? 3 : 2;
+ }
+ g_assert_not_reached();
+}
+
target_ulong cpu_get_fsr(CPUSPARCState *env)
{
target_ulong fsr = env->fsr | env->fsr_cexc_ftt;
@@ -472,3 +602,9 @@
env->fsr_cexc_ftt |= fsr & FSR_CEXC_MASK;
set_fsr_nonsplit(env, fsr);
}
+
+void helper_set_fsr_nofcc(CPUSPARCState *env, uint32_t fsr)
+{
+ env->fsr_cexc_ftt = fsr & (FSR_CEXC_MASK | FSR_FTT_MASK);
+ set_fsr_nonsplit(env, fsr);
+}
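The value flcmps/flcmpd return is the raw 2-bit fcc encoding: 0 for
greater-or-equal, 1 for less (including -0.0 vs +0.0), 2 when only src1
is NaN, 3 when src2 is NaN. A host-double sketch of the same mapping
(unlike the real helpers, host FP exception state is not controlled
here):

    #include <math.h>

    static unsigned flcmp_host(double s1, double s2)
    {
        if (isnan(s2)) {
            return 3;
        }
        if (isnan(s1)) {
            return 2;
        }
        if (s1 == s2) {
            /* Equal compares equal, but -0.0 sorts below +0.0. */
            return (signbit(s1) && !signbit(s2)) ? 1 : 0;
        }
        return s1 < s2 ? 1 : 0;
    }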
diff --git a/target/sparc/helper.h b/target/sparc/helper.h
index 97fbf6f..134e519 100644
--- a/target/sparc/helper.h
+++ b/target/sparc/helper.h
@@ -40,6 +40,7 @@
DEF_HELPER_FLAGS_5(st_asi, TCG_CALL_NO_WG, void, env, tl, i64, int, i32)
#endif
DEF_HELPER_FLAGS_1(get_fsr, TCG_CALL_NO_WG_SE, tl, env)
+DEF_HELPER_FLAGS_2(set_fsr_nofcc, TCG_CALL_NO_RWG, void, env, i32)
DEF_HELPER_FLAGS_2(set_fsr_nofcc_noftt, TCG_CALL_NO_RWG, void, env, i32)
DEF_HELPER_FLAGS_2(fsqrts, TCG_CALL_NO_WG, f32, env, f32)
DEF_HELPER_FLAGS_2(fsqrtd, TCG_CALL_NO_WG, f64, env, f64)
@@ -50,12 +51,17 @@
DEF_HELPER_FLAGS_3(fcmped, TCG_CALL_NO_WG, i32, env, f64, f64)
DEF_HELPER_FLAGS_3(fcmpq, TCG_CALL_NO_WG, i32, env, i128, i128)
DEF_HELPER_FLAGS_3(fcmpeq, TCG_CALL_NO_WG, i32, env, i128, i128)
+DEF_HELPER_FLAGS_2(flcmps, TCG_CALL_NO_RWG_SE, i32, f32, f32)
+DEF_HELPER_FLAGS_2(flcmpd, TCG_CALL_NO_RWG_SE, i32, f64, f64)
DEF_HELPER_2(raise_exception, noreturn, env, int)
DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(fsubd, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(fmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(fdivd, TCG_CALL_NO_WG, f64, env, f64, f64)
+DEF_HELPER_FLAGS_5(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, i32)
+DEF_HELPER_FLAGS_3(fnaddd, TCG_CALL_NO_WG, f64, env, f64, f64)
+DEF_HELPER_FLAGS_3(fnmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(faddq, TCG_CALL_NO_WG, i128, env, i128, i128)
DEF_HELPER_FLAGS_3(fsubq, TCG_CALL_NO_WG, i128, env, i128, i128)
@@ -66,8 +72,12 @@
DEF_HELPER_FLAGS_3(fsubs, TCG_CALL_NO_WG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_WG, f32, env, f32, f32)
+DEF_HELPER_FLAGS_5(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, i32)
+DEF_HELPER_FLAGS_3(fnadds, TCG_CALL_NO_WG, f32, env, f32, f32)
+DEF_HELPER_FLAGS_3(fnmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fsmuld, TCG_CALL_NO_WG, f64, env, f32, f32)
+DEF_HELPER_FLAGS_3(fnsmuld, TCG_CALL_NO_WG, f64, env, f32, f32)
DEF_HELPER_FLAGS_3(fdmulq, TCG_CALL_NO_WG, i128, env, f64, f64)
DEF_HELPER_FLAGS_2(fitod, TCG_CALL_NO_WG, f64, env, s32)
@@ -105,15 +115,28 @@
DEF_HELPER_FLAGS_3(fpack32, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
DEF_HELPER_FLAGS_2(fpackfix, TCG_CALL_NO_RWG_SE, i32, i64, i64)
DEF_HELPER_FLAGS_3(bshuffle, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
-#define VIS_CMPHELPER(name) \
+DEF_HELPER_FLAGS_2(cmask8, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(cmask16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(cmask32, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fchksm16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmean16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fslas16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fslas32, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+#define VIS_CMPHELPER(name) \
+ DEF_HELPER_FLAGS_2(f##name##8, TCG_CALL_NO_RWG_SE, \
+ i64, i64, i64) \
DEF_HELPER_FLAGS_2(f##name##16, TCG_CALL_NO_RWG_SE, \
- i64, i64, i64) \
+ i64, i64, i64) \
DEF_HELPER_FLAGS_2(f##name##32, TCG_CALL_NO_RWG_SE, \
i64, i64, i64)
VIS_CMPHELPER(cmpgt)
VIS_CMPHELPER(cmpeq)
VIS_CMPHELPER(cmple)
VIS_CMPHELPER(cmpne)
+VIS_CMPHELPER(cmpugt)
+VIS_CMPHELPER(cmpule)
+DEF_HELPER_FLAGS_2(xmulx, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(xmulxhi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
#endif
#undef VIS_HELPER
#undef VIS_CMPHELPER
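For reference, each DEF_HELPER_FLAGS_N line both declares the helper's C
prototype and registers it with TCG. Roughly what the flcmps entry above
produces in the declaration pass (illustration, not verbatim macro
output; float32 is QEMU's uint32_t-backed type):

    /* TCG_CALL_NO_RWG_SE: the call neither reads nor writes TCG globals
     * and is side-effect free, so TCG may reorder it or drop it when
     * the result is dead. */
    uint32_t helper_flcmps(float32 src1, float32 src2);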
diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode
index e2d8a07..fbcb4f7 100644
--- a/target/sparc/insns.decode
+++ b/target/sparc/insns.decode
@@ -26,6 +26,15 @@
## Major Opcode 10 -- integer, floating-point, vis, and system insns.
##
+%dfp_rd 25:5 !function=extract_dfpreg
+%dfp_rs1 14:5 !function=extract_dfpreg
+%dfp_rs2 0:5 !function=extract_dfpreg
+%dfp_rs3 9:5 !function=extract_dfpreg
+
+%qfp_rd 25:5 !function=extract_qfpreg
+%qfp_rs1 14:5 !function=extract_qfpreg
+%qfp_rs2 0:5 !function=extract_qfpreg
+
&r_r_ri rd rs1 rs2_or_imm imm:bool
@n_r_ri .. ..... ...... rs1:5 imm:1 rs2_or_imm:s13 &r_r_ri rd=0
@r_r_ri .. rd:5 ...... rs1:5 imm:1 rs2_or_imm:s13 &r_r_ri
@@ -37,11 +46,45 @@
&r_r_r rd rs1 rs2
@r_r_r .. rd:5 ...... rs1:5 . ........ rs2:5 &r_r_r
+@d_r_r .. ..... ...... rs1:5 . ........ rs2:5 \
+ &r_r_r rd=%dfp_rd
+@r_d_d .. rd:5 ...... ..... . ........ ..... \
+ &r_r_r rs1=%dfp_rs1 rs2=%dfp_rs2
+@d_r_d .. ..... ...... rs1:5 . ........ ..... \
+ &r_r_r rd=%dfp_rd rs2=%dfp_rs2
+@d_d_d .. ..... ...... ..... . ........ ..... \
+ &r_r_r rd=%dfp_rd rs1=%dfp_rs1 rs2=%dfp_rs2
+@q_q_q .. ..... ...... ..... . ........ ..... \
+ &r_r_r rd=%qfp_rd rs1=%qfp_rs1 rs2=%qfp_rs2
+@q_d_d .. ..... ...... ..... . ........ ..... \
+ &r_r_r rd=%qfp_rd rs1=%dfp_rs1 rs2=%dfp_rs2
+
@r_r_r_swap .. rd:5 ...... rs2:5 . ........ rs1:5 &r_r_r
+@d_d_d_swap .. ..... ...... ..... . ........ ..... \
+ &r_r_r rd=%dfp_rd rs1=%dfp_rs2 rs2=%dfp_rs1
&r_r rd rs
@r_r1 .. rd:5 ...... rs:5 . ........ ..... &r_r
@r_r2 .. rd:5 ...... ..... . ........ rs:5 &r_r
+@r_d2 .. rd:5 ...... ..... . ........ ..... &r_r rs=%dfp_rs2
+@r_q2 .. rd:5 ...... ..... . ........ ..... &r_r rs=%qfp_rs2
+@d_r2 .. ..... ...... ..... . ........ rs:5 &r_r rd=%dfp_rd
+@q_r2 .. ..... ...... ..... . ........ rs:5 &r_r rd=%qfp_rd
+@d_d1 .. ..... ...... ..... . ........ ..... \
+ &r_r rd=%dfp_rd rs=%dfp_rs1
+@d_d2 .. ..... ...... ..... . ........ ..... \
+ &r_r rd=%dfp_rd rs=%dfp_rs2
+@d_q2 .. ..... ...... ..... . ........ ..... \
+ &r_r rd=%dfp_rd rs=%qfp_rs2
+@q_q2 .. ..... ...... ..... . ........ ..... \
+ &r_r rd=%qfp_rd rs=%qfp_rs2
+@q_d2 .. ..... ...... ..... . ........ ..... \
+ &r_r rd=%qfp_rd rs=%dfp_rs2
+
+&r_r_r_r rd rs1 rs2 rs3
+@r_r_r_r .. rd:5 ...... rs1:5 rs3:5 .... rs2:5 &r_r_r_r
+@d_d_d_d .. ..... ...... ..... ..... .... ..... \
+ &r_r_r_r rd=%dfp_rd rs1=%dfp_rs1 rs2=%dfp_rs2 rs3=%dfp_rs3
{
[
@@ -81,6 +124,7 @@
WRTICK_CMPR 10 10111 110000 ..... . ............. @n_r_ri
WRSTICK 10 11000 110000 ..... . ............. @n_r_ri
WRSTICK_CMPR 10 11001 110000 ..... . ............. @n_r_ri
+ WRMWAIT 10 11100 110000 ..... . ............. @n_r_ri
]
# Before v8, rs1==0 was WRY, and the rest executed as nop.
[
@@ -241,68 +285,89 @@
RETRY 10 00001 111110 00000 0 0000000000000
FMOVs 10 ..... 110100 00000 0 0000 0001 ..... @r_r2
-FMOVd 10 ..... 110100 00000 0 0000 0010 ..... @r_r2
-FMOVq 10 ..... 110100 00000 0 0000 0011 ..... @r_r2
+FMOVd 10 ..... 110100 00000 0 0000 0010 ..... @d_d2
+FMOVq 10 ..... 110100 00000 0 0000 0011 ..... @q_q2
FNEGs 10 ..... 110100 00000 0 0000 0101 ..... @r_r2
-FNEGd 10 ..... 110100 00000 0 0000 0110 ..... @r_r2
-FNEGq 10 ..... 110100 00000 0 0000 0111 ..... @r_r2
+FNEGd 10 ..... 110100 00000 0 0000 0110 ..... @d_d2
+FNEGq 10 ..... 110100 00000 0 0000 0111 ..... @q_q2
FABSs 10 ..... 110100 00000 0 0000 1001 ..... @r_r2
-FABSd 10 ..... 110100 00000 0 0000 1010 ..... @r_r2
-FABSq 10 ..... 110100 00000 0 0000 1011 ..... @r_r2
+FABSd 10 ..... 110100 00000 0 0000 1010 ..... @d_d2
+FABSq 10 ..... 110100 00000 0 0000 1011 ..... @q_q2
FSQRTs 10 ..... 110100 00000 0 0010 1001 ..... @r_r2
-FSQRTd 10 ..... 110100 00000 0 0010 1010 ..... @r_r2
-FSQRTq 10 ..... 110100 00000 0 0010 1011 ..... @r_r2
+FSQRTd 10 ..... 110100 00000 0 0010 1010 ..... @d_d2
+FSQRTq 10 ..... 110100 00000 0 0010 1011 ..... @q_q2
FADDs 10 ..... 110100 ..... 0 0100 0001 ..... @r_r_r
-FADDd 10 ..... 110100 ..... 0 0100 0010 ..... @r_r_r
-FADDq 10 ..... 110100 ..... 0 0100 0011 ..... @r_r_r
+FADDd 10 ..... 110100 ..... 0 0100 0010 ..... @d_d_d
+FADDq 10 ..... 110100 ..... 0 0100 0011 ..... @q_q_q
FSUBs 10 ..... 110100 ..... 0 0100 0101 ..... @r_r_r
-FSUBd 10 ..... 110100 ..... 0 0100 0110 ..... @r_r_r
-FSUBq 10 ..... 110100 ..... 0 0100 0111 ..... @r_r_r
+FSUBd 10 ..... 110100 ..... 0 0100 0110 ..... @d_d_d
+FSUBq 10 ..... 110100 ..... 0 0100 0111 ..... @q_q_q
FMULs 10 ..... 110100 ..... 0 0100 1001 ..... @r_r_r
-FMULd 10 ..... 110100 ..... 0 0100 1010 ..... @r_r_r
-FMULq 10 ..... 110100 ..... 0 0100 1011 ..... @r_r_r
+FMULd 10 ..... 110100 ..... 0 0100 1010 ..... @d_d_d
+FMULq 10 ..... 110100 ..... 0 0100 1011 ..... @q_q_q
FDIVs 10 ..... 110100 ..... 0 0100 1101 ..... @r_r_r
-FDIVd 10 ..... 110100 ..... 0 0100 1110 ..... @r_r_r
-FDIVq 10 ..... 110100 ..... 0 0100 1111 ..... @r_r_r
-FsMULd 10 ..... 110100 ..... 0 0110 1001 ..... @r_r_r
-FdMULq 10 ..... 110100 ..... 0 0110 1110 ..... @r_r_r
+FDIVd 10 ..... 110100 ..... 0 0100 1110 ..... @d_d_d
+FDIVq 10 ..... 110100 ..... 0 0100 1111 ..... @q_q_q
+FNADDs 10 ..... 110100 ..... 0 0101 0001 ..... @r_r_r
+FNADDd 10 ..... 110100 ..... 0 0101 0010 ..... @d_d_d
+FNMULs 10 ..... 110100 ..... 0 0101 1001 ..... @r_r_r
+FNMULd 10 ..... 110100 ..... 0 0101 1010 ..... @d_d_d
+FHADDs 10 ..... 110100 ..... 0 0110 0001 ..... @r_r_r
+FHADDd 10 ..... 110100 ..... 0 0110 0010 ..... @d_d_d
+FHSUBs 10 ..... 110100 ..... 0 0110 0101 ..... @r_r_r
+FHSUBd 10 ..... 110100 ..... 0 0110 0110 ..... @d_d_d
+FsMULd 10 ..... 110100 ..... 0 0110 1001 ..... @d_r_r
+FdMULq 10 ..... 110100 ..... 0 0110 1110 ..... @q_d_d
+FNHADDs 10 ..... 110100 ..... 0 0111 0001 ..... @r_r_r
+FNHADDd 10 ..... 110100 ..... 0 0111 0010 ..... @d_d_d
+FNsMULd 10 ..... 110100 ..... 0 0111 1001 ..... @d_r_r
FsTOx 10 ..... 110100 00000 0 1000 0001 ..... @r_r2
-FdTOx 10 ..... 110100 00000 0 1000 0010 ..... @r_r2
-FqTOx 10 ..... 110100 00000 0 1000 0011 ..... @r_r2
+FdTOx 10 ..... 110100 00000 0 1000 0010 ..... @r_d2
+FqTOx 10 ..... 110100 00000 0 1000 0011 ..... @r_q2
FxTOs 10 ..... 110100 00000 0 1000 0100 ..... @r_r2
-FxTOd 10 ..... 110100 00000 0 1000 1000 ..... @r_r2
-FxTOq 10 ..... 110100 00000 0 1000 1100 ..... @r_r2
+FxTOd 10 ..... 110100 00000 0 1000 1000 ..... @d_r2
+FxTOq 10 ..... 110100 00000 0 1000 1100 ..... @q_r2
FiTOs 10 ..... 110100 00000 0 1100 0100 ..... @r_r2
-FdTOs 10 ..... 110100 00000 0 1100 0110 ..... @r_r2
-FqTOs 10 ..... 110100 00000 0 1100 0111 ..... @r_r2
-FiTOd 10 ..... 110100 00000 0 1100 1000 ..... @r_r2
-FsTOd 10 ..... 110100 00000 0 1100 1001 ..... @r_r2
-FqTOd 10 ..... 110100 00000 0 1100 1011 ..... @r_r2
-FiTOq 10 ..... 110100 00000 0 1100 1100 ..... @r_r2
-FsTOq 10 ..... 110100 00000 0 1100 1101 ..... @r_r2
-FdTOq 10 ..... 110100 00000 0 1100 1110 ..... @r_r2
+FdTOs 10 ..... 110100 00000 0 1100 0110 ..... @r_d2
+FqTOs 10 ..... 110100 00000 0 1100 0111 ..... @r_q2
+FiTOd 10 ..... 110100 00000 0 1100 1000 ..... @d_r2
+FsTOd 10 ..... 110100 00000 0 1100 1001 ..... @d_r2
+FqTOd 10 ..... 110100 00000 0 1100 1011 ..... @d_q2
+FiTOq 10 ..... 110100 00000 0 1100 1100 ..... @q_r2
+FsTOq 10 ..... 110100 00000 0 1100 1101 ..... @q_r2
+FdTOq 10 ..... 110100 00000 0 1100 1110 ..... @q_d2
FsTOi 10 ..... 110100 00000 0 1101 0001 ..... @r_r2
-FdTOi 10 ..... 110100 00000 0 1101 0010 ..... @r_r2
-FqTOi 10 ..... 110100 00000 0 1101 0011 ..... @r_r2
+FdTOi 10 ..... 110100 00000 0 1101 0010 ..... @r_d2
+FqTOi 10 ..... 110100 00000 0 1101 0011 ..... @r_q2
FMOVscc 10 rd:5 110101 0 cond:4 1 cc:1 0 000001 rs2:5
-FMOVdcc 10 rd:5 110101 0 cond:4 1 cc:1 0 000010 rs2:5
-FMOVqcc 10 rd:5 110101 0 cond:4 1 cc:1 0 000011 rs2:5
+FMOVdcc 10 ..... 110101 0 cond:4 1 cc:1 0 000010 ..... \
+ rd=%dfp_rd rs2=%dfp_rs2
+FMOVqcc 10 ..... 110101 0 cond:4 1 cc:1 0 000011 ..... \
+ rd=%qfp_rd rs2=%qfp_rs2
FMOVsfcc 10 rd:5 110101 0 cond:4 0 cc:2 000001 rs2:5
-FMOVdfcc 10 rd:5 110101 0 cond:4 0 cc:2 000010 rs2:5
-FMOVqfcc 10 rd:5 110101 0 cond:4 0 cc:2 000011 rs2:5
+FMOVdfcc 10 ..... 110101 0 cond:4 0 cc:2 000010 ..... \
+ rd=%dfp_rd rs2=%dfp_rs2
+FMOVqfcc 10 ..... 110101 0 cond:4 0 cc:2 000011 ..... \
+ rd=%qfp_rd rs2=%qfp_rs2
FMOVRs 10 rd:5 110101 rs1:5 0 cond:3 00101 rs2:5
-FMOVRd 10 rd:5 110101 rs1:5 0 cond:3 00110 rs2:5
-FMOVRq 10 rd:5 110101 rs1:5 0 cond:3 00111 rs2:5
+FMOVRd 10 ..... 110101 rs1:5 0 cond:3 00110 ..... \
+ rd=%dfp_rd rs2=%dfp_rs2
+FMOVRq 10 ..... 110101 rs1:5 0 cond:3 00111 ..... \
+ rd=%qfp_rd rs2=%qfp_rs2
FCMPs 10 000 cc:2 110101 rs1:5 0 0101 0001 rs2:5
-FCMPd 10 000 cc:2 110101 rs1:5 0 0101 0010 rs2:5
-FCMPq 10 000 cc:2 110101 rs1:5 0 0101 0011 rs2:5
+FCMPd 10 000 cc:2 110101 ..... 0 0101 0010 ..... \
+ rs1=%dfp_rs1 rs2=%dfp_rs2
+FCMPq 10 000 cc:2 110101 ..... 0 0101 0011 ..... \
+ rs1=%qfp_rs1 rs2=%qfp_rs2
FCMPEs 10 000 cc:2 110101 rs1:5 0 0101 0101 rs2:5
-FCMPEd 10 000 cc:2 110101 rs1:5 0 0101 0110 rs2:5
-FCMPEq 10 000 cc:2 110101 rs1:5 0 0101 0111 rs2:5
+FCMPEd 10 000 cc:2 110101 ..... 0 0101 0110 ..... \
+ rs1=%dfp_rs1 rs2=%dfp_rs2
+FCMPEq 10 000 cc:2 110101 ..... 0 0101 0111 ..... \
+ rs1=%qfp_rs1 rs2=%qfp_rs2
{
[
@@ -323,93 +388,187 @@
ARRAY16 10 ..... 110110 ..... 0 0001 0010 ..... @r_r_r
ARRAY32 10 ..... 110110 ..... 0 0001 0100 ..... @r_r_r
+ ADDXC 10 ..... 110110 ..... 0 0001 0001 ..... @r_r_r
+ ADDXCcc 10 ..... 110110 ..... 0 0001 0011 ..... @r_r_r
+ UMULXHI 10 ..... 110110 ..... 0 0001 0110 ..... @r_r_r
+ LZCNT 10 ..... 110110 00000 0 0001 0111 ..... @r_r2
+ XMULX 10 ..... 110110 ..... 1 0001 0101 ..... @r_r_r
+ XMULXHI 10 ..... 110110 ..... 1 0001 0110 ..... @r_r_r
+
ALIGNADDR 10 ..... 110110 ..... 0 0001 1000 ..... @r_r_r
ALIGNADDRL 10 ..... 110110 ..... 0 0001 1010 ..... @r_r_r
BMASK 10 ..... 110110 ..... 0 0001 1001 ..... @r_r_r
- FPCMPLE16 10 ..... 110110 ..... 0 0010 0000 ..... @r_r_r
- FPCMPNE16 10 ..... 110110 ..... 0 0010 0010 ..... @r_r_r
- FPCMPGT16 10 ..... 110110 ..... 0 0010 1000 ..... @r_r_r
- FPCMPEQ16 10 ..... 110110 ..... 0 0010 1010 ..... @r_r_r
- FPCMPLE32 10 ..... 110110 ..... 0 0010 0100 ..... @r_r_r
- FPCMPNE32 10 ..... 110110 ..... 0 0010 0110 ..... @r_r_r
- FPCMPGT32 10 ..... 110110 ..... 0 0010 1100 ..... @r_r_r
- FPCMPEQ32 10 ..... 110110 ..... 0 0010 1110 ..... @r_r_r
+ CMASK8 10 00000 110110 00000 0 0001 1011 rs2:5
+ CMASK16 10 00000 110110 00000 0 0001 1101 rs2:5
+ CMASK32 10 00000 110110 00000 0 0001 1111 rs2:5
- FMUL8x16 10 ..... 110110 ..... 0 0011 0001 ..... @r_r_r
- FMUL8x16AU 10 ..... 110110 ..... 0 0011 0011 ..... @r_r_r
- FMUL8x16AL 10 ..... 110110 ..... 0 0011 0101 ..... @r_r_r
- FMUL8SUx16 10 ..... 110110 ..... 0 0011 0110 ..... @r_r_r
- FMUL8ULx16 10 ..... 110110 ..... 0 0011 0111 ..... @r_r_r
- FMULD8SUx16 10 ..... 110110 ..... 0 0011 1000 ..... @r_r_r
- FMULD8ULx16 10 ..... 110110 ..... 0 0011 1001 ..... @r_r_r
- FPACK32 10 ..... 110110 ..... 0 0011 1010 ..... @r_r_r
- FPACK16 10 ..... 110110 00000 0 0011 1011 ..... @r_r2
- FPACKFIX 10 ..... 110110 00000 0 0011 1101 ..... @r_r2
- PDIST 10 ..... 110110 ..... 0 0011 1110 ..... @r_r_r
+ FPCMPLE16 10 ..... 110110 ..... 0 0010 0000 ..... @r_d_d
+ FPCMPNE16 10 ..... 110110 ..... 0 0010 0010 ..... @r_d_d
+ FPCMPGT16 10 ..... 110110 ..... 0 0010 1000 ..... @r_d_d
+ FPCMPEQ16 10 ..... 110110 ..... 0 0010 1010 ..... @r_d_d
+ FPCMPLE32 10 ..... 110110 ..... 0 0010 0100 ..... @r_d_d
+ FPCMPNE32 10 ..... 110110 ..... 0 0010 0110 ..... @r_d_d
+ FPCMPGT32 10 ..... 110110 ..... 0 0010 1100 ..... @r_d_d
+ FPCMPEQ32 10 ..... 110110 ..... 0 0010 1110 ..... @r_d_d
- FALIGNDATAg 10 ..... 110110 ..... 0 0100 1000 ..... @r_r_r
- FPMERGE 10 ..... 110110 ..... 0 0100 1011 ..... @r_r_r
- BSHUFFLE 10 ..... 110110 ..... 0 0100 1100 ..... @r_r_r
- FEXPAND 10 ..... 110110 00000 0 0100 1101 ..... @r_r2
+ FSLL16 10 ..... 110110 ..... 0 0010 0001 ..... @d_d_d
+ FSRL16 10 ..... 110110 ..... 0 0010 0011 ..... @d_d_d
+ FSLAS16 10 ..... 110110 ..... 0 0010 1001 ..... @d_d_d
+ FSRA16 10 ..... 110110 ..... 0 0010 1011 ..... @d_d_d
+ FSLL32 10 ..... 110110 ..... 0 0010 0101 ..... @d_d_d
+ FSRL32 10 ..... 110110 ..... 0 0010 0111 ..... @d_d_d
+ FSLAS32 10 ..... 110110 ..... 0 0010 1101 ..... @d_d_d
+ FSRA32 10 ..... 110110 ..... 0 0010 1111 ..... @d_d_d
- FSRCd 10 ..... 110110 ..... 0 0111 0100 00000 @r_r1 # FSRC1d
+ FPCMPULE8 10 ..... 110110 ..... 1 0010 0000 ..... @r_d_d
+ FPCMPUGT8 10 ..... 110110 ..... 1 0010 1000 ..... @r_d_d
+ FPCMPNE8 10 ..... 110110 ..... 1 0010 0010 ..... @r_d_d
+ FPCMPEQ8 10 ..... 110110 ..... 1 0010 1010 ..... @r_d_d
+ FPCMPLE8 10 ..... 110110 ..... 0 0011 0100 ..... @r_d_d
+ FPCMPGT8 10 ..... 110110 ..... 0 0011 1100 ..... @r_d_d
+ FPCMPULE16 10 ..... 110110 ..... 1 0010 1110 ..... @r_d_d
+ FPCMPUGT16 10 ..... 110110 ..... 1 0010 1011 ..... @r_d_d
+ FPCMPULE32 10 ..... 110110 ..... 1 0010 1111 ..... @r_d_d
+ FPCMPUGT32 10 ..... 110110 ..... 1 0010 1100 ..... @r_d_d
+
+ FMUL8x16 10 ..... 110110 ..... 0 0011 0001 ..... @d_r_d
+ FMUL8x16AU 10 ..... 110110 ..... 0 0011 0011 ..... @d_r_r
+ FMUL8x16AL 10 ..... 110110 ..... 0 0011 0101 ..... @d_r_r
+ FMUL8SUx16 10 ..... 110110 ..... 0 0011 0110 ..... @d_d_d
+ FMUL8ULx16 10 ..... 110110 ..... 0 0011 0111 ..... @d_d_d
+ FMULD8SUx16 10 ..... 110110 ..... 0 0011 1000 ..... @d_r_r
+ FMULD8ULx16 10 ..... 110110 ..... 0 0011 1001 ..... @d_r_r
+ FPACK32 10 ..... 110110 ..... 0 0011 1010 ..... @d_d_d
+ FPACK16 10 ..... 110110 00000 0 0011 1011 ..... @r_d2
+ FPACKFIX 10 ..... 110110 00000 0 0011 1101 ..... @r_d2
+ PDIST 10 ..... 110110 ..... 0 0011 1110 ..... \
+ &r_r_r_r rd=%dfp_rd rs1=%dfp_rd rs2=%dfp_rs1 rs3=%dfp_rs2
+ PDISTN 10 ..... 110110 ..... 0 0011 1111 ..... @r_d_d
+
+ FMEAN16 10 ..... 110110 ..... 0 0100 0000 ..... @d_d_d
+ SUBXC 10 ..... 110110 ..... 0 0100 0001 ..... @r_r_r
+ SUBXCcc 10 ..... 110110 ..... 0 0100 0011 ..... @r_r_r
+ FCHKSM16 10 ..... 110110 ..... 0 0100 0100 ..... @d_d_d
+ FALIGNDATAg 10 ..... 110110 ..... 0 0100 1000 ..... @d_d_d
+ FPMERGE 10 ..... 110110 ..... 0 0100 1011 ..... @d_r_r
+ BSHUFFLE 10 ..... 110110 ..... 0 0100 1100 ..... @d_d_d
+ FEXPAND 10 ..... 110110 00000 0 0100 1101 ..... @d_r2
+ FALIGNDATAi 10 ..... 110110 ..... 0 0100 1001 ..... @d_r_d
+
+ FSRCd 10 ..... 110110 ..... 0 0111 0100 00000 @d_d1 # FSRC1d
FSRCs 10 ..... 110110 ..... 0 0111 0101 00000 @r_r1 # FSRC1s
- FSRCd 10 ..... 110110 00000 0 0111 1000 ..... @r_r2 # FSRC2d
+ FSRCd 10 ..... 110110 00000 0 0111 1000 ..... @d_d2 # FSRC2d
FSRCs 10 ..... 110110 00000 0 0111 1001 ..... @r_r2 # FSRC2s
- FNOTd 10 ..... 110110 ..... 0 0110 1010 00000 @r_r1 # FNOT1d
+ FNOTd 10 ..... 110110 ..... 0 0110 1010 00000 @d_d1 # FNOT1d
FNOTs 10 ..... 110110 ..... 0 0110 1011 00000 @r_r1 # FNOT1s
- FNOTd 10 ..... 110110 00000 0 0110 0110 ..... @r_r2 # FNOT2d
+ FNOTd 10 ..... 110110 00000 0 0110 0110 ..... @d_d2 # FNOT2d
FNOTs 10 ..... 110110 00000 0 0110 0111 ..... @r_r2 # FNOT2s
- FPADD16 10 ..... 110110 ..... 0 0101 0000 ..... @r_r_r
+ FPADD16 10 ..... 110110 ..... 0 0101 0000 ..... @d_d_d
FPADD16s 10 ..... 110110 ..... 0 0101 0001 ..... @r_r_r
- FPADD32 10 ..... 110110 ..... 0 0101 0010 ..... @r_r_r
+ FPADD32 10 ..... 110110 ..... 0 0101 0010 ..... @d_d_d
FPADD32s 10 ..... 110110 ..... 0 0101 0011 ..... @r_r_r
- FPSUB16 10 ..... 110110 ..... 0 0101 0100 ..... @r_r_r
+ FPADD64 10 ..... 110110 ..... 0 0100 0010 ..... @d_d_d
+ FPSUB16 10 ..... 110110 ..... 0 0101 0100 ..... @d_d_d
FPSUB16s 10 ..... 110110 ..... 0 0101 0101 ..... @r_r_r
- FPSUB32 10 ..... 110110 ..... 0 0101 0110 ..... @r_r_r
+ FPSUB32 10 ..... 110110 ..... 0 0101 0110 ..... @d_d_d
FPSUB32s 10 ..... 110110 ..... 0 0101 0111 ..... @r_r_r
+ FPSUB64 10 ..... 110110 ..... 0 0100 0110 ..... @d_d_d
- FNORd 10 ..... 110110 ..... 0 0110 0010 ..... @r_r_r
+ FPADDS16 10 ..... 110110 ..... 0 0101 1000 ..... @d_d_d
+ FPADDS16s 10 ..... 110110 ..... 0 0101 1001 ..... @r_r_r
+ FPADDS32 10 ..... 110110 ..... 0 0101 1010 ..... @d_d_d
+ FPADDS32s 10 ..... 110110 ..... 0 0101 1011 ..... @r_r_r
+ FPSUBS16 10 ..... 110110 ..... 0 0101 1100 ..... @d_d_d
+ FPSUBS16s 10 ..... 110110 ..... 0 0101 1101 ..... @r_r_r
+ FPSUBS32 10 ..... 110110 ..... 0 0101 1110 ..... @d_d_d
+ FPSUBS32s 10 ..... 110110 ..... 0 0101 1111 ..... @r_r_r
+
+ FNORd 10 ..... 110110 ..... 0 0110 0010 ..... @d_d_d
FNORs 10 ..... 110110 ..... 0 0110 0011 ..... @r_r_r
- FANDNOTd 10 ..... 110110 ..... 0 0110 0100 ..... @r_r_r # FANDNOT2d
+ FANDNOTd 10 ..... 110110 ..... 0 0110 0100 ..... @d_d_d # FANDNOT2d
FANDNOTs 10 ..... 110110 ..... 0 0110 0101 ..... @r_r_r # FANDNOT2s
- FANDNOTd 10 ..... 110110 ..... 0 0110 1000 ..... @r_r_r_swap # ... 1d
+ FANDNOTd 10 ..... 110110 ..... 0 0110 1000 ..... @d_d_d_swap # ... 1d
FANDNOTs 10 ..... 110110 ..... 0 0110 1001 ..... @r_r_r_swap # ... 1s
- FXORd 10 ..... 110110 ..... 0 0110 1100 ..... @r_r_r
+ FXORd 10 ..... 110110 ..... 0 0110 1100 ..... @d_d_d
FXORs 10 ..... 110110 ..... 0 0110 1101 ..... @r_r_r
- FNANDd 10 ..... 110110 ..... 0 0110 1110 ..... @r_r_r
+ FNANDd 10 ..... 110110 ..... 0 0110 1110 ..... @d_d_d
FNANDs 10 ..... 110110 ..... 0 0110 1111 ..... @r_r_r
- FANDd 10 ..... 110110 ..... 0 0111 0000 ..... @r_r_r
+ FANDd 10 ..... 110110 ..... 0 0111 0000 ..... @d_d_d
FANDs 10 ..... 110110 ..... 0 0111 0001 ..... @r_r_r
- FXNORd 10 ..... 110110 ..... 0 0111 0010 ..... @r_r_r
+ FXNORd 10 ..... 110110 ..... 0 0111 0010 ..... @d_d_d
FXNORs 10 ..... 110110 ..... 0 0111 0011 ..... @r_r_r
- FORNOTd 10 ..... 110110 ..... 0 0111 0110 ..... @r_r_r # FORNOT2d
+ FORNOTd 10 ..... 110110 ..... 0 0111 0110 ..... @d_d_d # FORNOT2d
FORNOTs 10 ..... 110110 ..... 0 0111 0111 ..... @r_r_r # FORNOT2s
- FORNOTd 10 ..... 110110 ..... 0 0111 1010 ..... @r_r_r_swap # ... 1d
+ FORNOTd 10 ..... 110110 ..... 0 0111 1010 ..... @d_d_d_swap # ... 1d
FORNOTs 10 ..... 110110 ..... 0 0111 1011 ..... @r_r_r_swap # ... 1s
- FORd 10 ..... 110110 ..... 0 0111 1100 ..... @r_r_r
+ FORd 10 ..... 110110 ..... 0 0111 1100 ..... @d_d_d
FORs 10 ..... 110110 ..... 0 0111 1101 ..... @r_r_r
- FZEROd 10 rd:5 110110 00000 0 0110 0000 00000
+ FZEROd 10 ..... 110110 00000 0 0110 0000 00000 rd=%dfp_rd
FZEROs 10 rd:5 110110 00000 0 0110 0001 00000
- FONEd 10 rd:5 110110 00000 0 0111 1110 00000
+ FONEd 10 ..... 110110 00000 0 0111 1110 00000 rd=%dfp_rd
FONEs 10 rd:5 110110 00000 0 0111 1111 00000
+
+ MOVsTOuw 10 ..... 110110 00000 1 0001 0001 ..... @r_r2
+ MOVsTOsw 10 ..... 110110 00000 1 0001 0011 ..... @r_r2
+ MOVwTOs 10 ..... 110110 00000 1 0001 1001 ..... @r_r2
+ MOVdTOx 10 ..... 110110 00000 1 0001 0000 ..... @r_d2
+ MOVxTOd 10 ..... 110110 00000 1 0001 1000 ..... @d_r2
+
+ FPADD8 10 ..... 110110 ..... 1 0010 0100 ..... @d_d_d
+ FPADDS8 10 ..... 110110 ..... 1 0010 0110 ..... @d_d_d
+ FPADDUS8 10 ..... 110110 ..... 1 0010 0111 ..... @d_d_d
+ FPADDUS16 10 ..... 110110 ..... 1 0010 0011 ..... @d_d_d
+ FPSUB8 10 ..... 110110 ..... 1 0101 0100 ..... @d_d_d
+ FPSUBS8 10 ..... 110110 ..... 1 0101 0110 ..... @d_d_d
+ FPSUBUS8 10 ..... 110110 ..... 1 0101 0111 ..... @d_d_d
+ FPSUBUS16 10 ..... 110110 ..... 1 0101 0011 ..... @d_d_d
+
+ FPMIN8 10 ..... 110110 ..... 1 0001 1010 ..... @d_d_d
+ FPMIN16 10 ..... 110110 ..... 1 0001 1011 ..... @d_d_d
+ FPMIN32 10 ..... 110110 ..... 1 0001 1100 ..... @d_d_d
+ FPMINU8 10 ..... 110110 ..... 1 0101 1010 ..... @d_d_d
+ FPMINU16 10 ..... 110110 ..... 1 0101 1011 ..... @d_d_d
+ FPMINU32 10 ..... 110110 ..... 1 0101 1100 ..... @d_d_d
+
+ FPMAX8 10 ..... 110110 ..... 1 0001 1101 ..... @d_d_d
+ FPMAX16 10 ..... 110110 ..... 1 0001 1110 ..... @d_d_d
+ FPMAX32 10 ..... 110110 ..... 1 0001 1111 ..... @d_d_d
+ FPMAXU8 10 ..... 110110 ..... 1 0101 1101 ..... @d_d_d
+ FPMAXU16 10 ..... 110110 ..... 1 0101 1110 ..... @d_d_d
+ FPMAXU32 10 ..... 110110 ..... 1 0101 1111 ..... @d_d_d
+
+ FLCMPs 10 000 cc:2 110110 rs1:5 1 0101 0001 rs2:5
+ FLCMPd 10 000 cc:2 110110 ..... 1 0101 0010 ..... \
+ rs1=%dfp_rs1 rs2=%dfp_rs2
]
NCP 10 ----- 110110 ----- --------- ----- # v8 CPop1
}
-NCP 10 ----- 110111 ----- --------- ----- # v8 CPop2
+{
+ [
+ FMADDs 10 ..... 110111 ..... ..... 0001 ..... @r_r_r_r
+ FMADDd 10 ..... 110111 ..... ..... 0010 ..... @d_d_d_d
+ FMSUBs 10 ..... 110111 ..... ..... 0101 ..... @r_r_r_r
+ FMSUBd 10 ..... 110111 ..... ..... 0110 ..... @d_d_d_d
+ FNMSUBs 10 ..... 110111 ..... ..... 1001 ..... @r_r_r_r
+ FNMSUBd 10 ..... 110111 ..... ..... 1010 ..... @d_d_d_d
+ FNMADDs 10 ..... 110111 ..... ..... 1101 ..... @r_r_r_r
+ FNMADDd 10 ..... 110111 ..... ..... 1110 ..... @d_d_d_d
+
+ FPMADDX 10 ..... 110111 ..... ..... 0000 ..... @d_d_d_d
+ FPMADDXHI 10 ..... 110111 ..... ..... 0100 ..... @d_d_d_d
+ ]
+ NCP 10 ----- 110111 ----- --------- ----- # v8 CPop2
+}
##
## Major Opcode 11 -- load and store instructions
##
-%dfp_rd 25:5 !function=extract_dfpreg
-%qfp_rd 25:5 !function=extract_qfpreg
-
&r_r_ri_asi rd rs1 rs2_or_imm asi imm:bool
@r_r_ri_na .. rd:5 ...... rs1:5 imm:1 rs2_or_imm:s13 &r_r_ri_asi asi=-1
@d_r_ri_na .. ..... ...... rs1:5 imm:1 rs2_or_imm:s13 \
@@ -477,6 +636,7 @@
LDF 11 ..... 100000 ..... . ............. @r_r_ri_na
LDFSR 11 00000 100001 ..... . ............. @n_r_ri
LDXFSR 11 00001 100001 ..... . ............. @n_r_ri
+LDXEFSR 11 00011 100001 ..... . ............. @n_r_ri
LDQF 11 ..... 100010 ..... . ............. @q_r_ri_na
LDDF 11 ..... 100011 ..... . ............. @d_r_ri_na
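The new %dfp_* / %qfp_* fields route every encoded register number
through extract_dfpreg()/extract_qfpreg() (see translate.c below), which
rebuild the SPARC64 convention that bit 0 of the 5-bit encoding is bit 5
of the register number, keeping %f32..%f62 encodable. An illustrative
mirror of that mapping:

    static int dfp_regno(int enc5)      /* sketch of extract_dfpreg */
    {
        int r = enc5 & 0x1e;
    #ifdef TARGET_SPARC64
        r |= (enc5 & 1) << 5;           /* 0b00001 -> 32 (%f32) */
    #endif
        return r;                       /* 0b00010 -> 2 (%f2) */
    }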
diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c
index 7bdf99e..2d48e98 100644
--- a/target/sparc/ldst_helper.c
+++ b/target/sparc/ldst_helper.c
@@ -1395,6 +1395,10 @@
case ASI_TWINX_PL: /* Primary, twinx, LE */
case ASI_TWINX_S: /* Secondary, twinx */
case ASI_TWINX_SL: /* Secondary, twinx, LE */
+ case ASI_MON_P:
+ case ASI_MON_S:
+ case ASI_MON_AIUP:
+ case ASI_MON_AIUS:
/* These are always handled inline. */
g_assert_not_reached();
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index dca0728..1136390 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -28,6 +28,7 @@
#include "exec/helper-gen.h"
#include "exec/translator.h"
#include "exec/log.h"
+#include "fpu/softfloat.h"
#include "asi.h"
#define HELPER_H "helper.h"
@@ -60,14 +61,27 @@
# define gen_helper_write_softint(E, S) qemu_build_not_reached()
# define gen_helper_wrpil(E, S) qemu_build_not_reached()
# define gen_helper_wrpstate(E, S) qemu_build_not_reached()
+# define gen_helper_cmask8 ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_cmask16 ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_cmask32 ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpeq8 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fcmpeq16 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fcmpeq32 ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpgt8 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fcmpgt16 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fcmpgt32 ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmple8 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fcmple16 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fcmple32 ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpne8 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fcmpne16 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fcmpne32 ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpule8 ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpule16 ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpule32 ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpugt8 ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpugt16 ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpugt32 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fdtox ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fexpand ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fmul8sux16 ({ qemu_build_not_reached(); NULL; })
@@ -75,11 +89,15 @@
# define gen_helper_fmul8x16 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fpmerge ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fqtox ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fslas16 ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fslas32 ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fstox ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fxtod ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fxtoq ({ qemu_build_not_reached(); NULL; })
# define gen_helper_fxtos ({ qemu_build_not_reached(); NULL; })
# define gen_helper_pdist ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_xmulx ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_xmulxhi ({ qemu_build_not_reached(); NULL; })
# define MAXTL_MASK 0
#endif
@@ -123,8 +141,7 @@
#define cpu_xcc_C ({ qemu_build_not_reached(); NULL; })
#endif
-/* Floating point registers */
-static TCGv_i64 cpu_fpr[TARGET_DPREGS];
+/* Floating point comparison registers */
static TCGv_i32 cpu_fcc[TARGET_FCCREGS];
#define env_field_offsetof(X) offsetof(CPUSPARCState, X)
@@ -190,14 +207,6 @@
#define GET_FIELDs(x,a,b) sign_extend (GET_FIELD(x,a,b), (b) - (a) + 1)
#define GET_FIELD_SPs(x,a,b) sign_extend (GET_FIELD_SP(x,a,b), ((b) - (a) + 1))
-#ifdef TARGET_SPARC64
-#define DFPREG(r) (((r & 1) << 5) | (r & 0x1e))
-#define QFPREG(r) (((r & 1) << 5) | (r & 0x1c))
-#else
-#define DFPREG(r) (r & 0x1e)
-#define QFPREG(r) (r & 0x1c)
-#endif
-
#define UA2005_HTRAP_MASK 0xff
#define V8_TRAP_MASK 0x7f
@@ -217,59 +226,72 @@
}
/* floating point registers moves */
+
+static int gen_offset_fpr_F(unsigned int reg)
+{
+ int ret;
+
+ tcg_debug_assert(reg < 32);
+ ret = offsetof(CPUSPARCState, fpr[reg / 2]);
+ if (reg & 1) {
+ ret += offsetof(CPU_DoubleU, l.lower);
+ } else {
+ ret += offsetof(CPU_DoubleU, l.upper);
+ }
+ return ret;
+}
+
static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src)
{
TCGv_i32 ret = tcg_temp_new_i32();
- if (src & 1) {
- tcg_gen_extrl_i64_i32(ret, cpu_fpr[src / 2]);
- } else {
- tcg_gen_extrh_i64_i32(ret, cpu_fpr[src / 2]);
- }
+ tcg_gen_ld_i32(ret, tcg_env, gen_offset_fpr_F(src));
return ret;
}
static void gen_store_fpr_F(DisasContext *dc, unsigned int dst, TCGv_i32 v)
{
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_extu_i32_i64(t, v);
- tcg_gen_deposit_i64(cpu_fpr[dst / 2], cpu_fpr[dst / 2], t,
- (dst & 1 ? 0 : 32), 32);
+ tcg_gen_st_i32(v, tcg_env, gen_offset_fpr_F(dst));
gen_update_fprs_dirty(dc, dst);
}
+static int gen_offset_fpr_D(unsigned int reg)
+{
+ tcg_debug_assert(reg < 64);
+ tcg_debug_assert(reg % 2 == 0);
+ return offsetof(CPUSPARCState, fpr[reg / 2]);
+}
+
static TCGv_i64 gen_load_fpr_D(DisasContext *dc, unsigned int src)
{
- src = DFPREG(src);
- return cpu_fpr[src / 2];
+ TCGv_i64 ret = tcg_temp_new_i64();
+ tcg_gen_ld_i64(ret, tcg_env, gen_offset_fpr_D(src));
+ return ret;
}
static void gen_store_fpr_D(DisasContext *dc, unsigned int dst, TCGv_i64 v)
{
- dst = DFPREG(dst);
- tcg_gen_mov_i64(cpu_fpr[dst / 2], v);
+ tcg_gen_st_i64(v, tcg_env, gen_offset_fpr_D(dst));
gen_update_fprs_dirty(dc, dst);
}
-static TCGv_i64 gen_dest_fpr_D(DisasContext *dc, unsigned int dst)
-{
- return cpu_fpr[DFPREG(dst) / 2];
-}
-
static TCGv_i128 gen_load_fpr_Q(DisasContext *dc, unsigned int src)
{
TCGv_i128 ret = tcg_temp_new_i128();
+ TCGv_i64 h = gen_load_fpr_D(dc, src);
+ TCGv_i64 l = gen_load_fpr_D(dc, src + 2);
- src = QFPREG(src);
- tcg_gen_concat_i64_i128(ret, cpu_fpr[src / 2 + 1], cpu_fpr[src / 2]);
+ tcg_gen_concat_i64_i128(ret, l, h);
return ret;
}
static void gen_store_fpr_Q(DisasContext *dc, unsigned int dst, TCGv_i128 v)
{
- dst = DFPREG(dst);
- tcg_gen_extr_i128_i64(cpu_fpr[dst / 2 + 1], cpu_fpr[dst / 2], v);
- gen_update_fprs_dirty(dc, dst);
+ TCGv_i64 h = tcg_temp_new_i64();
+ TCGv_i64 l = tcg_temp_new_i64();
+
+ tcg_gen_extr_i128_i64(l, h, v);
+ gen_store_fpr_D(dc, dst, h);
+ gen_store_fpr_D(dc, dst + 2, l);
}
/* moves */
@@ -428,6 +450,17 @@
gen_op_addcc_int(dst, src1, src2, gen_carry32());
}
+static void gen_op_addxc(TCGv dst, TCGv src1, TCGv src2)
+{
+ tcg_gen_add_tl(dst, src1, src2);
+ tcg_gen_add_tl(dst, dst, cpu_cc_C);
+}
+
+static void gen_op_addxccc(TCGv dst, TCGv src1, TCGv src2)
+{
+ gen_op_addcc_int(dst, src1, src2, cpu_cc_C);
+}
+
static void gen_op_subcc_int(TCGv dst, TCGv src1, TCGv src2, TCGv cin)
{
TCGv z = tcg_constant_tl(0);
@@ -482,6 +515,17 @@
gen_op_subcc_int(dst, src1, src2, gen_carry32());
}
+static void gen_op_subxc(TCGv dst, TCGv src1, TCGv src2)
+{
+ tcg_gen_sub_tl(dst, src1, src2);
+ tcg_gen_sub_tl(dst, dst, cpu_cc_C);
+}
+
+static void gen_op_subxccc(TCGv dst, TCGv src1, TCGv src2)
+{
+ gen_op_subcc_int(dst, src1, src2, cpu_cc_C);
+}
+
static void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2)
{
TCGv zero = tcg_constant_tl(0);
@@ -556,6 +600,32 @@
gen_op_multiply(dst, src1, src2, 1);
}
+static void gen_op_umulxhi(TCGv dst, TCGv src1, TCGv src2)
+{
+ TCGv discard = tcg_temp_new();
+ tcg_gen_mulu2_tl(discard, dst, src1, src2);
+}
+
+static void gen_op_fpmaddx(TCGv_i64 dst, TCGv_i64 src1,
+ TCGv_i64 src2, TCGv_i64 src3)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_mul_i64(t, src1, src2);
+ tcg_gen_add_i64(dst, src3, t);
+}
+
+static void gen_op_fpmaddxhi(TCGv_i64 dst, TCGv_i64 src1,
+ TCGv_i64 src2, TCGv_i64 src3)
+{
+ TCGv_i64 l = tcg_temp_new_i64();
+ TCGv_i64 h = tcg_temp_new_i64();
+ TCGv_i64 z = tcg_constant_i64(0);
+
+ tcg_gen_mulu2_i64(l, h, src1, src2);
+ tcg_gen_add2_i64(l, dst, l, h, src3, z);
+}
+
static void gen_op_sdiv(TCGv dst, TCGv src1, TCGv src2)
{
#ifdef TARGET_SPARC64
@@ -633,6 +703,11 @@
tcg_gen_ctpop_tl(dst, src2);
}
+static void gen_op_lzcnt(TCGv dst, TCGv src)
+{
+ tcg_gen_clzi_tl(dst, src, TARGET_LONG_BITS);
+}
+
#ifndef TARGET_SPARC64
static void gen_helper_array8(TCGv dst, TCGv src1, TCGv src2)
{
@@ -679,7 +754,80 @@
#endif
}
-static void gen_op_faligndata(TCGv_i64 dst, TCGv_i64 s1, TCGv_i64 s2)
+static void gen_op_fpadds16s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2)
+{
+ TCGv_i32 t[2];
+
+ for (int i = 0; i < 2; i++) {
+ TCGv_i32 u = tcg_temp_new_i32();
+ TCGv_i32 v = tcg_temp_new_i32();
+
+ tcg_gen_sextract_i32(u, src1, i * 16, 16);
+ tcg_gen_sextract_i32(v, src2, i * 16, 16);
+ tcg_gen_add_i32(u, u, v);
+ tcg_gen_smax_i32(u, u, tcg_constant_i32(INT16_MIN));
+ tcg_gen_smin_i32(u, u, tcg_constant_i32(INT16_MAX));
+ t[i] = u;
+ }
+ tcg_gen_deposit_i32(d, t[0], t[1], 16, 16);
+}
+
+static void gen_op_fpsubs16s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2)
+{
+ TCGv_i32 t[2];
+
+ for (int i = 0; i < 2; i++) {
+ TCGv_i32 u = tcg_temp_new_i32();
+ TCGv_i32 v = tcg_temp_new_i32();
+
+ tcg_gen_sextract_i32(u, src1, i * 16, 16);
+ tcg_gen_sextract_i32(v, src2, i * 16, 16);
+ tcg_gen_sub_i32(u, u, v);
+ tcg_gen_smax_i32(u, u, tcg_constant_i32(INT16_MIN));
+ tcg_gen_smin_i32(u, u, tcg_constant_i32(INT16_MAX));
+ t[i] = u;
+ }
+ tcg_gen_deposit_i32(d, t[0], t[1], 16, 16);
+}
+
+static void gen_op_fpadds32s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2)
+{
+ TCGv_i32 r = tcg_temp_new_i32();
+ TCGv_i32 t = tcg_temp_new_i32();
+ TCGv_i32 v = tcg_temp_new_i32();
+ TCGv_i32 z = tcg_constant_i32(0);
+
+ tcg_gen_add_i32(r, src1, src2);
+ tcg_gen_xor_i32(t, src1, src2);
+ tcg_gen_xor_i32(v, r, src2);
+ tcg_gen_andc_i32(v, v, t);
+
+ tcg_gen_setcond_i32(TCG_COND_GE, t, r, z);
+ tcg_gen_addi_i32(t, t, INT32_MAX);
+
+ tcg_gen_movcond_i32(TCG_COND_LT, d, v, z, t, r);
+}
+
+static void gen_op_fpsubs32s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2)
+{
+ TCGv_i32 r = tcg_temp_new_i32();
+ TCGv_i32 t = tcg_temp_new_i32();
+ TCGv_i32 v = tcg_temp_new_i32();
+ TCGv_i32 z = tcg_constant_i32(0);
+
+ tcg_gen_sub_i32(r, src1, src2);
+ tcg_gen_xor_i32(t, src1, src2);
+ tcg_gen_xor_i32(v, r, src1);
+ tcg_gen_and_i32(v, v, t);
+
+ tcg_gen_setcond_i32(TCG_COND_GE, t, r, z);
+ tcg_gen_addi_i32(t, t, INT32_MAX);
+
+ tcg_gen_movcond_i32(TCG_COND_LT, d, v, z, t, r);
+}
+
+static void gen_op_faligndata_i(TCGv_i64 dst, TCGv_i64 s1,
+ TCGv_i64 s2, TCGv gsr)
{
#ifdef TARGET_SPARC64
TCGv t1, t2, shift;
@@ -688,7 +836,7 @@
t2 = tcg_temp_new();
shift = tcg_temp_new();
- tcg_gen_andi_tl(shift, cpu_gsr, 7);
+ tcg_gen_andi_tl(shift, gsr, 7);
tcg_gen_shli_tl(shift, shift, 3);
tcg_gen_shl_tl(t1, s1, shift);
@@ -706,6 +854,11 @@
#endif
}
+static void gen_op_faligndata_g(TCGv_i64 dst, TCGv_i64 s1, TCGv_i64 s2)
+{
+ gen_op_faligndata_i(dst, s1, s2, cpu_gsr);
+}
+
static void gen_op_bshuffle(TCGv_i64 dst, TCGv_i64 src1, TCGv_i64 src2)
{
#ifdef TARGET_SPARC64
@@ -715,6 +868,15 @@
#endif
}
+static void gen_op_pdistn(TCGv dst, TCGv_i64 src1, TCGv_i64 src2)
+{
+#ifdef TARGET_SPARC64
+ gen_helper_pdist(dst, tcg_constant_i64(0), src1, src2);
+#else
+ g_assert_not_reached();
+#endif
+}
+
static void gen_op_fmul8x16al(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2)
{
tcg_gen_ext16s_i32(src2, src2);
@@ -769,6 +931,66 @@
tcg_gen_concat_i32_i64(dst, t0, t1);
}
+#ifdef TARGET_SPARC64
+static void gen_vec_fchksm16(unsigned vece, TCGv_vec dst,
+ TCGv_vec src1, TCGv_vec src2)
+{
+ TCGv_vec a = tcg_temp_new_vec_matching(dst);
+ TCGv_vec c = tcg_temp_new_vec_matching(dst);
+
+ tcg_gen_add_vec(vece, a, src1, src2);
+ tcg_gen_cmp_vec(TCG_COND_LTU, vece, c, a, src1);
+ /* Vector cmp produces -1 for true, so subtract to add carry. */
+ tcg_gen_sub_vec(vece, dst, a, c);
+}
+
+static void gen_op_fchksm16(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_cmp_vec, INDEX_op_add_vec, INDEX_op_sub_vec,
+ };
+ static const GVecGen3 op = {
+ .fni8 = gen_helper_fchksm16,
+ .fniv = gen_vec_fchksm16,
+ .opt_opc = vecop_list,
+ .vece = MO_16,
+ };
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &op);
+}
+
+static void gen_vec_fmean16(unsigned vece, TCGv_vec dst,
+ TCGv_vec src1, TCGv_vec src2)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(dst);
+
+ tcg_gen_or_vec(vece, t, src1, src2);
+ tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(dst, vece, 1));
+ tcg_gen_sari_vec(vece, src1, src1, 1);
+ tcg_gen_sari_vec(vece, src2, src2, 1);
+ tcg_gen_add_vec(vece, dst, src1, src2);
+ tcg_gen_add_vec(vece, dst, dst, t);
+}
+
+static void gen_op_fmean16(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_add_vec, INDEX_op_sari_vec,
+ };
+ static const GVecGen3 op = {
+ .fni8 = gen_helper_fmean16,
+ .fniv = gen_vec_fmean16,
+ .opt_opc = vecop_list,
+ .vece = MO_16,
+ };
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &op);
+}
+#else
+#define gen_op_fchksm16 ({ qemu_build_not_reached(); NULL; })
+#define gen_op_fmean16 ({ qemu_build_not_reached(); NULL; })
+#endif
+
static void finishing_insn(DisasContext *dc)
{
/*
@@ -1138,6 +1360,97 @@
tcg_gen_concat_i64_i128(dst, l, h);
}
+static void gen_op_fmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
+{
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
+}
+
+static void gen_op_fmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
+{
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
+}
+
+static void gen_op_fmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
+{
+ int op = float_muladd_negate_c;
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+}
+
+static void gen_op_fmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
+{
+ int op = float_muladd_negate_c;
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+}
+
+static void gen_op_fnmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
+{
+ int op = float_muladd_negate_c | float_muladd_negate_result;
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+}
+
+static void gen_op_fnmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
+{
+ int op = float_muladd_negate_c | float_muladd_negate_result;
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+}
+
+static void gen_op_fnmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
+{
+ int op = float_muladd_negate_result;
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+}
+
+static void gen_op_fnmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
+{
+ int op = float_muladd_negate_result;
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+}
+
+/* Use muladd to compute ((1 * src1) + src2) / 2 with one rounding. */
+static void gen_op_fhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
+{
+ TCGv_i32 one = tcg_constant_i32(float32_one);
+ int op = float_muladd_halve_result;
+ gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+}
+
+static void gen_op_fhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
+{
+ TCGv_i64 one = tcg_constant_i64(float64_one);
+ int op = float_muladd_halve_result;
+ gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+}
+
+/* Use muladd to compute ((1 * src1) - src2) / 2 with one rounding. */
+static void gen_op_fhsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
+{
+ TCGv_i32 one = tcg_constant_i32(float32_one);
+ int op = float_muladd_negate_c | float_muladd_halve_result;
+ gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+}
+
+static void gen_op_fhsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
+{
+ TCGv_i64 one = tcg_constant_i64(float64_one);
+ int op = float_muladd_negate_c | float_muladd_halve_result;
+ gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+}
+
+/* Use muladd to compute -(((1 * src1) + src2) / 2) with one rounding. */
+static void gen_op_fnhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
+{
+ TCGv_i32 one = tcg_constant_i32(float32_one);
+ int op = float_muladd_negate_result | float_muladd_halve_result;
+ gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+}
+
+static void gen_op_fnhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
+{
+ TCGv_i64 one = tcg_constant_i64(float64_one);
+ int op = float_muladd_negate_result | float_muladd_halve_result;
+ gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+}
+
static void gen_op_fpexception_im(DisasContext *dc, int ftt)
{
/*
@@ -1294,6 +1607,7 @@
case ASI_BLK_AIUP_L_4V:
case ASI_BLK_AIUP:
case ASI_BLK_AIUPL:
+ case ASI_MON_AIUP:
mem_idx = MMU_USER_IDX;
break;
case ASI_AIUS: /* As if user secondary */
@@ -1304,6 +1618,7 @@
case ASI_BLK_AIUS_L_4V:
case ASI_BLK_AIUS:
case ASI_BLK_AIUSL:
+ case ASI_MON_AIUS:
mem_idx = MMU_USER_SECONDARY_IDX;
break;
case ASI_S: /* Secondary */
@@ -1317,6 +1632,7 @@
case ASI_FL8_SL:
case ASI_FL16_S:
case ASI_FL16_SL:
+ case ASI_MON_S:
if (mem_idx == MMU_USER_IDX) {
mem_idx = MMU_USER_SECONDARY_IDX;
} else if (mem_idx == MMU_KERNEL_IDX) {
@@ -1334,6 +1650,7 @@
case ASI_FL8_PL:
case ASI_FL16_P:
case ASI_FL16_PL:
+ case ASI_MON_P:
break;
}
switch (asi) {
@@ -1351,6 +1668,10 @@
case ASI_SL:
case ASI_P:
case ASI_PL:
+ case ASI_MON_P:
+ case ASI_MON_S:
+ case ASI_MON_AIUP:
+ case ASI_MON_AIUS:
type = GET_ASI_DIRECT;
break;
case ASI_TWINX_REAL:
@@ -1627,7 +1948,7 @@
MemOp memop = da->memop;
MemOp size = memop & MO_SIZE;
TCGv_i32 d32;
- TCGv_i64 d64;
+ TCGv_i64 d64, l64;
TCGv addr_tmp;
/* TODO: Use 128-bit load/store below. */
@@ -1649,16 +1970,20 @@
break;
case MO_64:
- tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da->mem_idx, memop);
+ d64 = tcg_temp_new_i64();
+ tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx, memop);
+ gen_store_fpr_D(dc, rd, d64);
break;
case MO_128:
d64 = tcg_temp_new_i64();
+ l64 = tcg_temp_new_i64();
tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx, memop);
addr_tmp = tcg_temp_new();
tcg_gen_addi_tl(addr_tmp, addr, 8);
- tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2 + 1], addr_tmp, da->mem_idx, memop);
- tcg_gen_mov_i64(cpu_fpr[rd / 2], d64);
+ tcg_gen_qemu_ld_i64(l64, addr_tmp, da->mem_idx, memop);
+ gen_store_fpr_D(dc, rd, d64);
+ gen_store_fpr_D(dc, rd + 2, l64);
break;
default:
g_assert_not_reached();
@@ -1670,9 +1995,11 @@
if (orig_size == MO_64 && (rd & 7) == 0) {
/* The first operation checks required alignment. */
addr_tmp = tcg_temp_new();
+ d64 = tcg_temp_new_i64();
for (int i = 0; ; ++i) {
- tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2 + i], addr, da->mem_idx,
+ tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx,
memop | (i == 0 ? MO_ALIGN_64 : 0));
+ gen_store_fpr_D(dc, rd + 2 * i, d64);
if (i == 7) {
break;
}
@@ -1687,8 +2014,9 @@
case GET_ASI_SHORT:
/* Valid for lddfa only. */
if (orig_size == MO_64) {
- tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da->mem_idx,
- memop | MO_ALIGN);
+ d64 = tcg_temp_new_i64();
+ tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx, memop | MO_ALIGN);
+ gen_store_fpr_D(dc, rd, d64);
} else {
gen_exception(dc, TT_ILL_INSN);
}
@@ -1713,17 +2041,19 @@
gen_store_fpr_F(dc, rd, d32);
break;
case MO_64:
- gen_helper_ld_asi(cpu_fpr[rd / 2], tcg_env, addr,
- r_asi, r_mop);
+ d64 = tcg_temp_new_i64();
+ gen_helper_ld_asi(d64, tcg_env, addr, r_asi, r_mop);
+ gen_store_fpr_D(dc, rd, d64);
break;
case MO_128:
d64 = tcg_temp_new_i64();
+ l64 = tcg_temp_new_i64();
gen_helper_ld_asi(d64, tcg_env, addr, r_asi, r_mop);
addr_tmp = tcg_temp_new();
tcg_gen_addi_tl(addr_tmp, addr, 8);
- gen_helper_ld_asi(cpu_fpr[rd / 2 + 1], tcg_env, addr_tmp,
- r_asi, r_mop);
- tcg_gen_mov_i64(cpu_fpr[rd / 2], d64);
+ gen_helper_ld_asi(l64, tcg_env, addr_tmp, r_asi, r_mop);
+ gen_store_fpr_D(dc, rd, d64);
+ gen_store_fpr_D(dc, rd + 2, l64);
break;
default:
g_assert_not_reached();
@@ -1739,6 +2069,7 @@
MemOp memop = da->memop;
MemOp size = memop & MO_SIZE;
TCGv_i32 d32;
+ TCGv_i64 d64;
TCGv addr_tmp;
/* TODO: Use 128-bit load/store below. */
@@ -1758,8 +2089,8 @@
tcg_gen_qemu_st_i32(d32, addr, da->mem_idx, memop | MO_ALIGN);
break;
case MO_64:
- tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da->mem_idx,
- memop | MO_ALIGN_4);
+ d64 = gen_load_fpr_D(dc, rd);
+ tcg_gen_qemu_st_i64(d64, addr, da->mem_idx, memop | MO_ALIGN_4);
break;
case MO_128:
/* Only 4-byte alignment required. However, it is legal for the
@@ -1767,11 +2098,12 @@
required to fix it up. Requiring 16-byte alignment here avoids
having to probe the second page before performing the first
write. */
- tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da->mem_idx,
- memop | MO_ALIGN_16);
+ d64 = gen_load_fpr_D(dc, rd);
+ tcg_gen_qemu_st_i64(d64, addr, da->mem_idx, memop | MO_ALIGN_16);
addr_tmp = tcg_temp_new();
tcg_gen_addi_tl(addr_tmp, addr, 8);
- tcg_gen_qemu_st_i64(cpu_fpr[rd / 2 + 1], addr_tmp, da->mem_idx, memop);
+ d64 = gen_load_fpr_D(dc, rd + 2);
+ tcg_gen_qemu_st_i64(d64, addr_tmp, da->mem_idx, memop);
break;
default:
g_assert_not_reached();
@@ -1784,7 +2116,8 @@
/* The first operation checks required alignment. */
addr_tmp = tcg_temp_new();
for (int i = 0; ; ++i) {
- tcg_gen_qemu_st_i64(cpu_fpr[rd / 2 + i], addr, da->mem_idx,
+ d64 = gen_load_fpr_D(dc, rd + 2 * i);
+ tcg_gen_qemu_st_i64(d64, addr, da->mem_idx,
memop | (i == 0 ? MO_ALIGN_64 : 0));
if (i == 7) {
break;
@@ -1800,8 +2133,8 @@
case GET_ASI_SHORT:
/* Valid for stdfa only. */
if (orig_size == MO_64) {
- tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da->mem_idx,
- memop | MO_ALIGN);
+ d64 = gen_load_fpr_D(dc, rd);
+ tcg_gen_qemu_st_i64(d64, addr, da->mem_idx, memop | MO_ALIGN);
} else {
gen_exception(dc, TT_ILL_INSN);
}
@@ -2032,7 +2365,7 @@
static void gen_fmovd(DisasContext *dc, DisasCompare *cmp, int rd, int rs)
{
#ifdef TARGET_SPARC64
- TCGv_i64 dst = gen_dest_fpr_D(dc, rd);
+ TCGv_i64 dst = tcg_temp_new_i64();
tcg_gen_movcond_i64(cmp->cond, dst, cmp->c1, tcg_constant_tl(cmp->c2),
gen_load_fpr_D(dc, rs),
gen_load_fpr_D(dc, rd));
@@ -2045,16 +2378,18 @@
static void gen_fmovq(DisasContext *dc, DisasCompare *cmp, int rd, int rs)
{
#ifdef TARGET_SPARC64
- int qd = QFPREG(rd);
- int qs = QFPREG(rs);
TCGv c2 = tcg_constant_tl(cmp->c2);
+ TCGv_i64 h = tcg_temp_new_i64();
+ TCGv_i64 l = tcg_temp_new_i64();
- tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2], cmp->c1, c2,
- cpu_fpr[qs / 2], cpu_fpr[qd / 2]);
- tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2 + 1], cmp->c1, c2,
- cpu_fpr[qs / 2 + 1], cpu_fpr[qd / 2 + 1]);
-
- gen_update_fprs_dirty(dc, qd);
+ tcg_gen_movcond_i64(cmp->cond, h, cmp->c1, c2,
+ gen_load_fpr_D(dc, rs),
+ gen_load_fpr_D(dc, rd));
+ tcg_gen_movcond_i64(cmp->cond, l, cmp->c1, c2,
+ gen_load_fpr_D(dc, rs + 2),
+ gen_load_fpr_D(dc, rd + 2));
+ gen_store_fpr_D(dc, rd, h);
+ gen_store_fpr_D(dc, rd + 2, l);
#else
qemu_build_not_reached();
#endif
@@ -2086,12 +2421,20 @@
static int extract_dfpreg(DisasContext *dc, int x)
{
- return DFPREG(x);
+ int r = x & 0x1e;
+#ifdef TARGET_SPARC64
+ r |= (x & 1) << 5;
+#endif
+ return r;
}
static int extract_qfpreg(DisasContext *dc, int x)
{
- return QFPREG(x);
+ int r = x & 0x1c;
+#ifdef TARGET_SPARC64
+ r |= (x & 1) << 5;
+#endif
+ return r;
}
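
(Editor's illustration: SPARC V9 reuses the low bit of the 5-bit register field as bit 5 of the register number for double and quad operands, so a field value of 3 names %f34: (3 & 0x1e) | ((3 & 1) << 5) = 2 | 32 = 34.)
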
/* Include the auto-generated decoder. */
@@ -2110,10 +2453,15 @@
# define avail_MUL(C) true
# define avail_POWERDOWN(C) false
# define avail_64(C) true
+# define avail_FMAF(C) ((C)->def->features & CPU_FEATURE_FMAF)
# define avail_GL(C) ((C)->def->features & CPU_FEATURE_GL)
# define avail_HYPV(C) ((C)->def->features & CPU_FEATURE_HYPV)
+# define avail_IMA(C) ((C)->def->features & CPU_FEATURE_IMA)
# define avail_VIS1(C) ((C)->def->features & CPU_FEATURE_VIS1)
# define avail_VIS2(C) ((C)->def->features & CPU_FEATURE_VIS2)
+# define avail_VIS3(C) ((C)->def->features & CPU_FEATURE_VIS3)
+# define avail_VIS3B(C) avail_VIS3(C)
+# define avail_VIS4(C) ((C)->def->features & CPU_FEATURE_VIS4)
#else
# define avail_32(C) true
# define avail_ASR17(C) ((C)->def->features & CPU_FEATURE_ASR17)
@@ -2122,10 +2470,15 @@
# define avail_MUL(C) ((C)->def->features & CPU_FEATURE_MUL)
# define avail_POWERDOWN(C) ((C)->def->features & CPU_FEATURE_POWERDOWN)
# define avail_64(C) false
+# define avail_FMAF(C) false
# define avail_GL(C) false
# define avail_HYPV(C) false
+# define avail_IMA(C) false
# define avail_VIS1(C) false
# define avail_VIS2(C) false
+# define avail_VIS3(C) false
+# define avail_VIS3B(C) false
+# define avail_VIS4(C) false
#endif
/* Default case for non jump instructions. */
@@ -2999,6 +3352,17 @@
TRANS(WRPOWERDOWN, POWERDOWN, do_wr_special, a, supervisor(dc), do_wrpowerdown)
+static void do_wrmwait(DisasContext *dc, TCGv src)
+{
+ /*
+ * TODO: This is a stub version of mwait, which merely recognizes
+ * interrupts immediately and does not wait.
+ */
+ dc->base.is_jmp = DISAS_EXIT;
+}
+
+TRANS(WRMWAIT, VIS4, do_wr_special, a, true, do_wrmwait)
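
(Editor's note: the stub works because DISAS_EXIT ends the translation block and returns to the main loop, which is where pending interrupts are recognized; a faithful implementation would additionally halt until one arrives.)
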
+
static void do_wrpsr(DisasContext *dc, TCGv src)
{
gen_helper_wrpsr(tcg_env, src);
@@ -3519,11 +3883,10 @@
}
static bool gen_edge(DisasContext *dc, arg_r_r_r *a,
- int width, bool cc, bool left)
+ int width, bool cc, bool little_endian)
{
- TCGv dst, s1, s2, lo1, lo2;
- uint64_t amask, tabl, tabr;
- int shift, imask, omask;
+ TCGv dst, s1, s2, l, r, t, m;
+ uint64_t amask = address_mask_i(dc, -8);
dst = gen_dest_gpr(dc, a->rd);
s1 = gen_load_gpr(dc, a->rs1);
@@ -3533,75 +3896,52 @@
gen_op_subcc(cpu_cc_N, s1, s2);
}
- /*
- * Theory of operation: there are two tables, left and right (not to
- * be confused with the left and right versions of the opcode). These
- * are indexed by the low 3 bits of the inputs. To make things "easy",
- * these tables are loaded into two constants, TABL and TABR below.
- * The operation index = (input & imask) << shift calculates the index
- * into the constant, while val = (table >> index) & omask calculates
- * the value we're looking for.
- */
+ l = tcg_temp_new();
+ r = tcg_temp_new();
+ t = tcg_temp_new();
+
switch (width) {
case 8:
- imask = 0x7;
- shift = 3;
- omask = 0xff;
- if (left) {
- tabl = 0x80c0e0f0f8fcfeffULL;
- tabr = 0xff7f3f1f0f070301ULL;
- } else {
- tabl = 0x0103070f1f3f7fffULL;
- tabr = 0xfffefcf8f0e0c080ULL;
- }
+ tcg_gen_andi_tl(l, s1, 7);
+ tcg_gen_andi_tl(r, s2, 7);
+ tcg_gen_xori_tl(r, r, 7);
+ m = tcg_constant_tl(0xff);
break;
case 16:
- imask = 0x6;
- shift = 1;
- omask = 0xf;
- if (left) {
- tabl = 0x8cef;
- tabr = 0xf731;
- } else {
- tabl = 0x137f;
- tabr = 0xfec8;
- }
+ tcg_gen_extract_tl(l, s1, 1, 2);
+ tcg_gen_extract_tl(r, s2, 1, 2);
+ tcg_gen_xori_tl(r, r, 3);
+ m = tcg_constant_tl(0xf);
break;
case 32:
- imask = 0x4;
- shift = 0;
- omask = 0x3;
- if (left) {
- tabl = (2 << 2) | 3;
- tabr = (3 << 2) | 1;
- } else {
- tabl = (1 << 2) | 3;
- tabr = (3 << 2) | 2;
- }
+ tcg_gen_extract_tl(l, s1, 2, 1);
+ tcg_gen_extract_tl(r, s2, 2, 1);
+ tcg_gen_xori_tl(r, r, 1);
+ m = tcg_constant_tl(0x3);
break;
default:
abort();
}
- lo1 = tcg_temp_new();
- lo2 = tcg_temp_new();
- tcg_gen_andi_tl(lo1, s1, imask);
- tcg_gen_andi_tl(lo2, s2, imask);
- tcg_gen_shli_tl(lo1, lo1, shift);
- tcg_gen_shli_tl(lo2, lo2, shift);
+ /* Compute Left Edge */
+ if (little_endian) {
+ tcg_gen_shl_tl(l, m, l);
+ tcg_gen_and_tl(l, l, m);
+ } else {
+ tcg_gen_shr_tl(l, m, l);
+ }
+ /* Compute Right Edge */
+ if (little_endian) {
+ tcg_gen_shr_tl(r, m, r);
+ } else {
+ tcg_gen_shl_tl(r, m, r);
+ tcg_gen_and_tl(r, r, m);
+ }
- tcg_gen_shr_tl(lo1, tcg_constant_tl(tabl), lo1);
- tcg_gen_shr_tl(lo2, tcg_constant_tl(tabr), lo2);
- tcg_gen_andi_tl(lo1, lo1, omask);
- tcg_gen_andi_tl(lo2, lo2, omask);
-
- amask = address_mask_i(dc, -8);
- tcg_gen_andi_tl(s1, s1, amask);
- tcg_gen_andi_tl(s2, s2, amask);
-
- /* Compute dst = (s1 == s2 ? lo1 : lo1 & lo2). */
- tcg_gen_and_tl(lo2, lo2, lo1);
- tcg_gen_movcond_tl(TCG_COND_EQ, dst, s1, s2, lo1, lo2);
+ /* Compute dst = (s1 == s2 under amask ? l & r : l) */
+ tcg_gen_xor_tl(t, s1, s2);
+ tcg_gen_and_tl(r, r, l);
+ tcg_gen_movcond_tl(TCG_COND_TSTEQ, dst, t, tcg_constant_tl(amask), r, l);
gen_store_gpr(dc, a->rd, dst);
return advance_pc(dc);
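
For reference, a host-side sketch of what the generated ops compute for the width-8 big-endian case (editor's illustration; edge8_mask is a hypothetical name, and amask is -8 limited to the current address width):

    static uint8_t edge8_mask(uint64_t s1, uint64_t s2, uint64_t amask)
    {
        uint8_t l = 0xff >> (s1 & 7);           /* left edge from rs1 */
        uint8_t r = 0xff << (7 - (s2 & 7));     /* right edge from rs2 */
        /* Same 8-byte block: both edges apply; otherwise only the left. */
        return ((s1 ^ s2) & amask) ? l : (l & r);
    }
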
@@ -3621,6 +3961,19 @@
TRANS(EDGE32N, VIS2, gen_edge, a, 32, 0, 0)
TRANS(EDGE32LN, VIS2, gen_edge, a, 32, 0, 1)
+static bool do_rr(DisasContext *dc, arg_r_r *a,
+ void (*func)(TCGv, TCGv))
+{
+ TCGv dst = gen_dest_gpr(dc, a->rd);
+ TCGv src = gen_load_gpr(dc, a->rs);
+
+ func(dst, src);
+ gen_store_gpr(dc, a->rd, dst);
+ return advance_pc(dc);
+}
+
+TRANS(LZCNT, VIS3, do_rr, a, gen_op_lzcnt)
+
static bool do_rrr(DisasContext *dc, arg_r_r_r *a,
void (*func)(TCGv, TCGv, TCGv))
{
@@ -3637,6 +3990,14 @@
TRANS(ARRAY16, VIS1, do_rrr, a, gen_op_array16)
TRANS(ARRAY32, VIS1, do_rrr, a, gen_op_array32)
+TRANS(ADDXC, VIS3, do_rrr, a, gen_op_addxc)
+TRANS(ADDXCcc, VIS3, do_rrr, a, gen_op_addxccc)
+
+TRANS(SUBXC, VIS4, do_rrr, a, gen_op_subxc)
+TRANS(SUBXCcc, VIS4, do_rrr, a, gen_op_subxccc)
+
+TRANS(UMULXHI, VIS3, do_rrr, a, gen_op_umulxhi)
+
static void gen_op_alignaddr(TCGv dst, TCGv s1, TCGv s2)
{
#ifdef TARGET_SPARC64
@@ -3679,6 +4040,16 @@
TRANS(BMASK, VIS2, do_rrr, a, gen_op_bmask)
+static bool do_cmask(DisasContext *dc, int rs2, void (*func)(TCGv, TCGv, TCGv))
+{
+ func(cpu_gsr, cpu_gsr, gen_load_gpr(dc, rs2));
+ return true;
+}
+
+TRANS(CMASK8, VIS3, do_cmask, a->rs2, gen_helper_cmask8)
+TRANS(CMASK16, VIS3, do_cmask, a->rs2, gen_helper_cmask16)
+TRANS(CMASK32, VIS3, do_cmask, a->rs2, gen_helper_cmask32)
+
static bool do_shift_r(DisasContext *dc, arg_shiftr *a, bool l, bool u)
{
TCGv dst, src1, src2;
@@ -4193,7 +4564,7 @@
return advance_pc(dc);
}
-static bool trans_LDXFSR(DisasContext *dc, arg_r_r_ri *a)
+static bool do_ldxfsr(DisasContext *dc, arg_r_r_ri *a, bool entire)
{
#ifdef TARGET_SPARC64
TCGv addr = gen_ldst_addr(dc, a->rs1, a->imm, a->rs2_or_imm);
@@ -4218,13 +4589,20 @@
tcg_gen_extract_i32(cpu_fcc[2], hi, FSR_FCC2_SHIFT - 32, 2);
tcg_gen_extract_i32(cpu_fcc[3], hi, FSR_FCC3_SHIFT - 32, 2);
- gen_helper_set_fsr_nofcc_noftt(tcg_env, lo);
+ if (entire) {
+ gen_helper_set_fsr_nofcc(tcg_env, lo);
+ } else {
+ gen_helper_set_fsr_nofcc_noftt(tcg_env, lo);
+ }
return advance_pc(dc);
#else
return false;
#endif
}
+TRANS(LDXFSR, 64, do_ldxfsr, a, false)
+TRANS(LDXEFSR, VIS3B, do_ldxfsr, a, true)
+
static bool do_stfsr(DisasContext *dc, arg_r_r_ri *a, MemOp mop)
{
TCGv addr = gen_ldst_addr(dc, a->rs1, a->imm, a->rs2_or_imm);
@@ -4246,39 +4624,24 @@
TRANS(STFSR, ALL, do_stfsr, a, MO_TEUL)
TRANS(STXFSR, 64, do_stfsr, a, MO_TEUQ)
-static bool do_fc(DisasContext *dc, int rd, bool c)
+static bool do_fc(DisasContext *dc, int rd, int32_t c)
{
- uint64_t mask;
-
if (gen_trap_ifnofpu(dc)) {
return true;
}
-
- if (rd & 1) {
- mask = MAKE_64BIT_MASK(0, 32);
- } else {
- mask = MAKE_64BIT_MASK(32, 32);
- }
- if (c) {
- tcg_gen_ori_i64(cpu_fpr[rd / 2], cpu_fpr[rd / 2], mask);
- } else {
- tcg_gen_andi_i64(cpu_fpr[rd / 2], cpu_fpr[rd / 2], ~mask);
- }
- gen_update_fprs_dirty(dc, rd);
+ gen_store_fpr_F(dc, rd, tcg_constant_i32(c));
return advance_pc(dc);
}
TRANS(FZEROs, VIS1, do_fc, a->rd, 0)
-TRANS(FONEs, VIS1, do_fc, a->rd, 1)
+TRANS(FONEs, VIS1, do_fc, a->rd, -1)
static bool do_dc(DisasContext *dc, int rd, int64_t c)
{
if (gen_trap_ifnofpu(dc)) {
return true;
}
-
- tcg_gen_movi_i64(cpu_fpr[rd / 2], c);
- gen_update_fprs_dirty(dc, rd);
+ gen_store_fpr_D(dc, rd, tcg_constant_i64(c));
return advance_pc(dc);
}
@@ -4375,7 +4738,7 @@
return true;
}
- dst = gen_dest_fpr_D(dc, a->rd);
+ dst = tcg_temp_new_i64();
src = gen_load_fpr_D(dc, a->rs);
func(dst, src);
gen_store_fpr_D(dc, a->rd, dst);
@@ -4397,7 +4760,7 @@
return true;
}
- dst = gen_dest_fpr_D(dc, a->rd);
+ dst = tcg_temp_new_i64();
src = gen_load_fpr_D(dc, a->rs);
func(dst, tcg_env, src);
gen_store_fpr_D(dc, a->rd, dst);
@@ -4437,7 +4800,7 @@
return true;
}
- dst = gen_dest_fpr_D(dc, a->rd);
+ dst = tcg_temp_new_i64();
src = gen_load_fpr_F(dc, a->rs);
func(dst, tcg_env, src);
gen_store_fpr_D(dc, a->rd, dst);
@@ -4528,7 +4891,7 @@
}
src = gen_load_fpr_Q(dc, a->rs);
- dst = gen_dest_fpr_D(dc, a->rd);
+ dst = tcg_temp_new_i64();
func(dst, tcg_env, src);
gen_store_fpr_D(dc, a->rd, dst);
return advance_pc(dc);
@@ -4612,6 +4975,15 @@
TRANS(FORNOTs, VIS1, do_fff, a, tcg_gen_orc_i32)
TRANS(FORs, VIS1, do_fff, a, tcg_gen_or_i32)
+TRANS(FHADDs, VIS3, do_fff, a, gen_op_fhadds)
+TRANS(FHSUBs, VIS3, do_fff, a, gen_op_fhsubs)
+TRANS(FNHADDs, VIS3, do_fff, a, gen_op_fnhadds)
+
+TRANS(FPADDS16s, VIS3, do_fff, a, gen_op_fpadds16s)
+TRANS(FPSUBS16s, VIS3, do_fff, a, gen_op_fpsubs16s)
+TRANS(FPADDS32s, VIS3, do_fff, a, gen_op_fpadds32s)
+TRANS(FPSUBS32s, VIS3, do_fff, a, gen_op_fpsubs32s)
+
static bool do_env_fff(DisasContext *dc, arg_r_r_r *a,
void (*func)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
{
@@ -4632,6 +5004,8 @@
TRANS(FSUBs, ALL, do_env_fff, a, gen_helper_fsubs)
TRANS(FMULs, ALL, do_env_fff, a, gen_helper_fmuls)
TRANS(FDIVs, ALL, do_env_fff, a, gen_helper_fdivs)
+TRANS(FNADDs, VIS3, do_env_fff, a, gen_helper_fnadds)
+TRANS(FNMULs, VIS3, do_env_fff, a, gen_helper_fnmuls)
static bool do_dff(DisasContext *dc, arg_r_r_r *a,
void (*func)(TCGv_i64, TCGv_i32, TCGv_i32))
@@ -4643,7 +5017,7 @@
return true;
}
- dst = gen_dest_fpr_D(dc, a->rd);
+ dst = tcg_temp_new_i64();
src1 = gen_load_fpr_F(dc, a->rs1);
src2 = gen_load_fpr_F(dc, a->rs2);
func(dst, src1, src2);
@@ -4667,7 +5041,7 @@
return true;
}
- dst = gen_dest_fpr_D(dc, a->rd);
+ dst = tcg_temp_new_i64();
src1 = gen_load_fpr_F(dc, a->rs1);
src2 = gen_load_fpr_D(dc, a->rs2);
func(dst, src1, src2);
@@ -4677,6 +5051,63 @@
TRANS(FMUL8x16, VIS1, do_dfd, a, gen_helper_fmul8x16)
+static bool do_gvec_ddd(DisasContext *dc, arg_r_r_r *a, MemOp vece,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t, uint32_t))
+{
+ if (gen_trap_ifnofpu(dc)) {
+ return true;
+ }
+
+ func(vece, gen_offset_fpr_D(a->rd), gen_offset_fpr_D(a->rs1),
+ gen_offset_fpr_D(a->rs2), 8, 8);
+ return advance_pc(dc);
+}
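
The two trailing 8s are tcg_gen_gvec_*'s oprsz/maxsz arguments: each VIS operation reads and writes exactly one 8-byte double register inside CPUSPARCState. For instance, TRANS(FPADD16, ...) below emits, in effect:

    tcg_gen_gvec_add(MO_16, gen_offset_fpr_D(a->rd),
                     gen_offset_fpr_D(a->rs1), gen_offset_fpr_D(a->rs2),
                     8, 8);    /* oprsz = maxsz = 8 bytes: one D register */
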
+
+TRANS(FPADD8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_add)
+TRANS(FPADD16, VIS1, do_gvec_ddd, a, MO_16, tcg_gen_gvec_add)
+TRANS(FPADD32, VIS1, do_gvec_ddd, a, MO_32, tcg_gen_gvec_add)
+
+TRANS(FPSUB8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_sub)
+TRANS(FPSUB16, VIS1, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sub)
+TRANS(FPSUB32, VIS1, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sub)
+
+TRANS(FCHKSM16, VIS3, do_gvec_ddd, a, MO_16, gen_op_fchksm16)
+TRANS(FMEAN16, VIS3, do_gvec_ddd, a, MO_16, gen_op_fmean16)
+
+TRANS(FPADDS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_ssadd)
+TRANS(FPADDS16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_ssadd)
+TRANS(FPADDS32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_ssadd)
+TRANS(FPADDUS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_usadd)
+TRANS(FPADDUS16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_usadd)
+
+TRANS(FPSUBS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_sssub)
+TRANS(FPSUBS16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sssub)
+TRANS(FPSUBS32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sssub)
+TRANS(FPSUBUS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_ussub)
+TRANS(FPSUBUS16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_ussub)
+
+TRANS(FSLL16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_shlv)
+TRANS(FSLL32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_shlv)
+TRANS(FSRL16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_shrv)
+TRANS(FSRL32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_shrv)
+TRANS(FSRA16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sarv)
+TRANS(FSRA32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sarv)
+
+TRANS(FPMIN8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_smin)
+TRANS(FPMIN16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_smin)
+TRANS(FPMIN32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_smin)
+TRANS(FPMINU8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_umin)
+TRANS(FPMINU16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_umin)
+TRANS(FPMINU32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_umin)
+
+TRANS(FPMAX8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_smax)
+TRANS(FPMAX16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_smax)
+TRANS(FPMAX32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_smax)
+TRANS(FPMAXU8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_umax)
+TRANS(FPMAXU16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_umax)
+TRANS(FPMAXU32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_umax)
+
static bool do_ddd(DisasContext *dc, arg_r_r_r *a,
void (*func)(TCGv_i64, TCGv_i64, TCGv_i64))
{
@@ -4686,7 +5117,7 @@
return true;
}
- dst = gen_dest_fpr_D(dc, a->rd);
+ dst = tcg_temp_new_i64();
src1 = gen_load_fpr_D(dc, a->rs1);
src2 = gen_load_fpr_D(dc, a->rs2);
func(dst, src1, src2);
@@ -4697,10 +5128,6 @@
TRANS(FMUL8SUx16, VIS1, do_ddd, a, gen_helper_fmul8sux16)
TRANS(FMUL8ULx16, VIS1, do_ddd, a, gen_helper_fmul8ulx16)
-TRANS(FPADD16, VIS1, do_ddd, a, tcg_gen_vec_add16_i64)
-TRANS(FPADD32, VIS1, do_ddd, a, tcg_gen_vec_add32_i64)
-TRANS(FPSUB16, VIS1, do_ddd, a, tcg_gen_vec_sub16_i64)
-TRANS(FPSUB32, VIS1, do_ddd, a, tcg_gen_vec_sub32_i64)
TRANS(FNORd, VIS1, do_ddd, a, tcg_gen_nor_i64)
TRANS(FANDNOTd, VIS1, do_ddd, a, tcg_gen_andc_i64)
TRANS(FXORd, VIS1, do_ddd, a, tcg_gen_xor_i64)
@@ -4711,9 +5138,18 @@
TRANS(FORd, VIS1, do_ddd, a, tcg_gen_or_i64)
TRANS(FPACK32, VIS1, do_ddd, a, gen_op_fpack32)
-TRANS(FALIGNDATAg, VIS1, do_ddd, a, gen_op_faligndata)
+TRANS(FALIGNDATAg, VIS1, do_ddd, a, gen_op_faligndata_g)
TRANS(BSHUFFLE, VIS2, do_ddd, a, gen_op_bshuffle)
+TRANS(FHADDd, VIS3, do_ddd, a, gen_op_fhaddd)
+TRANS(FHSUBd, VIS3, do_ddd, a, gen_op_fhsubd)
+TRANS(FNHADDd, VIS3, do_ddd, a, gen_op_fnhaddd)
+
+TRANS(FPADD64, VIS3B, do_ddd, a, tcg_gen_add_i64)
+TRANS(FPSUB64, VIS3B, do_ddd, a, tcg_gen_sub_i64)
+TRANS(FSLAS16, VIS3, do_ddd, a, gen_helper_fslas16)
+TRANS(FSLAS32, VIS3, do_ddd, a, gen_helper_fslas32)
+
static bool do_rdd(DisasContext *dc, arg_r_r_r *a,
void (*func)(TCGv, TCGv_i64, TCGv_i64))
{
@@ -4736,11 +5172,26 @@
TRANS(FPCMPNE16, VIS1, do_rdd, a, gen_helper_fcmpne16)
TRANS(FPCMPGT16, VIS1, do_rdd, a, gen_helper_fcmpgt16)
TRANS(FPCMPEQ16, VIS1, do_rdd, a, gen_helper_fcmpeq16)
+TRANS(FPCMPULE16, VIS4, do_rdd, a, gen_helper_fcmpule16)
+TRANS(FPCMPUGT16, VIS4, do_rdd, a, gen_helper_fcmpugt16)
TRANS(FPCMPLE32, VIS1, do_rdd, a, gen_helper_fcmple32)
TRANS(FPCMPNE32, VIS1, do_rdd, a, gen_helper_fcmpne32)
TRANS(FPCMPGT32, VIS1, do_rdd, a, gen_helper_fcmpgt32)
TRANS(FPCMPEQ32, VIS1, do_rdd, a, gen_helper_fcmpeq32)
+TRANS(FPCMPULE32, VIS4, do_rdd, a, gen_helper_fcmpule32)
+TRANS(FPCMPUGT32, VIS4, do_rdd, a, gen_helper_fcmpugt32)
+
+TRANS(FPCMPEQ8, VIS3B, do_rdd, a, gen_helper_fcmpeq8)
+TRANS(FPCMPNE8, VIS3B, do_rdd, a, gen_helper_fcmpne8)
+TRANS(FPCMPULE8, VIS3B, do_rdd, a, gen_helper_fcmpule8)
+TRANS(FPCMPUGT8, VIS3B, do_rdd, a, gen_helper_fcmpugt8)
+TRANS(FPCMPLE8, VIS4, do_rdd, a, gen_helper_fcmple8)
+TRANS(FPCMPGT8, VIS4, do_rdd, a, gen_helper_fcmpgt8)
+
+TRANS(PDISTN, VIS3, do_rdd, a, gen_op_pdistn)
+TRANS(XMULX, VIS3, do_rrr, a, gen_helper_xmulx)
+TRANS(XMULXHI, VIS3, do_rrr, a, gen_helper_xmulxhi)
static bool do_env_ddd(DisasContext *dc, arg_r_r_r *a,
void (*func)(TCGv_i64, TCGv_env, TCGv_i64, TCGv_i64))
@@ -4751,7 +5202,7 @@
return true;
}
- dst = gen_dest_fpr_D(dc, a->rd);
+ dst = tcg_temp_new_i64();
src1 = gen_load_fpr_D(dc, a->rs1);
src2 = gen_load_fpr_D(dc, a->rs2);
func(dst, tcg_env, src1, src2);
@@ -4763,6 +5214,8 @@
TRANS(FSUBd, ALL, do_env_ddd, a, gen_helper_fsubd)
TRANS(FMULd, ALL, do_env_ddd, a, gen_helper_fmuld)
TRANS(FDIVd, ALL, do_env_ddd, a, gen_helper_fdivd)
+TRANS(FNADDd, VIS3, do_env_ddd, a, gen_helper_fnaddd)
+TRANS(FNMULd, VIS3, do_env_ddd, a, gen_helper_fnmuld)
static bool trans_FsMULd(DisasContext *dc, arg_r_r_r *a)
{
@@ -4776,7 +5229,7 @@
return raise_unimpfpop(dc);
}
- dst = gen_dest_fpr_D(dc, a->rd);
+ dst = tcg_temp_new_i64();
src1 = gen_load_fpr_F(dc, a->rs1);
src2 = gen_load_fpr_F(dc, a->rs2);
gen_helper_fsmuld(dst, tcg_env, src1, src2);
@@ -4784,25 +5237,94 @@
return advance_pc(dc);
}
-static bool do_dddd(DisasContext *dc, arg_r_r_r *a,
- void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+static bool trans_FNsMULd(DisasContext *dc, arg_r_r_r *a)
{
- TCGv_i64 dst, src0, src1, src2;
+ TCGv_i64 dst;
+ TCGv_i32 src1, src2;
+
+ if (!avail_VIS3(dc)) {
+ return false;
+ }
+ if (gen_trap_ifnofpu(dc)) {
+ return true;
+ }
+ dst = tcg_temp_new_i64();
+ src1 = gen_load_fpr_F(dc, a->rs1);
+ src2 = gen_load_fpr_F(dc, a->rs2);
+ gen_helper_fnsmuld(dst, tcg_env, src1, src2);
+ gen_store_fpr_D(dc, a->rd, dst);
+ return advance_pc(dc);
+}
+
+static bool do_ffff(DisasContext *dc, arg_r_r_r_r *a,
+ void (*func)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 dst, src1, src2, src3;
if (gen_trap_ifnofpu(dc)) {
return true;
}
- dst = gen_dest_fpr_D(dc, a->rd);
- src0 = gen_load_fpr_D(dc, a->rd);
+ src1 = gen_load_fpr_F(dc, a->rs1);
+ src2 = gen_load_fpr_F(dc, a->rs2);
+ src3 = gen_load_fpr_F(dc, a->rs3);
+ dst = tcg_temp_new_i32();
+ func(dst, src1, src2, src3);
+ gen_store_fpr_F(dc, a->rd, dst);
+ return advance_pc(dc);
+}
+
+TRANS(FMADDs, FMAF, do_ffff, a, gen_op_fmadds)
+TRANS(FMSUBs, FMAF, do_ffff, a, gen_op_fmsubs)
+TRANS(FNMSUBs, FMAF, do_ffff, a, gen_op_fnmsubs)
+TRANS(FNMADDs, FMAF, do_ffff, a, gen_op_fnmadds)
+
+static bool do_dddd(DisasContext *dc, arg_r_r_r_r *a,
+ void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ TCGv_i64 dst, src1, src2, src3;
+
+ if (gen_trap_ifnofpu(dc)) {
+ return true;
+ }
+
+ dst = tcg_temp_new_i64();
src1 = gen_load_fpr_D(dc, a->rs1);
src2 = gen_load_fpr_D(dc, a->rs2);
- func(dst, src0, src1, src2);
+ src3 = gen_load_fpr_D(dc, a->rs3);
+ func(dst, src1, src2, src3);
gen_store_fpr_D(dc, a->rd, dst);
return advance_pc(dc);
}
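
(Editor's note: the third source now arrives as an explicit rs3 operand rather than being read back from rd, the old src0; for PDIST the accumulate-into-destination behavior is preserved, presumably by the decodetree pattern supplying rd as rs3.)
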
TRANS(PDIST, VIS1, do_dddd, a, gen_helper_pdist)
+TRANS(FMADDd, FMAF, do_dddd, a, gen_op_fmaddd)
+TRANS(FMSUBd, FMAF, do_dddd, a, gen_op_fmsubd)
+TRANS(FNMSUBd, FMAF, do_dddd, a, gen_op_fnmsubd)
+TRANS(FNMADDd, FMAF, do_dddd, a, gen_op_fnmaddd)
+TRANS(FPMADDX, IMA, do_dddd, a, gen_op_fpmaddx)
+TRANS(FPMADDXHI, IMA, do_dddd, a, gen_op_fpmaddxhi)
+
+static bool trans_FALIGNDATAi(DisasContext *dc, arg_r_r_r *a)
+{
+ TCGv_i64 dst, src1, src2;
+ TCGv src3;
+
+ if (!avail_VIS4(dc)) {
+ return false;
+ }
+ if (gen_trap_ifnofpu(dc)) {
+ return true;
+ }
+
+ dst = tcg_temp_new_i64();
+ src1 = gen_load_fpr_D(dc, a->rd);
+ src2 = gen_load_fpr_D(dc, a->rs2);
+ src3 = gen_load_gpr(dc, a->rs1);
+ gen_op_faligndata_i(dst, src1, src2, src3);
+ gen_store_fpr_D(dc, a->rd, dst);
+ return advance_pc(dc);
+}
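
(Editor's note: unlike FALIGNDATAg above, which takes both data operands from fp registers and its byte offset implicitly from GSR.align, this VIS4 'i' variant reads its first data operand from rd and the offset from the integer register rs1.)
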
static bool do_env_qqq(DisasContext *dc, arg_r_r_r *a,
void (*func)(TCGv_i128, TCGv_env, TCGv_i128, TCGv_i128))
@@ -4991,6 +5513,76 @@
TRANS(FCMPq, ALL, do_fcmpq, a, false)
TRANS(FCMPEq, ALL, do_fcmpq, a, true)
+static bool trans_FLCMPs(DisasContext *dc, arg_FLCMPs *a)
+{
+ TCGv_i32 src1, src2;
+
+ if (!avail_VIS3(dc)) {
+ return false;
+ }
+ if (gen_trap_ifnofpu(dc)) {
+ return true;
+ }
+
+ src1 = gen_load_fpr_F(dc, a->rs1);
+ src2 = gen_load_fpr_F(dc, a->rs2);
+ gen_helper_flcmps(cpu_fcc[a->cc], src1, src2);
+ return advance_pc(dc);
+}
+
+static bool trans_FLCMPd(DisasContext *dc, arg_FLCMPd *a)
+{
+ TCGv_i64 src1, src2;
+
+ if (!avail_VIS3(dc)) {
+ return false;
+ }
+ if (gen_trap_ifnofpu(dc)) {
+ return true;
+ }
+
+ src1 = gen_load_fpr_D(dc, a->rs1);
+ src2 = gen_load_fpr_D(dc, a->rs2);
+ gen_helper_flcmpd(cpu_fcc[a->cc], src1, src2);
+ return advance_pc(dc);
+}
+
+static bool do_movf2r(DisasContext *dc, arg_r_r *a,
+ int (*offset)(unsigned int),
+ void (*load)(TCGv, TCGv_ptr, tcg_target_long))
+{
+ TCGv dst;
+
+ if (gen_trap_ifnofpu(dc)) {
+ return true;
+ }
+ dst = gen_dest_gpr(dc, a->rd);
+ load(dst, tcg_env, offset(a->rs));
+ gen_store_gpr(dc, a->rd, dst);
+ return advance_pc(dc);
+}
+
+TRANS(MOVsTOsw, VIS3B, do_movf2r, a, gen_offset_fpr_F, tcg_gen_ld32s_tl)
+TRANS(MOVsTOuw, VIS3B, do_movf2r, a, gen_offset_fpr_F, tcg_gen_ld32u_tl)
+TRANS(MOVdTOx, VIS3B, do_movf2r, a, gen_offset_fpr_D, tcg_gen_ld_tl)
+
+static bool do_movr2f(DisasContext *dc, arg_r_r *a,
+ int (*offset)(unsigned int),
+ void (*store)(TCGv, TCGv_ptr, tcg_target_long))
+{
+ TCGv src;
+
+ if (gen_trap_ifnofpu(dc)) {
+ return true;
+ }
+ src = gen_load_gpr(dc, a->rs);
+ store(src, tcg_env, offset(a->rd));
+ return advance_pc(dc);
+}
+
+TRANS(MOVwTOs, VIS3B, do_movr2f, a, gen_offset_fpr_F, tcg_gen_st32_tl)
+TRANS(MOVxTOd, VIS3B, do_movr2f, a, gen_offset_fpr_D, tcg_gen_st_tl)
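
With the cpu_fpr TCG globals removed at the end of this patch, the fp registers live only in CPUSPARCState memory, so these gpr<->fpr transfers reduce to ordinary loads and stores on tcg_env at the register's offset.
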
+
static void sparc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
{
DisasContext *dc = container_of(dcbase, DisasContext, base);
@@ -5172,12 +5764,6 @@
"l0", "l1", "l2", "l3", "l4", "l5", "l6", "l7",
"i0", "i1", "i2", "i3", "i4", "i5", "i6", "i7",
};
- static const char fregnames[32][4] = {
- "f0", "f2", "f4", "f6", "f8", "f10", "f12", "f14",
- "f16", "f18", "f20", "f22", "f24", "f26", "f28", "f30",
- "f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46",
- "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62",
- };
static const struct { TCGv_i32 *ptr; int off; const char *name; } r32[] = {
#ifdef TARGET_SPARC64
@@ -5234,12 +5820,6 @@
(i - 8) * sizeof(target_ulong),
gregnames[i]);
}
-
- for (i = 0; i < TARGET_DPREGS; i++) {
- cpu_fpr[i] = tcg_global_mem_new_i64(tcg_env,
- offsetof(CPUSPARCState, fpr[i]),
- fregnames[i]);
- }
}
void sparc_restore_state_to_opc(CPUState *cs,
diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c
index e15c6bb..371f544 100644
--- a/target/sparc/vis_helper.c
+++ b/target/sparc/vis_helper.c
@@ -20,26 +20,44 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/helper-proto.h"
+#include "crypto/clmul.h"
-/* This function uses non-native bit order */
-#define GET_FIELD(X, FROM, TO) \
- ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
-
-/* This function uses the order in the manuals, i.e. bit 0 is 2^0 */
-#define GET_FIELD_SP(X, FROM, TO) \
- GET_FIELD(X, 63 - (TO), 63 - (FROM))
-
-target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
+target_ulong helper_array8(target_ulong rs1, target_ulong rs2)
{
- return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
- (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
- (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
- (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
- (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
- (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
- (((pixel_addr >> 55) & 1) << 4) |
- (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
- GET_FIELD_SP(pixel_addr, 11, 12);
+ /*
+ * From Oracle SPARC Architecture 2015:
+ * Architecturally, an illegal R[rs2] value (>5) causes the array
+ * instructions to produce undefined results. For historic reference,
+ * past implementations of these instructions have ignored R[rs2]{63:3}
+ * and have treated R[rs2] values of 6 and 7 as if they were 5.
+ */
+ target_ulong n = MIN(rs2 & 7, 5);
+
+ target_ulong x_int = (rs1 >> 11) & 0x7ff;
+ target_ulong y_int = (rs1 >> 33) & 0x7ff;
+ target_ulong z_int = rs1 >> 55;
+
+ target_ulong lower_x = x_int & 3;
+ target_ulong lower_y = y_int & 3;
+ target_ulong lower_z = z_int & 1;
+
+ target_ulong middle_x = (x_int >> 2) & 15;
+ target_ulong middle_y = (y_int >> 2) & 15;
+ target_ulong middle_z = (z_int >> 1) & 15;
+
+ target_ulong upper_x = (x_int >> 6) & ((1 << n) - 1);
+ target_ulong upper_y = (y_int >> 6) & ((1 << n) - 1);
+ target_ulong upper_z = z_int >> 5;
+
+ return (upper_z << (17 + 2 * n))
+ | (upper_y << (17 + n))
+ | (upper_x << 17)
+ | (middle_z << 13)
+ | (middle_y << 9)
+ | (middle_x << 5)
+ | (lower_z << 4)
+ | (lower_y << 2)
+ | lower_x;
}
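
For reference, the blocked-address layout the return statement assembles, with n = MIN(rs2 & 7, 5) (editor's sketch):

    bit:    17+2n     17+n      17        13         9          5         4         2         0
    field:  upper_z | upper_y | upper_x | middle_z | middle_y | middle_x | lower_z | lower_y | lower_x
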
#if HOST_BIG_ENDIAN
@@ -48,6 +66,7 @@
#define VIS_W64(n) w[3 - (n)]
#define VIS_SW64(n) sw[3 - (n)]
#define VIS_L64(n) l[1 - (n)]
+#define VIS_SL64(n) sl[1 - (n)]
#define VIS_B32(n) b[3 - (n)]
#define VIS_W32(n) w[1 - (n)]
#else
@@ -56,6 +75,7 @@
#define VIS_W64(n) w[n]
#define VIS_SW64(n) sw[n]
#define VIS_L64(n) l[n]
+#define VIS_SL64(n) sl[n]
#define VIS_B32(n) b[n]
#define VIS_W32(n) w[n]
#endif
@@ -66,6 +86,7 @@
uint16_t w[4];
int16_t sw[4];
uint32_t l[2];
+ int32_t sl[2];
uint64_t ll;
float64 d;
} VIS64;
@@ -157,10 +178,10 @@
s.ll = src1;
d.ll = src2;
- d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0));
- d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1));
- d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2));
- d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3));
+ d.VIS_W64(0) = (s.VIS_B64(0) * d.VIS_SW64(0) + 0x8000) >> 16;
+ d.VIS_W64(1) = (s.VIS_B64(2) * d.VIS_SW64(1) + 0x8000) >> 16;
+ d.VIS_W64(2) = (s.VIS_B64(4) * d.VIS_SW64(2) + 0x8000) >> 16;
+ d.VIS_W64(3) = (s.VIS_B64(6) * d.VIS_SW64(3) + 0x8000) >> 16;
return d.ll;
}
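
(Editor's note: the open-coded lanes round the 8x16 product to nearest by adding half an ULP before taking the high half, e.g. (0x80 * 0x0100 + 0x8000) >> 16 = 1.)
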
@@ -180,46 +201,171 @@
return d.ll;
}
-#define VIS_CMPHELPER(name, F) \
- uint64_t name##16(uint64_t src1, uint64_t src2) \
- { \
- VIS64 s, d; \
- \
- s.ll = src1; \
- d.ll = src2; \
- \
- d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0; \
- d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0; \
- d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0; \
- d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0; \
- d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0; \
- \
- return d.ll; \
- } \
- \
- uint64_t name##32(uint64_t src1, uint64_t src2) \
- { \
- VIS64 s, d; \
- \
- s.ll = src1; \
- d.ll = src2; \
- \
- d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0; \
- d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0; \
- d.VIS_L64(1) = 0; \
- \
- return d.ll; \
+uint64_t helper_fcmpeq8(uint64_t src1, uint64_t src2)
+{
+ uint64_t a = src1 ^ src2;
+ uint64_t m = 0x7f7f7f7f7f7f7f7fULL;
+ uint64_t c = ~(((a & m) + m) | a | m);
+
+ /* a.......b.......c.......d.......e.......f.......g.......h....... */
+ c |= c << 7;
+ /* ab......bc......cd......de......ef......fg......gh......h....... */
+ c |= c << 14;
+ /* abcd....bcde....cdef....defg....efgh....fgh.....gh......h....... */
+ c |= c << 28;
+ /* abcdefghbcdefgh.cdefgh..defgh...efgh....fgh.....gh......h....... */
+ return c >> 56;
+}
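
The carry trick ~(((a & m) + m) | a | m) sets bit 7 of each byte of c exactly when that byte of a = src1 ^ src2 is zero, i.e. when the operands' bytes are equal; the three shift-ORs then funnel the eight flag bits into the top byte, msb first. A plain per-lane reference (editor's sketch; fcmpeq8_ref is a hypothetical name, and lane 0 is the least significant byte, matching the VIS_B64 indexing used below):

    static uint64_t fcmpeq8_ref(uint64_t src1, uint64_t src2)
    {
        uint64_t r = 0;
        for (int i = 0; i < 8; i++) {
            uint8_t b1 = src1 >> (8 * i), b2 = src2 >> (8 * i);
            r |= (uint64_t)(b1 == b2) << i;
        }
        return r;
    }
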
+
+uint64_t helper_fcmpne8(uint64_t src1, uint64_t src2)
+{
+ return helper_fcmpeq8(src1, src2) ^ 0xff;
+}
+
+uint64_t helper_fcmple8(uint64_t src1, uint64_t src2)
+{
+ VIS64 s1, s2;
+ uint64_t r = 0;
+
+ s1.ll = src1;
+ s2.ll = src2;
+
+ for (int i = 0; i < 8; ++i) {
+ r |= (s1.VIS_SB64(i) <= s2.VIS_SB64(i)) << i;
}
+ return r;
+}
-#define FCMPGT(a, b) ((a) > (b))
-#define FCMPEQ(a, b) ((a) == (b))
-#define FCMPLE(a, b) ((a) <= (b))
-#define FCMPNE(a, b) ((a) != (b))
+uint64_t helper_fcmpgt8(uint64_t src1, uint64_t src2)
+{
+ return helper_fcmple8(src1, src2) ^ 0xff;
+}
-VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
-VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
-VIS_CMPHELPER(helper_fcmple, FCMPLE)
-VIS_CMPHELPER(helper_fcmpne, FCMPNE)
+uint64_t helper_fcmpule8(uint64_t src1, uint64_t src2)
+{
+ VIS64 s1, s2;
+ uint64_t r = 0;
+
+ s1.ll = src1;
+ s2.ll = src2;
+
+ for (int i = 0; i < 8; ++i) {
+ r |= (s1.VIS_B64(i) <= s2.VIS_B64(i)) << i;
+ }
+ return r;
+}
+
+uint64_t helper_fcmpugt8(uint64_t src1, uint64_t src2)
+{
+ return helper_fcmpule8(src1, src2) ^ 0xff;
+}
+
+uint64_t helper_fcmpeq16(uint64_t src1, uint64_t src2)
+{
+ uint64_t a = src1 ^ src2;
+ uint64_t m = 0x7fff7fff7fff7fffULL;
+ uint64_t c = ~(((a & m) + m) | a | m);
+
+ /* a...............b...............c...............d............... */
+ c |= c << 15;
+ /* ab..............bc..............cd..............d............... */
+ c |= c << 30;
+ /* abcd............bcd.............cd..............d............... */
+ return c >> 60;
+}
+
+uint64_t helper_fcmpne16(uint64_t src1, uint64_t src2)
+{
+ return helper_fcmpeq16(src1, src2) ^ 0xf;
+}
+
+uint64_t helper_fcmple16(uint64_t src1, uint64_t src2)
+{
+ VIS64 s1, s2;
+ uint64_t r = 0;
+
+ s1.ll = src1;
+ s2.ll = src2;
+
+ for (int i = 0; i < 4; ++i) {
+ r |= (s1.VIS_SW64(i) <= s2.VIS_SW64(i)) << i;
+ }
+ return r;
+}
+
+uint64_t helper_fcmpgt16(uint64_t src1, uint64_t src2)
+{
+ return helper_fcmple16(src1, src2) ^ 0xf;
+}
+
+uint64_t helper_fcmpule16(uint64_t src1, uint64_t src2)
+{
+ VIS64 s1, s2;
+ uint64_t r = 0;
+
+ s1.ll = src1;
+ s2.ll = src2;
+
+ for (int i = 0; i < 4; ++i) {
+ r |= (s1.VIS_W64(i) <= s2.VIS_W64(i)) << i;
+ }
+ return r;
+}
+
+uint64_t helper_fcmpugt16(uint64_t src1, uint64_t src2)
+{
+ return helper_fcmpule16(src1, src2) ^ 0xf;
+}
+
+uint64_t helper_fcmpeq32(uint64_t src1, uint64_t src2)
+{
+ uint64_t a = src1 ^ src2;
+ return ((uint32_t)a == 0) | (a >> 32 ? 0 : 2);
+}
+
+uint64_t helper_fcmpne32(uint64_t src1, uint64_t src2)
+{
+ uint64_t a = src1 ^ src2;
+ return ((uint32_t)a != 0) | (a >> 32 ? 2 : 0);
+}
+
+uint64_t helper_fcmple32(uint64_t src1, uint64_t src2)
+{
+ VIS64 s1, s2;
+ uint64_t r = 0;
+
+ s1.ll = src1;
+ s2.ll = src2;
+
+ for (int i = 0; i < 2; ++i) {
+ r |= (s1.VIS_SL64(i) <= s2.VIS_SL64(i)) << i;
+ }
+ return r;
+}
+
+uint64_t helper_fcmpgt32(uint64_t src1, uint64_t src2)
+{
+ return helper_fcmple32(src1, src2) ^ 3;
+}
+
+uint64_t helper_fcmpule32(uint64_t src1, uint64_t src2)
+{
+ VIS64 s1, s2;
+ uint64_t r = 0;
+
+ s1.ll = src1;
+ s2.ll = src2;
+
+ for (int i = 0; i < 2; ++i) {
+ r |= (s1.VIS_L64(i) <= s2.VIS_L64(i)) << i;
+ }
+ return r;
+}
+
+uint64_t helper_fcmpugt32(uint64_t src1, uint64_t src2)
+{
+ return helper_fcmpule32(src1, src2) ^ 3;
+}
uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
{
@@ -334,3 +480,131 @@
return r.ll;
}
+
+uint64_t helper_cmask8(uint64_t gsr, uint64_t src)
+{
+ uint32_t mask = 0;
+
+ mask |= (src & 0x01 ? 0x00000007 : 0x0000000f);
+ mask |= (src & 0x02 ? 0x00000060 : 0x000000e0);
+ mask |= (src & 0x04 ? 0x00000500 : 0x00000d00);
+ mask |= (src & 0x08 ? 0x00004000 : 0x0000c000);
+ mask |= (src & 0x10 ? 0x00030000 : 0x000b0000);
+ mask |= (src & 0x20 ? 0x00200000 : 0x00a00000);
+ mask |= (src & 0x40 ? 0x01000000 : 0x09000000);
+ mask |= (src & 0x80 ? 0x00000000 : 0x80000000);
+
+ return deposit64(gsr, 32, 32, mask);
+}
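
The cmask tables build a BSHUFFLE byte-selector in GSR.mask: selectors 0..7 pick the corresponding byte of rs1, and 8..15 the same byte of rs2, giving a per-lane conditional move driven by the compare bits. A loop form of helper_cmask8's table (editor's sketch; cmask8_ref is a hypothetical name):

    static uint32_t cmask8_ref(uint64_t src)
    {
        uint32_t mask = 0;
        for (int k = 0; k < 8; k++) {
            /* byte 7-k of rs1 if compare bit k is set, else of rs2 */
            uint32_t sel = (7 - k) | ((src >> k) & 1 ? 0 : 8);
            mask |= sel << (4 * k);
        }
        return mask;
    }
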
+
+uint64_t helper_cmask16(uint64_t gsr, uint64_t src)
+{
+ uint32_t mask = 0;
+
+ mask |= (src & 0x1 ? 0x00000067 : 0x000000ef);
+ mask |= (src & 0x2 ? 0x00004500 : 0x0000cd00);
+ mask |= (src & 0x4 ? 0x00230000 : 0x00ab0000);
+ mask |= (src & 0x8 ? 0x01000000 : 0x89000000);
+
+ return deposit64(gsr, 32, 32, mask);
+}
+
+uint64_t helper_cmask32(uint64_t gsr, uint64_t src)
+{
+ uint32_t mask = 0;
+
+ mask |= (src & 0x1 ? 0x00004567 : 0x0000cdef);
+ mask |= (src & 0x2 ? 0x01230000 : 0x89ab0000);
+
+ return deposit64(gsr, 32, 32, mask);
+}
+
+static inline uint16_t do_fchksm16(uint16_t src1, uint16_t src2)
+{
+ uint16_t a = src1 + src2;
+ uint16_t c = a < src1;
+ return a + c;
+}
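
do_fchksm16 is the 16-bit end-around-carry (one's-complement) add used by IP-style checksums: 0xffff + 0x0001 wraps to a = 0 with carry, giving 0x0001, and 0x8000 + 0x8000 likewise yields 0x0001.
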
+
+uint64_t helper_fchksm16(uint64_t src1, uint64_t src2)
+{
+ VIS64 r, s1, s2;
+
+ s1.ll = src1;
+ s2.ll = src2;
+ r.ll = 0;
+
+ r.VIS_W64(0) = do_fchksm16(s1.VIS_W64(0), s2.VIS_W64(0));
+ r.VIS_W64(1) = do_fchksm16(s1.VIS_W64(1), s2.VIS_W64(1));
+ r.VIS_W64(2) = do_fchksm16(s1.VIS_W64(2), s2.VIS_W64(2));
+ r.VIS_W64(3) = do_fchksm16(s1.VIS_W64(3), s2.VIS_W64(3));
+
+ return r.ll;
+}
+
+static inline int16_t do_fmean16(int16_t src1, int16_t src2)
+{
+ return (src1 + src2 + 1) >> 1;
+}
+
+uint64_t helper_fmean16(uint64_t src1, uint64_t src2)
+{
+ VIS64 r, s1, s2;
+
+ s1.ll = src1;
+ s2.ll = src2;
+ r.ll = 0;
+
+ r.VIS_SW64(0) = do_fmean16(s1.VIS_SW64(0), s2.VIS_SW64(0));
+ r.VIS_SW64(1) = do_fmean16(s1.VIS_SW64(1), s2.VIS_SW64(1));
+ r.VIS_SW64(2) = do_fmean16(s1.VIS_SW64(2), s2.VIS_SW64(2));
+ r.VIS_SW64(3) = do_fmean16(s1.VIS_SW64(3), s2.VIS_SW64(3));
+
+ return r.ll;
+}
+
+uint64_t helper_fslas16(uint64_t src1, uint64_t src2)
+{
+ VIS64 r, s1, s2;
+
+ s1.ll = src1;
+ s2.ll = src2;
+ r.ll = 0;
+
+ for (int i = 0; i < 4; ++i) {
+ int t = s1.VIS_SW64(i) << (s2.VIS_W64(i) % 16);
+ t = MIN(t, INT16_MAX);
+ t = MAX(t, INT16_MIN);
+ r.VIS_SW64(i) = t;
+ }
+
+ return r.ll;
+}
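
FSLAS is a saturating arithmetic left shift: e.g. a lane of 0x4000 shifted left by 2 overflows int16_t and clamps to INT16_MAX (0x7fff) instead of wrapping, and the % 16 here (and % 32 below) keeps only the low bits of the shift count.
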
+
+uint64_t helper_fslas32(uint64_t src1, uint64_t src2)
+{
+ VIS64 r, s1, s2;
+
+ s1.ll = src1;
+ s2.ll = src2;
+ r.ll = 0;
+
+ for (int i = 0; i < 2; ++i) {
+ int64_t t = (int64_t)(int32_t)s1.VIS_L64(i) << (s2.VIS_L64(i) % 32);
+ t = MIN(t, INT32_MAX);
+ t = MAX(t, INT32_MIN);
+ r.VIS_L64(i) = t;
+ }
+
+ return r.ll;
+}
+
+uint64_t helper_xmulx(uint64_t src1, uint64_t src2)
+{
+ return int128_getlo(clmul_64(src1, src2));
+}
+
+uint64_t helper_xmulxhi(uint64_t src1, uint64_t src2)
+{
+ return int128_gethi(clmul_64(src1, src2));
+}
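
xmulx/xmulxhi return the low and high halves of the 64x64 carry-less (GF(2)[x]) product computed by the generic clmul_64 helper: e.g. clmul(0b101, 0b11) = (x^2 + 1)(x + 1) = x^3 + x^2 + x + 1 = 0b1111, with no carries between bit positions.
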