Merge tag 'pull-sparc-20240911' of https://gitlab.com/rth7680/qemu into staging
target/sparc: Implement single entry FP Queue
# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmbifAAdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV+XAwgAlj//8JuNoRB/2hi0
# gU3Ifjrs+r+AZrcsG7pTOmYTZa6cYqJX4XsYoNq1S4FHky239vNKPQOQEadkmLGv
# wKH0fBjzvydOKRfrhEK2VLlhMyhGyuv59psfCCUB5HZEiueSHFFAvfjUtKNpjzRT
# KE2fwL6iKK3IXeKC6ynq0bkC/OymnLUYSgSslA6C1x1sReNz5Y6ZsGUEZRwODY4f
# q6s6JS2aBn1L9nJTzwXH/J5Ue8iix53d6EZ42QHqqwzRvAWHtfFqoMLc9P6Dg8P7
# FmiwHAErwr7Pj5cqcnl2C0zTp3LXg5xXpTJysi8CFJvCsObNRh9gL15W3xy9qBFX
# 2WfqWQ==
# =kxM7
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 12 Sep 2024 06:28:32 BST
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F
* tag 'pull-sparc-20240911' of https://gitlab.com/rth7680/qemu:
target/sparc: Add gen_trap_if_nofpu_fpexception
target/sparc: Implement STDFQ
target/sparc: Add FSR_QNE to tb_flags
target/sparc: Populate sparc32 FQ when raising fp exception
target/sparc: Add FQ and FSR.QNE
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
index dfd9512..f517e5a 100644
--- a/target/sparc/cpu.h
+++ b/target/sparc/cpu.h
@@ -184,6 +184,8 @@
#define FSR_FTT_SEQ_ERROR (4ULL << 14)
#define FSR_FTT_INVAL_FPR (6ULL << 14)
+#define FSR_QNE (1ULL << 13)
+
#define FSR_FCC0_SHIFT 10
#define FSR_FCC1_SHIFT 32
#define FSR_FCC2_SHIFT 34
@@ -438,6 +440,26 @@
uint32_t fsr_cexc_ftt; /* cexc, ftt */
uint32_t fcc[TARGET_FCCREGS]; /* fcc* */
+#if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY)
+ /*
+ * Single-element FPU fault queue, with address and insn,
+ * packaged into the double-word with which it is stored.
+ */
+ uint32_t fsr_qne; /* qne */
+ union {
+ uint64_t d;
+ struct {
+#if HOST_BIG_ENDIAN
+ uint32_t addr;
+ uint32_t insn;
+#else
+ uint32_t insn;
+ uint32_t addr;
+#endif
+ } s;
+ } fq;
+#endif
+
CPU_DoubleU fpr[TARGET_DPREGS]; /* floating point registers */
uint32_t cwp; /* index of current register window (extracted
from PSR) */
@@ -722,6 +744,7 @@
#define TB_FLAG_AM_ENABLED (1 << 5)
#define TB_FLAG_SUPER (1 << 6)
#define TB_FLAG_HYPER (1 << 7)
+#define TB_FLAG_FSR_QNE (1 << 8)
#define TB_FLAG_ASI_SHIFT 24
static inline void cpu_get_tb_cpu_state(CPUSPARCState *env, vaddr *pc,
@@ -753,7 +776,12 @@
if (env->psref) {
flags |= TB_FLAG_FPU_ENABLED;
}
-#endif
+#ifndef CONFIG_USER_ONLY
+ if (env->fsr_qne) {
+ flags |= TB_FLAG_FSR_QNE;
+ }
+#endif /* !CONFIG_USER_ONLY */
+#endif /* TARGET_SPARC64 */
*pflags = flags;
}
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
index 0b30665..b669238 100644
--- a/target/sparc/fop_helper.c
+++ b/target/sparc/fop_helper.c
@@ -545,6 +545,8 @@
fsr |= (uint64_t)env->fcc[1] << FSR_FCC1_SHIFT;
fsr |= (uint64_t)env->fcc[2] << FSR_FCC2_SHIFT;
fsr |= (uint64_t)env->fcc[3] << FSR_FCC3_SHIFT;
+#elif !defined(CONFIG_USER_ONLY)
+ fsr |= env->fsr_qne;
#endif
/* VER is kept completely separate until re-assembly. */
@@ -591,6 +593,8 @@
env->fcc[1] = extract64(fsr, FSR_FCC1_SHIFT, 2);
env->fcc[2] = extract64(fsr, FSR_FCC2_SHIFT, 2);
env->fcc[3] = extract64(fsr, FSR_FCC3_SHIFT, 2);
+#elif !defined(CONFIG_USER_ONLY)
+ env->fsr_qne = fsr & FSR_QNE;
#endif
set_fsr_nonsplit(env, fsr);
diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode
index 5fd4781..923f348 100644
--- a/target/sparc/insns.decode
+++ b/target/sparc/insns.decode
@@ -645,7 +645,7 @@
STXFSR 11 00001 100101 ..... . ............. @n_r_ri
{
STQF 11 ..... 100110 ..... . ............. @q_r_ri_na # v9
- STDFQ 11 ----- 100110 ----- - -------------
+ STDFQ 11 ..... 100110 ..... . ............. @r_r_ri # v7,v8
}
STDF 11 ..... 100111 ..... . ............. @d_r_ri_na
diff --git a/target/sparc/int32_helper.c b/target/sparc/int32_helper.c
index 6b7d65b..f2dd8bc 100644
--- a/target/sparc/int32_helper.c
+++ b/target/sparc/int32_helper.c
@@ -21,10 +21,10 @@
#include "qemu/main-loop.h"
#include "cpu.h"
#include "trace.h"
+#include "exec/cpu_ldst.h"
#include "exec/log.h"
#include "sysemu/runstate.h"
-
static const char * const excp_names[0x80] = {
[TT_TFAULT] = "Instruction Access Fault",
[TT_ILL_INSN] = "Illegal Instruction",
@@ -116,22 +116,9 @@
qemu_log("%6d: %s (v=%02x)\n", count, name, intno);
log_cpu_state(cs, 0);
-#if 0
- {
- int i;
- uint8_t *ptr;
-
- qemu_log(" code=");
- ptr = (uint8_t *)env->pc;
- for (i = 0; i < 16; i++) {
- qemu_log(" %02x", ldub(ptr + i));
- }
- qemu_log("\n");
- }
-#endif
count++;
}
-#if !defined(CONFIG_USER_ONLY)
+#ifndef CONFIG_USER_ONLY
if (env->psret == 0) {
if (cs->exception_index == 0x80 &&
env->def.features & CPU_FEATURE_TA0_SHUTDOWN) {
@@ -143,6 +130,29 @@
}
return;
}
+ if (intno == TT_FP_EXCP) {
+ /*
+ * The sparc32 fpu has three states related to exception handling.
+ * The FPop that signals an exception transitions from fp_execute
+ * to fp_exception_pending. A subsequent FPop transitions from
+ * fp_exception_pending to fp_exception, which forces the trap.
+ *
+ * If the queue is not empty, this trap is due to execution of an
+ * illegal FPop while in fp_exception state. Here we are to
+ * re-enter fp_exception_pending state without queuing the insn.
+ *
+ * We do not model the fp_exception_pending state, but instead
+ * skip directly to fp_exception state. We advance pc/npc to
+ * mimic delayed trap delivery as if by the subsequent insn.
+ */
+ if (!env->fsr_qne) {
+ env->fsr_qne = FSR_QNE;
+ env->fq.s.addr = env->pc;
+ env->fq.s.insn = cpu_ldl_code(env, env->pc);
+ }
+ env->pc = env->npc;
+ env->npc = env->npc + 4;
+ }
#endif
env->psret = 0;
cwp = cpu_cwp_dec(env, env->cwp - 1);
diff --git a/target/sparc/machine.c b/target/sparc/machine.c
index 48e0cf2..222e570 100644
--- a/target/sparc/machine.c
+++ b/target/sparc/machine.c
@@ -143,6 +143,24 @@
.get = get_xcc,
.put = put_xcc,
};
+#else
+static bool fq_needed(void *opaque)
+{
+ SPARCCPU *cpu = opaque;
+ return cpu->env.fsr_qne;
+}
+
+static const VMStateDescription vmstate_fq = {
+ .name = "cpu/fq",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = fq_needed,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT32(env.fq.s.addr, SPARCCPU),
+ VMSTATE_UINT32(env.fq.s.insn, SPARCCPU),
+ VMSTATE_END_OF_LIST()
+ },
+};
#endif
static int cpu_pre_save(void *opaque)
@@ -265,4 +283,11 @@
#endif
VMSTATE_END_OF_LIST()
},
+#ifndef TARGET_SPARC64
+ .subsections = (const VMStateDescription * const []) {
+ &vmstate_fq,
+ NULL
+ },
+#endif
+
};
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index c803e8d..cdd0a95 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -185,6 +185,8 @@
bool supervisor;
#ifdef TARGET_SPARC64
bool hypervisor;
+#else
+ bool fsr_qne;
#endif
#endif
@@ -1463,15 +1465,48 @@
gen_exception(dc, TT_FP_EXCP);
}
-static int gen_trap_ifnofpu(DisasContext *dc)
+static bool gen_trap_ifnofpu(DisasContext *dc)
{
#if !defined(CONFIG_USER_ONLY)
if (!dc->fpu_enabled) {
gen_exception(dc, TT_NFPU_INSN);
- return 1;
+ return true;
}
#endif
- return 0;
+ return false;
+}
+
+static bool gen_trap_iffpexception(DisasContext *dc)
+{
+#if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY)
+ /*
+ * There are 3 states for the sparc32 fpu:
+ * Normally the fpu is in fp_execute, and all insns are allowed.
+ * When an exception is signaled, it moves to fp_exception_pending state.
+ * Upon seeing the next FPop, the fpu moves to fp_exception state,
+ * populates the FQ, and generates an fp_exception trap.
+ * The fpu remains in fp_exception state until FQ becomes empty
+ * after execution of a STDFQ instruction. While the fpu is in
+ * fp_exception state, any FPop, fp load or fp branch insn will
+ * return to fp_exception_pending state, set FSR.FTT to sequence_error,
+ * and the insn will not be entered into the FQ.
+ *
+ * In QEMU, we do not model the fp_exception_pending state and
+ * instead populate FQ and raise the exception immediately.
+ * But we can still honor fp_exception state by noticing when
+ * the FQ is not empty.
+ */
+ if (dc->fsr_qne) {
+ gen_op_fpexception_im(dc, FSR_FTT_SEQ_ERROR);
+ return true;
+ }
+#endif
+ return false;
+}
+
+static bool gen_trap_if_nofpu_fpexception(DisasContext *dc)
+{
+ return gen_trap_ifnofpu(dc) || gen_trap_iffpexception(dc);
}
/* asi moves */
@@ -2641,7 +2676,7 @@
{
DisasCompare cmp;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
gen_fcompare(&cmp, a->cc, a->cond);
@@ -4480,7 +4515,7 @@
if (addr == NULL) {
return false;
}
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
if (sz == MO_128 && gen_trap_float128(dc)) {
@@ -4508,6 +4543,7 @@
if (addr == NULL) {
return false;
}
+ /* Store insns are ok in fp_exception state. */
if (gen_trap_ifnofpu(dc)) {
return true;
}
@@ -4529,17 +4565,41 @@
static bool trans_STDFQ(DisasContext *dc, arg_STDFQ *a)
{
+ TCGv addr;
+
if (!avail_32(dc)) {
return false;
}
+ addr = gen_ldst_addr(dc, a->rs1, a->imm, a->rs2_or_imm);
+ if (addr == NULL) {
+ return false;
+ }
if (!supervisor(dc)) {
return raise_priv(dc);
}
+#if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY)
if (gen_trap_ifnofpu(dc)) {
return true;
}
- gen_op_fpexception_im(dc, FSR_FTT_SEQ_ERROR);
- return true;
+ if (!dc->fsr_qne) {
+ gen_op_fpexception_im(dc, FSR_FTT_SEQ_ERROR);
+ return true;
+ }
+
+ /* Store the single element from the queue. */
+ TCGv_i64 fq = tcg_temp_new_i64();
+ tcg_gen_ld_i64(fq, tcg_env, offsetof(CPUSPARCState, fq.d));
+ tcg_gen_qemu_st_i64(fq, addr, dc->mem_idx, MO_TEUQ | MO_ALIGN_4);
+
+ /* Mark the queue empty, transitioning to fp_execute state. */
+ tcg_gen_st_i32(tcg_constant_i32(0), tcg_env,
+ offsetof(CPUSPARCState, fsr_qne));
+ dc->fsr_qne = 0;
+
+ return advance_pc(dc);
+#else
+ qemu_build_not_reached();
+#endif
}
static bool trans_LDFSR(DisasContext *dc, arg_r_r_ri *a)
@@ -4550,7 +4610,7 @@
if (addr == NULL) {
return false;
}
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
@@ -4574,7 +4634,7 @@
if (addr == NULL) {
return false;
}
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
@@ -4611,6 +4671,7 @@
if (addr == NULL) {
return false;
}
+ /* Store insns are ok in fp_exception state. */
if (gen_trap_ifnofpu(dc)) {
return true;
}
@@ -4653,7 +4714,7 @@
{
TCGv_i32 tmp;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
@@ -4694,7 +4755,7 @@
{
TCGv_i32 tmp;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
@@ -4714,7 +4775,7 @@
TCGv_i32 dst;
TCGv_i64 src;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
@@ -4734,7 +4795,7 @@
{
TCGv_i64 dst, src;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
@@ -4756,7 +4817,7 @@
{
TCGv_i64 dst, src;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
@@ -4796,7 +4857,7 @@
TCGv_i64 dst;
TCGv_i32 src;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
@@ -4839,7 +4900,7 @@
{
TCGv_i128 t;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
if (gen_trap_float128(dc)) {
@@ -4860,7 +4921,7 @@
TCGv_i128 src;
TCGv_i32 dst;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
if (gen_trap_float128(dc)) {
@@ -4883,7 +4944,7 @@
TCGv_i128 src;
TCGv_i64 dst;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
if (gen_trap_float128(dc)) {
@@ -4906,7 +4967,7 @@
TCGv_i32 src;
TCGv_i128 dst;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
if (gen_trap_float128(dc)) {
@@ -4929,10 +4990,7 @@
TCGv_i64 src;
TCGv_i128 dst;
- if (gen_trap_ifnofpu(dc)) {
- return true;
- }
- if (gen_trap_float128(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
@@ -4989,7 +5047,7 @@
{
TCGv_i32 src1, src2;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
@@ -5198,7 +5256,7 @@
{
TCGv_i64 dst, src1, src2;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
@@ -5222,7 +5280,7 @@
TCGv_i64 dst;
TCGv_i32 src1, src2;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
if (!(dc->def->features & CPU_FEATURE_FSMULD)) {
@@ -5331,7 +5389,7 @@
{
TCGv_i128 src1, src2;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
if (gen_trap_float128(dc)) {
@@ -5355,7 +5413,7 @@
TCGv_i64 src1, src2;
TCGv_i128 dst;
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
if (gen_trap_float128(dc)) {
@@ -5445,7 +5503,7 @@
if (avail_32(dc) && a->cc != 0) {
return false;
}
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
@@ -5469,7 +5527,7 @@
if (avail_32(dc) && a->cc != 0) {
return false;
}
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
@@ -5493,7 +5551,7 @@
if (avail_32(dc) && a->cc != 0) {
return false;
}
- if (gen_trap_ifnofpu(dc)) {
+ if (gen_trap_if_nofpu_fpexception(dc)) {
return true;
}
if (gen_trap_float128(dc)) {
@@ -5596,13 +5654,15 @@
dc->address_mask_32bit = tb_am_enabled(dc->base.tb->flags);
#ifndef CONFIG_USER_ONLY
dc->supervisor = (dc->base.tb->flags & TB_FLAG_SUPER) != 0;
+# ifdef TARGET_SPARC64
+ dc->hypervisor = (dc->base.tb->flags & TB_FLAG_HYPER) != 0;
+# else
+ dc->fsr_qne = (dc->base.tb->flags & TB_FLAG_FSR_QNE) != 0;
+# endif
#endif
#ifdef TARGET_SPARC64
dc->fprs_dirty = 0;
dc->asi = (dc->base.tb->flags >> TB_FLAG_ASI_SHIFT) & 0xff;
-#ifndef CONFIG_USER_ONLY
- dc->hypervisor = (dc->base.tb->flags & TB_FLAG_HYPER) != 0;
-#endif
#endif
/*
* if we reach a page boundary, we stop generation so that the