target/arm: Relax ordered/atomic alignment checks for LSE2
FEAT_LSE2 only requires that atomic operations not cross a
16-byte boundary. Ordered operations may be completely
unaligned if SCTLR.nAA is set.
Because this alignment check is so special, do it by hand.
Make sure not to keep TCG temps live across the branch.
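For reference, the LSE2 boundary rule reduces to a simple check; a
minimal C sketch (illustrative only; "lse2_access_ok" is a
hypothetical name, not part of this patch):

    /* Hypothetical illustration: true if [addr, addr + size)
     * stays within a single 16-byte granule. */
    static bool lse2_access_ok(uint64_t addr, unsigned size)
    {
        return (addr % 16) + size <= 16;
    }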
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230530191438.411344-17-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index c3edf16..1c9370f 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -952,3 +952,10 @@
memset(mem, 0, blocklen);
}
+
+void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
+ uint32_t access_type, uint32_t mmu_idx)
+{
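+ /* GETPC() provides the return address used to unwind guest state. */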
+ arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type,
+ mmu_idx, GETPC());
+}
diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
index ff56807..3d5957c 100644
--- a/target/arm/tcg/helper-a64.h
+++ b/target/arm/tcg/helper-a64.h
@@ -110,3 +110,6 @@
DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64)
DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64)
DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64)
+
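+/* Raise an unaligned-access fault from generated code; does not return. */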
+DEF_HELPER_FLAGS_4(unaligned_access, TCG_CALL_NO_WG,
+ noreturn, env, i64, i32, i32)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 91d28f8..adedebd 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -307,6 +307,89 @@
return clean_data_tbi(s, addr);
}
+/*
+ * Generate the special alignment check that applies to AccType_ATOMIC
+ * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
+ * naturally aligned, but it must not cross a 16-byte boundary.
+ * See AArch64.CheckAlignment().
+ */
+static void check_lse2_align(DisasContext *s, int rn, int imm,
+ bool is_write, MemOp mop)
+{
+ TCGv_i32 tmp;
+ TCGv_i64 addr;
+ TCGLabel *over_label;
+ MMUAccessType type;
+ int mmu_idx;
+
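+ /*
+ * Compute ((addr + imm) & 15) + size in 32 bits: the access crosses
+ * a 16-byte boundary iff the result exceeds 16.
+ */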
+ tmp = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
+ tcg_gen_addi_i32(tmp, tmp, imm & 15);
+ tcg_gen_andi_i32(tmp, tmp, 15);
+ tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
+
+ over_label = gen_new_label();
+ tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
+
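+ /* Fault path: allocate addr here so no temp is live across the branch. */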
+ addr = tcg_temp_new_i64();
+ tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
+
+ type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
+ mmu_idx = get_mem_index(s);
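+ /* Raise the alignment fault; the helper does not return. */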
+ gen_helper_unaligned_access(cpu_env, addr, tcg_constant_i32(type),
+ tcg_constant_i32(mmu_idx));
+
+ gen_set_label(over_label);
+}
+
+/* Handle the alignment check for AccType_ATOMIC instructions. */
+static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
+{
+ MemOp size = mop & MO_SIZE;
+
+ if (size == MO_8) {
+ return mop;
+ }
+
+ /*
+ * If size == MO_128, this is a LDXP, and the operation is single-copy
+ * atomic for each doubleword, not the entire quadword; it still must
+ * be quadword aligned.
+ */
+ if (size == MO_128) {
+ return finalize_memop_atom(s, MO_128 | MO_ALIGN,
+ MO_ATOM_IFALIGN_PAIR);
+ }
+ if (dc_isar_feature(aa64_lse2, s)) {
+ check_lse2_align(s, rn, 0, true, mop);
+ } else {
+ mop |= MO_ALIGN;
+ }
+ return finalize_memop(s, mop);
+}
+
+/* Handle the alignment check for AccType_ORDERED instructions. */
+static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
+ bool is_write, MemOp mop)
+{
+ MemOp size = mop & MO_SIZE;
+
+ if (size == MO_8) {
+ return mop;
+ }
+ if (size == MO_128) {
+ return finalize_memop_atom(s, MO_128 | MO_ALIGN,
+ MO_ATOM_IFALIGN_PAIR);
+ }
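+ /*
+ * Without LSE2, require natural alignment; with LSE2, the boundary
+ * check applies only while SCTLR.nAA is clear.
+ */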
+ if (!dc_isar_feature(aa64_lse2, s)) {
+ mop |= MO_ALIGN;
+ } else if (!s->naa) {
+ check_lse2_align(s, rn, imm, is_write, mop);
+ }
+ return finalize_memop(s, mop);
+}
+
typedef struct DisasCompare64 {
TCGCond cond;
TCGv_i64 value;
@@ -2372,21 +2455,7 @@
{
int idx = get_mem_index(s);
TCGv_i64 dirty_addr, clean_addr;
- MemOp memop;
-
- /*
- * For pairs:
- * if size == 2, the operation is single-copy atomic for the doubleword.
- * if size == 3, the operation is single-copy atomic for *each* doubleword,
- * not the entire quadword, however it must be quadword aligned.
- */
- memop = size + is_pair;
- if (memop == MO_128) {
- memop = finalize_memop_atom(s, MO_128 | MO_ALIGN,
- MO_ATOM_IFALIGN_PAIR);
- } else {
- memop = finalize_memop(s, memop | MO_ALIGN);
- }
+ MemOp memop = check_atomic_align(s, rn, size + is_pair);
s->is_ldex = true;
dirty_addr = cpu_reg_sp(s, rn);
@@ -2524,7 +2593,7 @@
if (rn == 31) {
gen_check_sp_alignment(s);
}
- memop = finalize_memop(s, size | MO_ALIGN);
+ memop = check_atomic_align(s, rn, size);
clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
memidx, memop);
@@ -2546,7 +2615,7 @@
}
/* This is a single atomic access, despite the "pair". */
- memop = finalize_memop(s, (size + 1) | MO_ALIGN);
+ memop = check_atomic_align(s, rn, size + 1);
clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
if (size == 2) {
@@ -2666,8 +2735,7 @@
gen_check_sp_alignment(s);
}
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
- /* TODO: ARMv8.4-LSE SCTLR.nAA */
- memop = finalize_memop(s, size | MO_ALIGN);
+ memop = check_ordered_align(s, rn, 0, true, size);
clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
true, rn != 31, memop);
do_gpr_st(s, cpu_reg(s, rt), clean_addr, memop, true, rt,
@@ -2685,8 +2753,7 @@
if (rn == 31) {
gen_check_sp_alignment(s);
}
- /* TODO: ARMv8.4-LSE SCTLR.nAA */
- memop = finalize_memop(s, size | MO_ALIGN);
+ memop = check_ordered_align(s, rn, 0, false, size);
clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
false, rn != 31, memop);
do_gpr_ld(s, cpu_reg(s, rt), clean_addr, memop, false, true,
@@ -3367,7 +3434,7 @@
bool a = extract32(insn, 23, 1);
TCGv_i64 tcg_rs, tcg_rt, clean_addr;
AtomicThreeOpFn *fn = NULL;
- MemOp mop = finalize_memop(s, size | MO_ALIGN);
+ MemOp mop = size;
if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
unallocated_encoding(s);
@@ -3418,6 +3485,8 @@
if (rn == 31) {
gen_check_sp_alignment(s);
}
+
+ mop = check_atomic_align(s, rn, mop);
clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, mop);
if (o3_opc == 014) {
@@ -3542,16 +3611,13 @@
bool is_store = false;
bool extend = false;
bool iss_sf;
- MemOp mop;
+ MemOp mop = size;
if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
unallocated_encoding(s);
return;
}
- /* TODO: ARMv8.4-LSE SCTLR.nAA */
- mop = finalize_memop(s, size | MO_ALIGN);
-
switch (opc) {
case 0: /* STLURB */
is_store = true;
@@ -3583,6 +3649,8 @@
gen_check_sp_alignment(s);
}
+ mop = check_ordered_align(s, rn, offset, is_store, mop);
+
dirty_addr = read_cpu_reg_sp(s, rn, 1);
tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
clean_addr = clean_data_tbi(s, dirty_addr);