target/arm: Implement the LDGM, STGM, STZGM instructions
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200626033144.790098-20-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 2fa61b8..7b628d1 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -113,3 +113,6 @@
DEF_HELPER_FLAGS_3(st2g, TCG_CALL_NO_WG, void, env, i64, i64)
DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64)
DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64)
+DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64)
+DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64)
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 7ec7930..27d4b45 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -274,3 +274,87 @@
probe_write(env, ptr + TAG_GRANULE, TAG_GRANULE, mmu_idx, ra);
}
}
+
+#define LDGM_STGM_SIZE (4 << GMID_EL1_BS)
+
+uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr)
+{
+ int mmu_idx = cpu_mmu_index(env, false);
+ uintptr_t ra = GETPC();
+ void *tag_mem;
+
+ ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE);
+
+ /* Trap if accessing an invalid page. */
+ tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD,
+ LDGM_STGM_SIZE, MMU_DATA_LOAD,
+ LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra);
+
+ /* The tag is squashed to zero if the page does not support tags. */
+ if (!tag_mem) {
+ return 0;
+ }
+
+ QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6);
+ /*
+ * We are loading 64-bits worth of tags. The ordering of elements
+ * within the word corresponds to a 64-bit little-endian operation.
+ */
+ return ldq_le_p(tag_mem);
+}
+
+void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
+{
+ int mmu_idx = cpu_mmu_index(env, false);
+ uintptr_t ra = GETPC();
+ void *tag_mem;
+
+ ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE);
+
+ /* Trap if accessing an invalid page. */
+ tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
+ LDGM_STGM_SIZE, MMU_DATA_LOAD,
+ LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra);
+
+ /*
+ * Tag store only happens if the page support tags,
+ * and if the OS has enabled access to the tags.
+ */
+ if (!tag_mem) {
+ return;
+ }
+
+ QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6);
+ /*
+ * We are storing 64-bits worth of tags. The ordering of elements
+ * within the word corresponds to a 64-bit little-endian operation.
+ */
+ stq_le_p(tag_mem, val);
+}
+
+void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val)
+{
+ uintptr_t ra = GETPC();
+ int mmu_idx = cpu_mmu_index(env, false);
+ int log2_dcz_bytes, log2_tag_bytes;
+ intptr_t dcz_bytes, tag_bytes;
+ uint8_t *mem;
+
+ /*
+ * In arm_cpu_realizefn, we assert that dcz > LOG2_TAG_GRANULE+1,
+ * i.e. 32 bytes, which is an unreasonably small dcz anyway,
+ * to make sure that we can access one complete tag byte here.
+ */
+ log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2;
+ log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1);
+ dcz_bytes = (intptr_t)1 << log2_dcz_bytes;
+ tag_bytes = (intptr_t)1 << log2_tag_bytes;
+ ptr &= -dcz_bytes;
+
+ mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes,
+ MMU_DATA_STORE, tag_bytes, ra);
+ if (mem) {
+ int tag_pair = (val & 0xf) * 0x11;
+ memset(mem, tag_pair, tag_bytes);
+ }
+}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index e2295a3..7dc4937 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3736,7 +3736,7 @@
uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
int op2 = extract32(insn, 10, 2);
int op1 = extract32(insn, 22, 2);
- bool is_load = false, is_pair = false, is_zero = false;
+ bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
int index = 0;
TCGv_i64 addr, clean_addr, tcg_rt;
@@ -3756,9 +3756,14 @@
if (op2 != 0) {
/* STG */
index = op2 - 2;
- break;
+ } else {
+ /* STZGM */
+ if (s->current_el == 0 || offset != 0) {
+ goto do_unallocated;
+ }
+ is_mult = is_zero = true;
}
- goto do_unallocated;
+ break;
case 1:
if (op2 != 0) {
/* STZG */
@@ -3774,17 +3779,27 @@
/* ST2G */
is_pair = true;
index = op2 - 2;
- break;
+ } else {
+ /* STGM */
+ if (s->current_el == 0 || offset != 0) {
+ goto do_unallocated;
+ }
+ is_mult = true;
}
- goto do_unallocated;
+ break;
case 3:
if (op2 != 0) {
/* STZ2G */
is_pair = is_zero = true;
index = op2 - 2;
- break;
+ } else {
+ /* LDGM */
+ if (s->current_el == 0 || offset != 0) {
+ goto do_unallocated;
+ }
+ is_mult = is_load = true;
}
- goto do_unallocated;
+ break;
default:
do_unallocated:
@@ -3792,7 +3807,9 @@
return;
}
- if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
+ if (is_mult
+ ? !dc_isar_feature(aa64_mte, s)
+ : !dc_isar_feature(aa64_mte_insn_reg, s)) {
goto do_unallocated;
}
@@ -3806,6 +3823,44 @@
tcg_gen_addi_i64(addr, addr, offset);
}
+ if (is_mult) {
+ tcg_rt = cpu_reg(s, rt);
+
+ if (is_zero) {
+ int size = 4 << s->dcz_blocksize;
+
+ if (s->ata) {
+ gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
+ }
+ /*
+ * The non-tags portion of STZGM is mostly like DC_ZVA,
+ * except the alignment happens before the access.
+ */
+ clean_addr = clean_data_tbi(s, addr);
+ tcg_gen_andi_i64(clean_addr, clean_addr, -size);
+ gen_helper_dc_zva(cpu_env, clean_addr);
+ } else if (s->ata) {
+ if (is_load) {
+ gen_helper_ldgm(tcg_rt, cpu_env, addr);
+ } else {
+ gen_helper_stgm(cpu_env, addr, tcg_rt);
+ }
+ } else {
+ MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
+ int size = 4 << GMID_EL1_BS;
+
+ clean_addr = clean_data_tbi(s, addr);
+ tcg_gen_andi_i64(clean_addr, clean_addr, -size);
+ gen_probe_access(s, clean_addr, acc, size);
+
+ if (is_load) {
+ /* The result tags are zeros. */
+ tcg_gen_movi_i64(tcg_rt, 0);
+ }
+ }
+ return;
+ }
+
if (is_load) {
tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
tcg_rt = cpu_reg(s, rt);
@@ -14472,6 +14527,7 @@
dc->vec_stride = 0;
dc->cp_regs = arm_cpu->cp_regs;
dc->features = env->features;
+ dc->dcz_blocksize = arm_cpu->dcz_blocksize;
/* Single step state. The code-generation logic here is:
* SS_ACTIVE == 0:
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 98bcc37..16f2699 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -91,6 +91,8 @@
* < 0, set by the current instruction.
*/
int8_t btype;
+ /* A copy of cpu->dcz_blocksize. */
+ uint8_t dcz_blocksize;
/* True if this page is guarded. */
bool guarded_page;
/* Bottom two bits of XScale c15_cpar coprocessor access control reg */