Merge tag 'pull-tcg-20230511-2' of https://gitlab.com/rth7680/qemu into staging
target/m68k: Fix gen_load_fp regression
accel/tcg: Ensure fairness with icount
disas: Move disas.c into the target-independent source sets
tcg: Use common routines for calling slow path helpers
tcg/*: Cleanups to qemu_ld/st constraints
tcg: Remove TARGET_ALIGNED_ONLY
accel/tcg: Reorg system mode load/store helpers
# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmRcxtYdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV9arQf8Di7CnMQE/jW+8w6v
# 5af0dX8/St2JnCXzG+qiW6mJm50Cy4GunCN66JcCAswpENvQLLsJP13c+4KTeB1T
# rGBbedFXTw1LsaoOcBvwhq7RTIROz4GESTS4EZoJMlMhMv0VotekUPPz4NFMZRKX
# LMvShM2C+f2p4HmDnnbki7M3+tMqpgoGCeBFX8Jy7/5sbpS/7ceXRio3ZRAhasPu
# vjA0zqUtoTs7ijKpXf3uRl/c7xql+f0d7SDdCRt4OKasfLCCDwkjtMf6plZ2jzuS
# OgwKc5N1jaMF6erHYZJIbfLLdUl20/JJEcbpU3Eh1XuHnzn1msS9JDOm2tvzwsto
# OpOKUg==
# =Lhy3
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 11 May 2023 11:43:34 AM BST
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]
* tag 'pull-tcg-20230511-2' of https://gitlab.com/rth7680/qemu: (53 commits)
target/loongarch: Do not include tcg-ldst.h
accel/tcg: Reorg system mode store helpers
accel/tcg: Reorg system mode load helpers
accel/tcg: Introduce tlb_read_idx
accel/tcg: Add cpu_in_serial_context
tcg: Remove TARGET_ALIGNED_ONLY
target/sh4: Remove TARGET_ALIGNED_ONLY
target/sh4: Use MO_ALIGN where required
target/nios2: Remove TARGET_ALIGNED_ONLY
target/mips: Remove TARGET_ALIGNED_ONLY
target/mips: Use MO_ALIGN instead of 0
target/mips: Add missing default_tcg_memop_mask
target/mips: Add MO_ALIGN to gen_llwp, gen_scwp
tcg/s390x: Simplify constraints on qemu_ld/st
tcg/s390x: Use ALGFR in constructing softmmu host address
tcg/riscv: Simplify constraints on qemu_ld/st
tcg/ppc: Remove unused constraint J
tcg/ppc: Remove unused constraints A, B, C, D
tcg/ppc: Adjust constraints on qemu_ld/st
tcg/ppc: Reorg tcg_out_tlb_read
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c
index e7962c9..9a5fabf 100644
--- a/accel/tcg/cpu-exec-common.c
+++ b/accel/tcg/cpu-exec-common.c
@@ -22,6 +22,7 @@
#include "sysemu/tcg.h"
#include "exec/exec-all.h"
#include "qemu/plugin.h"
+#include "internal.h"
bool tcg_allowed;
@@ -81,6 +82,8 @@
void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
{
+ /* Prevent looping if already executing in a serial context. */
+ g_assert(!cpu_in_serial_context(cpu));
cpu->exception_index = EXCP_ATOMIC;
cpu_loop_exit_restore(cpu, pc);
}
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 3117886..6177770 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1441,34 +1441,17 @@
}
}
-static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
-{
-#if TCG_OVERSIZED_GUEST
- return *(target_ulong *)((uintptr_t)entry + ofs);
-#else
- /* ofs might correspond to .addr_write, so use qatomic_read */
- return qatomic_read((target_ulong *)((uintptr_t)entry + ofs));
-#endif
-}
-
/* Return true if ADDR is present in the victim tlb, and has been copied
back to the main tlb. */
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
- size_t elt_ofs, target_ulong page)
+ MMUAccessType access_type, target_ulong page)
{
size_t vidx;
assert_cpu_is_self(env_cpu(env));
for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
- target_ulong cmp;
-
- /* elt_ofs might correspond to .addr_write, so use qatomic_read */
-#if TCG_OVERSIZED_GUEST
- cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
-#else
- cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
-#endif
+ target_ulong cmp = tlb_read_idx(vtlb, access_type);
if (cmp == page) {
/* Found entry in victim tlb, swap tlb and iotlb. */
@@ -1490,11 +1473,6 @@
return false;
}
-/* Macro to call the above, with local variables from the use context. */
-#define VICTIM_TLB_HIT(TY, ADDR) \
- victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
- (ADDR) & TARGET_PAGE_MASK)
-
static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
CPUTLBEntryFull *full, uintptr_t retaddr)
{
@@ -1527,29 +1505,12 @@
{
uintptr_t index = tlb_index(env, mmu_idx, addr);
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
- target_ulong tlb_addr, page_addr;
- size_t elt_ofs;
- int flags;
+ target_ulong tlb_addr = tlb_read_idx(entry, access_type);
+ target_ulong page_addr = addr & TARGET_PAGE_MASK;
+ int flags = TLB_FLAGS_MASK;
- switch (access_type) {
- case MMU_DATA_LOAD:
- elt_ofs = offsetof(CPUTLBEntry, addr_read);
- break;
- case MMU_DATA_STORE:
- elt_ofs = offsetof(CPUTLBEntry, addr_write);
- break;
- case MMU_INST_FETCH:
- elt_ofs = offsetof(CPUTLBEntry, addr_code);
- break;
- default:
- g_assert_not_reached();
- }
- tlb_addr = tlb_read_ofs(entry, elt_ofs);
-
- flags = TLB_FLAGS_MASK;
- page_addr = addr & TARGET_PAGE_MASK;
if (!tlb_hit_page(tlb_addr, page_addr)) {
- if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
+ if (!victim_tlb_hit(env, mmu_idx, index, access_type, page_addr)) {
CPUState *cs = env_cpu(env);
if (!cs->cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
@@ -1571,7 +1532,7 @@
*/
flags &= ~TLB_INVALID_MASK;
}
- tlb_addr = tlb_read_ofs(entry, elt_ofs);
+ tlb_addr = tlb_read_idx(entry, access_type);
}
flags &= tlb_addr;
@@ -1756,6 +1717,179 @@
#endif
/*
+ * Probe for a load/store operation.
+ * Return the host address and fill in the access flags.
+ */
+
+typedef struct MMULookupPageData {
+ CPUTLBEntryFull *full;
+ void *haddr;
+ target_ulong addr;
+ int flags;
+ int size;
+} MMULookupPageData;
+
+typedef struct MMULookupLocals {
+ MMULookupPageData page[2];
+ MemOp memop;
+ int mmu_idx;
+} MMULookupLocals;
+
+/**
+ * mmu_lookup1: translate one page
+ * @env: cpu context
+ * @data: lookup parameters
+ * @mmu_idx: virtual address context
+ * @access_type: load/store/code
+ * @ra: return address into tcg generated code, or 0
+ *
+ * Resolve the translation for the one page at @data.addr, filling in
+ * the rest of @data with the results. If the translation fails,
+ * tlb_fill will longjmp out. Return true if the softmmu tlb for
+ * @mmu_idx may have resized.
+ */
+static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data,
+ int mmu_idx, MMUAccessType access_type, uintptr_t ra)
+{
+ target_ulong addr = data->addr;
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
+ target_ulong tlb_addr = tlb_read_idx(entry, access_type);
+ bool maybe_resized = false;
+
+ /* If the TLB entry is for a different page, reload and try again. */
+ if (!tlb_hit(tlb_addr, addr)) {
+ if (!victim_tlb_hit(env, mmu_idx, index, access_type,
+ addr & TARGET_PAGE_MASK)) {
+ tlb_fill(env_cpu(env), addr, data->size, access_type, mmu_idx, ra);
+ maybe_resized = true;
+ index = tlb_index(env, mmu_idx, addr);
+ entry = tlb_entry(env, mmu_idx, addr);
+ }
+ tlb_addr = tlb_read_idx(entry, access_type) & ~TLB_INVALID_MASK;
+ }
+
+ data->flags = tlb_addr & TLB_FLAGS_MASK;
+ data->full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+ /* Compute haddr speculatively; depending on flags it might be invalid. */
+ data->haddr = (void *)((uintptr_t)addr + entry->addend);
+
+ return maybe_resized;
+}
+
+/**
+ * mmu_watch_or_dirty
+ * @env: cpu context
+ * @data: lookup parameters
+ * @access_type: load/store/code
+ * @ra: return address into tcg generated code, or 0
+ *
+ * Trigger watchpoints for @data.addr:@data.size;
+ * record writes to protected clean pages.
+ */
+static void mmu_watch_or_dirty(CPUArchState *env, MMULookupPageData *data,
+ MMUAccessType access_type, uintptr_t ra)
+{
+ CPUTLBEntryFull *full = data->full;
+ target_ulong addr = data->addr;
+ int flags = data->flags;
+ int size = data->size;
+
+ /* On watchpoint hit, this will longjmp out. */
+ if (flags & TLB_WATCHPOINT) {
+ int wp = access_type == MMU_DATA_STORE ? BP_MEM_WRITE : BP_MEM_READ;
+ cpu_check_watchpoint(env_cpu(env), addr, size, full->attrs, wp, ra);
+ flags &= ~TLB_WATCHPOINT;
+ }
+
+ /* Note that notdirty is only set for writes. */
+ if (flags & TLB_NOTDIRTY) {
+ notdirty_write(env_cpu(env), addr, size, full, ra);
+ flags &= ~TLB_NOTDIRTY;
+ }
+ data->flags = flags;
+}
+
+/**
+ * mmu_lookup: translate page(s)
+ * @env: cpu context
+ * @addr: virtual address
+ * @oi: combined mmu_idx and MemOp
+ * @ra: return address into tcg generated code, or 0
+ * @access_type: load/store/code
+ * @l: output result
+ *
+ * Resolve the translation for the page(s) beginning at @addr, for MemOp.size
+ * bytes. Return true if the lookup crosses a page boundary.
+ */
+static bool mmu_lookup(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+ uintptr_t ra, MMUAccessType type, MMULookupLocals *l)
+{
+ unsigned a_bits;
+ bool crosspage;
+ int flags;
+
+ l->memop = get_memop(oi);
+ l->mmu_idx = get_mmuidx(oi);
+
+ tcg_debug_assert(l->mmu_idx < NB_MMU_MODES);
+
+ /* Handle CPU specific unaligned behaviour */
+ a_bits = get_alignment_bits(l->memop);
+ if (addr & ((1 << a_bits) - 1)) {
+ cpu_unaligned_access(env_cpu(env), addr, type, l->mmu_idx, ra);
+ }
+
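+    /*
+     * page[1].addr is the page containing the last byte of the access;
+     * the access crosses a page boundary iff that differs from the page
+     * containing @addr.
+     */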
+ l->page[0].addr = addr;
+ l->page[0].size = memop_size(l->memop);
+ l->page[1].addr = (addr + l->page[0].size - 1) & TARGET_PAGE_MASK;
+ l->page[1].size = 0;
+ crosspage = (addr ^ l->page[1].addr) & TARGET_PAGE_MASK;
+
+ if (likely(!crosspage)) {
+ mmu_lookup1(env, &l->page[0], l->mmu_idx, type, ra);
+
+ flags = l->page[0].flags;
+ if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
+ mmu_watch_or_dirty(env, &l->page[0], type, ra);
+ }
+ if (unlikely(flags & TLB_BSWAP)) {
+ l->memop ^= MO_BSWAP;
+ }
+ } else {
+        /* Finish computing the page-crossing split. */
+ int size0 = l->page[1].addr - addr;
+ l->page[1].size = l->page[0].size - size0;
+ l->page[0].size = size0;
+
+ /*
+ * Lookup both pages, recognizing exceptions from either. If the
+ * second lookup potentially resized, refresh first CPUTLBEntryFull.
+ */
+ mmu_lookup1(env, &l->page[0], l->mmu_idx, type, ra);
+ if (mmu_lookup1(env, &l->page[1], l->mmu_idx, type, ra)) {
+ uintptr_t index = tlb_index(env, l->mmu_idx, addr);
+ l->page[0].full = &env_tlb(env)->d[l->mmu_idx].fulltlb[index];
+ }
+
+ flags = l->page[0].flags | l->page[1].flags;
+ if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
+ mmu_watch_or_dirty(env, &l->page[0], type, ra);
+ mmu_watch_or_dirty(env, &l->page[1], type, ra);
+ }
+
+ /*
+ * Since target/sparc is the only user of TLB_BSWAP, and all
+ * Sparc accesses are aligned, any treatment across two pages
+ * would be arbitrary. Refuse it until there's a use.
+ */
+ tcg_debug_assert((flags & TLB_BSWAP) == 0);
+ }
+
+ return crosspage;
+}
+
+/*
* Probe for an atomic operation. Do not allow unaligned operations,
* or io operations to proceed. Return the host address.
*
@@ -1802,7 +1936,8 @@
if (prot & PAGE_WRITE) {
tlb_addr = tlb_addr_write(tlbe);
if (!tlb_hit(tlb_addr, addr)) {
- if (!VICTIM_TLB_HIT(addr_write, addr)) {
+ if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
+ addr & TARGET_PAGE_MASK)) {
tlb_fill(env_cpu(env), addr, size,
MMU_DATA_STORE, mmu_idx, retaddr);
index = tlb_index(env, mmu_idx, addr);
@@ -1835,7 +1970,8 @@
} else /* if (prot & PAGE_READ) */ {
tlb_addr = tlbe->addr_read;
if (!tlb_hit(tlb_addr, addr)) {
- if (!VICTIM_TLB_HIT(addr_write, addr)) {
+ if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_LOAD,
+ addr & TARGET_PAGE_MASK)) {
tlb_fill(env_cpu(env), addr, size,
MMU_DATA_LOAD, mmu_idx, retaddr);
index = tlb_index(env, mmu_idx, addr);
@@ -1927,117 +2063,6 @@
}
}
-static inline uint64_t QEMU_ALWAYS_INLINE
-load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
- uintptr_t retaddr, MemOp op, bool code_read,
- FullLoadHelper *full_load)
-{
- const size_t tlb_off = code_read ?
- offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
- const MMUAccessType access_type =
- code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
- const unsigned a_bits = get_alignment_bits(get_memop(oi));
- const size_t size = memop_size(op);
- uintptr_t mmu_idx = get_mmuidx(oi);
- uintptr_t index;
- CPUTLBEntry *entry;
- target_ulong tlb_addr;
- void *haddr;
- uint64_t res;
-
- tcg_debug_assert(mmu_idx < NB_MMU_MODES);
-
- /* Handle CPU specific unaligned behaviour */
- if (addr & ((1 << a_bits) - 1)) {
- cpu_unaligned_access(env_cpu(env), addr, access_type,
- mmu_idx, retaddr);
- }
-
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
- tlb_addr = code_read ? entry->addr_code : entry->addr_read;
-
- /* If the TLB entry is for a different page, reload and try again. */
- if (!tlb_hit(tlb_addr, addr)) {
- if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
- addr & TARGET_PAGE_MASK)) {
- tlb_fill(env_cpu(env), addr, size,
- access_type, mmu_idx, retaddr);
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
- }
- tlb_addr = code_read ? entry->addr_code : entry->addr_read;
- tlb_addr &= ~TLB_INVALID_MASK;
- }
-
- /* Handle anything that isn't just a straight memory access. */
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
- CPUTLBEntryFull *full;
- bool need_swap;
-
- /* For anything that is unaligned, recurse through full_load. */
- if ((addr & (size - 1)) != 0) {
- goto do_unaligned_access;
- }
-
- full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
-
- /* Handle watchpoints. */
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
- /* On watchpoint hit, this will longjmp out. */
- cpu_check_watchpoint(env_cpu(env), addr, size,
- full->attrs, BP_MEM_READ, retaddr);
- }
-
- need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
-
- /* Handle I/O access. */
- if (likely(tlb_addr & TLB_MMIO)) {
- return io_readx(env, full, mmu_idx, addr, retaddr,
- access_type, op ^ (need_swap * MO_BSWAP));
- }
-
- haddr = (void *)((uintptr_t)addr + entry->addend);
-
- /*
- * Keep these two load_memop separate to ensure that the compiler
- * is able to fold the entire function to a single instruction.
- * There is a build-time assert inside to remind you of this. ;-)
- */
- if (unlikely(need_swap)) {
- return load_memop(haddr, op ^ MO_BSWAP);
- }
- return load_memop(haddr, op);
- }
-
- /* Handle slow unaligned access (it spans two pages or IO). */
- if (size > 1
- && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
- >= TARGET_PAGE_SIZE)) {
- target_ulong addr1, addr2;
- uint64_t r1, r2;
- unsigned shift;
- do_unaligned_access:
- addr1 = addr & ~((target_ulong)size - 1);
- addr2 = addr1 + size;
- r1 = full_load(env, addr1, oi, retaddr);
- r2 = full_load(env, addr2, oi, retaddr);
- shift = (addr & (size - 1)) * 8;
-
- if (memop_big_endian(op)) {
- /* Big-endian combine. */
- res = (r1 << shift) | (r2 >> ((size * 8) - shift));
- } else {
- /* Little-endian combine. */
- res = (r1 >> shift) | (r2 << ((size * 8) - shift));
- }
- return res & MAKE_64BIT_MASK(0, size * 8);
- }
-
- haddr = (void *)((uintptr_t)addr + entry->addend);
- return load_memop(haddr, op);
-}
-
/*
* For the benefit of TCG generated code, we want to avoid the
* complication of ABI-specific return type promotion and always
@@ -2048,89 +2073,250 @@
* We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
*/
-static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
+/**
+ * do_ld_mmio_beN:
+ * @env: cpu context
+ * @p: translation parameters
+ * @ret_be: accumulated data
+ * @mmu_idx: virtual address context
+ * @ra: return address into tcg generated code, or 0
+ *
+ * Load @p->size bytes from @p->addr, which is memory-mapped i/o.
+ * The bytes are concatenated in big-endian order with @ret_be.
+ */
+static uint64_t do_ld_mmio_beN(CPUArchState *env, MMULookupPageData *p,
+ uint64_t ret_be, int mmu_idx,
+ MMUAccessType type, uintptr_t ra)
{
- validate_memop(oi, MO_UB);
- return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
+ CPUTLBEntryFull *full = p->full;
+ target_ulong addr = p->addr;
+ int i, size = p->size;
+
+ QEMU_IOTHREAD_LOCK_GUARD();
+ for (i = 0; i < size; i++) {
+ uint8_t x = io_readx(env, full, mmu_idx, addr + i, ra, type, MO_UB);
+ ret_be = (ret_be << 8) | x;
+ }
+ return ret_be;
+}
+
+/**
+ * do_ld_bytes_beN:
+ * @p: translation parameters
+ * @ret_be: accumulated data
+ *
+ * Load @p->size bytes from @p->haddr, which is RAM.
+ * The bytes are concatenated in big-endian order with @ret_be.
+ */
+static uint64_t do_ld_bytes_beN(MMULookupPageData *p, uint64_t ret_be)
+{
+ uint8_t *haddr = p->haddr;
+ int i, size = p->size;
+
+ for (i = 0; i < size; i++) {
+ ret_be = (ret_be << 8) | haddr[i];
+ }
+ return ret_be;
+}
+
+/*
+ * Wrapper for the above.
+ */
+static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
+ uint64_t ret_be, int mmu_idx,
+ MMUAccessType type, uintptr_t ra)
+{
+ if (unlikely(p->flags & TLB_MMIO)) {
+ return do_ld_mmio_beN(env, p, ret_be, mmu_idx, type, ra);
+ } else {
+ return do_ld_bytes_beN(p, ret_be);
+ }
+}
+
+static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
+ MMUAccessType type, uintptr_t ra)
+{
+ if (unlikely(p->flags & TLB_MMIO)) {
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, MO_UB);
+ } else {
+ return *(uint8_t *)p->haddr;
+ }
+}
+
+static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
+ MMUAccessType type, MemOp memop, uintptr_t ra)
+{
+ uint64_t ret;
+
+ if (unlikely(p->flags & TLB_MMIO)) {
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
+ }
+
+ /* Perform the load host endian, then swap if necessary. */
+ ret = load_memop(p->haddr, MO_UW);
+ if (memop & MO_BSWAP) {
+ ret = bswap16(ret);
+ }
+ return ret;
+}
+
+static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
+ MMUAccessType type, MemOp memop, uintptr_t ra)
+{
+ uint32_t ret;
+
+ if (unlikely(p->flags & TLB_MMIO)) {
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
+ }
+
+ /* Perform the load host endian. */
+ ret = load_memop(p->haddr, MO_UL);
+ if (memop & MO_BSWAP) {
+ ret = bswap32(ret);
+ }
+ return ret;
+}
+
+static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
+ MMUAccessType type, MemOp memop, uintptr_t ra)
+{
+ uint64_t ret;
+
+ if (unlikely(p->flags & TLB_MMIO)) {
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
+ }
+
+ /* Perform the load host endian. */
+ ret = load_memop(p->haddr, MO_UQ);
+ if (memop & MO_BSWAP) {
+ ret = bswap64(ret);
+ }
+ return ret;
+}
+
+static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+ uintptr_t ra, MMUAccessType access_type)
+{
+ MMULookupLocals l;
+ bool crosspage;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
+ tcg_debug_assert(!crosspage);
+
+ return do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
}
tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
- return full_ldub_mmu(env, addr, oi, retaddr);
+ validate_memop(oi, MO_UB);
+ return do_ld1_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
}
-static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
+static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+ uintptr_t ra, MMUAccessType access_type)
{
- validate_memop(oi, MO_LEUW);
- return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
- full_le_lduw_mmu);
+ MMULookupLocals l;
+ bool crosspage;
+ uint16_t ret;
+ uint8_t a, b;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
+ if (likely(!crosspage)) {
+ return do_ld_2(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
+ }
+
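+    /*
+     * The two bytes live on different pages; load each separately and
+     * combine them according to the requested endianness.
+     */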
+ a = do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
+ b = do_ld_1(env, &l.page[1], l.mmu_idx, access_type, ra);
+
+ if ((l.memop & MO_BSWAP) == MO_LE) {
+ ret = a | (b << 8);
+ } else {
+ ret = b | (a << 8);
+ }
+ return ret;
}
tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
- return full_le_lduw_mmu(env, addr, oi, retaddr);
-}
-
-static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
-{
- validate_memop(oi, MO_BEUW);
- return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
- full_be_lduw_mmu);
+ validate_memop(oi, MO_LEUW);
+ return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
}
tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
- return full_be_lduw_mmu(env, addr, oi, retaddr);
+ validate_memop(oi, MO_BEUW);
+ return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
}
-static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
+static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+ uintptr_t ra, MMUAccessType access_type)
{
- validate_memop(oi, MO_LEUL);
- return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
- full_le_ldul_mmu);
+ MMULookupLocals l;
+ bool crosspage;
+ uint32_t ret;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
+ if (likely(!crosspage)) {
+ return do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
+ }
+
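+    /*
+     * Accumulate the bytes big-endian across both pages, then byte-swap
+     * once at the end if the access is little-endian.
+     */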
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
+ if ((l.memop & MO_BSWAP) == MO_LE) {
+ ret = bswap32(ret);
+ }
+ return ret;
}
tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
- return full_le_ldul_mmu(env, addr, oi, retaddr);
-}
-
-static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
-{
- validate_memop(oi, MO_BEUL);
- return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
- full_be_ldul_mmu);
+ validate_memop(oi, MO_LEUL);
+ return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
}
tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
- return full_be_ldul_mmu(env, addr, oi, retaddr);
+ validate_memop(oi, MO_BEUL);
+ return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
+}
+
+static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+ uintptr_t ra, MMUAccessType access_type)
+{
+ MMULookupLocals l;
+ bool crosspage;
+ uint64_t ret;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
+ if (likely(!crosspage)) {
+ return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
+ }
+
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
+ if ((l.memop & MO_BSWAP) == MO_LE) {
+ ret = bswap64(ret);
+ }
+ return ret;
}
uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
validate_memop(oi, MO_LEUQ);
- return load_helper(env, addr, oi, retaddr, MO_LEUQ, false,
- helper_le_ldq_mmu);
+ return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
}
uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
validate_memop(oi, MO_BEUQ);
- return load_helper(env, addr, oi, retaddr, MO_BEUQ, false,
- helper_be_ldq_mmu);
+ return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
}
/*
@@ -2173,56 +2359,85 @@
* Load helpers for cpu_ldst.h.
*/
-static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
- MemOpIdx oi, uintptr_t retaddr,
- FullLoadHelper *full_load)
+static void plugin_load_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
{
- uint64_t ret;
-
- ret = full_load(env, addr, oi, retaddr);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
- return ret;
}
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
{
- return cpu_load_helper(env, addr, oi, ra, full_ldub_mmu);
+ uint8_t ret;
+
+ validate_memop(oi, MO_UB);
+ ret = do_ld1_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
- return cpu_load_helper(env, addr, oi, ra, full_be_lduw_mmu);
+ uint16_t ret;
+
+ validate_memop(oi, MO_BEUW);
+ ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
- return cpu_load_helper(env, addr, oi, ra, full_be_ldul_mmu);
+ uint32_t ret;
+
+ validate_memop(oi, MO_BEUL);
+ ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
- return cpu_load_helper(env, addr, oi, ra, helper_be_ldq_mmu);
+ uint64_t ret;
+
+ validate_memop(oi, MO_BEUQ);
+ ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
- return cpu_load_helper(env, addr, oi, ra, full_le_lduw_mmu);
+ uint16_t ret;
+
+ validate_memop(oi, MO_LEUW);
+ ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
- return cpu_load_helper(env, addr, oi, ra, full_le_ldul_mmu);
+ uint32_t ret;
+
+ validate_memop(oi, MO_LEUL);
+ ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
- return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu);
+ uint64_t ret;
+
+ validate_memop(oi, MO_LEUQ);
+ ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
@@ -2317,324 +2532,300 @@
}
}
-static void full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
- MemOpIdx oi, uintptr_t retaddr);
-
-static void __attribute__((noinline))
-store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
- uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
- bool big_endian)
+/**
+ * do_st_mmio_leN:
+ * @env: cpu context
+ * @p: translation parameters
+ * @val_le: data to store
+ * @mmu_idx: virtual address context
+ * @ra: return address into tcg generated code, or 0
+ *
+ * Store @p->size bytes at @p->addr, which is memory-mapped i/o.
+ * The bytes to store are extracted in little-endian order from @val_le;
+ * return the bytes of @val_le beyond @p->size that have not been stored.
+ */
+static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p,
+ uint64_t val_le, int mmu_idx, uintptr_t ra)
{
- const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
- uintptr_t index, index2;
- CPUTLBEntry *entry, *entry2;
- target_ulong page1, page2, tlb_addr, tlb_addr2;
- MemOpIdx oi;
- size_t size2;
- int i;
+ CPUTLBEntryFull *full = p->full;
+ target_ulong addr = p->addr;
+ int i, size = p->size;
- /*
- * Ensure the second page is in the TLB. Note that the first page
- * is already guaranteed to be filled, and that the second page
- * cannot evict the first. An exception to this rule is PAGE_WRITE_INV
- * handling: the first page could have evicted itself.
- */
- page1 = addr & TARGET_PAGE_MASK;
- page2 = (addr + size) & TARGET_PAGE_MASK;
- size2 = (addr + size) & ~TARGET_PAGE_MASK;
- index2 = tlb_index(env, mmu_idx, page2);
- entry2 = tlb_entry(env, mmu_idx, page2);
-
- tlb_addr2 = tlb_addr_write(entry2);
- if (page1 != page2 && !tlb_hit_page(tlb_addr2, page2)) {
- if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
- tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
- mmu_idx, retaddr);
- index2 = tlb_index(env, mmu_idx, page2);
- entry2 = tlb_entry(env, mmu_idx, page2);
- }
- tlb_addr2 = tlb_addr_write(entry2);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ for (i = 0; i < size; i++, val_le >>= 8) {
+ io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB);
}
+ return val_le;
+}
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
- tlb_addr = tlb_addr_write(entry);
+/**
+ * do_st_bytes_leN:
+ * @p: translation parameters
+ * @val_le: data to store
+ *
+ * Store @p->size bytes at @p->haddr, which is RAM.
+ * The bytes to store are extracted in little-endian order from @val_le;
+ * return the bytes of @val_le beyond @p->size that have not been stored.
+ */
+static uint64_t do_st_bytes_leN(MMULookupPageData *p, uint64_t val_le)
+{
+ uint8_t *haddr = p->haddr;
+ int i, size = p->size;
- /*
- * Handle watchpoints. Since this may trap, all checks
- * must happen before any store.
- */
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
- cpu_check_watchpoint(env_cpu(env), addr, size - size2,
- env_tlb(env)->d[mmu_idx].fulltlb[index].attrs,
- BP_MEM_WRITE, retaddr);
+ for (i = 0; i < size; i++, val_le >>= 8) {
+ haddr[i] = val_le;
}
- if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
- cpu_check_watchpoint(env_cpu(env), page2, size2,
- env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs,
- BP_MEM_WRITE, retaddr);
- }
+ return val_le;
+}
- /*
- * XXX: not efficient, but simple.
- * This loop must go in the forward direction to avoid issues
- * with self-modifying code in Windows 64-bit.
- */
- oi = make_memop_idx(MO_UB, mmu_idx);
- if (big_endian) {
- for (i = 0; i < size; ++i) {
- /* Big-endian extract. */
- uint8_t val8 = val >> (((size - 1) * 8) - (i * 8));
- full_stb_mmu(env, addr + i, val8, oi, retaddr);
- }
+/*
+ * Wrapper for the above.
+ */
+static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
+ uint64_t val_le, int mmu_idx, uintptr_t ra)
+{
+ if (unlikely(p->flags & TLB_MMIO)) {
+ return do_st_mmio_leN(env, p, val_le, mmu_idx, ra);
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+ return val_le >> (p->size * 8);
} else {
- for (i = 0; i < size; ++i) {
- /* Little-endian extract. */
- uint8_t val8 = val >> (i * 8);
- full_stb_mmu(env, addr + i, val8, oi, retaddr);
- }
+ return do_st_bytes_leN(p, val_le);
}
}
-static inline void QEMU_ALWAYS_INLINE
-store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
- MemOpIdx oi, uintptr_t retaddr, MemOp op)
+static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
+ int mmu_idx, uintptr_t ra)
{
- const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
- const unsigned a_bits = get_alignment_bits(get_memop(oi));
- const size_t size = memop_size(op);
- uintptr_t mmu_idx = get_mmuidx(oi);
- uintptr_t index;
- CPUTLBEntry *entry;
- target_ulong tlb_addr;
- void *haddr;
-
- tcg_debug_assert(mmu_idx < NB_MMU_MODES);
-
- /* Handle CPU specific unaligned behaviour */
- if (addr & ((1 << a_bits) - 1)) {
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
- mmu_idx, retaddr);
+ if (unlikely(p->flags & TLB_MMIO)) {
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, MO_UB);
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+ /* nothing */
+ } else {
+ *(uint8_t *)p->haddr = val;
}
-
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
- tlb_addr = tlb_addr_write(entry);
-
- /* If the TLB entry is for a different page, reload and try again. */
- if (!tlb_hit(tlb_addr, addr)) {
- if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
- addr & TARGET_PAGE_MASK)) {
- tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
- mmu_idx, retaddr);
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
- }
- tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
- }
-
- /* Handle anything that isn't just a straight memory access. */
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
- CPUTLBEntryFull *full;
- bool need_swap;
-
- /* For anything that is unaligned, recurse through byte stores. */
- if ((addr & (size - 1)) != 0) {
- goto do_unaligned_access;
- }
-
- full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
-
- /* Handle watchpoints. */
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
- /* On watchpoint hit, this will longjmp out. */
- cpu_check_watchpoint(env_cpu(env), addr, size,
- full->attrs, BP_MEM_WRITE, retaddr);
- }
-
- need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
-
- /* Handle I/O access. */
- if (tlb_addr & TLB_MMIO) {
- io_writex(env, full, mmu_idx, val, addr, retaddr,
- op ^ (need_swap * MO_BSWAP));
- return;
- }
-
- /* Ignore writes to ROM. */
- if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
- return;
- }
-
- /* Handle clean RAM pages. */
- if (tlb_addr & TLB_NOTDIRTY) {
- notdirty_write(env_cpu(env), addr, size, full, retaddr);
- }
-
- haddr = (void *)((uintptr_t)addr + entry->addend);
-
- /*
- * Keep these two store_memop separate to ensure that the compiler
- * is able to fold the entire function to a single instruction.
- * There is a build-time assert inside to remind you of this. ;-)
- */
- if (unlikely(need_swap)) {
- store_memop(haddr, val, op ^ MO_BSWAP);
- } else {
- store_memop(haddr, val, op);
- }
- return;
- }
-
- /* Handle slow unaligned access (it spans two pages or IO). */
- if (size > 1
- && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
- >= TARGET_PAGE_SIZE)) {
- do_unaligned_access:
- store_helper_unaligned(env, addr, val, retaddr, size,
- mmu_idx, memop_big_endian(op));
- return;
- }
-
- haddr = (void *)((uintptr_t)addr + entry->addend);
- store_memop(haddr, val, op);
}
-static void __attribute__((noinline))
-full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
- MemOpIdx oi, uintptr_t retaddr)
+static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
+ int mmu_idx, MemOp memop, uintptr_t ra)
{
- validate_memop(oi, MO_UB);
- store_helper(env, addr, val, oi, retaddr, MO_UB);
+ if (unlikely(p->flags & TLB_MMIO)) {
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+ /* nothing */
+ } else {
+ /* Swap to host endian if necessary, then store. */
+ if (memop & MO_BSWAP) {
+ val = bswap16(val);
+ }
+ store_memop(p->haddr, val, MO_UW);
+ }
+}
+
+static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
+ int mmu_idx, MemOp memop, uintptr_t ra)
+{
+ if (unlikely(p->flags & TLB_MMIO)) {
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+ /* nothing */
+ } else {
+ /* Swap to host endian if necessary, then store. */
+ if (memop & MO_BSWAP) {
+ val = bswap32(val);
+ }
+ store_memop(p->haddr, val, MO_UL);
+ }
+}
+
+static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
+ int mmu_idx, MemOp memop, uintptr_t ra)
+{
+ if (unlikely(p->flags & TLB_MMIO)) {
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+ /* nothing */
+ } else {
+ /* Swap to host endian if necessary, then store. */
+ if (memop & MO_BSWAP) {
+ val = bswap64(val);
+ }
+ store_memop(p->haddr, val, MO_UQ);
+ }
}
void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
- MemOpIdx oi, uintptr_t retaddr)
+ MemOpIdx oi, uintptr_t ra)
{
- full_stb_mmu(env, addr, val, oi, retaddr);
+ MMULookupLocals l;
+ bool crosspage;
+
+ validate_memop(oi, MO_UB);
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
+ tcg_debug_assert(!crosspage);
+
+ do_st_1(env, &l.page[0], val, l.mmu_idx, ra);
}
-static void full_le_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
- MemOpIdx oi, uintptr_t retaddr)
+static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
+ MemOpIdx oi, uintptr_t ra)
{
- validate_memop(oi, MO_LEUW);
- store_helper(env, addr, val, oi, retaddr, MO_LEUW);
+ MMULookupLocals l;
+ bool crosspage;
+ uint8_t a, b;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
+ if (likely(!crosspage)) {
+ do_st_2(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
+ return;
+ }
+
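+    /*
+     * Split the value per the requested endianness: @a is the byte that
+     * goes to the first page, @b the byte that goes to the second.
+     */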
+ if ((l.memop & MO_BSWAP) == MO_LE) {
+ a = val, b = val >> 8;
+ } else {
+ b = val, a = val >> 8;
+ }
+ do_st_1(env, &l.page[0], a, l.mmu_idx, ra);
+ do_st_1(env, &l.page[1], b, l.mmu_idx, ra);
}
void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- full_le_stw_mmu(env, addr, val, oi, retaddr);
-}
-
-static void full_be_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
- MemOpIdx oi, uintptr_t retaddr)
-{
- validate_memop(oi, MO_BEUW);
- store_helper(env, addr, val, oi, retaddr, MO_BEUW);
+ validate_memop(oi, MO_LEUW);
+ do_st2_mmu(env, addr, val, oi, retaddr);
}
void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- full_be_stw_mmu(env, addr, val, oi, retaddr);
+ validate_memop(oi, MO_BEUW);
+ do_st2_mmu(env, addr, val, oi, retaddr);
}
-static void full_le_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
- MemOpIdx oi, uintptr_t retaddr)
+static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
+ MemOpIdx oi, uintptr_t ra)
{
- validate_memop(oi, MO_LEUL);
- store_helper(env, addr, val, oi, retaddr, MO_LEUL);
+ MMULookupLocals l;
+ bool crosspage;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
+ if (likely(!crosspage)) {
+ do_st_4(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
+ return;
+ }
+
+ /* Swap to little endian for simplicity, then store by bytes. */
+ if ((l.memop & MO_BSWAP) != MO_LE) {
+ val = bswap32(val);
+ }
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
}
void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- full_le_stl_mmu(env, addr, val, oi, retaddr);
-}
-
-static void full_be_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
- MemOpIdx oi, uintptr_t retaddr)
-{
- validate_memop(oi, MO_BEUL);
- store_helper(env, addr, val, oi, retaddr, MO_BEUL);
+ validate_memop(oi, MO_LEUL);
+ do_st4_mmu(env, addr, val, oi, retaddr);
}
void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- full_be_stl_mmu(env, addr, val, oi, retaddr);
+ validate_memop(oi, MO_BEUL);
+ do_st4_mmu(env, addr, val, oi, retaddr);
+}
+
+static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
+ MemOpIdx oi, uintptr_t ra)
+{
+ MMULookupLocals l;
+ bool crosspage;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
+ if (likely(!crosspage)) {
+ do_st_8(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
+ return;
+ }
+
+ /* Swap to little endian for simplicity, then store by bytes. */
+ if ((l.memop & MO_BSWAP) != MO_LE) {
+ val = bswap64(val);
+ }
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
}
void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
validate_memop(oi, MO_LEUQ);
- store_helper(env, addr, val, oi, retaddr, MO_LEUQ);
+ do_st8_mmu(env, addr, val, oi, retaddr);
}
void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
validate_memop(oi, MO_BEUQ);
- store_helper(env, addr, val, oi, retaddr, MO_BEUQ);
+ do_st8_mmu(env, addr, val, oi, retaddr);
}
/*
* Store Helpers for cpu_ldst.h
*/
-typedef void FullStoreHelper(CPUArchState *env, target_ulong addr,
- uint64_t val, MemOpIdx oi, uintptr_t retaddr);
-
-static inline void cpu_store_helper(CPUArchState *env, target_ulong addr,
- uint64_t val, MemOpIdx oi, uintptr_t ra,
- FullStoreHelper *full_store)
+static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
{
- full_store(env, addr, val, oi, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- cpu_store_helper(env, addr, val, oi, retaddr, full_stb_mmu);
+ helper_ret_stb_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- cpu_store_helper(env, addr, val, oi, retaddr, full_be_stw_mmu);
+ helper_be_stw_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- cpu_store_helper(env, addr, val, oi, retaddr, full_be_stl_mmu);
+ helper_be_stl_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- cpu_store_helper(env, addr, val, oi, retaddr, helper_be_stq_mmu);
+ helper_be_stq_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- cpu_store_helper(env, addr, val, oi, retaddr, full_le_stw_mmu);
+ helper_le_stw_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- cpu_store_helper(env, addr, val, oi, retaddr, full_le_stl_mmu);
+ helper_le_stl_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu);
+ helper_le_stq_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
@@ -2726,98 +2917,50 @@
/* Code access functions. */
-static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
-{
- return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
-}
-
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
{
MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
- return full_ldub_code(env, addr, oi, 0);
-}
-
-static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
-{
- return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
+ return do_ld1_mmu(env, addr, oi, 0, MMU_INST_FETCH);
}
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
{
MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
- return full_lduw_code(env, addr, oi, 0);
-}
-
-static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
-{
- return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
+ return do_ld2_mmu(env, addr, oi, 0, MMU_INST_FETCH);
}
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
{
MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
- return full_ldl_code(env, addr, oi, 0);
-}
-
-static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
-{
- return load_helper(env, addr, oi, retaddr, MO_TEUQ, true, full_ldq_code);
+ return do_ld4_mmu(env, addr, oi, 0, MMU_INST_FETCH);
}
uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
{
MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
- return full_ldq_code(env, addr, oi, 0);
+ return do_ld8_mmu(env, addr, oi, 0, MMU_INST_FETCH);
}
uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t retaddr)
{
- return full_ldub_code(env, addr, oi, retaddr);
+ return do_ld1_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
}
uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t retaddr)
{
- MemOp mop = get_memop(oi);
- int idx = get_mmuidx(oi);
- uint16_t ret;
-
- ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr);
- if ((mop & MO_BSWAP) != MO_TE) {
- ret = bswap16(ret);
- }
- return ret;
+ return do_ld2_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
}
uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t retaddr)
{
- MemOp mop = get_memop(oi);
- int idx = get_mmuidx(oi);
- uint32_t ret;
-
- ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr);
- if ((mop & MO_BSWAP) != MO_TE) {
- ret = bswap32(ret);
- }
- return ret;
+ return do_ld4_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
}
uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t retaddr)
{
- MemOp mop = get_memop(oi);
- int idx = get_mmuidx(oi);
- uint64_t ret;
-
- ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr);
- if ((mop & MO_BSWAP) != MO_TE) {
- ret = bswap64(ret);
- }
- return ret;
+ return do_ld8_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
}
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
index 7bb0fdb..24f225c 100644
--- a/accel/tcg/internal.h
+++ b/accel/tcg/internal.h
@@ -64,6 +64,15 @@
}
}
+/*
+ * Return true if CS is not running in parallel with other cpus, either
+ * because there are no other cpus or we are within an exclusive context.
+ */
+static inline bool cpu_in_serial_context(CPUState *cs)
+{
+ return !(cs->tcg_cflags & CF_PARALLEL) || cpu_in_exclusive_context(cs);
+}
+
extern int64_t max_delay;
extern int64_t max_advance;
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 0dd173f..991746f 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -760,7 +760,7 @@
if (tcg_enabled()) {
unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
- if (cpu_in_exclusive_context(cpu)) {
+ if (cpu_in_serial_context(cpu)) {
do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
} else {
async_safe_run_on_cpu(cpu, do_tb_flush,
diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c
index 84cc742..3d2cfbb 100644
--- a/accel/tcg/tcg-accel-ops-icount.c
+++ b/accel/tcg/tcg-accel-ops-icount.c
@@ -89,7 +89,20 @@
}
}
-void icount_prepare_for_run(CPUState *cpu)
+/* Distribute the budget evenly across all CPUs */
+int64_t icount_percpu_budget(int cpu_count)
+{
+ int64_t limit = icount_get_limit();
+ int64_t timeslice = limit / cpu_count;
+
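+    /*
+     * With many CPUs and a small limit the division can round down to
+     * zero; fall back to the full limit so the CPU still makes progress.
+     */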
+ if (timeslice == 0) {
+ timeslice = limit;
+ }
+
+ return timeslice;
+}
+
+void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget)
{
int insns_left;
@@ -101,13 +114,13 @@
g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
g_assert(cpu->icount_extra == 0);
- cpu->icount_budget = icount_get_limit();
+ replay_mutex_lock();
+
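+    /* Cap the budget at this CPU's share computed for this round. */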
+ cpu->icount_budget = MIN(icount_get_limit(), cpu_budget);
insns_left = MIN(0xffff, cpu->icount_budget);
cpu_neg(cpu)->icount_decr.u16.low = insns_left;
cpu->icount_extra = cpu->icount_budget - insns_left;
- replay_mutex_lock();
-
if (cpu->icount_budget == 0) {
/*
* We're called without the iothread lock, so must take it while
diff --git a/accel/tcg/tcg-accel-ops-icount.h b/accel/tcg/tcg-accel-ops-icount.h
index 1b6fd9c..16a301b 100644
--- a/accel/tcg/tcg-accel-ops-icount.h
+++ b/accel/tcg/tcg-accel-ops-icount.h
@@ -11,7 +11,8 @@
#define TCG_ACCEL_OPS_ICOUNT_H
void icount_handle_deadline(void);
-void icount_prepare_for_run(CPUState *cpu);
+void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget);
+int64_t icount_percpu_budget(int cpu_count);
void icount_process_data(CPUState *cpu);
void icount_handle_interrupt(CPUState *cpu, int mask);
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 290833a..5788efa 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -24,6 +24,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/lockable.h"
#include "sysemu/tcg.h"
#include "sysemu/replay.h"
#include "sysemu/cpu-timers.h"
@@ -140,6 +141,33 @@
}
/*
+ * Calculate the number of CPUs that we will process in a single iteration of
+ * the main CPU thread loop so that we can fairly distribute the instruction
+ * count across CPUs.
+ *
+ * The CPU count is cached based on the CPU list generation ID to avoid
+ * iterating the list every time.
+ */
+static int rr_cpu_count(void)
+{
+ static unsigned int last_gen_id = ~0;
+ static int cpu_count;
+ CPUState *cpu;
+
+ QEMU_LOCK_GUARD(&qemu_cpu_list_lock);
+
+ if (cpu_list_generation_id_get() != last_gen_id) {
+ cpu_count = 0;
+ CPU_FOREACH(cpu) {
+ ++cpu_count;
+ }
+ last_gen_id = cpu_list_generation_id_get();
+ }
+
+ return cpu_count;
+}
+
+/*
* In the single-threaded case each vCPU is simulated in turn. If
* there is more than a single vCPU we create a simple timer to kick
* the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
@@ -185,11 +213,16 @@
cpu->exit_request = 1;
while (1) {
+ /* Only used for icount_enabled() */
+ int64_t cpu_budget = 0;
+
qemu_mutex_unlock_iothread();
replay_mutex_lock();
qemu_mutex_lock_iothread();
if (icount_enabled()) {
+ int cpu_count = rr_cpu_count();
+
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
icount_account_warp_timer();
/*
@@ -197,6 +230,8 @@
* waking up the I/O thread and waiting for completion.
*/
icount_handle_deadline();
+
+ cpu_budget = icount_percpu_budget(cpu_count);
}
replay_mutex_unlock();
@@ -218,7 +253,7 @@
qemu_mutex_unlock_iothread();
if (icount_enabled()) {
- icount_prepare_for_run(cpu);
+ icount_prepare_for_run(cpu, cpu_budget);
}
r = tcg_cpus_exec(cpu);
if (icount_enabled()) {
diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index fbcdc94..1f650bd 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -352,9 +352,10 @@
static int symfind(const void *s0, const void *s1)
{
- target_ulong addr = *(target_ulong *)s0;
struct elf_sym *sym = (struct elf_sym *)s1;
+ __typeof(sym->st_value) addr = *(uint64_t *)s0;
int result = 0;
+
if (addr < sym->st_value) {
result = -1;
} else if (addr >= sym->st_value + sym->st_size) {
@@ -363,7 +364,7 @@
return result;
}
-static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr)
+static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr)
{
#if ELF_CLASS == ELFCLASS32
struct elf_sym *syms = s->disas_symtab.elf32;
diff --git a/configs/targets/mips-linux-user.mak b/configs/targets/mips-linux-user.mak
index 71fa77d..b4569a9 100644
--- a/configs/targets/mips-linux-user.mak
+++ b/configs/targets/mips-linux-user.mak
@@ -2,5 +2,4 @@
TARGET_ABI_MIPSO32=y
TARGET_SYSTBL_ABI=o32
TARGET_SYSTBL=syscall_o32.tbl
-TARGET_ALIGNED_ONLY=y
TARGET_BIG_ENDIAN=y
diff --git a/configs/targets/mips-softmmu.mak b/configs/targets/mips-softmmu.mak
index 7787a4d..d34b408 100644
--- a/configs/targets/mips-softmmu.mak
+++ b/configs/targets/mips-softmmu.mak
@@ -1,4 +1,3 @@
TARGET_ARCH=mips
-TARGET_ALIGNED_ONLY=y
TARGET_BIG_ENDIAN=y
TARGET_SUPPORTS_MTTCG=y
diff --git a/configs/targets/mips64-linux-user.mak b/configs/targets/mips64-linux-user.mak
index 5a4771f..d2ff509 100644
--- a/configs/targets/mips64-linux-user.mak
+++ b/configs/targets/mips64-linux-user.mak
@@ -3,5 +3,4 @@
TARGET_BASE_ARCH=mips
TARGET_SYSTBL_ABI=n64
TARGET_SYSTBL=syscall_n64.tbl
-TARGET_ALIGNED_ONLY=y
TARGET_BIG_ENDIAN=y
diff --git a/configs/targets/mips64-softmmu.mak b/configs/targets/mips64-softmmu.mak
index 568d666..12d9483 100644
--- a/configs/targets/mips64-softmmu.mak
+++ b/configs/targets/mips64-softmmu.mak
@@ -1,4 +1,3 @@
TARGET_ARCH=mips64
TARGET_BASE_ARCH=mips
-TARGET_ALIGNED_ONLY=y
TARGET_BIG_ENDIAN=y
diff --git a/configs/targets/mips64el-linux-user.mak b/configs/targets/mips64el-linux-user.mak
index f348f35..f9efeec 100644
--- a/configs/targets/mips64el-linux-user.mak
+++ b/configs/targets/mips64el-linux-user.mak
@@ -3,4 +3,3 @@
TARGET_BASE_ARCH=mips
TARGET_SYSTBL_ABI=n64
TARGET_SYSTBL=syscall_n64.tbl
-TARGET_ALIGNED_ONLY=y
diff --git a/configs/targets/mips64el-softmmu.mak b/configs/targets/mips64el-softmmu.mak
index 5a52aa4..8d9ab3d 100644
--- a/configs/targets/mips64el-softmmu.mak
+++ b/configs/targets/mips64el-softmmu.mak
@@ -1,4 +1,3 @@
TARGET_ARCH=mips64
TARGET_BASE_ARCH=mips
-TARGET_ALIGNED_ONLY=y
TARGET_NEED_FDT=y
diff --git a/configs/targets/mipsel-linux-user.mak b/configs/targets/mipsel-linux-user.mak
index e237930..e8d7241 100644
--- a/configs/targets/mipsel-linux-user.mak
+++ b/configs/targets/mipsel-linux-user.mak
@@ -2,4 +2,3 @@
TARGET_ABI_MIPSO32=y
TARGET_SYSTBL_ABI=o32
TARGET_SYSTBL=syscall_o32.tbl
-TARGET_ALIGNED_ONLY=y
diff --git a/configs/targets/mipsel-softmmu.mak b/configs/targets/mipsel-softmmu.mak
index c7c41f4..0829659 100644
--- a/configs/targets/mipsel-softmmu.mak
+++ b/configs/targets/mipsel-softmmu.mak
@@ -1,3 +1,2 @@
TARGET_ARCH=mips
-TARGET_ALIGNED_ONLY=y
TARGET_SUPPORTS_MTTCG=y
diff --git a/configs/targets/mipsn32-linux-user.mak b/configs/targets/mipsn32-linux-user.mak
index 1e80b30..206095d 100644
--- a/configs/targets/mipsn32-linux-user.mak
+++ b/configs/targets/mipsn32-linux-user.mak
@@ -4,5 +4,4 @@
TARGET_BASE_ARCH=mips
TARGET_SYSTBL_ABI=n32
TARGET_SYSTBL=syscall_n32.tbl
-TARGET_ALIGNED_ONLY=y
TARGET_BIG_ENDIAN=y
diff --git a/configs/targets/mipsn32el-linux-user.mak b/configs/targets/mipsn32el-linux-user.mak
index f31a9c3..ca2a3ed 100644
--- a/configs/targets/mipsn32el-linux-user.mak
+++ b/configs/targets/mipsn32el-linux-user.mak
@@ -4,4 +4,3 @@
TARGET_BASE_ARCH=mips
TARGET_SYSTBL_ABI=n32
TARGET_SYSTBL=syscall_n32.tbl
-TARGET_ALIGNED_ONLY=y
diff --git a/configs/targets/nios2-softmmu.mak b/configs/targets/nios2-softmmu.mak
index 5823fc0..c99ae37 100644
--- a/configs/targets/nios2-softmmu.mak
+++ b/configs/targets/nios2-softmmu.mak
@@ -1,3 +1,2 @@
TARGET_ARCH=nios2
-TARGET_ALIGNED_ONLY=y
TARGET_NEED_FDT=y
diff --git a/configs/targets/sh4-linux-user.mak b/configs/targets/sh4-linux-user.mak
index 0152d66..9908887 100644
--- a/configs/targets/sh4-linux-user.mak
+++ b/configs/targets/sh4-linux-user.mak
@@ -1,5 +1,4 @@
TARGET_ARCH=sh4
TARGET_SYSTBL_ABI=common
TARGET_SYSTBL=syscall.tbl
-TARGET_ALIGNED_ONLY=y
TARGET_HAS_BFLT=y
diff --git a/configs/targets/sh4-softmmu.mak b/configs/targets/sh4-softmmu.mak
index 9589637..f9d62d9 100644
--- a/configs/targets/sh4-softmmu.mak
+++ b/configs/targets/sh4-softmmu.mak
@@ -1,2 +1 @@
TARGET_ARCH=sh4
-TARGET_ALIGNED_ONLY=y
diff --git a/configs/targets/sh4eb-linux-user.mak b/configs/targets/sh4eb-linux-user.mak
index 6724165..9db6b36 100644
--- a/configs/targets/sh4eb-linux-user.mak
+++ b/configs/targets/sh4eb-linux-user.mak
@@ -1,6 +1,5 @@
TARGET_ARCH=sh4
TARGET_SYSTBL_ABI=common
TARGET_SYSTBL=syscall.tbl
-TARGET_ALIGNED_ONLY=y
TARGET_BIG_ENDIAN=y
TARGET_HAS_BFLT=y
diff --git a/configs/targets/sh4eb-softmmu.mak b/configs/targets/sh4eb-softmmu.mak
index dc8b30b..226b1fc 100644
--- a/configs/targets/sh4eb-softmmu.mak
+++ b/configs/targets/sh4eb-softmmu.mak
@@ -1,3 +1,2 @@
TARGET_ARCH=sh4
-TARGET_ALIGNED_ONLY=y
TARGET_BIG_ENDIAN=y
diff --git a/cpus-common.c b/cpus-common.c
index a53716d..45c745e 100644
--- a/cpus-common.c
+++ b/cpus-common.c
@@ -25,7 +25,7 @@
#include "qemu/lockable.h"
#include "trace/trace-root.h"
-static QemuMutex qemu_cpu_list_lock;
+QemuMutex qemu_cpu_list_lock;
static QemuCond exclusive_cond;
static QemuCond exclusive_resume;
static QemuCond qemu_work_cond;
diff --git a/disas/disas-internal.h b/disas/disas-internal.h
new file mode 100644
index 0000000..84a01f1
--- /dev/null
+++ b/disas/disas-internal.h
@@ -0,0 +1,21 @@
+/*
+ * Definitions used internally in the disassembly code
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef DISAS_INTERNAL_H
+#define DISAS_INTERNAL_H
+
+#include "disas/dis-asm.h"
+
+typedef struct CPUDebug {
+ struct disassemble_info info;
+ CPUState *cpu;
+} CPUDebug;
+
+void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu);
+int disas_gstring_printf(FILE *stream, const char *fmt, ...)
+ G_GNUC_PRINTF(2, 3);
+
+#endif
diff --git a/disas/disas-mon.c b/disas/disas-mon.c
new file mode 100644
index 0000000..48ac492
--- /dev/null
+++ b/disas/disas-mon.c
@@ -0,0 +1,65 @@
+/*
+ * Functions related to disassembly from the monitor
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "disas-internal.h"
+#include "disas/disas.h"
+#include "exec/memory.h"
+#include "hw/core/cpu.h"
+#include "monitor/monitor.h"
+
+static int
+physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
+ struct disassemble_info *info)
+{
+ CPUDebug *s = container_of(info, CPUDebug, info);
+ MemTxResult res;
+
+ res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED,
+ myaddr, length);
+ return res == MEMTX_OK ? 0 : EIO;
+}
+
+/* Disassembler for the monitor. */
+void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
+ int nb_insn, bool is_physical)
+{
+ int count, i;
+ CPUDebug s;
+ g_autoptr(GString) ds = g_string_new("");
+
+ disas_initialize_debug_target(&s, cpu);
+ s.info.fprintf_func = disas_gstring_printf;
+ s.info.stream = (FILE *)ds; /* abuse this slot */
+
+ if (is_physical) {
+ s.info.read_memory_func = physical_read_memory;
+ }
+ s.info.buffer_vma = pc;
+
+ if (s.info.cap_arch >= 0 && cap_disas_monitor(&s.info, pc, nb_insn)) {
+ monitor_puts(mon, ds->str);
+ return;
+ }
+
+ if (!s.info.print_insn) {
+ monitor_printf(mon, "0x%08" PRIx64
+ ": Asm output not supported on this arch\n", pc);
+ return;
+ }
+
+ for (i = 0; i < nb_insn; i++) {
+ g_string_append_printf(ds, "0x%08" PRIx64 ": ", pc);
+ count = s.info.print_insn(pc, &s.info);
+ g_string_append_c(ds, '\n');
+ if (count < 0) {
+ break;
+ }
+ pc += count;
+ }
+
+ monitor_puts(mon, ds->str);
+}
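
The new disas_gstring_printf() keeps the bfd fprintf_func signature but treats the FILE * slot as an opaque carrier for a GString, which is what lets monitor_disas() above accumulate output and emit it with a single monitor_puts(). A minimal stand-alone sketch of that trick, assuming only GLib (the names here are illustrative, not QEMU API):

    /*
     * Sketch of the GString-through-FILE* trick used by disas_gstring_printf();
     * build with: cc sketch.c $(pkg-config --cflags --libs glib-2.0)
     */
    #include <glib.h>
    #include <stdarg.h>
    #include <stdio.h>

    static int sketch_gstring_printf(FILE *stream, const char *fmt, ...)
    {
        GString *s = (GString *)stream;   /* opaque pointer, never handed to stdio */
        gsize old_len = s->len;
        va_list va;

        va_start(va, fmt);
        g_string_append_vprintf(s, fmt, va);
        va_end(va);
        return (int)(s->len - old_len);   /* printf convention: bytes produced */
    }

    int main(void)
    {
        g_autoptr(GString) ds = g_string_new("");

        sketch_gstring_printf((FILE *)ds, "0x%08x:  %s\n", 0x1000, "nop");
        g_print("%s", ds->str);
        return 0;
    }
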
diff --git a/disas.c b/disas/disas.c
similarity index 78%
rename from disas.c
rename to disas/disas.c
index b087c12..0d2d06c 100644
--- a/disas.c
+++ b/disas/disas.c
@@ -1,16 +1,12 @@
/* General "disassemble this chunk" code. Used for debugging. */
#include "qemu/osdep.h"
-#include "disas/dis-asm.h"
+#include "disas/disas-internal.h"
#include "elf.h"
#include "qemu/qemu-print.h"
-
#include "disas/disas.h"
#include "disas/capstone.h"
-
-typedef struct CPUDebug {
- struct disassemble_info info;
- CPUState *cpu;
-} CPUDebug;
+#include "hw/core/cpu.h"
+#include "exec/memory.h"
/* Filled in by elfload.c. Simplistic, but will do for now. */
struct syminfo *syminfos = NULL;
@@ -119,18 +115,18 @@
s->info.symbol_at_address_func = symbol_at_address;
}
-static void initialize_debug_target(CPUDebug *s, CPUState *cpu)
+void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu)
{
initialize_debug(s);
s->cpu = cpu;
s->info.read_memory_func = target_read_memory;
s->info.print_address_func = print_address;
-#if TARGET_BIG_ENDIAN
- s->info.endian = BFD_ENDIAN_BIG;
-#else
- s->info.endian = BFD_ENDIAN_LITTLE;
-#endif
+ if (target_words_bigendian()) {
+ s->info.endian = BFD_ENDIAN_BIG;
+ } else {
+ s->info.endian = BFD_ENDIAN_LITTLE;
+ }
CPUClass *cc = CPU_GET_CLASS(cpu);
if (cc->disas_set_info) {
@@ -168,7 +164,7 @@
# ifdef _ARCH_PPC64
s->info.cap_mode = CS_MODE_64;
# endif
-#elif defined(__riscv) && defined(CONFIG_RISCV_DIS)
+#elif defined(__riscv)
#if defined(_ILP32) || (__riscv_xlen == 32)
s->info.print_insn = print_insn_riscv32;
#elif defined(_LP64)
@@ -204,14 +200,13 @@
}
/* Disassemble this for me please... (debugging). */
-void target_disas(FILE *out, CPUState *cpu, target_ulong code,
- target_ulong size)
+void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size)
{
- target_ulong pc;
+ uint64_t pc;
int count;
CPUDebug s;
- initialize_debug_target(&s, cpu);
+ disas_initialize_debug_target(&s, cpu);
s.info.fprintf_func = fprintf;
s.info.stream = out;
s.info.buffer_vma = code;
@@ -226,11 +221,12 @@
}
for (pc = code; size > 0; pc += count, size -= count) {
- fprintf(out, "0x" TARGET_FMT_lx ": ", pc);
- count = s.info.print_insn(pc, &s.info);
- fprintf(out, "\n");
- if (count < 0)
- break;
+ fprintf(out, "0x%08" PRIx64 ": ", pc);
+ count = s.info.print_insn(pc, &s.info);
+ fprintf(out, "\n");
+ if (count < 0) {
+ break;
+ }
if (size < count) {
fprintf(out,
"Disassembler disagrees with translator over instruction "
@@ -241,8 +237,7 @@
}
}
-static int G_GNUC_PRINTF(2, 3)
-gstring_printf(FILE *stream, const char *fmt, ...)
+int disas_gstring_printf(FILE *stream, const char *fmt, ...)
{
/* We abuse the FILE parameter to pass a GString. */
GString *s = (GString *)stream;
@@ -272,8 +267,8 @@
CPUDebug s;
GString *ds = g_string_new(NULL);
- initialize_debug_target(&s, cpu);
- s.info.fprintf_func = gstring_printf;
+ disas_initialize_debug_target(&s, cpu);
+ s.info.fprintf_func = disas_gstring_printf;
s.info.stream = (FILE *)ds; /* abuse this slot */
s.info.buffer_vma = addr;
s.info.buffer_length = size;
@@ -292,7 +287,7 @@
}
/* Disassemble this for me please... (debugging). */
-void disas(FILE *out, const void *code, unsigned long size)
+void disas(FILE *out, const void *code, size_t size)
{
uintptr_t pc;
int count;
@@ -324,7 +319,7 @@
}
/* Look up symbol for debugging purpose. Returns "" if unknown. */
-const char *lookup_symbol(target_ulong orig_addr)
+const char *lookup_symbol(uint64_t orig_addr)
{
const char *symbol = "";
struct syminfo *s;
@@ -338,61 +333,3 @@
return symbol;
}
-
-#if !defined(CONFIG_USER_ONLY)
-
-#include "monitor/monitor.h"
-
-static int
-physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
- struct disassemble_info *info)
-{
- CPUDebug *s = container_of(info, CPUDebug, info);
- MemTxResult res;
-
- res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED,
- myaddr, length);
- return res == MEMTX_OK ? 0 : EIO;
-}
-
-/* Disassembler for the monitor. */
-void monitor_disas(Monitor *mon, CPUState *cpu,
- target_ulong pc, int nb_insn, int is_physical)
-{
- int count, i;
- CPUDebug s;
- g_autoptr(GString) ds = g_string_new("");
-
- initialize_debug_target(&s, cpu);
- s.info.fprintf_func = gstring_printf;
- s.info.stream = (FILE *)ds; /* abuse this slot */
-
- if (is_physical) {
- s.info.read_memory_func = physical_read_memory;
- }
- s.info.buffer_vma = pc;
-
- if (s.info.cap_arch >= 0 && cap_disas_monitor(&s.info, pc, nb_insn)) {
- monitor_puts(mon, ds->str);
- return;
- }
-
- if (!s.info.print_insn) {
- monitor_printf(mon, "0x" TARGET_FMT_lx
- ": Asm output not supported on this arch\n", pc);
- return;
- }
-
- for (i = 0; i < nb_insn; i++) {
- g_string_append_printf(ds, "0x" TARGET_FMT_lx ": ", pc);
- count = s.info.print_insn(pc, &s.info);
- g_string_append_c(ds, '\n');
- if (count < 0) {
- break;
- }
- pc += count;
- }
-
- monitor_puts(mon, ds->str);
-}
-#endif
diff --git a/disas/meson.build b/disas/meson.build
index c865bdd..832727e 100644
--- a/disas/meson.build
+++ b/disas/meson.build
@@ -10,4 +10,8 @@
common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c'))
common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
-common_ss.add(when: capstone, if_true: files('capstone.c'))
+common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone])
+common_ss.add(files('disas.c'))
+
+softmmu_ss.add(files('disas-mon.c'))
+specific_ss.add(capstone)
diff --git a/include/disas/disas.h b/include/disas/disas.h
index d363e95..176775e 100644
--- a/include/disas/disas.h
+++ b/include/disas/disas.h
@@ -1,34 +1,23 @@
#ifndef QEMU_DISAS_H
#define QEMU_DISAS_H
-#include "exec/hwaddr.h"
-
-#ifdef NEED_CPU_H
-#include "cpu.h"
-
/* Disassemble this for me please... (debugging). */
-void disas(FILE *out, const void *code, unsigned long size);
-void target_disas(FILE *out, CPUState *cpu, target_ulong code,
- target_ulong size);
+void disas(FILE *out, const void *code, size_t size);
+void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size);
-void monitor_disas(Monitor *mon, CPUState *cpu,
- target_ulong pc, int nb_insn, int is_physical);
+void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
+ int nb_insn, bool is_physical);
char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size);
/* Look up symbol for debugging purpose. Returns "" if unknown. */
-const char *lookup_symbol(target_ulong orig_addr);
-#endif
+const char *lookup_symbol(uint64_t orig_addr);
struct syminfo;
struct elf32_sym;
struct elf64_sym;
-#if defined(CONFIG_USER_ONLY)
-typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_ulong orig_addr);
-#else
-typedef const char *(*lookup_symbol_t)(struct syminfo *s, hwaddr orig_addr);
-#endif
+typedef const char *(*lookup_symbol_t)(struct syminfo *s, uint64_t orig_addr);
struct syminfo {
lookup_symbol_t lookup_symbol;
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 1be4a31..e5a55ed 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -32,6 +32,7 @@
#define REAL_HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_real_host_page_size())
/* The CPU list lock nests outside page_(un)lock or mmap_(un)lock */
+extern QemuMutex qemu_cpu_list_lock;
void qemu_init_cpu_list(void);
void cpu_list_lock(void);
void cpu_list_unlock(void);
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index e1c498e..a6e0cf1 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -111,8 +111,11 @@
use the corresponding iotlb value. */
uintptr_t addend;
};
- /* padding to get a power of two size */
- uint8_t dummy[1 << CPU_TLB_ENTRY_BITS];
+ /*
+ * Padding to get a power of two size, as well as index
+ * access to addr_{read,write,code}.
+ */
+ target_ulong addr_idx[(1 << CPU_TLB_ENTRY_BITS) / TARGET_LONG_SIZE];
};
} CPUTLBEntry;
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index c141f03..7c867c9 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -360,13 +360,29 @@
/* Needed for TCG_OVERSIZED_GUEST */
#include "tcg/tcg.h"
+static inline target_ulong tlb_read_idx(const CPUTLBEntry *entry,
+ MMUAccessType access_type)
+{
+ /* Do not rearrange the CPUTLBEntry structure members. */
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) !=
+ MMU_DATA_LOAD * TARGET_LONG_SIZE);
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) !=
+ MMU_DATA_STORE * TARGET_LONG_SIZE);
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) !=
+ MMU_INST_FETCH * TARGET_LONG_SIZE);
+
+ const target_ulong *ptr = &entry->addr_idx[access_type];
+#if TCG_OVERSIZED_GUEST
+ return *ptr;
+#else
+ /* ofs might correspond to .addr_write, so use qatomic_read */
+ return qatomic_read(ptr);
+#endif
+}
+
static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
{
-#if TCG_OVERSIZED_GUEST
- return entry->addr_write;
-#else
- return qatomic_read(&entry->addr_write);
-#endif
+ return tlb_read_idx(entry, MMU_DATA_STORE);
}
/* Find the TLB index corresponding to the mmu_idx + address pair. */
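
The two hunks above pair up: the former padding member of CPUTLBEntry becomes an array view over addr_read/addr_write/addr_code, and tlb_read_idx() indexes it by MMUAccessType, with the QEMU_BUILD_BUG_ONs pinning the member order. A stand-alone sketch of the same union-as-index-view idiom, using made-up names and a fixed 64-bit comparator width:

    /* Illustrative model of the CPUTLBEntry index view; SketchTLBEntry
     * and ACCESS_* are stand-ins, not QEMU types. */
    #include <assert.h>
    #include <inttypes.h>
    #include <stddef.h>
    #include <stdio.h>

    enum { ACCESS_LOAD = 0, ACCESS_STORE = 1, ACCESS_FETCH = 2 };

    typedef union SketchTLBEntry {
        struct {
            uint64_t addr_read;
            uint64_t addr_write;
            uint64_t addr_code;
            uintptr_t addend;
        };
        /* Same storage, addressable by access type. */
        uint64_t addr_idx[4];
    } SketchTLBEntry;

    static uint64_t read_idx(const SketchTLBEntry *e, int access_type)
    {
        return e->addr_idx[access_type];
    }

    int main(void)
    {
        /* Equivalent of the QEMU_BUILD_BUG_ON checks in tlb_read_idx(). */
        static_assert(offsetof(SketchTLBEntry, addr_read) ==
                      ACCESS_LOAD * sizeof(uint64_t), "layout");
        static_assert(offsetof(SketchTLBEntry, addr_write) ==
                      ACCESS_STORE * sizeof(uint64_t), "layout");
        static_assert(offsetof(SketchTLBEntry, addr_code) ==
                      ACCESS_FETCH * sizeof(uint64_t), "layout");

        SketchTLBEntry e;
        e.addr_read = 0x1000;
        e.addr_write = 0x2000;
        e.addr_code = 0x3000;
        printf("store comparator: 0x%" PRIx64 "\n", read_idx(&e, ACCESS_STORE));
        return 0;
    }
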
diff --git a/include/exec/memop.h b/include/exec/memop.h
index 25d0274..07f5f88 100644
--- a/include/exec/memop.h
+++ b/include/exec/memop.h
@@ -47,8 +47,6 @@
* MO_UNALN accesses are never checked for alignment.
* MO_ALIGN accesses will result in a call to the CPU's
* do_unaligned_access hook if the guest address is not aligned.
- * The default depends on whether the target CPU defines
- * TARGET_ALIGNED_ONLY.
*
* Some architectures (e.g. ARMv8) need the address which is aligned
* to a size more than the size of the memory access.
@@ -65,21 +63,14 @@
*/
MO_ASHIFT = 5,
MO_AMASK = 0x7 << MO_ASHIFT,
-#ifdef NEED_CPU_H
-#ifdef TARGET_ALIGNED_ONLY
- MO_ALIGN = 0,
- MO_UNALN = MO_AMASK,
-#else
- MO_ALIGN = MO_AMASK,
- MO_UNALN = 0,
-#endif
-#endif
+ MO_UNALN = 0,
MO_ALIGN_2 = 1 << MO_ASHIFT,
MO_ALIGN_4 = 2 << MO_ASHIFT,
MO_ALIGN_8 = 3 << MO_ASHIFT,
MO_ALIGN_16 = 4 << MO_ASHIFT,
MO_ALIGN_32 = 5 << MO_ASHIFT,
MO_ALIGN_64 = 6 << MO_ASHIFT,
+ MO_ALIGN = MO_AMASK,
/* Combinations of the above, for ease of use. */
MO_UB = MO_8,
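
With TARGET_ALIGNED_ONLY gone, the MO_ALIGN/MO_UNALN pair no longer flips meaning per target: MO_UNALN is always 0 and MO_ALIGN always requests natural alignment through the MO_AMASK field, so front ends must OR in the policy explicitly (as the sh4, mips and nios2 hunks below now do). A small stand-alone model of the alignment field; the numeric values mirror the enum above, MO_TEUL_ISH is a made-up stand-in:

    /* Model of the MemOp alignment bits after this change. */
    #include <stdio.h>

    enum {
        MO_ASHIFT   = 5,
        MO_AMASK    = 0x7 << MO_ASHIFT,
        MO_UNALN    = 0,               /* never check alignment           */
        MO_ALIGN_2  = 1 << MO_ASHIFT,  /* explicit 2-byte alignment, ...   */
        MO_ALIGN    = MO_AMASK,        /* ... or natural alignment         */
        MO_TEUL_ISH = 0x2,             /* stand-in for a 32-bit access     */
    };

    int main(void)
    {
        unsigned aligned   = MO_TEUL_ISH | MO_ALIGN;
        unsigned unaligned = MO_TEUL_ISH | MO_UNALN;  /* OR-ing 0 is a no-op */

        printf("aligned:   0x%02x (a-field %u)\n",
               aligned, (aligned & MO_AMASK) >> MO_ASHIFT);
        printf("unaligned: 0x%02x (a-field %u)\n",
               unaligned, (unaligned & MO_AMASK) >> MO_ASHIFT);
        return 0;
    }
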
diff --git a/include/exec/poison.h b/include/exec/poison.h
index 140daa4..256736e 100644
--- a/include/exec/poison.h
+++ b/include/exec/poison.h
@@ -35,7 +35,6 @@
#pragma GCC poison TARGET_TRICORE
#pragma GCC poison TARGET_XTENSA
-#pragma GCC poison TARGET_ALIGNED_ONLY
#pragma GCC poison TARGET_HAS_BFLT
#pragma GCC poison TARGET_NAME
#pragma GCC poison TARGET_SUPPORTS_MTTCG
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 703f743..418ad92 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -17,6 +17,7 @@
#include "qemu/guest-random.h"
#include "qemu/units.h"
#include "qemu/selfmap.h"
+#include "qemu/lockable.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "target_signal.h"
@@ -3327,9 +3328,10 @@
static int symfind(const void *s0, const void *s1)
{
- target_ulong addr = *(target_ulong *)s0;
struct elf_sym *sym = (struct elf_sym *)s1;
+ __typeof(sym->st_value) addr = *(uint64_t *)s0;
int result = 0;
+
if (addr < sym->st_value) {
result = -1;
} else if (addr >= sym->st_value + sym->st_size) {
@@ -3338,7 +3340,7 @@
return result;
}
-static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr)
+static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr)
{
#if ELF_CLASS == ELFCLASS32
struct elf_sym *syms = s->disas_symtab.elf32;
@@ -4237,14 +4239,14 @@
info->notes_size += note_size(&info->notes[i]);
/* read and fill status of all threads */
- cpu_list_lock();
- CPU_FOREACH(cpu) {
- if (cpu == thread_cpu) {
- continue;
+ WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
+ CPU_FOREACH(cpu) {
+ if (cpu == thread_cpu) {
+ continue;
+ }
+ fill_thread_info(info, cpu->env_ptr);
}
- fill_thread_info(info, cpu->env_ptr);
}
- cpu_list_unlock();
return (0);
}
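
Exporting qemu_cpu_list_lock is what allows the conversion above (and the dirtyrate.c one below) from paired cpu_list_lock()/cpu_list_unlock() calls to WITH_QEMU_LOCK_GUARD, which releases the lock on every exit path from the block. A minimal pthread-based sketch of how such a scoped guard can be built on __attribute__((cleanup)); SKETCH_LOCK_GUARD is illustrative, not the QEMU macro:

    /* Sketch of a cleanup-based scoped lock guard in the spirit of
     * WITH_QEMU_LOCK_GUARD; macro and helper names are made up. */
    #include <pthread.h>
    #include <stdio.h>

    static void sketch_unlock(pthread_mutex_t **m)
    {
        if (*m) {                      /* still non-NULL only on early exit */
            pthread_mutex_unlock(*m);
        }
    }

    /*
     * Lock on entry; unlock either in the loop "increment" (normal fall
     * through) or in the cleanup handler (break, return, goto out).
     */
    #define SKETCH_LOCK_GUARD(m)                                            \
        for (pthread_mutex_t *guard_                                        \
                 __attribute__((cleanup(sketch_unlock)))                    \
                 = (pthread_mutex_lock(m), (m));                            \
             guard_;                                                        \
             pthread_mutex_unlock(guard_), guard_ = NULL)

    static pthread_mutex_t cpu_list_lock = PTHREAD_MUTEX_INITIALIZER;

    int main(void)
    {
        SKETCH_LOCK_GUARD(&cpu_list_lock) {
            printf("critical section: lock is held here\n");
        }                              /* released by now on every path */
        return 0;
    }
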
diff --git a/meson.build b/meson.build
index 5c7af6f..d3cf489 100644
--- a/meson.build
+++ b/meson.build
@@ -3153,9 +3153,6 @@
subdir('softmmu')
-common_ss.add(capstone)
-specific_ss.add(files('disas.c'), capstone)
-
# Work around a gcc bug/misfeature wherein constant propagation looks
# through an alias:
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99696
diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
index 180ba38..388337a 100644
--- a/migration/dirtyrate.c
+++ b/migration/dirtyrate.c
@@ -150,25 +150,25 @@
retry:
init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
- cpu_list_lock();
- gen_id = cpu_list_generation_id_get();
- records = vcpu_dirty_stat_alloc(stat);
- vcpu_dirty_stat_collect(stat, records, true);
- cpu_list_unlock();
+ WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
+ gen_id = cpu_list_generation_id_get();
+ records = vcpu_dirty_stat_alloc(stat);
+ vcpu_dirty_stat_collect(stat, records, true);
+ }
duration = dirty_stat_wait(calc_time_ms, init_time_ms);
global_dirty_log_sync(flag, one_shot);
- cpu_list_lock();
- if (gen_id != cpu_list_generation_id_get()) {
- g_free(records);
- g_free(stat->rates);
- cpu_list_unlock();
- goto retry;
+ WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
+ if (gen_id != cpu_list_generation_id_get()) {
+ g_free(records);
+ g_free(stat->rates);
+ cpu_list_unlock();
+ goto retry;
+ }
+ vcpu_dirty_stat_collect(stat, records, false);
}
- vcpu_dirty_stat_collect(stat, records, false);
- cpu_list_unlock();
for (i = 0; i < stat->nvcpu; i++) {
dirtyrate = do_calculate_dirtyrate(records[i], duration);
diff --git a/replay/replay.c b/replay/replay.c
index c39156c..0f7d766 100644
--- a/replay/replay.c
+++ b/replay/replay.c
@@ -74,7 +74,7 @@
int replay_get_instructions(void)
{
int res = 0;
- replay_mutex_lock();
+ g_assert(replay_mutex_locked());
if (replay_next_event_is(EVENT_INSTRUCTION)) {
res = replay_state.instruction_count;
if (replay_break_icount != -1LL) {
@@ -85,7 +85,6 @@
}
}
}
- replay_mutex_unlock();
return res;
}
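
replay_get_instructions() now documents a precondition instead of taking the mutex itself: its callers already hold the replay lock, so the function only asserts that fact rather than re-acquiring it. A tiny stand-alone sketch of the assert-the-lock-is-held pattern; the flag-based check is illustrative, and QEMU's replay_mutex_locked() bookkeeping differs in detail:

    /* Sketch: convert "lock, work, unlock" into "assert caller holds the
     * lock, then work"; names and the bool bookkeeping are illustrative. */
    #include <assert.h>
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t replay_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool replay_lock_held;      /* written only while holding the lock */

    static void replay_lock_acquire(void)
    {
        pthread_mutex_lock(&replay_lock);
        replay_lock_held = true;
    }

    static void replay_lock_release(void)
    {
        replay_lock_held = false;
        pthread_mutex_unlock(&replay_lock);
    }

    /* Precondition: the caller already holds replay_lock. */
    static int get_instructions(void)
    {
        assert(replay_lock_held);
        return 42;                     /* placeholder for the real work */
    }

    int main(void)
    {
        replay_lock_acquire();
        printf("instructions left: %d\n", get_instructions());
        replay_lock_release();
        return 0;
    }
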
diff --git a/target/loongarch/csr_helper.c b/target/loongarch/csr_helper.c
index 7e02787..6526367 100644
--- a/target/loongarch/csr_helper.c
+++ b/target/loongarch/csr_helper.c
@@ -15,7 +15,6 @@
#include "exec/cpu_ldst.h"
#include "hw/irq.h"
#include "cpu-csr.h"
-#include "tcg/tcg-ldst.h"
target_ulong helper_csrrd_pgd(CPULoongArchState *env)
{
diff --git a/target/loongarch/iocsr_helper.c b/target/loongarch/iocsr_helper.c
index 505853e..dda9845 100644
--- a/target/loongarch/iocsr_helper.c
+++ b/target/loongarch/iocsr_helper.c
@@ -12,7 +12,6 @@
#include "exec/helper-proto.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
-#include "tcg/tcg-ldst.h"
#define GET_MEMTXATTRS(cas) \
((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index})
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index 744eb37..44d852b 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -959,6 +959,7 @@
switch (opsize) {
case OS_BYTE:
case OS_WORD:
+ case OS_LONG:
tcg_gen_qemu_ld_tl(tmp, addr, index, opsize | MO_SIGN | MO_TE);
gen_helper_exts32(cpu_env, fp, tmp);
break;
diff --git a/target/mips/tcg/micromips_translate.c.inc b/target/mips/tcg/micromips_translate.c.inc
index e8b193a..211d102 100644
--- a/target/mips/tcg/micromips_translate.c.inc
+++ b/target/mips/tcg/micromips_translate.c.inc
@@ -977,20 +977,24 @@
gen_reserved_instruction(ctx);
return;
}
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL);
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL |
+ ctx->default_tcg_memop_mask);
gen_store_gpr(t1, rd);
tcg_gen_movi_tl(t1, 4);
gen_op_addr_add(ctx, t0, t0, t1);
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL);
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL |
+ ctx->default_tcg_memop_mask);
gen_store_gpr(t1, rd + 1);
break;
case SWP:
gen_load_gpr(t1, rd);
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
+ ctx->default_tcg_memop_mask);
tcg_gen_movi_tl(t1, 4);
gen_op_addr_add(ctx, t0, t0, t1);
gen_load_gpr(t1, rd + 1);
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
+ ctx->default_tcg_memop_mask);
break;
#ifdef TARGET_MIPS64
case LDP:
@@ -998,20 +1002,24 @@
gen_reserved_instruction(ctx);
return;
}
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
+ ctx->default_tcg_memop_mask);
gen_store_gpr(t1, rd);
tcg_gen_movi_tl(t1, 8);
gen_op_addr_add(ctx, t0, t0, t1);
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
+ ctx->default_tcg_memop_mask);
gen_store_gpr(t1, rd + 1);
break;
case SDP:
gen_load_gpr(t1, rd);
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
+ ctx->default_tcg_memop_mask);
tcg_gen_movi_tl(t1, 8);
gen_op_addr_add(ctx, t0, t0, t1);
gen_load_gpr(t1, rd + 1);
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
+ ctx->default_tcg_memop_mask);
break;
#endif
}
diff --git a/target/mips/tcg/mips16e_translate.c.inc b/target/mips/tcg/mips16e_translate.c.inc
index 602f5f0..5cffe0e 100644
--- a/target/mips/tcg/mips16e_translate.c.inc
+++ b/target/mips/tcg/mips16e_translate.c.inc
@@ -172,22 +172,26 @@
case 4:
gen_base_offset_addr(ctx, t0, 29, 12);
gen_load_gpr(t1, 7);
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
+ ctx->default_tcg_memop_mask);
/* Fall through */
case 3:
gen_base_offset_addr(ctx, t0, 29, 8);
gen_load_gpr(t1, 6);
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
+ ctx->default_tcg_memop_mask);
/* Fall through */
case 2:
gen_base_offset_addr(ctx, t0, 29, 4);
gen_load_gpr(t1, 5);
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
+ ctx->default_tcg_memop_mask);
/* Fall through */
case 1:
gen_base_offset_addr(ctx, t0, 29, 0);
gen_load_gpr(t1, 4);
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
+ ctx->default_tcg_memop_mask);
}
gen_load_gpr(t0, 29);
@@ -196,7 +200,8 @@
tcg_gen_movi_tl(t2, -4); \
gen_op_addr_add(ctx, t0, t0, t2); \
gen_load_gpr(t1, reg); \
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); \
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | \
+ ctx->default_tcg_memop_mask); \
} while (0)
if (do_ra) {
@@ -298,7 +303,8 @@
#define DECR_AND_LOAD(reg) do { \
tcg_gen_movi_tl(t2, -4); \
gen_op_addr_add(ctx, t0, t0, t2); \
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL); \
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL | \
+ ctx->default_tcg_memop_mask); \
gen_store_gpr(t1, reg); \
} while (0)
diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c
index bdd2070..be038b5 100644
--- a/target/mips/tcg/mxu_translate.c
+++ b/target/mips/tcg/mxu_translate.c
@@ -831,7 +831,8 @@
tcg_gen_ori_tl(t1, t1, 0xFFFFF000);
}
tcg_gen_add_tl(t1, t0, t1);
- tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_TESL ^ (sel * MO_BSWAP));
+ tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, (MO_TESL ^ (sel * MO_BSWAP)) |
+ ctx->default_tcg_memop_mask);
gen_store_mxu_gpr(t1, XRa);
}
diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc
index 97b9572..a98dde0 100644
--- a/target/mips/tcg/nanomips_translate.c.inc
+++ b/target/mips/tcg/nanomips_translate.c.inc
@@ -998,7 +998,7 @@
TCGv tmp2 = tcg_temp_new();
gen_base_offset_addr(ctx, taddr, base, offset);
- tcg_gen_qemu_ld_i64(tval, taddr, ctx->mem_idx, MO_TEUQ);
+ tcg_gen_qemu_ld_i64(tval, taddr, ctx->mem_idx, MO_TEUQ | MO_ALIGN);
if (cpu_is_bigendian(ctx)) {
tcg_gen_extr_i64_tl(tmp2, tmp1, tval);
} else {
@@ -1039,7 +1039,8 @@
tcg_gen_ld_i64(llval, cpu_env, offsetof(CPUMIPSState, llval_wp));
tcg_gen_atomic_cmpxchg_i64(val, taddr, llval, tval,
- eva ? MIPS_HFLAG_UM : ctx->mem_idx, MO_64);
+ eva ? MIPS_HFLAG_UM : ctx->mem_idx,
+ MO_64 | MO_ALIGN);
if (reg1 != 0) {
tcg_gen_movi_tl(cpu_gpr[reg1], 1);
}
@@ -2640,52 +2641,49 @@
switch (extract32(ctx->opcode, 7, 4)) {
case NM_LBX:
- tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
- MO_SB);
+ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB);
gen_store_gpr(t0, rd);
break;
case NM_LHX:
/*case NM_LHXS:*/
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
- MO_TESW);
+ MO_TESW | ctx->default_tcg_memop_mask);
gen_store_gpr(t0, rd);
break;
case NM_LWX:
/*case NM_LWXS:*/
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
- MO_TESL);
+ MO_TESL | ctx->default_tcg_memop_mask);
gen_store_gpr(t0, rd);
break;
case NM_LBUX:
- tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
- MO_UB);
+ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_UB);
gen_store_gpr(t0, rd);
break;
case NM_LHUX:
/*case NM_LHUXS:*/
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
- MO_TEUW);
+ MO_TEUW | ctx->default_tcg_memop_mask);
gen_store_gpr(t0, rd);
break;
case NM_SBX:
check_nms(ctx);
gen_load_gpr(t1, rd);
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
- MO_8);
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_8);
break;
case NM_SHX:
/*case NM_SHXS:*/
check_nms(ctx);
gen_load_gpr(t1, rd);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
- MO_TEUW);
+ MO_TEUW | ctx->default_tcg_memop_mask);
break;
case NM_SWX:
/*case NM_SWXS:*/
check_nms(ctx);
gen_load_gpr(t1, rd);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
- MO_TEUL);
+ MO_TEUL | ctx->default_tcg_memop_mask);
break;
case NM_LWC1X:
/*case NM_LWC1XS:*/
@@ -3738,7 +3736,8 @@
addr_off);
tcg_gen_movi_tl(t0, addr);
- tcg_gen_qemu_ld_tl(cpu_gpr[rt], t0, ctx->mem_idx, MO_TESL);
+ tcg_gen_qemu_ld_tl(cpu_gpr[rt], t0, ctx->mem_idx,
+ MO_TESL | ctx->default_tcg_memop_mask);
}
break;
case NM_SWPC48:
@@ -3754,7 +3753,8 @@
tcg_gen_movi_tl(t0, addr);
gen_load_gpr(t1, rt);
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
+ MO_TEUL | ctx->default_tcg_memop_mask);
}
break;
default:
@@ -4305,7 +4305,7 @@
TCGv va = tcg_temp_new();
TCGv t1 = tcg_temp_new();
MemOp memop = (extract32(ctx->opcode, 8, 3)) ==
- NM_P_LS_UAWM ? MO_UNALN : 0;
+ NM_P_LS_UAWM ? MO_UNALN : MO_ALIGN;
count = (count == 0) ? 8 : count;
while (counter != count) {
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
index 6610e22..a548e16 100644
--- a/target/nios2/translate.c
+++ b/target/nios2/translate.c
@@ -298,6 +298,11 @@
TCGv data = dest_gpr(dc, instr.b);
tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s);
+#ifdef CONFIG_USER_ONLY
+ flags |= MO_UNALN;
+#else
+ flags |= MO_ALIGN;
+#endif
tcg_gen_qemu_ld_tl(data, addr, dc->mem_idx, flags);
}
@@ -309,6 +314,11 @@
TCGv addr = tcg_temp_new();
tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s);
+#ifdef CONFIG_USER_ONLY
+ flags |= MO_UNALN;
+#else
+ flags |= MO_ALIGN;
+#endif
tcg_gen_qemu_st_tl(val, addr, dc->mem_idx, flags);
}
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 6e40d5d..0dedbb8 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -527,13 +527,15 @@
case 0x9000: /* mov.w @(disp,PC),Rn */
{
TCGv addr = tcg_constant_i32(ctx->base.pc_next + 4 + B7_0 * 2);
- tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESW);
+ tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx,
+ MO_TESW | MO_ALIGN);
}
return;
case 0xd000: /* mov.l @(disp,PC),Rn */
{
TCGv addr = tcg_constant_i32((ctx->base.pc_next + 4 + B7_0 * 4) & ~3);
- tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESL);
+ tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx,
+ MO_TESL | MO_ALIGN);
}
return;
case 0x7000: /* add #imm,Rn */
@@ -801,9 +803,11 @@
{
TCGv arg0, arg1;
arg0 = tcg_temp_new();
- tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL);
+ tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx,
+ MO_TESL | MO_ALIGN);
arg1 = tcg_temp_new();
- tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL);
+ tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx,
+ MO_TESL | MO_ALIGN);
gen_helper_macl(cpu_env, arg0, arg1);
tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
@@ -813,9 +817,11 @@
{
TCGv arg0, arg1;
arg0 = tcg_temp_new();
- tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL);
+ tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx,
+ MO_TESL | MO_ALIGN);
arg1 = tcg_temp_new();
- tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL);
+ tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx,
+ MO_TESL | MO_ALIGN);
gen_helper_macw(cpu_env, arg0, arg1);
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 2);
tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 2);
@@ -961,30 +967,36 @@
if (ctx->tbflags & FPSCR_SZ) {
TCGv_i64 fp = tcg_temp_new_i64();
gen_load_fpr64(ctx, fp, XHACK(B7_4));
- tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx, MO_TEUQ);
+ tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx,
+ MO_TEUQ | MO_ALIGN);
} else {
- tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL);
+ tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx,
+ MO_TEUL | MO_ALIGN);
}
return;
case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
CHECK_FPU_ENABLED
if (ctx->tbflags & FPSCR_SZ) {
TCGv_i64 fp = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ);
+ tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx,
+ MO_TEUQ | MO_ALIGN);
gen_store_fpr64(ctx, fp, XHACK(B11_8));
} else {
- tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
+ tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx,
+ MO_TEUL | MO_ALIGN);
}
return;
case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
CHECK_FPU_ENABLED
if (ctx->tbflags & FPSCR_SZ) {
TCGv_i64 fp = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ);
+ tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx,
+ MO_TEUQ | MO_ALIGN);
gen_store_fpr64(ctx, fp, XHACK(B11_8));
tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 8);
} else {
- tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
+ tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx,
+ MO_TEUL | MO_ALIGN);
tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
}
return;
@@ -996,10 +1008,12 @@
TCGv_i64 fp = tcg_temp_new_i64();
gen_load_fpr64(ctx, fp, XHACK(B7_4));
tcg_gen_subi_i32(addr, REG(B11_8), 8);
- tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ);
+ tcg_gen_qemu_st_i64(fp, addr, ctx->memidx,
+ MO_TEUQ | MO_ALIGN);
} else {
tcg_gen_subi_i32(addr, REG(B11_8), 4);
- tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
+ tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx,
+ MO_TEUL | MO_ALIGN);
}
tcg_gen_mov_i32(REG(B11_8), addr);
}
@@ -1011,10 +1025,12 @@
tcg_gen_add_i32(addr, REG(B7_4), REG(0));
if (ctx->tbflags & FPSCR_SZ) {
TCGv_i64 fp = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx, MO_TEUQ);
+ tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx,
+ MO_TEUQ | MO_ALIGN);
gen_store_fpr64(ctx, fp, XHACK(B11_8));
} else {
- tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, MO_TEUL);
+ tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx,
+ MO_TEUL | MO_ALIGN);
}
}
return;
@@ -1026,9 +1042,11 @@
if (ctx->tbflags & FPSCR_SZ) {
TCGv_i64 fp = tcg_temp_new_i64();
gen_load_fpr64(ctx, fp, XHACK(B7_4));
- tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ);
+ tcg_gen_qemu_st_i64(fp, addr, ctx->memidx,
+ MO_TEUQ | MO_ALIGN);
} else {
- tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
+ tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx,
+ MO_TEUL | MO_ALIGN);
}
}
return;
@@ -1158,14 +1176,14 @@
{
TCGv addr = tcg_temp_new();
tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
- tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW);
+ tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW | MO_ALIGN);
}
return;
case 0xc600: /* mov.l @(disp,GBR),R0 */
{
TCGv addr = tcg_temp_new();
tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
- tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL);
+ tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL | MO_ALIGN);
}
return;
case 0xc000: /* mov.b R0,@(disp,GBR) */
@@ -1179,14 +1197,14 @@
{
TCGv addr = tcg_temp_new();
tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
- tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW);
+ tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW | MO_ALIGN);
}
return;
case 0xc200: /* mov.l R0,@(disp,GBR) */
{
TCGv addr = tcg_temp_new();
tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
- tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL);
+ tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL | MO_ALIGN);
}
return;
case 0x8000: /* mov.b R0,@(disp,Rn) */
@@ -1286,7 +1304,8 @@
return;
case 0x4087: /* ldc.l @Rm+,Rn_BANK */
CHECK_PRIVILEGED
- tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx, MO_TESL);
+ tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx,
+ MO_TESL | MO_ALIGN);
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
return;
case 0x0082: /* stc Rm_BANK,Rn */
@@ -1298,7 +1317,8 @@
{
TCGv addr = tcg_temp_new();
tcg_gen_subi_i32(addr, REG(B11_8), 4);
- tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx, MO_TEUL);
+ tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx,
+ MO_TEUL | MO_ALIGN);
tcg_gen_mov_i32(REG(B11_8), addr);
}
return;
@@ -1354,7 +1374,8 @@
CHECK_PRIVILEGED
{
TCGv val = tcg_temp_new();
- tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TESL);
+ tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx,
+ MO_TESL | MO_ALIGN);
tcg_gen_andi_i32(val, val, 0x700083f3);
gen_write_sr(val);
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
@@ -1372,7 +1393,7 @@
TCGv val = tcg_temp_new();
tcg_gen_subi_i32(addr, REG(B11_8), 4);
gen_read_sr(val);
- tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
+ tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL | MO_ALIGN);
tcg_gen_mov_i32(REG(B11_8), addr);
}
return;
@@ -1383,7 +1404,8 @@
return; \
case ldpnum: \
prechk \
- tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, MO_TESL); \
+ tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, \
+ MO_TESL | MO_ALIGN); \
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); \
return;
#define ST(reg,stnum,stpnum,prechk) \
@@ -1396,7 +1418,8 @@
{ \
TCGv addr = tcg_temp_new(); \
tcg_gen_subi_i32(addr, REG(B11_8), 4); \
- tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, MO_TEUL); \
+ tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, \
+ MO_TEUL | MO_ALIGN); \
tcg_gen_mov_i32(REG(B11_8), addr); \
} \
return;
@@ -1423,7 +1446,8 @@
CHECK_FPU_ENABLED
{
TCGv addr = tcg_temp_new();
- tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx, MO_TESL);
+ tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx,
+ MO_TESL | MO_ALIGN);
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
gen_helper_ld_fpscr(cpu_env, addr);
ctx->base.is_jmp = DISAS_STOP;
@@ -1441,16 +1465,18 @@
tcg_gen_andi_i32(val, cpu_fpscr, 0x003fffff);
addr = tcg_temp_new();
tcg_gen_subi_i32(addr, REG(B11_8), 4);
- tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
+ tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL | MO_ALIGN);
tcg_gen_mov_i32(REG(B11_8), addr);
}
return;
case 0x00c3: /* movca.l R0,@Rm */
{
TCGv val = tcg_temp_new();
- tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TEUL);
+ tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx,
+ MO_TEUL | MO_ALIGN);
gen_helper_movcal(cpu_env, REG(B11_8), val);
- tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
+ tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx,
+ MO_TEUL | MO_ALIGN);
}
ctx->has_movcal = 1;
return;
@@ -1492,11 +1518,13 @@
cpu_lock_addr, fail);
tmp = tcg_temp_new();
tcg_gen_atomic_cmpxchg_i32(tmp, REG(B11_8), cpu_lock_value,
- REG(0), ctx->memidx, MO_TEUL);
+ REG(0), ctx->memidx,
+ MO_TEUL | MO_ALIGN);
tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, tmp, cpu_lock_value);
} else {
tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_lock_addr, -1, fail);
- tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
+ tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx,
+ MO_TEUL | MO_ALIGN);
tcg_gen_movi_i32(cpu_sr_t, 1);
}
tcg_gen_br(done);
@@ -1521,11 +1549,13 @@
if ((tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
TCGv tmp = tcg_temp_new();
tcg_gen_mov_i32(tmp, REG(B11_8));
- tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
+ tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
+ MO_TESL | MO_ALIGN);
tcg_gen_mov_i32(cpu_lock_value, REG(0));
tcg_gen_mov_i32(cpu_lock_addr, tmp);
} else {
- tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
+ tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
+ MO_TESL | MO_ALIGN);
tcg_gen_movi_i32(cpu_lock_addr, 0);
}
return;
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index d8d464e..62dd22d 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1580,13 +1580,6 @@
}
}
-static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
-{
- ptrdiff_t offset = tcg_pcrel_diff(s, target);
- tcg_debug_assert(offset == sextract64(offset, 0, 21));
- tcg_out_insn(s, 3406, ADR, rd, offset);
-}
-
typedef struct {
TCGReg base;
TCGReg index;
@@ -1627,151 +1620,44 @@
#endif
};
+static const TCGLdstHelperParam ldst_helper_param = {
+ .ntmp = 1, .tmp = { TCG_REG_TMP }
+};
+
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
- MemOpIdx oi = lb->oi;
- MemOp opc = get_memop(oi);
+ MemOp opc = get_memop(lb->oi);
if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
- tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
- tcg_out_adr(s, TCG_REG_X3, lb->raddr);
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
-
- tcg_out_movext(s, lb->type, lb->datalo_reg,
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_X0);
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
tcg_out_goto(s, lb->raddr);
return true;
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
- MemOpIdx oi = lb->oi;
- MemOp opc = get_memop(oi);
- MemOp size = opc & MO_SIZE;
+ MemOp opc = get_memop(lb->oi);
if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
- tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
- tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
- tcg_out_adr(s, TCG_REG_X4, lb->raddr);
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
tcg_out_goto(s, lb->raddr);
return true;
}
-
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
- TCGType ext, TCGReg data_reg, TCGReg addr_reg,
- tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
-{
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->oi = oi;
- label->type = ext;
- label->datalo_reg = data_reg;
- label->addrlo_reg = addr_reg;
- label->raddr = tcg_splitwx_to_rx(raddr);
- label->label_ptr[0] = label_ptr;
-}
-
-/* We expect to use a 7-bit scaled negative offset from ENV. */
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
-
-/* These offsets are built into the LDP below. */
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
-
-/* Load and compare a TLB entry, emitting the conditional jump to the
- slow path for the failure case, which will be patched later when finalizing
- the slow path. Generated code returns the host addend in X1,
- clobbers X0,X2,X3,TMP. */
-static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
- tcg_insn_unit **label_ptr, int mem_index,
- bool is_read)
-{
- unsigned a_bits = get_alignment_bits(opc);
- unsigned s_bits = opc & MO_SIZE;
- unsigned a_mask = (1u << a_bits) - 1;
- unsigned s_mask = (1u << s_bits) - 1;
- TCGReg x3;
- TCGType mask_type;
- uint64_t compare_mask;
-
- mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
- ? TCG_TYPE_I64 : TCG_TYPE_I32);
-
- /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
- tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
- TLB_MASK_TABLE_OFS(mem_index), 1, 0);
-
- /* Extract the TLB index from the address into X0. */
- tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
- TCG_REG_X0, TCG_REG_X0, addr_reg,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
-
- /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
- tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
-
- /* Load the tlb comparator into X0, and the fast path addend into X1. */
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
- ? offsetof(CPUTLBEntry, addr_read)
- : offsetof(CPUTLBEntry, addr_write));
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
- offsetof(CPUTLBEntry, addend));
-
- /* For aligned accesses, we check the first byte and include the alignment
- bits within the address. For unaligned access, we check that we don't
- cross pages using the address of the last byte of the access. */
- if (a_bits >= s_bits) {
- x3 = addr_reg;
- } else {
- tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
- TCG_REG_X3, addr_reg, s_mask - a_mask);
- x3 = TCG_REG_X3;
- }
- compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
-
- /* Store the page mask part of the address into X3. */
- tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
- TCG_REG_X3, x3, compare_mask);
-
- /* Perform the address comparison. */
- tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
-
- /* If not equal, we jump to the slow path. */
- *label_ptr = s->code_ptr;
- tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
-}
-
#else
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
- unsigned a_bits)
+static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
{
- unsigned a_mask = (1 << a_bits) - 1;
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->addrlo_reg = addr_reg;
-
- /* tst addr, #mask */
- tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
-
- label->label_ptr[0] = s->code_ptr;
-
- /* b.ne slow_path */
- tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
-
- label->raddr = tcg_splitwx_to_rx(s->code_ptr);
+ ptrdiff_t offset = tcg_pcrel_diff(s, target);
+ tcg_debug_assert(offset == sextract64(offset, 0, 21));
+ tcg_out_insn(s, 3406, ADR, rd, offset);
}
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
@@ -1801,6 +1687,125 @@
}
#endif /* CONFIG_SOFTMMU */
+/*
+ * For softmmu, perform the TLB load and compare.
+ * For useronly, perform any required alignment tests.
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
+ * is required and fill in @h with the host address for the fast path.
+ */
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
+ TCGReg addr_reg, MemOpIdx oi,
+ bool is_ld)
+{
+ TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+ TCGLabelQemuLdst *ldst = NULL;
+ MemOp opc = get_memop(oi);
+ unsigned a_bits = get_alignment_bits(opc);
+ unsigned a_mask = (1u << a_bits) - 1;
+
+#ifdef CONFIG_SOFTMMU
+ unsigned s_bits = opc & MO_SIZE;
+ unsigned s_mask = (1u << s_bits) - 1;
+ unsigned mem_index = get_mmuidx(oi);
+ TCGReg x3;
+ TCGType mask_type;
+ uint64_t compare_mask;
+
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addr_reg;
+
+ mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
+ ? TCG_TYPE_I64 : TCG_TYPE_I32);
+
+ /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
+ tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
+ TLB_MASK_TABLE_OFS(mem_index), 1, 0);
+
+ /* Extract the TLB index from the address into X0. */
+ tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
+ TCG_REG_X0, TCG_REG_X0, addr_reg,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+
+ /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
+ tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
+
+ /* Load the tlb comparator into X0, and the fast path addend into X1. */
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1,
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write));
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
+ offsetof(CPUTLBEntry, addend));
+
+ /*
+ * For aligned accesses, we check the first byte and include the alignment
+ * bits within the address. For unaligned access, we check that we don't
+ * cross pages using the address of the last byte of the access.
+ */
+ if (a_bits >= s_bits) {
+ x3 = addr_reg;
+ } else {
+ tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
+ TCG_REG_X3, addr_reg, s_mask - a_mask);
+ x3 = TCG_REG_X3;
+ }
+ compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
+
+ /* Store the page mask part of the address into X3. */
+ tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
+ TCG_REG_X3, x3, compare_mask);
+
+ /* Perform the address comparison. */
+ tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
+
+ /* If not equal, we jump to the slow path. */
+ ldst->label_ptr[0] = s->code_ptr;
+ tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
+
+ *h = (HostAddress){
+ .base = TCG_REG_X1,
+ .index = addr_reg,
+ .index_ext = addr_type
+ };
+#else
+ if (a_mask) {
+ ldst = new_ldst_label(s);
+
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addr_reg;
+
+ /* tst addr, #mask */
+ tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
+
+ /* b.ne slow_path */
+ ldst->label_ptr[0] = s->code_ptr;
+ tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
+ }
+
+ if (USE_GUEST_BASE) {
+ *h = (HostAddress){
+ .base = TCG_REG_GUEST_BASE,
+ .index = addr_reg,
+ .index_ext = addr_type
+ };
+ } else {
+ *h = (HostAddress){
+ .base = addr_reg,
+ .index = TCG_REG_XZR,
+ .index_ext = TCG_TYPE_I64
+ };
+ }
+#endif
+
+ return ldst;
+}
+
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
TCGReg data_r, HostAddress h)
{
@@ -1857,93 +1862,33 @@
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
- MemOp memop = get_memop(oi);
- TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+ TCGLabelQemuLdst *ldst;
HostAddress h;
- /* Byte swapping is left to middle-end expansion. */
- tcg_debug_assert((memop & MO_BSWAP) == 0);
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
+ tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
-#ifdef CONFIG_SOFTMMU
- tcg_insn_unit *label_ptr;
-
- tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 1);
-
- h = (HostAddress){
- .base = TCG_REG_X1,
- .index = addr_reg,
- .index_ext = addr_type
- };
- tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h);
-
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
- s->code_ptr, label_ptr);
-#else /* !CONFIG_SOFTMMU */
- unsigned a_bits = get_alignment_bits(memop);
- if (a_bits) {
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = data_reg;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
- if (USE_GUEST_BASE) {
- h = (HostAddress){
- .base = TCG_REG_GUEST_BASE,
- .index = addr_reg,
- .index_ext = addr_type
- };
- } else {
- h = (HostAddress){
- .base = addr_reg,
- .index = TCG_REG_XZR,
- .index_ext = TCG_TYPE_I64
- };
- }
- tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h);
-#endif /* CONFIG_SOFTMMU */
}
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
- MemOp memop = get_memop(oi);
- TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+ TCGLabelQemuLdst *ldst;
HostAddress h;
- /* Byte swapping is left to middle-end expansion. */
- tcg_debug_assert((memop & MO_BSWAP) == 0);
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
+ tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
-#ifdef CONFIG_SOFTMMU
- tcg_insn_unit *label_ptr;
-
- tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 0);
-
- h = (HostAddress){
- .base = TCG_REG_X1,
- .index = addr_reg,
- .index_ext = addr_type
- };
- tcg_out_qemu_st_direct(s, memop, data_reg, h);
-
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
- s->code_ptr, label_ptr);
-#else /* !CONFIG_SOFTMMU */
- unsigned a_bits = get_alignment_bits(memop);
- if (a_bits) {
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = data_reg;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
- if (USE_GUEST_BASE) {
- h = (HostAddress){
- .base = TCG_REG_GUEST_BASE,
- .index = addr_reg,
- .index_ext = addr_type
- };
- } else {
- h = (HostAddress){
- .base = addr_reg,
- .index = TCG_REG_XZR,
- .index_ext = TCG_TYPE_I64
- };
- }
- tcg_out_qemu_st_direct(s, memop, data_reg, h);
-#endif /* CONFIG_SOFTMMU */
}
static const tcg_insn_unit *tb_ret_addr;
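
The aarch64 backend (and the arm one below) now routes both the softmmu TLB lookup and the user-only alignment test through a single prepare_host_addr(), which returns a slow-path record only when one is needed; tcg_out_qemu_ld/st then finish that record after emitting the fast path, so the per-function #ifdef CONFIG_SOFTMMU duplication disappears. A compact stand-alone model of that control flow; every type and name here is a simplified stand-in, not the TCG API:

    /* Model of the prepare-then-finish split used by prepare_host_addr(). */
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct HostAddr { int base, index; } HostAddr;
    typedef struct SlowPath { int data_reg; const char *why; } SlowPath;

    /* Fill *h for the fast path; return a slow-path record iff one is needed. */
    static SlowPath *prepare_host_addr(HostAddr *h, int addr_reg, bool need_check)
    {
        *h = (HostAddr){ .base = addr_reg, .index = -1 };
        if (!need_check) {
            return NULL;
        }
        SlowPath *sp = calloc(1, sizeof(*sp));   /* cf. new_ldst_label() */
        sp->why = "tlb-miss-or-unaligned";
        return sp;
    }

    static void emit_load(int data_reg, int addr_reg, bool need_check)
    {
        HostAddr h;
        SlowPath *sp = prepare_host_addr(&h, addr_reg, need_check);

        printf("fast path: load r%d <- [r%d]\n", data_reg, h.base);

        if (sp) {                                /* finish the record afterwards */
            sp->data_reg = data_reg;
            printf("slow path queued (%s) for r%d\n", sp->why, sp->data_reg);
            free(sp);
        }
    }

    int main(void)
    {
        emit_load(0, 1, true);    /* e.g. softmmu: always has a slow path   */
        emit_load(2, 3, false);   /* e.g. user-only with no alignment check */
        return 0;
    }
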
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index b6b4ffc..df514e5 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -690,8 +690,8 @@
tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1);
}
-static void tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt,
- TCGReg rn, int imm8)
+static void __attribute__((unused))
+tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, int imm8)
{
tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
}
@@ -969,28 +969,16 @@
tcg_out_dat_imm(s, COND_AL, ARITH_AND, rd, rn, 0xff);
}
-static void __attribute__((unused))
-tcg_out_ext8u_cond(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
-{
- tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
-}
-
static void tcg_out_ext16s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn)
{
/* sxth */
tcg_out32(s, 0x06bf0070 | (COND_AL << 28) | (rd << 12) | rn);
}
-static void tcg_out_ext16u_cond(TCGContext *s, ARMCond cond,
- TCGReg rd, TCGReg rn)
-{
- /* uxth */
- tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
-}
-
static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
- tcg_out_ext16u_cond(s, COND_AL, rd, rn);
+ /* uxth */
+ tcg_out32(s, 0x06ff0070 | (COND_AL << 28) | (rd << 12) | rn);
}
static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
@@ -1382,82 +1370,121 @@
#endif
};
-/* Helper routines for marshalling helper function arguments into
- * the correct registers and stack.
- * argreg is where we want to put this argument, arg is the argument itself.
- * Return value is the updated argreg ready for the next call.
- * Note that argreg 0..3 is real registers, 4+ on stack.
- *
- * We provide routines for arguments which are: immediate, 32 bit
- * value in register, 16 and 8 bit values in register (which must be zero
- * extended before use) and 64 bit value in a lo:hi register pair.
- */
-#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \
-static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \
-{ \
- if (argreg < 4) { \
- MOV_ARG(s, COND_AL, argreg, arg); \
- } else { \
- int ofs = (argreg - 4) * 4; \
- EXT_ARG; \
- tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \
- tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \
- } \
- return argreg + 1; \
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
+{
+ /* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
+ return TCG_REG_R14;
}
-DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
- (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u_cond,
- (tcg_out_ext8u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u_cond,
- (tcg_out_ext16u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
+static const TCGLdstHelperParam ldst_helper_param = {
+ .ra_gen = ldst_ra_gen,
+ .ntmp = 1,
+ .tmp = { TCG_REG_TMP },
+};
-static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
- TCGReg arglo, TCGReg arghi)
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
- /* 64 bit arguments must go in even/odd register pairs
- * and in 8-aligned stack slots.
- */
- if (argreg & 1) {
- argreg++;
+ MemOp opc = get_memop(lb->oi);
+
+ if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+ return false;
}
- if (argreg >= 4 && (arglo & 1) == 0 && arghi == arglo + 1) {
- tcg_out_strd_8(s, COND_AL, arglo,
- TCG_REG_CALL_STACK, (argreg - 4) * 4);
- return argreg + 2;
+
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
+ tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
+
+ tcg_out_goto(s, COND_AL, lb->raddr);
+ return true;
+}
+
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ MemOp opc = get_memop(lb->oi);
+
+ if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+ return false;
+ }
+
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
+
+ /* Tail-call to the helper, which will return to the fast path. */
+ tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
+ return true;
+}
+#else
+static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
+{
+ if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+ return false;
+ }
+
+ if (TARGET_LONG_BITS == 64) {
+ /* 64-bit target address is aligned into R2:R3. */
+ TCGMovExtend ext[2] = {
+ { .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32,
+ .src = l->addrlo_reg,
+ .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
+ { .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32,
+ .src = l->addrhi_reg,
+ .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
+ };
+ tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
} else {
- argreg = tcg_out_arg_reg32(s, argreg, arglo);
- argreg = tcg_out_arg_reg32(s, argreg, arghi);
- return argreg;
+ tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
}
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_AREG0);
+
+ /*
+ * Tail call to the helper, with the return address back inline,
+ * just for the clarity of the debugging traceback -- the helper
+ * cannot return. We have used BLNE to arrive here, so LR is
+ * already set.
+ */
+ tcg_out_goto(s, COND_AL, (const void *)
+ (l->is_ld ? helper_unaligned_ld : helper_unaligned_st));
+ return true;
}
-#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
-
-/* We expect to use an 9-bit sign-magnitude negative offset from ENV. */
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
-
-/* These offsets are built into the LDRD below. */
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
-
-/* Load and compare a TLB entry, leaving the flags set. Returns the register
- containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
-
-static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
- MemOp opc, int mem_index, bool is_load)
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
- int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
- : offsetof(CPUTLBEntry, addr_write));
+ return tcg_out_fail_alignment(s, l);
+}
+
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+ return tcg_out_fail_alignment(s, l);
+}
+#endif /* SOFTMMU */
+
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
+ TCGReg addrlo, TCGReg addrhi,
+ MemOpIdx oi, bool is_ld)
+{
+ TCGLabelQemuLdst *ldst = NULL;
+ MemOp opc = get_memop(oi);
+ MemOp a_bits = get_alignment_bits(opc);
+ unsigned a_mask = (1 << a_bits) - 1;
+
+#ifdef CONFIG_SOFTMMU
+ int mem_index = get_mmuidx(oi);
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write);
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
- unsigned a_mask = (1 << get_alignment_bits(opc)) - 1;
TCGReg t_addr;
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addrlo;
+ ldst->addrhi_reg = addrhi;
+
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
/* Extract the tlb index from the address into R0. */
@@ -1527,182 +1554,37 @@
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
}
- return TCG_REG_R1;
-}
-
-/* Record the context of a call to the out of line helper code for the slow
- path for a load or store, so that we can later generate the correct
- helper code. */
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
- MemOpIdx oi, TCGType type,
- TCGReg datalo, TCGReg datahi,
- TCGReg addrlo, TCGReg addrhi,
- tcg_insn_unit *raddr,
- tcg_insn_unit *label_ptr)
-{
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->oi = oi;
- label->type = type;
- label->datalo_reg = datalo;
- label->datahi_reg = datahi;
- label->addrlo_reg = addrlo;
- label->addrhi_reg = addrhi;
- label->raddr = tcg_splitwx_to_rx(raddr);
- label->label_ptr[0] = label_ptr;
-}
-
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
-{
- TCGReg argreg;
- MemOpIdx oi = lb->oi;
- MemOp opc = get_memop(oi);
-
- if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
- return false;
- }
-
- argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
- if (TARGET_LONG_BITS == 64) {
- argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
- } else {
- argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
- }
- argreg = tcg_out_arg_imm32(s, argreg, oi);
- argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
-
- /* Use the canonical unsigned helpers and minimize icache usage. */
- tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
-
- if ((opc & MO_SIZE) == MO_64) {
- TCGMovExtend ext[2] = {
- { .dst = lb->datalo_reg, .dst_type = TCG_TYPE_I32,
- .src = TCG_REG_R0, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
- { .dst = lb->datahi_reg, .dst_type = TCG_TYPE_I32,
- .src = TCG_REG_R1, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
- };
- tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
- } else {
- tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg,
- TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0);
- }
-
- tcg_out_goto(s, COND_AL, lb->raddr);
- return true;
-}
-
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
-{
- TCGReg argreg, datalo, datahi;
- MemOpIdx oi = lb->oi;
- MemOp opc = get_memop(oi);
-
- if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
- return false;
- }
-
- argreg = TCG_REG_R0;
- argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
- if (TARGET_LONG_BITS == 64) {
- argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
- } else {
- argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
- }
-
- datalo = lb->datalo_reg;
- datahi = lb->datahi_reg;
- switch (opc & MO_SIZE) {
- case MO_8:
- argreg = tcg_out_arg_reg8(s, argreg, datalo);
- break;
- case MO_16:
- argreg = tcg_out_arg_reg16(s, argreg, datalo);
- break;
- case MO_32:
- default:
- argreg = tcg_out_arg_reg32(s, argreg, datalo);
- break;
- case MO_64:
- argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
- break;
- }
-
- argreg = tcg_out_arg_imm32(s, argreg, oi);
- argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
-
- /* Tail-call to the helper, which will return to the fast path. */
- tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
- return true;
-}
+ *h = (HostAddress){
+ .cond = COND_AL,
+ .base = addrlo,
+ .index = TCG_REG_R1,
+ .index_scratch = true,
+ };
#else
+ if (a_mask) {
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addrlo;
+ ldst->addrhi_reg = addrhi;
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
- TCGReg addrhi, unsigned a_bits)
-{
- unsigned a_mask = (1 << a_bits) - 1;
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->addrlo_reg = addrlo;
- label->addrhi_reg = addrhi;
-
- /* We are expecting a_bits to max out at 7, and can easily support 8. */
- tcg_debug_assert(a_mask <= 0xff);
- /* tst addr, #mask */
- tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
-
- /* blne slow_path */
- label->label_ptr[0] = s->code_ptr;
- tcg_out_bl_imm(s, COND_NE, 0);
-
- label->raddr = tcg_splitwx_to_rx(s->code_ptr);
-}
-
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
-{
- if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
- return false;
+ /* We are expecting a_bits to max out at 7 */
+ tcg_debug_assert(a_mask <= 0xff);
+ /* tst addr, #mask */
+ tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
}
- if (TARGET_LONG_BITS == 64) {
- /* 64-bit target address is aligned into R2:R3. */
- TCGMovExtend ext[2] = {
- { .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32,
- .src = l->addrlo_reg,
- .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
- { .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32,
- .src = l->addrhi_reg,
- .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
- };
- tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
- } else {
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
- }
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_AREG0);
+ *h = (HostAddress){
+ .cond = COND_AL,
+ .base = addrlo,
+ .index = guest_base ? TCG_REG_GUEST_BASE : -1,
+ .index_scratch = false,
+ };
+#endif
- /*
- * Tail call to the helper, with the return address back inline,
- * just for the clarity of the debugging traceback -- the helper
- * cannot return. We have used BLNE to arrive here, so LR is
- * already set.
- */
- tcg_out_goto(s, COND_AL, (const void *)
- (l->is_ld ? helper_unaligned_ld : helper_unaligned_st));
- return true;
+ return ldst;
}
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
-{
- return tcg_out_fail_alignment(s, l);
-}
-
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
-{
- return tcg_out_fail_alignment(s, l);
-}
-#endif /* SOFTMMU */
-
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
TCGReg datahi, HostAddress h)
{
@@ -1799,37 +1681,28 @@
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#ifdef CONFIG_SOFTMMU
- h.cond = COND_AL;
- h.base = addrlo;
- h.index_scratch = true;
- h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 1);
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = datalo;
+ ldst->datahi_reg = datahi;
- /*
- * This a conditional BL only to load a pointer within this opcode into
- * LR for the slow path. We will not be using the value for a tail call.
- */
- tcg_insn_unit *label_ptr = s->code_ptr;
- tcg_out_bl_imm(s, COND_NE, 0);
+ /*
+ * This is a conditional BL only to load a pointer within this
+ * opcode into LR for the slow path. We will not be using
+ * the value for a tail call.
+ */
+ ldst->label_ptr[0] = s->code_ptr;
+ tcg_out_bl_imm(s, COND_NE, 0);
- tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
-
- add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
- addrlo, addrhi, s->code_ptr, label_ptr);
-#else
- unsigned a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
+ tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+ } else {
+ tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
}
-
- h.cond = COND_AL;
- h.base = addrlo;
- h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
- h.index_scratch = false;
- tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
-#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
@@ -1891,35 +1764,25 @@
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#ifdef CONFIG_SOFTMMU
- h.cond = COND_EQ;
- h.base = addrlo;
- h.index_scratch = true;
- h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 0);
- tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = datalo;
+ ldst->datahi_reg = datahi;
- /* The conditional call must come last, as we're going to return here. */
- tcg_insn_unit *label_ptr = s->code_ptr;
- tcg_out_bl_imm(s, COND_NE, 0);
-
- add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
- addrlo, addrhi, s->code_ptr, label_ptr);
-#else
- unsigned a_bits = get_alignment_bits(opc);
-
- h.cond = COND_AL;
- if (a_bits) {
- tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
h.cond = COND_EQ;
- }
+ tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
- h.base = addrlo;
- h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
- h.index_scratch = false;
- tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
-#endif
+ /* The conditional call is last, as we're going to return here. */
+ ldst->label_ptr[0] = s->code_ptr;
+ tcg_out_bl_imm(s, COND_NE, 0);
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+ } else {
+ tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
+ }
}
static void tcg_out_epilogue(TCGContext *s);
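The arm conversion above establishes the shape that the i386, loongarch64 and mips conversions below repeat: prepare_host_addr() performs the TLB load and compare (softmmu) or the alignment test (user-only), the direct access is emitted either way, and the returned label, when present, is filled in afterwards so the common slow-path code can rebuild the helper call. A minimal sketch of that shape follows, using the names introduced by the patch; emit_fast_path() is a placeholder for each backend's direct load/store emitter, and the snippet is illustrative rather than compilable on its own (arm additionally threads its conditional BL between the two steps, as shown above).

/*
 * Illustrative only, not compilable on its own: emit_fast_path() is a
 * placeholder for the backend's direct load/store emitter; the other
 * names are the ones introduced by this patch.
 */
static void tcg_out_qemu_ldst_sketch(TCGContext *s, TCGReg datalo,
                                     TCGReg datahi, TCGReg addrlo,
                                     TCGReg addrhi, MemOpIdx oi,
                                     TCGType data_type, bool is_ld)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    /* TLB load and compare (softmmu) or alignment test (user-only). */
    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, is_ld);

    /* Fast path: the access itself, through the resolved host address. */
    emit_fast_path(s, get_memop(oi), datalo, datahi, h, data_type, is_ld);

    if (ldst) {
        /* Record what the common slow path needs to rebuild the call. */
        ldst->type = data_type;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}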
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index aae6981..a01bfad 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1802,142 +1802,37 @@
[MO_BEUQ] = helper_be_stq_mmu,
};
-/* Perform the TLB load and compare.
-
- Inputs:
- ADDRLO and ADDRHI contain the low and high part of the address.
-
- MEM_INDEX and S_BITS are the memory context and log2 size of the load.
-
- WHICH is the offset into the CPUTLBEntry structure of the slot to read.
- This should be offsetof addr_read or addr_write.
-
- Outputs:
- LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
- positions of the displacements of forward jumps to the TLB miss case.
-
- Second argument register is loaded with the low part of the address.
- In the TLB hit case, it has been adjusted as indicated by the TLB
- and so is a host address. In the TLB miss case, it continues to
- hold a guest address.
-
- First argument register is clobbered. */
-
-static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
- int mem_index, MemOp opc,
- tcg_insn_unit **label_ptr, int which)
-{
- TCGType ttype = TCG_TYPE_I32;
- TCGType tlbtype = TCG_TYPE_I32;
- int trexw = 0, hrexw = 0, tlbrexw = 0;
- unsigned a_bits = get_alignment_bits(opc);
- unsigned s_bits = opc & MO_SIZE;
- unsigned a_mask = (1 << a_bits) - 1;
- unsigned s_mask = (1 << s_bits) - 1;
- target_ulong tlb_mask;
-
- if (TCG_TARGET_REG_BITS == 64) {
- if (TARGET_LONG_BITS == 64) {
- ttype = TCG_TYPE_I64;
- trexw = P_REXW;
- }
- if (TCG_TYPE_PTR == TCG_TYPE_I64) {
- hrexw = P_REXW;
- if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
- tlbtype = TCG_TYPE_I64;
- tlbrexw = P_REXW;
- }
- }
- }
-
- tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
- tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
-
- tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
- TLB_MASK_TABLE_OFS(mem_index) +
- offsetof(CPUTLBDescFast, mask));
-
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
- TLB_MASK_TABLE_OFS(mem_index) +
- offsetof(CPUTLBDescFast, table));
-
- /* If the required alignment is at least as large as the access, simply
- copy the address and mask. For lesser alignments, check that we don't
- cross pages for the complete access. */
- if (a_bits >= s_bits) {
- tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
- } else {
- tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
- addrlo, s_mask - a_mask);
- }
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
- tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
-
- /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
- tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
- TCG_REG_L1, TCG_REG_L0, which);
-
- /* Prepare for both the fast path add of the tlb addend, and the slow
- path function argument setup. */
- tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
-
- /* jne slow_path */
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
- label_ptr[0] = s->code_ptr;
- s->code_ptr += 4;
-
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
- /* cmp 4(TCG_REG_L0), addrhi */
- tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, which + 4);
-
- /* jne slow_path */
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
- label_ptr[1] = s->code_ptr;
- s->code_ptr += 4;
- }
-
- /* TLB Hit. */
-
- /* add addend(TCG_REG_L0), TCG_REG_L1 */
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L1, TCG_REG_L0,
- offsetof(CPUTLBEntry, addend));
-}
-
/*
- * Record the context of a call to the out of line helper code for the slow path
- * for a load or store, so that we can later generate the correct helper code
+ * Because i686 has no register parameters and because x86_64 has xchg
+ * to handle addr/data register overlap, we have placed all input arguments
+ * before we might need a scratch reg.
+ *
+ * Even then, a scratch is only needed for l->raddr. Rather than expose
+ * a general-purpose scratch when we don't actually know it's available,
+ * use the ra_gen hook to load into RAX if needed.
*/
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
- TCGType type, MemOpIdx oi,
- TCGReg datalo, TCGReg datahi,
- TCGReg addrlo, TCGReg addrhi,
- tcg_insn_unit *raddr,
- tcg_insn_unit **label_ptr)
+#if TCG_TARGET_REG_BITS == 64
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
{
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->oi = oi;
- label->type = type;
- label->datalo_reg = datalo;
- label->datahi_reg = datahi;
- label->addrlo_reg = addrlo;
- label->addrhi_reg = addrhi;
- label->raddr = tcg_splitwx_to_rx(raddr);
- label->label_ptr[0] = label_ptr[0];
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
- label->label_ptr[1] = label_ptr[1];
+ if (arg < 0) {
+ arg = TCG_REG_RAX;
}
+ tcg_out_movi(s, TCG_TYPE_PTR, arg, (uintptr_t)l->raddr);
+ return arg;
}
+static const TCGLdstHelperParam ldst_helper_param = {
+ .ra_gen = ldst_ra_gen
+};
+#else
+static const TCGLdstHelperParam ldst_helper_param = { };
+#endif
/*
* Generate code for the slow path for a load at the end of block
*/
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
- MemOpIdx oi = l->oi;
- MemOp opc = get_memop(oi);
+ MemOp opc = get_memop(l->oi);
tcg_insn_unit **label_ptr = &l->label_ptr[0];
/* resolve label address */
@@ -1946,48 +1841,10 @@
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
}
- if (TCG_TARGET_REG_BITS == 32) {
- int ofs = 0;
-
- tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
- ofs += 4;
-
- tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
- ofs += 4;
-
- if (TARGET_LONG_BITS == 64) {
- tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
- ofs += 4;
- }
-
- tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
- ofs += 4;
-
- tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
- } else {
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
- /* The second argument is already loaded with addrlo. */
- tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
- tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
- (uintptr_t)l->raddr);
- }
-
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+ tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
- if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
- TCGMovExtend ext[2] = {
- { .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32,
- .src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
- { .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32,
- .src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
- };
- tcg_out_movext2(s, &ext[0], &ext[1], -1);
- } else {
- tcg_out_movext(s, l->type, l->datalo_reg,
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX);
- }
-
- /* Jump to the code corresponding to next IR of qemu_st */
tcg_out_jmp(s, l->raddr);
return true;
}
@@ -1997,11 +1854,8 @@
*/
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
- MemOpIdx oi = l->oi;
- MemOp opc = get_memop(oi);
- MemOp s_bits = opc & MO_SIZE;
+ MemOp opc = get_memop(l->oi);
tcg_insn_unit **label_ptr = &l->label_ptr[0];
- TCGReg retaddr;
/* resolve label address */
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
@@ -2009,79 +1863,13 @@
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
}
- if (TCG_TARGET_REG_BITS == 32) {
- int ofs = 0;
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
+ tcg_out_branch(s, 1, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
- tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
- ofs += 4;
-
- tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
- ofs += 4;
-
- if (TARGET_LONG_BITS == 64) {
- tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
- ofs += 4;
- }
-
- tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
- ofs += 4;
-
- if (s_bits == MO_64) {
- tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
- ofs += 4;
- }
-
- tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
- ofs += 4;
-
- retaddr = TCG_REG_EAX;
- tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
- tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
- } else {
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
- /* The second argument is already loaded with addrlo. */
- tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
- tcg_target_call_iarg_regs[2], l->datalo_reg);
- tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
-
- if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
- retaddr = tcg_target_call_iarg_regs[4];
- tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
- } else {
- retaddr = TCG_REG_RAX;
- tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
- tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
- TCG_TARGET_CALL_STACK_OFFSET);
- }
- }
-
- /* "Tail call" to the helper, with the return address back inline. */
- tcg_out_push(s, retaddr);
- tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+ tcg_out_jmp(s, l->raddr);
return true;
}
#else
-
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
- TCGReg addrhi, unsigned a_bits)
-{
- unsigned a_mask = (1 << a_bits) - 1;
- TCGLabelQemuLdst *label;
-
- tcg_out_testi(s, addrlo, a_mask);
- /* jne slow_path */
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
-
- label = new_ldst_label(s);
- label->is_ld = is_ld;
- label->addrlo_reg = addrlo;
- label->addrhi_reg = addrhi;
- label->raddr = tcg_splitwx_to_rx(s->code_ptr + 4);
- label->label_ptr[0] = s->code_ptr;
-
- s->code_ptr += 4;
-}
-
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
/* resolve label address */
@@ -2159,6 +1947,128 @@
#endif /* setup_guest_base_seg */
#endif /* SOFTMMU */
+/*
+ * For softmmu, perform the TLB load and compare.
+ * For useronly, perform any required alignment tests.
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
+ * is required and fill in @h with the host address for the fast path.
+ */
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
+ TCGReg addrlo, TCGReg addrhi,
+ MemOpIdx oi, bool is_ld)
+{
+ TCGLabelQemuLdst *ldst = NULL;
+ MemOp opc = get_memop(oi);
+ unsigned a_bits = get_alignment_bits(opc);
+ unsigned a_mask = (1 << a_bits) - 1;
+
+#ifdef CONFIG_SOFTMMU
+ int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write);
+ TCGType ttype = TCG_TYPE_I32;
+ TCGType tlbtype = TCG_TYPE_I32;
+ int trexw = 0, hrexw = 0, tlbrexw = 0;
+ unsigned mem_index = get_mmuidx(oi);
+ unsigned s_bits = opc & MO_SIZE;
+ unsigned s_mask = (1 << s_bits) - 1;
+ target_ulong tlb_mask;
+
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addrlo;
+ ldst->addrhi_reg = addrhi;
+
+ if (TCG_TARGET_REG_BITS == 64) {
+ if (TARGET_LONG_BITS == 64) {
+ ttype = TCG_TYPE_I64;
+ trexw = P_REXW;
+ }
+ if (TCG_TYPE_PTR == TCG_TYPE_I64) {
+ hrexw = P_REXW;
+ if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
+ tlbtype = TCG_TYPE_I64;
+ tlbrexw = P_REXW;
+ }
+ }
+ }
+
+ tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
+ tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+
+ tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
+ TLB_MASK_TABLE_OFS(mem_index) +
+ offsetof(CPUTLBDescFast, mask));
+
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
+ TLB_MASK_TABLE_OFS(mem_index) +
+ offsetof(CPUTLBDescFast, table));
+
+ /*
+ * If the required alignment is at least as large as the access, simply
+ * copy the address and mask. For lesser alignments, check that we don't
+ * cross pages for the complete access.
+ */
+ if (a_bits >= s_bits) {
+ tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
+ } else {
+ tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
+ addrlo, s_mask - a_mask);
+ }
+ tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
+ tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
+
+ /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
+ TCG_REG_L1, TCG_REG_L0, cmp_ofs);
+
+ /* jne slow_path */
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
+ ldst->label_ptr[0] = s->code_ptr;
+ s->code_ptr += 4;
+
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ /* cmp 4(TCG_REG_L0), addrhi */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4);
+
+ /* jne slow_path */
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
+ ldst->label_ptr[1] = s->code_ptr;
+ s->code_ptr += 4;
+ }
+
+ /* TLB Hit. */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
+ offsetof(CPUTLBEntry, addend));
+
+ *h = (HostAddress) {
+ .base = addrlo,
+ .index = TCG_REG_L0,
+ };
+#else
+ if (a_bits) {
+ ldst = new_ldst_label(s);
+
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addrlo;
+ ldst->addrhi_reg = addrhi;
+
+ tcg_out_testi(s, addrlo, a_mask);
+ /* jne slow_path */
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
+ ldst->label_ptr[0] = s->code_ptr;
+ s->code_ptr += 4;
+ }
+
+ *h = x86_guest_base;
+ h->base = addrlo;
+#endif
+
+ return ldst;
+}
+
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
HostAddress h, TCGType type, MemOp memop)
{
@@ -2258,35 +2168,18 @@
TCGReg addrlo, TCGReg addrhi,
MemOpIdx oi, TCGType data_type)
{
- MemOp opc = get_memop(oi);
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#if defined(CONFIG_SOFTMMU)
- tcg_insn_unit *label_ptr[2];
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
+ tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, get_memop(oi));
- tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
- label_ptr, offsetof(CPUTLBEntry, addr_read));
-
- /* TLB Hit. */
- h.base = TCG_REG_L1;
- h.index = -1;
- h.ofs = 0;
- h.seg = 0;
- tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc);
-
- /* Record the current context of a load into ldst label */
- add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
- addrlo, addrhi, s->code_ptr, label_ptr);
-#else
- unsigned a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = datalo;
+ ldst->datahi_reg = datahi;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
-
- h = x86_guest_base;
- h.base = addrlo;
- tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc);
-#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
@@ -2345,36 +2238,18 @@
TCGReg addrlo, TCGReg addrhi,
MemOpIdx oi, TCGType data_type)
{
- MemOp opc = get_memop(oi);
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#if defined(CONFIG_SOFTMMU)
- tcg_insn_unit *label_ptr[2];
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
+ tcg_out_qemu_st_direct(s, datalo, datahi, h, get_memop(oi));
- tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
- label_ptr, offsetof(CPUTLBEntry, addr_write));
-
- /* TLB Hit. */
- h.base = TCG_REG_L1;
- h.index = -1;
- h.ofs = 0;
- h.seg = 0;
- tcg_out_qemu_st_direct(s, datalo, datahi, h, opc);
-
- /* Record the current context of a store into ldst label */
- add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi,
- addrlo, addrhi, s->code_ptr, label_ptr);
-#else
- unsigned a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = datalo;
+ ldst->datahi_reg = datahi;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
-
- h = x86_guest_base;
- h.base = addrlo;
-
- tcg_out_qemu_st_direct(s, datalo, datahi, h, opc);
-#endif
}
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
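The comparator arithmetic in the i386 prepare_host_addr() above is worth spelling out: when the guaranteed alignment is smaller than the access, the address is biased by s_mask - a_mask (the LEA in the diff) so the masked value carries the page bits of the access's last byte, while any surviving a_mask bits still reject misaligned pointers. The following stand-alone demo of that arithmetic assumes 4 KiB pages and 64-bit addresses purely for illustration; the constants and the tlb_tag value are made up, not taken from QEMU.

#include <stdint.h>
#include <stdio.h>

/* Assumptions for illustration only: 4 KiB pages, 64-bit addresses. */
#define PAGE_BITS 12
#define PAGE_MASK (~(uint64_t)0 << PAGE_BITS)

/*
 * Mirror of the x86 fast-path comparator: bias the address by
 * s_mask - a_mask when a_bits < s_bits, then AND with PAGE_MASK | a_mask
 * and compare against the page-aligned TLB comparator.
 */
static int tlb_compare_hits(uint64_t addr, uint64_t tlb_tag,
                            unsigned a_bits, unsigned s_bits)
{
    uint64_t a_mask = ((uint64_t)1 << a_bits) - 1;
    uint64_t s_mask = ((uint64_t)1 << s_bits) - 1;
    uint64_t biased = a_bits >= s_bits ? addr : addr + (s_mask - a_mask);

    return (biased & (PAGE_MASK | a_mask)) == tlb_tag;
}

int main(void)
{
    uint64_t tag = 0x1000;   /* page-aligned comparator for page 0x1000 */

    /* 4-byte load ending at 0x1ff3, inside the page: fast path. */
    printf("%d\n", tlb_compare_hits(0x1ff0, tag, 0, 2));   /* 1 */
    /* Odd pointer with 2-byte alignment required: a_mask bit survives. */
    printf("%d\n", tlb_compare_hits(0x1ff1, tag, 1, 2));   /* 0 */
    /* 4-byte load at 0x1ffe straddles into page 0x2000: compare fails. */
    printf("%d\n", tlb_compare_hits(0x1ffe, tag, 0, 2));   /* 0 */
    return 0;
}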
diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h
index 172c107..c2bde44 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -17,9 +17,7 @@
C_O0_I1(r)
C_O0_I2(rZ, r)
C_O0_I2(rZ, rZ)
-C_O0_I2(LZ, L)
C_O1_I1(r, r)
-C_O1_I1(r, L)
C_O1_I2(r, r, rC)
C_O1_I2(r, r, ri)
C_O1_I2(r, r, rI)
diff --git a/tcg/loongarch64/tcg-target-con-str.h b/tcg/loongarch64/tcg-target-con-str.h
index 541ff47..6e9ccca 100644
--- a/tcg/loongarch64/tcg-target-con-str.h
+++ b/tcg/loongarch64/tcg-target-con-str.h
@@ -14,7 +14,6 @@
* REGS(letter, register_mask)
*/
REGS('r', ALL_GENERAL_REGS)
-REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
/*
* Define constraint letters for constants:
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 6a87a5e..83fa45c 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -133,18 +133,7 @@
#define TCG_CT_CONST_C12 0x1000
#define TCG_CT_CONST_WSZ 0x2000
-#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
-/*
- * For softmmu, we need to avoid conflicts with the first 5
- * argument registers to call the helper. Some of these are
- * also used for the tlb lookup.
- */
-#ifdef CONFIG_SOFTMMU
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5)
-#else
-#define SOFTMMU_RESERVE_REGS 0
-#endif
-
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
{
@@ -818,156 +807,45 @@
[MO_64] = helper_le_stq_mmu,
};
-/* We expect to use a 12-bit negative offset from ENV. */
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
-
static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
tcg_out_opc_b(s, 0);
return reloc_br_sd10k16(s->code_ptr - 1, target);
}
-/*
- * Emits common code for TLB addend lookup, that eventually loads the
- * addend in TCG_REG_TMP2.
- */
-static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, MemOpIdx oi,
- tcg_insn_unit **label_ptr, bool is_load)
-{
- MemOp opc = get_memop(oi);
- unsigned s_bits = opc & MO_SIZE;
- unsigned a_bits = get_alignment_bits(opc);
- tcg_target_long compare_mask;
- int mem_index = get_mmuidx(oi);
- int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
- int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
- int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
-
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
-
- tcg_out_opc_srli_d(s, TCG_REG_TMP2, addrl,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
- tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
- tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
-
- /* Load the tlb comparator and the addend. */
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
- is_load ? offsetof(CPUTLBEntry, addr_read)
- : offsetof(CPUTLBEntry, addr_write));
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
- offsetof(CPUTLBEntry, addend));
-
- /* We don't support unaligned accesses. */
- if (a_bits < s_bits) {
- a_bits = s_bits;
- }
- /* Clear the non-page, non-alignment bits from the address. */
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
- tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addrl);
-
- /* Compare masked address with the TLB entry. */
- label_ptr[0] = s->code_ptr;
- tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
-
- /* TLB Hit - addend in TCG_REG_TMP2, ready for use. */
-}
-
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
- TCGType type,
- TCGReg datalo, TCGReg addrlo,
- void *raddr, tcg_insn_unit **label_ptr)
-{
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->oi = oi;
- label->type = type;
- label->datalo_reg = datalo;
- label->datahi_reg = 0; /* unused */
- label->addrlo_reg = addrlo;
- label->addrhi_reg = 0; /* unused */
- label->raddr = tcg_splitwx_to_rx(raddr);
- label->label_ptr[0] = label_ptr[0];
-}
+static const TCGLdstHelperParam ldst_helper_param = {
+ .ntmp = 1, .tmp = { TCG_REG_TMP0 }
+};
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
- MemOpIdx oi = l->oi;
- MemOp opc = get_memop(oi);
- MemOp size = opc & MO_SIZE;
+ MemOp opc = get_memop(l->oi);
/* resolve label address */
if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
- /* call load helper */
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi);
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr);
-
- tcg_out_call_int(s, qemu_ld_helpers[size], false);
-
- tcg_out_movext(s, l->type, l->datalo_reg,
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_A0);
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
+ tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE], false);
+ tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
return tcg_out_goto(s, l->raddr);
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
- MemOpIdx oi = l->oi;
- MemOp opc = get_memop(oi);
- MemOp size = opc & MO_SIZE;
+ MemOp opc = get_memop(l->oi);
/* resolve label address */
if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
- /* call store helper */
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
- tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I32 : TCG_TYPE_I32, TCG_REG_A2,
- l->type, size, l->datalo_reg);
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi);
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr);
-
- tcg_out_call_int(s, qemu_st_helpers[size], false);
-
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
+ tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
return tcg_out_goto(s, l->raddr);
}
#else
-
-/*
- * Alignment helpers for user-mode emulation
- */
-
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
- unsigned a_bits)
-{
- TCGLabelQemuLdst *l = new_ldst_label(s);
-
- l->is_ld = is_ld;
- l->addrlo_reg = addr_reg;
-
- /*
- * Without micro-architecture details, we don't know which of bstrpick or
- * andi is faster, so use bstrpick as it's not constrained by imm field
- * width. (Not to say alignments >= 2^12 are going to happen any time
- * soon, though)
- */
- tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1);
-
- l->label_ptr[0] = s->code_ptr;
- tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0);
-
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
-}
-
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
/* resolve label address */
@@ -997,27 +875,102 @@
#endif /* CONFIG_SOFTMMU */
-/*
- * `ext32u` the address register into the temp register given,
- * if target is 32-bit, no-op otherwise.
- *
- * Returns the address register ready for use with TLB addend.
- */
-static TCGReg tcg_out_zext_addr_if_32_bit(TCGContext *s,
- TCGReg addr, TCGReg tmp)
-{
- if (TARGET_LONG_BITS == 32) {
- tcg_out_ext32u(s, tmp, addr);
- return tmp;
- }
- return addr;
-}
-
typedef struct {
TCGReg base;
TCGReg index;
} HostAddress;
+/*
+ * For softmmu, perform the TLB load and compare.
+ * For useronly, perform any required alignment tests.
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
+ * is required and fill in @h with the host address for the fast path.
+ */
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
+ TCGReg addr_reg, MemOpIdx oi,
+ bool is_ld)
+{
+ TCGLabelQemuLdst *ldst = NULL;
+ MemOp opc = get_memop(oi);
+ unsigned a_bits = get_alignment_bits(opc);
+
+#ifdef CONFIG_SOFTMMU
+ unsigned s_bits = opc & MO_SIZE;
+ int mem_index = get_mmuidx(oi);
+ int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
+ int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
+ int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
+ tcg_target_long compare_mask;
+
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addr_reg;
+
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
+
+ tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
+ tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
+
+ /* Load the tlb comparator and the addend. */
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write));
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
+ offsetof(CPUTLBEntry, addend));
+
+ /* We don't support unaligned accesses. */
+ if (a_bits < s_bits) {
+ a_bits = s_bits;
+ }
+ /* Clear the non-page, non-alignment bits from the address. */
+ compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
+ tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
+
+ /* Compare masked address with the TLB entry. */
+ ldst->label_ptr[0] = s->code_ptr;
+ tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
+
+ h->index = TCG_REG_TMP2;
+#else
+ if (a_bits) {
+ ldst = new_ldst_label(s);
+
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addr_reg;
+
+ /*
+ * Without micro-architecture details, we don't know which of
+ * bstrpick or andi is faster, so use bstrpick as it's not
+ * constrained by imm field width. Not to say alignments >= 2^12
+ * are going to happen any time soon.
+ */
+ tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1);
+
+ ldst->label_ptr[0] = s->code_ptr;
+ tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0);
+ }
+
+ h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
+#endif
+
+ if (TARGET_LONG_BITS == 32) {
+ h->base = TCG_REG_TMP0;
+ tcg_out_ext32u(s, h->base, addr_reg);
+ } else {
+ h->base = addr_reg;
+ }
+
+ return ldst;
+}
+
static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type,
TCGReg rd, HostAddress h)
{
@@ -1057,29 +1010,17 @@
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
- MemOp opc = get_memop(oi);
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#ifdef CONFIG_SOFTMMU
- tcg_insn_unit *label_ptr[1];
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
+ tcg_out_qemu_ld_indexed(s, get_memop(oi), data_type, data_reg, h);
- tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
- h.index = TCG_REG_TMP2;
-#else
- unsigned a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = data_reg;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
- h.index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
-#endif
-
- h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
- tcg_out_qemu_ld_indexed(s, opc, data_type, data_reg, h);
-
-#ifdef CONFIG_SOFTMMU
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
- s->code_ptr, label_ptr);
-#endif
}
static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc,
@@ -1109,29 +1050,17 @@
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
- MemOp opc = get_memop(oi);
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#ifdef CONFIG_SOFTMMU
- tcg_insn_unit *label_ptr[1];
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
+ tcg_out_qemu_st_indexed(s, get_memop(oi), data_reg, h);
- tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
- h.index = TCG_REG_TMP2;
-#else
- unsigned a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = data_reg;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
- h.index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
-#endif
-
- h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
- tcg_out_qemu_st_indexed(s, opc, data_reg, h);
-
-#ifdef CONFIG_SOFTMMU
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
- s->code_ptr, label_ptr);
-#endif
}
/*
@@ -1601,16 +1530,14 @@
case INDEX_op_st32_i64:
case INDEX_op_st_i32:
case INDEX_op_st_i64:
+ case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_st_i64:
return C_O0_I2(rZ, r);
case INDEX_op_brcond_i32:
case INDEX_op_brcond_i64:
return C_O0_I2(rZ, rZ);
- case INDEX_op_qemu_st_i32:
- case INDEX_op_qemu_st_i64:
- return C_O0_I2(LZ, L);
-
case INDEX_op_ext8s_i32:
case INDEX_op_ext8s_i64:
case INDEX_op_ext8u_i32:
@@ -1646,11 +1573,9 @@
case INDEX_op_ld32u_i64:
case INDEX_op_ld_i32:
case INDEX_op_ld_i64:
- return C_O1_I1(r, r);
-
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64:
- return C_O1_I1(r, L);
+ return C_O1_I1(r, r);
case INDEX_op_andc_i32:
case INDEX_op_andc_i64:
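The loongarch64 comparator above takes the simpler route: it raises the alignment requirement to the access size ("We don't support unaligned accesses"), so a single AND against TARGET_PAGE_MASK | ((1 << a_bits) - 1) followed by one compare rejects both misaligned and page-straddling accesses, because an access aligned to its own size can never cross a page. Below is a stand-alone sketch of that invariant, again assuming 4 KiB pages for illustration; the tlb_tag and address values are invented for the example.

#include <stdint.h>
#include <stdio.h>

/* Assumption for illustration only: 4 KiB pages. */
#define PAGE_BITS 12
#define PAGE_MASK (~(uint64_t)0 << PAGE_BITS)

/* compare_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1), as in the diff. */
static int masked_compare_hits(uint64_t addr, uint64_t tlb_tag,
                               unsigned a_bits, unsigned s_bits)
{
    uint64_t compare_mask;

    if (a_bits < s_bits) {
        a_bits = s_bits;    /* raise alignment to the access size */
    }
    compare_mask = PAGE_MASK | (((uint64_t)1 << a_bits) - 1);
    return (addr & compare_mask) == tlb_tag;
}

int main(void)
{
    uint64_t tag = 0x3000;   /* page-aligned comparator for page 0x3000 */

    /* 8-byte load, 8-byte aligned, last byte still at 0x3fff: hit. */
    printf("%d\n", masked_compare_hits(0x3ff8, tag, 3, 3));   /* 1 */
    /*
     * An 8-byte load at 0x3ffc would straddle into page 0x4000, but it
     * is also misaligned once a_bits is raised to 3, so the compare
     * fails and the slow path handles it.
     */
    printf("%d\n", masked_compare_hits(0x3ffc, tag, 2, 3));   /* 0 */
    return 0;
}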
diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h
index fe3e868..864034f 100644
--- a/tcg/mips/tcg-target-con-set.h
+++ b/tcg/mips/tcg-target-con-set.h
@@ -12,15 +12,13 @@
C_O0_I1(r)
C_O0_I2(rZ, r)
C_O0_I2(rZ, rZ)
-C_O0_I2(SZ, S)
-C_O0_I3(SZ, S, S)
-C_O0_I3(SZ, SZ, S)
+C_O0_I3(rZ, r, r)
+C_O0_I3(rZ, rZ, r)
C_O0_I4(rZ, rZ, rZ, rZ)
-C_O0_I4(SZ, SZ, S, S)
-C_O1_I1(r, L)
+C_O0_I4(rZ, rZ, r, r)
C_O1_I1(r, r)
C_O1_I2(r, 0, rZ)
-C_O1_I2(r, L, L)
+C_O1_I2(r, r, r)
C_O1_I2(r, r, ri)
C_O1_I2(r, r, rI)
C_O1_I2(r, r, rIK)
@@ -30,7 +28,6 @@
C_O1_I2(r, rZ, rZ)
C_O1_I4(r, rZ, rZ, rZ, 0)
C_O1_I4(r, rZ, rZ, rZ, rZ)
-C_O2_I1(r, r, L)
-C_O2_I2(r, r, L, L)
+C_O2_I1(r, r, r)
C_O2_I2(r, r, r, r)
C_O2_I4(r, r, rZ, rZ, rN, rN)
diff --git a/tcg/mips/tcg-target-con-str.h b/tcg/mips/tcg-target-con-str.h
index e4b2965..413c280 100644
--- a/tcg/mips/tcg-target-con-str.h
+++ b/tcg/mips/tcg-target-con-str.h
@@ -9,8 +9,6 @@
* REGS(letter, register_mask)
*/
REGS('r', ALL_GENERAL_REGS)
-REGS('L', ALL_QLOAD_REGS)
-REGS('S', ALL_QSTORE_REGS)
/*
* Define constraint letters for constants:
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index ef8350e..5ad9867 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -176,20 +176,6 @@
#define TCG_CT_CONST_WSZ 0x2000 /* word size */
#define ALL_GENERAL_REGS 0xffffffffu
-#define NOA0_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_A0))
-
-#ifdef CONFIG_SOFTMMU
-#define ALL_QLOAD_REGS \
- (NOA0_REGS & ~((TCG_TARGET_REG_BITS < TARGET_LONG_BITS) << TCG_REG_A2))
-#define ALL_QSTORE_REGS \
- (NOA0_REGS & ~(TCG_TARGET_REG_BITS < TARGET_LONG_BITS \
- ? (1 << TCG_REG_A2) | (1 << TCG_REG_A3) \
- : (1 << TCG_REG_A1)))
-#else
-#define ALL_QLOAD_REGS NOA0_REGS
-#define ALL_QSTORE_REGS NOA0_REGS
-#endif
-
static bool is_p2m1(tcg_target_long val)
{
@@ -370,6 +356,8 @@
ALIAS_PADDI = sizeof(void *) == 4 ? OPC_ADDIU : OPC_DADDIU,
ALIAS_TSRL = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
? OPC_SRL : OPC_DSRL,
+ ALIAS_TADDI = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
+ ? OPC_ADDIU : OPC_DADDIU,
} MIPSInsn;
/*
@@ -1088,220 +1076,46 @@
}
#if defined(CONFIG_SOFTMMU)
-static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
+static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
[MO_UB] = helper_ret_ldub_mmu,
[MO_SB] = helper_ret_ldsb_mmu,
- [MO_LEUW] = helper_le_lduw_mmu,
- [MO_LESW] = helper_le_ldsw_mmu,
- [MO_LEUL] = helper_le_ldul_mmu,
- [MO_LEUQ] = helper_le_ldq_mmu,
- [MO_BEUW] = helper_be_lduw_mmu,
- [MO_BESW] = helper_be_ldsw_mmu,
- [MO_BEUL] = helper_be_ldul_mmu,
- [MO_BEUQ] = helper_be_ldq_mmu,
-#if TCG_TARGET_REG_BITS == 64
- [MO_LESL] = helper_le_ldsl_mmu,
- [MO_BESL] = helper_be_ldsl_mmu,
+#if HOST_BIG_ENDIAN
+ [MO_UW] = helper_be_lduw_mmu,
+ [MO_SW] = helper_be_ldsw_mmu,
+ [MO_UL] = helper_be_ldul_mmu,
+ [MO_SL] = helper_be_ldsl_mmu,
+ [MO_UQ] = helper_be_ldq_mmu,
+#else
+ [MO_UW] = helper_le_lduw_mmu,
+ [MO_SW] = helper_le_ldsw_mmu,
+ [MO_UL] = helper_le_ldul_mmu,
+ [MO_UQ] = helper_le_ldq_mmu,
+ [MO_SL] = helper_le_ldsl_mmu,
#endif
};
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
+static void * const qemu_st_helpers[MO_SIZE + 1] = {
[MO_UB] = helper_ret_stb_mmu,
- [MO_LEUW] = helper_le_stw_mmu,
- [MO_LEUL] = helper_le_stl_mmu,
- [MO_LEUQ] = helper_le_stq_mmu,
- [MO_BEUW] = helper_be_stw_mmu,
- [MO_BEUL] = helper_be_stl_mmu,
- [MO_BEUQ] = helper_be_stq_mmu,
+#if HOST_BIG_ENDIAN
+ [MO_UW] = helper_be_stw_mmu,
+ [MO_UL] = helper_be_stl_mmu,
+ [MO_UQ] = helper_be_stq_mmu,
+#else
+ [MO_UW] = helper_le_stw_mmu,
+ [MO_UL] = helper_le_stl_mmu,
+ [MO_UQ] = helper_le_stq_mmu,
+#endif
};
-/* Helper routines for marshalling helper function arguments into
- * the correct registers and stack.
- * I is where we want to put this argument, and is updated and returned
- * for the next call. ARG is the argument itself.
- *
- * We provide routines for arguments which are: immediate, 32 bit
- * value in register, 16 and 8 bit values in register (which must be zero
- * extended before use) and 64 bit value in a lo:hi register pair.
- */
-
-static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg)
-{
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
- tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg);
- } else {
- /* For N32 and N64, the initial offset is different. But there
- we also have 8 argument register so we don't run out here. */
- tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
- tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i);
- }
- return i + 1;
-}
-
-static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg)
-{
- TCGReg tmp = TCG_TMP0;
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
- tmp = tcg_target_call_iarg_regs[i];
- }
- tcg_out_ext8u(s, tmp, arg);
- return tcg_out_call_iarg_reg(s, i, tmp);
-}
-
-static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg)
-{
- TCGReg tmp = TCG_TMP0;
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
- tmp = tcg_target_call_iarg_regs[i];
- }
- tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff);
- return tcg_out_call_iarg_reg(s, i, tmp);
-}
-
-static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg)
-{
- TCGReg tmp = TCG_TMP0;
- if (arg == 0) {
- tmp = TCG_REG_ZERO;
- } else {
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
- tmp = tcg_target_call_iarg_regs[i];
- }
- tcg_out_movi(s, TCG_TYPE_REG, tmp, arg);
- }
- return tcg_out_call_iarg_reg(s, i, tmp);
-}
-
-static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
-{
- tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
- i = (i + 1) & ~1;
- i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al));
- i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah));
- return i;
-}
-
-/* We expect to use a 16-bit negative offset from ENV. */
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
-
-/*
- * Perform the tlb comparison operation.
- * The complete host address is placed in BASE.
- * Clobbers TMP0, TMP1, TMP2, TMP3.
- */
-static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
- TCGReg addrh, MemOpIdx oi,
- tcg_insn_unit *label_ptr[2], bool is_load)
-{
- MemOp opc = get_memop(oi);
- unsigned a_bits = get_alignment_bits(opc);
- unsigned s_bits = opc & MO_SIZE;
- unsigned a_mask = (1 << a_bits) - 1;
- unsigned s_mask = (1 << s_bits) - 1;
- int mem_index = get_mmuidx(oi);
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
- int add_off = offsetof(CPUTLBEntry, addend);
- int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
- : offsetof(CPUTLBEntry, addr_write));
- target_ulong tlb_mask;
-
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
-
- /* Extract the TLB index from the address into TMP3. */
- tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrl,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
-
- /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
-
- /* Load the (low-half) tlb comparator. */
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
- tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
- } else {
- tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
- : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
- TCG_TMP0, TCG_TMP3, cmp_off);
- }
-
- /* Zero extend a 32-bit guest address for a 64-bit host. */
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
- tcg_out_ext32u(s, base, addrl);
- addrl = base;
- }
-
- /*
- * Mask the page bits, keeping the alignment bits to compare against.
- * For unaligned accesses, compare against the end of the access to
- * verify that it does not cross a page boundary.
- */
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
- tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask);
- if (a_mask >= s_mask) {
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
- } else {
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrl, s_mask - a_mask);
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
- }
-
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
- /* Load the tlb addend for the fast path. */
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
- }
-
- label_ptr[0] = s->code_ptr;
- tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
-
- /* Load and test the high half tlb comparator. */
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
- /* delay slot */
- tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
-
- /* Load the tlb addend for the fast path. */
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
-
- label_ptr[1] = s->code_ptr;
- tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0);
- }
-
- /* delay slot */
- tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrl);
-}
-
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
- TCGType ext,
- TCGReg datalo, TCGReg datahi,
- TCGReg addrlo, TCGReg addrhi,
- void *raddr, tcg_insn_unit *label_ptr[2])
-{
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->oi = oi;
- label->type = ext;
- label->datalo_reg = datalo;
- label->datahi_reg = datahi;
- label->addrlo_reg = addrlo;
- label->addrhi_reg = addrhi;
- label->raddr = tcg_splitwx_to_rx(raddr);
- label->label_ptr[0] = label_ptr[0];
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
- label->label_ptr[1] = label_ptr[1];
- }
-}
+/* We have four temps, we might as well expose three of them. */
+static const TCGLdstHelperParam ldst_helper_param = {
+ .ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 }
+};
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
- MemOpIdx oi = l->oi;
- MemOp opc = get_memop(oi);
- TCGReg v0;
- int i;
+ MemOp opc = get_memop(l->oi);
/* resolve label address */
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
@@ -1310,29 +1124,13 @@
return false;
}
- i = 1;
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
- i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
- } else {
- i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
- }
- i = tcg_out_call_iarg_imm(s, i, oi);
- i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr);
- tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false);
- /* delay slot */
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
- v0 = l->datalo_reg;
- if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
- /* We eliminated V0 from the possible output registers, so it
- cannot be clobbered here. So we must move V1 first. */
- if (MIPS_BE) {
- tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1);
- v0 = l->datahi_reg;
- } else {
- tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1);
- }
- }
+ tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
+ /* delay slot */
+ tcg_out_nop(s);
+
+ tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
if (!reloc_pc16(s->code_ptr - 1, l->raddr)) {
@@ -1340,22 +1138,14 @@
}
/* delay slot */
- if (TCG_TARGET_REG_BITS == 64 && l->type == TCG_TYPE_I32) {
- /* we always sign-extend 32-bit loads */
- tcg_out_ext32s(s, v0, TCG_REG_V0);
- } else {
- tcg_out_opc_reg(s, OPC_OR, v0, TCG_REG_V0, TCG_REG_ZERO);
- }
+ tcg_out_nop(s);
return true;
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
- MemOpIdx oi = l->oi;
- MemOp opc = get_memop(oi);
- MemOp s_bits = opc & MO_SIZE;
- int i;
+ MemOp opc = get_memop(l->oi);
/* resolve label address */
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
@@ -1364,71 +1154,23 @@
return false;
}
- i = 1;
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
- i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
- } else {
- i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
- }
- switch (s_bits) {
- case MO_8:
- i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg);
- break;
- case MO_16:
- i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg);
- break;
- case MO_32:
- i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
- break;
- case MO_64:
- if (TCG_TARGET_REG_BITS == 32) {
- i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg);
- } else {
- i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
- }
- break;
- default:
- g_assert_not_reached();
- }
- i = tcg_out_call_iarg_imm(s, i, oi);
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
- /* Tail call to the store helper. Thus force the return address
- computation to take place in the return address register. */
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr);
- i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA);
- tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true);
+ tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
/* delay slot */
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ tcg_out_nop(s);
+
+ tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
+ if (!reloc_pc16(s->code_ptr - 1, l->raddr)) {
+ return false;
+ }
+
+ /* delay slot */
+ tcg_out_nop(s);
return true;
}
#else
-
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
- TCGReg addrhi, unsigned a_bits)
-{
- unsigned a_mask = (1 << a_bits) - 1;
- TCGLabelQemuLdst *l = new_ldst_label(s);
-
- l->is_ld = is_ld;
- l->addrlo_reg = addrlo;
- l->addrhi_reg = addrhi;
-
- /* We are expecting a_bits to max out at 7, much lower than ANDI. */
- tcg_debug_assert(a_bits < 16);
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
-
- l->label_ptr[0] = s->code_ptr;
- if (use_mips32r6_instructions) {
- tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0);
- } else {
- tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO);
- tcg_out_nop(s);
- }
-
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
-}
-
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
void *target;
@@ -1478,55 +1220,166 @@
}
#endif /* SOFTMMU */
+typedef struct {
+ TCGReg base;
+ MemOp align;
+} HostAddress;
+
+/*
+ * For softmmu, perform the TLB load and compare.
+ * For useronly, perform any required alignment tests.
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
+ * is required and fill in @h with the host address for the fast path.
+ */
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
+ TCGReg addrlo, TCGReg addrhi,
+ MemOpIdx oi, bool is_ld)
+{
+ TCGLabelQemuLdst *ldst = NULL;
+ MemOp opc = get_memop(oi);
+ unsigned a_bits = get_alignment_bits(opc);
+ unsigned s_bits = opc & MO_SIZE;
+ unsigned a_mask = (1 << a_bits) - 1;
+ TCGReg base;
+
+#ifdef CONFIG_SOFTMMU
+ unsigned s_mask = (1 << s_bits) - 1;
+ int mem_index = get_mmuidx(oi);
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
+ int add_off = offsetof(CPUTLBEntry, addend);
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write);
+
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addrlo;
+ ldst->addrhi_reg = addrhi;
+
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
+
+ /* Extract the TLB index from the address into TMP3. */
+ tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrlo,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
+
+ /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+
+ /* Load the (low-half) tlb comparator. */
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
+ } else {
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_TMP0, TCG_TMP3, cmp_off);
+ }
+
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
+ /* Load the tlb addend for the fast path. */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
+ }
+
+ /*
+ * Mask the page bits, keeping the alignment bits to compare against.
+ * For unaligned accesses, compare against the end of the access to
+ * verify that it does not cross a page boundary.
+ */
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
+ if (a_mask < s_mask) {
+ tcg_out_opc_imm(s, ALIAS_TADDI, TCG_TMP2, addrlo, s_mask - a_mask);
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
+ } else {
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
+ }
+
+ /* Zero extend a 32-bit guest address for a 64-bit host. */
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, TCG_TMP2, addrlo);
+ addrlo = TCG_TMP2;
+ }
+
+ ldst->label_ptr[0] = s->code_ptr;
+ tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
+
+ /* Load and test the high half tlb comparator. */
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ /* delay slot */
+ tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
+
+ /* Load the tlb addend for the fast path. */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
+
+ ldst->label_ptr[1] = s->code_ptr;
+ tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0);
+ }
+
+ /* delay slot */
+ base = TCG_TMP3;
+ tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP3, addrlo);
+#else
+ if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) {
+ ldst = new_ldst_label(s);
+
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addrlo;
+ ldst->addrhi_reg = addrhi;
+
+ /* We are expecting a_bits to max out at 7, much lower than ANDI. */
+ tcg_debug_assert(a_bits < 16);
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
+
+ ldst->label_ptr[0] = s->code_ptr;
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0);
+ } else {
+ tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO);
+ tcg_out_nop(s);
+ }
+ }
+
+ base = addrlo;
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, TCG_REG_A0, base);
+ base = TCG_REG_A0;
+ }
+ if (guest_base) {
+ if (guest_base == (int16_t)guest_base) {
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
+ } else {
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
+ TCG_GUEST_BASE_REG);
+ }
+ base = TCG_REG_A0;
+ }
+#endif
+
+ h->base = base;
+ h->align = a_bits;
+ return ldst;
+}
+
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
TCGReg base, MemOp opc, TCGType type)
{
- switch (opc & (MO_SSIZE | MO_BSWAP)) {
+ switch (opc & MO_SSIZE) {
case MO_UB:
tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
break;
case MO_SB:
tcg_out_opc_imm(s, OPC_LB, lo, base, 0);
break;
- case MO_UW | MO_BSWAP:
- tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
- tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
- break;
case MO_UW:
tcg_out_opc_imm(s, OPC_LHU, lo, base, 0);
break;
- case MO_SW | MO_BSWAP:
- tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
- tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OS);
- break;
case MO_SW:
tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
break;
- case MO_UL | MO_BSWAP:
- if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
- if (use_mips32r2_instructions) {
- tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
- tcg_out_bswap32(s, lo, lo, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
- } else {
- tcg_out_bswap_subr(s, bswap32u_addr);
- /* delay slot */
- tcg_out_opc_imm(s, OPC_LWU, TCG_TMP0, base, 0);
- tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
- }
- break;
- }
- /* FALLTHRU */
- case MO_SL | MO_BSWAP:
- if (use_mips32r2_instructions) {
- tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
- tcg_out_bswap32(s, lo, lo, 0);
- } else {
- tcg_out_bswap_subr(s, bswap32_addr);
- /* delay slot */
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
- tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_TMP3);
- }
- break;
case MO_UL:
if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
@@ -1536,35 +1389,6 @@
case MO_SL:
tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
break;
- case MO_UQ | MO_BSWAP:
- if (TCG_TARGET_REG_BITS == 64) {
- if (use_mips32r2_instructions) {
- tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
- tcg_out_bswap64(s, lo, lo);
- } else {
- tcg_out_bswap_subr(s, bswap64_addr);
- /* delay slot */
- tcg_out_opc_imm(s, OPC_LD, TCG_TMP0, base, 0);
- tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
- }
- } else if (use_mips32r2_instructions) {
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 4);
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0);
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1);
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16);
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16);
- } else {
- tcg_out_bswap_subr(s, bswap32_addr);
- /* delay slot */
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 4);
- tcg_out_bswap_subr(s, bswap32_addr);
- /* delay slot */
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3);
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3);
- }
- break;
case MO_UQ:
/* Prefer to load from offset 0 first, but allow for overlap. */
if (TCG_TARGET_REG_BITS == 64) {
@@ -1589,25 +1413,20 @@
const MIPSInsn lw2 = MIPS_BE ? OPC_LWR : OPC_LWL;
const MIPSInsn ld1 = MIPS_BE ? OPC_LDL : OPC_LDR;
const MIPSInsn ld2 = MIPS_BE ? OPC_LDR : OPC_LDL;
+ bool sgn = opc & MO_SIGN;
- bool sgn = (opc & MO_SIGN);
-
- switch (opc & (MO_SSIZE | MO_BSWAP)) {
- case MO_SW | MO_BE:
- case MO_UW | MO_BE:
- tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 0);
- tcg_out_opc_imm(s, OPC_LBU, lo, base, 1);
- if (use_mips32r2_instructions) {
- tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8);
- } else {
- tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
- tcg_out_opc_reg(s, OPC_OR, lo, TCG_TMP0, TCG_TMP1);
- }
- break;
-
- case MO_SW | MO_LE:
- case MO_UW | MO_LE:
- if (use_mips32r2_instructions && lo != base) {
+ switch (opc & MO_SIZE) {
+ case MO_16:
+ if (HOST_BIG_ENDIAN) {
+ tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 0);
+ tcg_out_opc_imm(s, OPC_LBU, lo, base, 1);
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8);
+ } else {
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
+ tcg_out_opc_reg(s, OPC_OR, lo, lo, TCG_TMP0);
+ }
+ } else if (use_mips32r2_instructions && lo != base) {
tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 1);
tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8);
@@ -1619,8 +1438,7 @@
}
break;
- case MO_SL:
- case MO_UL:
+ case MO_32:
tcg_out_opc_imm(s, lw1, lo, base, 0);
tcg_out_opc_imm(s, lw2, lo, base, 3);
if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn) {
@@ -1628,28 +1446,7 @@
}
break;
- case MO_UL | MO_BSWAP:
- case MO_SL | MO_BSWAP:
- if (use_mips32r2_instructions) {
- tcg_out_opc_imm(s, lw1, lo, base, 0);
- tcg_out_opc_imm(s, lw2, lo, base, 3);
- tcg_out_bswap32(s, lo, lo,
- TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64
- ? (sgn ? TCG_BSWAP_OS : TCG_BSWAP_OZ) : 0);
- } else {
- const tcg_insn_unit *subr =
- (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn
- ? bswap32u_addr : bswap32_addr);
-
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0);
- tcg_out_bswap_subr(s, subr);
- /* delay slot */
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 3);
- tcg_out_mov(s, type, lo, TCG_TMP3);
- }
- break;
-
- case MO_UQ:
+ case MO_64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_opc_imm(s, ld1, lo, base, 0);
tcg_out_opc_imm(s, ld2, lo, base, 7);
@@ -1661,42 +1458,6 @@
}
break;
- case MO_UQ | MO_BSWAP:
- if (TCG_TARGET_REG_BITS == 64) {
- if (use_mips32r2_instructions) {
- tcg_out_opc_imm(s, ld1, lo, base, 0);
- tcg_out_opc_imm(s, ld2, lo, base, 7);
- tcg_out_bswap64(s, lo, lo);
- } else {
- tcg_out_opc_imm(s, ld1, TCG_TMP0, base, 0);
- tcg_out_bswap_subr(s, bswap64_addr);
- /* delay slot */
- tcg_out_opc_imm(s, ld2, TCG_TMP0, base, 7);
- tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
- }
- } else if (use_mips32r2_instructions) {
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0);
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3);
- tcg_out_opc_imm(s, lw1, TCG_TMP1, base, 4 + 0);
- tcg_out_opc_imm(s, lw2, TCG_TMP1, base, 4 + 3);
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0);
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1);
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16);
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16);
- } else {
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0);
- tcg_out_bswap_subr(s, bswap32_addr);
- /* delay slot */
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3);
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 4 + 0);
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3);
- tcg_out_bswap_subr(s, bswap32_addr);
- /* delay slot */
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 4 + 3);
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3);
- }
- break;
-
default:
g_assert_not_reached();
}
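
The MO_16 case of the unaligned-load path above now keys the byte order off HOST_BIG_ENDIAN rather than off a separate MO_BSWAP case: with TCG_TARGET_HAS_MEMORY_BSWAP set to 0, the memory op reaching the backend is already in host byte order, so only the host's endianness decides which byte becomes the high half. The standalone C sketch below models just that combine; it ignores the MO_SIGN extension of the high byte, and the byte values are made up for illustration.

/*
 * Standalone illustration of the MO_16 unaligned-load combine above.
 * This models only the arithmetic, not the emitted MIPS instructions:
 * two single-byte loads are merged so that the byte at offset 0 lands
 * in the half of the result dictated by the host byte order.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t combine16(uint8_t b0, uint8_t b1, int big_endian)
{
    if (big_endian) {
        /* Byte at offset 0 is the most significant byte. */
        return (uint16_t)((b0 << 8) | b1);
    }
    /* Little endian: byte at offset 0 is the least significant byte. */
    return (uint16_t)(b0 | (b1 << 8));
}

int main(void)
{
    uint8_t mem[2] = { 0x12, 0x34 };   /* bytes at guest offsets 0 and 1 */

    printf("big endian:    0x%04x\n", combine16(mem[0], mem[1], 1)); /* 0x1234 */
    printf("little endian: 0x%04x\n", combine16(mem[0], mem[1], 0)); /* 0x3412 */
    return 0;
}
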
@@ -1707,106 +1468,38 @@
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
- unsigned a_bits = get_alignment_bits(opc);
- unsigned s_bits = opc & MO_SIZE;
- TCGReg base;
+ TCGLabelQemuLdst *ldst;
+ HostAddress h;
- /*
- * R6 removes the left/right instructions but requires the
- * system to support misaligned memory accesses.
- */
-#if defined(CONFIG_SOFTMMU)
- tcg_insn_unit *label_ptr[2];
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
- base = TCG_REG_A0;
- tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 1);
- if (use_mips32r6_instructions || a_bits >= s_bits) {
- tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
+ if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
+ tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, data_type);
} else {
- tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type);
+ tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, data_type);
}
- add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
- addrlo, addrhi, s->code_ptr, label_ptr);
-#else
- base = addrlo;
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
- tcg_out_ext32u(s, TCG_REG_A0, base);
- base = TCG_REG_A0;
+
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = datalo;
+ ldst->datahi_reg = datahi;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
- if (guest_base) {
- if (guest_base == (int16_t)guest_base) {
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
- } else {
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
- TCG_GUEST_BASE_REG);
- }
- base = TCG_REG_A0;
- }
- if (use_mips32r6_instructions) {
- if (a_bits) {
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
- }
- tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
- } else {
- if (a_bits && a_bits != s_bits) {
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
- }
- if (a_bits >= s_bits) {
- tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
- } else {
- tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type);
- }
- }
-#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
TCGReg base, MemOp opc)
{
- /* Don't clutter the code below with checks to avoid bswapping ZERO. */
- if ((lo | hi) == 0) {
- opc &= ~MO_BSWAP;
- }
-
- switch (opc & (MO_SIZE | MO_BSWAP)) {
+ switch (opc & MO_SIZE) {
case MO_8:
tcg_out_opc_imm(s, OPC_SB, lo, base, 0);
break;
-
- case MO_16 | MO_BSWAP:
- tcg_out_bswap16(s, TCG_TMP1, lo, 0);
- lo = TCG_TMP1;
- /* FALLTHRU */
case MO_16:
tcg_out_opc_imm(s, OPC_SH, lo, base, 0);
break;
-
- case MO_32 | MO_BSWAP:
- tcg_out_bswap32(s, TCG_TMP3, lo, 0);
- lo = TCG_TMP3;
- /* FALLTHRU */
case MO_32:
tcg_out_opc_imm(s, OPC_SW, lo, base, 0);
break;
-
- case MO_64 | MO_BSWAP:
- if (TCG_TARGET_REG_BITS == 64) {
- tcg_out_bswap64(s, TCG_TMP3, lo);
- tcg_out_opc_imm(s, OPC_SD, TCG_TMP3, base, 0);
- } else if (use_mips32r2_instructions) {
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? lo : hi);
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? hi : lo);
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16);
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16);
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP0, base, 0);
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, 4);
- } else {
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0);
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 0);
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0);
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 4);
- }
- break;
case MO_64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_opc_imm(s, OPC_SD, lo, base, 0);
@@ -1815,7 +1508,6 @@
tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? lo : hi, base, 4);
}
break;
-
default:
g_assert_not_reached();
}
@@ -1829,54 +1521,18 @@
const MIPSInsn sd1 = MIPS_BE ? OPC_SDL : OPC_SDR;
const MIPSInsn sd2 = MIPS_BE ? OPC_SDR : OPC_SDL;
- /* Don't clutter the code below with checks to avoid bswapping ZERO. */
- if ((lo | hi) == 0) {
- opc &= ~MO_BSWAP;
- }
-
- switch (opc & (MO_SIZE | MO_BSWAP)) {
- case MO_16 | MO_BE:
+ switch (opc & MO_SIZE) {
+ case MO_16:
tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8);
- tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 0);
- tcg_out_opc_imm(s, OPC_SB, lo, base, 1);
+ tcg_out_opc_imm(s, OPC_SB, HOST_BIG_ENDIAN ? TCG_TMP0 : lo, base, 0);
+ tcg_out_opc_imm(s, OPC_SB, HOST_BIG_ENDIAN ? lo : TCG_TMP0, base, 1);
break;
- case MO_16 | MO_LE:
- tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8);
- tcg_out_opc_imm(s, OPC_SB, lo, base, 0);
- tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 1);
- break;
-
- case MO_32 | MO_BSWAP:
- tcg_out_bswap32(s, TCG_TMP3, lo, 0);
- lo = TCG_TMP3;
- /* fall through */
case MO_32:
tcg_out_opc_imm(s, sw1, lo, base, 0);
tcg_out_opc_imm(s, sw2, lo, base, 3);
break;
- case MO_64 | MO_BSWAP:
- if (TCG_TARGET_REG_BITS == 64) {
- tcg_out_bswap64(s, TCG_TMP3, lo);
- lo = TCG_TMP3;
- } else if (use_mips32r2_instructions) {
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? hi : lo);
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? lo : hi);
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16);
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16);
- hi = MIPS_BE ? TCG_TMP0 : TCG_TMP1;
- lo = MIPS_BE ? TCG_TMP1 : TCG_TMP0;
- } else {
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0);
- tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 0 + 0);
- tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 0 + 3);
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0);
- tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 4 + 0);
- tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 4 + 3);
- break;
- }
- /* fall through */
case MO_64:
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_opc_imm(s, sd1, lo, base, 0);
@@ -1899,57 +1555,23 @@
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
- unsigned a_bits = get_alignment_bits(opc);
- unsigned s_bits = opc & MO_SIZE;
- TCGReg base;
+ TCGLabelQemuLdst *ldst;
+ HostAddress h;
- /*
- * R6 removes the left/right instructions but requires the
- * system to support misaligned memory accesses.
- */
-#if defined(CONFIG_SOFTMMU)
- tcg_insn_unit *label_ptr[2];
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
- base = TCG_REG_A0;
- tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 0);
- if (use_mips32r6_instructions || a_bits >= s_bits) {
- tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
+ if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
+ tcg_out_qemu_st_direct(s, datalo, datahi, h.base, opc);
} else {
- tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc);
+ tcg_out_qemu_st_unalign(s, datalo, datahi, h.base, opc);
}
- add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
- addrlo, addrhi, s->code_ptr, label_ptr);
-#else
- base = addrlo;
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
- tcg_out_ext32u(s, TCG_REG_A0, base);
- base = TCG_REG_A0;
+
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = datalo;
+ ldst->datahi_reg = datahi;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
- if (guest_base) {
- if (guest_base == (int16_t)guest_base) {
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
- } else {
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
- TCG_GUEST_BASE_REG);
- }
- base = TCG_REG_A0;
- }
- if (use_mips32r6_instructions) {
- if (a_bits) {
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
- }
- tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
- } else {
- if (a_bits && a_bits != s_bits) {
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
- }
- if (a_bits >= s_bits) {
- tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
- } else {
- tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc);
- }
- }
-#endif
}
static void tcg_out_mb(TCGContext *s, TCGArg a0)
@@ -2596,18 +2218,18 @@
case INDEX_op_qemu_ld_i32:
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
- ? C_O1_I1(r, L) : C_O1_I2(r, L, L));
+ ? C_O1_I1(r, r) : C_O1_I2(r, r, r));
case INDEX_op_qemu_st_i32:
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
- ? C_O0_I2(SZ, S) : C_O0_I3(SZ, S, S));
+ ? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r));
case INDEX_op_qemu_ld_i64:
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
- : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, L)
- : C_O2_I2(r, r, L, L));
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
+ : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
+ : C_O2_I2(r, r, r, r));
case INDEX_op_qemu_st_i64:
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(SZ, S)
- : TARGET_LONG_BITS == 32 ? C_O0_I3(SZ, SZ, S)
- : C_O0_I4(SZ, SZ, S, S));
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r)
+ : TARGET_LONG_BITS == 32 ? C_O0_I3(rZ, rZ, r)
+ : C_O0_I4(rZ, rZ, r, r));
default:
g_assert_not_reached();
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 2431fc5..42bd7ff 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -204,8 +204,8 @@
#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
#endif
-#define TCG_TARGET_DEFAULT_MO (0)
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
+#define TCG_TARGET_DEFAULT_MO 0
+#define TCG_TARGET_HAS_MEMORY_BSWAP 0
#define TCG_TARGET_NEED_LDST_LABELS
diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
index a1a3458..f206b29 100644
--- a/tcg/ppc/tcg-target-con-set.h
+++ b/tcg/ppc/tcg-target-con-set.h
@@ -12,18 +12,15 @@
C_O0_I1(r)
C_O0_I2(r, r)
C_O0_I2(r, ri)
-C_O0_I2(S, S)
C_O0_I2(v, r)
-C_O0_I3(S, S, S)
+C_O0_I3(r, r, r)
C_O0_I4(r, r, ri, ri)
-C_O0_I4(S, S, S, S)
-C_O1_I1(r, L)
+C_O0_I4(r, r, r, r)
C_O1_I1(r, r)
C_O1_I1(v, r)
C_O1_I1(v, v)
C_O1_I1(v, vr)
C_O1_I2(r, 0, rZ)
-C_O1_I2(r, L, L)
C_O1_I2(r, rI, ri)
C_O1_I2(r, rI, rT)
C_O1_I2(r, r, r)
@@ -36,7 +33,7 @@
C_O1_I3(v, v, v, v)
C_O1_I4(r, r, ri, rZ, rZ)
C_O1_I4(r, r, r, ri, ri)
-C_O2_I1(L, L, L)
-C_O2_I2(L, L, L, L)
+C_O2_I1(r, r, r)
+C_O2_I2(r, r, r, r)
C_O2_I4(r, r, rI, rZM, r, r)
C_O2_I4(r, r, r, r, rI, rZM)
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
index 298ca20..094613c 100644
--- a/tcg/ppc/tcg-target-con-str.h
+++ b/tcg/ppc/tcg-target-con-str.h
@@ -10,19 +10,12 @@
*/
REGS('r', ALL_GENERAL_REGS)
REGS('v', ALL_VECTOR_REGS)
-REGS('A', 1u << TCG_REG_R3)
-REGS('B', 1u << TCG_REG_R4)
-REGS('C', 1u << TCG_REG_R5)
-REGS('D', 1u << TCG_REG_R6)
-REGS('L', ALL_QLOAD_REGS)
-REGS('S', ALL_QSTORE_REGS)
/*
* Define constraint letters for constants:
* CONST(letter, TCG_CT_CONST_* bit set)
*/
CONST('I', TCG_CT_CONST_S16)
-CONST('J', TCG_CT_CONST_U16)
CONST('M', TCG_CT_CONST_MONE)
CONST('T', TCG_CT_CONST_S32)
CONST('U', TCG_CT_CONST_U32)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index cd473de..29bfbfc 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -68,6 +68,7 @@
#else
# define TCG_REG_TMP1 TCG_REG_R12
#endif
+#define TCG_REG_TMP2 TCG_REG_R11
#define TCG_VEC_TMP1 TCG_REG_V0
#define TCG_VEC_TMP2 TCG_REG_V1
@@ -82,7 +83,6 @@
#define SZR (TCG_TARGET_REG_BITS / 8)
#define TCG_CT_CONST_S16 0x100
-#define TCG_CT_CONST_U16 0x200
#define TCG_CT_CONST_S32 0x400
#define TCG_CT_CONST_U32 0x800
#define TCG_CT_CONST_ZERO 0x1000
@@ -92,18 +92,6 @@
#define ALL_GENERAL_REGS 0xffffffffu
#define ALL_VECTOR_REGS 0xffffffff00000000ull
-#ifdef CONFIG_SOFTMMU
-#define ALL_QLOAD_REGS \
- (ALL_GENERAL_REGS & \
- ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5)))
-#define ALL_QSTORE_REGS \
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \
- (1 << TCG_REG_R5) | (1 << TCG_REG_R6)))
-#else
-#define ALL_QLOAD_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3))
-#define ALL_QSTORE_REGS ALL_QLOAD_REGS
-#endif
-
TCGPowerISA have_isa;
static bool have_isel;
bool have_altivec;
@@ -281,8 +269,6 @@
if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
return 1;
- } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
- return 1;
} else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
return 1;
} else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
@@ -2003,178 +1989,36 @@
[MO_BEUQ] = helper_be_stq_mmu,
};
-/* We expect to use a 16-bit negative offset from ENV. */
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
-
-/* Perform the TLB load and compare. Places the result of the comparison
- in CR7, loads the addend of the TLB into R3, and returns the register
- containing the guest address (zero-extended into R4). Clobbers R0 and R2. */
-
-static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
- TCGReg addrlo, TCGReg addrhi,
- int mem_index, bool is_read)
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
{
- int cmp_off
- = (is_read
- ? offsetof(CPUTLBEntry, addr_read)
- : offsetof(CPUTLBEntry, addr_write));
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
- unsigned s_bits = opc & MO_SIZE;
- unsigned a_bits = get_alignment_bits(opc);
-
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
-
- /* Extract the page index, shifted into place for tlb index. */
- if (TCG_TARGET_REG_BITS == 32) {
- tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
- } else {
- tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ if (arg < 0) {
+ arg = TCG_REG_TMP1;
}
- tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
-
- /* Load the TLB comparator. */
- if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
- uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
- ? LWZUX : LDUX);
- tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
- } else {
- tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
- } else {
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
- }
- }
-
- /* Load the TLB addend for use on the fast path. Do this asap
- to minimize any load use delay. */
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
- offsetof(CPUTLBEntry, addend));
-
- /* Clear the non-page, non-alignment bits from the address */
- if (TCG_TARGET_REG_BITS == 32) {
- /* We don't support unaligned accesses on 32-bits.
- * Preserve the bottom bits and thus trigger a comparison
- * failure on unaligned accesses.
- */
- if (a_bits < s_bits) {
- a_bits = s_bits;
- }
- tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
- } else {
- TCGReg t = addrlo;
-
- /* If the access is unaligned, we need to make sure we fail if we
- * cross a page boundary. The trick is to add the access size-1
- * to the address before masking the low bits. That will make the
- * address overflow to the next page if we cross a page boundary,
- * which will then force a mismatch of the TLB compare.
- */
- if (a_bits < s_bits) {
- unsigned a_mask = (1 << a_bits) - 1;
- unsigned s_mask = (1 << s_bits) - 1;
- tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
- t = TCG_REG_R0;
- }
-
- /* Mask the address for the requested alignment. */
- if (TARGET_LONG_BITS == 32) {
- tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
- /* Zero-extend the address for use in the final address. */
- tcg_out_ext32u(s, TCG_REG_R4, addrlo);
- addrlo = TCG_REG_R4;
- } else if (a_bits == 0) {
- tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
- } else {
- tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
- 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
- tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
- }
- }
-
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
- 0, 7, TCG_TYPE_I32);
- tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
- tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
- } else {
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
- 0, 7, TCG_TYPE_TL);
- }
-
- return addrlo;
+ tcg_out32(s, MFSPR | RT(arg) | LR);
+ return arg;
}
-/* Record the context of a call to the out of line helper code for the slow
- path for a load or store, so that we can later generate the correct
- helper code. */
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
- TCGType type, MemOpIdx oi,
- TCGReg datalo_reg, TCGReg datahi_reg,
- TCGReg addrlo_reg, TCGReg addrhi_reg,
- tcg_insn_unit *raddr, tcg_insn_unit *lptr)
-{
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->type = type;
- label->oi = oi;
- label->datalo_reg = datalo_reg;
- label->datahi_reg = datahi_reg;
- label->addrlo_reg = addrlo_reg;
- label->addrhi_reg = addrhi_reg;
- label->raddr = tcg_splitwx_to_rx(raddr);
- label->label_ptr[0] = lptr;
-}
+/*
+ * For the purposes of ppc32 sorting 4 input registers into 4 argument
+ * registers, there is an outside chance we would require 3 temps.
+ */
+static const TCGLdstHelperParam ldst_helper_param = {
+ .ra_gen = ldst_ra_gen,
+ .ntmp = 3,
+ .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
+};
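
The comment above hints at why the helper-argument machinery needs scratch registers at all: when the registers holding addrhi/addrlo/datahi/datalo overlap the call's argument registers, a naive in-order copy can clobber a value before it is read. The toy below is not the tcg algorithm; it simply snapshots every source, and the 4-element rotation is invented, but it shows the failure mode that the .tmp[] registers exist to avoid.

/*
 * Toy model of why moving values between overlapping register sets
 * needs temporaries.  The register file is just an array; the mapping
 * (a 4-element rotation) is invented for illustration and is not the
 * actual ppc32 argument layout.
 */
#include <stdio.h>

enum { NREGS = 8 };

int main(void)
{
    int regs[NREGS] = { 0, 0, 0, 30, 40, 50, 60, 0 };
    /* Desired parallel assignment: r3 <- r4, r4 <- r5, r5 <- r6, r6 <- r3 */
    const int dst[4] = { 3, 4, 5, 6 };
    const int src[4] = { 4, 5, 6, 3 };

    /* Naive sequential copy: r6 <- r3 reads a value already clobbered
     * by r3 <- r4, so the cycle is resolved incorrectly. */
    int naive[NREGS];
    for (int i = 0; i < NREGS; i++) naive[i] = regs[i];
    for (int i = 0; i < 4; i++) naive[dst[i]] = naive[src[i]];

    /* Snapshot the sources into temporaries first, then write.  The real
     * code is smarter about ordering, but spare registers bound what it
     * may need when it has to break such cycles. */
    int tmp[4];
    int fixed[NREGS];
    for (int i = 0; i < NREGS; i++) fixed[i] = regs[i];
    for (int i = 0; i < 4; i++) tmp[i] = fixed[src[i]];
    for (int i = 0; i < 4; i++) fixed[dst[i]] = tmp[i];

    printf("naive: r3..r6 = %d %d %d %d (r6 is wrong)\n",
           naive[3], naive[4], naive[5], naive[6]);
    printf("fixed: r3..r6 = %d %d %d %d\n",
           fixed[3], fixed[4], fixed[5], fixed[6]);
    return 0;
}
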
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
- MemOpIdx oi = lb->oi;
- MemOp opc = get_memop(oi);
- TCGReg hi, lo, arg = TCG_REG_R3;
+ MemOp opc = get_memop(lb->oi);
if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
- tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
-
- lo = lb->addrlo_reg;
- hi = lb->addrhi_reg;
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
- tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
- tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
- } else {
- /* If the address needed to be zero-extended, we'll have already
- placed it in R4. The only remaining case is 64-bit guest. */
- tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
- }
-
- tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
- tcg_out32(s, MFSPR | RT(arg) | LR);
-
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
-
- lo = lb->datalo_reg;
- hi = lb->datahi_reg;
- if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
- tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
- tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
- } else {
- tcg_out_movext(s, lb->type, lo,
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_R3);
- }
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
tcg_out_b(s, 0, lb->raddr);
return true;
@@ -2182,70 +2026,19 @@
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
- MemOpIdx oi = lb->oi;
- MemOp opc = get_memop(oi);
- MemOp s_bits = opc & MO_SIZE;
- TCGReg hi, lo, arg = TCG_REG_R3;
+ MemOp opc = get_memop(lb->oi);
if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
return false;
}
- tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
-
- lo = lb->addrlo_reg;
- hi = lb->addrhi_reg;
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
- tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
- tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
- } else {
- /* If the address needed to be zero-extended, we'll have already
- placed it in R4. The only remaining case is 64-bit guest. */
- tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
- }
-
- lo = lb->datalo_reg;
- hi = lb->datahi_reg;
- if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
- tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
- tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
- } else {
- tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
- arg++, lb->type, s_bits, lo);
- }
-
- tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
- tcg_out32(s, MFSPR | RT(arg) | LR);
-
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tcg_out_b(s, 0, lb->raddr);
return true;
}
#else
-
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
- TCGReg addrhi, unsigned a_bits)
-{
- unsigned a_mask = (1 << a_bits) - 1;
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->addrlo_reg = addrlo;
- label->addrhi_reg = addrhi;
-
- /* We are expecting a_bits to max out at 7, much lower than ANDI. */
- tcg_debug_assert(a_bits < 16);
- tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask));
-
- label->label_ptr[0] = s->code_ptr;
- tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
-
- label->raddr = tcg_splitwx_to_rx(s->code_ptr);
-}
-
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
@@ -2294,37 +2087,187 @@
TCGReg index;
} HostAddress;
+/*
+ * For softmmu, perform the TLB load and compare.
+ * For useronly, perform any required alignment tests.
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
+ * is required and fill in @h with the host address for the fast path.
+ */
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
+ TCGReg addrlo, TCGReg addrhi,
+ MemOpIdx oi, bool is_ld)
+{
+ TCGLabelQemuLdst *ldst = NULL;
+ MemOp opc = get_memop(oi);
+ unsigned a_bits = get_alignment_bits(opc);
+
+#ifdef CONFIG_SOFTMMU
+ int mem_index = get_mmuidx(oi);
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write);
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
+ unsigned s_bits = opc & MO_SIZE;
+
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addrlo;
+ ldst->addrhi_reg = addrhi;
+
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
+
+ /* Extract the page index, shifted into place for tlb index. */
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_out_shri32(s, TCG_REG_R0, addrlo,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ } else {
+ tcg_out_shri64(s, TCG_REG_R0, addrlo,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ }
+ tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
+
+ /* Load the (low part) TLB comparator into TMP2. */
+ if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
+ uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
+ ? LWZUX : LDUX);
+ tcg_out32(s, lxu | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
+ } else {
+ tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2,
+ TCG_REG_TMP1, cmp_off + 4 * HOST_BIG_ENDIAN);
+ } else {
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
+ }
+ }
+
+ /*
+ * Load the TLB addend for use on the fast path.
+ * Do this asap to minimize any load use delay.
+ */
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
+ offsetof(CPUTLBEntry, addend));
+ }
+
+ /* Clear the non-page, non-alignment bits from the address in R0. */
+ if (TCG_TARGET_REG_BITS == 32) {
+ /*
+ * We don't support unaligned accesses on 32-bits.
+ * Preserve the bottom bits and thus trigger a comparison
+ * failure on unaligned accesses.
+ */
+ if (a_bits < s_bits) {
+ a_bits = s_bits;
+ }
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
+ (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
+ } else {
+ TCGReg t = addrlo;
+
+ /*
+ * If the access is unaligned, we need to make sure we fail if we
+ * cross a page boundary. The trick is to add the access size-1
+ * to the address before masking the low bits. That will make the
+ * address overflow to the next page if we cross a page boundary,
+ * which will then force a mismatch of the TLB compare.
+ */
+ if (a_bits < s_bits) {
+ unsigned a_mask = (1 << a_bits) - 1;
+ unsigned s_mask = (1 << s_bits) - 1;
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
+ t = TCG_REG_R0;
+ }
+
+ /* Mask the address for the requested alignment. */
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
+ (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
+ } else if (a_bits == 0) {
+ tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
+ } else {
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
+ 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
+ }
+ }
+
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ /* Low part comparison into cr7. */
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
+ 0, 7, TCG_TYPE_I32);
+
+ /* Load the high part TLB comparator into TMP2. */
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
+ cmp_off + 4 * !HOST_BIG_ENDIAN);
+
+ /* Load addend, deferred for this case. */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
+ offsetof(CPUTLBEntry, addend));
+
+ /* High part comparison into cr6. */
+ tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 0, 6, TCG_TYPE_I32);
+
+ /* Combine comparisons into cr7. */
+ tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
+ } else {
+ /* Full comparison into cr7. */
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
+ 0, 7, TCG_TYPE_TL);
+ }
+
+ /* Record the conditional branch-and-link so the slow path can patch it. */
+ ldst->label_ptr[0] = s->code_ptr;
+ tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
+
+ h->base = TCG_REG_TMP1;
+#else
+ if (a_bits) {
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addrlo;
+ ldst->addrhi_reg = addrhi;
+
+ /* We are expecting a_bits to max out at 7, much lower than ANDI. */
+ tcg_debug_assert(a_bits < 16);
+ tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
+
+ ldst->label_ptr[0] = s->code_ptr;
+ tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
+ }
+
+ h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
+#endif
+
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ /* Zero-extend the guest address for use in the host address. */
+ tcg_out_ext32u(s, TCG_REG_R0, addrlo);
+ h->index = TCG_REG_R0;
+ } else {
+ h->index = addrlo;
+ }
+
+ return ldst;
+}
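
The unaligned-access handling inside prepare_host_addr() relies on the trick described in its comment: add (access size - 1 - alignment mask) to the address before masking, so an access that would cross a page carries into the page number and the TLB compare misses. A numeric illustration follows; TARGET_PAGE_BITS = 12 is an assumption made only for this example.

/*
 * Numeric illustration of the "add size-1 before masking" trick used in
 * prepare_host_addr() above.  The real page size depends on the guest.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_BITS 12
#define PAGE_MASK (~(((uint64_t)1 << PAGE_BITS) - 1))

static uint64_t compare_value(uint64_t addr, unsigned a_bits, unsigned s_bits)
{
    uint64_t a_mask = ((uint64_t)1 << a_bits) - 1;
    uint64_t s_mask = ((uint64_t)1 << s_bits) - 1;

    if (a_bits < s_bits) {
        /* Point at the last byte the access may touch beyond its alignment. */
        addr += s_mask - a_mask;
    }
    /* Keep the page number plus any required alignment bits; the TLB
     * comparator has those low bits clear, so either a page cross or a
     * misaligned address forces a mismatch and the slow path. */
    return addr & (PAGE_MASK | a_mask);
}

int main(void)
{
    /* 8-byte access (s_bits = 3), no alignment requirement, starting
     * 3 bytes before the end of page 0x1000: it spills into page 0x2000. */
    printf("crossing: 0x%llx\n",
           (unsigned long long)compare_value(0x1ffd, 0, 3));
    /* The same access fully contained in page 0x1000. */
    printf("inside:   0x%llx\n",
           (unsigned long long)compare_value(0x1ff0, 0, 3));
    return 0;
}
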
+
static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg addrlo, TCGReg addrhi,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
- MemOp s_bits = opc & MO_SIZE;
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#ifdef CONFIG_SOFTMMU
- tcg_insn_unit *label_ptr;
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
- h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), true);
- h.base = TCG_REG_R3;
-
- /* Load a pointer into the current opcode w/conditional branch-link. */
- label_ptr = s->code_ptr;
- tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
-#else /* !CONFIG_SOFTMMU */
- unsigned a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
- }
- h.base = guest_base ? TCG_GUEST_BASE_REG : 0;
- h.index = addrlo;
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
- tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
- h.index = TCG_REG_TMP1;
- }
-#endif
-
- if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
+ if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
if (opc & MO_BSWAP) {
tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
@@ -2357,10 +2300,12 @@
}
}
-#ifdef CONFIG_SOFTMMU
- add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
- addrlo, addrhi, s->code_ptr, label_ptr);
-#endif
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = datalo;
+ ldst->datahi_reg = datahi;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+ }
}
static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
@@ -2368,32 +2313,12 @@
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
- MemOp s_bits = opc & MO_SIZE;
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#ifdef CONFIG_SOFTMMU
- tcg_insn_unit *label_ptr;
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
- h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), false);
- h.base = TCG_REG_R3;
-
- /* Load a pointer into the current opcode w/conditional branch-link. */
- label_ptr = s->code_ptr;
- tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
-#else /* !CONFIG_SOFTMMU */
- unsigned a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
- }
- h.base = guest_base ? TCG_GUEST_BASE_REG : 0;
- h.index = addrlo;
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
- tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
- h.index = TCG_REG_TMP1;
- }
-#endif
-
- if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
+ if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
if (opc & MO_BSWAP) {
tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
@@ -2418,10 +2343,12 @@
}
}
-#ifdef CONFIG_SOFTMMU
- add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi,
- addrlo, addrhi, s->code_ptr, label_ptr);
-#endif
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = datalo;
+ ldst->datahi_reg = datahi;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+ }
}
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
@@ -3812,23 +3739,23 @@
case INDEX_op_qemu_ld_i32:
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
- ? C_O1_I1(r, L)
- : C_O1_I2(r, L, L));
+ ? C_O1_I1(r, r)
+ : C_O1_I2(r, r, r));
case INDEX_op_qemu_st_i32:
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
- ? C_O0_I2(S, S)
- : C_O0_I3(S, S, S));
+ ? C_O0_I2(r, r)
+ : C_O0_I3(r, r, r));
case INDEX_op_qemu_ld_i64:
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
- : TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L)
- : C_O2_I2(L, L, L, L));
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
+ : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
+ : C_O2_I2(r, r, r, r));
case INDEX_op_qemu_st_i64:
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S)
- : TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S)
- : C_O0_I4(S, S, S, S));
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
+ : TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r)
+ : C_O0_I4(r, r, r, r));
case INDEX_op_add_vec:
case INDEX_op_sub_vec:
@@ -3978,7 +3905,8 @@
#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
#endif
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
if (USE_REG_TB) {
diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h
index d4cff67..d88888d 100644
--- a/tcg/riscv/tcg-target-con-set.h
+++ b/tcg/riscv/tcg-target-con-set.h
@@ -10,10 +10,8 @@
* tcg-target-con-str.h; the constraint combination is inclusive or.
*/
C_O0_I1(r)
-C_O0_I2(LZ, L)
C_O0_I2(rZ, r)
C_O0_I2(rZ, rZ)
-C_O1_I1(r, L)
C_O1_I1(r, r)
C_O1_I2(r, r, ri)
C_O1_I2(r, r, rI)
diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h
index 8d8afae..6f1cfb9 100644
--- a/tcg/riscv/tcg-target-con-str.h
+++ b/tcg/riscv/tcg-target-con-str.h
@@ -9,7 +9,6 @@
* REGS(letter, register_mask)
*/
REGS('r', ALL_GENERAL_REGS)
-REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
/*
* Define constraint letters for constants:
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index a4cf60c..d12b824 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -125,17 +125,7 @@
#define TCG_CT_CONST_N12 0x400
#define TCG_CT_CONST_M12 0x800
-#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
-/*
- * For softmmu, we need to avoid conflicts with the first 5
- * argument registers to call the helper. Some of these are
- * also used for the tlb lookup.
- */
-#ifdef CONFIG_SOFTMMU
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5)
-#else
-#define SOFTMMU_RESERVE_REGS 0
-#endif
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
#define sextreg sextract64
@@ -899,10 +889,6 @@
#endif
};
-/* We expect to use a 12-bit negative offset from ENV. */
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
-
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
@@ -910,84 +896,14 @@
tcg_debug_assert(ok);
}
-static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, MemOpIdx oi,
- tcg_insn_unit **label_ptr, bool is_load)
-{
- MemOp opc = get_memop(oi);
- unsigned s_bits = opc & MO_SIZE;
- unsigned a_bits = get_alignment_bits(opc);
- tcg_target_long compare_mask;
- int mem_index = get_mmuidx(oi);
- int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
- int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
- int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
- TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
-
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
-
- tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
-
- /* Load the tlb comparator and the addend. */
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
- is_load ? offsetof(CPUTLBEntry, addr_read)
- : offsetof(CPUTLBEntry, addr_write));
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
- offsetof(CPUTLBEntry, addend));
-
- /* We don't support unaligned accesses. */
- if (a_bits < s_bits) {
- a_bits = s_bits;
- }
- /* Clear the non-page, non-alignment bits from the address. */
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
- if (compare_mask == sextreg(compare_mask, 0, 12)) {
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr, compare_mask);
- } else {
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr);
- }
-
- /* Compare masked address with the TLB entry. */
- label_ptr[0] = s->code_ptr;
- tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
-
- /* TLB Hit - translate address using addend. */
- if (TARGET_LONG_BITS == 32) {
- tcg_out_ext32u(s, TCG_REG_TMP0, addr);
- addr = TCG_REG_TMP0;
- }
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr);
- return TCG_REG_TMP0;
-}
-
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
- TCGType data_type, TCGReg data_reg,
- TCGReg addr_reg, void *raddr,
- tcg_insn_unit **label_ptr)
-{
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->oi = oi;
- label->type = data_type;
- label->datalo_reg = data_reg;
- label->addrlo_reg = addr_reg;
- label->raddr = tcg_splitwx_to_rx(raddr);
- label->label_ptr[0] = label_ptr[0];
-}
+/* We have three temps, we might as well expose them. */
+static const TCGLdstHelperParam ldst_helper_param = {
+ .ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
+};
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
- MemOpIdx oi = l->oi;
- MemOp opc = get_memop(oi);
- TCGReg a0 = tcg_target_call_iarg_regs[0];
- TCGReg a1 = tcg_target_call_iarg_regs[1];
- TCGReg a2 = tcg_target_call_iarg_regs[2];
- TCGReg a3 = tcg_target_call_iarg_regs[3];
+ MemOp opc = get_memop(l->oi);
/* resolve label address */
if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
@@ -995,13 +911,9 @@
}
/* call load helper */
- tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
- tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
- tcg_out_movi(s, TCG_TYPE_PTR, a2, oi);
- tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr);
-
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
- tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0);
+ tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
tcg_out_goto(s, l->raddr);
return true;
@@ -1009,14 +921,7 @@
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
- MemOpIdx oi = l->oi;
- MemOp opc = get_memop(oi);
- MemOp s_bits = opc & MO_SIZE;
- TCGReg a0 = tcg_target_call_iarg_regs[0];
- TCGReg a1 = tcg_target_call_iarg_regs[1];
- TCGReg a2 = tcg_target_call_iarg_regs[2];
- TCGReg a3 = tcg_target_call_iarg_regs[3];
- TCGReg a4 = tcg_target_call_iarg_regs[4];
+ MemOp opc = get_memop(l->oi);
/* resolve label address */
if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
@@ -1024,39 +929,13 @@
}
/* call store helper */
- tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
- tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
- tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, a2,
- l->type, s_bits, l->datalo_reg);
- tcg_out_movi(s, TCG_TYPE_PTR, a3, oi);
- tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr);
-
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
tcg_out_goto(s, l->raddr);
return true;
}
#else
-
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
- unsigned a_bits)
-{
- unsigned a_mask = (1 << a_bits) - 1;
- TCGLabelQemuLdst *l = new_ldst_label(s);
-
- l->is_ld = is_ld;
- l->addrlo_reg = addr_reg;
-
- /* We are expecting a_bits to max out at 7, so we can always use andi. */
- tcg_debug_assert(a_bits < 12);
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
-
- l->label_ptr[0] = s->code_ptr;
- tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
-
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
-}
-
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
/* resolve label address */
@@ -1083,9 +962,108 @@
{
return tcg_out_fail_alignment(s, l);
}
-
#endif /* CONFIG_SOFTMMU */
+/*
+ * For softmmu, perform the TLB load and compare.
+ * For useronly, perform any required alignment tests.
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
+ * is required and fill in @h with the host address for the fast path.
+ */
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
+ TCGReg addr_reg, MemOpIdx oi,
+ bool is_ld)
+{
+ TCGLabelQemuLdst *ldst = NULL;
+ MemOp opc = get_memop(oi);
+ unsigned a_bits = get_alignment_bits(opc);
+ unsigned a_mask = (1u << a_bits) - 1;
+
+#ifdef CONFIG_SOFTMMU
+ unsigned s_bits = opc & MO_SIZE;
+ int mem_index = get_mmuidx(oi);
+ int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
+ int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
+ int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
+ TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
+ tcg_target_long compare_mask;
+
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addr_reg;
+
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
+
+ tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
+
+ /* Load the tlb comparator and the addend. */
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write));
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
+ offsetof(CPUTLBEntry, addend));
+
+ /* We don't support unaligned accesses. */
+ if (a_bits < s_bits) {
+ a_bits = s_bits;
+ }
+ /* Clear the non-page, non-alignment bits from the address. */
+ compare_mask = (tcg_target_long)TARGET_PAGE_MASK | a_mask;
+ if (compare_mask == sextreg(compare_mask, 0, 12)) {
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, compare_mask);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
+ }
+
+ /* Compare masked address with the TLB entry. */
+ ldst->label_ptr[0] = s->code_ptr;
+ tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
+
+ /* TLB Hit - translate address using addend. */
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
+ addr_reg = TCG_REG_TMP0;
+ }
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_reg);
+ *pbase = TCG_REG_TMP0;
+#else
+ if (a_mask) {
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addr_reg;
+
+ /* We are expecting a_bits to max out at 7, so we can always use andi. */
+ tcg_debug_assert(a_bits < 12);
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
+
+ ldst->label_ptr[0] = s->code_ptr;
+ tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
+ }
+
+ TCGReg base = addr_reg;
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_ext32u(s, TCG_REG_TMP0, base);
+ base = TCG_REG_TMP0;
+ }
+ if (guest_base != 0) {
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
+ base = TCG_REG_TMP0;
+ }
+ *pbase = base;
+#endif
+
+ return ldst;
+}
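
The softmmu fast path above turns the guest address into a byte offset into the TLB table with a single shift and mask, because CPUTLBDescFast.mask is stored pre-scaled by the entry size. The host-side model below shows that arithmetic; the page size, entry size and TLB length are assumptions chosen for the example.

/*
 * Host-side model of the TLB index arithmetic emitted above.  Assumed
 * constants for the example: 4 KiB pages, 32-byte CPUTLBEntry
 * (CPU_TLB_ENTRY_BITS = 5) and a 256-entry TLB, with the fast mask
 * modelled as (entries - 1) << CPU_TLB_ENTRY_BITS.
 */
#include <stdint.h>
#include <stdio.h>

#define TARGET_PAGE_BITS   12
#define CPU_TLB_ENTRY_BITS 5
#define TLB_ENTRIES        256

int main(void)
{
    uint64_t fast_mask = (uint64_t)(TLB_ENTRIES - 1) << CPU_TLB_ENTRY_BITS;
    uint64_t addr = 0x7f12345678ULL;

    /* SRLI + AND from the fast path: shift the page number down so it is
     * already scaled by the entry size, then mask to a table offset. */
    uint64_t ofs = (addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) & fast_mask;

    printf("page number  = 0x%llx\n",
           (unsigned long long)(addr >> TARGET_PAGE_BITS));
    /* The entry index is the low bits of the page number. */
    printf("table offset = 0x%llx (entry %llu)\n",
           (unsigned long long)ofs,
           (unsigned long long)(ofs >> CPU_TLB_ENTRY_BITS));
    return 0;
}
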
+
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
TCGReg base, MemOp opc, TCGType type)
{
@@ -1125,32 +1103,17 @@
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
- MemOp opc = get_memop(oi);
+ TCGLabelQemuLdst *ldst;
TCGReg base;
-#if defined(CONFIG_SOFTMMU)
- tcg_insn_unit *label_ptr[1];
+ ldst = prepare_host_addr(s, &base, addr_reg, oi, true);
+ tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type);
- base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
- tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type);
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
- s->code_ptr, label_ptr);
-#else
- unsigned a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = data_reg;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
- base = addr_reg;
- if (TARGET_LONG_BITS == 32) {
- tcg_out_ext32u(s, TCG_REG_TMP0, base);
- base = TCG_REG_TMP0;
- }
- if (guest_base != 0) {
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
- base = TCG_REG_TMP0;
- }
- tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type);
-#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
@@ -1180,32 +1143,17 @@
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
- MemOp opc = get_memop(oi);
+ TCGLabelQemuLdst *ldst;
TCGReg base;
-#if defined(CONFIG_SOFTMMU)
- tcg_insn_unit *label_ptr[1];
+ ldst = prepare_host_addr(s, &base, addr_reg, oi, false);
+ tcg_out_qemu_st_direct(s, data_reg, base, get_memop(oi));
- base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
- tcg_out_qemu_st_direct(s, data_reg, base, opc);
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
- s->code_ptr, label_ptr);
-#else
- unsigned a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = data_reg;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
- base = addr_reg;
- if (TARGET_LONG_BITS == 32) {
- tcg_out_ext32u(s, TCG_REG_TMP0, base);
- base = TCG_REG_TMP0;
- }
- if (guest_base != 0) {
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
- base = TCG_REG_TMP0;
- }
- tcg_out_qemu_st_direct(s, data_reg, base, opc);
-#endif
}
static const tcg_insn_unit *tb_ret_addr;
@@ -1642,10 +1590,10 @@
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64:
- return C_O1_I1(r, L);
+ return C_O1_I1(r, r);
case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st_i64:
- return C_O0_I2(LZ, L);
+ return C_O0_I2(rZ, r);
default:
g_assert_not_reached();
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index 15f1c55..ecc079b 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -10,12 +10,10 @@
* tcg-target-con-str.h; the constraint combination is inclusive or.
*/
C_O0_I1(r)
-C_O0_I2(L, L)
C_O0_I2(r, r)
C_O0_I2(r, ri)
C_O0_I2(r, rA)
C_O0_I2(v, r)
-C_O1_I1(r, L)
C_O1_I1(r, r)
C_O1_I1(v, r)
C_O1_I1(v, v)
diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h
index 6fa64a1..25675b4 100644
--- a/tcg/s390x/tcg-target-con-str.h
+++ b/tcg/s390x/tcg-target-con-str.h
@@ -9,7 +9,6 @@
* REGS(letter, register_mask)
*/
REGS('r', ALL_GENERAL_REGS)
-REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
REGS('v', ALL_VECTOR_REGS)
REGS('o', 0xaaaa) /* odd numbered general regs */
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index da7ee5b..aacbaf2 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -44,18 +44,6 @@
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
-/*
- * For softmmu, we need to avoid conflicts with the first 3
- * argument registers to perform the tlb lookup, and to call
- * the helper function.
- */
-#ifdef CONFIG_SOFTMMU
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
-#else
-#define SOFTMMU_RESERVE_REGS 0
-#endif
-
-
/* Several places within the instruction set 0 means "no register"
rather than TCG_REG_R0. */
#define TCG_REG_NONE 0
@@ -149,6 +137,7 @@
RRE_ALGR = 0xb90a,
RRE_ALCR = 0xb998,
RRE_ALCGR = 0xb988,
+ RRE_ALGFR = 0xb91a,
RRE_CGR = 0xb920,
RRE_CLGR = 0xb921,
RRE_DLGR = 0xb987,
@@ -1718,98 +1707,22 @@
}
#if defined(CONFIG_SOFTMMU)
-/* We're expecting to use a 20-bit negative offset on the tlb memory ops. */
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
-
-/* Load and compare a TLB entry, leaving the flags set. Loads the TLB
- addend into R2. Returns a register with the santitized guest address. */
-static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
- int mem_index, bool is_ld)
-{
- unsigned s_bits = opc & MO_SIZE;
- unsigned a_bits = get_alignment_bits(opc);
- unsigned s_mask = (1 << s_bits) - 1;
- unsigned a_mask = (1 << a_bits) - 1;
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
- int ofs, a_off;
- uint64_t tlb_mask;
-
- tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
- tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
- tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
-
- /* For aligned accesses, we check the first byte and include the alignment
- bits within the address. For unaligned access, we check that we don't
- cross pages using the address of the last byte of the access. */
- a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
- tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
- if (a_off == 0) {
- tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
- } else {
- tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
- tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
- }
-
- if (is_ld) {
- ofs = offsetof(CPUTLBEntry, addr_read);
- } else {
- ofs = offsetof(CPUTLBEntry, addr_write);
- }
- if (TARGET_LONG_BITS == 32) {
- tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
- } else {
- tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
- }
-
- tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
- offsetof(CPUTLBEntry, addend));
-
- if (TARGET_LONG_BITS == 32) {
- tcg_out_ext32u(s, TCG_REG_R3, addr_reg);
- return TCG_REG_R3;
- }
- return addr_reg;
-}
-
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
- TCGType type, TCGReg data, TCGReg addr,
- tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
-{
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->oi = oi;
- label->type = type;
- label->datalo_reg = data;
- label->addrlo_reg = addr;
- label->raddr = tcg_splitwx_to_rx(raddr);
- label->label_ptr[0] = label_ptr;
-}
+static const TCGLdstHelperParam ldst_helper_param = {
+ .ntmp = 1, .tmp = { TCG_TMP0 }
+};
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
- TCGReg addr_reg = lb->addrlo_reg;
- TCGReg data_reg = lb->datalo_reg;
- MemOpIdx oi = lb->oi;
- MemOp opc = get_memop(oi);
+ MemOp opc = get_memop(lb->oi);
if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
(intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
return false;
}
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
- if (TARGET_LONG_BITS == 64) {
- tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
- }
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
- tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
- tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
+ tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
return true;
@@ -1817,51 +1730,20 @@
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
- TCGReg addr_reg = lb->addrlo_reg;
- TCGReg data_reg = lb->datalo_reg;
- MemOpIdx oi = lb->oi;
- MemOp opc = get_memop(oi);
- MemOp size = opc & MO_SIZE;
+ MemOp opc = get_memop(lb->oi);
if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
(intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
return false;
}
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
- if (TARGET_LONG_BITS == 64) {
- tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
- }
- tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
- TCG_REG_R4, lb->type, size, data_reg);
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
return true;
}
#else
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld,
- TCGReg addrlo, unsigned a_bits)
-{
- unsigned a_mask = (1 << a_bits) - 1;
- TCGLabelQemuLdst *l = new_ldst_label(s);
-
- l->is_ld = is_ld;
- l->addrlo_reg = addrlo;
-
- /* We are expecting a_bits to max out at 7, much lower than TMLL. */
- tcg_debug_assert(a_bits < 16);
- tcg_out_insn(s, RI, TMLL, addrlo, a_mask);
-
- tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
- l->label_ptr[0] = s->code_ptr;
- s->code_ptr += 1;
-
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
-}
-
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
@@ -1888,91 +1770,147 @@
{
return tcg_out_fail_alignment(s, l);
}
+#endif /* CONFIG_SOFTMMU */
-static HostAddress tcg_prepare_user_ldst(TCGContext *s, TCGReg addr_reg)
+/*
+ * For softmmu, perform the TLB load and compare.
+ * For useronly, perform any required alignment tests.
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
+ * is required and fill in @h with the host address for the fast path.
+ */
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
+ TCGReg addr_reg, MemOpIdx oi,
+ bool is_ld)
{
- TCGReg index;
- int disp;
+ TCGLabelQemuLdst *ldst = NULL;
+ MemOp opc = get_memop(oi);
+ unsigned a_bits = get_alignment_bits(opc);
+ unsigned a_mask = (1u << a_bits) - 1;
+
+#ifdef CONFIG_SOFTMMU
+ unsigned s_bits = opc & MO_SIZE;
+ unsigned s_mask = (1 << s_bits) - 1;
+ int mem_index = get_mmuidx(oi);
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
+ int ofs, a_off;
+ uint64_t tlb_mask;
+
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addr_reg;
+
+ tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
+ tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off);
+ tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off);
+
+ /*
+ * For aligned accesses, we check the first byte and include the alignment
+ * bits within the address. For unaligned access, we check that we don't
+ * cross pages using the address of the last byte of the access.
+ */
+ a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
+ tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
+ if (a_off == 0) {
+ tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
+ } else {
+ tcg_out_insn(s, RX, LA, TCG_REG_R0, addr_reg, TCG_REG_NONE, a_off);
+ tgen_andi(s, TCG_TYPE_TL, TCG_REG_R0, tlb_mask);
+ }
+
+ if (is_ld) {
+ ofs = offsetof(CPUTLBEntry, addr_read);
+ } else {
+ ofs = offsetof(CPUTLBEntry, addr_write);
+ }
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
+ } else {
+ tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
+ }
+
+ tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
+ ldst->label_ptr[0] = s->code_ptr++;
+
+ h->index = TCG_TMP0;
+ tcg_out_insn(s, RXY, LG, h->index, TCG_TMP0, TCG_REG_NONE,
+ offsetof(CPUTLBEntry, addend));
if (TARGET_LONG_BITS == 32) {
+ tcg_out_insn(s, RRE, ALGFR, h->index, addr_reg);
+ h->base = TCG_REG_NONE;
+ } else {
+ h->base = addr_reg;
+ }
+ h->disp = 0;
+#else
+ if (a_mask) {
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addr_reg;
+
+ /* We are expecting a_bits to max out at 7, much lower than TMLL. */
+ tcg_debug_assert(a_bits < 16);
+ tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
+
+ tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
+ ldst->label_ptr[0] = s->code_ptr++;
+ }
+
+ h->base = addr_reg;
+ if (TARGET_LONG_BITS == 32) {
tcg_out_ext32u(s, TCG_TMP0, addr_reg);
- addr_reg = TCG_TMP0;
+ h->base = TCG_TMP0;
}
if (guest_base < 0x80000) {
- index = TCG_REG_NONE;
- disp = guest_base;
+ h->index = TCG_REG_NONE;
+ h->disp = guest_base;
} else {
- index = TCG_GUEST_BASE_REG;
- disp = 0;
+ h->index = TCG_GUEST_BASE_REG;
+ h->disp = 0;
}
- return (HostAddress){ .base = addr_reg, .index = index, .disp = disp };
+#endif
+
+ return ldst;
}
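
For 32-bit guests, the softmmu path above folds the guest address into the loaded addend with ALGFR, a zero-extending 32-bit add, which lets the base register stay TCG_REG_NONE and keeps a "high" guest address from being sign-extended into the host sum. A plain C model of that add follows; the addend value is made up.

/*
 * Why the 32-bit guest address is combined with a zero-extending add
 * (ALGFR) above: treating it as signed would corrupt the host address.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t addend = 0x00007f0000000000ULL;  /* host_page - guest_page */
    uint32_t guest  = 0xfffff000u;            /* high guest page, 32-bit guest */

    uint64_t zero_ext = addend + (uint64_t)guest;          /* what ALGFR does */
    uint64_t sign_ext = addend + (uint64_t)(int32_t)guest; /* the wrong way */

    printf("zero-extended: 0x%016llx\n", (unsigned long long)zero_ext);
    printf("sign-extended: 0x%016llx\n", (unsigned long long)sign_ext);
    return 0;
}
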
-#endif /* CONFIG_SOFTMMU */
static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
- MemOp opc = get_memop(oi);
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#ifdef CONFIG_SOFTMMU
- unsigned mem_index = get_mmuidx(oi);
- tcg_insn_unit *label_ptr;
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
+ tcg_out_qemu_ld_direct(s, get_memop(oi), data_reg, h);
- h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
- h.index = TCG_REG_R2;
- h.disp = 0;
-
- tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
- label_ptr = s->code_ptr;
- s->code_ptr += 1;
-
- tcg_out_qemu_ld_direct(s, opc, data_reg, h);
-
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
- s->code_ptr, label_ptr);
-#else
- unsigned a_bits = get_alignment_bits(opc);
-
- if (a_bits) {
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = data_reg;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
- h = tcg_prepare_user_ldst(s, addr_reg);
- tcg_out_qemu_ld_direct(s, opc, data_reg, h);
-#endif
}
static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
MemOpIdx oi, TCGType data_type)
{
- MemOp opc = get_memop(oi);
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#ifdef CONFIG_SOFTMMU
- unsigned mem_index = get_mmuidx(oi);
- tcg_insn_unit *label_ptr;
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
+ tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
- h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
- h.index = TCG_REG_R2;
- h.disp = 0;
-
- tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
- label_ptr = s->code_ptr;
- s->code_ptr += 1;
-
- tcg_out_qemu_st_direct(s, opc, data_reg, h);
-
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
- s->code_ptr, label_ptr);
-#else
- unsigned a_bits = get_alignment_bits(opc);
-
- if (a_bits) {
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = data_reg;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
- h = tcg_prepare_user_ldst(s, addr_reg);
- tcg_out_qemu_st_direct(s, opc, data_reg, h);
-#endif
}
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
@@ -3205,10 +3143,10 @@
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64:
- return C_O1_I1(r, L);
+ return C_O1_I1(r, r);
case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_st_i32:
- return C_O0_I2(L, L);
+ return C_O0_I2(r, r);
case INDEX_op_deposit_i32:
case INDEX_op_deposit_i64:
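
A note on the TLB check emitted by prepare_host_addr() above: a single compare covers both the alignment test and the page-crossing test, by choosing a_off and tlb_mask from the alignment and size bits. The following standalone sketch models only that arithmetic; PAGE_BITS and the sample addresses are arbitrary values chosen for illustration, not QEMU definitions.

#include <stdint.h>
#include <stdio.h>

#define PAGE_BITS 12                                   /* illustrative only */
#define PAGE_MASK (~(((uint64_t)1 << PAGE_BITS) - 1))

/* Value compared against the TLB entry's page address. */
static uint64_t comparator(uint64_t addr, unsigned a_bits, unsigned s_bits)
{
    uint64_t a_mask = (1u << a_bits) - 1;
    uint64_t s_mask = (1u << s_bits) - 1;
    /* Aligned ops: check the first byte, keeping the alignment bits in
     * the compare.  Unaligned ops: check the last byte, so an access
     * that crosses into the next page fails to match. */
    uint64_t a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
    uint64_t tlb_mask = PAGE_MASK | a_mask;
    return (addr + a_off) & tlb_mask;
}

int main(void)
{
    uint64_t entry = 0;  /* TLB entry for the page at address 0x000 */

    printf("%d\n", comparator(0x004, 2, 2) == entry);  /* aligned word: hit */
    printf("%d\n", comparator(0x006, 2, 2) == entry);  /* misaligned: miss */
    printf("%d\n", comparator(0xff8, 0, 2) == entry);  /* in-page unaligned: hit */
    printf("%d\n", comparator(0xffe, 0, 2) == entry);  /* page-crossing: miss */
    return 0;
}

Any mismatch falls through to the slow-path label recorded in ldst->label_ptr[0], where the helper handles alignment faults and page crossings.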
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 057423c..1231c8a 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -181,6 +181,22 @@
static int tcg_out_ldst_finalize(TCGContext *s);
#endif
+typedef struct TCGLdstHelperParam {
+ TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
+ unsigned ntmp;
+ int tmp[3];
+} TCGLdstHelperParam;
+
+static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
+ const TCGLdstHelperParam *p)
+ __attribute__((unused));
+static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
+ bool load_sign, const TCGLdstHelperParam *p)
+ __attribute__((unused));
+static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
+ const TCGLdstHelperParam *p)
+ __attribute__((unused));
+
TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;
@@ -366,8 +382,17 @@
siglongjmp(s->jmp_trans, -2);
}
+/*
+ * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
+ * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
+ *
+ * However, tcg_out_helper_load_slots reuses this field to hold an
+ * argument slot number (which may designate a argument register or an
+ * argument stack slot), converting to TCGReg once all arguments that
+ * are destined for the stack are processed.
+ */
typedef struct TCGMovExtend {
- TCGReg dst;
+ unsigned dst;
TCGReg src;
TCGType dst_type;
TCGType src_type;
@@ -459,9 +484,8 @@
* between the sources and destinations.
*/
-static void __attribute__((unused))
-tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
- const TCGMovExtend *i2, int scratch)
+static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
+ const TCGMovExtend *i2, int scratch)
{
TCGReg src1 = i1->src;
TCGReg src2 = i2->src;
@@ -715,6 +739,58 @@
};
static GHashTable *helper_table;
+/*
+ * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
+ * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
+ * We only use these for layout in tcg_out_ld_helper_args and
+ * tcg_out_st_helper_args, and share them between several of
+ * the helpers, with the end result that it's easier to build manually.
+ */
+
+#if TCG_TARGET_REG_BITS == 32
+# define dh_typecode_ttl dh_typecode_i32
+#else
+# define dh_typecode_ttl dh_typecode_i64
+#endif
+
+static TCGHelperInfo info_helper_ld32_mmu = {
+ .flags = TCG_CALL_NO_WG,
+ .typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */
+ | dh_typemask(env, 1)
+ | dh_typemask(tl, 2) /* target_ulong addr */
+ | dh_typemask(i32, 3) /* unsigned oi */
+ | dh_typemask(ptr, 4) /* uintptr_t ra */
+};
+
+static TCGHelperInfo info_helper_ld64_mmu = {
+ .flags = TCG_CALL_NO_WG,
+ .typemask = dh_typemask(i64, 0) /* return uint64_t */
+ | dh_typemask(env, 1)
+ | dh_typemask(tl, 2) /* target_ulong addr */
+ | dh_typemask(i32, 3) /* unsigned oi */
+ | dh_typemask(ptr, 4) /* uintptr_t ra */
+};
+
+static TCGHelperInfo info_helper_st32_mmu = {
+ .flags = TCG_CALL_NO_WG,
+ .typemask = dh_typemask(void, 0)
+ | dh_typemask(env, 1)
+ | dh_typemask(tl, 2) /* target_ulong addr */
+ | dh_typemask(i32, 3) /* uint32_t data */
+ | dh_typemask(i32, 4) /* unsigned oi */
+ | dh_typemask(ptr, 5) /* uintptr_t ra */
+};
+
+static TCGHelperInfo info_helper_st64_mmu = {
+ .flags = TCG_CALL_NO_WG,
+ .typemask = dh_typemask(void, 0)
+ | dh_typemask(env, 1)
+ | dh_typemask(tl, 2) /* target_ulong addr */
+ | dh_typemask(i64, 3) /* uint64_t data */
+ | dh_typemask(i32, 4) /* unsigned oi */
+ | dh_typemask(ptr, 5) /* uintptr_t ra */
+};
+
#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
@@ -1126,6 +1202,11 @@
(gpointer)&all_helpers[i]);
}
+ init_call_layout(&info_helper_ld32_mmu);
+ init_call_layout(&info_helper_ld64_mmu);
+ init_call_layout(&info_helper_st32_mmu);
+ init_call_layout(&info_helper_st64_mmu);
+
#ifdef CONFIG_TCG_INTERPRETER
init_ffi_layouts();
#endif
@@ -2104,13 +2185,8 @@
};
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
-#ifdef TARGET_ALIGNED_ONLY
[MO_UNALN >> MO_ASHIFT] = "un+",
- [MO_ALIGN >> MO_ASHIFT] = "",
-#else
- [MO_UNALN >> MO_ASHIFT] = "",
[MO_ALIGN >> MO_ASHIFT] = "al+",
-#endif
[MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
[MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
[MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
@@ -5011,6 +5087,392 @@
}
}
+/*
+ * Similarly for qemu_ld/st slow path helpers.
+ * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
+ * using only the provided backend tcg_out_* functions.
+ */
+
+static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
+{
+ int ofs = arg_slot_stk_ofs(slot);
+
+ /*
+ * Each stack slot is TCG_TARGET_LONG_BITS wide. If a 32-bit value is
+ * stored without extension to uint64_t, a big-endian host places it in
+ * the high-addressed half of the slot, so adjust the offset for uint32_t.
+ */
+ if (HOST_BIG_ENDIAN &&
+ TCG_TARGET_REG_BITS == 64 &&
+ type == TCG_TYPE_I32) {
+ ofs += 4;
+ }
+ return ofs;
+}
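
The "+= 4" above exists because a 32-bit argument stored into a register-sized stack slot on a big-endian 64-bit host must land in the high-addressed half of the slot. A standalone illustration follows; it is not QEMU code, and the __BYTE_ORDER__ check is the usual GCC/Clang predefine standing in for HOST_BIG_ENDIAN.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    uint64_t slot = 0;                 /* one register-sized stack slot */
    uint32_t arg = 0x11223344;
    int ofs = 0;

#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    ofs += 4;                          /* the adjustment made above */
#endif
    memcpy((char *)&slot + ofs, &arg, sizeof(arg));

    /* Read back as a full 64-bit slot, the argument occupies the low
     * 32 bits on either byte order; without the adjustment a big-endian
     * host would see it in the high half instead. */
    printf("slot = 0x%016" PRIx64 "\n", slot);
    return 0;
}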
+
+static void tcg_out_helper_load_regs(TCGContext *s,
+ unsigned nmov, TCGMovExtend *mov,
+ unsigned ntmp, const int *tmp)
+{
+ switch (nmov) {
+ default:
+ /* The backend must have provided enough temps for the worst case. */
+ tcg_debug_assert(ntmp + 1 >= nmov);
+
+ for (unsigned i = nmov - 1; i >= 2; --i) {
+ TCGReg dst = mov[i].dst;
+
+ for (unsigned j = 0; j < i; ++j) {
+ if (dst == mov[j].src) {
+ /*
+ * Conflict.
+ * Copy the source to a temporary, recurse for the
+ * remaining moves, perform the extension from our
+ * scratch on the way out.
+ */
+ TCGReg scratch = tmp[--ntmp];
+ tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
+ mov[i].src = scratch;
+
+ tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
+ tcg_out_movext1(s, &mov[i]);
+ return;
+ }
+ }
+
+ /* No conflicts: perform this move and continue. */
+ tcg_out_movext1(s, &mov[i]);
+ }
+ /* fall through for the final two moves */
+
+ case 2:
+ tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
+ return;
+ case 1:
+ tcg_out_movext1(s, mov);
+ return;
+ case 0:
+ g_assert_not_reached();
+ }
+}
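
tcg_out_helper_load_regs() above is a small parallel-move solver: moves are emitted from the last argument downward, and when a destination is still needed as the source of an earlier move, the pending source is parked in one of the backend's scratch registers before the remaining moves are emitted. The same strategy is modelled below over plain integers; the register numbers and names are made up for illustration and assume, as the assertion above does, that enough scratches are provided.

#include <stdio.h>

typedef struct { int dst, src; } Move;

static int regs[16];

static void do_move(int dst, int src)
{
    regs[dst] = regs[src];
    printf("r%d <- r%d\n", dst, src);
}

/* Mirror of the shuffle above: process moves from the end, parking a
 * conflicting source in a scratch before emitting the earlier moves. */
static void load_regs(unsigned nmov, Move *mov, unsigned ntmp, const int *tmp)
{
    for (unsigned i = nmov; i-- > 0; ) {
        for (unsigned j = 0; j < i; ++j) {
            if (mov[i].dst == mov[j].src) {
                /* mov[i] would clobber the source of an earlier move:
                 * park our source, emit moves 0..i-1, then finish from
                 * the scratch on the way out. */
                int scratch = tmp[--ntmp];
                do_move(scratch, mov[i].src);
                mov[i].src = scratch;
                load_regs(i, mov, ntmp, tmp);
                do_move(mov[i].dst, mov[i].src);
                return;
            }
        }
        /* No conflict: safe to emit immediately. */
        do_move(mov[i].dst, mov[i].src);
    }
}

int main(void)
{
    /* Swap r2 and r3 into each other's argument slots using scratch r7. */
    Move mov[2] = { { .dst = 2, .src = 3 }, { .dst = 3, .src = 2 } };
    int tmp[1] = { 7 };

    regs[2] = 20;
    regs[3] = 30;
    load_regs(2, mov, 1, tmp);
    printf("r2=%d r3=%d\n", regs[2], regs[3]);   /* r2=30 r3=20 */
    return 0;
}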
+
+static void tcg_out_helper_load_slots(TCGContext *s,
+ unsigned nmov, TCGMovExtend *mov,
+ const TCGLdstHelperParam *parm)
+{
+ unsigned i;
+
+ /*
+ * Start from the end, storing to the stack first.
+ * This frees those registers, so we need not consider overlap.
+ */
+ for (i = nmov; i-- > 0; ) {
+ unsigned slot = mov[i].dst;
+
+ if (arg_slot_reg_p(slot)) {
+ goto found_reg;
+ }
+
+ TCGReg src = mov[i].src;
+ TCGType dst_type = mov[i].dst_type;
+ MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
+
+ /* The argument is going onto the stack; extend into scratch. */
+ if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
+ tcg_debug_assert(parm->ntmp != 0);
+ mov[i].dst = src = parm->tmp[0];
+ tcg_out_movext1(s, &mov[i]);
+ }
+
+ tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
+ tcg_out_helper_stk_ofs(dst_type, slot));
+ }
+ return;
+
+ found_reg:
+ /*
+ * The remaining arguments are in registers.
+ * Convert slot numbers to argument registers.
+ */
+ nmov = i + 1;
+ for (i = 0; i < nmov; ++i) {
+ mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
+ }
+ tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
+}
+
+static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
+ TCGType type, tcg_target_long imm,
+ const TCGLdstHelperParam *parm)
+{
+ if (arg_slot_reg_p(slot)) {
+ tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
+ } else {
+ int ofs = tcg_out_helper_stk_ofs(type, slot);
+ if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
+ tcg_debug_assert(parm->ntmp != 0);
+ tcg_out_movi(s, type, parm->tmp[0], imm);
+ tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
+ }
+ }
+}
+
+static void tcg_out_helper_load_common_args(TCGContext *s,
+ const TCGLabelQemuLdst *ldst,
+ const TCGLdstHelperParam *parm,
+ const TCGHelperInfo *info,
+ unsigned next_arg)
+{
+ TCGMovExtend ptr_mov = {
+ .dst_type = TCG_TYPE_PTR,
+ .src_type = TCG_TYPE_PTR,
+ .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
+ };
+ const TCGCallArgumentLoc *loc = &info->in[0];
+ TCGType type;
+ unsigned slot;
+ tcg_target_ulong imm;
+
+ /*
+ * Handle env, which is always first.
+ */
+ ptr_mov.dst = loc->arg_slot;
+ ptr_mov.src = TCG_AREG0;
+ tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
+
+ /*
+ * Handle oi.
+ */
+ imm = ldst->oi;
+ loc = &info->in[next_arg];
+ type = TCG_TYPE_I32;
+ switch (loc->kind) {
+ case TCG_CALL_ARG_NORMAL:
+ break;
+ case TCG_CALL_ARG_EXTEND_U:
+ case TCG_CALL_ARG_EXTEND_S:
+ /* No extension required for MemOpIdx. */
+ tcg_debug_assert(imm <= INT32_MAX);
+ type = TCG_TYPE_REG;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
+ next_arg++;
+
+ /*
+ * Handle ra.
+ */
+ loc = &info->in[next_arg];
+ slot = loc->arg_slot;
+ if (parm->ra_gen) {
+ int arg_reg = -1;
+ TCGReg ra_reg;
+
+ if (arg_slot_reg_p(slot)) {
+ arg_reg = tcg_target_call_iarg_regs[slot];
+ }
+ ra_reg = parm->ra_gen(s, ldst, arg_reg);
+
+ ptr_mov.dst = slot;
+ ptr_mov.src = ra_reg;
+ tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
+ } else {
+ imm = (uintptr_t)ldst->raddr;
+ tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
+ }
+}
+
+static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
+ const TCGCallArgumentLoc *loc,
+ TCGType dst_type, TCGType src_type,
+ TCGReg lo, TCGReg hi)
+{
+ if (dst_type <= TCG_TYPE_REG) {
+ MemOp src_ext;
+
+ switch (loc->kind) {
+ case TCG_CALL_ARG_NORMAL:
+ src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
+ break;
+ case TCG_CALL_ARG_EXTEND_U:
+ dst_type = TCG_TYPE_REG;
+ src_ext = MO_UL;
+ break;
+ case TCG_CALL_ARG_EXTEND_S:
+ dst_type = TCG_TYPE_REG;
+ src_ext = MO_SL;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ mov[0].dst = loc->arg_slot;
+ mov[0].dst_type = dst_type;
+ mov[0].src = lo;
+ mov[0].src_type = src_type;
+ mov[0].src_ext = src_ext;
+ return 1;
+ }
+
+ assert(TCG_TARGET_REG_BITS == 32);
+
+ mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
+ mov[0].src = lo;
+ mov[0].dst_type = TCG_TYPE_I32;
+ mov[0].src_type = TCG_TYPE_I32;
+ mov[0].src_ext = MO_32;
+
+ mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
+ mov[1].src = hi;
+ mov[1].dst_type = TCG_TYPE_I32;
+ mov[1].src_type = TCG_TYPE_I32;
+ mov[1].src_ext = MO_32;
+
+ return 2;
+}
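
For 32-bit hosts, the two halves of a 64-bit argument are assigned so that loc[HOST_BIG_ENDIAN] receives the low word and loc[!HOST_BIG_ENDIAN] the high word, which keeps the pair in the order the calling convention expects. A toy model of that slot ordering, for illustration only, with a plain int standing in for HOST_BIG_ENDIAN:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t data = 0x1122334455667788ULL;
    uint32_t lo = (uint32_t)data, hi = (uint32_t)(data >> 32);
    uint32_t slot[2];
    int big_endian = 0;                 /* HOST_BIG_ENDIAN stand-in */

    slot[big_endian]  = lo;             /* loc[HOST_BIG_ENDIAN]  <- lo */
    slot[!big_endian] = hi;             /* loc[!HOST_BIG_ENDIAN] <- hi */

    /* A callee following the same convention reassembles the value:
     * on a big-endian host the first slot holds the high half. */
    uint64_t redo = big_endian
                  ? ((uint64_t)slot[0] << 32) | slot[1]
                  : ((uint64_t)slot[1] << 32) | slot[0];
    printf("%d\n", redo == data);       /* 1 */
    return 0;
}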
+
+static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
+ const TCGLdstHelperParam *parm)
+{
+ const TCGHelperInfo *info;
+ const TCGCallArgumentLoc *loc;
+ TCGMovExtend mov[2];
+ unsigned next_arg, nmov;
+ MemOp mop = get_memop(ldst->oi);
+
+ switch (mop & MO_SIZE) {
+ case MO_8:
+ case MO_16:
+ case MO_32:
+ info = &info_helper_ld32_mmu;
+ break;
+ case MO_64:
+ info = &info_helper_ld64_mmu;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /* Defer env argument. */
+ next_arg = 1;
+
+ loc = &info->in[next_arg];
+ nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
+ ldst->addrlo_reg, ldst->addrhi_reg);
+ next_arg += nmov;
+
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
+
+ /* No special attention for 32-bit and 64-bit return values. */
+ tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
+
+ tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
+}
+
+static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
+ bool load_sign,
+ const TCGLdstHelperParam *parm)
+{
+ TCGMovExtend mov[2];
+
+ if (ldst->type <= TCG_TYPE_REG) {
+ MemOp mop = get_memop(ldst->oi);
+
+ mov[0].dst = ldst->datalo_reg;
+ mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
+ mov[0].dst_type = ldst->type;
+ mov[0].src_type = TCG_TYPE_REG;
+
+ /*
+ * If load_sign, then we allowed the helper to perform the
+ * appropriate sign extension to tcg_target_ulong, and all
+ * we need now is a plain move.
+ *
+ * If not, then we expect the relevant extension
+ * instruction to be no more expensive than a move, and
+ * we thus save the icache etc by only using one of two
+ * helper functions.
+ */
+ if (load_sign || !(mop & MO_SIGN)) {
+ if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
+ mov[0].src_ext = MO_32;
+ } else {
+ mov[0].src_ext = MO_64;
+ }
+ } else {
+ mov[0].src_ext = mop & MO_SSIZE;
+ }
+ tcg_out_movext1(s, mov);
+ } else {
+ assert(TCG_TARGET_REG_BITS == 32);
+
+ mov[0].dst = ldst->datalo_reg;
+ mov[0].src =
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
+ mov[0].dst_type = TCG_TYPE_I32;
+ mov[0].src_type = TCG_TYPE_I32;
+ mov[0].src_ext = MO_32;
+
+ mov[1].dst = ldst->datahi_reg;
+ mov[1].src =
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
+ mov[1].dst_type = TCG_TYPE_REG;
+ mov[1].src_type = TCG_TYPE_REG;
+ mov[1].src_ext = MO_32;
+
+ tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
+ }
+}
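
The load_sign logic above keeps the number of helpers small: for a signed narrow load the backend either trusts the helper to have sign-extended its result already and emits a plain move, or it re-extends from the memory access size itself. A standalone model of that decision follows; the size and flag parameters are stand-ins for illustration, not QEMU's MemOp encoding.

#include <stdint.h>
#include <stdio.h>

/* 'size_bytes' is the access size, 'is_signed' whether the guest op is a
 * signed load, 'helper_signed' whether the helper already extended. */
static int64_t ld_ret(uint64_t helper_ret, int size_bytes,
                      int is_signed, int helper_signed)
{
    if (helper_signed || !is_signed) {
        return (int64_t)helper_ret;        /* a plain move suffices */
    }
    switch (size_bytes) {                  /* extend from the memory size */
    case 1: return (int8_t)helper_ret;
    case 2: return (int16_t)helper_ret;
    case 4: return (int32_t)helper_ret;
    default: return (int64_t)helper_ret;
    }
}

int main(void)
{
    /* A signed byte load of 0x80 (-128), returned zero-extended. */
    printf("%lld\n", (long long)ld_ret(0x80, 1, 1, 0));                  /* -128 */
    /* The same load when the helper already sign-extended its result. */
    printf("%lld\n", (long long)ld_ret(0xffffffffffffff80ULL, 1, 1, 1)); /* -128 */
    return 0;
}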
+
+static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
+ const TCGLdstHelperParam *parm)
+{
+ const TCGHelperInfo *info;
+ const TCGCallArgumentLoc *loc;
+ TCGMovExtend mov[4];
+ TCGType data_type;
+ unsigned next_arg, nmov, n;
+ MemOp mop = get_memop(ldst->oi);
+
+ switch (mop & MO_SIZE) {
+ case MO_8:
+ case MO_16:
+ case MO_32:
+ info = &info_helper_st32_mmu;
+ data_type = TCG_TYPE_I32;
+ break;
+ case MO_64:
+ info = &info_helper_st64_mmu;
+ data_type = TCG_TYPE_I64;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /* Defer env argument. */
+ next_arg = 1;
+ nmov = 0;
+
+ /* Handle addr argument. */
+ loc = &info->in[next_arg];
+ n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
+ ldst->addrlo_reg, ldst->addrhi_reg);
+ next_arg += n;
+ nmov += n;
+
+ /* Handle data argument. */
+ loc = &info->in[next_arg];
+ n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
+ ldst->datalo_reg, ldst->datahi_reg);
+ next_arg += n;
+ nmov += n;
+ tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
+
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
+ tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
+}
+
#ifdef CONFIG_PROFILER
/* avoid copy/paste errors */
diff --git a/trace/control-target.c b/trace/control-target.c
index 232c97a..c0c1e23 100644
--- a/trace/control-target.c
+++ b/trace/control-target.c
@@ -8,6 +8,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/lockable.h"
#include "cpu.h"
#include "trace/trace-root.h"
#include "trace/control.h"
@@ -116,11 +117,9 @@
static bool adding_first_cpu(void)
{
- bool res;
- cpu_list_lock();
- res = adding_first_cpu1();
- cpu_list_unlock();
- return res;
+ QEMU_LOCK_GUARD(&qemu_cpu_list_lock);
+
+ return adding_first_cpu1();
}
void trace_init_vcpu(CPUState *vcpu)
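
QEMU_LOCK_GUARD releases qemu_cpu_list_lock automatically on every exit path from the scope, which is what lets the explicit lock/unlock pair be dropped above. Below is a rough standalone model of the pattern using the compiler cleanup attribute; it is only an approximation, since the real macro is built on QemuLockable in "qemu/lockable.h", and LOCK_GUARD, list_lock, and n_cpus here are invented names for illustration.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static int n_cpus;

static void unlock_cleanup(pthread_mutex_t **p)
{
    pthread_mutex_unlock(*p);
}

/* Lock now, unlock automatically when the guard variable goes out of scope. */
#define LOCK_GUARD(m) \
    pthread_mutex_t *guard_ __attribute__((cleanup(unlock_cleanup))) = \
        (pthread_mutex_lock(m), (m))

static int adding_first_cpu(void)
{
    LOCK_GUARD(&list_lock);
    /* The lock is dropped automatically on every return path. */
    return n_cpus == 0;
}

int main(void)
{
    printf("%d\n", adding_first_cpu());   /* 1 */
    return 0;
}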