Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
* target/i386: Introduce SapphireRapids-v3 to add missing features
* switch boards to "default y"
* allow building emulators without any board
* configs: list "implied" device groups in the default configs
* remove unnecessary declarations from typedefs.h
* target/i386: Give IRQs a chance when resetting HF_INHIBIT_IRQ_MASK
# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmY1ILsUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroNtIwf+MEehq2HudZvsK1M8FrvNmkB/AssO
# x4tqL8DlTus23mQDBu9+rANTB93ManJdK9ybtf6NfjEwK+R8RJslLVnuy/qT+aQX
# PD208L88fjZg17G8uyawwvD1VmqWzHFSN14ShmKzqB2yPXXo/1cJ30w78DbD50yC
# 6rw/xbC5j195CwE2u8eBcIyY4Hh2PUYEE4uyHbYVr57cMjfmmA5Pg4I4FJrpLrF3
# eM2Avl/4pIbsW3zxXVB8QbAkgypxZErk3teDK1AkPJnlnBYM1jGKbt/GdKe7vcHR
# V/o+7NlcbS3oHVItQ2gP3m91stjFq+NhixaZpa0VlmuqayBa3xNGl0G6OQ==
# =ZbNW
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 03 May 2024 10:36:59 AM PDT
# gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg: issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
* tag 'for-upstream' of https://gitlab.com/bonzini/qemu: (46 commits)
qga/commands-posix: fix typo in qmp_guest_set_user_password
migration: do not include coroutine_int.h
kvm: move target-dependent interrupt routing out of kvm-all.c
pci: remove some types from typedefs.h
tcg: remove CPU* types from typedefs.h
display: remove GraphicHwOps from typedefs.h
qapi/machine: remove types from typedefs.h
monitor: remove MonitorDef from typedefs.h
migration: remove PostcopyDiscardState from typedefs.h
lockable: remove QemuLockable from typedefs.h
intc: remove PICCommonState from typedefs.h
qemu-option: remove QemuOpt from typedefs.h
net: remove AnnounceTimer from typedefs.h
numa: remove types from typedefs.h
qdev-core: remove DeviceListener from typedefs.h
fw_cfg: remove useless declarations from typedefs.h
build: do not build virtio-vga-gl if virgl/opengl not available
bitmap: Use g_try_new0/g_new0/g_renew
target/i386: Introduce SapphireRapids-v3 to add missing features
docs: document new convention for Kconfig board symbols
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
diff --git a/MAINTAINERS b/MAINTAINERS
index 302b6fd..2f08cc5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -532,7 +532,7 @@
---------------------
X86 Xen CPUs
M: Stefano Stabellini <sstabellini@kernel.org>
-M: Anthony Perard <anthony.perard@citrix.com>
+M: Anthony PERARD <anthony@xenproject.org>
M: Paul Durrant <paul@xen.org>
L: xen-devel@lists.xenproject.org
S: Supported
@@ -3692,7 +3692,6 @@
M: Riku Voipio <riku.voipio@iki.fi>
S: Maintained
F: accel/tcg/user-exec*.c
-F: include/exec/user/
F: include/user/
F: common-user/
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 8629f0d..a916724 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -22,11 +22,11 @@
#include "exec/cpu_ldst.h"
#include "exec/exec-all.h"
-#include "exec/user/abitypes.h"
+#include "user/abitypes.h"
extern char **environ;
-#include "exec/user/thunk.h"
+#include "user/thunk.h"
#include "target_arch.h"
#include "syscall_defs.h"
#include "target_syscall.h"
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index e75ec13..032c6d9 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -64,7 +64,7 @@
/* MMU memory access macros */
#if defined(CONFIG_USER_ONLY)
-#include "exec/user/abitypes.h"
+#include "user/abitypes.h"
/*
* If non-zero, the guest virtual address space is a contiguous subset
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 6d53188..8812ba7 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -1,8 +1,13 @@
+/*
+ * CPU interfaces that are target independent.
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * SPDX-License-Identifier: LGPL-2.1+
+ */
#ifndef CPU_COMMON_H
#define CPU_COMMON_H
-/* CPU interfaces that are target independent. */
-
#include "exec/vaddr.h"
#ifndef CONFIG_USER_ONLY
#include "exec/hwaddr.h"
diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index 92c927a..741dade 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -187,9 +187,39 @@
/* used to print char* safely */
#define STR_OR_NULL(str) ((str) ? (str) : "null")
-bool buffer_is_zero(const void *buf, size_t len);
+/*
+ * Check if a buffer is all zeroes.
+ */
+
+bool buffer_is_zero_ool(const void *vbuf, size_t len);
+bool buffer_is_zero_ge256(const void *vbuf, size_t len);
bool test_buffer_is_zero_next_accel(void);
+static inline bool buffer_is_zero_sample3(const char *buf, size_t len)
+{
+ /*
+ * For any reasonably sized buffer, these three samples come from
+ * three different cachelines. In qemu-img usage, we find that
+ * each byte eliminates more than half of all buffer testing.
+ * It is therefore critical to performance that the byte tests
+ * short-circuit, so that we do not pull in additional cache lines.
+ * Do not "optimize" this to !(a | b | c).
+ */
+ return !buf[0] && !buf[len - 1] && !buf[len / 2];
+}
+
+#ifdef __OPTIMIZE__
+static inline bool buffer_is_zero(const void *buf, size_t len)
+{
+ return (__builtin_constant_p(len) && len >= 256
+ ? buffer_is_zero_sample3(buf, len) &&
+ buffer_is_zero_ge256(buf, len)
+ : buffer_is_zero_ool(buf, len));
+}
+#else
+#define buffer_is_zero buffer_is_zero_ool
+#endif
+
/*
* Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128)
* Input is limited to 14-bit numbers
diff --git a/include/exec/user/abitypes.h b/include/user/abitypes.h
similarity index 97%
rename from include/exec/user/abitypes.h
rename to include/user/abitypes.h
index 3ec1969..5c9a955 100644
--- a/include/exec/user/abitypes.h
+++ b/include/user/abitypes.h
@@ -1,5 +1,5 @@
-#ifndef EXEC_USER_ABITYPES_H
-#define EXEC_USER_ABITYPES_H
+#ifndef USER_ABITYPES_H
+#define USER_ABITYPES_H
#ifndef CONFIG_USER_ONLY
#error Cannot include this header from system emulation
diff --git a/include/user/syscall-trace.h b/include/user/syscall-trace.h
index b48b2b2..9bd7ca1 100644
--- a/include/user/syscall-trace.h
+++ b/include/user/syscall-trace.h
@@ -10,7 +10,7 @@
#ifndef SYSCALL_TRACE_H
#define SYSCALL_TRACE_H
-#include "exec/user/abitypes.h"
+#include "user/abitypes.h"
#include "gdbstub/user.h"
#include "qemu/plugin.h"
#include "trace/trace-root.h"
diff --git a/include/exec/user/thunk.h b/include/user/thunk.h
similarity index 96%
rename from include/exec/user/thunk.h
rename to include/user/thunk.h
index 2ebfecf..2a2104b 100644
--- a/include/exec/user/thunk.h
+++ b/include/user/thunk.h
@@ -17,11 +17,15 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef THUNK_H
-#define THUNK_H
+#ifndef USER_THUNK_H
+#define USER_THUNK_H
+
+#ifndef CONFIG_USER_ONLY
+#error Cannot include this header from system emulation
+#endif
#include "cpu.h"
-#include "exec/user/abitypes.h"
+#include "user/abitypes.h"
/* types enums definitions */
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 4777856..263f445 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -4,7 +4,7 @@
#include "cpu.h"
#include "exec/cpu_ldst.h"
-#include "exec/user/abitypes.h"
+#include "user/abitypes.h"
#include "syscall_defs.h"
#include "target_syscall.h"
diff --git a/linux-user/thunk.c b/linux-user/thunk.c
index 071aad4..3cd19e7 100644
--- a/linux-user/thunk.c
+++ b/linux-user/thunk.c
@@ -20,7 +20,7 @@
#include "qemu/log.h"
#include "qemu.h"
-#include "exec/user/thunk.h"
+#include "user/thunk.h"
//#define DEBUG
diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
index ce11d9e..5c7f173 100644
--- a/linux-user/user-internals.h
+++ b/linux-user/user-internals.h
@@ -18,7 +18,7 @@
#ifndef LINUX_USER_USER_INTERNALS_H
#define LINUX_USER_USER_INTERNALS_H
-#include "exec/user/thunk.h"
+#include "user/thunk.h"
#include "exec/exec-all.h"
#include "exec/tb-flush.h"
#include "qemu/log.h"
diff --git a/plugins/api.c b/plugins/api.c
index 3912c9c..2144da1 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -42,10 +42,10 @@
#include "tcg/tcg.h"
#include "exec/exec-all.h"
#include "exec/gdbstub.h"
-#include "exec/ram_addr.h"
#include "disas/disas.h"
#include "plugin.h"
#ifndef CONFIG_USER_ONLY
+#include "exec/ram_addr.h"
#include "qemu/plugin-memory.h"
#include "hw/boards.h"
#else
diff --git a/plugins/core.c b/plugins/core.c
index 0213513..081323d 100644
--- a/plugins/core.c
+++ b/plugins/core.c
@@ -410,7 +410,7 @@
struct qemu_plugin_cb *cb, *next;
enum qemu_plugin_event ev = QEMU_PLUGIN_EV_VCPU_TB_TRANS;
- /* no plugin_mask check here; caller should have checked */
+ /* no plugin_state->event_mask check here; caller should have checked */
QLIST_FOREACH_SAFE_RCU(cb, &plugin.cb_lists[ev], entry, next) {
qemu_plugin_vcpu_tb_trans_cb_t func = cb->f.vcpu_tb_trans;
diff --git a/scripts/coverity-scan/COMPONENTS.md b/scripts/coverity-scan/COMPONENTS.md
index 91be8d1..1537e49 100644
--- a/scripts/coverity-scan/COMPONENTS.md
+++ b/scripts/coverity-scan/COMPONENTS.md
@@ -121,7 +121,7 @@
~ (/qemu)?(/hw/usb/.*|/include/hw/usb/.*)
user
- ~ (/qemu)?(/linux-user/.*|/bsd-user/.*|/user-exec\.c|/thunk\.c|/include/exec/user/.*)
+ ~ (/qemu)?(/linux-user/.*|/bsd-user/.*|/user-exec\.c|/thunk\.c|/include/user/.*)
util
~ (/qemu)?(/util/.*|/include/qemu/.*)
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
index 05f9ee41..0e2fbcb 100644
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@@ -28,25 +28,37 @@
static void alpha_cpu_set_pc(CPUState *cs, vaddr value)
{
- AlphaCPU *cpu = ALPHA_CPU(cs);
-
- cpu->env.pc = value;
+ CPUAlphaState *env = cpu_env(cs);
+ env->pc = value;
}
static vaddr alpha_cpu_get_pc(CPUState *cs)
{
- AlphaCPU *cpu = ALPHA_CPU(cs);
+ CPUAlphaState *env = cpu_env(cs);
+ return env->pc;
+}
- return cpu->env.pc;
+static void alpha_cpu_synchronize_from_tb(CPUState *cs,
+ const TranslationBlock *tb)
+{
+ /* The program counter is always up to date with CF_PCREL. */
+ if (!(tb_cflags(tb) & CF_PCREL)) {
+ CPUAlphaState *env = cpu_env(cs);
+ env->pc = tb->pc;
+ }
}
static void alpha_restore_state_to_opc(CPUState *cs,
const TranslationBlock *tb,
const uint64_t *data)
{
- AlphaCPU *cpu = ALPHA_CPU(cs);
+ CPUAlphaState *env = cpu_env(cs);
- cpu->env.pc = data[0];
+ if (tb_cflags(tb) & CF_PCREL) {
+ env->pc = (env->pc & TARGET_PAGE_MASK) | data[0];
+ } else {
+ env->pc = data[0];
+ }
}
static bool alpha_cpu_has_work(CPUState *cs)
@@ -81,6 +93,11 @@
AlphaCPUClass *acc = ALPHA_CPU_GET_CLASS(dev);
Error *local_err = NULL;
+#ifndef CONFIG_USER_ONLY
+ /* Use pc-relative instructions in system-mode */
+ cs->tcg_cflags |= CF_PCREL;
+#endif
+
cpu_exec_realizefn(cs, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
@@ -193,6 +210,7 @@
static const TCGCPUOps alpha_tcg_ops = {
.initialize = alpha_translate_init,
+ .synchronize_from_tb = alpha_cpu_synchronize_from_tb,
.restore_state_to_opc = alpha_restore_state_to_opc,
#ifdef CONFIG_USER_ONLY
diff --git a/target/alpha/helper.c b/target/alpha/helper.c
index d6d4353..c5e4958 100644
--- a/target/alpha/helper.c
+++ b/target/alpha/helper.c
@@ -124,7 +124,7 @@
MMUAccessType access_type,
bool maperr, uintptr_t retaddr)
{
- AlphaCPU *cpu = ALPHA_CPU(cs);
+ CPUAlphaState *env = cpu_env(cs);
target_ulong mmcsr, cause;
/* Assuming !maperr, infer the missing protection. */
@@ -155,9 +155,9 @@
}
/* Record the arguments that PALcode would give to the kernel. */
- cpu->env.trap_arg0 = address;
- cpu->env.trap_arg1 = mmcsr;
- cpu->env.trap_arg2 = cause;
+ env->trap_arg0 = address;
+ env->trap_arg1 = mmcsr;
+ env->trap_arg2 = cause;
}
#else
/* Returns the OSF/1 entMM failure indication, or -1 on success. */
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index a97cd54..db847e7 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -54,6 +54,9 @@
uint32_t tbflags;
int mem_idx;
+ /* True if generating pc-relative code. */
+ bool pcrel;
+
/* implver and amask values for this CPU. */
int implver;
int amask;
@@ -252,6 +255,16 @@
tcg_gen_st8_i64(val, tcg_env, get_flag_ofs(shift));
}
+static void gen_pc_disp(DisasContext *ctx, TCGv dest, int32_t disp)
+{
+ uint64_t addr = ctx->base.pc_next + disp;
+ if (ctx->pcrel) {
+ tcg_gen_addi_i64(dest, cpu_pc, addr - ctx->base.pc_first);
+ } else {
+ tcg_gen_movi_i64(dest, addr);
+ }
+}
+
static void gen_excp_1(int exception, int error_code)
{
TCGv_i32 tmp1, tmp2;
@@ -263,7 +276,7 @@
static DisasJumpType gen_excp(DisasContext *ctx, int exception, int error_code)
{
- tcg_gen_movi_i64(cpu_pc, ctx->base.pc_next);
+ gen_pc_disp(ctx, cpu_pc, 0);
gen_excp_1(exception, error_code);
return DISAS_NORETURN;
}
@@ -425,60 +438,49 @@
return DISAS_NEXT;
}
-static bool use_goto_tb(DisasContext *ctx, uint64_t dest)
+static void gen_goto_tb(DisasContext *ctx, int idx, int32_t disp)
{
- return translator_use_goto_tb(&ctx->base, dest);
+ if (translator_use_goto_tb(&ctx->base, ctx->base.pc_next + disp)) {
+ /* With PCREL, PC must always be up-to-date. */
+ if (ctx->pcrel) {
+ gen_pc_disp(ctx, cpu_pc, disp);
+ tcg_gen_goto_tb(idx);
+ } else {
+ tcg_gen_goto_tb(idx);
+ gen_pc_disp(ctx, cpu_pc, disp);
+ }
+ tcg_gen_exit_tb(ctx->base.tb, idx);
+ } else {
+ gen_pc_disp(ctx, cpu_pc, disp);
+ tcg_gen_lookup_and_goto_ptr();
+ }
}
static DisasJumpType gen_bdirect(DisasContext *ctx, int ra, int32_t disp)
{
- uint64_t dest = ctx->base.pc_next + (disp << 2);
-
if (ra != 31) {
- tcg_gen_movi_i64(ctx->ir[ra], ctx->base.pc_next);
+ gen_pc_disp(ctx, ctx->ir[ra], 0);
}
/* Notice branch-to-next; used to initialize RA with the PC. */
if (disp == 0) {
- return 0;
- } else if (use_goto_tb(ctx, dest)) {
- tcg_gen_goto_tb(0);
- tcg_gen_movi_i64(cpu_pc, dest);
- tcg_gen_exit_tb(ctx->base.tb, 0);
- return DISAS_NORETURN;
- } else {
- tcg_gen_movi_i64(cpu_pc, dest);
- return DISAS_PC_UPDATED;
+ return DISAS_NEXT;
}
+ gen_goto_tb(ctx, 0, disp);
+ return DISAS_NORETURN;
}
static DisasJumpType gen_bcond_internal(DisasContext *ctx, TCGCond cond,
TCGv cmp, uint64_t imm, int32_t disp)
{
- uint64_t dest = ctx->base.pc_next + (disp << 2);
TCGLabel *lab_true = gen_new_label();
- if (use_goto_tb(ctx, dest)) {
- tcg_gen_brcondi_i64(cond, cmp, imm, lab_true);
+ tcg_gen_brcondi_i64(cond, cmp, imm, lab_true);
+ gen_goto_tb(ctx, 0, 0);
+ gen_set_label(lab_true);
+ gen_goto_tb(ctx, 1, disp);
- tcg_gen_goto_tb(0);
- tcg_gen_movi_i64(cpu_pc, ctx->base.pc_next);
- tcg_gen_exit_tb(ctx->base.tb, 0);
-
- gen_set_label(lab_true);
- tcg_gen_goto_tb(1);
- tcg_gen_movi_i64(cpu_pc, dest);
- tcg_gen_exit_tb(ctx->base.tb, 1);
-
- return DISAS_NORETURN;
- } else {
- TCGv_i64 i = tcg_constant_i64(imm);
- TCGv_i64 d = tcg_constant_i64(dest);
- TCGv_i64 p = tcg_constant_i64(ctx->base.pc_next);
-
- tcg_gen_movcond_i64(cond, cpu_pc, cmp, i, d, p);
- return DISAS_PC_UPDATED;
- }
+ return DISAS_NORETURN;
}
static DisasJumpType gen_bcond(DisasContext *ctx, TCGCond cond, int ra,
@@ -1106,7 +1108,7 @@
}
/* Allow interrupts to be recognized right away. */
- tcg_gen_movi_i64(cpu_pc, ctx->base.pc_next);
+ gen_pc_disp(ctx, cpu_pc, 0);
return DISAS_PC_UPDATED_NOCHAIN;
case 0x36:
@@ -1153,19 +1155,17 @@
#else
{
TCGv tmp = tcg_temp_new();
- uint64_t exc_addr = ctx->base.pc_next;
- uint64_t entry = ctx->palbr;
+ uint64_t entry;
+ gen_pc_disp(ctx, tmp, 0);
if (ctx->tbflags & ENV_FLAG_PAL_MODE) {
- exc_addr |= 1;
+ tcg_gen_ori_i64(tmp, tmp, 1);
} else {
- tcg_gen_movi_i64(tmp, 1);
- st_flag_byte(tmp, ENV_FLAG_PAL_SHIFT);
+ st_flag_byte(tcg_constant_i64(1), ENV_FLAG_PAL_SHIFT);
}
-
- tcg_gen_movi_i64(tmp, exc_addr);
tcg_gen_st_i64(tmp, tcg_env, offsetof(CPUAlphaState, exc_addr));
+ entry = ctx->palbr;
entry += (palcode & 0x80
? 0x2000 + (palcode - 0x80) * 64
: 0x1000 + palcode * 64);
@@ -1382,7 +1382,7 @@
real_islit = islit = extract32(insn, 12, 1);
lit = extract32(insn, 13, 8);
- disp21 = sextract32(insn, 0, 21);
+ disp21 = sextract32(insn, 0, 21) * 4;
disp16 = sextract32(insn, 0, 16);
disp12 = sextract32(insn, 0, 12);
@@ -2359,9 +2359,13 @@
/* JMP, JSR, RET, JSR_COROUTINE. These only differ by the branch
prediction stack action, which of course we don't implement. */
vb = load_gpr(ctx, rb);
- tcg_gen_andi_i64(cpu_pc, vb, ~3);
if (ra != 31) {
- tcg_gen_movi_i64(ctx->ir[ra], ctx->base.pc_next);
+ tmp = tcg_temp_new();
+ tcg_gen_andi_i64(tmp, vb, ~3);
+ gen_pc_disp(ctx, ctx->ir[ra], 0);
+ tcg_gen_mov_i64(cpu_pc, tmp);
+ } else {
+ tcg_gen_andi_i64(cpu_pc, vb, ~3);
}
ret = DISAS_PC_UPDATED;
break;
@@ -2862,6 +2866,7 @@
ctx->tbflags = ctx->base.tb->flags;
ctx->mem_idx = alpha_env_mmu_index(env);
+ ctx->pcrel = ctx->base.tb->cflags & CF_PCREL;
ctx->implver = env->implver;
ctx->amask = env->amask;
@@ -2897,7 +2902,13 @@
static void alpha_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
- tcg_gen_insn_start(dcbase->pc_next);
+ DisasContext *ctx = container_of(dcbase, DisasContext, base);
+
+ if (ctx->pcrel) {
+ tcg_gen_insn_start(dcbase->pc_next & ~TARGET_PAGE_MASK);
+ } else {
+ tcg_gen_insn_start(dcbase->pc_next);
+ }
}
static void alpha_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
@@ -2920,14 +2931,10 @@
case DISAS_NORETURN:
break;
case DISAS_TOO_MANY:
- if (use_goto_tb(ctx, ctx->base.pc_next)) {
- tcg_gen_goto_tb(0);
- tcg_gen_movi_i64(cpu_pc, ctx->base.pc_next);
- tcg_gen_exit_tb(ctx->base.tb, 0);
- }
- /* FALLTHRU */
+ gen_goto_tb(ctx, 0, 0);
+ break;
case DISAS_PC_STALE:
- tcg_gen_movi_i64(cpu_pc, ctx->base.pc_next);
+ gen_pc_disp(ctx, cpu_pc, 0);
/* FALLTHRU */
case DISAS_PC_UPDATED:
tcg_gen_lookup_and_goto_ptr();
diff --git a/target/hexagon/README b/target/hexagon/README
index 746ebec..7ffd517 100644
--- a/target/hexagon/README
+++ b/target/hexagon/README
@@ -43,11 +43,9 @@
That file is consumed by the following python scripts to produce the indicated
header files in <BUILD_DIR>/target/hexagon
gen_opcodes_def.py -> opcodes_def_generated.h.inc
- gen_op_regs.py -> op_regs_generated.h.inc
gen_printinsn.py -> printinsn_generated.h.inc
gen_op_attribs.py -> op_attribs_generated.h.inc
gen_helper_protos.py -> helper_protos_generated.h.inc
- gen_shortcode.py -> shortcode_generated.h.inc
gen_tcg_funcs.py -> tcg_funcs_generated.c.inc
gen_tcg_func_table.py -> tcg_func_table_generated.c.inc
gen_helper_funcs.py -> helper_funcs_generated.c.inc
@@ -183,10 +181,11 @@
}
We also generate an analyze_<tag> function for each instruction. Currently,
-these functions record the writes to registers by calling ctx_log_*. During
-gen_start_packet, we invoke the analyze_<tag> function for each instruction in
-the packet, and we mark the implicit writes. After the analysis is performed,
-we initialize the result register for each of the predicated assignments.
+these functions record the reads and writes to registers by calling ctx_log_*.
+During gen_start_packet, we invoke the analyze_<tag> function for each instruction in
+the packet, and we mark the implicit writes. The analysis determines if the packet
+semantics can be short-circuited. If not, we initialize the result register for each
+of the predicated assignments.
In addition to instruction semantics, we use a generator to create the decode
tree. This generation is a four step process.
diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc
index 87942d4..9e3a05f 100644
--- a/target/hexagon/attribs_def.h.inc
+++ b/target/hexagon/attribs_def.h.inc
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -117,6 +117,7 @@
DEF_ATTRIB(IMPLICIT_READS_P2, "Reads the P2 register", "", "")
DEF_ATTRIB(IMPLICIT_READS_P3, "Reads the P3 register", "", "")
DEF_ATTRIB(IMPLICIT_WRITES_USR, "May write USR", "", "")
+DEF_ATTRIB(IMPLICIT_READS_SP, "Reads the SP register", "", "")
DEF_ATTRIB(COMMUTES, "The operation is communitive", "", "")
DEF_ATTRIB(DEALLOCRET, "dealloc_return", "", "")
DEF_ATTRIB(DEALLOCFRAME, "deallocframe", "", "")
diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c
index a40210c..23deba2 100644
--- a/target/hexagon/decode.c
+++ b/target/hexagon/decode.c
@@ -115,22 +115,13 @@
decode_fill_newvalue_regno(Packet *packet)
{
int i, use_regidx, offset, def_idx, dst_idx;
- uint16_t def_opcode, use_opcode;
- char *dststr;
for (i = 1; i < packet->num_insns; i++) {
if (GET_ATTRIB(packet->insn[i].opcode, A_DOTNEWVALUE) &&
!GET_ATTRIB(packet->insn[i].opcode, A_EXTENSION)) {
- use_opcode = packet->insn[i].opcode;
- /* It's a store, so we're adjusting the Nt field */
- if (GET_ATTRIB(use_opcode, A_STORE)) {
- use_regidx = strchr(opcode_reginfo[use_opcode], 't') -
- opcode_reginfo[use_opcode];
- } else { /* It's a Jump, so we're adjusting the Ns field */
- use_regidx = strchr(opcode_reginfo[use_opcode], 's') -
- opcode_reginfo[use_opcode];
- }
+ g_assert(packet->insn[i].new_read_idx != -1);
+ use_regidx = packet->insn[i].new_read_idx;
/*
* What's encoded at the N-field is the offset to who's producing
@@ -151,37 +142,9 @@
*/
g_assert(!((def_idx < 0) || (def_idx > (packet->num_insns - 1))));
- /*
- * packet->insn[def_idx] is the producer
- * Figure out which type of destination it produces
- * and the corresponding index in the reginfo
- */
- def_opcode = packet->insn[def_idx].opcode;
- dststr = strstr(opcode_wregs[def_opcode], "Rd");
- if (dststr) {
- dststr = strchr(opcode_reginfo[def_opcode], 'd');
- } else {
- dststr = strstr(opcode_wregs[def_opcode], "Rx");
- if (dststr) {
- dststr = strchr(opcode_reginfo[def_opcode], 'x');
- } else {
- dststr = strstr(opcode_wregs[def_opcode], "Re");
- if (dststr) {
- dststr = strchr(opcode_reginfo[def_opcode], 'e');
- } else {
- dststr = strstr(opcode_wregs[def_opcode], "Ry");
- if (dststr) {
- dststr = strchr(opcode_reginfo[def_opcode], 'y');
- } else {
- g_assert_not_reached();
- }
- }
- }
- }
- g_assert(dststr != NULL);
-
/* Now patch up the consumer with the register number */
- dst_idx = dststr - opcode_reginfo[def_opcode];
+ g_assert(packet->insn[def_idx].dest_idx != -1);
+ dst_idx = packet->insn[def_idx].dest_idx;
packet->insn[i].regno[use_regidx] =
packet->insn[def_idx].regno[dst_idx];
/*
@@ -362,8 +325,7 @@
for (flag = false, i = 0; i < last_insn + 1; i++) {
int opcode = packet->insn[i].opcode;
- if ((strstr(opcode_wregs[opcode], "Pd4") ||
- strstr(opcode_wregs[opcode], "Pe4")) &&
+ if (packet->insn[i].has_pred_dest &&
GET_ATTRIB(opcode, A_STORE) == 0) {
/* This should be a compare (not a store conditional) */
if (flag) {
diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
index a9af666..54bac19 100755
--- a/target/hexagon/gen_analyze_funcs.py
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
##
-## Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+## Copyright(c) 2022-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@@ -43,59 +43,53 @@
f.write("{\n")
f.write(" Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
-
- i = 0
- ## Analyze all the registers
- for regtype, regid in regs:
- reg = hex_common.get_register(tag, regtype, regid)
- if reg.is_written():
- reg.analyze_write(f, tag, i)
+ if (hex_common.is_hvx_insn(tag)):
+ if hex_common.has_hvx_helper(tag):
+ f.write(
+ " const bool G_GNUC_UNUSED insn_has_hvx_helper = true;\n"
+ )
+ f.write(" ctx_start_hvx_insn(ctx);\n")
else:
- reg.analyze_read(f, i)
- i += 1
+ f.write(
+ " const bool G_GNUC_UNUSED insn_has_hvx_helper = false;\n"
+ )
- has_generated_helper = not hex_common.skip_qemu_helper(
- tag
- ) and not hex_common.is_idef_parser_enabled(tag)
+ ## Declare all the registers
+ for regno, register in enumerate(regs):
+ reg_type, reg_id = register
+ reg = hex_common.get_register(tag, reg_type, reg_id)
+ reg.decl_reg_num(f, regno)
- ## Mark HVX instructions with generated helpers
- if (has_generated_helper and
- "A_CVI" in hex_common.attribdict[tag]):
- f.write(" ctx->has_hvx_helper = true;\n")
+ ## Analyze the register reads
+ for regno, register in enumerate(regs):
+ reg_type, reg_id = register
+ reg = hex_common.get_register(tag, reg_type, reg_id)
+ if reg.is_read():
+ reg.analyze_read(f, regno)
+
+ ## Analyze the register writes
+ for regno, register in enumerate(regs):
+ reg_type, reg_id = register
+ reg = hex_common.get_register(tag, reg_type, reg_id)
+ if reg.is_written():
+ reg.analyze_write(f, tag, regno)
f.write("}\n\n")
def main():
- hex_common.read_semantics_file(sys.argv[1])
- hex_common.read_attribs_file(sys.argv[2])
- hex_common.read_overrides_file(sys.argv[3])
- hex_common.read_overrides_file(sys.argv[4])
- ## Whether or not idef-parser is enabled is
- ## determined by the number of arguments to
- ## this script:
- ##
- ## 5 args. -> not enabled,
- ## 6 args. -> idef-parser enabled.
- ##
- ## The 6:th arg. then holds a list of the successfully
- ## parsed instructions.
- is_idef_parser_enabled = len(sys.argv) > 6
- if is_idef_parser_enabled:
- hex_common.read_idef_parser_enabled_file(sys.argv[5])
- hex_common.calculate_attribs()
- hex_common.init_registers()
+ hex_common.read_common_files()
tagregs = hex_common.get_tagregs()
tagimms = hex_common.get_tagimms()
with open(sys.argv[-1], "w") as f:
- f.write("#ifndef HEXAGON_TCG_FUNCS_H\n")
- f.write("#define HEXAGON_TCG_FUNCS_H\n\n")
+ f.write("#ifndef HEXAGON_ANALYZE_FUNCS_C_INC\n")
+ f.write("#define HEXAGON_ANALYZE_FUNCS_C_INC\n\n")
for tag in hex_common.tags:
gen_analyze_func(f, tag, tagregs[tag], tagimms[tag])
- f.write("#endif /* HEXAGON_TCG_FUNCS_H */\n")
+ f.write("#endif /* HEXAGON_ANALYZE_FUNCS_C_INC */\n")
if __name__ == "__main__":
diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py
index 9cc3d69..e9685bf 100755
--- a/target/hexagon/gen_helper_funcs.py
+++ b/target/hexagon/gen_helper_funcs.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
##
-## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@@ -102,24 +102,7 @@
def main():
- hex_common.read_semantics_file(sys.argv[1])
- hex_common.read_attribs_file(sys.argv[2])
- hex_common.read_overrides_file(sys.argv[3])
- hex_common.read_overrides_file(sys.argv[4])
- ## Whether or not idef-parser is enabled is
- ## determined by the number of arguments to
- ## this script:
- ##
- ## 5 args. -> not enabled,
- ## 6 args. -> idef-parser enabled.
- ##
- ## The 6:th arg. then holds a list of the successfully
- ## parsed instructions.
- is_idef_parser_enabled = len(sys.argv) > 6
- if is_idef_parser_enabled:
- hex_common.read_idef_parser_enabled_file(sys.argv[5])
- hex_common.calculate_attribs()
- hex_common.init_registers()
+ hex_common.read_common_files()
tagregs = hex_common.get_tagregs()
tagimms = hex_common.get_tagimms()
diff --git a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py
index c82b0f5..fd2bfd0 100755
--- a/target/hexagon/gen_helper_protos.py
+++ b/target/hexagon/gen_helper_protos.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
##
-## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@@ -40,28 +40,19 @@
declared.append(arg.proto_arg)
arguments = ", ".join(declared)
- f.write(f"DEF_HELPER_{len(declared) - 1}({tag}, {arguments})\n")
+
+ ## Add the TCG_CALL_NO_RWG_SE flag to helpers that don't take the env
+ ## argument and aren't HVX instructions. Since HVX instructions take
+ ## pointers to their arguments, they will have side effects.
+ if hex_common.need_env(tag) or hex_common.is_hvx_insn(tag):
+ f.write(f"DEF_HELPER_{len(declared) - 1}({tag}, {arguments})\n")
+ else:
+ f.write(f"DEF_HELPER_FLAGS_{len(declared) - 1}({tag}, "
+ f"TCG_CALL_NO_RWG_SE, {arguments})\n")
def main():
- hex_common.read_semantics_file(sys.argv[1])
- hex_common.read_attribs_file(sys.argv[2])
- hex_common.read_overrides_file(sys.argv[3])
- hex_common.read_overrides_file(sys.argv[4])
- ## Whether or not idef-parser is enabled is
- ## determined by the number of arguments to
- ## this script:
- ##
- ## 5 args. -> not enabled,
- ## 6 args. -> idef-parser enabled.
- ##
- ## The 6:th arg. then holds a list of the successfully
- ## parsed instructions.
- is_idef_parser_enabled = len(sys.argv) > 6
- if is_idef_parser_enabled:
- hex_common.read_idef_parser_enabled_file(sys.argv[5])
- hex_common.calculate_attribs()
- hex_common.init_registers()
+ hex_common.read_common_files()
tagregs = hex_common.get_tagregs()
tagimms = hex_common.get_tagimms()
diff --git a/target/hexagon/gen_idef_parser_funcs.py b/target/hexagon/gen_idef_parser_funcs.py
index 550a48c..eb494ab 100644
--- a/target/hexagon/gen_idef_parser_funcs.py
+++ b/target/hexagon/gen_idef_parser_funcs.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
##
-## Copyright(c) 2019-2023 rev.ng Labs Srl. All Rights Reserved.
+## Copyright(c) 2019-2024 rev.ng Labs Srl. All Rights Reserved.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@@ -44,13 +44,12 @@
##
def main():
hex_common.read_semantics_file(sys.argv[1])
- hex_common.read_attribs_file(sys.argv[2])
hex_common.calculate_attribs()
hex_common.init_registers()
tagregs = hex_common.get_tagregs()
tagimms = hex_common.get_tagimms()
- with open(sys.argv[3], "w") as f:
+ with open(sys.argv[-1], "w") as f:
f.write('#include "macros.inc"\n\n')
for tag in hex_common.tags:
diff --git a/target/hexagon/gen_op_attribs.py b/target/hexagon/gen_op_attribs.py
index 41074b8..9944822 100755
--- a/target/hexagon/gen_op_attribs.py
+++ b/target/hexagon/gen_op_attribs.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
##
-## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@@ -25,13 +25,12 @@
def main():
hex_common.read_semantics_file(sys.argv[1])
- hex_common.read_attribs_file(sys.argv[2])
hex_common.calculate_attribs()
##
## Generate all the attributes associated with each instruction
##
- with open(sys.argv[3], "w") as f:
+ with open(sys.argv[-1], "w") as f:
for tag in hex_common.tags:
f.write(
f"OP_ATTRIB({tag},ATTRIBS("
diff --git a/target/hexagon/gen_op_regs.py b/target/hexagon/gen_op_regs.py
deleted file mode 100755
index 7b7b338..0000000
--- a/target/hexagon/gen_op_regs.py
+++ /dev/null
@@ -1,125 +0,0 @@
-#!/usr/bin/env python3
-
-##
-## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2 of the License, or
-## (at your option) any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, see <http://www.gnu.org/licenses/>.
-##
-
-import sys
-import re
-import string
-import hex_common
-
-
-##
-## Generate the register and immediate operands for each instruction
-##
-def calculate_regid_reg(tag):
- def letter_inc(x):
- return chr(ord(x) + 1)
-
- ordered_implregs = ["SP", "FP", "LR"]
- srcdst_lett = "X"
- src_lett = "S"
- dst_lett = "D"
- retstr = ""
- mapdict = {}
- for reg in ordered_implregs:
- reg_rd = 0
- reg_wr = 0
- if ("A_IMPLICIT_WRITES_" + reg) in hex_common.attribdict[tag]:
- reg_wr = 1
- if reg_rd and reg_wr:
- retstr += srcdst_lett
- mapdict[srcdst_lett] = reg
- srcdst_lett = letter_inc(srcdst_lett)
- elif reg_rd:
- retstr += src_lett
- mapdict[src_lett] = reg
- src_lett = letter_inc(src_lett)
- elif reg_wr:
- retstr += dst_lett
- mapdict[dst_lett] = reg
- dst_lett = letter_inc(dst_lett)
- return retstr, mapdict
-
-
-def calculate_regid_letters(tag):
- retstr, mapdict = calculate_regid_reg(tag)
- return retstr
-
-
-def strip_reg_prefix(x):
- y = x.replace("UREG.", "")
- y = y.replace("MREG.", "")
- return y.replace("GREG.", "")
-
-
-def main():
- hex_common.read_semantics_file(sys.argv[1])
- hex_common.read_attribs_file(sys.argv[2])
- hex_common.init_registers()
- tagregs = hex_common.get_tagregs(full=True)
- tagimms = hex_common.get_tagimms()
-
- with open(sys.argv[3], "w") as f:
- for tag in hex_common.tags:
- regs = tagregs[tag]
- rregs = []
- wregs = []
- regids = ""
- for regtype, regid, _, numregs in regs:
- reg = hex_common.get_register(tag, regtype, regid)
- if reg.is_read():
- if regid[0] not in regids:
- regids += regid[0]
- rregs.append(regtype + regid + numregs)
- if reg.is_written():
- wregs.append(regtype + regid + numregs)
- if regid[0] not in regids:
- regids += regid[0]
- for attrib in hex_common.attribdict[tag]:
- if hex_common.attribinfo[attrib]["rreg"]:
- rregs.append(strip_reg_prefix(attribinfo[attrib]["rreg"]))
- if hex_common.attribinfo[attrib]["wreg"]:
- wregs.append(strip_reg_prefix(attribinfo[attrib]["wreg"]))
- regids += calculate_regid_letters(tag)
- f.write(
- f'REGINFO({tag},"{regids}",\t/*RD:*/\t"{",".join(rregs)}",'
- f'\t/*WR:*/\t"{",".join(wregs)}")\n'
- )
-
- for tag in hex_common.tags:
- imms = tagimms[tag]
- f.write(f"IMMINFO({tag}")
- if not imms:
- f.write(""",'u',0,0,'U',0,0""")
- for sign, size, shamt in imms:
- if sign == "r":
- sign = "s"
- if not shamt:
- shamt = "0"
- f.write(f""",'{sign}',{size},{shamt}""")
- if len(imms) == 1:
- if sign.isupper():
- myu = "u"
- else:
- myu = "U"
- f.write(f""",'{myu}',0,0""")
- f.write(")\n")
-
-
-if __name__ == "__main__":
- main()
diff --git a/target/hexagon/gen_opcodes_def.py b/target/hexagon/gen_opcodes_def.py
index cddd868..536f0eb 100755
--- a/target/hexagon/gen_opcodes_def.py
+++ b/target/hexagon/gen_opcodes_def.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
##
-## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@@ -29,7 +29,7 @@
##
## Generate a list of all the opcodes
##
- with open(sys.argv[3], "w") as f:
+ with open(sys.argv[-1], "w") as f:
for tag in hex_common.tags:
f.write(f"OPCODE({tag}),\n")
diff --git a/target/hexagon/gen_printinsn.py b/target/hexagon/gen_printinsn.py
index e570bd7..8bf4d09 100755
--- a/target/hexagon/gen_printinsn.py
+++ b/target/hexagon/gen_printinsn.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
##
-## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@@ -97,11 +97,10 @@
def main():
hex_common.read_semantics_file(sys.argv[1])
- hex_common.read_attribs_file(sys.argv[2])
immext_casere = re.compile(r"IMMEXT\(([A-Za-z])")
- with open(sys.argv[3], "w") as f:
+ with open(sys.argv[-1], "w") as f:
for tag in hex_common.tags:
if not hex_common.behdict[tag]:
continue
diff --git a/target/hexagon/gen_shortcode.py b/target/hexagon/gen_shortcode.py
deleted file mode 100755
index deb9444..0000000
--- a/target/hexagon/gen_shortcode.py
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/env python3
-
-##
-## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 2 of the License, or
-## (at your option) any later version.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with this program; if not, see <http://www.gnu.org/licenses/>.
-##
-
-import sys
-import re
-import string
-import hex_common
-
-
-def gen_shortcode(f, tag):
- f.write(f"DEF_SHORTCODE({tag}, {hex_common.semdict[tag]})\n")
-
-
-def main():
- hex_common.read_semantics_file(sys.argv[1])
- hex_common.read_attribs_file(sys.argv[2])
- hex_common.calculate_attribs()
- tagregs = hex_common.get_tagregs()
- tagimms = hex_common.get_tagimms()
-
- with open(sys.argv[3], "w") as f:
- f.write("#ifndef DEF_SHORTCODE\n")
- f.write("#define DEF_SHORTCODE(TAG,SHORTCODE) /* Nothing */\n")
- f.write("#endif\n")
-
- for tag in hex_common.tags:
- ## Skip the priv instructions
- if "A_PRIV" in hex_common.attribdict[tag]:
- continue
- ## Skip the guest instructions
- if "A_GUEST" in hex_common.attribdict[tag]:
- continue
- ## Skip the diag instructions
- if tag == "Y6_diag":
- continue
- if tag == "Y6_diag0":
- continue
- if tag == "Y6_diag1":
- continue
-
- gen_shortcode(f, tag)
-
- f.write("#undef DEF_SHORTCODE\n")
-
-
-if __name__ == "__main__":
- main()
diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index 1c4391b..3fc1f4e 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -1369,3 +1369,6 @@
gen_helper_raise_exception(tcg_env, excp); \
} while (0)
#endif
+
+#define fGEN_TCG_A2_nop(SHORTCODE) do { } while (0)
+#define fGEN_TCG_SA1_setin1(SHORTCODE) tcg_gen_movi_tl(RdV, -1)
diff --git a/target/hexagon/gen_tcg_func_table.py b/target/hexagon/gen_tcg_func_table.py
index f998ef0..978ac18 100755
--- a/target/hexagon/gen_tcg_func_table.py
+++ b/target/hexagon/gen_tcg_func_table.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
##
-## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@@ -25,12 +25,11 @@
def main():
hex_common.read_semantics_file(sys.argv[1])
- hex_common.read_attribs_file(sys.argv[2])
hex_common.calculate_attribs()
tagregs = hex_common.get_tagregs()
tagimms = hex_common.get_tagimms()
- with open(sys.argv[3], "w") as f:
+ with open(sys.argv[-1], "w") as f:
f.write("#ifndef HEXAGON_FUNC_TABLE_H\n")
f.write("#define HEXAGON_FUNC_TABLE_H\n\n")
diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py
index 3d8e3cb..05aa0a7 100755
--- a/target/hexagon/gen_tcg_funcs.py
+++ b/target/hexagon/gen_tcg_funcs.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
##
-## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@@ -108,24 +108,7 @@
def main():
- hex_common.read_semantics_file(sys.argv[1])
- hex_common.read_attribs_file(sys.argv[2])
- hex_common.read_overrides_file(sys.argv[3])
- hex_common.read_overrides_file(sys.argv[4])
- hex_common.calculate_attribs()
- hex_common.init_registers()
- ## Whether or not idef-parser is enabled is
- ## determined by the number of arguments to
- ## this script:
- ##
- ## 5 args. -> not enabled,
- ## 6 args. -> idef-parser enabled.
- ##
- ## The 6:th arg. then holds a list of the successfully
- ## parsed instructions.
- is_idef_parser_enabled = len(sys.argv) > 6
- if is_idef_parser_enabled:
- hex_common.read_idef_parser_enabled_file(sys.argv[5])
+ is_idef_parser_enabled = hex_common.read_common_files()
tagregs = hex_common.get_tagregs()
tagimms = hex_common.get_tagimms()
diff --git a/target/hexagon/gen_trans_funcs.py b/target/hexagon/gen_trans_funcs.py
index 53e844a..9f86b4e 100755
--- a/target/hexagon/gen_trans_funcs.py
+++ b/target/hexagon/gen_trans_funcs.py
@@ -68,6 +68,9 @@
## insn->regno[0] = args->Rd;
## insn->regno[1] = args->Rs;
## insn->regno[2] = args->Rt;
+## insn->new_read_idx = -1;
+## insn->dest_idx = 0;
+## insn->has_pred_dest = false;
## return true;
## }
##
@@ -84,14 +87,21 @@
insn->opcode = {tag};
"""))
- regno = 0
- for reg in regs:
- reg_type = reg[0]
- reg_id = reg[1]
+ new_read_idx = -1
+ dest_idx = -1
+ has_pred_dest = "false"
+ for regno, (reg_type, reg_id, *_) in enumerate(regs):
+ reg = hex_common.get_register(tag, reg_type, reg_id)
f.write(code_fmt(f"""\
insn->regno[{regno}] = args->{reg_type}{reg_id};
"""))
- regno += 1
+ if reg.is_read() and reg.is_new():
+ new_read_idx = regno
+ # dest_idx should be the first destination, so check for -1
+ if reg.is_written() and dest_idx == -1:
+ dest_idx = regno
+ if reg_type == "P" and reg.is_written() and not reg.is_read():
+ has_pred_dest = "true"
if len(imms) != 0:
mark_which_imm_extended(f, tag)
@@ -112,6 +122,11 @@
insn->immed[{immno}] = args->{imm_type}{imm_letter};
"""))
+ f.write(code_fmt(f"""\
+ insn->new_read_idx = {new_read_idx};
+ insn->dest_idx = {dest_idx};
+ insn->has_pred_dest = {has_pred_dest};
+ """))
f.write(textwrap.dedent(f"""\
return true;
{close_curly}
@@ -120,5 +135,6 @@
if __name__ == "__main__":
hex_common.read_semantics_file(sys.argv[1])
+ hex_common.init_registers()
with open(sys.argv[2], "w") as f:
gen_trans_funcs(f)
diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index 195620c..15ed498 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
##
-## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@@ -26,7 +26,6 @@
semdict = {} # tag -> semantics
attribdict = {} # tag -> attributes
macros = {} # macro -> macro information...
-attribinfo = {} # Register information and misc
registers = {} # register -> register functions
new_registers = {}
tags = [] # list of all tags
@@ -101,6 +100,7 @@
add_qemu_macro_attrib('fLSBNEW1', 'A_IMPLICIT_READS_P1')
add_qemu_macro_attrib('fLSBNEW1NOT', 'A_IMPLICIT_READS_P1')
add_qemu_macro_attrib('fREAD_P3', 'A_IMPLICIT_READS_P3')
+ add_qemu_macro_attrib('fREAD_SP', 'A_IMPLICIT_READS_SP')
# Recurse down macros, find attributes from sub-macros
macroValues = list(macros.values())
@@ -197,6 +197,26 @@
return dict(zip(tags, list(map(compute_tag_immediates, tags))))
+def need_p0(tag):
+ return "A_IMPLICIT_READS_P0" in attribdict[tag]
+
+
+def need_sp(tag):
+ return "A_IMPLICIT_READS_SP" in attribdict[tag]
+
+
+def is_hvx_insn(tag):
+ return "A_CVI" in attribdict[tag]
+
+
+def need_env(tag):
+ return ("A_STORE" in attribdict[tag] or
+ "A_LOAD" in attribdict[tag] or
+ "A_CVI_GATHER" in attribdict[tag] or
+ "A_CVI_SCATTER" in attribdict[tag] or
+ "A_IMPLICIT_WRITES_USR" in attribdict[tag])
+
+
def need_slot(tag):
if (
"A_CVI_SCATTER" not in attribdict[tag]
@@ -241,6 +261,16 @@
return tag in idef_parser_enabled
+def is_hvx_insn(tag):
+ return "A_CVI" in attribdict[tag]
+
+
+def has_hvx_helper(tag):
+ return (is_hvx_insn(tag) and
+ not skip_qemu_helper(tag) and
+ not is_idef_parser_enabled(tag))
+
+
def imm_name(immlett):
return f"{immlett}iV"
@@ -257,19 +287,6 @@
eval_line = ""
-def read_attribs_file(name):
- attribre = re.compile(
- r"DEF_ATTRIB\(([A-Za-z0-9_]+), ([^,]*), "
- + r'"([A-Za-z0-9_\.]*)", "([A-Za-z0-9_\.]*)"\)'
- )
- for line in open(name, "rt").readlines():
- if not attribre.match(line):
- continue
- (attrib_base, descr, rreg, wreg) = attribre.findall(line)[0]
- attrib_base = "A_" + attrib_base
- attribinfo[attrib_base] = {"rreg": rreg, "wreg": wreg, "descr": descr}
-
-
def read_overrides_file(name):
overridere = re.compile(r"#define fGEN_TCG_([A-Za-z0-9_]+)\(.*")
for line in open(name, "rt").readlines():
@@ -397,10 +414,18 @@
class OldSource(Source):
def reg_tcg(self):
return f"{self.regtype}{self.regid}V"
+ def is_old(self):
+ return True
+ def is_new(self):
+ return False
class NewSource(Source):
def reg_tcg(self):
return f"{self.regtype}{self.regid}N"
+ def is_old(self):
+ return False
+ def is_new(self):
+ return True
class ReadWrite:
def reg_tcg(self):
@@ -413,6 +438,10 @@
return True
def is_readwrite(self):
return True
+ def is_old(self):
+ return True
+ def is_new(self):
+ return False
class GprDest(Register, Single, Dest):
def decl_tcg(self, f, tag, regno):
@@ -425,7 +454,6 @@
gen_log_reg_write(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
ctx_log_reg_write(ctx, {self.reg_num}, {predicated});
@@ -438,7 +466,6 @@
TCGv {self.reg_tcg()} = hex_gpr[{self.reg_num}];
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_reg_read(ctx, {self.reg_num});
"""))
@@ -449,9 +476,8 @@
TCGv {self.reg_tcg()} = get_result_gpr(ctx, insn->regno[{regno}]);
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
- ctx_log_reg_read(ctx, {self.reg_num});
+ ctx_log_reg_read_new(ctx, {self.reg_num});
"""))
class GprReadWrite(Register, Single, ReadWrite):
@@ -471,8 +497,11 @@
f.write(code_fmt(f"""\
gen_log_reg_write(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
+ def analyze_read(self, f, regno):
+ f.write(code_fmt(f"""\
+ ctx_log_reg_read(ctx, {self.reg_num});
+ """))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
ctx_log_reg_write(ctx, {self.reg_num}, {predicated});
@@ -493,7 +522,6 @@
gen_write_ctrl_reg(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
ctx_log_reg_write(ctx, {self.reg_num}, {predicated});
@@ -511,7 +539,6 @@
gen_read_ctrl_reg(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_reg_read(ctx, {self.reg_num});
"""))
@@ -532,7 +559,6 @@
declared.append(self.reg_tcg())
declared.append("CS")
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_reg_read(ctx, {self.reg_num});
"""))
@@ -548,7 +574,6 @@
gen_log_pred_write(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_pred_write(ctx, {self.reg_num});
"""))
@@ -560,7 +585,6 @@
TCGv {self.reg_tcg()} = hex_pred[{self.reg_num}];
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_pred_read(ctx, {self.reg_num});
"""))
@@ -571,9 +595,8 @@
TCGv {self.reg_tcg()} = get_result_pred(ctx, insn->regno[{regno}]);
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
- ctx_log_pred_read(ctx, {self.reg_num});
+ ctx_log_pred_read_new(ctx, {self.reg_num});
"""))
class PredReadWrite(Register, Single, ReadWrite):
@@ -587,8 +610,11 @@
f.write(code_fmt(f"""\
gen_log_pred_write(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
+ def analyze_read(self, f, regno):
+ f.write(code_fmt(f"""\
+ ctx_log_pred_read(ctx, {self.reg_num});
+ """))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_pred_write(ctx, {self.reg_num});
"""))
@@ -605,7 +631,6 @@
gen_log_reg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated});
@@ -621,7 +646,6 @@
hex_gpr[{self.reg_num} + 1]);
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_reg_read_pair(ctx, {self.reg_num});
"""))
@@ -640,8 +664,11 @@
f.write(code_fmt(f"""\
gen_log_reg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
+ def analyze_read(self, f, regno):
+ f.write(code_fmt(f"""\
+ ctx_log_reg_read_pair(ctx, {self.reg_num});
+ """))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated});
@@ -663,7 +690,6 @@
gen_write_ctrl_reg_pair(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated});
@@ -681,7 +707,6 @@
gen_read_ctrl_reg_pair(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_reg_read_pair(ctx, {self.reg_num});
"""))
@@ -705,11 +730,11 @@
/* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
newv = hvx_newv(tag)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
- ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated});
+ ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated},
+ insn_has_hvx_helper);
"""))
class VRegSource(Register, Hvx, OldSource):
@@ -728,9 +753,8 @@
/* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
- ctx_log_vreg_read(ctx, {self.reg_num});
+ ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
class VRegNewSource(Register, Hvx, NewSource):
@@ -746,9 +770,8 @@
/* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
- ctx_log_vreg_read(ctx, {self.reg_num});
+ ctx_log_vreg_read_new(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
class VRegReadWrite(Register, Hvx, ReadWrite):
@@ -772,12 +795,16 @@
f.write(code_fmt(f"""\
/* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
"""))
+ def analyze_read(self, f, regno):
+ f.write(code_fmt(f"""\
+ ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
+ """))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
newv = hvx_newv(tag)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
- ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated});
+ ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated},
+ insn_has_hvx_helper);
"""))
class VRegTmp(Register, Hvx, ReadWrite):
@@ -803,12 +830,16 @@
f.write(code_fmt(f"""\
/* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
"""))
+ def analyze_read(self, f, regno):
+ f.write(code_fmt(f"""\
+ ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
+ """))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
newv = hvx_newv(tag)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
- ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated});
+ ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated},
+ insn_has_hvx_helper);
"""))
class VRegPairDest(Register, Hvx, Dest):
@@ -830,11 +861,11 @@
/* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
newv = hvx_newv(tag)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
- ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated});
+ ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated},
+ insn_has_hvx_helper);
"""))
class VRegPairSource(Register, Hvx, OldSource):
@@ -860,9 +891,8 @@
/* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
- ctx_log_vreg_read_pair(ctx, {self.reg_num});
+ ctx_log_vreg_read_pair(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
class VRegPairReadWrite(Register, Hvx, ReadWrite):
@@ -892,12 +922,16 @@
f.write(code_fmt(f"""\
/* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */
"""))
+ def analyze_read(self, f, regno):
+ f.write(code_fmt(f"""\
+ ctx_log_vreg_read_pair(ctx, {self.reg_num}, insn_has_hvx_helper);
+ """))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
newv = hvx_newv(tag)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
- ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated});
+ ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated},
+ insn_has_hvx_helper);
"""))
class QRegDest(Register, Hvx, Dest):
@@ -919,9 +953,8 @@
/* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
- ctx_log_qreg_write(ctx, {self.reg_num});
+ ctx_log_qreg_write(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
class QRegSource(Register, Hvx, OldSource):
@@ -941,9 +974,8 @@
/* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
- ctx_log_qreg_read(ctx, {self.reg_num});
+ ctx_log_qreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
class QRegReadWrite(Register, Hvx, ReadWrite):
@@ -967,10 +999,13 @@
f.write(code_fmt(f"""\
/* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */
"""))
- def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
+ def analyze_read(self, f, regno):
f.write(code_fmt(f"""\
- ctx_log_qreg_write(ctx, {self.reg_num});
+ ctx_log_qreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
+ """))
+ def analyze_write(self, f, tag, regno):
+ f.write(code_fmt(f"""\
+ ctx_log_qreg_write(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
def init_registers():
@@ -1060,11 +1095,12 @@
args = []
## First argument is the CPU state
- args.append(HelperArg(
- "env",
- "tcg_env",
- "CPUHexagonState *env"
- ))
+ if need_env(tag):
+ args.append(HelperArg(
+ "env",
+ "tcg_env",
+ "CPUHexagonState *env"
+ ))
## For predicated instructions, we pass in the destination register
if is_predicated(tag):
@@ -1118,6 +1154,18 @@
"tcg_constant_tl(ctx->next_PC)",
"target_ulong next_PC"
))
+ if need_p0(tag):
+ args.append(HelperArg(
+ "i32",
+ "hex_pred[0]",
+ "uint32_t P0"
+ ))
+ if need_sp(tag):
+ args.append(HelperArg(
+ "i32",
+ "hex_gpr[HEX_REG_SP]",
+ "uint32_t SP"
+ ))
if need_slot(tag):
args.append(HelperArg(
"i32",
@@ -1131,3 +1179,24 @@
"uint32_t part1"
))
return args
+
+
+def read_common_files():
+ read_semantics_file(sys.argv[1])
+ read_overrides_file(sys.argv[2])
+ read_overrides_file(sys.argv[3])
+ ## Whether or not idef-parser is enabled is
+ ## determined by the number of arguments to
+ ## this script:
+ ##
+ ## 4 args. -> not enabled,
+ ## 5 args. -> idef-parser enabled.
+ ##
+ ## The 5:th arg. then holds a list of the successfully
+ ## parsed instructions.
+ is_idef_parser_enabled = len(sys.argv) > 5
+ if is_idef_parser_enabled:
+ read_idef_parser_enabled_file(sys.argv[4])
+ calculate_attribs()
+ init_registers()
+ return is_idef_parser_enabled
diff --git a/target/hexagon/insn.h b/target/hexagon/insn.h
index 3e7a22c..24dcf7f 100644
--- a/target/hexagon/insn.h
+++ b/target/hexagon/insn.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -39,6 +39,9 @@
uint32_t slot:3;
uint32_t which_extended:1; /* If has an extender, which immediate */
uint32_t new_value_producer_slot:4;
+ int32_t new_read_idx;
+ int32_t dest_idx;
+ bool has_pred_dest;
bool part1; /*
* cmp-jumps are split into two insns.
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index 1376d6c..feb798c 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -343,7 +343,7 @@
#define fREAD_LR() (env->gpr[HEX_REG_LR])
-#define fREAD_SP() (env->gpr[HEX_REG_SP])
+#define fREAD_SP() (SP)
#define fREAD_LC0 (env->gpr[HEX_REG_LC0])
#define fREAD_LC1 (env->gpr[HEX_REG_LC1])
#define fREAD_SA0 (env->gpr[HEX_REG_SA0])
@@ -358,7 +358,7 @@
#endif
#define fREAD_PC() (PC)
-#define fREAD_P0() (env->pred[0])
+#define fREAD_P0() (P0)
#define fCHECK_PCALIGN(A)
diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build
index fb480af..b0b253a 100644
--- a/target/hexagon/meson.build
+++ b/target/hexagon/meson.build
@@ -1,5 +1,5 @@
##
-## Copyright(c) 2020-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+## Copyright(c) 2020-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@@ -18,7 +18,6 @@
hexagon_ss = ss.source_set()
hex_common_py = 'hex_common.py'
-attribs_def = meson.current_source_dir() / 'attribs_def.h.inc'
gen_tcg_h = meson.current_source_dir() / 'gen_tcg.h'
gen_tcg_hvx_h = meson.current_source_dir() / 'gen_tcg_hvx.h'
idef_parser_dir = meson.current_source_dir() / 'idef-parser'
@@ -42,28 +41,17 @@
#
# Step 2
# We use Python scripts to generate the following files
-# shortcode_generated.h.inc
# tcg_func_table_generated.c.inc
# printinsn_generated.h.inc
-# op_regs_generated.h.inc
# op_attribs_generated.h.inc
# opcodes_def_generated.h.inc
#
-shortcode_generated = custom_target(
- 'shortcode_generated.h.inc',
- output: 'shortcode_generated.h.inc',
- depends: [semantics_generated],
- depend_files: [hex_common_py, attribs_def],
- command: [python, files('gen_shortcode.py'), semantics_generated, attribs_def, '@OUTPUT@'],
-)
-hexagon_ss.add(shortcode_generated)
-
tcg_func_table_generated = custom_target(
'tcg_func_table_generated.c.inc',
output: 'tcg_func_table_generated.c.inc',
depends: [semantics_generated],
- depend_files: [hex_common_py, attribs_def],
- command: [python, files('gen_tcg_func_table.py'), semantics_generated, attribs_def, '@OUTPUT@'],
+ depend_files: [hex_common_py],
+ command: [python, files('gen_tcg_func_table.py'), semantics_generated, '@OUTPUT@'],
)
hexagon_ss.add(tcg_func_table_generated)
@@ -71,26 +59,17 @@
'printinsn_generated.h.inc',
output: 'printinsn_generated.h.inc',
depends: [semantics_generated],
- depend_files: [hex_common_py, attribs_def],
- command: [python, files('gen_printinsn.py'), semantics_generated, attribs_def, '@OUTPUT@'],
+ depend_files: [hex_common_py],
+ command: [python, files('gen_printinsn.py'), semantics_generated, '@OUTPUT@'],
)
hexagon_ss.add(printinsn_generated)
-op_regs_generated = custom_target(
- 'op_regs_generated.h.inc',
- output: 'op_regs_generated.h.inc',
- depends: [semantics_generated],
- depend_files: [hex_common_py, attribs_def],
- command: [python, files('gen_op_regs.py'), semantics_generated, attribs_def, '@OUTPUT@'],
-)
-hexagon_ss.add(op_regs_generated)
-
op_attribs_generated = custom_target(
'op_attribs_generated.h.inc',
output: 'op_attribs_generated.h.inc',
depends: [semantics_generated],
- depend_files: [hex_common_py, attribs_def],
- command: [python, files('gen_op_attribs.py'), semantics_generated, attribs_def, '@OUTPUT@'],
+ depend_files: [hex_common_py],
+ command: [python, files('gen_op_attribs.py'), semantics_generated, '@OUTPUT@'],
)
hexagon_ss.add(op_attribs_generated)
@@ -98,8 +77,8 @@
'opcodes_def_generated.h.inc',
output: 'opcodes_def_generated.h.inc',
depends: [semantics_generated],
- depend_files: [hex_common_py, attribs_def],
- command: [python, files('gen_opcodes_def.py'), semantics_generated, attribs_def, '@OUTPUT@'],
+ depend_files: [hex_common_py],
+ command: [python, files('gen_opcodes_def.py'), semantics_generated, '@OUTPUT@'],
)
hexagon_ss.add(opcodes_def_generated)
@@ -110,7 +89,7 @@
#
gen_dectree_import = executable(
'gen_dectree_import',
- 'gen_dectree_import.c', opcodes_def_generated, op_regs_generated,
+ 'gen_dectree_import.c', opcodes_def_generated,
native: true, build_by_default: false)
iset_py = custom_target(
@@ -298,7 +277,7 @@
output: 'idef_parser_input.h.inc',
depends: [semantics_generated],
depend_files: [hex_common_py],
- command: [python, files('gen_idef_parser_funcs.py'), semantics_generated, attribs_def, '@OUTPUT@'],
+ command: [python, files('gen_idef_parser_funcs.py'), semantics_generated, '@OUTPUT@'],
)
preprocessed_idef_parser_input_generated = custom_target(
@@ -367,12 +346,12 @@
# Setup input and dependencies for the next step, this depends on whether or
# not idef-parser is enabled
helper_dep = [semantics_generated, idef_generated_tcg_c, idef_generated_tcg]
- helper_in = [semantics_generated, attribs_def, gen_tcg_h, gen_tcg_hvx_h, idef_generated_list]
+ helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h, idef_generated_list]
else
# Setup input and dependencies for the next step, this depends on whether or
# not idef-parser is enabled
helper_dep = [semantics_generated]
- helper_in = [semantics_generated, attribs_def, gen_tcg_h, gen_tcg_hvx_h]
+ helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h]
endif
#
@@ -386,7 +365,7 @@
'helper_protos_generated.h.inc',
output: 'helper_protos_generated.h.inc',
depends: helper_dep,
- depend_files: [hex_common_py, attribs_def, gen_tcg_h, gen_tcg_hvx_h],
+ depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h],
command: [python, files('gen_helper_protos.py'), helper_in, '@OUTPUT@'],
)
hexagon_ss.add(helper_protos_generated)
@@ -395,7 +374,7 @@
'helper_funcs_generated.c.inc',
output: 'helper_funcs_generated.c.inc',
depends: helper_dep,
- depend_files: [hex_common_py, attribs_def, gen_tcg_h, gen_tcg_hvx_h],
+ depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h],
command: [python, files('gen_helper_funcs.py'), helper_in, '@OUTPUT@'],
)
hexagon_ss.add(helper_funcs_generated)
@@ -404,7 +383,7 @@
'tcg_funcs_generated.c.inc',
output: 'tcg_funcs_generated.c.inc',
depends: helper_dep,
- depend_files: [hex_common_py, attribs_def, gen_tcg_h, gen_tcg_hvx_h],
+ depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h],
command: [python, files('gen_tcg_funcs.py'), helper_in, '@OUTPUT@'],
)
hexagon_ss.add(tcg_funcs_generated)
@@ -413,7 +392,7 @@
'analyze_funcs_generated.c.inc',
output: 'analyze_funcs_generated.c.inc',
depends: helper_dep,
- depend_files: [hex_common_py, attribs_def, gen_tcg_h, gen_tcg_hvx_h],
+ depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h],
command: [python, files('gen_analyze_funcs.py'), helper_in, '@OUTPUT@'],
)
hexagon_ss.add(analyze_funcs_generated)
diff --git a/target/hexagon/mmvec/decode_ext_mmvec.c b/target/hexagon/mmvec/decode_ext_mmvec.c
index 202d84c..f850d01 100644
--- a/target/hexagon/mmvec/decode_ext_mmvec.c
+++ b/target/hexagon/mmvec/decode_ext_mmvec.c
@@ -28,19 +28,15 @@
{
/* .new value for a MMVector store */
int i, j;
- const char *reginfo;
- const char *destletters;
- const char *dststr = NULL;
uint16_t def_opcode;
- char letter;
for (i = 1; i < pkt->num_insns; i++) {
uint16_t use_opcode = pkt->insn[i].opcode;
if (GET_ATTRIB(use_opcode, A_DOTNEWVALUE) &&
GET_ATTRIB(use_opcode, A_CVI) &&
GET_ATTRIB(use_opcode, A_STORE)) {
- int use_regidx = strchr(opcode_reginfo[use_opcode], 's') -
- opcode_reginfo[use_opcode];
+ int use_regidx = pkt->insn[i].new_read_idx;
+ g_assert(pkt->insn[i].new_read_idx != -1);
/*
* What's encoded at the N-field is the offset to who's producing
* the value.
@@ -68,31 +64,19 @@
/* def_idx is the index of the producer */
def_opcode = pkt->insn[def_idx].opcode;
- reginfo = opcode_reginfo[def_opcode];
- destletters = "dexy";
- for (j = 0; (letter = destletters[j]) != 0; j++) {
- dststr = strchr(reginfo, letter);
- if (dststr != NULL) {
- break;
- }
- }
- if ((dststr == NULL) && GET_ATTRIB(def_opcode, A_CVI_GATHER)) {
+ if ((pkt->insn[def_idx].dest_idx == -1) &&
+ GET_ATTRIB(def_opcode, A_CVI_GATHER)) {
pkt->insn[i].regno[use_regidx] = def_oreg;
pkt->insn[i].new_value_producer_slot = pkt->insn[def_idx].slot;
} else {
- if (dststr == NULL) {
+ if (pkt->insn[def_idx].dest_idx == -1) {
/* still not there, we have a bad packet */
g_assert_not_reached();
}
- int def_regnum = pkt->insn[def_idx].regno[dststr - reginfo];
+ int def_regnum =
+ pkt->insn[def_idx].regno[pkt->insn[def_idx].dest_idx];
/* Now patch up the consumer with the register number */
pkt->insn[i].regno[use_regidx] = def_regnum ^ def_oreg;
- /* special case for (Vx,Vy) */
- dststr = strchr(reginfo, 'y');
- if (def_oreg && strchr(reginfo, 'x') && dststr) {
- def_regnum = pkt->insn[def_idx].regno[dststr - reginfo];
- pkt->insn[i].regno[use_regidx] = def_regnum;
- }
/*
* We need to remember who produces this value to later
* check if it was dynamically cancelled
diff --git a/target/hexagon/opcodes.c b/target/hexagon/opcodes.c
index 1f7f3de..c8bde2f 100644
--- a/target/hexagon/opcodes.c
+++ b/target/hexagon/opcodes.c
@@ -36,41 +36,6 @@
#undef OPCODE
};
-const char * const opcode_reginfo[] = {
-#define IMMINFO(TAG, SIGN, SIZE, SHAMT, SIGN2, SIZE2, SHAMT2) /* nothing */
-#define REGINFO(TAG, REGINFO, RREGS, WREGS) REGINFO,
-#include "op_regs_generated.h.inc"
- NULL
-#undef REGINFO
-#undef IMMINFO
-};
-
-
-const char * const opcode_rregs[] = {
-#define IMMINFO(TAG, SIGN, SIZE, SHAMT, SIGN2, SIZE2, SHAMT2) /* nothing */
-#define REGINFO(TAG, REGINFO, RREGS, WREGS) RREGS,
-#include "op_regs_generated.h.inc"
- NULL
-#undef REGINFO
-#undef IMMINFO
-};
-
-
-const char * const opcode_wregs[] = {
-#define IMMINFO(TAG, SIGN, SIZE, SHAMT, SIGN2, SIZE2, SHAMT2) /* nothing */
-#define REGINFO(TAG, REGINFO, RREGS, WREGS) WREGS,
-#include "op_regs_generated.h.inc"
- NULL
-#undef REGINFO
-#undef IMMINFO
-};
-
-const char * const opcode_short_semantics[] = {
-#define DEF_SHORTCODE(TAG, SHORTCODE) [TAG] = #SHORTCODE,
-#include "shortcode_generated.h.inc"
-#undef DEF_SHORTCODE
- NULL
-};
DECLARE_BITMAP(opcode_attribs[XX_LAST_OPCODE], A_ZZ_LASTATTRIB);
diff --git a/target/hexagon/opcodes.h b/target/hexagon/opcodes.h
index fa7e321..0ee11bd 100644
--- a/target/hexagon/opcodes.h
+++ b/target/hexagon/opcodes.h
@@ -40,10 +40,6 @@
extern const char * const opcode_names[];
-extern const char * const opcode_reginfo[];
-extern const char * const opcode_rregs[];
-extern const char * const opcode_wregs[];
-
typedef struct {
const char * const encoding;
const EncClass enc_class;
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 47a870f..0904dc2 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -380,70 +380,8 @@
return true;
}
- if (pkt->num_insns == 1) {
- if (pkt->pkt_has_hvx) {
- /*
- * The HVX instructions with generated helpers use
- * pass-by-reference, so they need the read/write overlap
- * check below.
- * The HVX instructions with overrides are OK.
- */
- if (!ctx->has_hvx_helper) {
- return false;
- }
- } else {
- return false;
- }
- }
-
- /* Check for overlap between register reads and writes */
- for (int i = 0; i < ctx->reg_log_idx; i++) {
- int rnum = ctx->reg_log[i];
- if (test_bit(rnum, ctx->regs_read)) {
- return true;
- }
- }
-
- /* Check for overlap between predicate reads and writes */
- for (int i = 0; i < ctx->preg_log_idx; i++) {
- int pnum = ctx->preg_log[i];
- if (test_bit(pnum, ctx->pregs_read)) {
- return true;
- }
- }
-
- /* Check for overlap between HVX reads and writes */
- for (int i = 0; i < ctx->vreg_log_idx; i++) {
- int vnum = ctx->vreg_log[i];
- if (test_bit(vnum, ctx->vregs_read)) {
- return true;
- }
- }
- if (!bitmap_empty(ctx->vregs_updated_tmp, NUM_VREGS)) {
- int i = find_first_bit(ctx->vregs_updated_tmp, NUM_VREGS);
- while (i < NUM_VREGS) {
- if (test_bit(i, ctx->vregs_read)) {
- return true;
- }
- i = find_next_bit(ctx->vregs_updated_tmp, NUM_VREGS, i + 1);
- }
- }
- if (!bitmap_empty(ctx->vregs_select, NUM_VREGS)) {
- int i = find_first_bit(ctx->vregs_select, NUM_VREGS);
- while (i < NUM_VREGS) {
- if (test_bit(i, ctx->vregs_read)) {
- return true;
- }
- i = find_next_bit(ctx->vregs_select, NUM_VREGS, i + 1);
- }
- }
-
- /* Check for overlap between HVX predicate reads and writes */
- for (int i = 0; i < ctx->qreg_log_idx; i++) {
- int qnum = ctx->qreg_log[i];
- if (test_bit(qnum, ctx->qregs_read)) {
- return true;
- }
+ if (ctx->read_after_write || ctx->has_hvx_overlap) {
+ return true;
}
return false;
@@ -467,7 +405,8 @@
static void analyze_packet(DisasContext *ctx)
{
Packet *pkt = ctx->pkt;
- ctx->has_hvx_helper = false;
+ ctx->read_after_write = false;
+ ctx->has_hvx_overlap = false;
for (int i = 0; i < pkt->num_insns; i++) {
Insn *insn = &pkt->insn[i];
ctx->insn = insn;
@@ -492,21 +431,19 @@
ctx->next_PC = next_PC;
ctx->reg_log_idx = 0;
bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
- bitmap_zero(ctx->regs_read, TOTAL_PER_THREAD_REGS);
bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
ctx->preg_log_idx = 0;
bitmap_zero(ctx->pregs_written, NUM_PREGS);
- bitmap_zero(ctx->pregs_read, NUM_PREGS);
ctx->future_vregs_idx = 0;
ctx->tmp_vregs_idx = 0;
ctx->vreg_log_idx = 0;
+ bitmap_zero(ctx->vregs_written, NUM_VREGS);
bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
bitmap_zero(ctx->vregs_updated, NUM_VREGS);
bitmap_zero(ctx->vregs_select, NUM_VREGS);
bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
- bitmap_zero(ctx->vregs_read, NUM_VREGS);
- bitmap_zero(ctx->qregs_read, NUM_QREGS);
+ bitmap_zero(ctx->qregs_written, NUM_QREGS);
ctx->qreg_log_idx = 0;
for (i = 0; i < STORES_MAX; i++) {
ctx->store_width[i] = 0;
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 4dd59c6..00cc2bc 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -38,12 +38,10 @@
int reg_log[REG_WRITES_MAX];
int reg_log_idx;
DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
- DECLARE_BITMAP(regs_read, TOTAL_PER_THREAD_REGS);
DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS);
int preg_log[PRED_WRITES_MAX];
int preg_log_idx;
DECLARE_BITMAP(pregs_written, NUM_PREGS);
- DECLARE_BITMAP(pregs_read, NUM_PREGS);
uint8_t store_width[STORES_MAX];
bool s1_store_processed;
int future_vregs_idx;
@@ -52,22 +50,27 @@
int tmp_vregs_num[VECTOR_TEMPS_MAX];
int vreg_log[NUM_VREGS];
int vreg_log_idx;
+ DECLARE_BITMAP(vregs_written, NUM_VREGS);
+ DECLARE_BITMAP(insn_vregs_written, NUM_VREGS);
DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS);
DECLARE_BITMAP(vregs_updated, NUM_VREGS);
DECLARE_BITMAP(vregs_select, NUM_VREGS);
DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS);
DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS);
- DECLARE_BITMAP(vregs_read, NUM_VREGS);
+ DECLARE_BITMAP(insn_vregs_read, NUM_VREGS);
int qreg_log[NUM_QREGS];
int qreg_log_idx;
- DECLARE_BITMAP(qregs_read, NUM_QREGS);
+ DECLARE_BITMAP(qregs_written, NUM_QREGS);
+ DECLARE_BITMAP(insn_qregs_written, NUM_QREGS);
+ DECLARE_BITMAP(insn_qregs_read, NUM_QREGS);
bool pre_commit;
bool need_commit;
TCGCond branch_cond;
target_ulong branch_dest;
bool is_tight_loop;
bool short_circuit;
- bool has_hvx_helper;
+ bool read_after_write;
+ bool has_hvx_overlap;
TCGv new_value[TOTAL_PER_THREAD_REGS];
TCGv new_pred_value[NUM_PREGS];
TCGv pred_written;
@@ -75,6 +78,8 @@
TCGv dczero_addr;
} DisasContext;
+bool is_gather_store_insn(DisasContext *ctx);
+
static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
{
if (!test_bit(pnum, ctx->pregs_written)) {
@@ -86,7 +91,14 @@
static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
{
- set_bit(pnum, ctx->pregs_read);
+ if (test_bit(pnum, ctx->pregs_written)) {
+ ctx->read_after_write = true;
+ }
+}
+
+static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
+{
+ g_assert(test_bit(pnum, ctx->pregs_written));
}
static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
@@ -117,7 +129,14 @@
static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
{
- set_bit(rnum, ctx->regs_read);
+ if (test_bit(rnum, ctx->regs_written)) {
+ ctx->read_after_write = true;
+ }
+}
+
+static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
+{
+ g_assert(test_bit(rnum, ctx->regs_written));
}
static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
@@ -131,10 +150,25 @@
intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
int num, bool alloc_ok);
+static inline void ctx_start_hvx_insn(DisasContext *ctx)
+{
+ bitmap_zero(ctx->insn_vregs_written, NUM_VREGS);
+ bitmap_zero(ctx->insn_vregs_read, NUM_VREGS);
+ bitmap_zero(ctx->insn_qregs_written, NUM_QREGS);
+ bitmap_zero(ctx->insn_qregs_read, NUM_QREGS);
+}
+
static inline void ctx_log_vreg_write(DisasContext *ctx,
int rnum, VRegWriteType type,
- bool is_predicated)
+ bool is_predicated, bool has_helper)
{
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_vregs_written);
+ if (test_bit(rnum, ctx->insn_vregs_read)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ set_bit(rnum, ctx->vregs_written);
if (type != EXT_TMP) {
if (!test_bit(rnum, ctx->vregs_updated)) {
ctx->vreg_log[ctx->vreg_log_idx] = rnum;
@@ -160,33 +194,77 @@
static inline void ctx_log_vreg_write_pair(DisasContext *ctx,
int rnum, VRegWriteType type,
- bool is_predicated)
+ bool is_predicated, bool has_helper)
{
- ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated);
- ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated);
+ ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated, has_helper);
+ ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated, has_helper);
}
-static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum)
+static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum,
+ bool has_helper)
{
- set_bit(rnum, ctx->vregs_read);
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_vregs_read);
+ if (test_bit(rnum, ctx->insn_vregs_written)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ if (test_bit(rnum, ctx->vregs_written)) {
+ ctx->read_after_write = true;
+ }
}
-static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
+static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum,
+ bool has_helper)
{
- ctx_log_vreg_read(ctx, rnum ^ 0);
- ctx_log_vreg_read(ctx, rnum ^ 1);
+ g_assert(is_gather_store_insn(ctx) ||
+ test_bit(rnum, ctx->vregs_updated) ||
+ test_bit(rnum, ctx->vregs_select) ||
+ test_bit(rnum, ctx->vregs_updated_tmp));
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_vregs_read);
+ if (test_bit(rnum, ctx->insn_vregs_written)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ if (is_gather_store_insn(ctx)) {
+ ctx->read_after_write = true;
+ }
+}
+
+static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum,
+ bool has_helper)
+{
+ ctx_log_vreg_read(ctx, rnum ^ 0, has_helper);
+ ctx_log_vreg_read(ctx, rnum ^ 1, has_helper);
}
static inline void ctx_log_qreg_write(DisasContext *ctx,
- int rnum)
+ int rnum, bool has_helper)
{
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_qregs_written);
+ if (test_bit(rnum, ctx->insn_qregs_read)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ set_bit(rnum, ctx->qregs_written);
ctx->qreg_log[ctx->qreg_log_idx] = rnum;
ctx->qreg_log_idx++;
}
-static inline void ctx_log_qreg_read(DisasContext *ctx, int qnum)
+static inline void ctx_log_qreg_read(DisasContext *ctx,
+ int qnum, bool has_helper)
{
- set_bit(qnum, ctx->qregs_read);
+ if (has_helper) {
+ set_bit(qnum, ctx->insn_qregs_read);
+ if (test_bit(qnum, ctx->insn_qregs_written)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ if (test_bit(qnum, ctx->qregs_written)) {
+ ctx->read_after_write = true;
+ }
}
extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
@@ -205,7 +283,6 @@
extern TCGv hex_vstore_size[VSTORES_MAX];
extern TCGv hex_vstore_pending[VSTORES_MAX];
-bool is_gather_store_insn(DisasContext *ctx);
void process_store(DisasContext *ctx, int slot_num);
FIELD(PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, 0, 2)
diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c
index f9cced5..65768ac 100644
--- a/target/i386/nvmm/nvmm-all.c
+++ b/target/i386/nvmm/nvmm-all.c
@@ -982,7 +982,7 @@
}
}
- cpu->accel->dirty = true;
+ qcpu->dirty = true;
cpu->accel = qcpu;
return 0;
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index b08e644..a6674a8 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -2236,7 +2236,7 @@
}
vcpu->interruptable = true;
- cpu->accel->dirty = true;
+ vcpu->dirty = true;
cpu->accel = vcpu;
max_vcpu_index = max(max_vcpu_index, cpu->cpu_index);
qemu_add_vm_change_state_handler(whpx_cpu_update_state, env);
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index ebb6c90..e599ab9 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -705,16 +705,20 @@
return;
case 0x300f: /* addv Rm,Rn */
{
- TCGv t0, t1, t2;
- t0 = tcg_temp_new();
- tcg_gen_add_i32(t0, REG(B7_4), REG(B11_8));
+ TCGv Rn = REG(B11_8);
+ TCGv Rm = REG(B7_4);
+ TCGv result, t1, t2;
+
+ result = tcg_temp_new();
t1 = tcg_temp_new();
- tcg_gen_xor_i32(t1, t0, REG(B11_8));
t2 = tcg_temp_new();
- tcg_gen_xor_i32(t2, REG(B7_4), REG(B11_8));
+ tcg_gen_add_i32(result, Rm, Rn);
+ /* T = ((Rn ^ Rm) & (Result ^ Rn)) >> 31 */
+ tcg_gen_xor_i32(t1, result, Rn);
+ tcg_gen_xor_i32(t2, Rm, Rn);
tcg_gen_andc_i32(cpu_sr_t, t1, t2);
tcg_gen_shri_i32(cpu_sr_t, cpu_sr_t, 31);
- tcg_gen_mov_i32(REG(B7_4), t0);
+ tcg_gen_mov_i32(Rn, result);
}
return;
case 0x2009: /* and Rm,Rn */
@@ -929,16 +933,20 @@
return;
case 0x300b: /* subv Rm,Rn */
{
- TCGv t0, t1, t2;
- t0 = tcg_temp_new();
- tcg_gen_sub_i32(t0, REG(B11_8), REG(B7_4));
+ TCGv Rn = REG(B11_8);
+ TCGv Rm = REG(B7_4);
+ TCGv result, t1, t2;
+
+ result = tcg_temp_new();
t1 = tcg_temp_new();
- tcg_gen_xor_i32(t1, t0, REG(B7_4));
t2 = tcg_temp_new();
- tcg_gen_xor_i32(t2, REG(B11_8), REG(B7_4));
+ tcg_gen_sub_i32(result, Rn, Rm);
+ /* T = ((Rn ^ Rm) & (Result ^ Rn)) >> 31 */
+ tcg_gen_xor_i32(t1, result, Rn);
+ tcg_gen_xor_i32(t2, Rn, Rm);
tcg_gen_and_i32(t1, t1, t2);
tcg_gen_shri_i32(cpu_sr_t, t1, 31);
- tcg_gen_mov_i32(REG(B11_8), t0);
+ tcg_gen_mov_i32(Rn, result);
}
return;
case 0x2008: /* tst Rm,Rn */
diff --git a/tests/bench/bufferiszero-bench.c b/tests/bench/bufferiszero-bench.c
new file mode 100644
index 0000000..222695c
--- /dev/null
+++ b/tests/bench/bufferiszero-bench.c
@@ -0,0 +1,47 @@
+/*
+ * QEMU buffer_is_zero speed benchmark
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qemu/units.h"
+
+static void test(const void *opaque)
+{
+ size_t max = 64 * KiB;
+ void *buf = g_malloc0(max);
+ int accel_index = 0;
+
+ do {
+ if (accel_index != 0) {
+ g_test_message("%s", ""); /* gnu_printf Werror for simple "" */
+ }
+ for (size_t len = 1 * KiB; len <= max; len *= 4) {
+ double total = 0.0;
+
+ g_test_timer_start();
+ do {
+ buffer_is_zero_ge256(buf, len);
+ total += len;
+ } while (g_test_timer_elapsed() < 0.5);
+
+ total /= MiB;
+ g_test_message("buffer_is_zero #%d: %2zuKB %8.0f MB/sec",
+ accel_index, len / (size_t)KiB,
+ total / g_test_timer_last());
+ }
+ accel_index++;
+ } while (test_buffer_is_zero_next_accel());
+
+ g_free(buf);
+}
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+ g_test_add_data_func("/cutils/bufferiszero/speed", NULL, test);
+ return g_test_run();
+}
diff --git a/tests/bench/meson.build b/tests/bench/meson.build
index 7e76338..4cd7a2f 100644
--- a/tests/bench/meson.build
+++ b/tests/bench/meson.build
@@ -21,6 +21,7 @@
if have_block
benchs += {
+ 'bufferiszero-bench': [],
'benchmark-crypto-hash': [crypto],
'benchmark-crypto-hmac': [crypto],
'benchmark-crypto-cipher': [crypto],
diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c
index b45170a..1fe14b5 100644
--- a/tests/tcg/hexagon/hvx_misc.c
+++ b/tests/tcg/hexagon/hvx_misc.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2021-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2021-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -231,6 +231,7 @@
static void test_new_value_store(void)
{
void *p0 = buffer0;
+ void *p1 = buffer1;
void *pout = output;
asm("{\n\t"
@@ -242,6 +243,19 @@
expect[0] = buffer0[0];
check_output_w(__LINE__, 1);
+
+ /* Test the .new read from the high half of a pair */
+ asm("v7 = vmem(%0 + #0)\n\t"
+ "v12 = vmem(%1 + #0)\n\t"
+ "{\n\t"
+ " v5:4 = vcombine(v12, v7)\n\t"
+ " vmem(%2 + #0) = v5.new\n\t"
+ "}\n\t"
+ : : "r"(p0), "r"(p1), "r"(pout) : "v4", "v5", "v7", "v12", "memory");
+
+ expect[0] = buffer1[0];
+
+ check_output_w(__LINE__, 1);
}
static void test_max_temps()
diff --git a/tests/tcg/sh4/Makefile.target b/tests/tcg/sh4/Makefile.target
index 4d09291c..7852fa6 100644
--- a/tests/tcg/sh4/Makefile.target
+++ b/tests/tcg/sh4/Makefile.target
@@ -17,3 +17,9 @@
test-macw: CFLAGS += -O -g
TESTS += test-macw
+
+test-addv: CFLAGS += -O -g
+TESTS += test-addv
+
+test-subv: CFLAGS += -O -g
+TESTS += test-subv
diff --git a/tests/tcg/sh4/test-addv.c b/tests/tcg/sh4/test-addv.c
new file mode 100644
index 0000000..ca87fe7
--- /dev/null
+++ b/tests/tcg/sh4/test-addv.c
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static void addv(const int a, const int b, const int res, const int carry)
+{
+ int o = a, c;
+
+ asm volatile("addv %2,%0\n"
+ "movt %1\n"
+ : "+r"(o), "=r"(c) : "r"(b) : );
+
+ if (c != carry || o != res) {
+ printf("ADDV %d, %d = %d/%d [T = %d/%d]\n", a, b, o, res, c, carry);
+ abort();
+ }
+}
+
+int main(void)
+{
+ addv(INT_MAX, 1, INT_MIN, 1);
+ addv(INT_MAX - 1, 1, INT_MAX, 0);
+
+ return 0;
+}
diff --git a/tests/tcg/sh4/test-subv.c b/tests/tcg/sh4/test-subv.c
new file mode 100644
index 0000000..a3c2db9
--- /dev/null
+++ b/tests/tcg/sh4/test-subv.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static void subv(const int a, const int b, const int res, const int carry)
+{
+ int o = a, c;
+
+ asm volatile("subv %2,%0\n"
+ "movt %1\n"
+ : "+r"(o), "=r"(c) : "r"(b) : );
+
+ if (c != carry || o != res) {
+ printf("SUBV %d, %d = %d/%d [T = %d/%d]\n", a, b, o, res, c, carry);
+ abort();
+ }
+}
+
+int main(void)
+{
+ subv(INT_MIN, 1, INT_MAX, 1);
+ subv(INT_MAX, -1, INT_MIN, 1);
+ subv(INT_MAX, 1, INT_MAX - 1, 0);
+ subv(0, 1, -1, 0);
+ subv(-1, -1, 0, 0);
+
+ return 0;
+}
diff --git a/ui/cocoa.m b/ui/cocoa.m
index 25e0db9..981615a 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -50,23 +50,10 @@
#include <Carbon/Carbon.h>
#include "hw/core/cpu.h"
-#ifndef MAC_OS_X_VERSION_10_13
-#define MAC_OS_X_VERSION_10_13 101300
-#endif
-
#ifndef MAC_OS_VERSION_14_0
#define MAC_OS_VERSION_14_0 140000
#endif
-/* 10.14 deprecates NSOnState and NSOffState in favor of
- * NSControlStateValueOn/Off, which were introduced in 10.13.
- * Define for older versions
- */
-#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_13
-#define NSControlStateValueOn NSOnState
-#define NSControlStateValueOff NSOffState
-#endif
-
//#define DEBUG
#ifdef DEBUG
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 3e6a5df..74864f7 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -26,265 +26,290 @@
#include "qemu/bswap.h"
#include "host/cpuinfo.h"
-static bool
-buffer_zero_int(const void *buf, size_t len)
+typedef bool (*biz_accel_fn)(const void *, size_t);
+
+static bool buffer_is_zero_int_lt256(const void *buf, size_t len)
{
- if (unlikely(len < 8)) {
- /* For a very small buffer, simply accumulate all the bytes. */
- const unsigned char *p = buf;
- const unsigned char *e = buf + len;
- unsigned char t = 0;
+ uint64_t t;
+ const uint64_t *p, *e;
- do {
- t |= *p++;
- } while (p < e);
-
- return t == 0;
- } else {
- /* Otherwise, use the unaligned memory access functions to
- handle the beginning and end of the buffer, with a couple
- of loops handling the middle aligned section. */
- uint64_t t = ldq_he_p(buf);
- const uint64_t *p = (uint64_t *)(((uintptr_t)buf + 8) & -8);
- const uint64_t *e = (uint64_t *)(((uintptr_t)buf + len) & -8);
-
- for (; p + 8 <= e; p += 8) {
- __builtin_prefetch(p + 8);
- if (t) {
- return false;
- }
- t = p[0] | p[1] | p[2] | p[3] | p[4] | p[5] | p[6] | p[7];
- }
- while (p < e) {
- t |= *p++;
- }
- t |= ldq_he_p(buf + len - 8);
-
- return t == 0;
+ /*
+ * Use unaligned memory access functions to handle
+ * the beginning and end of the buffer.
+ */
+ if (unlikely(len <= 8)) {
+ return (ldl_he_p(buf) | ldl_he_p(buf + len - 4)) == 0;
}
+
+ t = ldq_he_p(buf) | ldq_he_p(buf + len - 8);
+ p = QEMU_ALIGN_PTR_DOWN(buf + 8, 8);
+ e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 8);
+
+ /* Read 0 to 31 aligned words from the middle. */
+ while (p < e) {
+ t |= *p++;
+ }
+ return t == 0;
}
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
+static bool buffer_is_zero_int_ge256(const void *buf, size_t len)
+{
+ /*
+ * Use unaligned memory access functions to handle
+ * the beginning and end of the buffer.
+ */
+ uint64_t t = ldq_he_p(buf) | ldq_he_p(buf + len - 8);
+ const uint64_t *p = QEMU_ALIGN_PTR_DOWN(buf + 8, 8);
+ const uint64_t *e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 8);
+
+ /* Collect a partial block at the tail end. */
+ t |= e[-7] | e[-6] | e[-5] | e[-4] | e[-3] | e[-2] | e[-1];
+
+ /*
+ * Loop over 64 byte blocks.
+ * With the head and tail removed, e - p >= 30,
+ * so the loop must iterate at least 3 times.
+ */
+ do {
+ if (t) {
+ return false;
+ }
+ t = p[0] | p[1] | p[2] | p[3] | p[4] | p[5] | p[6] | p[7];
+ p += 8;
+ } while (p < e - 7);
+
+ return t == 0;
+}
+
+#if defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
#include <immintrin.h>
-/* Note that each of these vectorized functions require len >= 64. */
+/* Helper for preventing the compiler from reassociating
+ chains of binary vector operations. */
+#define SSE_REASSOC_BARRIER(vec0, vec1) asm("" : "+x"(vec0), "+x"(vec1))
+
+/* Note that these vectorized functions may assume len >= 256. */
static bool __attribute__((target("sse2")))
buffer_zero_sse2(const void *buf, size_t len)
{
- __m128i t = _mm_loadu_si128(buf);
- __m128i *p = (__m128i *)(((uintptr_t)buf + 5 * 16) & -16);
- __m128i *e = (__m128i *)(((uintptr_t)buf + len) & -16);
- __m128i zero = _mm_setzero_si128();
+ /* Unaligned loads at head/tail. */
+ __m128i v = *(__m128i_u *)(buf);
+ __m128i w = *(__m128i_u *)(buf + len - 16);
+ /* Align head/tail to 16-byte boundaries. */
+ const __m128i *p = QEMU_ALIGN_PTR_DOWN(buf + 16, 16);
+ const __m128i *e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 16);
+ __m128i zero = { 0 };
- /* Loop over 16-byte aligned blocks of 64. */
- while (likely(p <= e)) {
- __builtin_prefetch(p);
- t = _mm_cmpeq_epi8(t, zero);
- if (unlikely(_mm_movemask_epi8(t) != 0xFFFF)) {
+ /* Collect a partial block at tail end. */
+ v |= e[-1]; w |= e[-2];
+ SSE_REASSOC_BARRIER(v, w);
+ v |= e[-3]; w |= e[-4];
+ SSE_REASSOC_BARRIER(v, w);
+ v |= e[-5]; w |= e[-6];
+ SSE_REASSOC_BARRIER(v, w);
+ v |= e[-7]; v |= w;
+
+ /*
+ * Loop over complete 128-byte blocks.
+ * With the head and tail removed, e - p >= 14, so the loop
+ * must iterate at least once.
+ */
+ do {
+ v = _mm_cmpeq_epi8(v, zero);
+ if (unlikely(_mm_movemask_epi8(v) != 0xFFFF)) {
return false;
}
- t = p[-4] | p[-3] | p[-2] | p[-1];
- p += 4;
- }
+ v = p[0]; w = p[1];
+ SSE_REASSOC_BARRIER(v, w);
+ v |= p[2]; w |= p[3];
+ SSE_REASSOC_BARRIER(v, w);
+ v |= p[4]; w |= p[5];
+ SSE_REASSOC_BARRIER(v, w);
+ v |= p[6]; w |= p[7];
+ SSE_REASSOC_BARRIER(v, w);
+ v |= w;
+ p += 8;
+ } while (p < e - 7);
- /* Finish the aligned tail. */
- t |= e[-3];
- t |= e[-2];
- t |= e[-1];
-
- /* Finish the unaligned tail. */
- t |= _mm_loadu_si128(buf + len - 16);
-
- return _mm_movemask_epi8(_mm_cmpeq_epi8(t, zero)) == 0xFFFF;
+ return _mm_movemask_epi8(_mm_cmpeq_epi8(v, zero)) == 0xFFFF;
}
#ifdef CONFIG_AVX2_OPT
-static bool __attribute__((target("sse4")))
-buffer_zero_sse4(const void *buf, size_t len)
-{
- __m128i t = _mm_loadu_si128(buf);
- __m128i *p = (__m128i *)(((uintptr_t)buf + 5 * 16) & -16);
- __m128i *e = (__m128i *)(((uintptr_t)buf + len) & -16);
-
- /* Loop over 16-byte aligned blocks of 64. */
- while (likely(p <= e)) {
- __builtin_prefetch(p);
- if (unlikely(!_mm_testz_si128(t, t))) {
- return false;
- }
- t = p[-4] | p[-3] | p[-2] | p[-1];
- p += 4;
- }
-
- /* Finish the aligned tail. */
- t |= e[-3];
- t |= e[-2];
- t |= e[-1];
-
- /* Finish the unaligned tail. */
- t |= _mm_loadu_si128(buf + len - 16);
-
- return _mm_testz_si128(t, t);
-}
-
static bool __attribute__((target("avx2")))
buffer_zero_avx2(const void *buf, size_t len)
{
- /* Begin with an unaligned head of 32 bytes. */
- __m256i t = _mm256_loadu_si256(buf);
- __m256i *p = (__m256i *)(((uintptr_t)buf + 5 * 32) & -32);
- __m256i *e = (__m256i *)(((uintptr_t)buf + len) & -32);
+ /* Unaligned loads at head/tail. */
+ __m256i v = *(__m256i_u *)(buf);
+ __m256i w = *(__m256i_u *)(buf + len - 32);
+ /* Align head/tail to 32-byte boundaries. */
+ const __m256i *p = QEMU_ALIGN_PTR_DOWN(buf + 32, 32);
+ const __m256i *e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 32);
+ __m256i zero = { 0 };
- /* Loop over 32-byte aligned blocks of 128. */
- while (p <= e) {
- __builtin_prefetch(p);
- if (unlikely(!_mm256_testz_si256(t, t))) {
+ /* Collect a partial block at tail end. */
+ v |= e[-1]; w |= e[-2];
+ SSE_REASSOC_BARRIER(v, w);
+ v |= e[-3]; w |= e[-4];
+ SSE_REASSOC_BARRIER(v, w);
+ v |= e[-5]; w |= e[-6];
+ SSE_REASSOC_BARRIER(v, w);
+ v |= e[-7]; v |= w;
+
+ /* Loop over complete 256-byte blocks. */
+ for (; p < e - 7; p += 8) {
+ /* PTEST is not profitable here. */
+ v = _mm256_cmpeq_epi8(v, zero);
+ if (unlikely(_mm256_movemask_epi8(v) != 0xFFFFFFFF)) {
return false;
}
- t = p[-4] | p[-3] | p[-2] | p[-1];
- p += 4;
- } ;
+ v = p[0]; w = p[1];
+ SSE_REASSOC_BARRIER(v, w);
+ v |= p[2]; w |= p[3];
+ SSE_REASSOC_BARRIER(v, w);
+ v |= p[4]; w |= p[5];
+ SSE_REASSOC_BARRIER(v, w);
+ v |= p[6]; w |= p[7];
+ SSE_REASSOC_BARRIER(v, w);
+ v |= w;
+ }
- /* Finish the last block of 128 unaligned. */
- t |= _mm256_loadu_si256(buf + len - 4 * 32);
- t |= _mm256_loadu_si256(buf + len - 3 * 32);
- t |= _mm256_loadu_si256(buf + len - 2 * 32);
- t |= _mm256_loadu_si256(buf + len - 1 * 32);
-
- return _mm256_testz_si256(t, t);
+ return _mm256_movemask_epi8(_mm256_cmpeq_epi8(v, zero)) == 0xFFFFFFFF;
}
#endif /* CONFIG_AVX2_OPT */
-#ifdef CONFIG_AVX512F_OPT
-static bool __attribute__((target("avx512f")))
-buffer_zero_avx512(const void *buf, size_t len)
-{
- /* Begin with an unaligned head of 64 bytes. */
- __m512i t = _mm512_loadu_si512(buf);
- __m512i *p = (__m512i *)(((uintptr_t)buf + 5 * 64) & -64);
- __m512i *e = (__m512i *)(((uintptr_t)buf + len) & -64);
-
- /* Loop over 64-byte aligned blocks of 256. */
- while (p <= e) {
- __builtin_prefetch(p);
- if (unlikely(_mm512_test_epi64_mask(t, t))) {
- return false;
- }
- t = p[-4] | p[-3] | p[-2] | p[-1];
- p += 4;
- }
-
- t |= _mm512_loadu_si512(buf + len - 4 * 64);
- t |= _mm512_loadu_si512(buf + len - 3 * 64);
- t |= _mm512_loadu_si512(buf + len - 2 * 64);
- t |= _mm512_loadu_si512(buf + len - 1 * 64);
-
- return !_mm512_test_epi64_mask(t, t);
-
-}
-#endif /* CONFIG_AVX512F_OPT */
-
-/*
- * Make sure that these variables are appropriately initialized when
- * SSE2 is enabled on the compiler command-line, but the compiler is
- * too old to support CONFIG_AVX2_OPT.
- */
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
-# define INIT_USED 0
-# define INIT_LENGTH 0
-# define INIT_ACCEL buffer_zero_int
-#else
-# ifndef __SSE2__
-# error "ISA selection confusion"
-# endif
-# define INIT_USED CPUINFO_SSE2
-# define INIT_LENGTH 64
-# define INIT_ACCEL buffer_zero_sse2
-#endif
-
-static unsigned used_accel = INIT_USED;
-static unsigned length_to_accel = INIT_LENGTH;
-static bool (*buffer_accel)(const void *, size_t) = INIT_ACCEL;
-
-static unsigned __attribute__((noinline))
-select_accel_cpuinfo(unsigned info)
-{
- /* Array is sorted in order of algorithm preference. */
- static const struct {
- unsigned bit;
- unsigned len;
- bool (*fn)(const void *, size_t);
- } all[] = {
-#ifdef CONFIG_AVX512F_OPT
- { CPUINFO_AVX512F, 256, buffer_zero_avx512 },
-#endif
+static biz_accel_fn const accel_table[] = {
+ buffer_is_zero_int_ge256,
+ buffer_zero_sse2,
#ifdef CONFIG_AVX2_OPT
- { CPUINFO_AVX2, 128, buffer_zero_avx2 },
- { CPUINFO_SSE4, 64, buffer_zero_sse4 },
+ buffer_zero_avx2,
#endif
- { CPUINFO_SSE2, 64, buffer_zero_sse2 },
- { CPUINFO_ALWAYS, 0, buffer_zero_int },
- };
+};
- for (unsigned i = 0; i < ARRAY_SIZE(all); ++i) {
- if (info & all[i].bit) {
- length_to_accel = all[i].len;
- buffer_accel = all[i].fn;
- return all[i].bit;
- }
+static unsigned best_accel(void)
+{
+ unsigned info = cpuinfo_init();
+
+#ifdef CONFIG_AVX2_OPT
+ if (info & CPUINFO_AVX2) {
+ return 2;
}
- return 0;
-}
-
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
-static void __attribute__((constructor)) init_accel(void)
-{
- used_accel = select_accel_cpuinfo(cpuinfo_init());
-}
-#endif /* CONFIG_AVX2_OPT */
-
-bool test_buffer_is_zero_next_accel(void)
-{
- /*
- * Accumulate the accelerators that we've already tested, and
- * remove them from the set to test this round. We'll get back
- * a zero from select_accel_cpuinfo when there are no more.
- */
- unsigned used = select_accel_cpuinfo(cpuinfo & ~used_accel);
- used_accel |= used;
- return used;
-}
-
-static bool select_accel_fn(const void *buf, size_t len)
-{
- if (likely(len >= length_to_accel)) {
- return buffer_accel(buf, len);
- }
- return buffer_zero_int(buf, len);
-}
-
-#else
-#define select_accel_fn buffer_zero_int
-bool test_buffer_is_zero_next_accel(void)
-{
- return false;
-}
#endif
+ return info & CPUINFO_SSE2 ? 1 : 0;
+}
+
+#elif defined(__aarch64__) && defined(__ARM_NEON)
+#include <arm_neon.h>
/*
- * Checks if a buffer is all zeroes
+ * Helper for preventing the compiler from reassociating
+ * chains of binary vector operations.
*/
-bool buffer_is_zero(const void *buf, size_t len)
+#define REASSOC_BARRIER(vec0, vec1) asm("" : "+w"(vec0), "+w"(vec1))
+
+static bool buffer_is_zero_simd(const void *buf, size_t len)
+{
+ uint32x4_t t0, t1, t2, t3;
+
+ /* Align head/tail to 16-byte boundaries. */
+ const uint32x4_t *p = QEMU_ALIGN_PTR_DOWN(buf + 16, 16);
+ const uint32x4_t *e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 16);
+
+ /* Unaligned loads at head/tail. */
+ t0 = vld1q_u32(buf) | vld1q_u32(buf + len - 16);
+
+ /* Collect a partial block at tail end. */
+ t1 = e[-7] | e[-6];
+ t2 = e[-5] | e[-4];
+ t3 = e[-3] | e[-2];
+ t0 |= e[-1];
+ REASSOC_BARRIER(t0, t1);
+ REASSOC_BARRIER(t2, t3);
+ t0 |= t1;
+ t2 |= t3;
+ REASSOC_BARRIER(t0, t2);
+ t0 |= t2;
+
+ /*
+ * Loop over complete 128-byte blocks.
+ * With the head and tail removed, e - p >= 14, so the loop
+ * must iterate at least once.
+ */
+ do {
+ /*
+ * Reduce via UMAXV. Whatever the actual result,
+ * it will only be zero if all input bytes are zero.
+ */
+ if (unlikely(vmaxvq_u32(t0) != 0)) {
+ return false;
+ }
+
+ t0 = p[0] | p[1];
+ t1 = p[2] | p[3];
+ t2 = p[4] | p[5];
+ t3 = p[6] | p[7];
+ REASSOC_BARRIER(t0, t1);
+ REASSOC_BARRIER(t2, t3);
+ t0 |= t1;
+ t2 |= t3;
+ REASSOC_BARRIER(t0, t2);
+ t0 |= t2;
+ p += 8;
+ } while (p < e - 7);
+
+ return vmaxvq_u32(t0) == 0;
+}
+
+#define best_accel() 1
+static biz_accel_fn const accel_table[] = {
+ buffer_is_zero_int_ge256,
+ buffer_is_zero_simd,
+};
+#else
+#define best_accel() 0
+static biz_accel_fn const accel_table[1] = {
+ buffer_is_zero_int_ge256
+};
+#endif
+
+static biz_accel_fn buffer_is_zero_accel;
+static unsigned accel_index;
+
+bool buffer_is_zero_ool(const void *buf, size_t len)
{
if (unlikely(len == 0)) {
return true;
}
+ if (!buffer_is_zero_sample3(buf, len)) {
+ return false;
+ }
+ /* All bytes are covered for any len <= 3. */
+ if (unlikely(len <= 3)) {
+ return true;
+ }
- /* Fetch the beginning of the buffer while we select the accelerator. */
- __builtin_prefetch(buf);
+ if (likely(len >= 256)) {
+ return buffer_is_zero_accel(buf, len);
+ }
+ return buffer_is_zero_int_lt256(buf, len);
+}
- /* Use an optimized zero check if possible. Note that this also
- includes a check for an unrolled loop over 64-bit integers. */
- return select_accel_fn(buf, len);
+bool buffer_is_zero_ge256(const void *buf, size_t len)
+{
+ return buffer_is_zero_accel(buf, len);
+}
+
+bool test_buffer_is_zero_next_accel(void)
+{
+ if (accel_index != 0) {
+ buffer_is_zero_accel = accel_table[--accel_index];
+ return true;
+ }
+ return false;
+}
+
+static void __attribute__((constructor)) init_accel(void)
+{
+ accel_index = best_accel();
+ buffer_is_zero_accel = accel_table[accel_index];
}