Merge tag 'pull-tcg-plugin-memory-190924-1' of https://gitlab.com/stsquad/qemu into staging
TCG plugin memory instrumentation updates
- deprecate plugins on 32 bit hosts
- deprecate plugins with TCI
- extend memory API to save value
- add check-tcg tests to exercise new memory API
- fix timer deadlock with non-changing timer
- add basic block vector plugin to contrib
- add cflow plugin to contrib
- extend syscall plugin to dump write memory
- validate ips plugin arguments meet minimum slice value
# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCgAdFiEEZoWumedRZ7yvyN81+9DbCVqeKkQFAmbsPCUACgkQ+9DbCVqe
# KkTm1gf9Hs5Zfdng0E+7sr5Dpa5F+cJOXU9QJhoTWJ4XC16CygWByqMXbyeX/kvm
# HXJEm6OnkADJhikIUCoBko8uK4/96iWSrDL0sEdzASX4SM/tXu684KeL+j9G/Ql8
# iqxm6tIjaJqmbSZRMp0l5jD+ZBltRMCzBNdK1suJR2ppQgqfKj3qMLVLtq2hhqPH
# qPgwKm44hk9BEpHYqXaivzSWN5GKCgvp5ECcFXCBhDcM+8W7Dl3Mv6X0pWOpYcKZ
# d2a5KUt+Xp7WB2jkOgJYr0zKCOQCiCjGSfm/30qRDOUnwiLRWbfamRI9jUDNUtfy
# RYR+GaspurGCwSkwICdlvj+vFp/16Q==
# =5wfo
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 19 Sep 2024 15:58:45 BST
# gpg: using RSA key 6685AE99E75167BCAFC8DF35FBD0DB095A9E2A44
# gpg: Good signature from "Alex Bennée (Master Work Key) <alex.bennee@linaro.org>" [full]
# Primary key fingerprint: 6685 AE99 E751 67BC AFC8 DF35 FBD0 DB09 5A9E 2A44
* tag 'pull-tcg-plugin-memory-190924-1' of https://gitlab.com/stsquad/qemu:
contrib/plugins: avoid hanging program
plugins: add option to dump write argument to syscall plugin
plugins: add plugin API to read guest memory
contrib/plugins: Add a plugin to generate basic block vectors
util/timer: avoid deadlock when shutting down
tests/tcg: add a system test to check memory instrumentation
tests/tcg: ensure s390x-softmmu output redirected
tests/tcg: only read/write 64 bit words on 64 bit systems
tests/tcg: clean up output of memory system test
tests/tcg/multiarch: add test for plugin memory access
tests/tcg/plugins/mem: add option to print memory accesses
tests/tcg: allow to check output of plugins
tests/tcg: add mechanism to run specific tests with plugins
plugins: extend API to get latest memory value accessed
plugins: save value during memory accesses
contrib/plugins: control flow plugin
deprecation: don't enable TCG plugins by default with TCI
deprecation: don't enable TCG plugins by default on 32 bit hosts
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index beb1988..fe4cd72 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -414,7 +414,7 @@
static int do_kvm_destroy_vcpu(CPUState *cpu)
{
KVMState *s = kvm_state;
- long mmap_size;
+ int mmap_size;
int ret = 0;
trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
@@ -459,7 +459,7 @@
int kvm_init_vcpu(CPUState *cpu, Error **errp)
{
KVMState *s = kvm_state;
- long mmap_size;
+ int mmap_size;
int ret;
trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
@@ -1525,11 +1525,7 @@
r->reaper_iteration++;
}
- trace_kvm_dirty_ring_reaper("exit");
-
- rcu_unregister_thread();
-
- return NULL;
+ g_assert_not_reached();
}
static void kvm_dirty_ring_reaper_init(KVMState *s)
diff --git a/docs/devel/nested-papr.txt b/docs/devel/nested-papr.txt
deleted file mode 100644
index 9094365..0000000
--- a/docs/devel/nested-papr.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-Nested PAPR API (aka KVM on PowerVM)
-====================================
-
-This API aims at providing support to enable nested virtualization with
-KVM on PowerVM. While the existing support for nested KVM on PowerNV was
-introduced with cap-nested-hv option, however, with a slight design change,
-to enable this on papr/pseries, a new cap-nested-papr option is added. eg:
-
- qemu-system-ppc64 -cpu POWER10 -machine pseries,cap-nested-papr=true ...
-
-Work by:
- Michael Neuling <mikey@neuling.org>
- Vaibhav Jain <vaibhav@linux.ibm.com>
- Jordan Niethe <jniethe5@gmail.com>
- Harsh Prateek Bora <harshpb@linux.ibm.com>
- Shivaprasad G Bhat <sbhat@linux.ibm.com>
- Kautuk Consul <kconsul@linux.vnet.ibm.com>
-
-Below taken from the kernel documentation:
-
-Introduction
-============
-
-This document explains how a guest operating system can act as a
-hypervisor and run nested guests through the use of hypercalls, if the
-hypervisor has implemented them. The terms L0, L1, and L2 are used to
-refer to different software entities. L0 is the hypervisor mode entity
-that would normally be called the "host" or "hypervisor". L1 is a
-guest virtual machine that is directly run under L0 and is initiated
-and controlled by L0. L2 is a guest virtual machine that is initiated
-and controlled by L1 acting as a hypervisor. A significant design change
-wrt existing API is that now the entire L2 state is maintained within L0.
-
-Existing Nested-HV API
-======================
-
-Linux/KVM has had support for Nesting as an L0 or L1 since 2018
-
-The L0 code was added::
-
- commit 8e3f5fc1045dc49fd175b978c5457f5f51e7a2ce
- Author: Paul Mackerras <paulus@ozlabs.org>
- Date: Mon Oct 8 16:31:03 2018 +1100
- KVM: PPC: Book3S HV: Framework and hcall stubs for nested virtualization
-
-The L1 code was added::
-
- commit 360cae313702cdd0b90f82c261a8302fecef030a
- Author: Paul Mackerras <paulus@ozlabs.org>
- Date: Mon Oct 8 16:31:04 2018 +1100
- KVM: PPC: Book3S HV: Nested guest entry via hypercall
-
-This API works primarily using a signal hcall h_enter_nested(). This
-call made by the L1 to tell the L0 to start an L2 vCPU with the given
-state. The L0 then starts this L2 and runs until an L2 exit condition
-is reached. Once the L2 exits, the state of the L2 is given back to
-the L1 by the L0. The full L2 vCPU state is always transferred from
-and to L1 when the L2 is run. The L0 doesn't keep any state on the L2
-vCPU (except in the short sequence in the L0 on L1 -> L2 entry and L2
--> L1 exit).
-
-The only state kept by the L0 is the partition table. The L1 registers
-it's partition table using the h_set_partition_table() hcall. All
-other state held by the L0 about the L2s is cached state (such as
-shadow page tables).
-
-The L1 may run any L2 or vCPU without first informing the L0. It
-simply starts the vCPU using h_enter_nested(). The creation of L2s and
-vCPUs is done implicitly whenever h_enter_nested() is called.
-
-In this document, we call this existing API the v1 API.
-
-New PAPR API
-===============
-
-The new PAPR API changes from the v1 API such that the creating L2 and
-associated vCPUs is explicit. In this document, we call this the v2
-API.
-
-h_enter_nested() is replaced with H_GUEST_VCPU_RUN(). Before this can
-be called the L1 must explicitly create the L2 using h_guest_create()
-and any associated vCPUs() created with h_guest_create_vCPU(). Getting
-and setting vCPU state can also be performed using h_guest_{g|s}et
-hcall.
-
-The basic execution flow is for an L1 to create an L2, run it, and
-delete it is:
-
-- L1 and L0 negotiate capabilities with H_GUEST_{G,S}ET_CAPABILITIES()
- (normally at L1 boot time).
-
-- L1 requests the L0 to create an L2 with H_GUEST_CREATE() and receives a token
-
-- L1 requests the L0 to create an L2 vCPU with H_GUEST_CREATE_VCPU()
-
-- L1 and L0 communicate the vCPU state using the H_GUEST_{G,S}ET() hcall
-
-- L1 requests the L0 to run the vCPU using H_GUEST_RUN_VCPU() hcall
-
-- L1 deletes L2 with H_GUEST_DELETE()
-
-For more details, please refer:
-
-[1] Linux Kernel documentation (upstream documentation commit):
-
-commit 476652297f94a2e5e5ef29e734b0da37ade94110
-Author: Michael Neuling <mikey@neuling.org>
-Date: Thu Sep 14 13:06:00 2023 +1000
-
- docs: powerpc: Document nested KVM on POWER
-
- Document support for nested KVM on POWER using the existing API as well
- as the new PAPR API. This includes the new HCALL interface and how it
- used by KVM.
-
- Signed-off-by: Michael Neuling <mikey@neuling.org>
- Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
- Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
- Link: https://msgid.link/20230914030600.16993-12-jniethe5@gmail.com
diff --git a/hw/char/stm32l4x5_usart.c b/hw/char/stm32l4x5_usart.c
index fc5dcac..3cf200c 100644
--- a/hw/char/stm32l4x5_usart.c
+++ b/hw/char/stm32l4x5_usart.c
@@ -154,6 +154,21 @@
REG32(TDR, 0x28)
FIELD(TDR, TDR, 0, 9)
+static void stm32l4x5_update_isr(Stm32l4x5UsartBaseState *s)
+{
+ if (s->cr1 & R_CR1_TE_MASK) {
+ s->isr |= R_ISR_TEACK_MASK;
+ } else {
+ s->isr &= ~R_ISR_TEACK_MASK;
+ }
+
+ if (s->cr1 & R_CR1_RE_MASK) {
+ s->isr |= R_ISR_REACK_MASK;
+ } else {
+ s->isr &= ~R_ISR_REACK_MASK;
+ }
+}
+
static void stm32l4x5_update_irq(Stm32l4x5UsartBaseState *s)
{
if (((s->isr & R_ISR_WUF_MASK) && (s->cr3 & R_CR3_WUFIE_MASK)) ||
@@ -456,6 +471,7 @@
case A_CR1:
s->cr1 = value;
stm32l4x5_update_params(s);
+ stm32l4x5_update_isr(s);
stm32l4x5_update_irq(s);
return;
case A_CR2:
diff --git a/target/arm/helper.h b/target/arm/helper.h
index b463be3..58919b6 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -324,6 +324,18 @@
DEF_HELPER_FLAGS_5(neon_uqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(neon_uqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(neon_uqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(neon_sqshli_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(neon_sqshli_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(neon_sqshli_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(neon_sqshli_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(neon_uqshli_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(neon_uqshli_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(neon_uqshli_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(neon_uqshli_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(neon_sqshlui_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(neon_sqshlui_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(neon_sqshlui_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(neon_sqshlui_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_srshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -363,17 +375,17 @@
DEF_HELPER_4(neon_qrdmlah_s32, i32, env, s32, s32, s32)
DEF_HELPER_4(neon_qrdmlsh_s32, i32, env, s32, s32, s32)
-DEF_HELPER_1(neon_narrow_u8, i32, i64)
-DEF_HELPER_1(neon_narrow_u16, i32, i64)
-DEF_HELPER_2(neon_unarrow_sat8, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_u8, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_s8, i32, env, i64)
-DEF_HELPER_2(neon_unarrow_sat16, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_u16, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_s16, i32, env, i64)
-DEF_HELPER_2(neon_unarrow_sat32, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_u32, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_s32, i32, env, i64)
+DEF_HELPER_1(neon_narrow_u8, i64, i64)
+DEF_HELPER_1(neon_narrow_u16, i64, i64)
+DEF_HELPER_2(neon_unarrow_sat8, i64, env, i64)
+DEF_HELPER_2(neon_narrow_sat_u8, i64, env, i64)
+DEF_HELPER_2(neon_narrow_sat_s8, i64, env, i64)
+DEF_HELPER_2(neon_unarrow_sat16, i64, env, i64)
+DEF_HELPER_2(neon_narrow_sat_u16, i64, env, i64)
+DEF_HELPER_2(neon_narrow_sat_s16, i64, env, i64)
+DEF_HELPER_2(neon_unarrow_sat32, i64, env, i64)
+DEF_HELPER_2(neon_narrow_sat_u32, i64, env, i64)
+DEF_HELPER_2(neon_narrow_sat_s32, i64, env, i64)
DEF_HELPER_1(neon_narrow_high_u8, i32, i64)
DEF_HELPER_1(neon_narrow_high_u16, i32, i64)
DEF_HELPER_1(neon_narrow_round_high_u8, i32, i64)
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 62df4c4..331a8e1 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -30,10 +30,12 @@
&rri_sf rd rn imm sf
&i imm
&rr_e rd rn esz
+&rri_e rd rn imm esz
&rrr_e rd rn rm esz
&rrx_e rd rn rm idx esz
&rrrr_e rd rn rm ra esz
&qrr_e q rd rn esz
+&qrri_e q rd rn imm esz
&qrrr_e q rd rn rm esz
&qrrx_e q rd rn rm idx esz
&qrrrr_e q rd rn rm ra esz
@@ -54,11 +56,15 @@
@rrx_d ........ .. . rm:5 .... idx:1 . rn:5 rd:5 &rrx_e esz=3
@rr_q1e0 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0
+@rr_q1e2 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=2
@r2r_q1e0 ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0
@rrr_q1e0 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=0
@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
+@qrr_h . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=1
+@qrr_e . q:1 ...... esz:2 ...... ...... rn:5 rd:5 &qrr_e
+
@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0
@qrrr_h . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=1
@qrrr_s . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=2
@@ -1136,3 +1142,254 @@
FMSUB 0001 1111 .. 0 ..... 1 ..... ..... ..... @rrrr_hsd
FNMADD 0001 1111 .. 1 ..... 0 ..... ..... ..... @rrrr_hsd
FNMSUB 0001 1111 .. 1 ..... 1 ..... ..... ..... @rrrr_hsd
+
+# Advanced SIMD Extract
+
+EXT_d 0010 1110 00 0 rm:5 00 imm:3 0 rn:5 rd:5
+EXT_q 0110 1110 00 0 rm:5 0 imm:4 0 rn:5 rd:5
+
+# Advanced SIMD Table Lookup
+
+TBL_TBX 0 q:1 00 1110 000 rm:5 0 len:2 tbx:1 00 rn:5 rd:5
+
+# Advanced SIMD Permute
+
+UZP1 0.00 1110 .. 0 ..... 0 001 10 ..... ..... @qrrr_e
+UZP2 0.00 1110 .. 0 ..... 0 101 10 ..... ..... @qrrr_e
+TRN1 0.00 1110 .. 0 ..... 0 010 10 ..... ..... @qrrr_e
+TRN2 0.00 1110 .. 0 ..... 0 110 10 ..... ..... @qrrr_e
+ZIP1 0.00 1110 .. 0 ..... 0 011 10 ..... ..... @qrrr_e
+ZIP2 0.00 1110 .. 0 ..... 0 111 10 ..... ..... @qrrr_e
+
+# Advanced SIMD Across Lanes
+
+ADDV 0.00 1110 .. 11000 11011 10 ..... ..... @qrr_e
+SADDLV 0.00 1110 .. 11000 00011 10 ..... ..... @qrr_e
+UADDLV 0.10 1110 .. 11000 00011 10 ..... ..... @qrr_e
+SMAXV 0.00 1110 .. 11000 01010 10 ..... ..... @qrr_e
+UMAXV 0.10 1110 .. 11000 01010 10 ..... ..... @qrr_e
+SMINV 0.00 1110 .. 11000 11010 10 ..... ..... @qrr_e
+UMINV 0.10 1110 .. 11000 11010 10 ..... ..... @qrr_e
+
+FMAXNMV_h 0.00 1110 00 11000 01100 10 ..... ..... @qrr_h
+FMAXNMV_s 0110 1110 00 11000 01100 10 ..... ..... @rr_q1e2
+
+FMINNMV_h 0.00 1110 10 11000 01100 10 ..... ..... @qrr_h
+FMINNMV_s 0110 1110 10 11000 01100 10 ..... ..... @rr_q1e2
+
+FMAXV_h 0.00 1110 00 11000 01111 10 ..... ..... @qrr_h
+FMAXV_s 0110 1110 00 11000 01111 10 ..... ..... @rr_q1e2
+
+FMINV_h 0.00 1110 10 11000 01111 10 ..... ..... @qrr_h
+FMINV_s 0110 1110 10 11000 01111 10 ..... ..... @rr_q1e2
+
+# Floating-point Immediate
+
+FMOVI_s 0001 1110 .. 1 imm:8 100 00000 rd:5 esz=%esz_hsd
+
+# Advanced SIMD Modified Immediate / Shift by Immediate
+
+%abcdefgh 16:3 5:5
+
+# Right shifts are encoded as N - shift, where N is the element size in bits.
+%neon_rshift_i6 16:6 !function=rsub_64
+%neon_rshift_i5 16:5 !function=rsub_32
+%neon_rshift_i4 16:4 !function=rsub_16
+%neon_rshift_i3 16:3 !function=rsub_8
+
+@q_shri_b . q:1 .. ..... 0001 ... ..... . rn:5 rd:5 \
+ &qrri_e esz=0 imm=%neon_rshift_i3
+@q_shri_h . q:1 .. ..... 001 .... ..... . rn:5 rd:5 \
+ &qrri_e esz=1 imm=%neon_rshift_i4
+@q_shri_s . q:1 .. ..... 01 ..... ..... . rn:5 rd:5 \
+ &qrri_e esz=2 imm=%neon_rshift_i5
+@q_shri_d . 1 .. ..... 1 ...... ..... . rn:5 rd:5 \
+ &qrri_e esz=3 imm=%neon_rshift_i6 q=1
+
+@q_shli_b . q:1 .. ..... 0001 imm:3 ..... . rn:5 rd:5 &qrri_e esz=0
+@q_shli_h . q:1 .. ..... 001 imm:4 ..... . rn:5 rd:5 &qrri_e esz=1
+@q_shli_s . q:1 .. ..... 01 imm:5 ..... . rn:5 rd:5 &qrri_e esz=2
+@q_shli_d . 1 .. ..... 1 imm:6 ..... . rn:5 rd:5 &qrri_e esz=3 q=1
+
+FMOVI_v_h 0 q:1 00 1111 00000 ... 1111 11 ..... rd:5 %abcdefgh
+
+# MOVI, MVNI, ORR, BIC, FMOV are all intermixed via cmode.
+Vimm 0 q:1 op:1 0 1111 00000 ... cmode:4 01 ..... rd:5 %abcdefgh
+
+SSHR_v 0.00 11110 .... ... 00000 1 ..... ..... @q_shri_b
+SSHR_v 0.00 11110 .... ... 00000 1 ..... ..... @q_shri_h
+SSHR_v 0.00 11110 .... ... 00000 1 ..... ..... @q_shri_s
+SSHR_v 0.00 11110 .... ... 00000 1 ..... ..... @q_shri_d
+
+USHR_v 0.10 11110 .... ... 00000 1 ..... ..... @q_shri_b
+USHR_v 0.10 11110 .... ... 00000 1 ..... ..... @q_shri_h
+USHR_v 0.10 11110 .... ... 00000 1 ..... ..... @q_shri_s
+USHR_v 0.10 11110 .... ... 00000 1 ..... ..... @q_shri_d
+
+SSRA_v 0.00 11110 .... ... 00010 1 ..... ..... @q_shri_b
+SSRA_v 0.00 11110 .... ... 00010 1 ..... ..... @q_shri_h
+SSRA_v 0.00 11110 .... ... 00010 1 ..... ..... @q_shri_s
+SSRA_v 0.00 11110 .... ... 00010 1 ..... ..... @q_shri_d
+
+USRA_v 0.10 11110 .... ... 00010 1 ..... ..... @q_shri_b
+USRA_v 0.10 11110 .... ... 00010 1 ..... ..... @q_shri_h
+USRA_v 0.10 11110 .... ... 00010 1 ..... ..... @q_shri_s
+USRA_v 0.10 11110 .... ... 00010 1 ..... ..... @q_shri_d
+
+SRSHR_v 0.00 11110 .... ... 00100 1 ..... ..... @q_shri_b
+SRSHR_v 0.00 11110 .... ... 00100 1 ..... ..... @q_shri_h
+SRSHR_v 0.00 11110 .... ... 00100 1 ..... ..... @q_shri_s
+SRSHR_v 0.00 11110 .... ... 00100 1 ..... ..... @q_shri_d
+
+URSHR_v 0.10 11110 .... ... 00100 1 ..... ..... @q_shri_b
+URSHR_v 0.10 11110 .... ... 00100 1 ..... ..... @q_shri_h
+URSHR_v 0.10 11110 .... ... 00100 1 ..... ..... @q_shri_s
+URSHR_v 0.10 11110 .... ... 00100 1 ..... ..... @q_shri_d
+
+SRSRA_v 0.00 11110 .... ... 00110 1 ..... ..... @q_shri_b
+SRSRA_v 0.00 11110 .... ... 00110 1 ..... ..... @q_shri_h
+SRSRA_v 0.00 11110 .... ... 00110 1 ..... ..... @q_shri_s
+SRSRA_v 0.00 11110 .... ... 00110 1 ..... ..... @q_shri_d
+
+URSRA_v 0.10 11110 .... ... 00110 1 ..... ..... @q_shri_b
+URSRA_v 0.10 11110 .... ... 00110 1 ..... ..... @q_shri_h
+URSRA_v 0.10 11110 .... ... 00110 1 ..... ..... @q_shri_s
+URSRA_v 0.10 11110 .... ... 00110 1 ..... ..... @q_shri_d
+
+SRI_v 0.10 11110 .... ... 01000 1 ..... ..... @q_shri_b
+SRI_v 0.10 11110 .... ... 01000 1 ..... ..... @q_shri_h
+SRI_v 0.10 11110 .... ... 01000 1 ..... ..... @q_shri_s
+SRI_v 0.10 11110 .... ... 01000 1 ..... ..... @q_shri_d
+
+SHL_v 0.00 11110 .... ... 01010 1 ..... ..... @q_shli_b
+SHL_v 0.00 11110 .... ... 01010 1 ..... ..... @q_shli_h
+SHL_v 0.00 11110 .... ... 01010 1 ..... ..... @q_shli_s
+SHL_v 0.00 11110 .... ... 01010 1 ..... ..... @q_shli_d
+
+SLI_v 0.10 11110 .... ... 01010 1 ..... ..... @q_shli_b
+SLI_v 0.10 11110 .... ... 01010 1 ..... ..... @q_shli_h
+SLI_v 0.10 11110 .... ... 01010 1 ..... ..... @q_shli_s
+SLI_v 0.10 11110 .... ... 01010 1 ..... ..... @q_shli_d
+
+SSHLL_v 0.00 11110 .... ... 10100 1 ..... ..... @q_shli_b
+SSHLL_v 0.00 11110 .... ... 10100 1 ..... ..... @q_shli_h
+SSHLL_v 0.00 11110 .... ... 10100 1 ..... ..... @q_shli_s
+
+USHLL_v 0.10 11110 .... ... 10100 1 ..... ..... @q_shli_b
+USHLL_v 0.10 11110 .... ... 10100 1 ..... ..... @q_shli_h
+USHLL_v 0.10 11110 .... ... 10100 1 ..... ..... @q_shli_s
+
+SHRN_v 0.00 11110 .... ... 10000 1 ..... ..... @q_shri_b
+SHRN_v 0.00 11110 .... ... 10000 1 ..... ..... @q_shri_h
+SHRN_v 0.00 11110 .... ... 10000 1 ..... ..... @q_shri_s
+
+RSHRN_v 0.00 11110 .... ... 10001 1 ..... ..... @q_shri_b
+RSHRN_v 0.00 11110 .... ... 10001 1 ..... ..... @q_shri_h
+RSHRN_v 0.00 11110 .... ... 10001 1 ..... ..... @q_shri_s
+
+SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_b
+SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_h
+SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_s
+SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_d
+
+UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_b
+UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_h
+UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_s
+UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_d
+
+SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_b
+SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_h
+SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_s
+SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_d
+
+SQSHRN_v 0.00 11110 .... ... 10010 1 ..... ..... @q_shri_b
+SQSHRN_v 0.00 11110 .... ... 10010 1 ..... ..... @q_shri_h
+SQSHRN_v 0.00 11110 .... ... 10010 1 ..... ..... @q_shri_s
+
+UQSHRN_v 0.10 11110 .... ... 10010 1 ..... ..... @q_shri_b
+UQSHRN_v 0.10 11110 .... ... 10010 1 ..... ..... @q_shri_h
+UQSHRN_v 0.10 11110 .... ... 10010 1 ..... ..... @q_shri_s
+
+SQSHRUN_v 0.10 11110 .... ... 10000 1 ..... ..... @q_shri_b
+SQSHRUN_v 0.10 11110 .... ... 10000 1 ..... ..... @q_shri_h
+SQSHRUN_v 0.10 11110 .... ... 10000 1 ..... ..... @q_shri_s
+
+SQRSHRN_v 0.00 11110 .... ... 10011 1 ..... ..... @q_shri_b
+SQRSHRN_v 0.00 11110 .... ... 10011 1 ..... ..... @q_shri_h
+SQRSHRN_v 0.00 11110 .... ... 10011 1 ..... ..... @q_shri_s
+
+UQRSHRN_v 0.10 11110 .... ... 10011 1 ..... ..... @q_shri_b
+UQRSHRN_v 0.10 11110 .... ... 10011 1 ..... ..... @q_shri_h
+UQRSHRN_v 0.10 11110 .... ... 10011 1 ..... ..... @q_shri_s
+
+SQRSHRUN_v 0.10 11110 .... ... 10001 1 ..... ..... @q_shri_b
+SQRSHRUN_v 0.10 11110 .... ... 10001 1 ..... ..... @q_shri_h
+SQRSHRUN_v 0.10 11110 .... ... 10001 1 ..... ..... @q_shri_s
+
+# Advanced SIMD scalar shift by immediate
+
+@shri_b .... ..... 0001 ... ..... . rn:5 rd:5 \
+ &rri_e esz=0 imm=%neon_rshift_i3
+@shri_h .... ..... 001 .... ..... . rn:5 rd:5 \
+ &rri_e esz=1 imm=%neon_rshift_i4
+@shri_s .... ..... 01 ..... ..... . rn:5 rd:5 \
+ &rri_e esz=2 imm=%neon_rshift_i5
+@shri_d .... ..... 1 ...... ..... . rn:5 rd:5 \
+ &rri_e esz=3 imm=%neon_rshift_i6
+
+@shli_b .... ..... 0001 imm:3 ..... . rn:5 rd:5 &rri_e esz=0
+@shli_h .... ..... 001 imm:4 ..... . rn:5 rd:5 &rri_e esz=1
+@shli_s .... ..... 01 imm:5 ..... . rn:5 rd:5 &rri_e esz=2
+@shli_d .... ..... 1 imm:6 ..... . rn:5 rd:5 &rri_e esz=3
+
+SSHR_s 0101 11110 .... ... 00000 1 ..... ..... @shri_d
+USHR_s 0111 11110 .... ... 00000 1 ..... ..... @shri_d
+SSRA_s 0101 11110 .... ... 00010 1 ..... ..... @shri_d
+USRA_s 0111 11110 .... ... 00010 1 ..... ..... @shri_d
+SRSHR_s 0101 11110 .... ... 00100 1 ..... ..... @shri_d
+URSHR_s 0111 11110 .... ... 00100 1 ..... ..... @shri_d
+SRSRA_s 0101 11110 .... ... 00110 1 ..... ..... @shri_d
+URSRA_s 0111 11110 .... ... 00110 1 ..... ..... @shri_d
+SRI_s 0111 11110 .... ... 01000 1 ..... ..... @shri_d
+
+SHL_s 0101 11110 .... ... 01010 1 ..... ..... @shli_d
+SLI_s 0111 11110 .... ... 01010 1 ..... ..... @shli_d
+
+SQSHL_si 0101 11110 .... ... 01110 1 ..... ..... @shli_b
+SQSHL_si 0101 11110 .... ... 01110 1 ..... ..... @shli_h
+SQSHL_si 0101 11110 .... ... 01110 1 ..... ..... @shli_s
+SQSHL_si 0101 11110 .... ... 01110 1 ..... ..... @shli_d
+
+UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_b
+UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_h
+UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_s
+UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_d
+
+SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_b
+SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_h
+SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_s
+SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_d
+
+SQSHRN_si 0101 11110 .... ... 10010 1 ..... ..... @shri_b
+SQSHRN_si 0101 11110 .... ... 10010 1 ..... ..... @shri_h
+SQSHRN_si 0101 11110 .... ... 10010 1 ..... ..... @shri_s
+
+UQSHRN_si 0111 11110 .... ... 10010 1 ..... ..... @shri_b
+UQSHRN_si 0111 11110 .... ... 10010 1 ..... ..... @shri_h
+UQSHRN_si 0111 11110 .... ... 10010 1 ..... ..... @shri_s
+
+SQSHRUN_si 0111 11110 .... ... 10000 1 ..... ..... @shri_b
+SQSHRUN_si 0111 11110 .... ... 10000 1 ..... ..... @shri_h
+SQSHRUN_si 0111 11110 .... ... 10000 1 ..... ..... @shri_s
+
+SQRSHRN_si 0101 11110 .... ... 10011 1 ..... ..... @shri_b
+SQRSHRN_si 0101 11110 .... ... 10011 1 ..... ..... @shri_h
+SQRSHRN_si 0101 11110 .... ... 10011 1 ..... ..... @shri_s
+
+UQRSHRN_si 0111 11110 .... ... 10011 1 ..... ..... @shri_b
+UQRSHRN_si 0111 11110 .... ... 10011 1 ..... ..... @shri_h
+UQRSHRN_si 0111 11110 .... ... 10011 1 ..... ..... @shri_s
+
+SQRSHRUN_si 0111 11110 .... ... 10001 1 ..... ..... @shri_b
+SQRSHRUN_si 0111 11110 .... ... 10001 1 ..... ..... @shri_h
+SQRSHRUN_si 0111 11110 .... ... 10001 1 ..... ..... @shri_s
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index b9f34f0..0168920 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -677,7 +677,7 @@
cpu->isar.id_aa64dfr0 = 0x000001f210305519ull;
cpu->isar.id_aa64dfr1 = 0x00000000;
cpu->isar.id_aa64isar0 = 0x1011111110212120ull; /* with FEAT_RNG */
- cpu->isar.id_aa64isar1 = 0x0111000001211032ull;
+ cpu->isar.id_aa64isar1 = 0x0011100001211032ull;
cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull;
cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
cpu->isar.id_aa64mmfr2 = 0x0220011102101011ull;
diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c
index 56a1dc1..f652520 100644
--- a/target/arm/tcg/gengvec.c
+++ b/target/arm/tcg/gengvec.c
@@ -88,6 +88,25 @@
#undef GEN_CMP0
+void gen_gvec_sshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ /* Signed shift out of range results in all-sign-bits */
+ shift = MIN(shift, (8 << vece) - 1);
+ tcg_gen_gvec_sari(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz);
+}
+
+void gen_gvec_ushr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ /* Unsigned shift out of range results in all-zero-bits */
+ if (shift >= (8 << vece)) {
+ tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
+ } else {
+ tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz);
+ }
+}
+
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
tcg_gen_vec_sar8i_i64(a, a, shift);
@@ -285,7 +304,7 @@
tcg_gen_add_i32(d, d, t);
}
- void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
TCGv_i64 t = tcg_temp_new_i64();
@@ -297,10 +316,9 @@
static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec ones = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);
tcg_gen_shri_vec(vece, t, a, sh - 1);
- tcg_gen_dupi_vec(vece, ones, 1);
tcg_gen_and_vec(vece, t, t, ones);
tcg_gen_sari_vec(vece, d, a, sh);
tcg_gen_add_vec(vece, d, d, t);
@@ -492,10 +510,9 @@
static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec ones = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);
tcg_gen_shri_vec(vece, t, a, shift - 1);
- tcg_gen_dupi_vec(vece, ones, 1);
tcg_gen_and_vec(vece, t, t, ones);
tcg_gen_shri_vec(vece, d, a, shift);
tcg_gen_add_vec(vece, d, d, t);
@@ -685,9 +702,9 @@
static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
+ int64_t mi = MAKE_64BIT_MASK((8 << vece) - sh, sh);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, mi);
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
tcg_gen_shri_vec(vece, t, a, sh);
tcg_gen_and_vec(vece, d, d, m);
tcg_gen_or_vec(vece, d, d, t);
@@ -773,10 +790,9 @@
static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, sh));
tcg_gen_shli_vec(vece, t, a, sh);
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
tcg_gen_and_vec(vece, d, d, m);
tcg_gen_or_vec(vece, d, d, t);
}
@@ -1044,14 +1060,13 @@
TCGv_vec rval = tcg_temp_new_vec_matching(dst);
TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
- TCGv_vec msk, max;
+ TCGv_vec max, zero;
tcg_gen_neg_vec(vece, rsh, shift);
if (vece == MO_8) {
tcg_gen_mov_vec(lsh, shift);
} else {
- msk = tcg_temp_new_vec_matching(dst);
- tcg_gen_dupi_vec(vece, msk, 0xff);
+ TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
tcg_gen_and_vec(vece, lsh, shift, msk);
tcg_gen_and_vec(vece, rsh, rsh, msk);
}
@@ -1064,26 +1079,21 @@
tcg_gen_shlv_vec(vece, lval, src, lsh);
tcg_gen_shrv_vec(vece, rval, src, rsh);
- max = tcg_temp_new_vec_matching(dst);
- tcg_gen_dupi_vec(vece, max, 8 << vece);
-
/*
- * The choice of LT (signed) and GEU (unsigned) are biased toward
+ * The choice of GE (signed) and GEU (unsigned) are biased toward
* the instructions of the x86_64 host. For MO_8, the whole byte
* is significant so we must use an unsigned compare; otherwise we
* have already masked to a byte and so a signed compare works.
* Other tcg hosts have a full set of comparisons and do not care.
*/
+ zero = tcg_constant_vec_matching(dst, vece, 0);
+ max = tcg_constant_vec_matching(dst, vece, 8 << vece);
if (vece == MO_8) {
- tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
- tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
- tcg_gen_andc_vec(vece, lval, lval, lsh);
- tcg_gen_andc_vec(vece, rval, rval, rsh);
+ tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, lval, lsh, max, zero, lval);
+ tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, rval, rsh, max, zero, rval);
} else {
- tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
- tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
- tcg_gen_and_vec(vece, lval, lval, lsh);
- tcg_gen_and_vec(vece, rval, rval, rsh);
+ tcg_gen_cmpsel_vec(TCG_COND_GE, vece, lval, lsh, max, zero, lval);
+ tcg_gen_cmpsel_vec(TCG_COND_GE, vece, rval, rsh, max, zero, rval);
}
tcg_gen_or_vec(vece, dst, lval, rval);
}
@@ -1093,7 +1103,7 @@
{
static const TCGOpcode vecop_list[] = {
INDEX_op_neg_vec, INDEX_op_shlv_vec,
- INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
+ INDEX_op_shrv_vec, INDEX_op_cmpsel_vec, 0
};
static const GVecGen3 ops[4] = {
{ .fniv = gen_ushl_vec,
@@ -1169,7 +1179,7 @@
TCGv_vec rval = tcg_temp_new_vec_matching(dst);
TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
- TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
+ TCGv_vec max, zero;
/*
* Rely on the TCG guarantee that out of range shifts produce
@@ -1180,29 +1190,28 @@
if (vece == MO_8) {
tcg_gen_mov_vec(lsh, shift);
} else {
- tcg_gen_dupi_vec(vece, tmp, 0xff);
- tcg_gen_and_vec(vece, lsh, shift, tmp);
- tcg_gen_and_vec(vece, rsh, rsh, tmp);
+ TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
+ tcg_gen_and_vec(vece, lsh, shift, msk);
+ tcg_gen_and_vec(vece, rsh, rsh, msk);
}
/* Bound rsh so out of bound right shift gets -1. */
- tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
- tcg_gen_umin_vec(vece, rsh, rsh, tmp);
- tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
+ max = tcg_constant_vec_matching(dst, vece, (8 << vece) - 1);
+ tcg_gen_umin_vec(vece, rsh, rsh, max);
tcg_gen_shlv_vec(vece, lval, src, lsh);
tcg_gen_sarv_vec(vece, rval, src, rsh);
/* Select in-bound left shift. */
- tcg_gen_andc_vec(vece, lval, lval, tmp);
+ zero = tcg_constant_vec_matching(dst, vece, 0);
+ tcg_gen_cmpsel_vec(TCG_COND_GT, vece, lval, lsh, max, zero, lval);
/* Select between left and right shift. */
if (vece == MO_8) {
- tcg_gen_dupi_vec(vece, tmp, 0);
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, zero, rval, lval);
} else {
- tcg_gen_dupi_vec(vece, tmp, 0x80);
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
+ TCGv_vec sgn = tcg_constant_vec_matching(dst, vece, 0x80);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, sgn, lval, rval);
}
}
@@ -1211,7 +1220,7 @@
{
static const TCGOpcode vecop_list[] = {
INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
- INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
+ INDEX_op_sarv_vec, INDEX_op_cmpsel_vec, 0
};
static const GVecGen3 ops[4] = {
{ .fniv = gen_sshl_vec,
@@ -1304,6 +1313,42 @@
opr_sz, max_sz, 0, fns[vece]);
}
+void gen_neon_sqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ int64_t c, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_2_ptr * const fns[] = {
+ gen_helper_neon_sqshli_b, gen_helper_neon_sqshli_h,
+ gen_helper_neon_sqshli_s, gen_helper_neon_sqshli_d,
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(c >= 0 && c <= (8 << vece));
+ tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
+}
+
+void gen_neon_uqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ int64_t c, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_2_ptr * const fns[] = {
+ gen_helper_neon_uqshli_b, gen_helper_neon_uqshli_h,
+ gen_helper_neon_uqshli_s, gen_helper_neon_uqshli_d,
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(c >= 0 && c <= (8 << vece));
+ tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
+}
+
+void gen_neon_sqshlui(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ int64_t c, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_2_ptr * const fns[] = {
+ gen_helper_neon_sqshlui_b, gen_helper_neon_sqshlui_h,
+ gen_helper_neon_sqshlui_s, gen_helper_neon_sqshlui_d,
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(c >= 0 && c <= (8 << vece));
+ tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
+}
+
void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
diff --git a/target/arm/tcg/neon-dp.decode b/target/arm/tcg/neon-dp.decode
index 788578c..e883c6a 100644
--- a/target/arm/tcg/neon-dp.decode
+++ b/target/arm/tcg/neon-dp.decode
@@ -291,17 +291,17 @@
VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h
VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_b
-VQSHLU_64_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_d
+VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_d
VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_s
VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_h
VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_b
-VQSHL_S_64_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
+VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s
VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h
VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b
-VQSHL_U_64_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
+VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s
VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h
VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b
diff --git a/target/arm/tcg/neon_helper.c b/target/arm/tcg/neon_helper.c
index 082bfd8..93b2076 100644
--- a/target/arm/tcg/neon_helper.c
+++ b/target/arm/tcg/neon_helper.c
@@ -141,6 +141,19 @@
clear_tail(d, opr_sz, simd_maxsz(desc)); \
}
+#define NEON_GVEC_VOP2i_ENV(name, vtype) \
+void HELPER(name)(void *vd, void *vn, void *venv, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ int imm = simd_data(desc); \
+ vtype *d = vd, *n = vn; \
+ CPUARMState *env = venv; \
+ for (i = 0; i < opr_sz / sizeof(vtype); i++) { \
+ NEON_FN(d[i], n[i], imm); \
+ } \
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
+}
+
/* Pairwise operations. */
/* For 32-bit elements each segment only contains a single element, so
the elementwise and pairwise operations are the same. */
@@ -271,22 +284,26 @@
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc))
NEON_VOP_ENV(qshl_u8, neon_u8, 4)
NEON_GVEC_VOP2_ENV(neon_uqshl_b, uint8_t)
+NEON_GVEC_VOP2i_ENV(neon_uqshli_b, uint8_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc))
NEON_VOP_ENV(qshl_u16, neon_u16, 2)
NEON_GVEC_VOP2_ENV(neon_uqshl_h, uint16_t)
+NEON_GVEC_VOP2i_ENV(neon_uqshli_h, uint16_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc))
NEON_GVEC_VOP2_ENV(neon_uqshl_s, uint32_t)
+NEON_GVEC_VOP2i_ENV(neon_uqshli_s, uint32_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_uqrshl_d(src1, (int8_t)src2, false, env->vfp.qc))
NEON_GVEC_VOP2_ENV(neon_uqshl_d, uint64_t)
+NEON_GVEC_VOP2i_ENV(neon_uqshli_d, uint64_t)
#undef NEON_FN
uint32_t HELPER(neon_qshl_u32)(CPUARMState *env, uint32_t val, uint32_t shift)
@@ -303,22 +320,26 @@
(dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc))
NEON_VOP_ENV(qshl_s8, neon_s8, 4)
NEON_GVEC_VOP2_ENV(neon_sqshl_b, int8_t)
+NEON_GVEC_VOP2i_ENV(neon_sqshli_b, int8_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc))
NEON_VOP_ENV(qshl_s16, neon_s16, 2)
NEON_GVEC_VOP2_ENV(neon_sqshl_h, int16_t)
+NEON_GVEC_VOP2i_ENV(neon_sqshli_h, int16_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_sqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc))
NEON_GVEC_VOP2_ENV(neon_sqshl_s, int32_t)
+NEON_GVEC_VOP2i_ENV(neon_sqshli_s, int32_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_sqrshl_d(src1, (int8_t)src2, false, env->vfp.qc))
NEON_GVEC_VOP2_ENV(neon_sqshl_d, int64_t)
+NEON_GVEC_VOP2i_ENV(neon_sqshli_d, int64_t)
#undef NEON_FN
uint32_t HELPER(neon_qshl_s32)(CPUARMState *env, uint32_t val, uint32_t shift)
@@ -334,11 +355,13 @@
#define NEON_FN(dest, src1, src2) \
(dest = do_suqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc))
NEON_VOP_ENV(qshlu_s8, neon_s8, 4)
+NEON_GVEC_VOP2i_ENV(neon_sqshlui_b, int8_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_suqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc))
NEON_VOP_ENV(qshlu_s16, neon_s16, 2)
+NEON_GVEC_VOP2i_ENV(neon_sqshlui_h, int16_t)
#undef NEON_FN
uint32_t HELPER(neon_qshlu_s32)(CPUARMState *env, uint32_t val, uint32_t shift)
@@ -352,6 +375,16 @@
}
#define NEON_FN(dest, src1, src2) \
+ (dest = do_suqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc))
+NEON_GVEC_VOP2i_ENV(neon_sqshlui_s, int32_t)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_suqrshl_d(src1, (int8_t)src2, false, env->vfp.qc))
+NEON_GVEC_VOP2i_ENV(neon_sqshlui_d, int64_t)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc))
NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
NEON_GVEC_VOP2_ENV(neon_uqrshl_b, uint8_t)
@@ -565,13 +598,15 @@
#undef NEON_FN
#undef NEON_QDMULH32
-uint32_t HELPER(neon_narrow_u8)(uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_narrow_u8)(uint64_t x)
{
return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u)
| ((x >> 24) & 0xff000000u);
}
-uint32_t HELPER(neon_narrow_u16)(uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_narrow_u16)(uint64_t x)
{
return (x & 0xffffu) | ((x >> 16) & 0xffff0000u);
}
@@ -602,7 +637,8 @@
return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
}
-uint32_t HELPER(neon_unarrow_sat8)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_unarrow_sat8)(CPUARMState *env, uint64_t x)
{
uint16_t s;
uint8_t d;
@@ -629,7 +665,8 @@
return res;
}
-uint32_t HELPER(neon_narrow_sat_u8)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_narrow_sat_u8)(CPUARMState *env, uint64_t x)
{
uint16_t s;
uint8_t d;
@@ -652,7 +689,8 @@
return res;
}
-uint32_t HELPER(neon_narrow_sat_s8)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_narrow_sat_s8)(CPUARMState *env, uint64_t x)
{
int16_t s;
uint8_t d;
@@ -675,7 +713,8 @@
return res;
}
-uint32_t HELPER(neon_unarrow_sat16)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_unarrow_sat16)(CPUARMState *env, uint64_t x)
{
uint32_t high;
uint32_t low;
@@ -695,10 +734,11 @@
high = 0xffff;
SET_QC();
}
- return low | (high << 16);
+ return deposit32(low, 16, 16, high);
}
-uint32_t HELPER(neon_narrow_sat_u16)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_narrow_sat_u16)(CPUARMState *env, uint64_t x)
{
uint32_t high;
uint32_t low;
@@ -712,10 +752,11 @@
high = 0xffff;
SET_QC();
}
- return low | (high << 16);
+ return deposit32(low, 16, 16, high);
}
-uint32_t HELPER(neon_narrow_sat_s16)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_narrow_sat_s16)(CPUARMState *env, uint64_t x)
{
int32_t low;
int32_t high;
@@ -729,10 +770,11 @@
high = (high >> 31) ^ 0x7fff;
SET_QC();
}
- return (uint16_t)low | (high << 16);
+ return deposit32(low, 16, 16, high);
}
-uint32_t HELPER(neon_unarrow_sat32)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_unarrow_sat32)(CPUARMState *env, uint64_t x)
{
if (x & 0x8000000000000000ull) {
SET_QC();
@@ -745,7 +787,8 @@
return x;
}
-uint32_t HELPER(neon_narrow_sat_u32)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_narrow_sat_u32)(CPUARMState *env, uint64_t x)
{
if (x > 0xffffffffu) {
SET_QC();
@@ -754,13 +797,14 @@
return x;
}
-uint32_t HELPER(neon_narrow_sat_s32)(CPUARMState *env, uint64_t x)
+/* Only the low 32-bits of output are significant. */
+uint64_t HELPER(neon_narrow_sat_s32)(CPUARMState *env, uint64_t x)
{
if ((int64_t)x != (int32_t)x) {
SET_QC();
- return ((int64_t)x >> 63) ^ 0x7fffffff;
+ return (uint32_t)((int64_t)x >> 63) ^ 0x7fffffff;
}
- return x;
+ return (uint32_t)x;
}
uint64_t HELPER(neon_widen_u8)(uint32_t x)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 6d5f12e..071b634 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -4680,6 +4680,88 @@
return true;
}
+static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
+{
+ if (fp_access_check(s)) {
+ int len = (a->len + 1) * 16;
+
+ tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rm), tcg_env,
+ a->q ? 16 : 8, vec_full_reg_size(s),
+ (len << 6) | (a->tbx << 5) | a->rn,
+ gen_helper_simd_tblx);
+ }
+ return true;
+}
+
+typedef int simd_permute_idx_fn(int i, int part, int elements);
+
+static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
+ simd_permute_idx_fn *fn, int part)
+{
+ MemOp esz = a->esz;
+ int datasize = a->q ? 16 : 8;
+ int elements = datasize >> esz;
+ TCGv_i64 tcg_res[2], tcg_ele;
+
+ if (esz == MO_64 && !a->q) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ tcg_res[0] = tcg_temp_new_i64();
+ tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
+ tcg_ele = tcg_temp_new_i64();
+
+ for (int i = 0; i < elements; i++) {
+ int o, w, idx;
+
+ idx = fn(i, part, elements);
+ read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
+ idx & (elements - 1), esz);
+
+ w = (i << (esz + 3)) / 64;
+ o = (i << (esz + 3)) % 64;
+ if (o == 0) {
+ tcg_gen_mov_i64(tcg_res[w], tcg_ele);
+ } else {
+ tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
+ }
+ }
+
+ for (int i = a->q; i >= 0; --i) {
+ write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
+ }
+ clear_vec_high(s, a->q, a->rd);
+ return true;
+}
+
+static int permute_load_uzp(int i, int part, int elements)
+{
+ return 2 * i + part;
+}
+
+TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
+TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
+
+static int permute_load_trn(int i, int part, int elements)
+{
+ return (i & 1) * elements + (i & ~1) + part;
+}
+
+TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
+TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
+
+static int permute_load_zip(int i, int part, int elements)
+{
+ return (i & 1) * elements + ((part * elements + i) >> 1);
+}
+
+TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
+TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
+
/*
* Cryptographic AES, SHA, SHA512
*/
@@ -6583,6 +6665,54 @@
}
/*
+ * Advanced SIMD Extract
+ */
+
+static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a)
+{
+ if (fp_access_check(s)) {
+ TCGv_i64 lo = read_fp_dreg(s, a->rn);
+ if (a->imm != 0) {
+ TCGv_i64 hi = read_fp_dreg(s, a->rm);
+ tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8);
+ }
+ write_fp_dreg(s, a->rd, lo);
+ }
+ return true;
+}
+
+static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a)
+{
+ TCGv_i64 lo, hi;
+ int pos = (a->imm & 7) * 8;
+ int elt = a->imm >> 3;
+
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ lo = tcg_temp_new_i64();
+ hi = tcg_temp_new_i64();
+
+ read_vec_element(s, lo, a->rn, elt, MO_64);
+ elt++;
+ read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64);
+ elt++;
+
+ if (pos != 0) {
+ TCGv_i64 hh = tcg_temp_new_i64();
+ tcg_gen_extract2_i64(lo, lo, hi, pos);
+ read_vec_element(s, hh, a->rm, elt & 1, MO_64);
+ tcg_gen_extract2_i64(hi, hi, hh, pos);
+ }
+
+ write_vec_element(s, lo, a->rd, 0, MO_64);
+ write_vec_element(s, hi, a->rd, 1, MO_64);
+ clear_vec_high(s, true, a->rd);
+ return true;
+}
+
+/*
* Floating-point data-processing (3 source)
*/
@@ -6664,6 +6794,697 @@
TRANS(FMSUB, do_fmadd, a, false, true)
TRANS(FNMSUB, do_fmadd, a, true, false)
+/*
+ * Advanced SIMD Across Lanes
+ */
+
+static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
+ MemOp src_sign, NeonGenTwo64OpFn *fn)
+{
+ TCGv_i64 tcg_res, tcg_elt;
+ MemOp src_mop = a->esz | src_sign;
+ int elements = (a->q ? 16 : 8) >> a->esz;
+
+ /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
+ if (elements < 4) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ tcg_res = tcg_temp_new_i64();
+ tcg_elt = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_res, a->rn, 0, src_mop);
+ for (int i = 1; i < elements; i++) {
+ read_vec_element(s, tcg_elt, a->rn, i, src_mop);
+ fn(tcg_res, tcg_res, tcg_elt);
+ }
+
+ tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
+ write_fp_dreg(s, a->rd, tcg_res);
+ return true;
+}
+
+TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
+TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
+TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
+TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
+TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
+TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
+TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
+
+/*
+ * do_fp_reduction helper
+ *
+ * This mirrors the Reduce() pseudocode in the ARM ARM. It is
+ * important for correct NaN propagation that we do these
+ * operations in exactly the order specified by the pseudocode.
+ *
+ * This is a recursive function.
+ */
+static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
+ int ebase, int ecount, TCGv_ptr fpst,
+ NeonGenTwoSingleOpFn *fn)
+{
+ if (ecount == 1) {
+ TCGv_i32 tcg_elem = tcg_temp_new_i32();
+ read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
+ return tcg_elem;
+ } else {
+ int half = ecount >> 1;
+ TCGv_i32 tcg_hi, tcg_lo, tcg_res;
+
+ tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
+ tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
+ tcg_res = tcg_temp_new_i32();
+
+ fn(tcg_res, tcg_lo, tcg_hi, fpst);
+ return tcg_res;
+ }
+}
+
+static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
+ NeonGenTwoSingleOpFn *fn)
+{
+ if (fp_access_check(s)) {
+ MemOp esz = a->esz;
+ int elts = (a->q ? 16 : 8) >> esz;
+ TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+ TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn);
+ write_fp_sreg(s, a->rd, res);
+ }
+ return true;
+}
+
+TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxnumh)
+TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minnumh)
+TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxh)
+TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minh)
+
+TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums)
+TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums)
+TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs)
+TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins)
+
+/*
+ * Floating-point Immediate
+ */
+
+static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a)
+{
+ switch (a->esz) {
+ case MO_32:
+ case MO_64:
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ break;
+ default:
+ return false;
+ }
+ if (fp_access_check(s)) {
+ uint64_t imm = vfp_expand_imm(a->esz, a->imm);
+ write_fp_dreg(s, a->rd, tcg_constant_i64(imm));
+ }
+ return true;
+}
+
+/*
+ * Advanced SIMD Modified Immediate
+ */
+
+static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
+{
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
+ a->q ? 16 : 8, vec_full_reg_size(s),
+ vfp_expand_imm(MO_16, a->abcdefgh));
+ }
+ return true;
+}
+
+static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
+}
+
+static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
+{
+ GVecGen2iFn *fn;
+
+ /* Handle decode of cmode/op here between ORR/BIC/MOVI */
+ if ((a->cmode & 1) && a->cmode < 12) {
+ /* For op=1, the imm will be inverted, so BIC becomes AND. */
+ fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
+ } else {
+ /* There is one unallocated cmode/op combination in this space */
+ if (a->cmode == 15 && a->op == 1 && a->q == 0) {
+ return false;
+ }
+ fn = gen_movi;
+ }
+
+ if (fp_access_check(s)) {
+ uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
+ gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
+ }
+ return true;
+}
+
+/*
+ * Advanced SIMD Shift by Immediate
+ */
+
+static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
+{
+ if (fp_access_check(s)) {
+ gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
+ }
+ return true;
+}
+
+TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
+TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
+TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
+TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
+TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
+TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
+TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
+TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
+TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
+TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
+TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli);
+TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
+TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
+TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
+
+static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
+{
+ TCGv_i64 tcg_rn, tcg_rd;
+ int esz = a->esz;
+ int esize;
+
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ /*
+ * For the LL variants the store is larger than the load,
+ * so if rd == rn we would overwrite parts of our input.
+ * So load everything right now and use shifts in the main loop.
+ */
+ tcg_rd = tcg_temp_new_i64();
+ tcg_rn = tcg_temp_new_i64();
+ read_vec_element(s, tcg_rn, a->rn, a->q, MO_64);
+
+ esize = 8 << esz;
+ for (int i = 0, elements = 8 >> esz; i < elements; i++) {
+ if (is_u) {
+ tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize);
+ } else {
+ tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize);
+ }
+ tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm);
+ write_vec_element(s, tcg_rd, a->rd, i, esz + 1);
+ }
+ clear_vec_high(s, true, a->rd);
+ return true;
+}
+
+TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false)
+TRANS(USHLL_v, do_vec_shift_imm_wide, a, true)
+
+static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ assert(shift >= 0 && shift <= 64);
+ tcg_gen_sari_i64(dst, src, MIN(shift, 63));
+}
+
+static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ assert(shift >= 0 && shift <= 64);
+ if (shift == 64) {
+ tcg_gen_movi_i64(dst, 0);
+ } else {
+ tcg_gen_shri_i64(dst, src, shift);
+ }
+}
+
+static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ gen_sshr_d(src, src, shift);
+ tcg_gen_add_i64(dst, dst, src);
+}
+
+static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ gen_ushr_d(src, src, shift);
+ tcg_gen_add_i64(dst, dst, src);
+}
+
+static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ assert(shift >= 0 && shift <= 32);
+ if (shift) {
+ TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
+ tcg_gen_add_i64(dst, src, rnd);
+ tcg_gen_sari_i64(dst, dst, shift);
+ } else {
+ tcg_gen_mov_i64(dst, src);
+ }
+}
+
+static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ assert(shift >= 0 && shift <= 32);
+ if (shift) {
+ TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
+ tcg_gen_add_i64(dst, src, rnd);
+ tcg_gen_shri_i64(dst, dst, shift);
+ } else {
+ tcg_gen_mov_i64(dst, src);
+ }
+}
+
+static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ assert(shift >= 0 && shift <= 64);
+ if (shift == 0) {
+ tcg_gen_mov_i64(dst, src);
+ } else if (shift == 64) {
+ /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */
+ tcg_gen_movi_i64(dst, 0);
+ } else {
+ TCGv_i64 rnd = tcg_temp_new_i64();
+ tcg_gen_extract_i64(rnd, src, shift - 1, 1);
+ tcg_gen_sari_i64(dst, src, shift);
+ tcg_gen_add_i64(dst, dst, rnd);
+ }
+}
+
+static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ assert(shift >= 0 && shift <= 64);
+ if (shift == 0) {
+ tcg_gen_mov_i64(dst, src);
+ } else if (shift == 64) {
+ /* Rounding will propagate bit 63 into bit 64. */
+ tcg_gen_shri_i64(dst, src, 63);
+ } else {
+ TCGv_i64 rnd = tcg_temp_new_i64();
+ tcg_gen_extract_i64(rnd, src, shift - 1, 1);
+ tcg_gen_shri_i64(dst, src, shift);
+ tcg_gen_add_i64(dst, dst, rnd);
+ }
+}
+
+static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ gen_srshr_d(src, src, shift);
+ tcg_gen_add_i64(dst, dst, src);
+}
+
+static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ gen_urshr_d(src, src, shift);
+ tcg_gen_add_i64(dst, dst, src);
+}
+
+static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ /* If shift is 64, dst is unchanged. */
+ if (shift != 64) {
+ tcg_gen_shri_i64(src, src, shift);
+ tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift);
+ }
+}
+
+static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
+{
+ tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift);
+}
+
+static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a,
+ WideShiftImmFn * const fns[3], MemOp sign)
+{
+ TCGv_i64 tcg_rn, tcg_rd;
+ int esz = a->esz;
+ int esize;
+ WideShiftImmFn *fn;
+
+ tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
+
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ tcg_rn = tcg_temp_new_i64();
+ tcg_rd = tcg_temp_new_i64();
+ tcg_gen_movi_i64(tcg_rd, 0);
+
+ fn = fns[esz];
+ esize = 8 << esz;
+ for (int i = 0, elements = 8 >> esz; i < elements; i++) {
+ read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign);
+ fn(tcg_rn, tcg_rn, a->imm);
+ tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize);
+ }
+
+ write_vec_element(s, tcg_rd, a->rd, a->q, MO_64);
+ clear_vec_high(s, a->q, a->rd);
+ return true;
+}
+
+static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ tcg_gen_sari_i64(d, s, i);
+ tcg_gen_ext16u_i64(d, d);
+ gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
+}
+
+static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ tcg_gen_sari_i64(d, s, i);
+ tcg_gen_ext32u_i64(d, d);
+ gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
+}
+
+static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_sshr_d(d, s, i);
+ gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
+}
+
+static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ tcg_gen_shri_i64(d, s, i);
+ gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
+}
+
+static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ tcg_gen_shri_i64(d, s, i);
+ gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
+}
+
+static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_ushr_d(d, s, i);
+ gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
+}
+
+static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ tcg_gen_sari_i64(d, s, i);
+ tcg_gen_ext16u_i64(d, d);
+ gen_helper_neon_unarrow_sat8(d, tcg_env, d);
+}
+
+static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ tcg_gen_sari_i64(d, s, i);
+ tcg_gen_ext32u_i64(d, d);
+ gen_helper_neon_unarrow_sat16(d, tcg_env, d);
+}
+
+static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_sshr_d(d, s, i);
+ gen_helper_neon_unarrow_sat32(d, tcg_env, d);
+}
+
+static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_srshr_bhs(d, s, i);
+ tcg_gen_ext16u_i64(d, d);
+ gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
+}
+
+static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_srshr_bhs(d, s, i);
+ tcg_gen_ext32u_i64(d, d);
+ gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
+}
+
+static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_srshr_d(d, s, i);
+ gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
+}
+
+static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_urshr_bhs(d, s, i);
+ gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
+}
+
+static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_urshr_bhs(d, s, i);
+ gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
+}
+
+static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_urshr_d(d, s, i);
+ gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
+}
+
+static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_srshr_bhs(d, s, i);
+ tcg_gen_ext16u_i64(d, d);
+ gen_helper_neon_unarrow_sat8(d, tcg_env, d);
+}
+
+static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_srshr_bhs(d, s, i);
+ tcg_gen_ext32u_i64(d, d);
+ gen_helper_neon_unarrow_sat16(d, tcg_env, d);
+}
+
+static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_srshr_d(d, s, i);
+ gen_helper_neon_unarrow_sat32(d, tcg_env, d);
+}
+
+static WideShiftImmFn * const shrn_fns[] = {
+ tcg_gen_shri_i64,
+ tcg_gen_shri_i64,
+ gen_ushr_d,
+};
+TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0)
+
+static WideShiftImmFn * const rshrn_fns[] = {
+ gen_urshr_bhs,
+ gen_urshr_bhs,
+ gen_urshr_d,
+};
+TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0)
+
+static WideShiftImmFn * const sqshrn_fns[] = {
+ gen_sqshrn_b,
+ gen_sqshrn_h,
+ gen_sqshrn_s,
+};
+TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN)
+
+static WideShiftImmFn * const uqshrn_fns[] = {
+ gen_uqshrn_b,
+ gen_uqshrn_h,
+ gen_uqshrn_s,
+};
+TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0)
+
+static WideShiftImmFn * const sqshrun_fns[] = {
+ gen_sqshrun_b,
+ gen_sqshrun_h,
+ gen_sqshrun_s,
+};
+TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN)
+
+static WideShiftImmFn * const sqrshrn_fns[] = {
+ gen_sqrshrn_b,
+ gen_sqrshrn_h,
+ gen_sqrshrn_s,
+};
+TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN)
+
+static WideShiftImmFn * const uqrshrn_fns[] = {
+ gen_uqrshrn_b,
+ gen_uqrshrn_h,
+ gen_uqrshrn_s,
+};
+TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0)
+
+static WideShiftImmFn * const sqrshrun_fns[] = {
+ gen_sqrshrun_b,
+ gen_sqrshrun_h,
+ gen_sqrshrun_s,
+};
+TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN)
+
+/*
+ * Advanced SIMD Scalar Shift by Immediate
+ */
+
+static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a,
+ WideShiftImmFn *fn, bool accumulate,
+ MemOp sign)
+{
+ if (fp_access_check(s)) {
+ TCGv_i64 rd = tcg_temp_new_i64();
+ TCGv_i64 rn = tcg_temp_new_i64();
+
+ read_vec_element(s, rn, a->rn, 0, a->esz | sign);
+ if (accumulate) {
+ read_vec_element(s, rd, a->rd, 0, a->esz | sign);
+ }
+ fn(rd, rn, a->imm);
+ write_fp_dreg(s, a->rd, rd);
+ }
+ return true;
+}
+
+TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0)
+TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0)
+TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0)
+TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0)
+TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0)
+TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0)
+TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0)
+TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0)
+TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
+
+TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
+TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
+
+static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
+ NeonGenTwoOpEnvFn *fn)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t, s);
+ fn(t, tcg_env, t, tcg_constant_i32(i));
+ tcg_gen_extu_i32_i64(d, t);
+}
+
+static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8);
+}
+
+static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16);
+}
+
+static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32);
+}
+
+static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i));
+}
+
+static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8);
+}
+
+static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16);
+}
+
+static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32);
+}
+
+static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i));
+}
+
+static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8);
+}
+
+static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16);
+}
+
+static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32);
+}
+
+static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i));
+}
+
+static WideShiftImmFn * const f_scalar_sqshli[] = {
+ gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
+};
+
+static WideShiftImmFn * const f_scalar_uqshli[] = {
+ gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
+};
+
+static WideShiftImmFn * const f_scalar_sqshlui[] = {
+ gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
+};
+
+/* Note that the helpers sign-extend their inputs, so don't do it here. */
+TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
+TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
+TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
+
+static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a,
+ WideShiftImmFn * const fns[3],
+ MemOp sign, bool zext)
+{
+ MemOp esz = a->esz;
+
+ tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
+
+ if (fp_access_check(s)) {
+ TCGv_i64 rd = tcg_temp_new_i64();
+ TCGv_i64 rn = tcg_temp_new_i64();
+
+ read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign);
+ fns[esz](rd, rn, a->imm);
+ if (zext) {
+ tcg_gen_ext_i64(rd, rd, esz);
+ }
+ write_fp_dreg(s, a->rd, rd);
+ }
+ return true;
+}
+
+TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true)
+TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true)
+TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false)
+TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false)
+TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false)
+TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false)
+
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
* Note that it is the caller's responsibility to ensure that the
* shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@@ -8401,53 +9222,6 @@
}
}
-/* Floating point immediate
- * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0
- * +---+---+---+-----------+------+---+------------+-------+------+------+
- * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd |
- * +---+---+---+-----------+------+---+------------+-------+------+------+
- */
-static void disas_fp_imm(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int imm5 = extract32(insn, 5, 5);
- int imm8 = extract32(insn, 13, 8);
- int type = extract32(insn, 22, 2);
- int mos = extract32(insn, 29, 3);
- uint64_t imm;
- MemOp sz;
-
- if (mos || imm5) {
- unallocated_encoding(s);
- return;
- }
-
- switch (type) {
- case 0:
- sz = MO_32;
- break;
- case 1:
- sz = MO_64;
- break;
- case 3:
- sz = MO_16;
- if (dc_isar_feature(aa64_fp16, s)) {
- break;
- }
- /* fallthru */
- default:
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- imm = vfp_expand_imm(sz, imm8);
- write_fp_dreg(s, rd, tcg_constant_i64(imm));
-}
-
/* Handle floating point <=> fixed point conversions. Note that we can
* also deal with fp <=> integer conversions as a special case (scale == 64)
* OPTME: consider handling that special case specially or at least skipping
@@ -8867,7 +9641,7 @@
switch (ctz32(extract32(insn, 12, 4))) {
case 0: /* [15:12] == xxx1 */
/* Floating point immediate */
- disas_fp_imm(s, insn);
+ unallocated_encoding(s); /* in decodetree */
break;
case 1: /* [15:12] == xx10 */
/* Floating point compare */
@@ -8890,874 +9664,6 @@
}
}
-static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
- int pos)
-{
- /* Extract 64 bits from the middle of two concatenated 64 bit
- * vector register slices left:right. The extracted bits start
- * at 'pos' bits into the right (least significant) side.
- * We return the result in tcg_right, and guarantee not to
- * trash tcg_left.
- */
- TCGv_i64 tcg_tmp = tcg_temp_new_i64();
- assert(pos > 0 && pos < 64);
-
- tcg_gen_shri_i64(tcg_right, tcg_right, pos);
- tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
- tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
-}
-
-/* EXT
- * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0
- * +---+---+-------------+-----+---+------+---+------+---+------+------+
- * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd |
- * +---+---+-------------+-----+---+------+---+------+---+------+------+
- */
-static void disas_simd_ext(DisasContext *s, uint32_t insn)
-{
- int is_q = extract32(insn, 30, 1);
- int op2 = extract32(insn, 22, 2);
- int imm4 = extract32(insn, 11, 4);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- int pos = imm4 << 3;
- TCGv_i64 tcg_resl, tcg_resh;
-
- if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- tcg_resh = tcg_temp_new_i64();
- tcg_resl = tcg_temp_new_i64();
-
- /* Vd gets bits starting at pos bits into Vm:Vn. This is
- * either extracting 128 bits from a 128:128 concatenation, or
- * extracting 64 bits from a 64:64 concatenation.
- */
- if (!is_q) {
- read_vec_element(s, tcg_resl, rn, 0, MO_64);
- if (pos != 0) {
- read_vec_element(s, tcg_resh, rm, 0, MO_64);
- do_ext64(s, tcg_resh, tcg_resl, pos);
- }
- } else {
- TCGv_i64 tcg_hh;
- typedef struct {
- int reg;
- int elt;
- } EltPosns;
- EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
- EltPosns *elt = eltposns;
-
- if (pos >= 64) {
- elt++;
- pos -= 64;
- }
-
- read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
- elt++;
- read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
- elt++;
- if (pos != 0) {
- do_ext64(s, tcg_resh, tcg_resl, pos);
- tcg_hh = tcg_temp_new_i64();
- read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
- do_ext64(s, tcg_hh, tcg_resh, pos);
- }
- }
-
- write_vec_element(s, tcg_resl, rd, 0, MO_64);
- if (is_q) {
- write_vec_element(s, tcg_resh, rd, 1, MO_64);
- }
- clear_vec_high(s, is_q, rd);
-}
-
-/* TBL/TBX
- * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0
- * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
- * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd |
- * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
- */
-static void disas_simd_tb(DisasContext *s, uint32_t insn)
-{
- int op2 = extract32(insn, 22, 2);
- int is_q = extract32(insn, 30, 1);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
- int is_tbx = extract32(insn, 12, 1);
- int len = (extract32(insn, 13, 2) + 1) * 16;
-
- if (op2 != 0) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rm), tcg_env,
- is_q ? 16 : 8, vec_full_reg_size(s),
- (len << 6) | (is_tbx << 5) | rn,
- gen_helper_simd_tblx);
-}
-
-/* ZIP/UZP/TRN
- * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
- * +---+---+-------------+------+---+------+---+------------------+------+
- * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd |
- * +---+---+-------------+------+---+------+---+------------------+------+
- */
-static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int rm = extract32(insn, 16, 5);
- int size = extract32(insn, 22, 2);
- /* opc field bits [1:0] indicate ZIP/UZP/TRN;
- * bit 2 indicates 1 vs 2 variant of the insn.
- */
- int opcode = extract32(insn, 12, 2);
- bool part = extract32(insn, 14, 1);
- bool is_q = extract32(insn, 30, 1);
- int esize = 8 << size;
- int i;
- int datasize = is_q ? 128 : 64;
- int elements = datasize / esize;
- TCGv_i64 tcg_res[2], tcg_ele;
-
- if (opcode == 0 || (size == 3 && !is_q)) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- tcg_res[0] = tcg_temp_new_i64();
- tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
- tcg_ele = tcg_temp_new_i64();
-
- for (i = 0; i < elements; i++) {
- int o, w;
-
- switch (opcode) {
- case 1: /* UZP1/2 */
- {
- int midpoint = elements / 2;
- if (i < midpoint) {
- read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
- } else {
- read_vec_element(s, tcg_ele, rm,
- 2 * (i - midpoint) + part, size);
- }
- break;
- }
- case 2: /* TRN1/2 */
- if (i & 1) {
- read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
- } else {
- read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
- }
- break;
- case 3: /* ZIP1/2 */
- {
- int base = part * elements / 2;
- if (i & 1) {
- read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
- } else {
- read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
- }
- break;
- }
- default:
- g_assert_not_reached();
- }
-
- w = (i * esize) / 64;
- o = (i * esize) % 64;
- if (o == 0) {
- tcg_gen_mov_i64(tcg_res[w], tcg_ele);
- } else {
- tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
- tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
- }
- }
-
- for (i = 0; i <= is_q; ++i) {
- write_vec_element(s, tcg_res[i], rd, i, MO_64);
- }
- clear_vec_high(s, is_q, rd);
-}
-
-/*
- * do_reduction_op helper
- *
- * This mirrors the Reduce() pseudocode in the ARM ARM. It is
- * important for correct NaN propagation that we do these
- * operations in exactly the order specified by the pseudocode.
- *
- * This is a recursive function, TCG temps should be freed by the
- * calling function once it is done with the values.
- */
-static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
- int esize, int size, int vmap, TCGv_ptr fpst)
-{
- if (esize == size) {
- int element;
- MemOp msize = esize == 16 ? MO_16 : MO_32;
- TCGv_i32 tcg_elem;
-
- /* We should have one register left here */
- assert(ctpop8(vmap) == 1);
- element = ctz32(vmap);
- assert(element < 8);
-
- tcg_elem = tcg_temp_new_i32();
- read_vec_element_i32(s, tcg_elem, rn, element, msize);
- return tcg_elem;
- } else {
- int bits = size / 2;
- int shift = ctpop8(vmap) / 2;
- int vmap_lo = (vmap >> shift) & vmap;
- int vmap_hi = (vmap & ~vmap_lo);
- TCGv_i32 tcg_hi, tcg_lo, tcg_res;
-
- tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
- tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
- tcg_res = tcg_temp_new_i32();
-
- switch (fpopcode) {
- case 0x0c: /* fmaxnmv half-precision */
- gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x0f: /* fmaxv half-precision */
- gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x1c: /* fminnmv half-precision */
- gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x1f: /* fminv half-precision */
- gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x2c: /* fmaxnmv */
- gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x2f: /* fmaxv */
- gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x3c: /* fminnmv */
- gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x3f: /* fminv */
- gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- default:
- g_assert_not_reached();
- }
- return tcg_res;
- }
-}
-
-/* AdvSIMD across lanes
- * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
- * +---+---+---+-----------+------+-----------+--------+-----+------+------+
- * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
- * +---+---+---+-----------+------+-----------+--------+-----+------+------+
- */
-static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int size = extract32(insn, 22, 2);
- int opcode = extract32(insn, 12, 5);
- bool is_q = extract32(insn, 30, 1);
- bool is_u = extract32(insn, 29, 1);
- bool is_fp = false;
- bool is_min = false;
- int esize;
- int elements;
- int i;
- TCGv_i64 tcg_res, tcg_elt;
-
- switch (opcode) {
- case 0x1b: /* ADDV */
- if (is_u) {
- unallocated_encoding(s);
- return;
- }
- /* fall through */
- case 0x3: /* SADDLV, UADDLV */
- case 0xa: /* SMAXV, UMAXV */
- case 0x1a: /* SMINV, UMINV */
- if (size == 3 || (size == 2 && !is_q)) {
- unallocated_encoding(s);
- return;
- }
- break;
- case 0xc: /* FMAXNMV, FMINNMV */
- case 0xf: /* FMAXV, FMINV */
- /* Bit 1 of size field encodes min vs max and the actual size
- * depends on the encoding of the U bit. If not set (and FP16
- * enabled) then we do half-precision float instead of single
- * precision.
- */
- is_min = extract32(size, 1, 1);
- is_fp = true;
- if (!is_u && dc_isar_feature(aa64_fp16, s)) {
- size = 1;
- } else if (!is_u || !is_q || extract32(size, 0, 1)) {
- unallocated_encoding(s);
- return;
- } else {
- size = 2;
- }
- break;
- default:
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- esize = 8 << size;
- elements = (is_q ? 128 : 64) / esize;
-
- tcg_res = tcg_temp_new_i64();
- tcg_elt = tcg_temp_new_i64();
-
- /* These instructions operate across all lanes of a vector
- * to produce a single result. We can guarantee that a 64
- * bit intermediate is sufficient:
- * + for [US]ADDLV the maximum element size is 32 bits, and
- * the result type is 64 bits
- * + for FMAX*V, FMIN*V, ADDV the intermediate type is the
- * same as the element size, which is 32 bits at most
- * For the integer operations we can choose to work at 64
- * or 32 bits and truncate at the end; for simplicity
- * we use 64 bits always. The floating point
- * ops do require 32 bit intermediates, though.
- */
- if (!is_fp) {
- read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
-
- for (i = 1; i < elements; i++) {
- read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
-
- switch (opcode) {
- case 0x03: /* SADDLV / UADDLV */
- case 0x1b: /* ADDV */
- tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
- break;
- case 0x0a: /* SMAXV / UMAXV */
- if (is_u) {
- tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
- } else {
- tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
- }
- break;
- case 0x1a: /* SMINV / UMINV */
- if (is_u) {
- tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
- } else {
- tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
- }
- break;
- default:
- g_assert_not_reached();
- }
-
- }
- } else {
- /* Floating point vector reduction ops which work across 32
- * bit (single) or 16 bit (half-precision) intermediates.
- * Note that correct NaN propagation requires that we do these
- * operations in exactly the order specified by the pseudocode.
- */
- TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
- int fpopcode = opcode | is_min << 4 | is_u << 5;
- int vmap = (1 << elements) - 1;
- TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
- (is_q ? 128 : 64), vmap, fpst);
- tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
- }
-
- /* Now truncate the result to the width required for the final output */
- if (opcode == 0x03) {
- /* SADDLV, UADDLV: result is 2*esize */
- size++;
- }
-
- switch (size) {
- case 0:
- tcg_gen_ext8u_i64(tcg_res, tcg_res);
- break;
- case 1:
- tcg_gen_ext16u_i64(tcg_res, tcg_res);
- break;
- case 2:
- tcg_gen_ext32u_i64(tcg_res, tcg_res);
- break;
- case 3:
- break;
- default:
- g_assert_not_reached();
- }
-
- write_fp_dreg(s, rd, tcg_res);
-}
-
-/* AdvSIMD modified immediate
- * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
- * +---+---+----+---------------------+-----+-------+----+---+-------+------+
- * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd |
- * +---+---+----+---------------------+-----+-------+----+---+-------+------+
- *
- * There are a number of operations that can be carried out here:
- * MOVI - move (shifted) imm into register
- * MVNI - move inverted (shifted) imm into register
- * ORR - bitwise OR of (shifted) imm with register
- * BIC - bitwise clear of (shifted) imm with register
- * With ARMv8.2 we also have:
- * FMOV half-precision
- */
-static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int cmode = extract32(insn, 12, 4);
- int o2 = extract32(insn, 11, 1);
- uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
- bool is_neg = extract32(insn, 29, 1);
- bool is_q = extract32(insn, 30, 1);
- uint64_t imm = 0;
-
- if (o2) {
- if (cmode != 0xf || is_neg) {
- unallocated_encoding(s);
- return;
- }
- /* FMOV (vector, immediate) - half-precision */
- if (!dc_isar_feature(aa64_fp16, s)) {
- unallocated_encoding(s);
- return;
- }
- imm = vfp_expand_imm(MO_16, abcdefgh);
- /* now duplicate across the lanes */
- imm = dup_const(MO_16, imm);
- } else {
- if (cmode == 0xf && is_neg && !is_q) {
- unallocated_encoding(s);
- return;
- }
- imm = asimd_imm_const(abcdefgh, cmode, is_neg);
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
- /* MOVI or MVNI, with MVNI negation handled above. */
- tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
- vec_full_reg_size(s), imm);
- } else {
- /* ORR or BIC, with BIC negation to AND handled above. */
- if (is_neg) {
- gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
- } else {
- gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
- }
- }
-}
-
-/*
- * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
- *
- * This code is handles the common shifting code and is used by both
- * the vector and scalar code.
- */
-static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
- TCGv_i64 tcg_rnd, bool accumulate,
- bool is_u, int size, int shift)
-{
- bool extended_result = false;
- bool round = tcg_rnd != NULL;
- int ext_lshift = 0;
- TCGv_i64 tcg_src_hi;
-
- if (round && size == 3) {
- extended_result = true;
- ext_lshift = 64 - shift;
- tcg_src_hi = tcg_temp_new_i64();
- } else if (shift == 64) {
- if (!accumulate && is_u) {
- /* result is zero */
- tcg_gen_movi_i64(tcg_res, 0);
- return;
- }
- }
-
- /* Deal with the rounding step */
- if (round) {
- if (extended_result) {
- TCGv_i64 tcg_zero = tcg_constant_i64(0);
- if (!is_u) {
- /* take care of sign extending tcg_res */
- tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
- tcg_gen_add2_i64(tcg_src, tcg_src_hi,
- tcg_src, tcg_src_hi,
- tcg_rnd, tcg_zero);
- } else {
- tcg_gen_add2_i64(tcg_src, tcg_src_hi,
- tcg_src, tcg_zero,
- tcg_rnd, tcg_zero);
- }
- } else {
- tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
- }
- }
-
- /* Now do the shift right */
- if (round && extended_result) {
- /* extended case, >64 bit precision required */
- if (ext_lshift == 0) {
- /* special case, only high bits matter */
- tcg_gen_mov_i64(tcg_src, tcg_src_hi);
- } else {
- tcg_gen_shri_i64(tcg_src, tcg_src, shift);
- tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
- tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
- }
- } else {
- if (is_u) {
- if (shift == 64) {
- /* essentially shifting in 64 zeros */
- tcg_gen_movi_i64(tcg_src, 0);
- } else {
- tcg_gen_shri_i64(tcg_src, tcg_src, shift);
- }
- } else {
- if (shift == 64) {
- /* effectively extending the sign-bit */
- tcg_gen_sari_i64(tcg_src, tcg_src, 63);
- } else {
- tcg_gen_sari_i64(tcg_src, tcg_src, shift);
- }
- }
- }
-
- if (accumulate) {
- tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
- } else {
- tcg_gen_mov_i64(tcg_res, tcg_src);
- }
-}
-
-/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
-static void handle_scalar_simd_shri(DisasContext *s,
- bool is_u, int immh, int immb,
- int opcode, int rn, int rd)
-{
- const int size = 3;
- int immhb = immh << 3 | immb;
- int shift = 2 * (8 << size) - immhb;
- bool accumulate = false;
- bool round = false;
- bool insert = false;
- TCGv_i64 tcg_rn;
- TCGv_i64 tcg_rd;
- TCGv_i64 tcg_round;
-
- if (!extract32(immh, 3, 1)) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- switch (opcode) {
- case 0x02: /* SSRA / USRA (accumulate) */
- accumulate = true;
- break;
- case 0x04: /* SRSHR / URSHR (rounding) */
- round = true;
- break;
- case 0x06: /* SRSRA / URSRA (accum + rounding) */
- accumulate = round = true;
- break;
- case 0x08: /* SRI */
- insert = true;
- break;
- }
-
- if (round) {
- tcg_round = tcg_constant_i64(1ULL << (shift - 1));
- } else {
- tcg_round = NULL;
- }
-
- tcg_rn = read_fp_dreg(s, rn);
- tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
-
- if (insert) {
- /* shift count same as element size is valid but does nothing;
- * special case to avoid potential shift by 64.
- */
- int esize = 8 << size;
- if (shift != esize) {
- tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
- tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
- }
- } else {
- handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
- accumulate, is_u, size, shift);
- }
-
- write_fp_dreg(s, rd, tcg_rd);
-}
-
-/* SHL/SLI - Scalar shift left */
-static void handle_scalar_simd_shli(DisasContext *s, bool insert,
- int immh, int immb, int opcode,
- int rn, int rd)
-{
- int size = 32 - clz32(immh) - 1;
- int immhb = immh << 3 | immb;
- int shift = immhb - (8 << size);
- TCGv_i64 tcg_rn;
- TCGv_i64 tcg_rd;
-
- if (!extract32(immh, 3, 1)) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- tcg_rn = read_fp_dreg(s, rn);
- tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
-
- if (insert) {
- tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
- } else {
- tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
- }
-
- write_fp_dreg(s, rd, tcg_rd);
-}
-
-/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
- * (signed/unsigned) narrowing */
-static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
- bool is_u_shift, bool is_u_narrow,
- int immh, int immb, int opcode,
- int rn, int rd)
-{
- int immhb = immh << 3 | immb;
- int size = 32 - clz32(immh) - 1;
- int esize = 8 << size;
- int shift = (2 * esize) - immhb;
- int elements = is_scalar ? 1 : (64 / esize);
- bool round = extract32(opcode, 0, 1);
- MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
- TCGv_i64 tcg_rn, tcg_rd, tcg_round;
- TCGv_i32 tcg_rd_narrowed;
- TCGv_i64 tcg_final;
-
- static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
- { gen_helper_neon_narrow_sat_s8,
- gen_helper_neon_unarrow_sat8 },
- { gen_helper_neon_narrow_sat_s16,
- gen_helper_neon_unarrow_sat16 },
- { gen_helper_neon_narrow_sat_s32,
- gen_helper_neon_unarrow_sat32 },
- { NULL, NULL },
- };
- static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
- gen_helper_neon_narrow_sat_u8,
- gen_helper_neon_narrow_sat_u16,
- gen_helper_neon_narrow_sat_u32,
- NULL
- };
- NeonGenNarrowEnvFn *narrowfn;
-
- int i;
-
- assert(size < 4);
-
- if (extract32(immh, 3, 1)) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- if (is_u_shift) {
- narrowfn = unsigned_narrow_fns[size];
- } else {
- narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
- }
-
- tcg_rn = tcg_temp_new_i64();
- tcg_rd = tcg_temp_new_i64();
- tcg_rd_narrowed = tcg_temp_new_i32();
- tcg_final = tcg_temp_new_i64();
-
- if (round) {
- tcg_round = tcg_constant_i64(1ULL << (shift - 1));
- } else {
- tcg_round = NULL;
- }
-
- for (i = 0; i < elements; i++) {
- read_vec_element(s, tcg_rn, rn, i, ldop);
- handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
- false, is_u_shift, size+1, shift);
- narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd);
- tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
- if (i == 0) {
- tcg_gen_extract_i64(tcg_final, tcg_rd, 0, esize);
- } else {
- tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
- }
- }
-
- if (!is_q) {
- write_vec_element(s, tcg_final, rd, 0, MO_64);
- } else {
- write_vec_element(s, tcg_final, rd, 1, MO_64);
- }
- clear_vec_high(s, is_q, rd);
-}
-
-/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
-static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
- bool src_unsigned, bool dst_unsigned,
- int immh, int immb, int rn, int rd)
-{
- int immhb = immh << 3 | immb;
- int size = 32 - clz32(immh) - 1;
- int shift = immhb - (8 << size);
- int pass;
-
- assert(immh != 0);
- assert(!(scalar && is_q));
-
- if (!scalar) {
- if (!is_q && extract32(immh, 3, 1)) {
- unallocated_encoding(s);
- return;
- }
-
- /* Since we use the variable-shift helpers we must
- * replicate the shift count into each element of
- * the tcg_shift value.
- */
- switch (size) {
- case 0:
- shift |= shift << 8;
- /* fall through */
- case 1:
- shift |= shift << 16;
- break;
- case 2:
- case 3:
- break;
- default:
- g_assert_not_reached();
- }
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- if (size == 3) {
- TCGv_i64 tcg_shift = tcg_constant_i64(shift);
- static NeonGenTwo64OpEnvFn * const fns[2][2] = {
- { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
- { NULL, gen_helper_neon_qshl_u64 },
- };
- NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
- int maxpass = is_q ? 2 : 1;
-
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op, rn, pass, MO_64);
- genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
- write_vec_element(s, tcg_op, rd, pass, MO_64);
- }
- clear_vec_high(s, is_q, rd);
- } else {
- TCGv_i32 tcg_shift = tcg_constant_i32(shift);
- static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
- {
- { gen_helper_neon_qshl_s8,
- gen_helper_neon_qshl_s16,
- gen_helper_neon_qshl_s32 },
- { gen_helper_neon_qshlu_s8,
- gen_helper_neon_qshlu_s16,
- gen_helper_neon_qshlu_s32 }
- }, {
- { NULL, NULL, NULL },
- { gen_helper_neon_qshl_u8,
- gen_helper_neon_qshl_u16,
- gen_helper_neon_qshl_u32 }
- }
- };
- NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
- MemOp memop = scalar ? size : MO_32;
- int maxpass = scalar ? 1 : is_q ? 4 : 2;
-
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i32 tcg_op = tcg_temp_new_i32();
-
- read_vec_element_i32(s, tcg_op, rn, pass, memop);
- genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
- if (scalar) {
- switch (size) {
- case 0:
- tcg_gen_ext8u_i32(tcg_op, tcg_op);
- break;
- case 1:
- tcg_gen_ext16u_i32(tcg_op, tcg_op);
- break;
- case 2:
- break;
- default:
- g_assert_not_reached();
- }
- write_fp_sreg(s, rd, tcg_op);
- } else {
- write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
- }
- }
-
- if (!scalar) {
- clear_vec_high(s, is_q, rd);
- }
- }
-}
-
/* Common vector code for handling integer to FP conversion */
static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
int elements, int is_signed,
@@ -10019,53 +9925,26 @@
}
switch (opcode) {
- case 0x08: /* SRI */
- if (!is_u) {
- unallocated_encoding(s);
- return;
- }
- /* fall through */
- case 0x00: /* SSHR / USHR */
- case 0x02: /* SSRA / USRA */
- case 0x04: /* SRSHR / URSHR */
- case 0x06: /* SRSRA / URSRA */
- handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
- break;
- case 0x0a: /* SHL / SLI */
- handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
- break;
case 0x1c: /* SCVTF, UCVTF */
handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
opcode, rn, rd);
break;
- case 0x10: /* SQSHRUN, SQSHRUN2 */
- case 0x11: /* SQRSHRUN, SQRSHRUN2 */
- if (!is_u) {
- unallocated_encoding(s);
- return;
- }
- handle_vec_simd_sqshrn(s, true, false, false, true,
- immh, immb, opcode, rn, rd);
- break;
- case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
- case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
- handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
- immh, immb, opcode, rn, rd);
- break;
- case 0xc: /* SQSHLU */
- if (!is_u) {
- unallocated_encoding(s);
- return;
- }
- handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
- break;
- case 0xe: /* SQSHL, UQSHL */
- handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
- break;
case 0x1f: /* FCVTZS, FCVTZU */
handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
break;
default:
+ case 0x00: /* SSHR / USHR */
+ case 0x02: /* SSRA / USRA */
+ case 0x04: /* SRSHR / URSHR */
+ case 0x06: /* SRSRA / URSRA */
+ case 0x08: /* SRI */
+ case 0x0a: /* SHL / SLI */
+ case 0x0c: /* SQSHLU */
+ case 0x0e: /* SQSHL, UQSHL */
+ case 0x10: /* SQSHRUN */
+ case 0x11: /* SQRSHRUN */
+ case 0x12: /* SQSHRN, UQSHRN */
+ case 0x13: /* SQRSHRN, UQRSHRN */
unallocated_encoding(s);
break;
}
@@ -10380,35 +10259,35 @@
* in the source becomes a size element in the destination).
*/
int pass;
- TCGv_i32 tcg_res[2];
+ TCGv_i64 tcg_res[2];
int destelt = is_q ? 2 : 0;
int passes = scalar ? 1 : 2;
if (scalar) {
- tcg_res[1] = tcg_constant_i32(0);
+ tcg_res[1] = tcg_constant_i64(0);
}
for (pass = 0; pass < passes; pass++) {
TCGv_i64 tcg_op = tcg_temp_new_i64();
- NeonGenNarrowFn *genfn = NULL;
- NeonGenNarrowEnvFn *genenvfn = NULL;
+ NeonGenOne64OpFn *genfn = NULL;
+ NeonGenOne64OpEnvFn *genenvfn = NULL;
if (scalar) {
read_vec_element(s, tcg_op, rn, pass, size + 1);
} else {
read_vec_element(s, tcg_op, rn, pass, MO_64);
}
- tcg_res[pass] = tcg_temp_new_i32();
+ tcg_res[pass] = tcg_temp_new_i64();
switch (opcode) {
case 0x12: /* XTN, SQXTUN */
{
- static NeonGenNarrowFn * const xtnfns[3] = {
+ static NeonGenOne64OpFn * const xtnfns[3] = {
gen_helper_neon_narrow_u8,
gen_helper_neon_narrow_u16,
- tcg_gen_extrl_i64_i32,
+ tcg_gen_ext32u_i64,
};
- static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
+ static NeonGenOne64OpEnvFn * const sqxtunfns[3] = {
gen_helper_neon_unarrow_sat8,
gen_helper_neon_unarrow_sat16,
gen_helper_neon_unarrow_sat32,
@@ -10422,7 +10301,7 @@
}
case 0x14: /* SQXTN, UQXTN */
{
- static NeonGenNarrowEnvFn * const fns[3][2] = {
+ static NeonGenOne64OpEnvFn * const fns[3][2] = {
{ gen_helper_neon_narrow_sat_s8,
gen_helper_neon_narrow_sat_u8 },
{ gen_helper_neon_narrow_sat_s16,
@@ -10436,7 +10315,9 @@
case 0x16: /* FCVTN, FCVTN2 */
/* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
if (size == 2) {
- gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, tcg_env);
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ gen_helper_vfp_fcvtsd(tmp, tcg_op, tcg_env);
+ tcg_gen_extu_i32_i64(tcg_res[pass], tmp);
} else {
TCGv_i32 tcg_lo = tcg_temp_new_i32();
TCGv_i32 tcg_hi = tcg_temp_new_i32();
@@ -10446,21 +10327,29 @@
tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
- tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
+ tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16);
+ tcg_gen_extu_i32_i64(tcg_res[pass], tcg_lo);
}
break;
case 0x36: /* BFCVTN, BFCVTN2 */
{
TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
- gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ gen_helper_bfcvt_pair(tmp, tcg_op, fpst);
+ tcg_gen_extu_i32_i64(tcg_res[pass], tmp);
}
break;
case 0x56: /* FCVTXN, FCVTXN2 */
- /* 64 bit to 32 bit float conversion
- * with von Neumann rounding (round to odd)
- */
- assert(size == 2);
- gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, tcg_env);
+ {
+ /*
+ * 64 bit to 32 bit float conversion
+ * with von Neumann rounding (round to odd)
+ */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ assert(size == 2);
+ gen_helper_fcvtx_f64_to_f32(tmp, tcg_op, tcg_env);
+ tcg_gen_extu_i32_i64(tcg_res[pass], tmp);
+ }
break;
default:
g_assert_not_reached();
@@ -10474,7 +10363,7 @@
}
for (pass = 0; pass < 2; pass++) {
- write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
+ write_vec_element(s, tcg_res[pass], rd, destelt + pass, MO_32);
}
clear_vec_high(s, is_q, rd);
}
@@ -10667,185 +10556,6 @@
}
}
-/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
-static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
- int immh, int immb, int opcode, int rn, int rd)
-{
- int size = 32 - clz32(immh) - 1;
- int immhb = immh << 3 | immb;
- int shift = 2 * (8 << size) - immhb;
- GVecGen2iFn *gvec_fn;
-
- if (extract32(immh, 3, 1) && !is_q) {
- unallocated_encoding(s);
- return;
- }
- tcg_debug_assert(size <= 3);
-
- if (!fp_access_check(s)) {
- return;
- }
-
- switch (opcode) {
- case 0x02: /* SSRA / USRA (accumulate) */
- gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
- break;
-
- case 0x08: /* SRI */
- gvec_fn = gen_gvec_sri;
- break;
-
- case 0x00: /* SSHR / USHR */
- if (is_u) {
- if (shift == 8 << size) {
- /* Shift count the same size as element size produces zero. */
- tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
- is_q ? 16 : 8, vec_full_reg_size(s), 0);
- return;
- }
- gvec_fn = tcg_gen_gvec_shri;
- } else {
- /* Shift count the same size as element size produces all sign. */
- if (shift == 8 << size) {
- shift -= 1;
- }
- gvec_fn = tcg_gen_gvec_sari;
- }
- break;
-
- case 0x04: /* SRSHR / URSHR (rounding) */
- gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
- break;
-
- case 0x06: /* SRSRA / URSRA (accum + rounding) */
- gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
- break;
-
- default:
- g_assert_not_reached();
- }
-
- gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
-}
-
-/* SHL/SLI - Vector shift left */
-static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
- int immh, int immb, int opcode, int rn, int rd)
-{
- int size = 32 - clz32(immh) - 1;
- int immhb = immh << 3 | immb;
- int shift = immhb - (8 << size);
-
- /* Range of size is limited by decode: immh is a non-zero 4 bit field */
- assert(size >= 0 && size <= 3);
-
- if (extract32(immh, 3, 1) && !is_q) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- if (insert) {
- gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
- } else {
- gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
- }
-}
-
-/* USHLL/SHLL - Vector shift left with widening */
-static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
- int immh, int immb, int opcode, int rn, int rd)
-{
- int size = 32 - clz32(immh) - 1;
- int immhb = immh << 3 | immb;
- int shift = immhb - (8 << size);
- int dsize = 64;
- int esize = 8 << size;
- int elements = dsize/esize;
- TCGv_i64 tcg_rn = tcg_temp_new_i64();
- TCGv_i64 tcg_rd = tcg_temp_new_i64();
- int i;
-
- if (size >= 3) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- /* For the LL variants the store is larger than the load,
- * so if rd == rn we would overwrite parts of our input.
- * So load everything right now and use shifts in the main loop.
- */
- read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
-
- for (i = 0; i < elements; i++) {
- tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
- ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
- tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
- write_vec_element(s, tcg_rd, rd, i, size + 1);
- }
- clear_vec_high(s, true, rd);
-}
-
-/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
-static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
- int immh, int immb, int opcode, int rn, int rd)
-{
- int immhb = immh << 3 | immb;
- int size = 32 - clz32(immh) - 1;
- int dsize = 64;
- int esize = 8 << size;
- int elements = dsize/esize;
- int shift = (2 * esize) - immhb;
- bool round = extract32(opcode, 0, 1);
- TCGv_i64 tcg_rn, tcg_rd, tcg_final;
- TCGv_i64 tcg_round;
- int i;
-
- if (extract32(immh, 3, 1)) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- tcg_rn = tcg_temp_new_i64();
- tcg_rd = tcg_temp_new_i64();
- tcg_final = tcg_temp_new_i64();
- read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
-
- if (round) {
- tcg_round = tcg_constant_i64(1ULL << (shift - 1));
- } else {
- tcg_round = NULL;
- }
-
- for (i = 0; i < elements; i++) {
- read_vec_element(s, tcg_rn, rn, i, size+1);
- handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
- false, true, size+1, shift);
-
- tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
- }
-
- if (!is_q) {
- write_vec_element(s, tcg_final, rd, 0, MO_64);
- } else {
- write_vec_element(s, tcg_final, rd, 1, MO_64);
- }
-
- clear_vec_high(s, is_q, rd);
-}
-
-
/* AdvSIMD shift by immediate
* 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
* +---+---+---+-------------+------+------+--------+---+------+------+
@@ -10862,60 +10572,33 @@
bool is_u = extract32(insn, 29, 1);
bool is_q = extract32(insn, 30, 1);
- /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
- assert(immh != 0);
+ if (immh == 0) {
+ unallocated_encoding(s);
+ return;
+ }
switch (opcode) {
- case 0x08: /* SRI */
- if (!is_u) {
- unallocated_encoding(s);
- return;
- }
- /* fall through */
- case 0x00: /* SSHR / USHR */
- case 0x02: /* SSRA / USRA (accumulate) */
- case 0x04: /* SRSHR / URSHR (rounding) */
- case 0x06: /* SRSRA / URSRA (accum + rounding) */
- handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
- break;
- case 0x0a: /* SHL / SLI */
- handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
- break;
- case 0x10: /* SHRN */
- case 0x11: /* RSHRN / SQRSHRUN */
- if (is_u) {
- handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
- opcode, rn, rd);
- } else {
- handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
- }
- break;
- case 0x12: /* SQSHRN / UQSHRN */
- case 0x13: /* SQRSHRN / UQRSHRN */
- handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
- opcode, rn, rd);
- break;
- case 0x14: /* SSHLL / USHLL */
- handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
- break;
case 0x1c: /* SCVTF / UCVTF */
handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
opcode, rn, rd);
break;
- case 0xc: /* SQSHLU */
- if (!is_u) {
- unallocated_encoding(s);
- return;
- }
- handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
- break;
- case 0xe: /* SQSHL, UQSHL */
- handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
- break;
case 0x1f: /* FCVTZS/ FCVTZU */
handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
return;
default:
+ case 0x00: /* SSHR / USHR */
+ case 0x02: /* SSRA / USRA (accumulate) */
+ case 0x04: /* SRSHR / URSHR (rounding) */
+ case 0x06: /* SRSRA / URSRA (accum + rounding) */
+ case 0x08: /* SRI */
+ case 0x0a: /* SHL / SLI */
+ case 0x0c: /* SQSHLU */
+ case 0x0e: /* SQSHL, UQSHL */
+ case 0x10: /* SHRN / SQSHRUN */
+ case 0x11: /* RSHRN / SQRSHRUN */
+ case 0x12: /* SQSHRN / UQSHRN */
+ case 0x13: /* SQRSHRN / UQRSHRN */
+ case 0x14: /* SSHLL / USHLL */
unallocated_encoding(s);
return;
}
@@ -11871,13 +11554,7 @@
static const AArch64DecodeTable data_proc_simd[] = {
/* pattern , mask , fn */
{ 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
- { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
- /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
- { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
{ 0x0f000400, 0x9f800400, disas_simd_shift_imm },
- { 0x0e000000, 0xbf208c00, disas_simd_tb },
- { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
- { 0x2e000000, 0xbf208400, disas_simd_ext },
{ 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
{ 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
{ 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c
index 13cd31a..9c8829a 100644
--- a/target/arm/tcg/translate-neon.c
+++ b/target/arm/tcg/translate-neon.c
@@ -1099,144 +1099,18 @@
DO_2SH(VRSHR_U, gen_gvec_urshr)
DO_2SH(VRSRA_S, gen_gvec_srsra)
DO_2SH(VRSRA_U, gen_gvec_ursra)
-
-static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
-{
- /* Signed shift out of range results in all-sign-bits */
- a->shift = MIN(a->shift, (8 << a->size) - 1);
- return do_vector_2sh(s, a, tcg_gen_gvec_sari);
-}
-
-static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
- int64_t shift, uint32_t oprsz, uint32_t maxsz)
-{
- tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
-}
-
-static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
-{
- /* Shift out of range is architecturally valid and results in zero. */
- if (a->shift >= (8 << a->size)) {
- return do_vector_2sh(s, a, gen_zero_rd_2sh);
- } else {
- return do_vector_2sh(s, a, tcg_gen_gvec_shri);
- }
-}
-
-static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
- NeonGenTwo64OpEnvFn *fn)
-{
- /*
- * 2-reg-and-shift operations, size == 3 case, where the
- * function needs to be passed tcg_env.
- */
- TCGv_i64 constimm;
- int pass;
-
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- return false;
- }
-
- /* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_simd_r32, s) &&
- ((a->vd | a->vm) & 0x10)) {
- return false;
- }
-
- if ((a->vm | a->vd) & a->q) {
- return false;
- }
-
- if (!vfp_access_check(s)) {
- return true;
- }
-
- /*
- * To avoid excessive duplication of ops we implement shift
- * by immediate using the variable shift operations.
- */
- constimm = tcg_constant_i64(dup_const(a->size, a->shift));
-
- for (pass = 0; pass < a->q + 1; pass++) {
- TCGv_i64 tmp = tcg_temp_new_i64();
-
- read_neon_element64(tmp, a->vm, pass, MO_64);
- fn(tmp, tcg_env, tmp, constimm);
- write_neon_element64(tmp, a->vd, pass, MO_64);
- }
- return true;
-}
-
-static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
- NeonGenTwoOpEnvFn *fn)
-{
- /*
- * 2-reg-and-shift operations, size < 3 case, where the
- * helper needs to be passed tcg_env.
- */
- TCGv_i32 constimm, tmp;
- int pass;
-
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- return false;
- }
-
- /* UNDEF accesses to D16-D31 if they don't exist. */
- if (!dc_isar_feature(aa32_simd_r32, s) &&
- ((a->vd | a->vm) & 0x10)) {
- return false;
- }
-
- if ((a->vm | a->vd) & a->q) {
- return false;
- }
-
- if (!vfp_access_check(s)) {
- return true;
- }
-
- /*
- * To avoid excessive duplication of ops we implement shift
- * by immediate using the variable shift operations.
- */
- constimm = tcg_constant_i32(dup_const(a->size, a->shift));
- tmp = tcg_temp_new_i32();
-
- for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
- read_neon_element32(tmp, a->vm, pass, MO_32);
- fn(tmp, tcg_env, tmp, constimm);
- write_neon_element32(tmp, a->vd, pass, MO_32);
- }
- return true;
-}
-
-#define DO_2SHIFT_ENV(INSN, FUNC) \
- static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
- { \
- return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \
- } \
- static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
- { \
- static NeonGenTwoOpEnvFn * const fns[] = { \
- gen_helper_neon_##FUNC##8, \
- gen_helper_neon_##FUNC##16, \
- gen_helper_neon_##FUNC##32, \
- }; \
- assert(a->size < ARRAY_SIZE(fns)); \
- return do_2shift_env_32(s, a, fns[a->size]); \
- }
-
-DO_2SHIFT_ENV(VQSHLU, qshlu_s)
-DO_2SHIFT_ENV(VQSHL_U, qshl_u)
-DO_2SHIFT_ENV(VQSHL_S, qshl_s)
+DO_2SH(VSHR_S, gen_gvec_sshr)
+DO_2SH(VSHR_U, gen_gvec_ushr)
+DO_2SH(VQSHLU, gen_neon_sqshlui)
+DO_2SH(VQSHL_U, gen_neon_uqshli)
+DO_2SH(VQSHL_S, gen_neon_sqshli)
static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
NeonGenTwo64OpFn *shiftfn,
- NeonGenNarrowEnvFn *narrowfn)
+ NeonGenOne64OpEnvFn *narrowfn)
{
/* 2-reg-and-shift narrowing-shift operations, size == 3 case */
- TCGv_i64 constimm, rm1, rm2;
- TCGv_i32 rd;
+ TCGv_i64 constimm, rm1, rm2, rd;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@@ -1263,7 +1137,7 @@
constimm = tcg_constant_i64(-a->shift);
rm1 = tcg_temp_new_i64();
rm2 = tcg_temp_new_i64();
- rd = tcg_temp_new_i32();
+ rd = tcg_temp_new_i64();
/* Load both inputs first to avoid potential overwrite if rm == rd */
read_neon_element64(rm1, a->vm, 0, MO_64);
@@ -1271,18 +1145,18 @@
shiftfn(rm1, rm1, constimm);
narrowfn(rd, tcg_env, rm1);
- write_neon_element32(rd, a->vd, 0, MO_32);
+ write_neon_element64(rd, a->vd, 0, MO_32);
shiftfn(rm2, rm2, constimm);
narrowfn(rd, tcg_env, rm2);
- write_neon_element32(rd, a->vd, 1, MO_32);
+ write_neon_element64(rd, a->vd, 1, MO_32);
return true;
}
static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
NeonGenTwoOpFn *shiftfn,
- NeonGenNarrowEnvFn *narrowfn)
+ NeonGenOne64OpEnvFn *narrowfn)
{
/* 2-reg-and-shift narrowing-shift operations, size < 3 case */
TCGv_i32 constimm, rm1, rm2, rm3, rm4;
@@ -1337,16 +1211,16 @@
tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
- narrowfn(rm1, tcg_env, rtmp);
- write_neon_element32(rm1, a->vd, 0, MO_32);
+ narrowfn(rtmp, tcg_env, rtmp);
+ write_neon_element64(rtmp, a->vd, 0, MO_32);
shiftfn(rm3, rm3, constimm);
shiftfn(rm4, rm4, constimm);
tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
- narrowfn(rm3, tcg_env, rtmp);
- write_neon_element32(rm3, a->vd, 1, MO_32);
+ narrowfn(rtmp, tcg_env, rtmp);
+ write_neon_element64(rtmp, a->vd, 1, MO_32);
return true;
}
@@ -1361,17 +1235,17 @@
return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \
}
-static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
+static void gen_neon_narrow_u32(TCGv_i64 dest, TCGv_ptr env, TCGv_i64 src)
{
- tcg_gen_extrl_i64_i32(dest, src);
+ tcg_gen_ext32u_i64(dest, src);
}
-static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
+static void gen_neon_narrow_u16(TCGv_i64 dest, TCGv_ptr env, TCGv_i64 src)
{
gen_helper_neon_narrow_u16(dest, src);
}
-static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
+static void gen_neon_narrow_u8(TCGv_i64 dest, TCGv_ptr env, TCGv_i64 src)
{
gen_helper_neon_narrow_u8(dest, src);
}
@@ -2962,10 +2836,9 @@
}
static bool do_vmovn(DisasContext *s, arg_2misc *a,
- NeonGenNarrowEnvFn *narrowfn)
+ NeonGenOne64OpEnvFn *narrowfn)
{
- TCGv_i64 rm;
- TCGv_i32 rd0, rd1;
+ TCGv_i64 rm, rd0, rd1;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@@ -2990,22 +2863,22 @@
}
rm = tcg_temp_new_i64();
- rd0 = tcg_temp_new_i32();
- rd1 = tcg_temp_new_i32();
+ rd0 = tcg_temp_new_i64();
+ rd1 = tcg_temp_new_i64();
read_neon_element64(rm, a->vm, 0, MO_64);
narrowfn(rd0, tcg_env, rm);
read_neon_element64(rm, a->vm, 1, MO_64);
narrowfn(rd1, tcg_env, rm);
- write_neon_element32(rd0, a->vd, 0, MO_32);
- write_neon_element32(rd1, a->vd, 1, MO_32);
+ write_neon_element64(rd0, a->vd, 0, MO_32);
+ write_neon_element64(rd1, a->vd, 1, MO_32);
return true;
}
#define DO_VMOVN(INSN, FUNC) \
static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
{ \
- static NeonGenNarrowEnvFn * const narrowfn[] = { \
+ static NeonGenOne64OpEnvFn * const narrowfn[] = { \
FUNC##8, \
FUNC##16, \
FUNC##32, \
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 9e2536d..49d32fa 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -6081,9 +6081,9 @@
if (top) {
if (shl == halfbits) {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n,
+ tcg_constant_vec_matching(d, vece,
+ MAKE_64BIT_MASK(halfbits, halfbits)));
} else {
tcg_gen_sari_vec(vece, d, n, halfbits);
tcg_gen_shli_vec(vece, d, d, shl);
@@ -6138,18 +6138,18 @@
if (top) {
if (shl == halfbits) {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n,
+ tcg_constant_vec_matching(d, vece,
+ MAKE_64BIT_MASK(halfbits, halfbits)));
} else {
tcg_gen_shri_vec(vece, d, n, halfbits);
tcg_gen_shli_vec(vece, d, d, shl);
}
} else {
if (shl == 0) {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n,
+ tcg_constant_vec_matching(d, vece,
+ MAKE_64BIT_MASK(0, halfbits)));
} else {
tcg_gen_shli_vec(vece, d, n, halfbits);
tcg_gen_shri_vec(vece, d, d, halfbits - shl);
@@ -6317,18 +6317,14 @@
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t mask = (1ull << halfbits) - 1;
int64_t min = -1ull << (halfbits - 1);
int64_t max = -min - 1;
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, d, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, d, d, t);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_and_vec(vece, d, d, t);
+ tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max));
+ tcg_gen_and_vec(vece, d, d, tcg_constant_vec_matching(d, vece, mask));
}
static const GVecGen2 sqxtnb_ops[3] = {
@@ -6349,19 +6345,15 @@
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t mask = (1ull << halfbits) - 1;
int64_t min = -1ull << (halfbits - 1);
int64_t max = -min - 1;
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}
static const GVecGen2 sqxtnt_ops[3] = {
@@ -6389,12 +6381,10 @@
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, d, n, t);
+ tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}
static const GVecGen2 uqxtnb_ops[3] = {
@@ -6415,14 +6405,13 @@
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const GVecGen2 uqxtnt_ops[3] = {
@@ -6450,14 +6439,11 @@
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, d, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, d, d, t);
+ tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max));
}
static const GVecGen2 sqxtunb_ops[3] = {
@@ -6478,16 +6464,14 @@
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const GVecGen2 sqxtunt_ops[3] = {
@@ -6551,13 +6535,11 @@
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_shri_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));
}
static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
@@ -6609,13 +6591,11 @@
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_shli_vec(vece, n, n, halfbits - shr);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}
static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
@@ -6658,14 +6638,12 @@
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ uint64_t max = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, d, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}
static const TCGOpcode sqshrunb_vec_list[] = {
@@ -6690,16 +6668,15 @@
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ uint64_t max = MAKE_64BIT_MASK(0, halfbits);
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const TCGOpcode sqshrunt_vec_list[] = {
@@ -6742,18 +6719,15 @@
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
int64_t min = -max - 1;
+ int64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
+ tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));
}
static const TCGOpcode sqshrnb_vec_list[] = {
@@ -6778,19 +6752,16 @@
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
int64_t min = -max - 1;
+ int64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}
static const TCGOpcode sqshrnt_vec_list[] = {
@@ -6833,12 +6804,11 @@
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ int64_t max = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_shri_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, d, n, t);
+ tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}
static const TCGOpcode uqshrnb_vec_list[] = {
@@ -6863,14 +6833,14 @@
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ int64_t max = MAKE_64BIT_MASK(0, halfbits);
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
tcg_gen_shri_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const TCGOpcode uqshrnt_vec_list[] = {
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 3f0e9ce..5a2e10d 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -471,6 +471,13 @@
void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_neon_sqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ int64_t c, uint32_t opr_sz, uint32_t max_sz);
+void gen_neon_uqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ int64_t c, uint32_t opr_sz, uint32_t max_sz);
+void gen_neon_sqshlui(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ int64_t c, uint32_t opr_sz, uint32_t max_sz);
+
void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
@@ -514,6 +521,11 @@
void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_ushr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+
void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
@@ -593,13 +605,13 @@
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
-typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoOpWidenFn(TCGv_i64, TCGv_i32, TCGv_i32);
typedef void NeonGenOneSingleOpFn(TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoSingleOpFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOne64OpFn(TCGv_i64, TCGv_i64);
+typedef void NeonGenOne64OpEnvFn(TCGv_i64, TCGv_env, TCGv_i64);
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
diff --git a/tests/functional/test_aarch64_sbsaref.py b/tests/functional/test_aarch64_sbsaref.py
index f31c2a6..b50e1a5 100755
--- a/tests/functional/test_aarch64_sbsaref.py
+++ b/tests/functional/test_aarch64_sbsaref.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
#
-# Functional test that boots a Linux kernel and checks the console
+# Functional test that boots a kernel and checks the console
#
# SPDX-FileCopyrightText: 2023-2024 Linaro Ltd.
# SPDX-FileContributor: Philippe Mathieu-Daudé <philmd@linaro.org>
@@ -110,16 +110,17 @@ def test_sbsaref_edk2_firmware(self):
# This tests the whole boot chain from EFI to Userspace
# We only boot a whole OS for the current top level CPU and GIC
# Other test profiles should use more minimal boots
- def boot_alpine_linux(self, cpu):
+ def boot_alpine_linux(self, cpu=None):
self.fetch_firmware()
iso_path = self.ASSET_ALPINE_ISO.fetch()
self.vm.set_console()
self.vm.add_args(
- "-cpu", cpu,
"-drive", f"file={iso_path},media=cdrom,format=raw",
)
+ if cpu:
+ self.vm.add_args("-cpu", cpu)
self.vm.launch()
wait_for_console_pattern(self, "Welcome to Alpine Linux 3.17")
@@ -127,8 +128,8 @@ def boot_alpine_linux(self, cpu):
def test_sbsaref_alpine_linux_cortex_a57(self):
self.boot_alpine_linux("cortex-a57")
- def test_sbsaref_alpine_linux_neoverse_n1(self):
- self.boot_alpine_linux("neoverse-n1")
+ def test_sbsaref_alpine_linux_default_cpu(self):
+ self.boot_alpine_linux()
def test_sbsaref_alpine_linux_max_pauth_off(self):
self.boot_alpine_linux("max,pauth=off")
@@ -136,50 +137,53 @@ def test_sbsaref_alpine_linux_max_pauth_off(self):
def test_sbsaref_alpine_linux_max_pauth_impdef(self):
self.boot_alpine_linux("max,pauth-impdef=on")
- @skipUnless(os.getenv('QEMU_TEST_TIMEOUT_EXPECTED'), 'Test might timeout')
+ @skipUnless(os.getenv('QEMU_TEST_TIMEOUT_EXPECTED'),
+ 'Test might timeout due to PAuth emulation')
def test_sbsaref_alpine_linux_max(self):
self.boot_alpine_linux("max")
- ASSET_OPENBSD_ISO = Asset(
- ('https://cdn.openbsd.org/pub/OpenBSD/7.3/arm64/miniroot73.img'),
- '7fc2c75401d6f01fbfa25f4953f72ad7d7c18650056d30755c44b9c129b707e5')
+ ASSET_FREEBSD_ISO = Asset(
+ ('https://download.freebsd.org/releases/arm64/aarch64/ISO-IMAGES/'
+ '14.1/FreeBSD-14.1-RELEASE-arm64-aarch64-bootonly.iso'),
+ '44cdbae275ef1bb6dab1d5fbb59473d4f741e1c8ea8a80fd9e906b531d6ad461')
# This tests the whole boot chain from EFI to Userspace
# We only boot a whole OS for the current top level CPU and GIC
# Other test profiles should use more minimal boots
- def boot_openbsd73(self, cpu):
+ def boot_freebsd14(self, cpu=None):
self.fetch_firmware()
- img_path = self.ASSET_OPENBSD_ISO.fetch()
+ img_path = self.ASSET_FREEBSD_ISO.fetch()
self.vm.set_console()
self.vm.add_args(
- "-cpu", cpu,
"-drive", f"file={img_path},format=raw,snapshot=on",
)
+ if cpu:
+ self.vm.add_args("-cpu", cpu)
self.vm.launch()
- wait_for_console_pattern(self,
- "Welcome to the OpenBSD/arm64"
- " 7.3 installation program.")
+ wait_for_console_pattern(self, 'Welcome to FreeBSD!')
- def test_sbsaref_openbsd73_cortex_a57(self):
- self.boot_openbsd73("cortex-a57")
+ def test_sbsaref_freebsd14_cortex_a57(self):
+ self.boot_freebsd14("cortex-a57")
- def test_sbsaref_openbsd73_neoverse_n1(self):
- self.boot_openbsd73("neoverse-n1")
+ def test_sbsaref_freebsd14_default_cpu(self):
+ self.boot_freebsd14()
- def test_sbsaref_openbsd73_max_pauth_off(self):
- self.boot_openbsd73("max,pauth=off")
+ def test_sbsaref_freebsd14_max_pauth_off(self):
+ self.boot_freebsd14("max,pauth=off")
- @skipUnless(os.getenv('QEMU_TEST_TIMEOUT_EXPECTED'), 'Test might timeout')
- def test_sbsaref_openbsd73_max_pauth_impdef(self):
- self.boot_openbsd73("max,pauth-impdef=on")
+ @skipUnless(os.getenv('QEMU_TEST_TIMEOUT_EXPECTED'),
+ 'Test might timeout due to PAuth emulation')
+ def test_sbsaref_freebsd14_max_pauth_impdef(self):
+ self.boot_freebsd14("max,pauth-impdef=on")
- @skipUnless(os.getenv('QEMU_TEST_TIMEOUT_EXPECTED'), 'Test might timeout')
- def test_sbsaref_openbsd73_max(self):
- self.boot_openbsd73("max")
+ @skipUnless(os.getenv('QEMU_TEST_TIMEOUT_EXPECTED'),
+ 'Test might timeout due to PAuth emulation')
+ def test_sbsaref_freebsd14_max(self):
+ self.boot_freebsd14("max")
if __name__ == '__main__':
diff --git a/tests/qtest/stm32l4x5_usart-test.c b/tests/qtest/stm32l4x5_usart-test.c
index c175ff3..64cebda 100644
--- a/tests/qtest/stm32l4x5_usart-test.c
+++ b/tests/qtest/stm32l4x5_usart-test.c
@@ -36,6 +36,8 @@
REG32(RTOR, 0x14)
REG32(RQR, 0x18)
REG32(ISR, 0x1C)
+ FIELD(ISR, REACK, 22, 1)
+ FIELD(ISR, TEACK, 21, 1)
FIELD(ISR, TXE, 7, 1)
FIELD(ISR, RXNE, 5, 1)
FIELD(ISR, ORE, 3, 1)
@@ -191,7 +193,7 @@
/* Enable the transmitter, the receiver and the USART. */
qtest_writel(qts, (USART1_BASE_ADDR + A_CR1),
- R_CR1_UE_MASK | R_CR1_RE_MASK | R_CR1_TE_MASK);
+ cr1 | R_CR1_UE_MASK | R_CR1_RE_MASK | R_CR1_TE_MASK);
}
static void test_write_read(void)
@@ -298,6 +300,37 @@
qtest_quit(qts);
}
+static void test_ack(void)
+{
+ uint32_t cr1;
+ uint32_t isr;
+ QTestState *qts = qtest_init("-M b-l475e-iot01a");
+
+ init_uart(qts);
+
+ cr1 = qtest_readl(qts, (USART1_BASE_ADDR + A_CR1));
+
+ /* Disable the transmitter and receiver. */
+ qtest_writel(qts, (USART1_BASE_ADDR + A_CR1),
+ cr1 & ~(R_CR1_RE_MASK | R_CR1_TE_MASK));
+
+ /* Test ISR ACK for transmitter and receiver disabled */
+ isr = qtest_readl(qts, (USART1_BASE_ADDR + A_ISR));
+ g_assert_false(isr & R_ISR_TEACK_MASK);
+ g_assert_false(isr & R_ISR_REACK_MASK);
+
+ /* Enable the transmitter and receiver. */
+ qtest_writel(qts, (USART1_BASE_ADDR + A_CR1),
+ cr1 | (R_CR1_RE_MASK | R_CR1_TE_MASK));
+
+ /* Test ISR ACK for transmitter and receiver disabled */
+ isr = qtest_readl(qts, (USART1_BASE_ADDR + A_ISR));
+ g_assert_true(isr & R_ISR_TEACK_MASK);
+ g_assert_true(isr & R_ISR_REACK_MASK);
+
+ qtest_quit(qts);
+}
+
int main(int argc, char **argv)
{
int ret;
@@ -310,6 +343,7 @@
qtest_add_func("stm32l4x5/usart/send_char", test_send_char);
qtest_add_func("stm32l4x5/usart/receive_str", test_receive_str);
qtest_add_func("stm32l4x5/usart/send_str", test_send_str);
+ qtest_add_func("stm32l4x5/usart/ack", test_ack);
ret = g_test_run();
return ret;