Merge remote-tracking branch 'remotes/armbru/tags/pull-misc-2018-02-07-v4' into staging
Miscellaneous patches for 2018-02-07
# gpg: Signature made Fri 09 Feb 2018 12:52:51 GMT
# gpg: using RSA key 3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg: aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867 4E5F 3870 B400 EB91 8653
* remotes/armbru/tags/pull-misc-2018-02-07-v4:
Move include qemu/option.h from qemu-common.h to actual users
Drop superfluous includes of qapi/qmp/qjson.h
Drop superfluous includes of qapi/qmp/dispatch.h
Include qapi/qmp/qnull.h exactly where needed
Include qapi/qmp/qnum.h exactly where needed
Include qapi/qmp/qbool.h exactly where needed
Include qapi/qmp/qstring.h exactly where needed
Include qapi/qmp/qdict.h exactly where needed
Include qapi/qmp/qlist.h exactly where needed
Include qapi/qmp/qobject.h exactly where needed
qdict qlist: Make most helper macros functions
Eliminate qapi/qmp/types.h
Typedef the subtypes of QObject in qemu/typedefs.h, too
Include qmp-commands.h exactly where needed
Drop superfluous includes of qapi/qmp/qerror.h
Include qapi/error.h exactly where needed
Drop superfluous includes of qapi-types.h and test-qapi-types.h
Clean up includes
Use #include "..." for our own headers, <...> for others
vnc: use stubs for CONFIG_VNC=n dummy functions
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/MAINTAINERS b/MAINTAINERS
index 301b699..54feb95 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -76,6 +76,29 @@
T: git git://git.corpit.ru/qemu.git trivial-patches
T: git git://github.com/vivier/qemu.git trivial-patches
+Architecture support
+--------------------
+S390
+M: Cornelia Huck <cohuck@redhat.com>
+S: Supported
+F: default-configs/s390x-softmmu.mak
+F: gdb-xml/s390*.xml
+F: hw/char/sclp*.[hc]
+F: hw/char/terminal3270.c
+F: hw/intc/s390_flic.c
+F: hw/intc/s390_flic_kvm.c
+F: hw/s390x/
+F: hw/vfio/ccw.c
+F: hw/watchdog/wdt_diag288.c
+F: include/hw/s390x/
+F: include/hw/watchdog/wdt_diag288.h
+F: pc-bios/s390-ccw/
+F: pc-bios/s390-ccw.img
+F: target/s390x/
+K: ^Subject:.*(?i)s390x?
+T: git git://github.com/cohuck/qemu.git s390-next
+L: qemu-s390x@nongnu.org
+
Guest CPU cores (TCG):
----------------------
Overall
@@ -213,6 +236,7 @@
S390
M: Richard Henderson <rth@twiddle.net>
M: Alexander Graf <agraf@suse.de>
+M: David Hildenbrand <david@redhat.com>
S: Maintained
F: target/s390x/
F: hw/s390x/
@@ -832,15 +856,22 @@
F: hw/char/terminal3270.c
F: hw/s390x/
F: include/hw/s390x/
-F: pc-bios/s390-ccw/
F: hw/watchdog/wdt_diag288.c
F: include/hw/watchdog/wdt_diag288.h
-F: pc-bios/s390-ccw.img
F: default-configs/s390x-softmmu.mak
T: git git://github.com/cohuck/qemu.git s390-next
T: git git://github.com/borntraeger/qemu.git s390-next
L: qemu-s390x@nongnu.org
+S390-ccw Bios
+M: Christian Borntraeger <borntraeger@de.ibm.com>
+M: Thomas Huth <thuth@redhat.com>
+S: Supported
+F: pc-bios/s390-ccw/
+F: pc-bios/s390-ccw.img
+T: git git://github.com/borntraeger/qemu.git s390-next
+L: qemu-s390x@nongnu.org
+
UniCore32 Machines
-------------
PKUnity-3 SoC initramfs-with-busybox
diff --git a/Makefile.target b/Makefile.target
index f9a9da7..6549481 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -93,8 +93,8 @@
# cpu emulator library
obj-y += exec.o
obj-y += accel/
-obj-$(CONFIG_TCG) += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
-obj-$(CONFIG_TCG) += tcg/tcg-common.o
+obj-$(CONFIG_TCG) += tcg/tcg.o tcg/tcg-op.o tcg/tcg-op-vec.o tcg/tcg-op-gvec.o
+obj-$(CONFIG_TCG) += tcg/tcg-common.o tcg/optimize.o
obj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o
obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
obj-y += fpu/softfloat.o
diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs
index 228cd84..d381a02 100644
--- a/accel/tcg/Makefile.objs
+++ b/accel/tcg/Makefile.objs
@@ -1,6 +1,6 @@
obj-$(CONFIG_SOFTMMU) += tcg-all.o
obj-$(CONFIG_SOFTMMU) += cputlb.o
-obj-y += tcg-runtime.o
+obj-y += tcg-runtime.o tcg-runtime-gvec.o
obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
obj-y += translator.o
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
new file mode 100644
index 0000000..8bf8d63
--- /dev/null
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -0,0 +1,997 @@
+/*
+ * Generic vectorized operation runtime
+ *
+ * Copyright (c) 2018 Linaro
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include "cpu.h"
+#include "exec/helper-proto.h"
+#include "tcg-gvec-desc.h"
+
+
+/* Virtually all hosts support 16-byte vectors. Those that don't can emulate
+ * them via GCC's generic vector extension. This turns out to be simpler and
+ * more reliable than getting the compiler to autovectorize.
+ *
+ * In tcg-op-gvec.c, we asserted that both the size and alignment of the data
+ * are multiples of 16.
+ *
+ * When the compiler does not support all of the operations we require, the
+ * loops are written so that we can always fall back on the base types.
+ */
+#ifdef CONFIG_VECTOR16
+typedef uint8_t vec8 __attribute__((vector_size(16)));
+typedef uint16_t vec16 __attribute__((vector_size(16)));
+typedef uint32_t vec32 __attribute__((vector_size(16)));
+typedef uint64_t vec64 __attribute__((vector_size(16)));
+
+typedef int8_t svec8 __attribute__((vector_size(16)));
+typedef int16_t svec16 __attribute__((vector_size(16)));
+typedef int32_t svec32 __attribute__((vector_size(16)));
+typedef int64_t svec64 __attribute__((vector_size(16)));
+
+#define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X }
+#define DUP8(X) { X, X, X, X, X, X, X, X }
+#define DUP4(X) { X, X, X, X }
+#define DUP2(X) { X, X }
+#else
+typedef uint8_t vec8;
+typedef uint16_t vec16;
+typedef uint32_t vec32;
+typedef uint64_t vec64;
+
+typedef int8_t svec8;
+typedef int16_t svec16;
+typedef int32_t svec32;
+typedef int64_t svec64;
+
+#define DUP16(X) X
+#define DUP8(X) X
+#define DUP4(X) X
+#define DUP2(X) X
+#endif /* CONFIG_VECTOR16 */
+
+static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
+{
+ intptr_t maxsz = simd_maxsz(desc);
+ intptr_t i;
+
+ if (unlikely(maxsz > oprsz)) {
+ for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) {
+ *(uint64_t *)(d + i) = 0;
+ }
+ }
+}
+
+void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec8)) {
+ *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec16)) {
+ *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec32)) {
+ *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ vec8 vecb = (vec8)DUP16(b);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec8)) {
+ *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ vec16 vecb = (vec16)DUP8(b);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec16)) {
+ *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ vec32 vecb = (vec32)DUP4(b);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec32)) {
+ *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ vec64 vecb = (vec64)DUP2(b);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec8)) {
+ *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec16)) {
+ *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec32)) {
+ *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ vec8 vecb = (vec8)DUP16(b);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec8)) {
+ *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ vec16 vecb = (vec16)DUP8(b);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec16)) {
+ *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ vec32 vecb = (vec32)DUP4(b);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec32)) {
+ *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ vec64 vecb = (vec64)DUP2(b);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec8)) {
+ *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec16)) {
+ *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec32)) {
+ *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ vec8 vecb = (vec8)DUP16(b);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec8)) {
+ *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ vec16 vecb = (vec16)DUP8(b);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec16)) {
+ *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ vec32 vecb = (vec32)DUP4(b);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec32)) {
+ *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ vec64 vecb = (vec64)DUP2(b);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec8)) {
+ *(vec8 *)(d + i) = -*(vec8 *)(a + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec16)) {
+ *(vec16 *)(d + i) = -*(vec16 *)(a + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec32)) {
+ *(vec32 *)(d + i) = -*(vec32 *)(a + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = -*(vec64 *)(a + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+
+ memcpy(d, a, oprsz);
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ if (c == 0) {
+ oprsz = 0;
+ } else {
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ *(uint64_t *)(d + i) = c;
+ }
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ if (c == 0) {
+ oprsz = 0;
+ } else {
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+ *(uint32_t *)(d + i) = c;
+ }
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
+{
+ HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
+}
+
+void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
+{
+ HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
+}
+
+void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = ~*(vec64 *)(a + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ vec64 vecb = (vec64)DUP2(b);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ vec64 vecb = (vec64)DUP2(b);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ vec64 vecb = (vec64)DUP2(b);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec8)) {
+ *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec16)) {
+ *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec32)) {
+ *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec8)) {
+ *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec16)) {
+ *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec32)) {
+ *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec8)) {
+ *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec16)) {
+ *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec32)) {
+ *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+/* If vectors are enabled, the compiler fills in -1 for true.
+ Otherwise, we must take care of this by hand. */
+#ifdef CONFIG_VECTOR16
+# define DO_CMP0(X) X
+#else
+# define DO_CMP0(X) -(X)
+#endif
+
+#define DO_CMP1(NAME, TYPE, OP) \
+void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
+{ \
+ intptr_t oprsz = simd_oprsz(desc); \
+ intptr_t i; \
+ for (i = 0; i < oprsz; i += sizeof(vec64)) { \
+ *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
+ } \
+ clear_high(d, oprsz, desc); \
+}
+
+#define DO_CMP2(SZ) \
+ DO_CMP1(gvec_eq##SZ, vec##SZ, ==) \
+ DO_CMP1(gvec_ne##SZ, vec##SZ, !=) \
+ DO_CMP1(gvec_lt##SZ, svec##SZ, <) \
+ DO_CMP1(gvec_le##SZ, svec##SZ, <=) \
+ DO_CMP1(gvec_ltu##SZ, vec##SZ, <) \
+ DO_CMP1(gvec_leu##SZ, vec##SZ, <=)
+
+DO_CMP2(8)
+DO_CMP2(16)
+DO_CMP2(32)
+DO_CMP2(64)
+
+#undef DO_CMP0
+#undef DO_CMP1
+#undef DO_CMP2
+
+void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int8_t)) {
+ int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
+ if (r > INT8_MAX) {
+ r = INT8_MAX;
+ } else if (r < INT8_MIN) {
+ r = INT8_MIN;
+ }
+ *(int8_t *)(d + i) = r;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int16_t)) {
+ int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
+ if (r > INT16_MAX) {
+ r = INT16_MAX;
+ } else if (r < INT16_MIN) {
+ r = INT16_MIN;
+ }
+ *(int16_t *)(d + i) = r;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int32_t)) {
+ int32_t ai = *(int32_t *)(a + i);
+ int32_t bi = *(int32_t *)(b + i);
+ int32_t di = ai + bi;
+ if (((di ^ ai) &~ (ai ^ bi)) < 0) {
+ /* Signed overflow. */
+ di = (di < 0 ? INT32_MAX : INT32_MIN);
+ }
+ *(int32_t *)(d + i) = di;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int64_t)) {
+ int64_t ai = *(int64_t *)(a + i);
+ int64_t bi = *(int64_t *)(b + i);
+ int64_t di = ai + bi;
+ if (((di ^ ai) &~ (ai ^ bi)) < 0) {
+ /* Signed overflow. */
+ di = (di < 0 ? INT64_MAX : INT64_MIN);
+ }
+ *(int64_t *)(d + i) = di;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+ int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
+ if (r > INT8_MAX) {
+ r = INT8_MAX;
+ } else if (r < INT8_MIN) {
+ r = INT8_MIN;
+ }
+ *(uint8_t *)(d + i) = r;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int16_t)) {
+ int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
+ if (r > INT16_MAX) {
+ r = INT16_MAX;
+ } else if (r < INT16_MIN) {
+ r = INT16_MIN;
+ }
+ *(int16_t *)(d + i) = r;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int32_t)) {
+ int32_t ai = *(int32_t *)(a + i);
+ int32_t bi = *(int32_t *)(b + i);
+ int32_t di = ai - bi;
+ if (((di ^ ai) & (ai ^ bi)) < 0) {
+ /* Signed overflow. */
+ di = (di < 0 ? INT32_MAX : INT32_MIN);
+ }
+ *(int32_t *)(d + i) = di;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int64_t)) {
+ int64_t ai = *(int64_t *)(a + i);
+ int64_t bi = *(int64_t *)(b + i);
+ int64_t di = ai - bi;
+ if (((di ^ ai) & (ai ^ bi)) < 0) {
+ /* Signed overflow. */
+ di = (di < 0 ? INT64_MAX : INT64_MIN);
+ }
+ *(int64_t *)(d + i) = di;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+ unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
+ if (r > UINT8_MAX) {
+ r = UINT8_MAX;
+ }
+ *(uint8_t *)(d + i) = r;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
+ unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
+ if (r > UINT16_MAX) {
+ r = UINT16_MAX;
+ }
+ *(uint16_t *)(d + i) = r;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+ uint32_t ai = *(uint32_t *)(a + i);
+ uint32_t bi = *(uint32_t *)(b + i);
+ uint32_t di = ai + bi;
+ if (di < ai) {
+ di = UINT32_MAX;
+ }
+ *(uint32_t *)(d + i) = di;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ uint64_t ai = *(uint64_t *)(a + i);
+ uint64_t bi = *(uint64_t *)(b + i);
+ uint64_t di = ai + bi;
+ if (di < ai) {
+ di = UINT64_MAX;
+ }
+ *(uint64_t *)(d + i) = di;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+ int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
+ if (r < 0) {
+ r = 0;
+ }
+ *(uint8_t *)(d + i) = r;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
+ int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
+ if (r < 0) {
+ r = 0;
+ }
+ *(uint16_t *)(d + i) = r;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+ uint32_t ai = *(uint32_t *)(a + i);
+ uint32_t bi = *(uint32_t *)(b + i);
+ uint32_t di = ai - bi;
+ if (ai < bi) {
+ di = 0;
+ }
+ *(uint32_t *)(d + i) = di;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ uint64_t ai = *(uint64_t *)(a + i);
+ uint64_t bi = *(uint64_t *)(b + i);
+ uint64_t di = ai - bi;
+ if (ai < bi) {
+ di = 0;
+ }
+ *(uint64_t *)(d + i) = di;
+ }
+ clear_high(d, oprsz, desc);
+}
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 1df17d0..2536959 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -134,3 +134,121 @@
GEN_ATOMIC_HELPERS(xchg)
#undef GEN_ATOMIC_HELPERS
+
+DEF_HELPER_FLAGS_3(gvec_mov, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_dup8, TCG_CALL_NO_RWG, void, ptr, i32, i32)
+DEF_HELPER_FLAGS_3(gvec_dup16, TCG_CALL_NO_RWG, void, ptr, i32, i32)
+DEF_HELPER_FLAGS_3(gvec_dup32, TCG_CALL_NO_RWG, void, ptr, i32, i32)
+DEF_HELPER_FLAGS_3(gvec_dup64, TCG_CALL_NO_RWG, void, ptr, i32, i64)
+
+DEF_HELPER_FLAGS_4(gvec_add8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_add16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_add32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_add64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_adds8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_adds16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_adds32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_adds64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_sub8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sub16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sub32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sub64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_subs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_subs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_subs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_subs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_mul8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_muls8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_muls16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_muls32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_muls64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_ssadd8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ssadd16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ssadd32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ssadd64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_sssub8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sssub16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sssub32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sssub64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_usadd8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_usadd16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_usadd32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_usadd64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_ussub8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ussub16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ussub32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ussub64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_neg8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_neg16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_neg32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_neg64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_not, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_and, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_or, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_xor, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_andc, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_orc, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_ands, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_xors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_ors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_3(gvec_shl8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_shl16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_shl32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_shl64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_shr8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_shr16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_shr32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_shr64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_sar8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_eq64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_ne8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ne16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ne32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ne64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_lt8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_lt16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_lt32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_lt64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_le8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_le16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_le32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_le64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_ltu8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ltu16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ltu32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ltu64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_leu8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_leu16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_leu32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_leu64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/configure b/configure
index 831ebf2..62562f0 100755
--- a/configure
+++ b/configure
@@ -1933,9 +1933,9 @@
EOF
if compile_object ; then
- if grep -q BiGeNdIaN $TMPO ; then
+ if strings -a $TMPO | grep -q BiGeNdIaN ; then
bigendian="yes"
- elif grep -q LiTtLeEnDiAn $TMPO ; then
+ elif strings -a $TMPO | grep -q LiTtLeEnDiAn ; then
bigendian="no"
else
echo big/little test failed
@@ -5001,6 +5001,50 @@
fi
########################################
+# See if 16-byte vector operations are supported.
+# Even without a vector unit the compiler may expand these.
+# There is a bug in old GCC for PPC that crashes here.
+# Unfortunately it's the system compiler for Centos 7.
+
+cat > $TMPC << EOF
+typedef unsigned char U1 __attribute__((vector_size(16)));
+typedef unsigned short U2 __attribute__((vector_size(16)));
+typedef unsigned int U4 __attribute__((vector_size(16)));
+typedef unsigned long long U8 __attribute__((vector_size(16)));
+typedef signed char S1 __attribute__((vector_size(16)));
+typedef signed short S2 __attribute__((vector_size(16)));
+typedef signed int S4 __attribute__((vector_size(16)));
+typedef signed long long S8 __attribute__((vector_size(16)));
+static U1 a1, b1;
+static U2 a2, b2;
+static U4 a4, b4;
+static U8 a8, b8;
+static S1 c1;
+static S2 c2;
+static S4 c4;
+static S8 c8;
+static int i;
+int main(void)
+{
+ a1 += b1; a2 += b2; a4 += b4; a8 += b8;
+ a1 -= b1; a2 -= b2; a4 -= b4; a8 -= b8;
+ a1 *= b1; a2 *= b2; a4 *= b4; a8 *= b8;
+ a1 &= b1; a2 &= b2; a4 &= b4; a8 &= b8;
+ a1 |= b1; a2 |= b2; a4 |= b4; a8 |= b8;
+ a1 ^= b1; a2 ^= b2; a4 ^= b4; a8 ^= b8;
+ a1 <<= i; a2 <<= i; a4 <<= i; a8 <<= i;
+ a1 >>= i; a2 >>= i; a4 >>= i; a8 >>= i;
+ c1 >>= i; c2 >>= i; c4 >>= i; c8 >>= i;
+ return 0;
+}
+EOF
+
+vector16=no
+if compile_prog "" "" ; then
+ vector16=yes
+fi
+
+########################################
# check if getauxval is available.
getauxval=no
@@ -6329,6 +6373,10 @@
echo "CONFIG_ATOMIC64=y" >> $config_host_mak
fi
+if test "$vector16" = "yes" ; then
+ echo "CONFIG_VECTOR16=y" >> $config_host_mak
+fi
+
if test "$getauxval" = "yes" ; then
echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
fi
@@ -6731,6 +6779,7 @@
echo "TARGET_ABI32=y" >> $config_target_mak
;;
s390x)
+ mttcg=yes
gdb_xml_files="s390x-core64.xml s390-acr.xml s390-fpr.xml s390-vx.xml s390-cr.xml s390-virt.xml s390-gs.xml"
;;
tilegx)
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 54f8f57..05108bc 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -386,6 +386,69 @@
}
}
+static void fdt_add_psci_node(void *fdt)
+{
+ uint32_t cpu_suspend_fn;
+ uint32_t cpu_off_fn;
+ uint32_t cpu_on_fn;
+ uint32_t migrate_fn;
+ ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(0));
+ const char *psci_method;
+ int64_t psci_conduit;
+
+ psci_conduit = object_property_get_int(OBJECT(armcpu),
+ "psci-conduit",
+ &error_abort);
+ switch (psci_conduit) {
+ case QEMU_PSCI_CONDUIT_DISABLED:
+ return;
+ case QEMU_PSCI_CONDUIT_HVC:
+ psci_method = "hvc";
+ break;
+ case QEMU_PSCI_CONDUIT_SMC:
+ psci_method = "smc";
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ qemu_fdt_add_subnode(fdt, "/psci");
+ if (armcpu->psci_version == 2) {
+ const char comp[] = "arm,psci-0.2\0arm,psci";
+ qemu_fdt_setprop(fdt, "/psci", "compatible", comp, sizeof(comp));
+
+ cpu_off_fn = QEMU_PSCI_0_2_FN_CPU_OFF;
+ if (arm_feature(&armcpu->env, ARM_FEATURE_AARCH64)) {
+ cpu_suspend_fn = QEMU_PSCI_0_2_FN64_CPU_SUSPEND;
+ cpu_on_fn = QEMU_PSCI_0_2_FN64_CPU_ON;
+ migrate_fn = QEMU_PSCI_0_2_FN64_MIGRATE;
+ } else {
+ cpu_suspend_fn = QEMU_PSCI_0_2_FN_CPU_SUSPEND;
+ cpu_on_fn = QEMU_PSCI_0_2_FN_CPU_ON;
+ migrate_fn = QEMU_PSCI_0_2_FN_MIGRATE;
+ }
+ } else {
+ qemu_fdt_setprop_string(fdt, "/psci", "compatible", "arm,psci");
+
+ cpu_suspend_fn = QEMU_PSCI_0_1_FN_CPU_SUSPEND;
+ cpu_off_fn = QEMU_PSCI_0_1_FN_CPU_OFF;
+ cpu_on_fn = QEMU_PSCI_0_1_FN_CPU_ON;
+ migrate_fn = QEMU_PSCI_0_1_FN_MIGRATE;
+ }
+
+ /* We adopt the PSCI spec's nomenclature, and use 'conduit' to refer
+ * to the instruction that should be used to invoke PSCI functions.
+ * However, the device tree binding uses 'method' instead, so that is
+ * what we should use here.
+ */
+ qemu_fdt_setprop_string(fdt, "/psci", "method", psci_method);
+
+ qemu_fdt_setprop_cell(fdt, "/psci", "cpu_suspend", cpu_suspend_fn);
+ qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", cpu_off_fn);
+ qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", cpu_on_fn);
+ qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
+}
+
/**
* load_dtb() - load a device tree binary image into memory
* @addr: the address to load the image at
@@ -542,6 +605,8 @@
}
}
+ fdt_add_psci_node(fdt);
+
if (binfo->modify_dtb) {
binfo->modify_dtb(binfo, fdt);
}
diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c
index b0d4088..e6559a8 100644
--- a/hw/arm/fsl-imx6.c
+++ b/hw/arm/fsl-imx6.c
@@ -93,7 +93,7 @@
}
for (i = 0; i < FSL_IMX6_NUM_ESDHCS; i++) {
- object_initialize(&s->esdhc[i], sizeof(s->esdhc[i]), TYPE_SYSBUS_SDHCI);
+ object_initialize(&s->esdhc[i], sizeof(s->esdhc[i]), TYPE_IMX_USDHC);
qdev_set_parent_bus(DEVICE(&s->esdhc[i]), sysbus_get_default());
snprintf(name, NAME_SIZE, "sdhc%d", i + 1);
object_property_add_child(obj, name, OBJECT(&s->esdhc[i]), NULL);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index b334c82..dbb3c80 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -244,66 +244,6 @@
}
}
-static void fdt_add_psci_node(const VirtMachineState *vms)
-{
- uint32_t cpu_suspend_fn;
- uint32_t cpu_off_fn;
- uint32_t cpu_on_fn;
- uint32_t migrate_fn;
- void *fdt = vms->fdt;
- ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(0));
- const char *psci_method;
-
- switch (vms->psci_conduit) {
- case QEMU_PSCI_CONDUIT_DISABLED:
- return;
- case QEMU_PSCI_CONDUIT_HVC:
- psci_method = "hvc";
- break;
- case QEMU_PSCI_CONDUIT_SMC:
- psci_method = "smc";
- break;
- default:
- g_assert_not_reached();
- }
-
- qemu_fdt_add_subnode(fdt, "/psci");
- if (armcpu->psci_version == 2) {
- const char comp[] = "arm,psci-0.2\0arm,psci";
- qemu_fdt_setprop(fdt, "/psci", "compatible", comp, sizeof(comp));
-
- cpu_off_fn = QEMU_PSCI_0_2_FN_CPU_OFF;
- if (arm_feature(&armcpu->env, ARM_FEATURE_AARCH64)) {
- cpu_suspend_fn = QEMU_PSCI_0_2_FN64_CPU_SUSPEND;
- cpu_on_fn = QEMU_PSCI_0_2_FN64_CPU_ON;
- migrate_fn = QEMU_PSCI_0_2_FN64_MIGRATE;
- } else {
- cpu_suspend_fn = QEMU_PSCI_0_2_FN_CPU_SUSPEND;
- cpu_on_fn = QEMU_PSCI_0_2_FN_CPU_ON;
- migrate_fn = QEMU_PSCI_0_2_FN_MIGRATE;
- }
- } else {
- qemu_fdt_setprop_string(fdt, "/psci", "compatible", "arm,psci");
-
- cpu_suspend_fn = QEMU_PSCI_0_1_FN_CPU_SUSPEND;
- cpu_off_fn = QEMU_PSCI_0_1_FN_CPU_OFF;
- cpu_on_fn = QEMU_PSCI_0_1_FN_CPU_ON;
- migrate_fn = QEMU_PSCI_0_1_FN_MIGRATE;
- }
-
- /* We adopt the PSCI spec's nomenclature, and use 'conduit' to refer
- * to the instruction that should be used to invoke PSCI functions.
- * However, the device tree binding uses 'method' instead, so that is
- * what we should use here.
- */
- qemu_fdt_setprop_string(fdt, "/psci", "method", psci_method);
-
- qemu_fdt_setprop_cell(fdt, "/psci", "cpu_suspend", cpu_suspend_fn);
- qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", cpu_off_fn);
- qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", cpu_on_fn);
- qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
-}
-
static void fdt_add_timer_nodes(const VirtMachineState *vms)
{
/* On real hardware these interrupts are level-triggered.
@@ -1409,7 +1349,6 @@
}
fdt_add_timer_nodes(vms);
fdt_add_cpu_nodes(vms);
- fdt_add_psci_node(vms);
memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram",
machine->ram_size);
diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c
index 4601267..cb0e684 100644
--- a/hw/core/generic-loader.c
+++ b/hw/core/generic-loader.c
@@ -105,7 +105,7 @@
error_setg(errp, "data can not be specified when setting a "
"program counter");
return;
- } else if (!s->cpu_num) {
+ } else if (s->cpu_num == CPU_NONE) {
error_setg(errp, "cpu_num must be specified when setting a "
"program counter");
return;
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 571e094..0e9963f 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -6,7 +6,7 @@
common-obj-$(CONFIG_XLNX_ZYNQMP) += xlnx-pmu-iomod-intc.o
common-obj-$(CONFIG_XLNX_ZYNQMP) += xlnx-zynqmp-ipi.o
common-obj-$(CONFIG_ETRAXFS) += etraxfs_pic.o
-common-obj-$(CONFIG_IMX) += imx_avic.o
+common-obj-$(CONFIG_IMX) += imx_avic.o imx_gpcv2.o
common-obj-$(CONFIG_LM32) += lm32_pic.o
common-obj-$(CONFIG_REALVIEW) += realview_gic.o
common-obj-$(CONFIG_SLAVIO) += slavio_intctl.o
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 8ca6cee..360889d 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -503,8 +503,25 @@
}
}
-void armv7m_nvic_set_pending(void *opaque, int irq, bool secure)
+static void do_armv7m_nvic_set_pending(void *opaque, int irq, bool secure,
+ bool derived)
{
+ /* Pend an exception, including possibly escalating it to HardFault.
+ *
+ * This function handles both "normal" pending of interrupts and
+ * exceptions, and also derived exceptions (ones which occur as
+ * a result of trying to take some other exception).
+ *
+ * If derived == true, the caller guarantees that we are part way through
+ * trying to take an exception (but have not yet called
+ * armv7m_nvic_acknowledge_irq() to make it active), and so:
+ * - s->vectpending is the "original exception" we were trying to take
+ * - irq is the "derived exception"
+ * - nvic_exec_prio(s) gives the priority before exception entry
+ * Here we handle the prioritization logic which the pseudocode puts
+ * in the DerivedLateArrival() function.
+ */
+
NVICState *s = (NVICState *)opaque;
bool banked = exc_is_banked(irq);
VecInfo *vec;
@@ -514,7 +531,44 @@
vec = (banked && secure) ? &s->sec_vectors[irq] : &s->vectors[irq];
- trace_nvic_set_pending(irq, secure, vec->enabled, vec->prio);
+ trace_nvic_set_pending(irq, secure, derived, vec->enabled, vec->prio);
+
+ if (derived) {
+ /* Derived exceptions are always synchronous. */
+ assert(irq >= ARMV7M_EXCP_HARD && irq < ARMV7M_EXCP_PENDSV);
+
+ if (irq == ARMV7M_EXCP_DEBUG &&
+ exc_group_prio(s, vec->prio, secure) >= nvic_exec_prio(s)) {
+ /* DebugMonitorFault, but its priority is lower than the
+ * preempted exception priority: just ignore it.
+ */
+ return;
+ }
+
+ if (irq == ARMV7M_EXCP_HARD && vec->prio >= s->vectpending_prio) {
+ /* If this is a terminal exception (one which means we cannot
+ * take the original exception, like a failure to read its
+ * vector table entry), then we must take the derived exception.
+ * If the derived exception can't take priority over the
+ * original exception, then we go into Lockup.
+ *
+ * For QEMU, we rely on the fact that a derived exception is
+ * terminal if and only if it's reported to us as HardFault,
+ * which saves having to have an extra argument is_terminal
+ * that we'd only use in one place.
+ */
+ cpu_abort(&s->cpu->parent_obj,
+ "Lockup: can't take terminal derived exception "
+ "(original exception priority %d)\n",
+ s->vectpending_prio);
+ }
+ /* We now continue with the same code as for a normal pending
+ * exception, which will cause us to pend the derived exception.
+ * We'll then take either the original or the derived exception
+ * based on which is higher priority by the usual mechanism
+ * for selecting the highest priority pending interrupt.
+ */
+ }
if (irq >= ARMV7M_EXCP_HARD && irq < ARMV7M_EXCP_PENDSV) {
/* If a synchronous exception is pending then it may be
@@ -585,25 +639,31 @@
}
}
+void armv7m_nvic_set_pending(void *opaque, int irq, bool secure)
+{
+ do_armv7m_nvic_set_pending(opaque, irq, secure, false);
+}
+
+void armv7m_nvic_set_pending_derived(void *opaque, int irq, bool secure)
+{
+ do_armv7m_nvic_set_pending(opaque, irq, secure, true);
+}
+
/* Make pending IRQ active. */
-bool armv7m_nvic_acknowledge_irq(void *opaque)
+void armv7m_nvic_acknowledge_irq(void *opaque)
{
NVICState *s = (NVICState *)opaque;
CPUARMState *env = &s->cpu->env;
const int pending = s->vectpending;
const int running = nvic_exec_prio(s);
VecInfo *vec;
- bool targets_secure;
assert(pending > ARMV7M_EXCP_RESET && pending < s->num_irq);
if (s->vectpending_is_s_banked) {
vec = &s->sec_vectors[pending];
- targets_secure = true;
} else {
vec = &s->vectors[pending];
- targets_secure = !exc_is_banked(s->vectpending) &&
- exc_targets_secure(s, s->vectpending);
}
assert(vec->enabled);
@@ -611,7 +671,7 @@
assert(s->vectpending_prio < running);
- trace_nvic_acknowledge_irq(pending, s->vectpending_prio, targets_secure);
+ trace_nvic_acknowledge_irq(pending, s->vectpending_prio);
vec->active = 1;
vec->pending = 0;
@@ -619,8 +679,28 @@
write_v7m_exception(env, s->vectpending);
nvic_irq_update(s);
+}
- return targets_secure;
+void armv7m_nvic_get_pending_irq_info(void *opaque,
+ int *pirq, bool *ptargets_secure)
+{
+ NVICState *s = (NVICState *)opaque;
+ const int pending = s->vectpending;
+ bool targets_secure;
+
+ assert(pending > ARMV7M_EXCP_RESET && pending < s->num_irq);
+
+ if (s->vectpending_is_s_banked) {
+ targets_secure = true;
+ } else {
+ targets_secure = !exc_is_banked(pending) &&
+ exc_targets_secure(s, pending);
+ }
+
+ trace_nvic_get_pending_irq_info(pending, targets_secure);
+
+ *ptargets_secure = targets_secure;
+ *pirq = pending;
}
int armv7m_nvic_complete_irq(void *opaque, int irq, bool secure)
diff --git a/hw/intc/imx_gpcv2.c b/hw/intc/imx_gpcv2.c
new file mode 100644
index 0000000..4eb9ce2
--- /dev/null
+++ b/hw/intc/imx_gpcv2.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2018, Impinj, Inc.
+ *
+ * i.MX7 GPCv2 block emulation code
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/intc/imx_gpcv2.h"
+#include "qemu/log.h"
+
+#define GPC_PU_PGC_SW_PUP_REQ 0x0f8
+#define GPC_PU_PGC_SW_PDN_REQ 0x104
+
+#define USB_HSIC_PHY_SW_Pxx_REQ BIT(4)
+#define USB_OTG2_PHY_SW_Pxx_REQ BIT(3)
+#define USB_OTG1_PHY_SW_Pxx_REQ BIT(2)
+#define PCIE_PHY_SW_Pxx_REQ BIT(1)
+#define MIPI_PHY_SW_Pxx_REQ BIT(0)
+
+
+static void imx_gpcv2_reset(DeviceState *dev)
+{
+ IMXGPCv2State *s = IMX_GPCV2(dev);
+
+ memset(s->regs, 0, sizeof(s->regs));
+}
+
+static uint64_t imx_gpcv2_read(void *opaque, hwaddr offset,
+ unsigned size)
+{
+ IMXGPCv2State *s = opaque;
+
+ return s->regs[offset / sizeof(uint32_t)];
+}
+
+static void imx_gpcv2_write(void *opaque, hwaddr offset,
+ uint64_t value, unsigned size)
+{
+ IMXGPCv2State *s = opaque;
+ const size_t idx = offset / sizeof(uint32_t);
+
+ s->regs[idx] = value;
+
+ /*
+ * Real HW will clear those bits once as a way to indicate that
+ * power up request is complete
+ */
+ if (offset == GPC_PU_PGC_SW_PUP_REQ ||
+ offset == GPC_PU_PGC_SW_PDN_REQ) {
+ s->regs[idx] &= ~(USB_HSIC_PHY_SW_Pxx_REQ |
+ USB_OTG2_PHY_SW_Pxx_REQ |
+ USB_OTG1_PHY_SW_Pxx_REQ |
+ PCIE_PHY_SW_Pxx_REQ |
+ MIPI_PHY_SW_Pxx_REQ);
+ }
+}
+
+static const struct MemoryRegionOps imx_gpcv2_ops = {
+ .read = imx_gpcv2_read,
+ .write = imx_gpcv2_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+ .impl = {
+ /*
+ * Our device would not work correctly if the guest was doing
+ * unaligned access. This might not be a limitation on the real
+ * device but in practice there is no reason for a guest to access
+ * this device unaligned.
+ */
+ .min_access_size = 4,
+ .max_access_size = 4,
+ .unaligned = false,
+ },
+};
+
+static void imx_gpcv2_init(Object *obj)
+{
+ SysBusDevice *sd = SYS_BUS_DEVICE(obj);
+ IMXGPCv2State *s = IMX_GPCV2(obj);
+
+ memory_region_init_io(&s->iomem,
+ obj,
+ &imx_gpcv2_ops,
+ s,
+ TYPE_IMX_GPCV2 ".iomem",
+ sizeof(s->regs));
+ sysbus_init_mmio(sd, &s->iomem);
+}
+
+static const VMStateDescription vmstate_imx_gpcv2 = {
+ .name = TYPE_IMX_GPCV2,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32_ARRAY(regs, IMXGPCv2State, GPC_NUM),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static void imx_gpcv2_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->reset = imx_gpcv2_reset;
+ dc->vmsd = &vmstate_imx_gpcv2;
+ dc->desc = "i.MX GPCv2 Module";
+}
+
+static const TypeInfo imx_gpcv2_info = {
+ .name = TYPE_IMX_GPCV2,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(IMXGPCv2State),
+ .instance_init = imx_gpcv2_init,
+ .class_init = imx_gpcv2_class_init,
+};
+
+static void imx_gpcv2_register_type(void)
+{
+ type_register_static(&imx_gpcv2_info);
+}
+type_init(imx_gpcv2_register_type)
diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c
index 6eaf178..a85a149 100644
--- a/hw/intc/s390_flic.c
+++ b/hw/intc/s390_flic.c
@@ -22,16 +22,36 @@
#include "qapi/error.h"
#include "hw/s390x/s390-virtio-ccw.h"
+S390FLICStateClass *s390_get_flic_class(S390FLICState *fs)
+{
+ static S390FLICStateClass *class;
+
+ if (!class) {
+ /* we only have one flic device, so this is fine to cache */
+ class = S390_FLIC_COMMON_GET_CLASS(fs);
+ }
+ return class;
+}
+
+QEMUS390FLICState *s390_get_qemu_flic(S390FLICState *fs)
+{
+ static QEMUS390FLICState *flic;
+
+ if (!flic) {
+ /* we only have one flic device, so this is fine to cache */
+ flic = QEMU_S390_FLIC(fs);
+ }
+ return flic;
+}
+
S390FLICState *s390_get_flic(void)
{
static S390FLICState *fs;
if (!fs) {
- fs = S390_FLIC_COMMON(object_resolve_path(TYPE_KVM_S390_FLIC, NULL));
- if (!fs) {
- fs = S390_FLIC_COMMON(object_resolve_path(TYPE_QEMU_S390_FLIC,
- NULL));
- }
+ fs = S390_FLIC_COMMON(object_resolve_path_type("",
+ TYPE_S390_FLIC_COMMON,
+ NULL));
}
return fs;
}
@@ -40,8 +60,11 @@
{
DeviceState *dev;
- dev = s390_flic_kvm_create();
- if (!dev) {
+ if (kvm_enabled()) {
+ dev = qdev_create(NULL, TYPE_KVM_S390_FLIC);
+ object_property_add_child(qdev_get_machine(), TYPE_KVM_S390_FLIC,
+ OBJECT(dev), NULL);
+ } else {
dev = qdev_create(NULL, TYPE_QEMU_S390_FLIC);
object_property_add_child(qdev_get_machine(), TYPE_QEMU_S390_FLIC,
OBJECT(dev), NULL);
@@ -78,14 +101,41 @@
static int qemu_s390_clear_io_flic(S390FLICState *fs, uint16_t subchannel_id,
uint16_t subchannel_nr)
{
- /* Fixme TCG */
- return -ENOSYS;
+ QEMUS390FLICState *flic = s390_get_qemu_flic(fs);
+ QEMUS390FlicIO *cur, *next;
+ uint8_t isc;
+
+ g_assert(qemu_mutex_iothread_locked());
+ if (!(flic->pending & FLIC_PENDING_IO)) {
+ return 0;
+ }
+
+ /* check all iscs */
+ for (isc = 0; isc < 8; isc++) {
+ if (QLIST_EMPTY(&flic->io[isc])) {
+ continue;
+ }
+
+ /* search and delete any matching one */
+ QLIST_FOREACH_SAFE(cur, &flic->io[isc], next, next) {
+ if (cur->id == subchannel_id && cur->nr == subchannel_nr) {
+ QLIST_REMOVE(cur, next);
+ g_free(cur);
+ }
+ }
+
+ /* update our indicator bit */
+ if (QLIST_EMPTY(&flic->io[isc])) {
+ flic->pending &= ~ISC_TO_PENDING_IO(isc);
+ }
+ }
+ return 0;
}
static int qemu_s390_modify_ais_mode(S390FLICState *fs, uint8_t isc,
uint16_t mode)
{
- QEMUS390FLICState *flic = QEMU_S390_FLIC(fs);
+ QEMUS390FLICState *flic = s390_get_qemu_flic(fs);
switch (mode) {
case SIC_IRQ_MODE_ALL:
@@ -106,7 +156,8 @@
static int qemu_s390_inject_airq(S390FLICState *fs, uint8_t type,
uint8_t isc, uint8_t flags)
{
- QEMUS390FLICState *flic = QEMU_S390_FLIC(fs);
+ QEMUS390FLICState *flic = s390_get_qemu_flic(fs);
+ S390FLICStateClass *fsc = s390_get_flic_class(fs);
bool flag = flags & S390_ADAPTER_SUPPRESSIBLE;
uint32_t io_int_word = (isc << 27) | IO_INT_WORD_AI;
@@ -115,7 +166,7 @@
return 0;
}
- s390_io_interrupt(0, 0, 0, io_int_word);
+ fsc->inject_io(fs, 0, 0, 0, io_int_word);
if (flag && (flic->simm & AIS_MODE_MASK(isc))) {
flic->nimm |= AIS_MODE_MASK(isc);
@@ -126,12 +177,180 @@
return 0;
}
+static void qemu_s390_flic_notify(uint32_t type)
+{
+ CPUState *cs;
+
+ /*
+ * We have to make all CPUs see CPU_INTERRUPT_HARD, so they might
+ * consider it. We will kick all running CPUs and only relevant
+ * sleeping ones.
+ */
+ CPU_FOREACH(cs) {
+ S390CPU *cpu = S390_CPU(cs);
+
+ cs->interrupt_request |= CPU_INTERRUPT_HARD;
+
+ /* ignore CPUs that are not sleeping */
+ if (s390_cpu_get_state(cpu) != CPU_STATE_OPERATING &&
+ s390_cpu_get_state(cpu) != CPU_STATE_LOAD) {
+ continue;
+ }
+
+ /* we always kick running CPUs for now, this is tricky */
+ if (cs->halted) {
+ /* don't check for subclasses, CPUs double check when waking up */
+ if (type & FLIC_PENDING_SERVICE) {
+ if (!(cpu->env.psw.mask & PSW_MASK_EXT)) {
+ continue;
+ }
+ } else if (type & FLIC_PENDING_IO) {
+ if (!(cpu->env.psw.mask & PSW_MASK_IO)) {
+ continue;
+ }
+ } else if (type & FLIC_PENDING_MCHK_CR) {
+ if (!(cpu->env.psw.mask & PSW_MASK_MCHECK)) {
+ continue;
+ }
+ }
+ }
+ cpu_interrupt(cs, CPU_INTERRUPT_HARD);
+ }
+}
+
+uint32_t qemu_s390_flic_dequeue_service(QEMUS390FLICState *flic)
+{
+ uint32_t tmp;
+
+ g_assert(qemu_mutex_iothread_locked());
+ g_assert(flic->pending & FLIC_PENDING_SERVICE);
+ tmp = flic->service_param;
+ flic->service_param = 0;
+ flic->pending &= ~FLIC_PENDING_SERVICE;
+
+ return tmp;
+}
+
+/* caller has to free the returned object */
+QEMUS390FlicIO *qemu_s390_flic_dequeue_io(QEMUS390FLICState *flic, uint64_t cr6)
+{
+ QEMUS390FlicIO *io;
+ uint8_t isc;
+
+ g_assert(qemu_mutex_iothread_locked());
+ if (!(flic->pending & CR6_TO_PENDING_IO(cr6))) {
+ return NULL;
+ }
+
+ for (isc = 0; isc < 8; isc++) {
+ if (QLIST_EMPTY(&flic->io[isc]) || !(cr6 & ISC_TO_ISC_BITS(isc))) {
+ continue;
+ }
+ io = QLIST_FIRST(&flic->io[isc]);
+ QLIST_REMOVE(io, next);
+
+ /* update our indicator bit */
+ if (QLIST_EMPTY(&flic->io[isc])) {
+ flic->pending &= ~ISC_TO_PENDING_IO(isc);
+ }
+ return io;
+ }
+
+ return NULL;
+}
+
+void qemu_s390_flic_dequeue_crw_mchk(QEMUS390FLICState *flic)
+{
+ g_assert(qemu_mutex_iothread_locked());
+ g_assert(flic->pending & FLIC_PENDING_MCHK_CR);
+ flic->pending &= ~FLIC_PENDING_MCHK_CR;
+}
+
+static void qemu_s390_inject_service(S390FLICState *fs, uint32_t parm)
+{
+ QEMUS390FLICState *flic = s390_get_qemu_flic(fs);
+
+ g_assert(qemu_mutex_iothread_locked());
+ /* multiplexing is good enough for sclp - kvm does it internally as well */
+ flic->service_param |= parm;
+ flic->pending |= FLIC_PENDING_SERVICE;
+
+ qemu_s390_flic_notify(FLIC_PENDING_SERVICE);
+}
+
+static void qemu_s390_inject_io(S390FLICState *fs, uint16_t subchannel_id,
+ uint16_t subchannel_nr, uint32_t io_int_parm,
+ uint32_t io_int_word)
+{
+ const uint8_t isc = IO_INT_WORD_ISC(io_int_word);
+ QEMUS390FLICState *flic = s390_get_qemu_flic(fs);
+ QEMUS390FlicIO *io;
+
+ g_assert(qemu_mutex_iothread_locked());
+ io = g_new0(QEMUS390FlicIO, 1);
+ io->id = subchannel_id;
+ io->nr = subchannel_nr;
+ io->parm = io_int_parm;
+ io->word = io_int_word;
+
+ QLIST_INSERT_HEAD(&flic->io[isc], io, next);
+ flic->pending |= ISC_TO_PENDING_IO(isc);
+
+ qemu_s390_flic_notify(ISC_TO_PENDING_IO(isc));
+}
+
+static void qemu_s390_inject_crw_mchk(S390FLICState *fs)
+{
+ QEMUS390FLICState *flic = s390_get_qemu_flic(fs);
+
+ g_assert(qemu_mutex_iothread_locked());
+ flic->pending |= FLIC_PENDING_MCHK_CR;
+
+ qemu_s390_flic_notify(FLIC_PENDING_MCHK_CR);
+}
+
+bool qemu_s390_flic_has_service(QEMUS390FLICState *flic)
+{
+ /* called without lock via cc->has_work, will be validated under lock */
+ return !!(flic->pending & FLIC_PENDING_SERVICE);
+}
+
+bool qemu_s390_flic_has_io(QEMUS390FLICState *flic, uint64_t cr6)
+{
+ /* called without lock via cc->has_work, will be validated under lock */
+ return !!(flic->pending & CR6_TO_PENDING_IO(cr6));
+}
+
+bool qemu_s390_flic_has_crw_mchk(QEMUS390FLICState *flic)
+{
+ /* called without lock via cc->has_work, will be validated under lock */
+ return !!(flic->pending & FLIC_PENDING_MCHK_CR);
+}
+
+bool qemu_s390_flic_has_any(QEMUS390FLICState *flic)
+{
+ g_assert(qemu_mutex_iothread_locked());
+ return !!flic->pending;
+}
+
static void qemu_s390_flic_reset(DeviceState *dev)
{
QEMUS390FLICState *flic = QEMU_S390_FLIC(dev);
+ QEMUS390FlicIO *cur, *next;
+ int isc;
+ g_assert(qemu_mutex_iothread_locked());
flic->simm = 0;
flic->nimm = 0;
+ flic->pending = 0;
+
+ /* remove all pending io interrupts */
+ for (isc = 0; isc < 8; isc++) {
+ QLIST_FOREACH_SAFE(cur, &flic->io[isc], next, next) {
+ QLIST_REMOVE(cur, next);
+ g_free(cur);
+ }
+ }
}
bool ais_needed(void *opaque)
@@ -153,6 +372,16 @@
}
};
+static void qemu_s390_flic_instance_init(Object *obj)
+{
+ QEMUS390FLICState *flic = QEMU_S390_FLIC(obj);
+ int isc;
+
+ for (isc = 0; isc < 8; isc++) {
+ QLIST_INIT(&flic->io[isc]);
+ }
+}
+
static void qemu_s390_flic_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
@@ -167,6 +396,9 @@
fsc->clear_io_irq = qemu_s390_clear_io_flic;
fsc->modify_ais_mode = qemu_s390_modify_ais_mode;
fsc->inject_airq = qemu_s390_inject_airq;
+ fsc->inject_service = qemu_s390_inject_service;
+ fsc->inject_io = qemu_s390_inject_io;
+ fsc->inject_crw_mchk = qemu_s390_inject_crw_mchk;
}
static Property s390_flic_common_properties[] = {
@@ -201,6 +433,7 @@
.name = TYPE_QEMU_S390_FLIC,
.parent = TYPE_S390_FLIC_COMMON,
.instance_size = sizeof(QEMUS390FLICState),
+ .instance_init = qemu_s390_flic_instance_init,
.class_init = qemu_s390_flic_class_init,
};
diff --git a/hw/intc/s390_flic_kvm.c b/hw/intc/s390_flic_kvm.c
index d208cb8..3f804ad 100644
--- a/hw/intc/s390_flic_kvm.c
+++ b/hw/intc/s390_flic_kvm.c
@@ -35,16 +35,15 @@
bool clear_io_supported;
} KVMS390FLICState;
-DeviceState *s390_flic_kvm_create(void)
+static KVMS390FLICState *s390_get_kvm_flic(S390FLICState *fs)
{
- DeviceState *dev = NULL;
+ static KVMS390FLICState *flic;
- if (kvm_enabled()) {
- dev = qdev_create(NULL, TYPE_KVM_S390_FLIC);
- object_property_add_child(qdev_get_machine(), TYPE_KVM_S390_FLIC,
- OBJECT(dev), NULL);
+ if (!flic) {
+ /* we only have one flic device, so this is fine to cache */
+ flic = KVM_S390_FLIC(fs);
}
- return dev;
+ return flic;
}
/**
@@ -123,20 +122,70 @@
return rc ? -errno : 0;
}
-int kvm_s390_inject_flic(struct kvm_s390_irq *irq)
+static void kvm_s390_inject_flic(S390FLICState *fs, struct kvm_s390_irq *irq)
{
- static KVMS390FLICState *flic;
+ static bool use_flic = true;
+ int r;
- if (unlikely(!flic)) {
- flic = KVM_S390_FLIC(s390_get_flic());
+ if (use_flic) {
+ r = flic_enqueue_irqs(irq, sizeof(*irq), s390_get_kvm_flic(fs));
+ if (r == -ENOSYS) {
+ use_flic = false;
+ }
+ if (!r) {
+ return;
+ }
}
- return flic_enqueue_irqs(irq, sizeof(*irq), flic);
+ /* fallback to legacy KVM IOCTL in case FLIC fails */
+ kvm_s390_floating_interrupt_legacy(irq);
+}
+
+static void kvm_s390_inject_service(S390FLICState *fs, uint32_t parm)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_INT_SERVICE,
+ .u.ext.ext_params = parm,
+ };
+
+ kvm_s390_inject_flic(fs, &irq);
+}
+
+static void kvm_s390_inject_io(S390FLICState *fs, uint16_t subchannel_id,
+ uint16_t subchannel_nr, uint32_t io_int_parm,
+ uint32_t io_int_word)
+{
+ struct kvm_s390_irq irq = {
+ .u.io.subchannel_id = subchannel_id,
+ .u.io.subchannel_nr = subchannel_nr,
+ .u.io.io_int_parm = io_int_parm,
+ .u.io.io_int_word = io_int_word,
+ };
+
+ if (io_int_word & IO_INT_WORD_AI) {
+ irq.type = KVM_S390_INT_IO(1, 0, 0, 0);
+ } else {
+ irq.type = KVM_S390_INT_IO(0, (subchannel_id & 0xff00) >> 8,
+ (subchannel_id & 0x0006),
+ subchannel_nr);
+ }
+ kvm_s390_inject_flic(fs, &irq);
+}
+
+static void kvm_s390_inject_crw_mchk(S390FLICState *fs)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_MCHK,
+ .u.mchk.cr14 = CR14_CHANNEL_REPORT_SC,
+ .u.mchk.mcic = s390_build_validity_mcic() | MCIC_SC_CP,
+ };
+
+ kvm_s390_inject_flic(fs, &irq);
}
static int kvm_s390_clear_io_flic(S390FLICState *fs, uint16_t subchannel_id,
uint16_t subchannel_nr)
{
- KVMS390FLICState *flic = KVM_S390_FLIC(fs);
+ KVMS390FLICState *flic = s390_get_kvm_flic(fs);
int rc;
uint32_t sid = subchannel_id << 16 | subchannel_nr;
struct kvm_device_attr attr = {
@@ -154,7 +203,7 @@
static int kvm_s390_modify_ais_mode(S390FLICState *fs, uint8_t isc,
uint16_t mode)
{
- KVMS390FLICState *flic = KVM_S390_FLIC(fs);
+ KVMS390FLICState *flic = s390_get_kvm_flic(fs);
struct kvm_s390_ais_req req = {
.isc = isc,
.mode = mode,
@@ -174,7 +223,7 @@
static int kvm_s390_inject_airq(S390FLICState *fs, uint8_t type,
uint8_t isc, uint8_t flags)
{
- KVMS390FLICState *flic = KVM_S390_FLIC(fs);
+ KVMS390FLICState *flic = s390_get_kvm_flic(fs);
uint32_t id = css_get_adapter_id(type, isc);
struct kvm_device_attr attr = {
.group = KVM_DEV_FLIC_AIRQ_INJECT,
@@ -263,7 +312,7 @@
.group = KVM_DEV_FLIC_ADAPTER_MODIFY,
.addr = (uint64_t)&req,
};
- KVMS390FLICState *flic = KVM_S390_FLIC(fs);
+ KVMS390FLICState *flic = s390_get_kvm_flic(fs);
int r;
if (!kvm_gsi_routing_enabled()) {
@@ -614,6 +663,9 @@
fsc->clear_io_irq = kvm_s390_clear_io_flic;
fsc->modify_ais_mode = kvm_s390_modify_ais_mode;
fsc->inject_airq = kvm_s390_inject_airq;
+ fsc->inject_service = kvm_s390_inject_service;
+ fsc->inject_io = kvm_s390_inject_io;
+ fsc->inject_crw_mchk = kvm_s390_inject_crw_mchk;
}
static const TypeInfo kvm_s390_flic_info = {
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index be76918..4092d28 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -177,10 +177,11 @@
nvic_irq_update(int vectpending, int pendprio, int exception_prio, int level) "NVIC vectpending %d pending prio %d exception_prio %d: setting irq line to %d"
nvic_escalate_prio(int irq, int irqprio, int runprio) "NVIC escalating irq %d to HardFault: insufficient priority %d >= %d"
nvic_escalate_disabled(int irq) "NVIC escalating irq %d to HardFault: disabled"
-nvic_set_pending(int irq, bool secure, int en, int prio) "NVIC set pending irq %d secure-bank %d (enabled: %d priority %d)"
+nvic_set_pending(int irq, bool secure, bool derived, int en, int prio) "NVIC set pending irq %d secure-bank %d derived %d (enabled: %d priority %d)"
nvic_clear_pending(int irq, bool secure, int en, int prio) "NVIC clear pending irq %d secure-bank %d (enabled: %d priority %d)"
nvic_set_pending_level(int irq) "NVIC set pending: irq %d higher prio than vectpending: setting irq line to 1"
-nvic_acknowledge_irq(int irq, int prio, bool targets_secure) "NVIC acknowledge IRQ: %d now active (prio %d targets_secure %d)"
+nvic_acknowledge_irq(int irq, int prio) "NVIC acknowledge IRQ: %d now active (prio %d)"
+nvic_get_pending_irq_info(int irq, bool secure) "NVIC next IRQ %d: targets_secure: %d"
nvic_complete_irq(int irq, bool secure) "NVIC complete IRQ %d (secure %d)"
nvic_set_irq_level(int irq, int level) "NVIC external irq %d level set to %d"
nvic_sysreg_read(uint64_t addr, uint32_t value, unsigned size) "NVIC sysreg read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
index d517f83..fce426e 100644
--- a/hw/misc/Makefile.objs
+++ b/hw/misc/Makefile.objs
@@ -33,6 +33,10 @@
obj-$(CONFIG_IMX) += imx25_ccm.o
obj-$(CONFIG_IMX) += imx6_ccm.o
obj-$(CONFIG_IMX) += imx6_src.o
+obj-$(CONFIG_IMX) += imx7_ccm.o
+obj-$(CONFIG_IMX) += imx2_wdt.o
+obj-$(CONFIG_IMX) += imx7_snvs.o
+obj-$(CONFIG_IMX) += imx7_gpr.o
obj-$(CONFIG_MILKYMIST) += milkymist-hpdmc.o
obj-$(CONFIG_MILKYMIST) += milkymist-pfpu.o
obj-$(CONFIG_MAINSTONE) += mst_fpga.o
diff --git a/hw/misc/imx2_wdt.c b/hw/misc/imx2_wdt.c
new file mode 100644
index 0000000..e47e442
--- /dev/null
+++ b/hw/misc/imx2_wdt.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018, Impinj, Inc.
+ *
+ * i.MX2 Watchdog IP block
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/bitops.h"
+#include "sysemu/watchdog.h"
+
+#include "hw/misc/imx2_wdt.h"
+
+#define IMX2_WDT_WCR_WDA BIT(5) /* -> External Reset WDOG_B */
+#define IMX2_WDT_WCR_SRS BIT(4) /* -> Software Reset Signal */
+
+static uint64_t imx2_wdt_read(void *opaque, hwaddr addr,
+ unsigned int size)
+{
+ return 0;
+}
+
+static void imx2_wdt_write(void *opaque, hwaddr addr,
+ uint64_t value, unsigned int size)
+{
+ if (addr == IMX2_WDT_WCR &&
+ (value & (IMX2_WDT_WCR_WDA | IMX2_WDT_WCR_SRS))) {
+ watchdog_perform_action();
+ }
+}
+
+static const MemoryRegionOps imx2_wdt_ops = {
+ .read = imx2_wdt_read,
+ .write = imx2_wdt_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+ .impl = {
+ /*
+ * Our device would not work correctly if the guest was doing
+ * unaligned access. This might not be a limitation on the
+ * real device but in practice there is no reason for a guest
+ * to access this device unaligned.
+ */
+ .min_access_size = 4,
+ .max_access_size = 4,
+ .unaligned = false,
+ },
+};
+
+static void imx2_wdt_realize(DeviceState *dev, Error **errp)
+{
+ IMX2WdtState *s = IMX2_WDT(dev);
+
+ memory_region_init_io(&s->mmio, OBJECT(dev),
+ &imx2_wdt_ops, s,
+ TYPE_IMX2_WDT".mmio",
+ IMX2_WDT_REG_NUM * sizeof(uint16_t));
+ sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio);
+}
+
+static void imx2_wdt_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->realize = imx2_wdt_realize;
+ set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+}
+
+static const TypeInfo imx2_wdt_info = {
+ .name = TYPE_IMX2_WDT,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(IMX2WdtState),
+ .class_init = imx2_wdt_class_init,
+};
+
+static WatchdogTimerModel model = {
+ .wdt_name = "imx2-watchdog",
+ .wdt_description = "i.MX2 Watchdog",
+};
+
+static void imx2_wdt_register_type(void)
+{
+ watchdog_add_model(&model);
+ type_register_static(&imx2_wdt_info);
+}
+type_init(imx2_wdt_register_type)
diff --git a/hw/misc/imx7_ccm.c b/hw/misc/imx7_ccm.c
new file mode 100644
index 0000000..d90c48b
--- /dev/null
+++ b/hw/misc/imx7_ccm.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2018, Impinj, Inc.
+ *
+ * i.MX7 CCM, PMU and ANALOG IP blocks emulation code
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+
+#include "hw/misc/imx7_ccm.h"
+
+static void imx7_analog_reset(DeviceState *dev)
+{
+ IMX7AnalogState *s = IMX7_ANALOG(dev);
+
+ memset(s->pmu, 0, sizeof(s->pmu));
+ memset(s->analog, 0, sizeof(s->analog));
+
+ s->analog[ANALOG_PLL_ARM] = 0x00002042;
+ s->analog[ANALOG_PLL_DDR] = 0x0060302c;
+ s->analog[ANALOG_PLL_DDR_SS] = 0x00000000;
+ s->analog[ANALOG_PLL_DDR_NUM] = 0x06aaac4d;
+ s->analog[ANALOG_PLL_DDR_DENOM] = 0x100003ec;
+ s->analog[ANALOG_PLL_480] = 0x00002000;
+ s->analog[ANALOG_PLL_480A] = 0x52605a56;
+ s->analog[ANALOG_PLL_480B] = 0x52525216;
+ s->analog[ANALOG_PLL_ENET] = 0x00001fc0;
+ s->analog[ANALOG_PLL_AUDIO] = 0x0001301b;
+ s->analog[ANALOG_PLL_AUDIO_SS] = 0x00000000;
+ s->analog[ANALOG_PLL_AUDIO_NUM] = 0x05f5e100;
+ s->analog[ANALOG_PLL_AUDIO_DENOM] = 0x2964619c;
+ s->analog[ANALOG_PLL_VIDEO] = 0x0008201b;
+ s->analog[ANALOG_PLL_VIDEO_SS] = 0x00000000;
+ s->analog[ANALOG_PLL_VIDEO_NUM] = 0x0000f699;
+ s->analog[ANALOG_PLL_VIDEO_DENOM] = 0x000f4240;
+ s->analog[ANALOG_PLL_MISC0] = 0x00000000;
+
+ /* all PLLs need to be locked */
+ s->analog[ANALOG_PLL_ARM] |= ANALOG_PLL_LOCK;
+ s->analog[ANALOG_PLL_DDR] |= ANALOG_PLL_LOCK;
+ s->analog[ANALOG_PLL_480] |= ANALOG_PLL_LOCK;
+ s->analog[ANALOG_PLL_480A] |= ANALOG_PLL_LOCK;
+ s->analog[ANALOG_PLL_480B] |= ANALOG_PLL_LOCK;
+ s->analog[ANALOG_PLL_ENET] |= ANALOG_PLL_LOCK;
+ s->analog[ANALOG_PLL_AUDIO] |= ANALOG_PLL_LOCK;
+ s->analog[ANALOG_PLL_VIDEO] |= ANALOG_PLL_LOCK;
+ s->analog[ANALOG_PLL_MISC0] |= ANALOG_PLL_LOCK;
+
+ /*
+ * Since I couldn't find any info about this in the reference
+ * manual the value of this register is based strictly on matching
+ * what Linux kernel expects it to be.
+ */
+ s->analog[ANALOG_DIGPROG] = 0x720000;
+ /*
+ * Set revision to be 1.0 (Arbitrary choice, no particular
+ * reason).
+ */
+ s->analog[ANALOG_DIGPROG] |= 0x000010;
+}
+
+static void imx7_ccm_reset(DeviceState *dev)
+{
+ IMX7CCMState *s = IMX7_CCM(dev);
+
+ memset(s->ccm, 0, sizeof(s->ccm));
+}
+
+#define CCM_INDEX(offset) (((offset) & ~(hwaddr)0xF) / sizeof(uint32_t))
+#define CCM_BITOP(offset) ((offset) & (hwaddr)0xF)
+
+enum {
+ CCM_BITOP_NONE = 0x00,
+ CCM_BITOP_SET = 0x04,
+ CCM_BITOP_CLR = 0x08,
+ CCM_BITOP_TOG = 0x0C,
+};
+
+static uint64_t imx7_set_clr_tog_read(void *opaque, hwaddr offset,
+ unsigned size)
+{
+ const uint32_t *mmio = opaque;
+
+ return mmio[CCM_INDEX(offset)];
+}
+
+static void imx7_set_clr_tog_write(void *opaque, hwaddr offset,
+ uint64_t value, unsigned size)
+{
+ const uint8_t bitop = CCM_BITOP(offset);
+ const uint32_t index = CCM_INDEX(offset);
+ uint32_t *mmio = opaque;
+
+ switch (bitop) {
+ case CCM_BITOP_NONE:
+ mmio[index] = value;
+ break;
+ case CCM_BITOP_SET:
+ mmio[index] |= value;
+ break;
+ case CCM_BITOP_CLR:
+ mmio[index] &= ~value;
+ break;
+ case CCM_BITOP_TOG:
+ mmio[index] ^= value;
+ break;
+ };
+}
+
+static const struct MemoryRegionOps imx7_set_clr_tog_ops = {
+ .read = imx7_set_clr_tog_read,
+ .write = imx7_set_clr_tog_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+ .impl = {
+ /*
+ * Our device would not work correctly if the guest was doing
+ * unaligned access. This might not be a limitation on the real
+ * device but in practice there is no reason for a guest to access
+ * this device unaligned.
+ */
+ .min_access_size = 4,
+ .max_access_size = 4,
+ .unaligned = false,
+ },
+};
+
+static const struct MemoryRegionOps imx7_digprog_ops = {
+ .read = imx7_set_clr_tog_read,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ .unaligned = false,
+ },
+};
+
+static void imx7_ccm_init(Object *obj)
+{
+ SysBusDevice *sd = SYS_BUS_DEVICE(obj);
+ IMX7CCMState *s = IMX7_CCM(obj);
+
+ memory_region_init_io(&s->iomem,
+ obj,
+ &imx7_set_clr_tog_ops,
+ s->ccm,
+ TYPE_IMX7_CCM ".ccm",
+ sizeof(s->ccm));
+
+ sysbus_init_mmio(sd, &s->iomem);
+}
+
+static void imx7_analog_init(Object *obj)
+{
+ SysBusDevice *sd = SYS_BUS_DEVICE(obj);
+ IMX7AnalogState *s = IMX7_ANALOG(obj);
+
+ memory_region_init(&s->mmio.container, obj, TYPE_IMX7_ANALOG,
+ 0x10000);
+
+ memory_region_init_io(&s->mmio.analog,
+ obj,
+ &imx7_set_clr_tog_ops,
+ s->analog,
+ TYPE_IMX7_ANALOG,
+ sizeof(s->analog));
+
+ memory_region_add_subregion(&s->mmio.container,
+ 0x60, &s->mmio.analog);
+
+ memory_region_init_io(&s->mmio.pmu,
+ obj,
+ &imx7_set_clr_tog_ops,
+ s->pmu,
+ TYPE_IMX7_ANALOG ".pmu",
+ sizeof(s->pmu));
+
+ memory_region_add_subregion(&s->mmio.container,
+ 0x200, &s->mmio.pmu);
+
+ memory_region_init_io(&s->mmio.digprog,
+ obj,
+ &imx7_digprog_ops,
+ &s->analog[ANALOG_DIGPROG],
+ TYPE_IMX7_ANALOG ".digprog",
+ sizeof(uint32_t));
+
+ memory_region_add_subregion_overlap(&s->mmio.container,
+ 0x800, &s->mmio.digprog, 10);
+
+
+ sysbus_init_mmio(sd, &s->mmio.container);
+}
+
+static const VMStateDescription vmstate_imx7_ccm = {
+ .name = TYPE_IMX7_CCM,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32_ARRAY(ccm, IMX7CCMState, CCM_MAX),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static uint32_t imx7_ccm_get_clock_frequency(IMXCCMState *dev, IMXClk clock)
+{
+ /*
+ * This function is "consumed" by GPT emulation code, however on
+ * i.MX7 each GPT block can have their own clock root. This means
+ * that this functions needs somehow to know requester's identity
+ * and the way to pass it: be it via additional IMXClk constants
+ * or by adding another argument to this method needs to be
+ * figured out
+ */
+ qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Not implemented\n",
+ TYPE_IMX7_CCM, __func__);
+ return 0;
+}
+
+static void imx7_ccm_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ IMXCCMClass *ccm = IMX_CCM_CLASS(klass);
+
+ dc->reset = imx7_ccm_reset;
+ dc->vmsd = &vmstate_imx7_ccm;
+ dc->desc = "i.MX7 Clock Control Module";
+
+ ccm->get_clock_frequency = imx7_ccm_get_clock_frequency;
+}
+
+static const TypeInfo imx7_ccm_info = {
+ .name = TYPE_IMX7_CCM,
+ .parent = TYPE_IMX_CCM,
+ .instance_size = sizeof(IMX7CCMState),
+ .instance_init = imx7_ccm_init,
+ .class_init = imx7_ccm_class_init,
+};
+
+static const VMStateDescription vmstate_imx7_analog = {
+ .name = TYPE_IMX7_ANALOG,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32_ARRAY(analog, IMX7AnalogState, ANALOG_MAX),
+ VMSTATE_UINT32_ARRAY(pmu, IMX7AnalogState, PMU_MAX),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static void imx7_analog_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->reset = imx7_analog_reset;
+ dc->vmsd = &vmstate_imx7_analog;
+ dc->desc = "i.MX7 Analog Module";
+}
+
+static const TypeInfo imx7_analog_info = {
+ .name = TYPE_IMX7_ANALOG,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(IMX7AnalogState),
+ .instance_init = imx7_analog_init,
+ .class_init = imx7_analog_class_init,
+};
+
+static void imx7_ccm_register_type(void)
+{
+ type_register_static(&imx7_ccm_info);
+ type_register_static(&imx7_analog_info);
+}
+type_init(imx7_ccm_register_type)
diff --git a/hw/misc/imx7_gpr.c b/hw/misc/imx7_gpr.c
new file mode 100644
index 0000000..c2a9df2
--- /dev/null
+++ b/hw/misc/imx7_gpr.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2018, Impinj, Inc.
+ *
+ * i.MX7 GPR IP block emulation code
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * Bare minimum emulation code needed to support being able to shut
+ * down linux guest gracefully.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/misc/imx7_gpr.h"
+#include "qemu/log.h"
+#include "sysemu/sysemu.h"
+
+#include "trace.h"
+
+enum IMX7GPRRegisters {
+ IOMUXC_GPR0 = 0x00,
+ IOMUXC_GPR1 = 0x04,
+ IOMUXC_GPR2 = 0x08,
+ IOMUXC_GPR3 = 0x0c,
+ IOMUXC_GPR4 = 0x10,
+ IOMUXC_GPR5 = 0x14,
+ IOMUXC_GPR6 = 0x18,
+ IOMUXC_GPR7 = 0x1c,
+ IOMUXC_GPR8 = 0x20,
+ IOMUXC_GPR9 = 0x24,
+ IOMUXC_GPR10 = 0x28,
+ IOMUXC_GPR11 = 0x2c,
+ IOMUXC_GPR12 = 0x30,
+ IOMUXC_GPR13 = 0x34,
+ IOMUXC_GPR14 = 0x38,
+ IOMUXC_GPR15 = 0x3c,
+ IOMUXC_GPR16 = 0x40,
+ IOMUXC_GPR17 = 0x44,
+ IOMUXC_GPR18 = 0x48,
+ IOMUXC_GPR19 = 0x4c,
+ IOMUXC_GPR20 = 0x50,
+ IOMUXC_GPR21 = 0x54,
+ IOMUXC_GPR22 = 0x58,
+};
+
+#define IMX7D_GPR1_IRQ_MASK BIT(12)
+#define IMX7D_GPR1_ENET1_TX_CLK_SEL_MASK BIT(13)
+#define IMX7D_GPR1_ENET2_TX_CLK_SEL_MASK BIT(14)
+#define IMX7D_GPR1_ENET_TX_CLK_SEL_MASK (0x3 << 13)
+#define IMX7D_GPR1_ENET1_CLK_DIR_MASK BIT(17)
+#define IMX7D_GPR1_ENET2_CLK_DIR_MASK BIT(18)
+#define IMX7D_GPR1_ENET_CLK_DIR_MASK (0x3 << 17)
+
+#define IMX7D_GPR5_CSI_MUX_CONTROL_MIPI BIT(4)
+#define IMX7D_GPR12_PCIE_PHY_REFCLK_SEL BIT(5)
+#define IMX7D_GPR22_PCIE_PHY_PLL_LOCKED BIT(31)
+
+
+static uint64_t imx7_gpr_read(void *opaque, hwaddr offset, unsigned size)
+{
+ trace_imx7_gpr_read(offset);
+
+ if (offset == IOMUXC_GPR22) {
+ return IMX7D_GPR22_PCIE_PHY_PLL_LOCKED;
+ }
+
+ return 0;
+}
+
+static void imx7_gpr_write(void *opaque, hwaddr offset,
+ uint64_t v, unsigned size)
+{
+ trace_imx7_gpr_write(offset, v);
+}
+
+static const struct MemoryRegionOps imx7_gpr_ops = {
+ .read = imx7_gpr_read,
+ .write = imx7_gpr_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+ .impl = {
+ /*
+ * Our device would not work correctly if the guest was doing
+ * unaligned access. This might not be a limitation on the
+ * real device but in practice there is no reason for a guest
+ * to access this device unaligned.
+ */
+ .min_access_size = 4,
+ .max_access_size = 4,
+ .unaligned = false,
+ },
+};
+
+static void imx7_gpr_init(Object *obj)
+{
+ SysBusDevice *sd = SYS_BUS_DEVICE(obj);
+ IMX7GPRState *s = IMX7_GPR(obj);
+
+ memory_region_init_io(&s->mmio, obj, &imx7_gpr_ops, s,
+ TYPE_IMX7_GPR, 64 * 1024);
+ sysbus_init_mmio(sd, &s->mmio);
+}
+
+static void imx7_gpr_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->desc = "i.MX7 General Purpose Registers Module";
+}
+
+static const TypeInfo imx7_gpr_info = {
+ .name = TYPE_IMX7_GPR,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(IMX7GPRState),
+ .instance_init = imx7_gpr_init,
+ .class_init = imx7_gpr_class_init,
+};
+
+static void imx7_gpr_register_type(void)
+{
+ type_register_static(&imx7_gpr_info);
+}
+type_init(imx7_gpr_register_type)
diff --git a/hw/misc/imx7_snvs.c b/hw/misc/imx7_snvs.c
new file mode 100644
index 0000000..4df482b
--- /dev/null
+++ b/hw/misc/imx7_snvs.c
@@ -0,0 +1,83 @@
+/*
+ * IMX7 Secure Non-Volatile Storage
+ *
+ * Copyright (c) 2018, Impinj, Inc.
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * Bare minimum emulation code needed to support being able to shut
+ * down linux guest gracefully.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/misc/imx7_snvs.h"
+#include "qemu/log.h"
+#include "sysemu/sysemu.h"
+
+static uint64_t imx7_snvs_read(void *opaque, hwaddr offset, unsigned size)
+{
+ return 0;
+}
+
+static void imx7_snvs_write(void *opaque, hwaddr offset,
+ uint64_t v, unsigned size)
+{
+ const uint32_t value = v;
+ const uint32_t mask = SNVS_LPCR_TOP | SNVS_LPCR_DP_EN;
+
+ if (offset == SNVS_LPCR && ((value & mask) == mask)) {
+ qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+ }
+}
+
+static const struct MemoryRegionOps imx7_snvs_ops = {
+ .read = imx7_snvs_read,
+ .write = imx7_snvs_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+ .impl = {
+ /*
+ * Our device would not work correctly if the guest was doing
+ * unaligned access. This might not be a limitation on the real
+ * device but in practice there is no reason for a guest to access
+ * this device unaligned.
+ */
+ .min_access_size = 4,
+ .max_access_size = 4,
+ .unaligned = false,
+ },
+};
+
+static void imx7_snvs_init(Object *obj)
+{
+ SysBusDevice *sd = SYS_BUS_DEVICE(obj);
+ IMX7SNVSState *s = IMX7_SNVS(obj);
+
+ memory_region_init_io(&s->mmio, obj, &imx7_snvs_ops, s,
+ TYPE_IMX7_SNVS, 0x1000);
+
+ sysbus_init_mmio(sd, &s->mmio);
+}
+
+static void imx7_snvs_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->desc = "i.MX7 Secure Non-Volatile Storage Module";
+}
+
+static const TypeInfo imx7_snvs_info = {
+ .name = TYPE_IMX7_SNVS,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(IMX7SNVSState),
+ .instance_init = imx7_snvs_init,
+ .class_init = imx7_snvs_class_init,
+};
+
+static void imx7_snvs_register_type(void)
+{
+ type_register_static(&imx7_snvs_info);
+}
+type_init(imx7_snvs_register_type)
diff --git a/hw/misc/trace-events b/hw/misc/trace-events
index 616579a..e6070f2 100644
--- a/hw/misc/trace-events
+++ b/hw/misc/trace-events
@@ -66,3 +66,7 @@
msf2_sysreg_write(uint64_t offset, uint32_t val, uint32_t prev) "msf2-sysreg write: addr 0x%08" HWADDR_PRIx " data 0x%" PRIx32 " prev 0x%" PRIx32
msf2_sysreg_read(uint64_t offset, uint32_t val) "msf2-sysreg read: addr 0x%08" HWADDR_PRIx " data 0x%08" PRIx32
msf2_sysreg_write_pll_status(void) "Invalid write to read only PLL status register"
+
+#hw/misc/imx7_gpr.c
+imx7_gpr_read(uint64_t offset) "addr 0x%08" HWADDR_PRIx
+imx7_gpr_write(uint64_t offset, uint64_t value) "addr 0x%08" HWADDR_PRIx "value 0x%08" HWADDR_PRIx
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 1c526fd..301bf17 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -439,7 +439,7 @@
bool do_map)
{
S390FLICState *fs = s390_get_flic();
- S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs);
+ S390FLICStateClass *fsc = s390_get_flic_class(fs);
return fsc->io_adapter_map(fs, adapter->adapter_id, map_addr, do_map);
}
@@ -520,7 +520,7 @@
int ret, isc;
IoAdapter *adapter;
S390FLICState *fs = s390_get_flic();
- S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs);
+ S390FLICStateClass *fsc = s390_get_flic_class(fs);
/*
* Disallow multiple registrations for the same device type.
@@ -566,7 +566,7 @@
Error *err = NULL;
static bool no_clear_irq;
S390FLICState *fs = s390_get_flic();
- S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs);
+ S390FLICStateClass *fsc = s390_get_flic_class(fs);
int r;
if (unlikely(no_clear_irq)) {
@@ -640,7 +640,7 @@
int css_do_sic(CPUS390XState *env, uint8_t isc, uint16_t mode)
{
S390FLICState *fs = s390_get_flic();
- S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs);
+ S390FLICStateClass *fsc = s390_get_flic_class(fs);
int r;
if (env->psw.mask & PSW_MASK_PSTATE) {
@@ -666,7 +666,7 @@
void css_adapter_interrupt(CssIoAdapterType type, uint8_t isc)
{
S390FLICState *fs = s390_get_flic();
- S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs);
+ S390FLICStateClass *fsc = s390_get_flic_class(fs);
uint32_t io_int_word = (isc << 27) | IO_INT_WORD_AI;
IoAdapter *adapter = channel_subsys.io_adapters[type][isc];
diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c
index b0f71f4..155a694 100644
--- a/hw/s390x/event-facility.c
+++ b/hw/s390x/event-facility.c
@@ -293,10 +293,10 @@
ef->receive_mask = be32_to_cpu(tmp_mask);
/* return the SCLP's capability masks to the guest */
- tmp_mask = cpu_to_be32(get_host_send_mask(ef));
+ tmp_mask = cpu_to_be32(get_host_receive_mask(ef));
copy_mask(WEM_RECEIVE_MASK(we_mask, mask_length), (uint8_t *)&tmp_mask,
mask_length, sizeof(tmp_mask));
- tmp_mask = cpu_to_be32(get_host_receive_mask(ef));
+ tmp_mask = cpu_to_be32(get_host_send_mask(ef));
copy_mask(WEM_SEND_MASK(we_mask, mask_length), (uint8_t *)&tmp_mask,
mask_length, sizeof(tmp_mask));
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 7d9c65e..77a50ca 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -309,49 +309,187 @@
: 0;
}
-static uint64_t s390_guest_io_table_walk(uint64_t guest_iota,
- uint64_t guest_dma_address)
+static bool rt_entry_isvalid(uint64_t entry)
{
- uint64_t sto_a, pto_a, px_a;
- uint64_t sto, pto, pte;
- uint32_t rtx, sx, px;
+ return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
+}
- rtx = calc_rtx(guest_dma_address);
- sx = calc_sx(guest_dma_address);
- px = calc_px(guest_dma_address);
+static bool pt_entry_isvalid(uint64_t entry)
+{
+ return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
+}
- sto_a = guest_iota + rtx * sizeof(uint64_t);
- sto = address_space_ldq(&address_space_memory, sto_a,
- MEMTXATTRS_UNSPECIFIED, NULL);
- sto = get_rt_sto(sto);
- if (!sto) {
- pte = 0;
+static bool entry_isprotected(uint64_t entry)
+{
+ return (entry & ZPCI_TABLE_PROT_MASK) == ZPCI_TABLE_PROTECTED;
+}
+
+/* ett is expected table type, -1 page table, 0 segment table, 1 region table */
+static uint64_t get_table_index(uint64_t iova, int8_t ett)
+{
+ switch (ett) {
+ case ZPCI_ETT_PT:
+ return calc_px(iova);
+ case ZPCI_ETT_ST:
+ return calc_sx(iova);
+ case ZPCI_ETT_RT:
+ return calc_rtx(iova);
+ }
+
+ return -1;
+}
+
+static bool entry_isvalid(uint64_t entry, int8_t ett)
+{
+ switch (ett) {
+ case ZPCI_ETT_PT:
+ return pt_entry_isvalid(entry);
+ case ZPCI_ETT_ST:
+ case ZPCI_ETT_RT:
+ return rt_entry_isvalid(entry);
+ }
+
+ return false;
+}
+
+/* Return true if address translation is done */
+static bool translate_iscomplete(uint64_t entry, int8_t ett)
+{
+ switch (ett) {
+ case 0:
+ return (entry & ZPCI_TABLE_FC) ? true : false;
+ case 1:
+ return false;
+ }
+
+ return true;
+}
+
+static uint64_t get_frame_size(int8_t ett)
+{
+ switch (ett) {
+ case ZPCI_ETT_PT:
+ return 1ULL << 12;
+ case ZPCI_ETT_ST:
+ return 1ULL << 20;
+ case ZPCI_ETT_RT:
+ return 1ULL << 31;
+ }
+
+ return 0;
+}
+
+static uint64_t get_next_table_origin(uint64_t entry, int8_t ett)
+{
+ switch (ett) {
+ case ZPCI_ETT_PT:
+ return entry & ZPCI_PTE_ADDR_MASK;
+ case ZPCI_ETT_ST:
+ return get_st_pto(entry);
+ case ZPCI_ETT_RT:
+ return get_rt_sto(entry);
+ }
+
+ return 0;
+}
+
+/**
+ * table_translate: do translation within one table and return the following
+ * table origin
+ *
+ * @entry: the entry being translated, the result is stored in this.
+ * @to: the address of table origin.
+ * @ett: expected table type, 1 region table, 0 segment table and -1 page table.
+ * @error: error code
+ */
+static uint64_t table_translate(S390IOTLBEntry *entry, uint64_t to, int8_t ett,
+ uint16_t *error)
+{
+ uint64_t tx, te, nto = 0;
+ uint16_t err = 0;
+
+ tx = get_table_index(entry->iova, ett);
+ te = address_space_ldq(&address_space_memory, to + tx * sizeof(uint64_t),
+ MEMTXATTRS_UNSPECIFIED, NULL);
+
+ if (!te) {
+ err = ERR_EVENT_INVALTE;
goto out;
}
- pto_a = sto + sx * sizeof(uint64_t);
- pto = address_space_ldq(&address_space_memory, pto_a,
- MEMTXATTRS_UNSPECIFIED, NULL);
- pto = get_st_pto(pto);
- if (!pto) {
- pte = 0;
+ if (!entry_isvalid(te, ett)) {
+ entry->perm &= IOMMU_NONE;
goto out;
}
- px_a = pto + px * sizeof(uint64_t);
- pte = address_space_ldq(&address_space_memory, px_a,
- MEMTXATTRS_UNSPECIFIED, NULL);
+ if (ett == ZPCI_ETT_RT && ((te & ZPCI_TABLE_LEN_RTX) != ZPCI_TABLE_LEN_RTX
+ || te & ZPCI_TABLE_OFFSET_MASK)) {
+ err = ERR_EVENT_INVALTL;
+ goto out;
+ }
+ nto = get_next_table_origin(te, ett);
+ if (!nto) {
+ err = ERR_EVENT_TT;
+ goto out;
+ }
+
+ if (entry_isprotected(te)) {
+ entry->perm &= IOMMU_RO;
+ } else {
+ entry->perm &= IOMMU_RW;
+ }
+
+ if (translate_iscomplete(te, ett)) {
+ switch (ett) {
+ case ZPCI_ETT_PT:
+ entry->translated_addr = te & ZPCI_PTE_ADDR_MASK;
+ break;
+ case ZPCI_ETT_ST:
+ entry->translated_addr = (te & ZPCI_SFAA_MASK) |
+ (entry->iova & ~ZPCI_SFAA_MASK);
+ break;
+ }
+ nto = 0;
+ }
out:
- return pte;
+ if (err) {
+ entry->perm = IOMMU_NONE;
+ *error = err;
+ }
+ entry->len = get_frame_size(ett);
+ return nto;
+}
+
+uint16_t s390_guest_io_table_walk(uint64_t g_iota, hwaddr addr,
+ S390IOTLBEntry *entry)
+{
+ uint64_t to = s390_pci_get_table_origin(g_iota);
+ int8_t ett = 1;
+ uint16_t error = 0;
+
+ entry->iova = addr & PAGE_MASK;
+ entry->translated_addr = 0;
+ entry->perm = IOMMU_RW;
+
+ if (entry_isprotected(g_iota)) {
+ entry->perm &= IOMMU_RO;
+ }
+
+ while (to) {
+ to = table_translate(entry, to, ett--, &error);
+ }
+
+ return error;
}
static IOMMUTLBEntry s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr,
IOMMUAccessFlags flag)
{
- uint64_t pte;
- uint32_t flags;
S390PCIIOMMU *iommu = container_of(mr, S390PCIIOMMU, iommu_mr);
+ S390IOTLBEntry *entry;
+ uint64_t iova = addr & PAGE_MASK;
+ uint16_t error = 0;
IOMMUTLBEntry ret = {
.target_as = &address_space_memory,
.iova = 0,
@@ -374,26 +512,31 @@
DPRINTF("iommu trans addr 0x%" PRIx64 "\n", addr);
if (addr < iommu->pba || addr > iommu->pal) {
- return ret;
+ error = ERR_EVENT_OORANGE;
+ goto err;
}
- pte = s390_guest_io_table_walk(s390_pci_get_table_origin(iommu->g_iota),
- addr);
- if (!pte) {
- return ret;
- }
-
- flags = pte & ZPCI_PTE_FLAG_MASK;
- ret.iova = addr;
- ret.translated_addr = pte & ZPCI_PTE_ADDR_MASK;
- ret.addr_mask = 0xfff;
-
- if (flags & ZPCI_PTE_INVALID) {
- ret.perm = IOMMU_NONE;
+ entry = g_hash_table_lookup(iommu->iotlb, &iova);
+ if (entry) {
+ ret.iova = entry->iova;
+ ret.translated_addr = entry->translated_addr;
+ ret.addr_mask = entry->len - 1;
+ ret.perm = entry->perm;
} else {
- ret.perm = IOMMU_RW;
+ ret.iova = iova;
+ ret.addr_mask = ~PAGE_MASK;
+ ret.perm = IOMMU_NONE;
}
+ if (flag != IOMMU_NONE && !(flag & ret.perm)) {
+ error = ERR_EVENT_TPROTE;
+ }
+err:
+ if (error) {
+ iommu->pbdev->state = ZPCI_FS_ERROR;
+ s390_pci_generate_error_event(error, iommu->pbdev->fh,
+ iommu->pbdev->fid, addr, 0);
+ }
return ret;
}
@@ -435,6 +578,8 @@
PCI_FUNC(devfn));
memory_region_init(&iommu->mr, OBJECT(iommu), mr_name, UINT64_MAX);
address_space_init(&iommu->as, &iommu->mr, as_name);
+ iommu->iotlb = g_hash_table_new_full(g_int64_hash, g_int64_equal,
+ NULL, g_free);
table->iommu[PCI_SLOT(devfn)] = iommu;
g_free(mr_name);
@@ -524,6 +669,7 @@
void s390_pci_iommu_disable(S390PCIIOMMU *iommu)
{
iommu->enabled = false;
+ g_hash_table_remove_all(iommu->iotlb);
memory_region_del_subregion(&iommu->mr, MEMORY_REGION(&iommu->iommu_mr));
object_unparent(OBJECT(&iommu->iommu_mr));
}
@@ -539,6 +685,7 @@
}
table->iommu[PCI_SLOT(devfn)] = NULL;
+ g_hash_table_destroy(iommu->iotlb);
address_space_destroy(&iommu->as);
object_unparent(OBJECT(&iommu->mr));
object_unparent(OBJECT(iommu));
diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
index 2993f0d..1f7f9b5 100644
--- a/hw/s390x/s390-pci-bus.h
+++ b/hw/s390x/s390-pci-bus.h
@@ -148,6 +148,8 @@
#define ZPCI_STE_FLAG_MASK 0x7ffULL
#define ZPCI_STE_ADDR_MASK (~ZPCI_STE_FLAG_MASK)
+#define ZPCI_SFAA_MASK (~((1ULL << 20) - 1))
+
/* I/O Page tables */
#define ZPCI_PTE_VALID_MASK 0x400
#define ZPCI_PTE_INVALID 0x400
@@ -165,10 +167,15 @@
#define ZPCI_TABLE_INVALID 0x20
#define ZPCI_TABLE_PROTECTED 0x200
#define ZPCI_TABLE_UNPROTECTED 0x000
+#define ZPCI_TABLE_FC 0x400
#define ZPCI_TABLE_VALID_MASK 0x20
#define ZPCI_TABLE_PROT_MASK 0x200
+#define ZPCI_ETT_RT 1
+#define ZPCI_ETT_ST 0
+#define ZPCI_ETT_PT -1
+
/* PCI Function States
*
* reserved: default; device has just been plugged or is in progress of being
@@ -253,6 +260,13 @@
uint32_t pba_offset;
} S390MsixInfo;
+typedef struct S390IOTLBEntry {
+ uint64_t iova;
+ uint64_t translated_addr;
+ uint64_t len;
+ uint64_t perm;
+} S390IOTLBEntry;
+
typedef struct S390PCIBusDevice S390PCIBusDevice;
typedef struct S390PCIIOMMU {
Object parent_obj;
@@ -264,6 +278,7 @@
uint64_t g_iota;
uint64_t pba;
uint64_t pal;
+ GHashTable *iotlb;
} S390PCIIOMMU;
typedef struct S390PCIIOMMUTable {
@@ -320,6 +335,8 @@
void s390_pci_iommu_disable(S390PCIIOMMU *iommu);
void s390_pci_generate_error_event(uint16_t pec, uint32_t fh, uint32_t fid,
uint64_t faddr, uint32_t e);
+uint16_t s390_guest_io_table_walk(uint64_t g_iota, hwaddr addr,
+ S390IOTLBEntry *entry);
S390PCIBusDevice *s390_pci_find_dev_by_idx(S390pciState *s, uint32_t idx);
S390PCIBusDevice *s390_pci_find_dev_by_fh(S390pciState *s, uint32_t fh);
S390PCIBusDevice *s390_pci_find_dev_by_fid(S390pciState *s, uint32_t fid);
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index be44921..3fcc330 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -571,27 +571,65 @@
return 0;
}
+static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
+{
+ S390IOTLBEntry *cache = g_hash_table_lookup(iommu->iotlb, &entry->iova);
+ IOMMUTLBEntry notify = {
+ .target_as = &address_space_memory,
+ .iova = entry->iova,
+ .translated_addr = entry->translated_addr,
+ .perm = entry->perm,
+ .addr_mask = ~PAGE_MASK,
+ };
+
+ if (entry->perm == IOMMU_NONE) {
+ if (!cache) {
+ return;
+ }
+ g_hash_table_remove(iommu->iotlb, &entry->iova);
+ } else {
+ if (cache) {
+ if (cache->perm == entry->perm &&
+ cache->translated_addr == entry->translated_addr) {
+ return;
+ }
+
+ notify.perm = IOMMU_NONE;
+ memory_region_notify_iommu(&iommu->iommu_mr, notify);
+ notify.perm = entry->perm;
+ }
+
+ cache = g_new(S390IOTLBEntry, 1);
+ cache->iova = entry->iova;
+ cache->translated_addr = entry->translated_addr;
+ cache->len = PAGE_SIZE;
+ cache->perm = entry->perm;
+ g_hash_table_replace(iommu->iotlb, &cache->iova, cache);
+ }
+
+ memory_region_notify_iommu(&iommu->iommu_mr, notify);
+}
+
int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
{
CPUS390XState *env = &cpu->env;
uint32_t fh;
+ uint16_t error = 0;
S390PCIBusDevice *pbdev;
S390PCIIOMMU *iommu;
+ S390IOTLBEntry entry;
hwaddr start, end;
- IOMMUTLBEntry entry;
- IOMMUMemoryRegion *iommu_mr;
- IOMMUMemoryRegionClass *imrc;
cpu_synchronize_state(CPU(cpu));
if (env->psw.mask & PSW_MASK_PSTATE) {
s390_program_interrupt(env, PGM_PRIVILEGED, 4, ra);
- goto out;
+ return 0;
}
if (r2 & 0x1) {
s390_program_interrupt(env, PGM_SPECIFICATION, 4, ra);
- goto out;
+ return 0;
}
fh = env->regs[r1] >> 32;
@@ -602,7 +640,7 @@
if (!pbdev) {
DPRINTF("rpcit no pci dev\n");
setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE);
- goto out;
+ return 0;
}
switch (pbdev->state) {
@@ -622,44 +660,37 @@
iommu = pbdev->iommu;
if (!iommu->g_iota) {
- pbdev->state = ZPCI_FS_ERROR;
- setcc(cpu, ZPCI_PCI_LS_ERR);
- s390_set_status_code(env, r1, ZPCI_PCI_ST_INSUF_RES);
- s390_pci_generate_error_event(ERR_EVENT_INVALAS, pbdev->fh, pbdev->fid,
- start, 0);
- goto out;
+ error = ERR_EVENT_INVALAS;
+ goto err;
}
if (end < iommu->pba || start > iommu->pal) {
- pbdev->state = ZPCI_FS_ERROR;
- setcc(cpu, ZPCI_PCI_LS_ERR);
- s390_set_status_code(env, r1, ZPCI_PCI_ST_INSUF_RES);
- s390_pci_generate_error_event(ERR_EVENT_OORANGE, pbdev->fh, pbdev->fid,
- start, 0);
- goto out;
+ error = ERR_EVENT_OORANGE;
+ goto err;
}
- iommu_mr = &iommu->iommu_mr;
- imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
-
while (start < end) {
- entry = imrc->translate(iommu_mr, start, IOMMU_NONE);
-
- if (!entry.translated_addr) {
- pbdev->state = ZPCI_FS_ERROR;
- setcc(cpu, ZPCI_PCI_LS_ERR);
- s390_set_status_code(env, r1, ZPCI_PCI_ST_INSUF_RES);
- s390_pci_generate_error_event(ERR_EVENT_SERR, pbdev->fh, pbdev->fid,
- start, ERR_EVENT_Q_BIT);
- goto out;
+ error = s390_guest_io_table_walk(iommu->g_iota, start, &entry);
+ if (error) {
+ break;
}
- memory_region_notify_iommu(iommu_mr, entry);
- start += entry.addr_mask + 1;
+ start += entry.len;
+ while (entry.iova < start && entry.iova < end) {
+ s390_pci_update_iotlb(iommu, &entry);
+ entry.iova += PAGE_SIZE;
+ entry.translated_addr += PAGE_SIZE;
+ }
}
-
- setcc(cpu, ZPCI_PCI_LS_OK);
-out:
+err:
+ if (error) {
+ pbdev->state = ZPCI_FS_ERROR;
+ setcc(cpu, ZPCI_PCI_LS_ERR);
+ s390_set_status_code(env, r1, ZPCI_PCI_ST_FUNC_IN_ERR);
+ s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0);
+ } else {
+ setcc(cpu, ZPCI_PCI_LS_OK);
+ }
return 0;
}
@@ -834,6 +865,8 @@
uint8_t dt = (g_iota >> 2) & 0x7;
uint8_t t = (g_iota >> 11) & 0x1;
+ pba &= ~0xfff;
+ pal |= 0xfff;
if (pba > pal || pba < ZPCI_SDMA_ADDR || pal > ZPCI_EDMA_ADDR) {
s390_program_interrupt(env, PGM_OPERAND, 6, ra);
return -EINVAL;
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index f2db448..4abbe89 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -77,10 +77,6 @@
MachineClass *mc = MACHINE_GET_CLASS(machine);
int i;
- if (tcg_enabled() && max_cpus > 1) {
- error_report("WARNING: SMP support on s390x is experimental!");
- }
-
/* initialize possible_cpus */
mc->possible_cpu_arch_ids(machine);
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 3d8f269..8f7fbc2 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -1111,7 +1111,7 @@
VirtIODevice *vdev = virtio_bus_get_device(&dev->bus);
int ret;
S390FLICState *fs = s390_get_flic();
- S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs);
+ S390FLICStateClass *fsc = s390_get_flic_class(fs);
ret = virtio_ccw_get_mappings(dev);
if (ret) {
@@ -1129,7 +1129,7 @@
static void virtio_ccw_release_irqroutes(VirtioCcwDevice *dev, int nvqs)
{
S390FLICState *fs = s390_get_flic();
- S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs);
+ S390FLICStateClass *fsc = s390_get_flic_class(fs);
fsc->release_adapter_routes(fs, &dev->routes);
}
diff --git a/hw/sd/sdhci-internal.h b/hw/sd/sdhci-internal.h
index fc807f0..0991acd 100644
--- a/hw/sd/sdhci-internal.h
+++ b/hw/sd/sdhci-internal.h
@@ -84,12 +84,18 @@
/* R/W Host control Register 0x0 */
#define SDHC_HOSTCTL 0x28
+#define SDHC_CTRL_LED 0x01
#define SDHC_CTRL_DMA_CHECK_MASK 0x18
#define SDHC_CTRL_SDMA 0x00
#define SDHC_CTRL_ADMA1_32 0x08
#define SDHC_CTRL_ADMA2_32 0x10
#define SDHC_CTRL_ADMA2_64 0x18
#define SDHC_DMA_TYPE(x) ((x) & SDHC_CTRL_DMA_CHECK_MASK)
+#define SDHC_CTRL_4BITBUS 0x02
+#define SDHC_CTRL_8BITBUS 0x20
+#define SDHC_CTRL_CDTEST_INS 0x40
+#define SDHC_CTRL_CDTEST_EN 0x80
+
/* R/W Power Control Register 0x0 */
#define SDHC_PWRCON 0x29
@@ -226,4 +232,21 @@
sdhc_gap_write = 2 /* SDHC stopped at block gap during write operation */
};
+extern const VMStateDescription sdhci_vmstate;
+
+
+#define ESDHC_MIX_CTRL 0x48
+#define ESDHC_VENDOR_SPEC 0xc0
+#define ESDHC_DLL_CTRL 0x60
+
+#define ESDHC_TUNING_CTRL 0xcc
+#define ESDHC_TUNE_CTRL_STATUS 0x68
+#define ESDHC_WTMK_LVL 0x44
+
+/* Undocumented register used by guests working around erratum ERR004536 */
+#define ESDHC_UNDOCUMENTED_REG27 0x6c
+
+#define ESDHC_CTRL_4BITBUS (0x1 << 1)
+#define ESDHC_CTRL_8BITBUS (0x2 << 1)
+
#endif
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index 37b2172..ee95e78 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -243,7 +243,8 @@
}
}
- if ((s->norintstsen & SDHC_NISEN_TRSCMP) &&
+ if (!(s->quirks & SDHCI_QUIRK_NO_BUSY_IRQ) &&
+ (s->norintstsen & SDHC_NISEN_TRSCMP) &&
(s->cmdreg & SDHC_CMD_RESPONSE) == SDHC_CMD_RSP_WITH_BUSY) {
s->norintsts |= SDHC_NIS_TRSCMP;
}
@@ -1188,6 +1189,8 @@
s->insert_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, sdhci_raise_insertion_irq, s);
s->transfer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, sdhci_data_transfer, s);
+
+ s->io_ops = &sdhci_mmio_ops;
}
static void sdhci_uninitfn(SDHCIState *s)
@@ -1395,6 +1398,10 @@
}
sysbus_init_irq(sbd, &s->irq);
+
+ memory_region_init_io(&s->iomem, OBJECT(s), s->io_ops, s, "sdhci",
+ SDHC_REGISTERS_MAP_SIZE);
+
sysbus_init_mmio(sbd, &s->iomem);
}
@@ -1446,11 +1453,232 @@
.class_init = sdhci_bus_class_init,
};
+static uint64_t usdhc_read(void *opaque, hwaddr offset, unsigned size)
+{
+ SDHCIState *s = SYSBUS_SDHCI(opaque);
+ uint32_t ret;
+ uint16_t hostctl;
+
+ switch (offset) {
+ default:
+ return sdhci_read(opaque, offset, size);
+
+ case SDHC_HOSTCTL:
+ /*
+ * For a detailed explanation on the following bit
+ * manipulation code see comments in a similar part of
+ * usdhc_write()
+ */
+ hostctl = SDHC_DMA_TYPE(s->hostctl) << (8 - 3);
+
+ if (s->hostctl & SDHC_CTRL_8BITBUS) {
+ hostctl |= ESDHC_CTRL_8BITBUS;
+ }
+
+ if (s->hostctl & SDHC_CTRL_4BITBUS) {
+ hostctl |= ESDHC_CTRL_4BITBUS;
+ }
+
+ ret = hostctl;
+ ret |= (uint32_t)s->blkgap << 16;
+ ret |= (uint32_t)s->wakcon << 24;
+
+ break;
+
+ case ESDHC_DLL_CTRL:
+ case ESDHC_TUNE_CTRL_STATUS:
+ case ESDHC_UNDOCUMENTED_REG27:
+ case ESDHC_TUNING_CTRL:
+ case ESDHC_VENDOR_SPEC:
+ case ESDHC_MIX_CTRL:
+ case ESDHC_WTMK_LVL:
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
+static void
+usdhc_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
+{
+ SDHCIState *s = SYSBUS_SDHCI(opaque);
+ uint8_t hostctl;
+ uint32_t value = (uint32_t)val;
+
+ switch (offset) {
+ case ESDHC_DLL_CTRL:
+ case ESDHC_TUNE_CTRL_STATUS:
+ case ESDHC_UNDOCUMENTED_REG27:
+ case ESDHC_TUNING_CTRL:
+ case ESDHC_WTMK_LVL:
+ case ESDHC_VENDOR_SPEC:
+ break;
+
+ case SDHC_HOSTCTL:
+ /*
+ * Here's What ESDHCI has at offset 0x28 (SDHC_HOSTCTL)
+ *
+ * 7 6 5 4 3 2 1 0
+ * |-----------+--------+--------+-----------+----------+---------|
+ * | Card | Card | Endian | DATA3 | Data | Led |
+ * | Detect | Detect | Mode | as Card | Transfer | Control |
+ * | Signal | Test | | Detection | Width | |
+ * | Selection | Level | | Pin | | |
+ * |-----------+--------+--------+-----------+----------+---------|
+ *
+ * and 0x29
+ *
+ * 15 10 9 8
+ * |----------+------|
+ * | Reserved | DMA |
+ * | | Sel. |
+ * | | |
+ * |----------+------|
+ *
+ * and here's what SDCHI spec expects those offsets to be:
+ *
+ * 0x28 (Host Control Register)
+ *
+ * 7 6 5 4 3 2 1 0
+ * |--------+--------+----------+------+--------+----------+---------|
+ * | Card | Card | Extended | DMA | High | Data | LED |
+ * | Detect | Detect | Data | Sel. | Speed | Transfer | Control |
+ * | Signal | Test | Transfer | | Enable | Width | |
+ * | Sel. | Level | Width | | | | |
+ * |--------+--------+----------+------+--------+----------+---------|
+ *
+ * and 0x29 (Power Control Register)
+ *
+ * |----------------------------------|
+ * | Power Control Register |
+ * | |
+ * | Description omitted, |
+ * | since it has no analog in ESDHCI |
+ * | |
+ * |----------------------------------|
+ *
+ * Since offsets 0x2A and 0x2B should be compatible between
+ * both IP specs we only need to reconcile least 16-bit of the
+ * word we've been given.
+ */
+
+ /*
+ * First, save bits 7 6 and 0 since they are identical
+ */
+ hostctl = value & (SDHC_CTRL_LED |
+ SDHC_CTRL_CDTEST_INS |
+ SDHC_CTRL_CDTEST_EN);
+ /*
+ * Second, split "Data Transfer Width" from bits 2 and 1 in to
+ * bits 5 and 1
+ */
+ if (value & ESDHC_CTRL_8BITBUS) {
+ hostctl |= SDHC_CTRL_8BITBUS;
+ }
+
+ if (value & ESDHC_CTRL_4BITBUS) {
+ hostctl |= ESDHC_CTRL_4BITBUS;
+ }
+
+ /*
+ * Third, move DMA select from bits 9 and 8 to bits 4 and 3
+ */
+ hostctl |= SDHC_DMA_TYPE(value >> (8 - 3));
+
+ /*
+ * Now place the corrected value into low 16-bit of the value
+ * we are going to give standard SDHCI write function
+ *
+ * NOTE: This transformation should be the inverse of what can
+ * be found in drivers/mmc/host/sdhci-esdhc-imx.c in Linux
+ * kernel
+ */
+ value &= ~UINT16_MAX;
+ value |= hostctl;
+ value |= (uint16_t)s->pwrcon << 8;
+
+ sdhci_write(opaque, offset, value, size);
+ break;
+
+ case ESDHC_MIX_CTRL:
+ /*
+ * So, when SD/MMC stack in Linux tries to write to "Transfer
+ * Mode Register", ESDHC i.MX quirk code will translate it
+ * into a write to ESDHC_MIX_CTRL, so we do the opposite in
+ * order to get where we started
+ *
+ * Note that Auto CMD23 Enable bit is located in a wrong place
+ * on i.MX, but since it is not used by QEMU we do not care.
+ *
+ * We don't want to call sdhci_write(.., SDHC_TRNMOD, ...)
+ * here becuase it will result in a call to
+ * sdhci_send_command(s) which we don't want.
+ *
+ */
+ s->trnmod = value & UINT16_MAX;
+ break;
+ case SDHC_TRNMOD:
+ /*
+ * Similar to above, but this time a write to "Command
+ * Register" will be translated into a 4-byte write to
+ * "Transfer Mode register" where lower 16-bit of value would
+ * be set to zero. So what we do is fill those bits with
+ * cached value from s->trnmod and let the SDHCI
+ * infrastructure handle the rest
+ */
+ sdhci_write(opaque, offset, val | s->trnmod, size);
+ break;
+ case SDHC_BLKSIZE:
+ /*
+ * ESDHCI does not implement "Host SDMA Buffer Boundary", and
+ * Linux driver will try to zero this field out which will
+ * break the rest of SDHCI emulation.
+ *
+ * Linux defaults to maximum possible setting (512K boundary)
+ * and it seems to be the only option that i.MX IP implements,
+ * so we artificially set it to that value.
+ */
+ val |= 0x7 << 12;
+ /* FALLTHROUGH */
+ default:
+ sdhci_write(opaque, offset, val, size);
+ break;
+ }
+}
+
+
+static const MemoryRegionOps usdhc_mmio_ops = {
+ .read = usdhc_read,
+ .write = usdhc_write,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 4,
+ .unaligned = false
+ },
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void imx_usdhc_init(Object *obj)
+{
+ SDHCIState *s = SYSBUS_SDHCI(obj);
+
+ s->io_ops = &usdhc_mmio_ops;
+ s->quirks = SDHCI_QUIRK_NO_BUSY_IRQ;
+}
+
+static const TypeInfo imx_usdhc_info = {
+ .name = TYPE_IMX_USDHC,
+ .parent = TYPE_SYSBUS_SDHCI,
+ .instance_init = imx_usdhc_init,
+};
+
static void sdhci_register_types(void)
{
type_register_static(&sdhci_pci_info);
type_register_static(&sdhci_sysbus_info);
type_register_static(&sdhci_bus_info);
+ type_register_static(&imx_usdhc_info);
}
type_init(sdhci_register_types)
diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c
index 4b9b54b..65e4ee6 100644
--- a/hw/timer/imx_gpt.c
+++ b/hw/timer/imx_gpt.c
@@ -113,6 +113,17 @@
CLK_HIGH, /* 111 reference clock */
};
+static const IMXClk imx7_gpt_clocks[] = {
+ CLK_NONE, /* 000 No clock source */
+ CLK_IPG, /* 001 ipg_clk, 532MHz*/
+ CLK_IPG_HIGH, /* 010 ipg_clk_highfreq */
+ CLK_EXT, /* 011 External clock */
+ CLK_32k, /* 100 ipg_clk_32k */
+ CLK_HIGH, /* 101 reference clock */
+ CLK_NONE, /* 110 not defined */
+ CLK_NONE, /* 111 not defined */
+};
+
static void imx_gpt_set_freq(IMXGPTState *s)
{
uint32_t clksrc = extract32(s->cr, GPT_CR_CLKSRC_SHIFT, 3);
@@ -512,6 +523,13 @@
s->clocks = imx6_gpt_clocks;
}
+static void imx7_gpt_init(Object *obj)
+{
+ IMXGPTState *s = IMX_GPT(obj);
+
+ s->clocks = imx7_gpt_clocks;
+}
+
static const TypeInfo imx25_gpt_info = {
.name = TYPE_IMX25_GPT,
.parent = TYPE_SYS_BUS_DEVICE,
@@ -532,11 +550,18 @@
.instance_init = imx6_gpt_init,
};
+static const TypeInfo imx7_gpt_info = {
+ .name = TYPE_IMX7_GPT,
+ .parent = TYPE_IMX25_GPT,
+ .instance_init = imx7_gpt_init,
+};
+
static void imx_gpt_register_types(void)
{
type_register_static(&imx25_gpt_info);
type_register_static(&imx31_gpt_info);
type_register_static(&imx6_gpt_info);
+ type_register_static(&imx7_gpt_info);
}
type_init(imx_gpt_register_types)
diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs
index fbcd498..41be700 100644
--- a/hw/usb/Makefile.objs
+++ b/hw/usb/Makefile.objs
@@ -12,6 +12,7 @@
common-obj-$(CONFIG_USB_MUSB) += hcd-musb.o
obj-$(CONFIG_TUSB6010) += tusb6010.o
+obj-$(CONFIG_IMX) += chipidea.o
# emulated usb devices
common-obj-$(CONFIG_USB) += dev-hub.o
diff --git a/hw/usb/chipidea.c b/hw/usb/chipidea.c
new file mode 100644
index 0000000..60d67f8
--- /dev/null
+++ b/hw/usb/chipidea.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2018, Impinj, Inc.
+ *
+ * Chipidea USB block emulation code
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/usb/hcd-ehci.h"
+#include "hw/usb/chipidea.h"
+#include "qemu/log.h"
+
+enum {
+ CHIPIDEA_USBx_DCIVERSION = 0x000,
+ CHIPIDEA_USBx_DCCPARAMS = 0x004,
+ CHIPIDEA_USBx_DCCPARAMS_HC = BIT(8),
+};
+
+static uint64_t chipidea_read(void *opaque, hwaddr offset,
+ unsigned size)
+{
+ return 0;
+}
+
+static void chipidea_write(void *opaque, hwaddr offset,
+ uint64_t value, unsigned size)
+{
+}
+
+static const struct MemoryRegionOps chipidea_ops = {
+ .read = chipidea_read,
+ .write = chipidea_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+ .impl = {
+ /*
+ * Our device would not work correctly if the guest was doing
+ * unaligned access. This might not be a limitation on the
+ * real device but in practice there is no reason for a guest
+ * to access this device unaligned.
+ */
+ .min_access_size = 4,
+ .max_access_size = 4,
+ .unaligned = false,
+ },
+};
+
+static uint64_t chipidea_dc_read(void *opaque, hwaddr offset,
+ unsigned size)
+{
+ switch (offset) {
+ case CHIPIDEA_USBx_DCIVERSION:
+ return 0x1;
+ case CHIPIDEA_USBx_DCCPARAMS:
+ /*
+ * Real hardware (at least i.MX7) will also report the
+ * controller as "Device Capable" (and 8 supported endpoints),
+ * but there doesn't seem to be much point in doing so, since
+ * we don't emulate that part.
+ */
+ return CHIPIDEA_USBx_DCCPARAMS_HC;
+ }
+
+ return 0;
+}
+
+static void chipidea_dc_write(void *opaque, hwaddr offset,
+ uint64_t value, unsigned size)
+{
+}
+
+static const struct MemoryRegionOps chipidea_dc_ops = {
+ .read = chipidea_dc_read,
+ .write = chipidea_dc_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+ .impl = {
+ /*
+ * Our device would not work correctly if the guest was doing
+ * unaligned access. This might not be a limitation on the real
+ * device but in practice there is no reason for a guest to access
+ * this device unaligned.
+ */
+ .min_access_size = 4,
+ .max_access_size = 4,
+ .unaligned = false,
+ },
+};
+
+static void chipidea_init(Object *obj)
+{
+ EHCIState *ehci = &SYS_BUS_EHCI(obj)->ehci;
+ ChipideaState *ci = CHIPIDEA(obj);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ci->iomem); i++) {
+ const struct {
+ const char *name;
+ hwaddr offset;
+ uint64_t size;
+ const struct MemoryRegionOps *ops;
+ } regions[ARRAY_SIZE(ci->iomem)] = {
+ /*
+ * Registers located between offsets 0x000 and 0xFC
+ */
+ {
+ .name = TYPE_CHIPIDEA ".misc",
+ .offset = 0x000,
+ .size = 0x100,
+ .ops = &chipidea_ops,
+ },
+ /*
+ * Registers located between offsets 0x1A4 and 0x1DC
+ */
+ {
+ .name = TYPE_CHIPIDEA ".endpoints",
+ .offset = 0x1A4,
+ .size = 0x1DC - 0x1A4 + 4,
+ .ops = &chipidea_ops,
+ },
+ /*
+ * USB_x_DCIVERSION and USB_x_DCCPARAMS
+ */
+ {
+ .name = TYPE_CHIPIDEA ".dc",
+ .offset = 0x120,
+ .size = 8,
+ .ops = &chipidea_dc_ops,
+ },
+ };
+
+ memory_region_init_io(&ci->iomem[i],
+ obj,
+ regions[i].ops,
+ ci,
+ regions[i].name,
+ regions[i].size);
+
+ memory_region_add_subregion(&ehci->mem,
+ regions[i].offset,
+ &ci->iomem[i]);
+ }
+}
+
+static void chipidea_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ SysBusEHCIClass *sec = SYS_BUS_EHCI_CLASS(klass);
+
+ /*
+ * Offsets used were taken from i.MX7Dual Applications Processor
+ * Reference Manual, Rev 0.1, p. 3177, Table 11-59
+ */
+ sec->capsbase = 0x100;
+ sec->opregbase = 0x140;
+ sec->portnr = 1;
+
+ set_bit(DEVICE_CATEGORY_USB, dc->categories);
+ dc->desc = "Chipidea USB Module";
+}
+
+static const TypeInfo chipidea_info = {
+ .name = TYPE_CHIPIDEA,
+ .parent = TYPE_SYS_BUS_EHCI,
+ .instance_size = sizeof(ChipideaState),
+ .instance_init = chipidea_init,
+ .class_init = chipidea_class_init,
+};
+
+static void chipidea_register_type(void)
+{
+ type_register_static(&chipidea_info);
+}
+type_init(chipidea_register_type)
diff --git a/include/hw/intc/imx_gpcv2.h b/include/hw/intc/imx_gpcv2.h
new file mode 100644
index 0000000..ed978b2
--- /dev/null
+++ b/include/hw/intc/imx_gpcv2.h
@@ -0,0 +1,22 @@
+#ifndef IMX_GPCV2_H
+#define IMX_GPCV2_H
+
+#include "hw/sysbus.h"
+
+enum IMXGPCv2Registers {
+ GPC_NUM = 0xE00 / sizeof(uint32_t),
+};
+
+typedef struct IMXGPCv2State {
+ /*< private >*/
+ SysBusDevice parent_obj;
+
+ /*< public >*/
+ MemoryRegion iomem;
+ uint32_t regs[GPC_NUM];
+} IMXGPCv2State;
+
+#define TYPE_IMX_GPCV2 "imx-gpcv2"
+#define IMX_GPCV2(obj) OBJECT_CHECK(IMXGPCv2State, (obj), TYPE_IMX_GPCV2)
+
+#endif /* IMX_GPCV2_H */
diff --git a/include/hw/misc/imx2_wdt.h b/include/hw/misc/imx2_wdt.h
new file mode 100644
index 0000000..8afc99a
--- /dev/null
+++ b/include/hw/misc/imx2_wdt.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2017, Impinj, Inc.
+ *
+ * i.MX2 Watchdog IP block
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX2_WDT_H
+#define IMX2_WDT_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_IMX2_WDT "imx2.wdt"
+#define IMX2_WDT(obj) OBJECT_CHECK(IMX2WdtState, (obj), TYPE_IMX2_WDT)
+
+enum IMX2WdtRegisters {
+ IMX2_WDT_WCR = 0x0000,
+ IMX2_WDT_REG_NUM = 0x0008 / sizeof(uint16_t) + 1,
+};
+
+
+typedef struct IMX2WdtState {
+ /* <private> */
+ SysBusDevice parent_obj;
+
+ MemoryRegion mmio;
+} IMX2WdtState;
+
+#endif /* IMX7_SNVS_H */
diff --git a/include/hw/misc/imx7_ccm.h b/include/hw/misc/imx7_ccm.h
new file mode 100644
index 0000000..9538f37
--- /dev/null
+++ b/include/hw/misc/imx7_ccm.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2017, Impinj, Inc.
+ *
+ * i.MX7 CCM, PMU and ANALOG IP blocks emulation code
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX7_CCM_H
+#define IMX7_CCM_H
+
+#include "hw/misc/imx_ccm.h"
+#include "qemu/bitops.h"
+
+enum IMX7AnalogRegisters {
+ ANALOG_PLL_ARM,
+ ANALOG_PLL_ARM_SET,
+ ANALOG_PLL_ARM_CLR,
+ ANALOG_PLL_ARM_TOG,
+ ANALOG_PLL_DDR,
+ ANALOG_PLL_DDR_SET,
+ ANALOG_PLL_DDR_CLR,
+ ANALOG_PLL_DDR_TOG,
+ ANALOG_PLL_DDR_SS,
+ ANALOG_PLL_DDR_SS_SET,
+ ANALOG_PLL_DDR_SS_CLR,
+ ANALOG_PLL_DDR_SS_TOG,
+ ANALOG_PLL_DDR_NUM,
+ ANALOG_PLL_DDR_NUM_SET,
+ ANALOG_PLL_DDR_NUM_CLR,
+ ANALOG_PLL_DDR_NUM_TOG,
+ ANALOG_PLL_DDR_DENOM,
+ ANALOG_PLL_DDR_DENOM_SET,
+ ANALOG_PLL_DDR_DENOM_CLR,
+ ANALOG_PLL_DDR_DENOM_TOG,
+ ANALOG_PLL_480,
+ ANALOG_PLL_480_SET,
+ ANALOG_PLL_480_CLR,
+ ANALOG_PLL_480_TOG,
+ ANALOG_PLL_480A,
+ ANALOG_PLL_480A_SET,
+ ANALOG_PLL_480A_CLR,
+ ANALOG_PLL_480A_TOG,
+ ANALOG_PLL_480B,
+ ANALOG_PLL_480B_SET,
+ ANALOG_PLL_480B_CLR,
+ ANALOG_PLL_480B_TOG,
+ ANALOG_PLL_ENET,
+ ANALOG_PLL_ENET_SET,
+ ANALOG_PLL_ENET_CLR,
+ ANALOG_PLL_ENET_TOG,
+ ANALOG_PLL_AUDIO,
+ ANALOG_PLL_AUDIO_SET,
+ ANALOG_PLL_AUDIO_CLR,
+ ANALOG_PLL_AUDIO_TOG,
+ ANALOG_PLL_AUDIO_SS,
+ ANALOG_PLL_AUDIO_SS_SET,
+ ANALOG_PLL_AUDIO_SS_CLR,
+ ANALOG_PLL_AUDIO_SS_TOG,
+ ANALOG_PLL_AUDIO_NUM,
+ ANALOG_PLL_AUDIO_NUM_SET,
+ ANALOG_PLL_AUDIO_NUM_CLR,
+ ANALOG_PLL_AUDIO_NUM_TOG,
+ ANALOG_PLL_AUDIO_DENOM,
+ ANALOG_PLL_AUDIO_DENOM_SET,
+ ANALOG_PLL_AUDIO_DENOM_CLR,
+ ANALOG_PLL_AUDIO_DENOM_TOG,
+ ANALOG_PLL_VIDEO,
+ ANALOG_PLL_VIDEO_SET,
+ ANALOG_PLL_VIDEO_CLR,
+ ANALOG_PLL_VIDEO_TOG,
+ ANALOG_PLL_VIDEO_SS,
+ ANALOG_PLL_VIDEO_SS_SET,
+ ANALOG_PLL_VIDEO_SS_CLR,
+ ANALOG_PLL_VIDEO_SS_TOG,
+ ANALOG_PLL_VIDEO_NUM,
+ ANALOG_PLL_VIDEO_NUM_SET,
+ ANALOG_PLL_VIDEO_NUM_CLR,
+ ANALOG_PLL_VIDEO_NUM_TOG,
+ ANALOG_PLL_VIDEO_DENOM,
+ ANALOG_PLL_VIDEO_DENOM_SET,
+ ANALOG_PLL_VIDEO_DENOM_CLR,
+ ANALOG_PLL_VIDEO_DENOM_TOG,
+ ANALOG_PLL_MISC0,
+ ANALOG_PLL_MISC0_SET,
+ ANALOG_PLL_MISC0_CLR,
+ ANALOG_PLL_MISC0_TOG,
+
+ ANALOG_DIGPROG = 0x800 / sizeof(uint32_t),
+ ANALOG_MAX,
+
+ ANALOG_PLL_LOCK = BIT(31)
+};
+
+enum IMX7CCMRegisters {
+ CCM_MAX = 0xBE00 / sizeof(uint32_t) + 1,
+};
+
+enum IMX7PMURegisters {
+ PMU_MAX = 0x140 / sizeof(uint32_t),
+};
+
+#define TYPE_IMX7_CCM "imx7.ccm"
+#define IMX7_CCM(obj) OBJECT_CHECK(IMX7CCMState, (obj), TYPE_IMX7_CCM)
+
+typedef struct IMX7CCMState {
+ /* <private> */
+ IMXCCMState parent_obj;
+
+ /* <public> */
+ MemoryRegion iomem;
+
+ uint32_t ccm[CCM_MAX];
+} IMX7CCMState;
+
+
+#define TYPE_IMX7_ANALOG "imx7.analog"
+#define IMX7_ANALOG(obj) OBJECT_CHECK(IMX7AnalogState, (obj), TYPE_IMX7_ANALOG)
+
+typedef struct IMX7AnalogState {
+ /* <private> */
+ IMXCCMState parent_obj;
+
+ /* <public> */
+ struct {
+ MemoryRegion container;
+ MemoryRegion analog;
+ MemoryRegion digprog;
+ MemoryRegion pmu;
+ } mmio;
+
+ uint32_t analog[ANALOG_MAX];
+ uint32_t pmu[PMU_MAX];
+} IMX7AnalogState;
+
+#endif /* IMX7_CCM_H */
diff --git a/include/hw/misc/imx7_gpr.h b/include/hw/misc/imx7_gpr.h
new file mode 100644
index 0000000..e19373d
--- /dev/null
+++ b/include/hw/misc/imx7_gpr.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2017, Impinj, Inc.
+ *
+ * i.MX7 GPR IP block emulation code
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX7_GPR_H
+#define IMX7_GPR_H
+
+#include "qemu/bitops.h"
+#include "hw/sysbus.h"
+
+#define TYPE_IMX7_GPR "imx7.gpr"
+#define IMX7_GPR(obj) OBJECT_CHECK(IMX7GPRState, (obj), TYPE_IMX7_GPR)
+
+typedef struct IMX7GPRState {
+ /* <private> */
+ SysBusDevice parent_obj;
+
+ MemoryRegion mmio;
+} IMX7GPRState;
+
+#endif /* IMX7_GPR_H */
diff --git a/include/hw/misc/imx7_snvs.h b/include/hw/misc/imx7_snvs.h
new file mode 100644
index 0000000..255f8f2
--- /dev/null
+++ b/include/hw/misc/imx7_snvs.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2017, Impinj, Inc.
+ *
+ * i.MX7 SNVS block emulation code
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX7_SNVS_H
+#define IMX7_SNVS_H
+
+#include "qemu/bitops.h"
+#include "hw/sysbus.h"
+
+
+enum IMX7SNVSRegisters {
+ SNVS_LPCR = 0x38,
+ SNVS_LPCR_TOP = BIT(6),
+ SNVS_LPCR_DP_EN = BIT(5)
+};
+
+#define TYPE_IMX7_SNVS "imx7.snvs"
+#define IMX7_SNVS(obj) OBJECT_CHECK(IMX7SNVSState, (obj), TYPE_IMX7_SNVS)
+
+typedef struct IMX7SNVSState {
+ /* <private> */
+ SysBusDevice parent_obj;
+
+ MemoryRegion mmio;
+} IMX7SNVSState;
+
+#endif /* IMX7_SNVS_H */
diff --git a/include/hw/s390x/s390_flic.h b/include/hw/s390x/s390_flic.h
index 7aab6ef..4687ecf 100644
--- a/include/hw/s390x/s390_flic.h
+++ b/include/hw/s390x/s390_flic.h
@@ -16,6 +16,7 @@
#include "hw/sysbus.h"
#include "hw/s390x/adapter.h"
#include "hw/virtio/virtio.h"
+#include "qemu/queue.h"
/*
* Reserve enough gsis to accommodate all virtio devices.
@@ -66,6 +67,11 @@
int (*modify_ais_mode)(S390FLICState *fs, uint8_t isc, uint16_t mode);
int (*inject_airq)(S390FLICState *fs, uint8_t type, uint8_t isc,
uint8_t flags);
+ void (*inject_service)(S390FLICState *fs, uint32_t parm);
+ void (*inject_io)(S390FLICState *fs, uint16_t subchannel_id,
+ uint16_t subchannel_nr, uint32_t io_int_parm,
+ uint32_t io_int_word);
+ void (*inject_crw_mchk)(S390FLICState *fs);
} S390FLICStateClass;
#define TYPE_KVM_S390_FLIC "s390-flic-kvm"
@@ -80,24 +86,57 @@
#define SIC_IRQ_MODE_SINGLE 1
#define AIS_MODE_MASK(isc) (0x80 >> isc)
+#define ISC_TO_PENDING_IO(_isc) (0x80 >> (_isc))
+#define CR6_TO_PENDING_IO(_cr6) (((_cr6) >> 24) & 0xff)
+
+/* organize the ISC bits so that the macros above work */
+#define FLIC_PENDING_IO_ISC7 (1 << 0)
+#define FLIC_PENDING_IO_ISC6 (1 << 1)
+#define FLIC_PENDING_IO_ISC5 (1 << 2)
+#define FLIC_PENDING_IO_ISC4 (1 << 3)
+#define FLIC_PENDING_IO_ISC3 (1 << 4)
+#define FLIC_PENDING_IO_ISC2 (1 << 5)
+#define FLIC_PENDING_IO_ISC1 (1 << 6)
+#define FLIC_PENDING_IO_ISC0 (1 << 7)
+#define FLIC_PENDING_SERVICE (1 << 8)
+#define FLIC_PENDING_MCHK_CR (1 << 9)
+
+#define FLIC_PENDING_IO (FLIC_PENDING_IO_ISC0 | FLIC_PENDING_IO_ISC1 | \
+ FLIC_PENDING_IO_ISC2 | FLIC_PENDING_IO_ISC3 | \
+ FLIC_PENDING_IO_ISC4 | FLIC_PENDING_IO_ISC5 | \
+ FLIC_PENDING_IO_ISC6 | FLIC_PENDING_IO_ISC7)
+
+typedef struct QEMUS390FlicIO {
+ uint16_t id;
+ uint16_t nr;
+ uint32_t parm;
+ uint32_t word;
+ QLIST_ENTRY(QEMUS390FlicIO) next;
+} QEMUS390FlicIO;
+
typedef struct QEMUS390FLICState {
S390FLICState parent_obj;
+ uint32_t pending;
+ uint32_t service_param;
uint8_t simm;
uint8_t nimm;
+ QLIST_HEAD(, QEMUS390FlicIO) io[8];
} QEMUS390FLICState;
+uint32_t qemu_s390_flic_dequeue_service(QEMUS390FLICState *flic);
+QEMUS390FlicIO *qemu_s390_flic_dequeue_io(QEMUS390FLICState *flic,
+ uint64_t cr6);
+void qemu_s390_flic_dequeue_crw_mchk(QEMUS390FLICState *flic);
+bool qemu_s390_flic_has_service(QEMUS390FLICState *flic);
+bool qemu_s390_flic_has_io(QEMUS390FLICState *fs, uint64_t cr6);
+bool qemu_s390_flic_has_crw_mchk(QEMUS390FLICState *flic);
+bool qemu_s390_flic_has_any(QEMUS390FLICState *flic);
+
void s390_flic_init(void);
S390FLICState *s390_get_flic(void);
+QEMUS390FLICState *s390_get_qemu_flic(S390FLICState *fs);
+S390FLICStateClass *s390_get_flic_class(S390FLICState *fs);
bool ais_needed(void *opaque);
-#ifdef CONFIG_KVM
-DeviceState *s390_flic_kvm_create(void);
-#else
-static inline DeviceState *s390_flic_kvm_create(void)
-{
- return NULL;
-}
-#endif
-
#endif /* HW_S390_FLIC_H */
diff --git a/include/hw/sd/sdhci.h b/include/hw/sd/sdhci.h
index 1cf70f8..f8d1ba3 100644
--- a/include/hw/sd/sdhci.h
+++ b/include/hw/sd/sdhci.h
@@ -44,6 +44,7 @@
AddressSpace sysbus_dma_as;
AddressSpace *dma_as;
MemoryRegion *dma_mr;
+ const MemoryRegionOps *io_ops;
QEMUTimer *insert_timer; /* timer for 'changing' sd card. */
QEMUTimer *transfer_timer;
@@ -91,8 +92,18 @@
/* Configurable properties */
bool pending_insert_quirk; /* Quirk for Raspberry Pi card insert int */
+ uint32_t quirks;
} SDHCIState;
+/*
+ * Controller does not provide transfer-complete interrupt when not
+ * busy.
+ *
+ * NOTE: This definition is taken out of Linux kernel and so the
+ * original bit number is preserved
+ */
+#define SDHCI_QUIRK_NO_BUSY_IRQ BIT(14)
+
#define TYPE_PCI_SDHCI "sdhci-pci"
#define PCI_SDHCI(obj) OBJECT_CHECK(SDHCIState, (obj), TYPE_PCI_SDHCI)
@@ -100,4 +111,6 @@
#define SYSBUS_SDHCI(obj) \
OBJECT_CHECK(SDHCIState, (obj), TYPE_SYSBUS_SDHCI)
+#define TYPE_IMX_USDHC "imx-usdhc"
+
#endif /* SDHCI_H */
diff --git a/include/hw/timer/imx_gpt.h b/include/hw/timer/imx_gpt.h
index eac59b2..20ccb32 100644
--- a/include/hw/timer/imx_gpt.h
+++ b/include/hw/timer/imx_gpt.h
@@ -77,6 +77,7 @@
#define TYPE_IMX25_GPT "imx25.gpt"
#define TYPE_IMX31_GPT "imx31.gpt"
#define TYPE_IMX6_GPT "imx6.gpt"
+#define TYPE_IMX7_GPT "imx7.gpt"
#define TYPE_IMX_GPT TYPE_IMX25_GPT
diff --git a/include/hw/usb/chipidea.h b/include/hw/usb/chipidea.h
new file mode 100644
index 0000000..1ec2e9d
--- /dev/null
+++ b/include/hw/usb/chipidea.h
@@ -0,0 +1,16 @@
+#ifndef CHIPIDEA_H
+#define CHIPIDEA_H
+
+#include "hw/usb/hcd-ehci.h"
+
+typedef struct ChipideaState {
+ /*< private >*/
+ EHCISysBusState parent_obj;
+
+ MemoryRegion iomem[3];
+} ChipideaState;
+
+#define TYPE_CHIPIDEA "usb-chipidea"
+#define CHIPIDEA(obj) OBJECT_CHECK(ChipideaState, (obj), TYPE_CHIPIDEA)
+
+#endif /* CHIPIDEA_H */
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 32a4767..8bb9a2c 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -512,6 +512,21 @@
ARM_HWCAP_A64_SHA1 = 1 << 5,
ARM_HWCAP_A64_SHA2 = 1 << 6,
ARM_HWCAP_A64_CRC32 = 1 << 7,
+ ARM_HWCAP_A64_ATOMICS = 1 << 8,
+ ARM_HWCAP_A64_FPHP = 1 << 9,
+ ARM_HWCAP_A64_ASIMDHP = 1 << 10,
+ ARM_HWCAP_A64_CPUID = 1 << 11,
+ ARM_HWCAP_A64_ASIMDRDM = 1 << 12,
+ ARM_HWCAP_A64_JSCVT = 1 << 13,
+ ARM_HWCAP_A64_FCMA = 1 << 14,
+ ARM_HWCAP_A64_LRCPC = 1 << 15,
+ ARM_HWCAP_A64_DCPOP = 1 << 16,
+ ARM_HWCAP_A64_SHA3 = 1 << 17,
+ ARM_HWCAP_A64_SM3 = 1 << 18,
+ ARM_HWCAP_A64_SM4 = 1 << 19,
+ ARM_HWCAP_A64_ASIMDDP = 1 << 20,
+ ARM_HWCAP_A64_SHA512 = 1 << 21,
+ ARM_HWCAP_A64_SVE = 1 << 22,
};
#define ELF_HWCAP get_elf_hwcap()
@@ -532,6 +547,10 @@
GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP_A64_SHA1);
GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP_A64_SHA2);
GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP_A64_CRC32);
+ GET_FEATURE(ARM_FEATURE_V8_SHA3, ARM_HWCAP_A64_SHA3);
+ GET_FEATURE(ARM_FEATURE_V8_SM3, ARM_HWCAP_A64_SM3);
+ GET_FEATURE(ARM_FEATURE_V8_SM4, ARM_HWCAP_A64_SM4);
+ GET_FEATURE(ARM_FEATURE_V8_SHA512, ARM_HWCAP_A64_SHA512);
#undef GET_FEATURE
return hwcaps;
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index d2bb59e..521444a 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -153,6 +153,49 @@
uint32_t base_mask;
} TCR;
+/* Define a maximum sized vector register.
+ * For 32-bit, this is a 128-bit NEON/AdvSIMD register.
+ * For 64-bit, this is a 2048-bit SVE register.
+ *
+ * Note that the mapping between S, D, and Q views of the register bank
+ * differs between AArch64 and AArch32.
+ * In AArch32:
+ * Qn = regs[n].d[1]:regs[n].d[0]
+ * Dn = regs[n / 2].d[n & 1]
+ * Sn = regs[n / 4].d[n % 4 / 2],
+ * bits 31..0 for even n, and bits 63..32 for odd n
+ * (and regs[16] to regs[31] are inaccessible)
+ * In AArch64:
+ * Zn = regs[n].d[*]
+ * Qn = regs[n].d[1]:regs[n].d[0]
+ * Dn = regs[n].d[0]
+ * Sn = regs[n].d[0] bits 31..0
+ *
+ * This corresponds to the architecturally defined mapping between
+ * the two execution states, and means we do not need to explicitly
+ * map these registers when changing states.
+ *
+ * Align the data for use with TCG host vector operations.
+ */
+
+#ifdef TARGET_AARCH64
+# define ARM_MAX_VQ 16
+#else
+# define ARM_MAX_VQ 1
+#endif
+
+typedef struct ARMVectorReg {
+ uint64_t d[2 * ARM_MAX_VQ] QEMU_ALIGNED(16);
+} ARMVectorReg;
+
+/* In AArch32 mode, predicate registers do not exist at all. */
+#ifdef TARGET_AARCH64
+typedef struct ARMPredicateReg {
+ uint64_t p[2 * ARM_MAX_VQ / 8] QEMU_ALIGNED(16);
+} ARMPredicateReg;
+#endif
+
+
typedef struct CPUARMState {
/* Regs for current mode. */
uint32_t regs[16];
@@ -477,22 +520,12 @@
/* VFP coprocessor state. */
struct {
- /* VFP/Neon register state. Note that the mapping between S, D and Q
- * views of the register bank differs between AArch64 and AArch32:
- * In AArch32:
- * Qn = regs[2n+1]:regs[2n]
- * Dn = regs[n]
- * Sn = regs[n/2] bits 31..0 for even n, and bits 63..32 for odd n
- * (and regs[32] to regs[63] are inaccessible)
- * In AArch64:
- * Qn = regs[2n+1]:regs[2n]
- * Dn = regs[2n]
- * Sn = regs[2n] bits 31..0
- * This corresponds to the architecturally defined mapping between
- * the two execution states, and means we do not need to explicitly
- * map these registers when changing states.
- */
- uint64_t regs[64];
+ ARMVectorReg zregs[32];
+
+#ifdef TARGET_AARCH64
+ /* Store FFR as pregs[16] to make it easier to treat as any other. */
+ ARMPredicateReg pregs[17];
+#endif
uint32_t xregs[16];
/* We store these fpcsr fields separately for convenience. */
@@ -516,6 +549,9 @@
*/
float_status fp_status;
float_status standard_fp_status;
+
+ /* ZCR_EL[1-3] */
+ uint64_t zcr_el[4];
} vfp;
uint64_t exclusive_addr;
uint64_t exclusive_val;
@@ -890,6 +926,8 @@
#define CPTR_TCPAC (1U << 31)
#define CPTR_TTA (1U << 20)
#define CPTR_TFP (1U << 10)
+#define CPTR_TZ (1U << 8) /* CPTR_EL2 */
+#define CPTR_EZ (1U << 8) /* CPTR_EL3 */
#define MDCR_EPMAD (1U << 21)
#define MDCR_EDAD (1U << 20)
@@ -1341,6 +1379,10 @@
ARM_FEATURE_M_SECURITY, /* M profile Security Extension */
ARM_FEATURE_JAZELLE, /* has (trivial) Jazelle implementation */
ARM_FEATURE_SVE, /* has Scalable Vector Extension */
+ ARM_FEATURE_V8_SHA512, /* implements SHA512 part of v8 Crypto Extensions */
+ ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */
+ ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */
+ ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
};
static inline int arm_feature(CPUARMState *env, int feature)
@@ -1506,16 +1548,42 @@
*/
void armv7m_nvic_set_pending(void *opaque, int irq, bool secure);
/**
+ * armv7m_nvic_set_pending_derived: mark this derived exception as pending
+ * @opaque: the NVIC
+ * @irq: the exception number to mark pending
+ * @secure: false for non-banked exceptions or for the nonsecure
+ * version of a banked exception, true for the secure version of a banked
+ * exception.
+ *
+ * Similar to armv7m_nvic_set_pending(), but specifically for derived
+ * exceptions (exceptions generated in the course of trying to take
+ * a different exception).
+ */
+void armv7m_nvic_set_pending_derived(void *opaque, int irq, bool secure);
+/**
+ * armv7m_nvic_get_pending_irq_info: return highest priority pending
+ * exception, and whether it targets Secure state
+ * @opaque: the NVIC
+ * @pirq: set to pending exception number
+ * @ptargets_secure: set to whether pending exception targets Secure
+ *
+ * This function writes the number of the highest priority pending
+ * exception (the one which would be made active by
+ * armv7m_nvic_acknowledge_irq()) to @pirq, and sets @ptargets_secure
+ * to true if the current highest priority pending exception should
+ * be taken to Secure state, false for NS.
+ */
+void armv7m_nvic_get_pending_irq_info(void *opaque, int *pirq,
+ bool *ptargets_secure);
+/**
* armv7m_nvic_acknowledge_irq: make highest priority pending exception active
* @opaque: the NVIC
*
* Move the current highest priority pending exception from the pending
* state to the active state, and update v7m.exception to indicate that
* it is the exception currently being handled.
- *
- * Returns: true if exception should be taken to Secure state, false for NS
*/
-bool armv7m_nvic_acknowledge_irq(void *opaque);
+void armv7m_nvic_acknowledge_irq(void *opaque);
/**
* armv7m_nvic_complete_irq: complete specified interrupt or exception
* @opaque: the NVIC
@@ -2610,6 +2678,10 @@
#define ARM_TBFLAG_TBI0_MASK (0x1ull << ARM_TBFLAG_TBI0_SHIFT)
#define ARM_TBFLAG_TBI1_SHIFT 1 /* TBI1 for EL0/1 */
#define ARM_TBFLAG_TBI1_MASK (0x1ull << ARM_TBFLAG_TBI1_SHIFT)
+#define ARM_TBFLAG_SVEEXC_EL_SHIFT 2
+#define ARM_TBFLAG_SVEEXC_EL_MASK (0x3 << ARM_TBFLAG_SVEEXC_EL_SHIFT)
+#define ARM_TBFLAG_ZCR_LEN_SHIFT 4
+#define ARM_TBFLAG_ZCR_LEN_MASK (0xf << ARM_TBFLAG_ZCR_LEN_SHIFT)
/* some convenience accessor macros */
#define ARM_TBFLAG_AARCH64_STATE(F) \
@@ -2646,6 +2718,10 @@
(((F) & ARM_TBFLAG_TBI0_MASK) >> ARM_TBFLAG_TBI0_SHIFT)
#define ARM_TBFLAG_TBI1(F) \
(((F) & ARM_TBFLAG_TBI1_MASK) >> ARM_TBFLAG_TBI1_SHIFT)
+#define ARM_TBFLAG_SVEEXC_EL(F) \
+ (((F) & ARM_TBFLAG_SVEEXC_EL_MASK) >> ARM_TBFLAG_SVEEXC_EL_SHIFT)
+#define ARM_TBFLAG_ZCR_LEN(F) \
+ (((F) & ARM_TBFLAG_ZCR_LEN_MASK) >> ARM_TBFLAG_ZCR_LEN_SHIFT)
static inline bool bswap_code(bool sctlr_b)
{
@@ -2769,7 +2845,7 @@
*/
static inline uint64_t *aa32_vfp_dreg(CPUARMState *env, unsigned regno)
{
- return &env->vfp.regs[regno];
+ return &env->vfp.zregs[regno >> 1].d[regno & 1];
}
/**
@@ -2778,7 +2854,7 @@
*/
static inline uint64_t *aa32_vfp_qreg(CPUARMState *env, unsigned regno)
{
- return &env->vfp.regs[2 * regno];
+ return &env->vfp.zregs[regno].d[0];
}
/**
@@ -2787,7 +2863,7 @@
*/
static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno)
{
- return &env->vfp.regs[2 * regno];
+ return &env->vfp.zregs[regno].d[0];
}
#endif
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 670c07a..1c330ad 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -224,6 +224,10 @@
set_feature(&cpu->env, ARM_FEATURE_V8_AES);
set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
+ set_feature(&cpu->env, ARM_FEATURE_V8_SHA512);
+ set_feature(&cpu->env, ARM_FEATURE_V8_SHA3);
+ set_feature(&cpu->env, ARM_FEATURE_V8_SM3);
+ set_feature(&cpu->env, ARM_FEATURE_V8_SM4);
set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
set_feature(&cpu->env, ARM_FEATURE_CRC);
cpu->ctr = 0x80038003; /* 32 byte I and D cacheline size, VIPT icache */
diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c
index 9ca0bde..cc339ea 100644
--- a/target/arm/crypto_helper.c
+++ b/target/arm/crypto_helper.c
@@ -1,7 +1,7 @@
/*
* crypto_helper.c - emulate v8 Crypto Extensions instructions
*
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -419,3 +419,278 @@
rd[0] = d.l[0];
rd[1] = d.l[1];
}
+
+/*
+ * The SHA-512 logical functions (same as above but using 64-bit operands)
+ */
+
+static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
+{
+ return (x & (y ^ z)) ^ z;
+}
+
+static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
+{
+ return (x & y) | ((x | y) & z);
+}
+
+static uint64_t S0_512(uint64_t x)
+{
+ return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
+}
+
+static uint64_t S1_512(uint64_t x)
+{
+ return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
+}
+
+static uint64_t s0_512(uint64_t x)
+{
+ return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
+}
+
+static uint64_t s1_512(uint64_t x)
+{
+ return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
+}
+
+void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t *rm = vm;
+ uint64_t d0 = rd[0];
+ uint64_t d1 = rd[1];
+
+ d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
+ d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
+
+ rd[0] = d0;
+ rd[1] = d1;
+}
+
+void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t *rm = vm;
+ uint64_t d0 = rd[0];
+ uint64_t d1 = rd[1];
+
+ d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
+ d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
+
+ rd[0] = d0;
+ rd[1] = d1;
+}
+
+void HELPER(crypto_sha512su0)(void *vd, void *vn)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t d0 = rd[0];
+ uint64_t d1 = rd[1];
+
+ d0 += s0_512(rd[1]);
+ d1 += s0_512(rn[0]);
+
+ rd[0] = d0;
+ rd[1] = d1;
+}
+
+void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t *rm = vm;
+
+ rd[0] += s1_512(rn[0]) + rm[0];
+ rd[1] += s1_512(rn[1]) + rm[1];
+}
+
+void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t *rm = vm;
+ union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
+ union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
+ union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+ uint32_t t;
+
+ t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
+ CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
+
+ t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
+ CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
+
+ t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
+ CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
+
+ t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
+ CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
+
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+}
+
+void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t *rm = vm;
+ union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
+ union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
+ union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+ uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
+
+ CR_ST_WORD(d, 0) ^= t;
+ CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
+ CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
+ CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
+ ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
+
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+}
+
+void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2,
+ uint32_t opcode)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t *rm = vm;
+ union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
+ union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
+ union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+ uint32_t t;
+
+ assert(imm2 < 4);
+
+ if (opcode == 0 || opcode == 2) {
+ /* SM3TT1A, SM3TT2A */
+ t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
+ } else if (opcode == 1) {
+ /* SM3TT1B */
+ t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
+ } else if (opcode == 3) {
+ /* SM3TT2B */
+ t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
+ } else {
+ g_assert_not_reached();
+ }
+
+ t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
+
+ CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
+
+ if (opcode < 2) {
+ /* SM3TT1A, SM3TT1B */
+ t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
+
+ CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
+ } else {
+ /* SM3TT2A, SM3TT2B */
+ t += CR_ST_WORD(n, 3);
+ t ^= rol32(t, 9) ^ rol32(t, 17);
+
+ CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
+ }
+
+ CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
+ CR_ST_WORD(d, 3) = t;
+
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+}
+
+static uint8_t const sm4_sbox[] = {
+ 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
+ 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
+ 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
+ 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+ 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
+ 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
+ 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
+ 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
+ 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
+ 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
+ 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
+ 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
+ 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
+ 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
+ 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
+ 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
+ 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
+ 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
+ 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
+ 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
+ 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
+ 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
+ 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
+ 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
+ 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
+ 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
+ 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
+ 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
+ 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
+ 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
+ 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
+ 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
+};
+
+void HELPER(crypto_sm4e)(void *vd, void *vn)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
+ union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
+ uint32_t t, i;
+
+ for (i = 0; i < 4; i++) {
+ t = CR_ST_WORD(d, (i + 1) % 4) ^
+ CR_ST_WORD(d, (i + 2) % 4) ^
+ CR_ST_WORD(d, (i + 3) % 4) ^
+ CR_ST_WORD(n, i);
+
+ t = sm4_sbox[t & 0xff] |
+ sm4_sbox[(t >> 8) & 0xff] << 8 |
+ sm4_sbox[(t >> 16) & 0xff] << 16 |
+ sm4_sbox[(t >> 24) & 0xff] << 24;
+
+ CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
+ rol32(t, 24);
+ }
+
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+}
+
+void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t *rm = vm;
+ union CRYPTO_STATE d;
+ union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
+ union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+ uint32_t t, i;
+
+ d = n;
+ for (i = 0; i < 4; i++) {
+ t = CR_ST_WORD(d, (i + 1) % 4) ^
+ CR_ST_WORD(d, (i + 2) % 4) ^
+ CR_ST_WORD(d, (i + 3) % 4) ^
+ CR_ST_WORD(m, i);
+
+ t = sm4_sbox[t & 0xff] |
+ sm4_sbox[(t >> 8) & 0xff] << 8 |
+ sm4_sbox[(t >> 16) & 0xff] << 16 |
+ sm4_sbox[(t >> 24) & 0xff] << 24;
+
+ CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
+ }
+
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+}
diff --git a/target/arm/helper.c b/target/arm/helper.c
index bfce096..180ab75 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -4266,6 +4266,125 @@
REGINFO_SENTINEL
};
+/* Return the exception level to which SVE-disabled exceptions should
+ * be taken, or 0 if SVE is enabled.
+ */
+static int sve_exception_el(CPUARMState *env)
+{
+#ifndef CONFIG_USER_ONLY
+ unsigned current_el = arm_current_el(env);
+
+ /* The CPACR.ZEN controls traps to EL1:
+ * 0, 2 : trap EL0 and EL1 accesses
+ * 1 : trap only EL0 accesses
+ * 3 : trap no accesses
+ */
+ switch (extract32(env->cp15.cpacr_el1, 16, 2)) {
+ default:
+ if (current_el <= 1) {
+ /* Trap to PL1, which might be EL1 or EL3 */
+ if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
+ return 3;
+ }
+ return 1;
+ }
+ break;
+ case 1:
+ if (current_el == 0) {
+ return 1;
+ }
+ break;
+ case 3:
+ break;
+ }
+
+ /* Similarly for CPACR.FPEN, after having checked ZEN. */
+ switch (extract32(env->cp15.cpacr_el1, 20, 2)) {
+ default:
+ if (current_el <= 1) {
+ if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
+ return 3;
+ }
+ return 1;
+ }
+ break;
+ case 1:
+ if (current_el == 0) {
+ return 1;
+ }
+ break;
+ case 3:
+ break;
+ }
+
+ /* CPTR_EL2. Check both TZ and TFP. */
+ if (current_el <= 2
+ && (env->cp15.cptr_el[2] & (CPTR_TFP | CPTR_TZ))
+ && !arm_is_secure_below_el3(env)) {
+ return 2;
+ }
+
+ /* CPTR_EL3. Check both EZ and TFP. */
+ if (!(env->cp15.cptr_el[3] & CPTR_EZ)
+ || (env->cp15.cptr_el[3] & CPTR_TFP)) {
+ return 3;
+ }
+#endif
+ return 0;
+}
+
+static CPAccessResult zcr_access(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ switch (sve_exception_el(env)) {
+ case 3:
+ return CP_ACCESS_TRAP_EL3;
+ case 2:
+ return CP_ACCESS_TRAP_EL2;
+ case 1:
+ return CP_ACCESS_TRAP;
+ }
+ return CP_ACCESS_OK;
+}
+
+static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /* Bits other than [3:0] are RAZ/WI. */
+ raw_write(env, ri, value & 0xf);
+}
+
+static const ARMCPRegInfo zcr_el1_reginfo = {
+ .name = "ZCR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 0,
+ .access = PL1_RW, .accessfn = zcr_access, .type = ARM_CP_64BIT,
+ .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[1]),
+ .writefn = zcr_write, .raw_writefn = raw_write
+};
+
+static const ARMCPRegInfo zcr_el2_reginfo = {
+ .name = "ZCR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 0,
+ .access = PL2_RW, .accessfn = zcr_access, .type = ARM_CP_64BIT,
+ .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[2]),
+ .writefn = zcr_write, .raw_writefn = raw_write
+};
+
+static const ARMCPRegInfo zcr_no_el2_reginfo = {
+ .name = "ZCR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 0,
+ .access = PL2_RW, .type = ARM_CP_64BIT,
+ .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore
+};
+
+static const ARMCPRegInfo zcr_el3_reginfo = {
+ .name = "ZCR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 2, .opc2 = 0,
+ .access = PL3_RW, .accessfn = zcr_access, .type = ARM_CP_64BIT,
+ .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[3]),
+ .writefn = zcr_write, .raw_writefn = raw_write
+};
+
void hw_watchpoint_update(ARMCPU *cpu, int n)
{
CPUARMState *env = &cpu->env;
@@ -5332,6 +5451,18 @@
}
define_one_arm_cp_reg(cpu, &sctlr);
}
+
+ if (arm_feature(env, ARM_FEATURE_SVE)) {
+ define_one_arm_cp_reg(cpu, &zcr_el1_reginfo);
+ if (arm_feature(env, ARM_FEATURE_EL2)) {
+ define_one_arm_cp_reg(cpu, &zcr_el2_reginfo);
+ } else {
+ define_one_arm_cp_reg(cpu, &zcr_no_el2_reginfo);
+ }
+ if (arm_feature(env, ARM_FEATURE_EL3)) {
+ define_one_arm_cp_reg(cpu, &zcr_el3_reginfo);
+ }
+ }
}
void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
@@ -6161,12 +6292,127 @@
return target_el;
}
-static void v7m_push(CPUARMState *env, uint32_t val)
+static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value,
+ ARMMMUIdx mmu_idx, bool ignfault)
{
- CPUState *cs = CPU(arm_env_get_cpu(env));
+ CPUState *cs = CPU(cpu);
+ CPUARMState *env = &cpu->env;
+ MemTxAttrs attrs = {};
+ MemTxResult txres;
+ target_ulong page_size;
+ hwaddr physaddr;
+ int prot;
+ ARMMMUFaultInfo fi;
+ bool secure = mmu_idx & ARM_MMU_IDX_M_S;
+ int exc;
+ bool exc_secure;
- env->regs[13] -= 4;
- stl_phys(cs->as, env->regs[13], val);
+ if (get_phys_addr(env, addr, MMU_DATA_STORE, mmu_idx, &physaddr,
+ &attrs, &prot, &page_size, &fi, NULL)) {
+ /* MPU/SAU lookup failed */
+ if (fi.type == ARMFault_QEMU_SFault) {
+ qemu_log_mask(CPU_LOG_INT,
+ "...SecureFault with SFSR.AUVIOL during stacking\n");
+ env->v7m.sfsr |= R_V7M_SFSR_AUVIOL_MASK | R_V7M_SFSR_SFARVALID_MASK;
+ env->v7m.sfar = addr;
+ exc = ARMV7M_EXCP_SECURE;
+ exc_secure = false;
+ } else {
+ qemu_log_mask(CPU_LOG_INT, "...MemManageFault with CFSR.MSTKERR\n");
+ env->v7m.cfsr[secure] |= R_V7M_CFSR_MSTKERR_MASK;
+ exc = ARMV7M_EXCP_MEM;
+ exc_secure = secure;
+ }
+ goto pend_fault;
+ }
+ address_space_stl_le(arm_addressspace(cs, attrs), physaddr, value,
+ attrs, &txres);
+ if (txres != MEMTX_OK) {
+ /* BusFault trying to write the data */
+ qemu_log_mask(CPU_LOG_INT, "...BusFault with BFSR.STKERR\n");
+ env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_STKERR_MASK;
+ exc = ARMV7M_EXCP_BUS;
+ exc_secure = false;
+ goto pend_fault;
+ }
+ return true;
+
+pend_fault:
+ /* By pending the exception at this point we are making
+ * the IMPDEF choice "overridden exceptions pended" (see the
+ * MergeExcInfo() pseudocode). The other choice would be to not
+ * pend them now and then make a choice about which to throw away
+ * later if we have two derived exceptions.
+ * The only case when we must not pend the exception but instead
+ * throw it away is if we are doing the push of the callee registers
+ * and we've already generated a derived exception. Even in this
+ * case we will still update the fault status registers.
+ */
+ if (!ignfault) {
+ armv7m_nvic_set_pending_derived(env->nvic, exc, exc_secure);
+ }
+ return false;
+}
+
+static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr,
+ ARMMMUIdx mmu_idx)
+{
+ CPUState *cs = CPU(cpu);
+ CPUARMState *env = &cpu->env;
+ MemTxAttrs attrs = {};
+ MemTxResult txres;
+ target_ulong page_size;
+ hwaddr physaddr;
+ int prot;
+ ARMMMUFaultInfo fi;
+ bool secure = mmu_idx & ARM_MMU_IDX_M_S;
+ int exc;
+ bool exc_secure;
+ uint32_t value;
+
+ if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &physaddr,
+ &attrs, &prot, &page_size, &fi, NULL)) {
+ /* MPU/SAU lookup failed */
+ if (fi.type == ARMFault_QEMU_SFault) {
+ qemu_log_mask(CPU_LOG_INT,
+ "...SecureFault with SFSR.AUVIOL during unstack\n");
+ env->v7m.sfsr |= R_V7M_SFSR_AUVIOL_MASK | R_V7M_SFSR_SFARVALID_MASK;
+ env->v7m.sfar = addr;
+ exc = ARMV7M_EXCP_SECURE;
+ exc_secure = false;
+ } else {
+ qemu_log_mask(CPU_LOG_INT,
+ "...MemManageFault with CFSR.MUNSTKERR\n");
+ env->v7m.cfsr[secure] |= R_V7M_CFSR_MUNSTKERR_MASK;
+ exc = ARMV7M_EXCP_MEM;
+ exc_secure = secure;
+ }
+ goto pend_fault;
+ }
+
+ value = address_space_ldl(arm_addressspace(cs, attrs), physaddr,
+ attrs, &txres);
+ if (txres != MEMTX_OK) {
+ /* BusFault trying to read the data */
+ qemu_log_mask(CPU_LOG_INT, "...BusFault with BFSR.UNSTKERR\n");
+ env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_UNSTKERR_MASK;
+ exc = ARMV7M_EXCP_BUS;
+ exc_secure = false;
+ goto pend_fault;
+ }
+
+ *dest = value;
+ return true;
+
+pend_fault:
+ /* By pending the exception at this point we are making
+ * the IMPDEF choice "overridden exceptions pended" (see the
+ * MergeExcInfo() pseudocode). The other choice would be to not
+ * pend them now and then make a choice about which to throw away
+ * later if we have two derived exceptions.
+ */
+ armv7m_nvic_set_pending(env->nvic, exc, exc_secure);
+ return false;
}
/* Return true if we're using the process stack pointer (not the MSP) */
@@ -6395,65 +6641,126 @@
}
}
-static uint32_t arm_v7m_load_vector(ARMCPU *cpu, bool targets_secure)
+static bool arm_v7m_load_vector(ARMCPU *cpu, int exc, bool targets_secure,
+ uint32_t *pvec)
{
CPUState *cs = CPU(cpu);
CPUARMState *env = &cpu->env;
MemTxResult result;
- hwaddr vec = env->v7m.vecbase[targets_secure] + env->v7m.exception * 4;
- uint32_t addr;
+ uint32_t addr = env->v7m.vecbase[targets_secure] + exc * 4;
+ uint32_t vector_entry;
+ MemTxAttrs attrs = {};
+ ARMMMUIdx mmu_idx;
+ bool exc_secure;
- addr = address_space_ldl(cs->as, vec,
- MEMTXATTRS_UNSPECIFIED, &result);
- if (result != MEMTX_OK) {
- /* Architecturally this should cause a HardFault setting HSFR.VECTTBL,
- * which would then be immediately followed by our failing to load
- * the entry vector for that HardFault, which is a Lockup case.
- * Since we don't model Lockup, we just report this guest error
- * via cpu_abort().
- */
- cpu_abort(cs, "Failed to read from %s exception vector table "
- "entry %08x\n", targets_secure ? "secure" : "nonsecure",
- (unsigned)vec);
+ mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, targets_secure, true);
+
+ /* We don't do a get_phys_addr() here because the rules for vector
+ * loads are special: they always use the default memory map, and
+ * the default memory map permits reads from all addresses.
+ * Since there's no easy way to pass through to pmsav8_mpu_lookup()
+ * that we want this special case which would always say "yes",
+ * we just do the SAU lookup here followed by a direct physical load.
+ */
+ attrs.secure = targets_secure;
+ attrs.user = false;
+
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ V8M_SAttributes sattrs = {};
+
+ v8m_security_lookup(env, addr, MMU_DATA_LOAD, mmu_idx, &sattrs);
+ if (sattrs.ns) {
+ attrs.secure = false;
+ } else if (!targets_secure) {
+ /* NS access to S memory */
+ goto load_fail;
+ }
}
- return addr;
+
+ vector_entry = address_space_ldl(arm_addressspace(cs, attrs), addr,
+ attrs, &result);
+ if (result != MEMTX_OK) {
+ goto load_fail;
+ }
+ *pvec = vector_entry;
+ return true;
+
+load_fail:
+ /* All vector table fetch fails are reported as HardFault, with
+ * HFSR.VECTTBL and .FORCED set. (FORCED is set because
+ * technically the underlying exception is a MemManage or BusFault
+ * that is escalated to HardFault.) This is a terminal exception,
+ * so we will either take the HardFault immediately or else enter
+ * lockup (the latter case is handled in armv7m_nvic_set_pending_derived()).
+ */
+ exc_secure = targets_secure ||
+ !(cpu->env.v7m.aircr & R_V7M_AIRCR_BFHFNMINS_MASK);
+ env->v7m.hfsr |= R_V7M_HFSR_VECTTBL_MASK | R_V7M_HFSR_FORCED_MASK;
+ armv7m_nvic_set_pending_derived(env->nvic, ARMV7M_EXCP_HARD, exc_secure);
+ return false;
}
-static void v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain)
+static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain,
+ bool ignore_faults)
{
/* For v8M, push the callee-saves register part of the stack frame.
* Compare the v8M pseudocode PushCalleeStack().
* In the tailchaining case this may not be the current stack.
*/
CPUARMState *env = &cpu->env;
- CPUState *cs = CPU(cpu);
uint32_t *frame_sp_p;
uint32_t frameptr;
+ ARMMMUIdx mmu_idx;
+ bool stacked_ok;
if (dotailchain) {
- frame_sp_p = get_v7m_sp_ptr(env, true,
- lr & R_V7M_EXCRET_MODE_MASK,
+ bool mode = lr & R_V7M_EXCRET_MODE_MASK;
+ bool priv = !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_NPRIV_MASK) ||
+ !mode;
+
+ mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, M_REG_S, priv);
+ frame_sp_p = get_v7m_sp_ptr(env, M_REG_S, mode,
lr & R_V7M_EXCRET_SPSEL_MASK);
} else {
+ mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false));
frame_sp_p = &env->regs[13];
}
frameptr = *frame_sp_p - 0x28;
- stl_phys(cs->as, frameptr, 0xfefa125b);
- stl_phys(cs->as, frameptr + 0x8, env->regs[4]);
- stl_phys(cs->as, frameptr + 0xc, env->regs[5]);
- stl_phys(cs->as, frameptr + 0x10, env->regs[6]);
- stl_phys(cs->as, frameptr + 0x14, env->regs[7]);
- stl_phys(cs->as, frameptr + 0x18, env->regs[8]);
- stl_phys(cs->as, frameptr + 0x1c, env->regs[9]);
- stl_phys(cs->as, frameptr + 0x20, env->regs[10]);
- stl_phys(cs->as, frameptr + 0x24, env->regs[11]);
+ /* Write as much of the stack frame as we can. A write failure may
+ * cause us to pend a derived exception.
+ */
+ stacked_ok =
+ v7m_stack_write(cpu, frameptr, 0xfefa125b, mmu_idx, ignore_faults) &&
+ v7m_stack_write(cpu, frameptr + 0x8, env->regs[4], mmu_idx,
+ ignore_faults) &&
+ v7m_stack_write(cpu, frameptr + 0xc, env->regs[5], mmu_idx,
+ ignore_faults) &&
+ v7m_stack_write(cpu, frameptr + 0x10, env->regs[6], mmu_idx,
+ ignore_faults) &&
+ v7m_stack_write(cpu, frameptr + 0x14, env->regs[7], mmu_idx,
+ ignore_faults) &&
+ v7m_stack_write(cpu, frameptr + 0x18, env->regs[8], mmu_idx,
+ ignore_faults) &&
+ v7m_stack_write(cpu, frameptr + 0x1c, env->regs[9], mmu_idx,
+ ignore_faults) &&
+ v7m_stack_write(cpu, frameptr + 0x20, env->regs[10], mmu_idx,
+ ignore_faults) &&
+ v7m_stack_write(cpu, frameptr + 0x24, env->regs[11], mmu_idx,
+ ignore_faults);
+ /* Update SP regardless of whether any of the stack accesses failed.
+ * When we implement v8M stack limit checking then this attempt to
+ * update SP might also fail and result in a derived exception.
+ */
*frame_sp_p = frameptr;
+
+ return !stacked_ok;
}
-static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain)
+static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain,
+ bool ignore_stackfaults)
{
/* Do the "take the exception" parts of exception entry,
* but not the pushing of state to the stack. This is
@@ -6462,8 +6769,10 @@
CPUARMState *env = &cpu->env;
uint32_t addr;
bool targets_secure;
+ int exc;
+ bool push_failed = false;
- targets_secure = armv7m_nvic_acknowledge_irq(env->nvic);
+ armv7m_nvic_get_pending_irq_info(env->nvic, &exc, &targets_secure);
if (arm_feature(env, ARM_FEATURE_V8)) {
if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
@@ -6489,7 +6798,8 @@
*/
if (lr & R_V7M_EXCRET_DCRS_MASK &&
!(dotailchain && (lr & R_V7M_EXCRET_ES_MASK))) {
- v7m_push_callee_stack(cpu, lr, dotailchain);
+ push_failed = v7m_push_callee_stack(cpu, lr, dotailchain,
+ ignore_stackfaults);
}
lr |= R_V7M_EXCRET_DCRS_MASK;
}
@@ -6531,6 +6841,27 @@
}
}
+ if (push_failed && !ignore_stackfaults) {
+ /* Derived exception on callee-saves register stacking:
+ * we might now want to take a different exception which
+ * targets a different security state, so try again from the top.
+ */
+ v7m_exception_taken(cpu, lr, true, true);
+ return;
+ }
+
+ if (!arm_v7m_load_vector(cpu, exc, targets_secure, &addr)) {
+ /* Vector load failed: derived exception */
+ v7m_exception_taken(cpu, lr, true, true);
+ return;
+ }
+
+ /* Now we've done everything that might cause a derived exception
+ * we can go ahead and activate whichever exception we're going to
+ * take (which might now be the derived exception).
+ */
+ armv7m_nvic_acknowledge_irq(env->nvic);
+
/* Switch to target security state -- must do this before writing SPSEL */
switch_v7m_security_state(env, targets_secure);
write_v7m_control_spsel(env, 0);
@@ -6538,34 +6869,55 @@
/* Clear IT bits */
env->condexec_bits = 0;
env->regs[14] = lr;
- addr = arm_v7m_load_vector(cpu, targets_secure);
env->regs[15] = addr & 0xfffffffe;
env->thumb = addr & 1;
}
-static void v7m_push_stack(ARMCPU *cpu)
+static bool v7m_push_stack(ARMCPU *cpu)
{
/* Do the "set up stack frame" part of exception entry,
* similar to pseudocode PushStack().
+ * Return true if we generate a derived exception (and so
+ * should ignore further stack faults trying to process
+ * that derived exception.)
*/
+ bool stacked_ok;
CPUARMState *env = &cpu->env;
uint32_t xpsr = xpsr_read(env);
+ uint32_t frameptr = env->regs[13];
+ ARMMMUIdx mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false));
/* Align stack pointer if the guest wants that */
- if ((env->regs[13] & 4) &&
+ if ((frameptr & 4) &&
(env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKALIGN_MASK)) {
- env->regs[13] -= 4;
+ frameptr -= 4;
xpsr |= XPSR_SPREALIGN;
}
- /* Switch to the handler mode. */
- v7m_push(env, xpsr);
- v7m_push(env, env->regs[15]);
- v7m_push(env, env->regs[14]);
- v7m_push(env, env->regs[12]);
- v7m_push(env, env->regs[3]);
- v7m_push(env, env->regs[2]);
- v7m_push(env, env->regs[1]);
- v7m_push(env, env->regs[0]);
+
+ frameptr -= 0x20;
+
+ /* Write as much of the stack frame as we can. If we fail a stack
+ * write this will result in a derived exception being pended
+ * (which may be taken in preference to the one we started with
+ * if it has higher priority).
+ */
+ stacked_ok =
+ v7m_stack_write(cpu, frameptr, env->regs[0], mmu_idx, false) &&
+ v7m_stack_write(cpu, frameptr + 4, env->regs[1], mmu_idx, false) &&
+ v7m_stack_write(cpu, frameptr + 8, env->regs[2], mmu_idx, false) &&
+ v7m_stack_write(cpu, frameptr + 12, env->regs[3], mmu_idx, false) &&
+ v7m_stack_write(cpu, frameptr + 16, env->regs[12], mmu_idx, false) &&
+ v7m_stack_write(cpu, frameptr + 20, env->regs[14], mmu_idx, false) &&
+ v7m_stack_write(cpu, frameptr + 24, env->regs[15], mmu_idx, false) &&
+ v7m_stack_write(cpu, frameptr + 28, xpsr, mmu_idx, false);
+
+ /* Update SP regardless of whether any of the stack accesses failed.
+ * When we implement v8M stack limit checking then this attempt to
+ * update SP might also fail and result in a derived exception.
+ */
+ env->regs[13] = frameptr;
+
+ return !stacked_ok;
}
static void do_v7m_exception_exit(ARMCPU *cpu)
@@ -6711,7 +7063,7 @@
if (sfault) {
env->v7m.sfsr |= R_V7M_SFSR_INVER_MASK;
armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
- v7m_exception_taken(cpu, excret, true);
+ v7m_exception_taken(cpu, excret, true, false);
qemu_log_mask(CPU_LOG_INT, "...taking SecureFault on existing "
"stackframe: failed EXC_RETURN.ES validity check\n");
return;
@@ -6723,7 +7075,7 @@
*/
env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK;
armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
- v7m_exception_taken(cpu, excret, true);
+ v7m_exception_taken(cpu, excret, true, false);
qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on existing "
"stackframe: failed exception return integrity check\n");
return;
@@ -6752,6 +7104,11 @@
!return_to_handler,
return_to_sp_process);
uint32_t frameptr = *frame_sp_p;
+ bool pop_ok = true;
+ ARMMMUIdx mmu_idx;
+
+ mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, return_to_secure,
+ !return_to_handler);
if (!QEMU_IS_ALIGNED(frameptr, 8) &&
arm_feature(env, ARM_FEATURE_V8)) {
@@ -6771,36 +7128,45 @@
/* Take a SecureFault on the current stack */
env->v7m.sfsr |= R_V7M_SFSR_INVIS_MASK;
armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
- v7m_exception_taken(cpu, excret, true);
+ v7m_exception_taken(cpu, excret, true, false);
qemu_log_mask(CPU_LOG_INT, "...taking SecureFault on existing "
"stackframe: failed exception return integrity "
"signature check\n");
return;
}
- env->regs[4] = ldl_phys(cs->as, frameptr + 0x8);
- env->regs[5] = ldl_phys(cs->as, frameptr + 0xc);
- env->regs[6] = ldl_phys(cs->as, frameptr + 0x10);
- env->regs[7] = ldl_phys(cs->as, frameptr + 0x14);
- env->regs[8] = ldl_phys(cs->as, frameptr + 0x18);
- env->regs[9] = ldl_phys(cs->as, frameptr + 0x1c);
- env->regs[10] = ldl_phys(cs->as, frameptr + 0x20);
- env->regs[11] = ldl_phys(cs->as, frameptr + 0x24);
+ pop_ok =
+ v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[5], frameptr + 0xc, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[6], frameptr + 0x10, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[7], frameptr + 0x14, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[8], frameptr + 0x18, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[9], frameptr + 0x1c, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[10], frameptr + 0x20, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[11], frameptr + 0x24, mmu_idx);
frameptr += 0x28;
}
- /* Pop registers. TODO: make these accesses use the correct
- * attributes and address space (S/NS, priv/unpriv) and handle
- * memory transaction failures.
- */
- env->regs[0] = ldl_phys(cs->as, frameptr);
- env->regs[1] = ldl_phys(cs->as, frameptr + 0x4);
- env->regs[2] = ldl_phys(cs->as, frameptr + 0x8);
- env->regs[3] = ldl_phys(cs->as, frameptr + 0xc);
- env->regs[12] = ldl_phys(cs->as, frameptr + 0x10);
- env->regs[14] = ldl_phys(cs->as, frameptr + 0x14);
- env->regs[15] = ldl_phys(cs->as, frameptr + 0x18);
+ /* Pop registers */
+ pop_ok = pop_ok &&
+ v7m_stack_read(cpu, &env->regs[0], frameptr, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[1], frameptr + 0x4, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[2], frameptr + 0x8, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[3], frameptr + 0xc, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[12], frameptr + 0x10, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[14], frameptr + 0x14, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[15], frameptr + 0x18, mmu_idx) &&
+ v7m_stack_read(cpu, &xpsr, frameptr + 0x1c, mmu_idx);
+
+ if (!pop_ok) {
+ /* v7m_stack_read() pended a fault, so take it (as a tail
+ * chained exception on the same stack frame)
+ */
+ v7m_exception_taken(cpu, excret, true, false);
+ return;
+ }
/* Returning from an exception with a PC with bit 0 set is defined
* behaviour on v8M (bit 0 is ignored), but for v7M it was specified
@@ -6819,8 +7185,6 @@
}
}
- xpsr = ldl_phys(cs->as, frameptr + 0x1c);
-
if (arm_feature(env, ARM_FEATURE_V8)) {
/* For v8M we have to check whether the xPSR exception field
* matches the EXCRET value for return to handler/thread
@@ -6836,7 +7200,7 @@
armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
env->v7m.secure);
env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK;
- v7m_exception_taken(cpu, excret, true);
+ v7m_exception_taken(cpu, excret, true, false);
qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on existing "
"stackframe: failed exception return integrity "
"check\n");
@@ -6869,11 +7233,13 @@
/* Take an INVPC UsageFault by pushing the stack again;
* we know we're v7M so this is never a Secure UsageFault.
*/
+ bool ignore_stackfaults;
+
assert(!arm_feature(env, ARM_FEATURE_V8));
armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, false);
env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK;
- v7m_push_stack(cpu);
- v7m_exception_taken(cpu, excret, false);
+ ignore_stackfaults = v7m_push_stack(cpu);
+ v7m_exception_taken(cpu, excret, false, ignore_stackfaults);
qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on new stackframe: "
"failed exception return integrity check\n");
return;
@@ -7114,6 +7480,7 @@
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
uint32_t lr;
+ bool ignore_stackfaults;
arm_log_exception(cs->exception_index);
@@ -7288,8 +7655,8 @@
lr |= R_V7M_EXCRET_MODE_MASK;
}
- v7m_push_stack(cpu);
- v7m_exception_taken(cpu, lr, false);
+ ignore_stackfaults = v7m_push_stack(cpu);
+ v7m_exception_taken(cpu, lr, false, ignore_stackfaults);
qemu_log_mask(CPU_LOG_INT, "... as %d\n", env->v7m.exception);
}
@@ -11692,14 +12059,37 @@
target_ulong *cs_base, uint32_t *pflags)
{
ARMMMUIdx mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false));
+ int fp_el = fp_exception_el(env);
uint32_t flags;
if (is_a64(env)) {
+ int sve_el = sve_exception_el(env);
+ uint32_t zcr_len;
+
*pc = env->pc;
flags = ARM_TBFLAG_AARCH64_STATE_MASK;
/* Get control bits for tagged addresses */
flags |= (arm_regime_tbi0(env, mmu_idx) << ARM_TBFLAG_TBI0_SHIFT);
flags |= (arm_regime_tbi1(env, mmu_idx) << ARM_TBFLAG_TBI1_SHIFT);
+ flags |= sve_el << ARM_TBFLAG_SVEEXC_EL_SHIFT;
+
+ /* If SVE is disabled, but FP is enabled,
+ then the effective len is 0. */
+ if (sve_el != 0 && fp_el == 0) {
+ zcr_len = 0;
+ } else {
+ int current_el = arm_current_el(env);
+
+ zcr_len = env->vfp.zcr_el[current_el <= 1 ? 1 : current_el];
+ zcr_len &= 0xf;
+ if (current_el < 2 && arm_feature(env, ARM_FEATURE_EL2)) {
+ zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
+ }
+ if (current_el < 3 && arm_feature(env, ARM_FEATURE_EL3)) {
+ zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
+ }
+ }
+ flags |= zcr_len << ARM_TBFLAG_ZCR_LEN_SHIFT;
} else {
*pc = env->regs[15];
flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT)
@@ -11742,7 +12132,7 @@
if (arm_cpu_data_is_big_endian(env)) {
flags |= ARM_TBFLAG_BE_DATA_MASK;
}
- flags |= fp_exception_el(env) << ARM_TBFLAG_FPEXC_EL_SHIFT;
+ flags |= fp_el << ARM_TBFLAG_FPEXC_EL_SHIFT;
if (arm_v7m_is_handler_mode(env)) {
flags |= ARM_TBFLAG_HANDLER_MASK;
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 5dec2e6..6383d7d 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -534,6 +534,18 @@
DEF_HELPER_FLAGS_2(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr)
DEF_HELPER_FLAGS_3(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_3(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_3(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_2(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr)
+DEF_HELPER_FLAGS_3(crypto_sha512su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32)
+DEF_HELPER_FLAGS_3(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_2(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr)
+DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
+
DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_2(dc_zva, void, env, i64)
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index ff53e9f..cfb7e5a 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -234,6 +234,10 @@
exit(1);
#endif
} else {
+ if (kvm_enabled()) {
+ error_report("Userspace GICv3 is not supported with KVM");
+ exit(1);
+ }
return "arm-gicv3";
}
}
diff --git a/target/arm/machine.c b/target/arm/machine.c
index a85c243..2c8b430 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -50,7 +50,40 @@
.minimum_version_id = 3,
.needed = vfp_needed,
.fields = (VMStateField[]) {
- VMSTATE_UINT64_ARRAY(env.vfp.regs, ARMCPU, 64),
+ /* For compatibility, store Qn out of Zn here. */
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[0].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[1].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[2].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[3].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[4].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[5].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[6].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[7].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[8].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[9].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[10].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[11].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[12].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[13].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[14].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[15].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[16].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[17].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[18].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[19].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[20].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[21].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[22].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[23].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[24].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[25].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[26].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[27].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[28].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[29].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[30].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[31].d, ARMCPU, 0, 2),
+
/* The xregs array is a little awkward because element 1 (FPSCR)
* requires a specific accessor, so we have to split it up in
* the vmstate:
@@ -89,6 +122,56 @@
}
};
+#ifdef TARGET_AARCH64
+/* The expression ARM_MAX_VQ - 2 is 0 for pure AArch32 build,
+ * and ARMPredicateReg is actively empty. This triggers errors
+ * in the expansion of the VMSTATE macros.
+ */
+
+static bool sve_needed(void *opaque)
+{
+ ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
+
+ return arm_feature(env, ARM_FEATURE_SVE);
+}
+
+/* The first two words of each Zreg is stored in VFP state. */
+static const VMStateDescription vmstate_zreg_hi_reg = {
+ .name = "cpu/sve/zreg_hi",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64_SUB_ARRAY(d, ARMVectorReg, 2, ARM_MAX_VQ - 2),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static const VMStateDescription vmstate_preg_reg = {
+ .name = "cpu/sve/preg",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64_ARRAY(p, ARMPredicateReg, 2 * ARM_MAX_VQ / 8),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static const VMStateDescription vmstate_sve = {
+ .name = "cpu/sve",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = sve_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_STRUCT_ARRAY(env.vfp.zregs, ARMCPU, 32, 0,
+ vmstate_zreg_hi_reg, ARMVectorReg),
+ VMSTATE_STRUCT_ARRAY(env.vfp.pregs, ARMCPU, 17, 0,
+ vmstate_preg_reg, ARMPredicateReg),
+ VMSTATE_END_OF_LIST()
+ }
+};
+#endif /* AARCH64 */
+
static bool m_needed(void *opaque)
{
ARMCPU *cpu = opaque;
@@ -553,6 +636,9 @@
&vmstate_pmsav7,
&vmstate_pmsav8,
&vmstate_m_security,
+#ifdef TARGET_AARCH64
+ &vmstate_sve,
+#endif
NULL
}
};
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index eed64c7..fb1a4cb 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -21,6 +21,7 @@
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
+#include "tcg-op-gvec.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
@@ -84,6 +85,13 @@
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
+/* Note that the gvec expanders operate on offsets + sizes. */
+typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
+typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
+ uint32_t, uint32_t);
+typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t, uint32_t);
+
/* initialize TCG globals. */
void a64_translate_init(void)
{
@@ -517,8 +525,8 @@
{
int offs = 0;
#ifdef HOST_WORDS_BIGENDIAN
- /* This is complicated slightly because vfp.regs[2n] is
- * still the low half and vfp.regs[2n+1] the high half
+ /* This is complicated slightly because vfp.zregs[n].d[0] is
+ * still the low half and vfp.zregs[n].d[1] the high half
* of the 128 bit vector, even on big endian systems.
* Calculate the offset assuming a fully bigendian 128 bits,
* then XOR to account for the order of the two 64 bit halves.
@@ -528,7 +536,7 @@
#else
offs += element * (1 << size);
#endif
- offs += offsetof(CPUARMState, vfp.regs[regno * 2]);
+ offs += offsetof(CPUARMState, vfp.zregs[regno]);
assert_fp_access_checked(s);
return offs;
}
@@ -537,7 +545,7 @@
static inline int vec_full_reg_offset(DisasContext *s, int regno)
{
assert_fp_access_checked(s);
- return offsetof(CPUARMState, vfp.regs[regno * 2]);
+ return offsetof(CPUARMState, vfp.zregs[regno]);
}
/* Return a newly allocated pointer to the vector register. */
@@ -548,6 +556,14 @@
return ret;
}
+/* Return the byte size of the "whole" vector register, VL / 8. */
+static inline int vec_full_reg_size(DisasContext *s)
+{
+ /* FIXME SVE: We should put the composite ZCR_EL* value into tb->flags.
+ In the meantime this is just the AdvSIMD length of 128. */
+ return 128 / 8;
+}
+
/* Return the offset into CPUARMState of a slice (from
* the least significant end) of FP register Qn (ie
* Dn, Sn, Hn or Bn).
@@ -618,6 +634,51 @@
return statusptr;
}
+/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
+static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
+ GVecGen2Fn *gvec_fn, int vece)
+{
+ gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
+ is_q ? 16 : 8, vec_full_reg_size(s));
+}
+
+/* Expand a 2-operand + immediate AdvSIMD vector operation using
+ * an expander function.
+ */
+static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
+ int64_t imm, GVecGen2iFn *gvec_fn, int vece)
+{
+ gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
+ imm, is_q ? 16 : 8, vec_full_reg_size(s));
+}
+
+/* Expand a 3-operand AdvSIMD vector operation using an expander function. */
+static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
+ GVecGen3Fn *gvec_fn, int vece)
+{
+ gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
+}
+
+/* Expand a 2-operand + immediate AdvSIMD vector operation using
+ * an op descriptor.
+ */
+static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
+ int rn, int64_t imm, const GVecGen2i *gvec_op)
+{
+ tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
+ is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
+}
+
+/* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */
+static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
+ int rn, int rm, const GVecGen3 *gvec_op)
+{
+ tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), is_q ? 16 : 8,
+ vec_full_reg_size(s), gvec_op);
+}
+
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
* than the 32 bit equivalent.
*/
@@ -4566,14 +4627,17 @@
TCGv_i64 tcg_op;
TCGv_i64 tcg_res;
+ switch (opcode) {
+ case 0x0: /* FMOV */
+ gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
+ return;
+ }
+
fpst = get_fpstatus_ptr();
tcg_op = read_fp_dreg(s, rn);
tcg_res = tcg_temp_new_i64();
switch (opcode) {
- case 0x0: /* FMOV */
- tcg_gen_mov_i64(tcg_res, tcg_op);
- break;
case 0x1: /* FABS */
gen_helper_vfp_absd(tcg_res, tcg_op);
break;
@@ -5848,10 +5912,7 @@
int imm5)
{
int size = ctz32(imm5);
- int esize = 8 << size;
- int elements = (is_q ? 128 : 64) / esize;
- int index, i;
- TCGv_i64 tmp;
+ int index = imm5 >> (size + 1);
if (size > 3 || (size == 3 && !is_q)) {
unallocated_encoding(s);
@@ -5862,20 +5923,9 @@
return;
}
- index = imm5 >> (size + 1);
-
- tmp = tcg_temp_new_i64();
- read_vec_element(s, tmp, rn, index, size);
-
- for (i = 0; i < elements; i++) {
- write_vec_element(s, tmp, rd, i, size);
- }
-
- if (!is_q) {
- clear_vec_high(s, rd);
- }
-
- tcg_temp_free_i64(tmp);
+ tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
+ vec_reg_offset(s, rn, index, size),
+ is_q ? 16 : 8, vec_full_reg_size(s));
}
/* DUP (element, scalar)
@@ -5924,9 +5974,7 @@
int imm5)
{
int size = ctz32(imm5);
- int esize = 8 << size;
- int elements = (is_q ? 128 : 64)/esize;
- int i = 0;
+ uint32_t dofs, oprsz, maxsz;
if (size > 3 || ((size == 3) && !is_q)) {
unallocated_encoding(s);
@@ -5937,12 +5985,11 @@
return;
}
- for (i = 0; i < elements; i++) {
- write_vec_element(s, cpu_reg(s, rn), rd, i, size);
- }
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ dofs = vec_full_reg_offset(s, rd);
+ oprsz = is_q ? 16 : 8;
+ maxsz = vec_full_reg_size(s);
+
+ tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
}
/* INS (Element)
@@ -6133,8 +6180,6 @@
bool is_neg = extract32(insn, 29, 1);
bool is_q = extract32(insn, 30, 1);
uint64_t imm = 0;
- TCGv_i64 tcg_rd, tcg_imm;
- int i;
if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
unallocated_encoding(s);
@@ -6215,32 +6260,18 @@
imm = ~imm;
}
- tcg_imm = tcg_const_i64(imm);
- tcg_rd = new_tmp_a64(s);
-
- for (i = 0; i < 2; i++) {
- int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
-
- if (i == 1 && !is_q) {
- /* non-quad ops clear high half of vector */
- tcg_gen_movi_i64(tcg_rd, 0);
- } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
- tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
- if (is_neg) {
- /* AND (BIC) */
- tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
- } else {
- /* ORR */
- tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
- }
+ if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
+ /* MOVI or MVNI, with MVNI negation handled above. */
+ tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
+ vec_full_reg_size(s), imm);
+ } else {
+ /* ORR or BIC, with BIC negation to AND handled above. */
+ if (is_neg) {
+ gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
} else {
- /* MOVI */
- tcg_gen_mov_i64(tcg_rd, tcg_imm);
+ gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
}
- tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
}
-
- tcg_temp_free_i64(tcg_imm);
}
/* AdvSIMD scalar copy
@@ -6485,32 +6516,6 @@
}
}
-/* Common SHL/SLI - Shift left with an optional insert */
-static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
- bool insert, int shift)
-{
- if (insert) { /* SLI */
- tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
- } else { /* SHL */
- tcg_gen_shli_i64(tcg_res, tcg_src, shift);
- }
-}
-
-/* SRI: shift right with insert */
-static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
- int size, int shift)
-{
- int esize = 8 << size;
-
- /* shift count same as element size is valid but does nothing;
- * special case to avoid potential shift by 64.
- */
- if (shift != esize) {
- tcg_gen_shri_i64(tcg_src, tcg_src, shift);
- tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
- }
-}
-
/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
static void handle_scalar_simd_shri(DisasContext *s,
bool is_u, int immh, int immb,
@@ -6561,7 +6566,14 @@
tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
if (insert) {
- handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
+ /* shift count same as element size is valid but does nothing;
+ * special case to avoid potential shift by 64.
+ */
+ int esize = 8 << size;
+ if (shift != esize) {
+ tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
+ tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
+ }
} else {
handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
accumulate, is_u, size, shift);
@@ -6599,7 +6611,11 @@
tcg_rn = read_fp_dreg(s, rn);
tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
- handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
+ if (insert) {
+ tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
+ } else {
+ tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
+ }
write_fp_dreg(s, rd, tcg_rd);
@@ -7175,6 +7191,28 @@
}
}
+/* CMTST : test is "if (X & Y != 0)". */
+static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_and_i32(d, a, b);
+ tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
+ tcg_gen_neg_i32(d, d);
+}
+
+static void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_and_i64(d, a, b);
+ tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
+ tcg_gen_neg_i64(d, d);
+}
+
+static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_and_vec(vece, d, a, b);
+ tcg_gen_dupi_vec(vece, a, 0);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
+}
+
static void handle_3same_64(DisasContext *s, int opcode, bool u,
TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
{
@@ -7218,10 +7256,7 @@
cond = TCG_COND_EQ;
goto do_cmop;
}
- /* CMTST : test is "if (X & Y != 0)". */
- tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
- tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
- tcg_gen_neg_i64(tcg_rd, tcg_rd);
+ gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
break;
case 0x8: /* SSHL, USHL */
if (u) {
@@ -8329,16 +8364,195 @@
}
}
+static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_sar8i_i64(a, a, shift);
+ tcg_gen_vec_add8_i64(d, d, a);
+}
+
+static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_sar16i_i64(a, a, shift);
+ tcg_gen_vec_add16_i64(d, d, a);
+}
+
+static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_sari_i32(a, a, shift);
+ tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_sari_i64(a, a, shift);
+ tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ tcg_gen_sari_vec(vece, a, a, sh);
+ tcg_gen_add_vec(vece, d, d, a);
+}
+
+static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_shr8i_i64(a, a, shift);
+ tcg_gen_vec_add8_i64(d, d, a);
+}
+
+static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_shr16i_i64(a, a, shift);
+ tcg_gen_vec_add16_i64(d, d, a);
+}
+
+static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_shri_i32(a, a, shift);
+ tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_shri_i64(a, a, shift);
+ tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ tcg_gen_shri_vec(vece, a, a, sh);
+ tcg_gen_add_vec(vece, d, d, a);
+}
+
+static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_8, 0xff >> shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff >> shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_shri_i32(a, a, shift);
+ tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
+}
+
+static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_shri_i64(a, a, shift);
+ tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
+}
+
+static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ uint64_t mask = (2ull << ((8 << vece) - 1)) - 1;
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_dupi_vec(vece, m, mask ^ (mask >> sh));
+ tcg_gen_shri_vec(vece, t, a, sh);
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
+
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(m);
+}
+
/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
int immh, int immb, int opcode, int rn, int rd)
{
+ static const GVecGen2i ssra_op[4] = {
+ { .fni8 = gen_ssra8_i64,
+ .fniv = gen_ssra_vec,
+ .load_dest = true,
+ .opc = INDEX_op_sari_vec,
+ .vece = MO_8 },
+ { .fni8 = gen_ssra16_i64,
+ .fniv = gen_ssra_vec,
+ .load_dest = true,
+ .opc = INDEX_op_sari_vec,
+ .vece = MO_16 },
+ { .fni4 = gen_ssra32_i32,
+ .fniv = gen_ssra_vec,
+ .load_dest = true,
+ .opc = INDEX_op_sari_vec,
+ .vece = MO_32 },
+ { .fni8 = gen_ssra64_i64,
+ .fniv = gen_ssra_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opc = INDEX_op_sari_vec,
+ .vece = MO_64 },
+ };
+ static const GVecGen2i usra_op[4] = {
+ { .fni8 = gen_usra8_i64,
+ .fniv = gen_usra_vec,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_8, },
+ { .fni8 = gen_usra16_i64,
+ .fniv = gen_usra_vec,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_16, },
+ { .fni4 = gen_usra32_i32,
+ .fniv = gen_usra_vec,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_32, },
+ { .fni8 = gen_usra64_i64,
+ .fniv = gen_usra_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_64, },
+ };
+ static const GVecGen2i sri_op[4] = {
+ { .fni8 = gen_shr8_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_8 },
+ { .fni8 = gen_shr16_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_16 },
+ { .fni4 = gen_shr32_ins_i32,
+ .fniv = gen_shr_ins_vec,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_32 },
+ { .fni8 = gen_shr64_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_64 },
+ };
+
int size = 32 - clz32(immh) - 1;
int immhb = immh << 3 | immb;
int shift = 2 * (8 << size) - immhb;
bool accumulate = false;
- bool round = false;
- bool insert = false;
int dsize = is_q ? 128 : 64;
int esize = 8 << size;
int elements = dsize/esize;
@@ -8346,6 +8560,7 @@
TCGv_i64 tcg_rn = new_tmp_a64(s);
TCGv_i64 tcg_rd = new_tmp_a64(s);
TCGv_i64 tcg_round;
+ uint64_t round_const;
int i;
if (extract32(immh, 3, 1) && !is_q) {
@@ -8364,64 +8579,159 @@
switch (opcode) {
case 0x02: /* SSRA / USRA (accumulate) */
- accumulate = true;
- break;
+ if (is_u) {
+ /* Shift count same as element size produces zero to add. */
+ if (shift == 8 << size) {
+ goto done;
+ }
+ gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]);
+ } else {
+ /* Shift count same as element size produces all sign to add. */
+ if (shift == 8 << size) {
+ shift -= 1;
+ }
+ gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]);
+ }
+ return;
+ case 0x08: /* SRI */
+ /* Shift count same as element size is valid but does nothing. */
+ if (shift == 8 << size) {
+ goto done;
+ }
+ gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]);
+ return;
+
+ case 0x00: /* SSHR / USHR */
+ if (is_u) {
+ if (shift == 8 << size) {
+ /* Shift count the same size as element size produces zero. */
+ tcg_gen_gvec_dup8i(vec_full_reg_offset(s, rd),
+ is_q ? 16 : 8, vec_full_reg_size(s), 0);
+ } else {
+ gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
+ }
+ } else {
+ /* Shift count the same size as element size produces all sign. */
+ if (shift == 8 << size) {
+ shift -= 1;
+ }
+ gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size);
+ }
+ return;
+
case 0x04: /* SRSHR / URSHR (rounding) */
- round = true;
break;
case 0x06: /* SRSRA / URSRA (accum + rounding) */
- accumulate = round = true;
+ accumulate = true;
break;
- case 0x08: /* SRI */
- insert = true;
- break;
+ default:
+ g_assert_not_reached();
}
- if (round) {
- uint64_t round_const = 1ULL << (shift - 1);
- tcg_round = tcg_const_i64(round_const);
- } else {
- tcg_round = NULL;
- }
+ round_const = 1ULL << (shift - 1);
+ tcg_round = tcg_const_i64(round_const);
for (i = 0; i < elements; i++) {
read_vec_element(s, tcg_rn, rn, i, memop);
- if (accumulate || insert) {
+ if (accumulate) {
read_vec_element(s, tcg_rd, rd, i, memop);
}
- if (insert) {
- handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
- } else {
- handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
- accumulate, is_u, size, shift);
- }
+ handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
+ accumulate, is_u, size, shift);
write_vec_element(s, tcg_rd, rd, i, size);
}
+ tcg_temp_free_i64(tcg_round);
+ done:
if (!is_q) {
clear_vec_high(s, rd);
}
+}
- if (round) {
- tcg_temp_free_i64(tcg_round);
- }
+static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_8, 0xff << shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff << shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
+}
+
+static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
+}
+
+static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ uint64_t mask = (1ull << sh) - 1;
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_dupi_vec(vece, m, mask);
+ tcg_gen_shli_vec(vece, t, a, sh);
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
+
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(m);
}
/* SHL/SLI - Vector shift left */
static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
- int immh, int immb, int opcode, int rn, int rd)
+ int immh, int immb, int opcode, int rn, int rd)
{
+ static const GVecGen2i shi_op[4] = {
+ { .fni8 = gen_shl8_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .opc = INDEX_op_shli_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fni8 = gen_shl16_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .opc = INDEX_op_shli_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_shl32_ins_i32,
+ .fniv = gen_shl_ins_vec,
+ .opc = INDEX_op_shli_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_shl64_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .opc = INDEX_op_shli_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
int size = 32 - clz32(immh) - 1;
int immhb = immh << 3 | immb;
int shift = immhb - (8 << size);
- int dsize = is_q ? 128 : 64;
- int esize = 8 << size;
- int elements = dsize/esize;
- TCGv_i64 tcg_rn = new_tmp_a64(s);
- TCGv_i64 tcg_rd = new_tmp_a64(s);
- int i;
if (extract32(immh, 3, 1) && !is_q) {
unallocated_encoding(s);
@@ -8437,19 +8747,10 @@
return;
}
- for (i = 0; i < elements; i++) {
- read_vec_element(s, tcg_rn, rn, i, size);
- if (insert) {
- read_vec_element(s, tcg_rd, rd, i, size);
- }
-
- handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
-
- write_vec_element(s, tcg_rd, rd, i, size);
- }
-
- if (!is_q) {
- clear_vec_high(s, rd);
+ if (insert) {
+ gen_gvec_op2i(s, is_q, rd, rn, shift, &shi_op[size]);
+ } else {
+ gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
}
}
@@ -9072,85 +9373,115 @@
}
}
+static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ tcg_gen_xor_i64(rn, rn, rm);
+ tcg_gen_and_i64(rn, rn, rd);
+ tcg_gen_xor_i64(rd, rm, rn);
+}
+
+static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ tcg_gen_xor_i64(rn, rn, rd);
+ tcg_gen_and_i64(rn, rn, rm);
+ tcg_gen_xor_i64(rd, rd, rn);
+}
+
+static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ tcg_gen_xor_i64(rn, rn, rd);
+ tcg_gen_andc_i64(rn, rn, rm);
+ tcg_gen_xor_i64(rd, rd, rn);
+}
+
+static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
+{
+ tcg_gen_xor_vec(vece, rn, rn, rm);
+ tcg_gen_and_vec(vece, rn, rn, rd);
+ tcg_gen_xor_vec(vece, rd, rm, rn);
+}
+
+static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
+{
+ tcg_gen_xor_vec(vece, rn, rn, rd);
+ tcg_gen_and_vec(vece, rn, rn, rm);
+ tcg_gen_xor_vec(vece, rd, rd, rn);
+}
+
+static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
+{
+ tcg_gen_xor_vec(vece, rn, rn, rd);
+ tcg_gen_andc_vec(vece, rn, rn, rm);
+ tcg_gen_xor_vec(vece, rd, rd, rn);
+}
+
/* Logic op (opcode == 3) subgroup of C3.6.16. */
static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
{
+ static const GVecGen3 bsl_op = {
+ .fni8 = gen_bsl_i64,
+ .fniv = gen_bsl_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true
+ };
+ static const GVecGen3 bit_op = {
+ .fni8 = gen_bit_i64,
+ .fniv = gen_bit_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true
+ };
+ static const GVecGen3 bif_op = {
+ .fni8 = gen_bif_i64,
+ .fniv = gen_bif_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true
+ };
+
int rd = extract32(insn, 0, 5);
int rn = extract32(insn, 5, 5);
int rm = extract32(insn, 16, 5);
int size = extract32(insn, 22, 2);
bool is_u = extract32(insn, 29, 1);
bool is_q = extract32(insn, 30, 1);
- TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
- int pass;
if (!fp_access_check(s)) {
return;
}
- tcg_op1 = tcg_temp_new_i64();
- tcg_op2 = tcg_temp_new_i64();
- tcg_res[0] = tcg_temp_new_i64();
- tcg_res[1] = tcg_temp_new_i64();
-
- for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
- read_vec_element(s, tcg_op1, rn, pass, MO_64);
- read_vec_element(s, tcg_op2, rm, pass, MO_64);
-
- if (!is_u) {
- switch (size) {
- case 0: /* AND */
- tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
- break;
- case 1: /* BIC */
- tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
- break;
- case 2: /* ORR */
- tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
- break;
- case 3: /* ORN */
- tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
- break;
- }
+ switch (size + 4 * is_u) {
+ case 0: /* AND */
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
+ return;
+ case 1: /* BIC */
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
+ return;
+ case 2: /* ORR */
+ if (rn == rm) { /* MOV */
+ gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_mov, 0);
} else {
- if (size != 0) {
- /* B* ops need res loaded to operate on */
- read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
- }
-
- switch (size) {
- case 0: /* EOR */
- tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
- break;
- case 1: /* BSL bitwise select */
- tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
- tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
- tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
- break;
- case 2: /* BIT, bitwise insert if true */
- tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
- tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
- tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
- break;
- case 3: /* BIF, bitwise insert if false */
- tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
- tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
- tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
- break;
- }
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
}
- }
+ return;
+ case 3: /* ORN */
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
+ return;
+ case 4: /* EOR */
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
+ return;
- write_vec_element(s, tcg_res[0], rd, 0, MO_64);
- if (!is_q) {
- tcg_gen_movi_i64(tcg_res[1], 0);
- }
- write_vec_element(s, tcg_res[1], rd, 1, MO_64);
+ case 5: /* BSL bitwise select */
+ gen_gvec_op3(s, is_q, rd, rn, rm, &bsl_op);
+ return;
+ case 6: /* BIT, bitwise insert if true */
+ gen_gvec_op3(s, is_q, rd, rn, rm, &bit_op);
+ return;
+ case 7: /* BIF, bitwise insert if false */
+ gen_gvec_op3(s, is_q, rd, rn, rm, &bif_op);
+ return;
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
- tcg_temp_free_i64(tcg_res[0]);
- tcg_temp_free_i64(tcg_res[1]);
+ default:
+ g_assert_not_reached();
+ }
}
/* Helper functions for 32 bit comparisons */
@@ -9400,9 +9731,131 @@
}
}
+static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u8(a, a, b);
+ gen_helper_neon_add_u8(d, d, a);
+}
+
+static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u16(a, a, b);
+ gen_helper_neon_add_u16(d, d, a);
+}
+
+static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_mul_i32(a, a, b);
+ tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_mul_i64(a, a, b);
+ tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_mul_vec(vece, a, a, b);
+ tcg_gen_add_vec(vece, d, d, a);
+}
+
+static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u8(a, a, b);
+ gen_helper_neon_sub_u8(d, d, a);
+}
+
+static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u16(a, a, b);
+ gen_helper_neon_sub_u16(d, d, a);
+}
+
+static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_mul_i32(a, a, b);
+ tcg_gen_sub_i32(d, d, a);
+}
+
+static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_mul_i64(a, a, b);
+ tcg_gen_sub_i64(d, d, a);
+}
+
+static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_mul_vec(vece, a, a, b);
+ tcg_gen_sub_vec(vece, d, d, a);
+}
+
/* Integer op subgroup of C3.6.16. */
static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
{
+ static const GVecGen3 cmtst_op[4] = {
+ { .fni4 = gen_helper_neon_tst_u8,
+ .fniv = gen_cmtst_vec,
+ .vece = MO_8 },
+ { .fni4 = gen_helper_neon_tst_u16,
+ .fniv = gen_cmtst_vec,
+ .vece = MO_16 },
+ { .fni4 = gen_cmtst_i32,
+ .fniv = gen_cmtst_vec,
+ .vece = MO_32 },
+ { .fni8 = gen_cmtst_i64,
+ .fniv = gen_cmtst_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+ static const GVecGen3 mla_op[4] = {
+ { .fni4 = gen_mla8_i32,
+ .fniv = gen_mla_vec,
+ .opc = INDEX_op_mul_vec,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fni4 = gen_mla16_i32,
+ .fniv = gen_mla_vec,
+ .opc = INDEX_op_mul_vec,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_mla32_i32,
+ .fniv = gen_mla_vec,
+ .opc = INDEX_op_mul_vec,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_mla64_i64,
+ .fniv = gen_mla_vec,
+ .opc = INDEX_op_mul_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+ static const GVecGen3 mls_op[4] = {
+ { .fni4 = gen_mls8_i32,
+ .fniv = gen_mls_vec,
+ .opc = INDEX_op_mul_vec,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fni4 = gen_mls16_i32,
+ .fniv = gen_mls_vec,
+ .opc = INDEX_op_mul_vec,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_mls32_i32,
+ .fniv = gen_mls_vec,
+ .opc = INDEX_op_mul_vec,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_mls64_i64,
+ .fniv = gen_mls_vec,
+ .opc = INDEX_op_mul_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+
int is_q = extract32(insn, 30, 1);
int u = extract32(insn, 29, 1);
int size = extract32(insn, 22, 2);
@@ -9411,6 +9864,7 @@
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
int pass;
+ TCGCond cond;
switch (opcode) {
case 0x13: /* MUL, PMUL */
@@ -9450,6 +9904,48 @@
return;
}
+ switch (opcode) {
+ case 0x10: /* ADD, SUB */
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
+ }
+ return;
+ case 0x13: /* MUL, PMUL */
+ if (!u) { /* MUL */
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
+ return;
+ }
+ break;
+ case 0x12: /* MLA, MLS */
+ if (u) {
+ gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
+ } else {
+ gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]);
+ }
+ return;
+ case 0x11:
+ if (!u) { /* CMTST */
+ gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]);
+ return;
+ }
+ /* else CMEQ */
+ cond = TCG_COND_EQ;
+ goto do_gvec_cmp;
+ case 0x06: /* CMGT, CMHI */
+ cond = u ? TCG_COND_GTU : TCG_COND_GT;
+ goto do_gvec_cmp;
+ case 0x07: /* CMGE, CMHS */
+ cond = u ? TCG_COND_GEU : TCG_COND_GE;
+ do_gvec_cmp:
+ tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ is_q ? 16 : 8, vec_full_reg_size(s));
+ return;
+ }
+
if (size == 3) {
assert(is_q);
for (pass = 0; pass < 2; pass++) {
@@ -9530,26 +10026,6 @@
genenvfn = fns[size][u];
break;
}
- case 0x6: /* CMGT, CMHI */
- {
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
- { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
- { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
- };
- genfn = fns[size][u];
- break;
- }
- case 0x7: /* CMGE, CMHS */
- {
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
- { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
- { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
- };
- genfn = fns[size][u];
- break;
- }
case 0x8: /* SSHL, USHL */
{
static NeonGenTwoOpFn * const fns[3][2] = {
@@ -9622,44 +10098,11 @@
genfn = fns[size][u];
break;
}
- case 0x10: /* ADD, SUB */
- {
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
- { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
- { tcg_gen_add_i32, tcg_gen_sub_i32 },
- };
- genfn = fns[size][u];
- break;
- }
- case 0x11: /* CMTST, CMEQ */
- {
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
- { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
- { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
- };
- genfn = fns[size][u];
- break;
- }
case 0x13: /* MUL, PMUL */
- if (u) {
- /* PMUL */
- assert(size == 0);
- genfn = gen_helper_neon_mul_p8;
- break;
- }
- /* fall through : MUL */
- case 0x12: /* MLA, MLS */
- {
- static NeonGenTwoOpFn * const fns[3] = {
- gen_helper_neon_mul_u8,
- gen_helper_neon_mul_u16,
- tcg_gen_mul_i32,
- };
- genfn = fns[size];
+ assert(u); /* PMUL */
+ assert(size == 0);
+ genfn = gen_helper_neon_mul_p8;
break;
- }
case 0x16: /* SQDMULH, SQRDMULH */
{
static NeonGenTwoOpEnvFn * const fns[2][2] = {
@@ -9680,18 +10123,16 @@
genfn(tcg_res, tcg_op1, tcg_op2);
}
- if (opcode == 0xf || opcode == 0x12) {
- /* SABA, UABA, MLA, MLS: accumulating ops */
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
- { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
- { tcg_gen_add_i32, tcg_gen_sub_i32 },
+ if (opcode == 0xf) {
+ /* SABA, UABA: accumulating ops */
+ static NeonGenTwoOpFn * const fns[3] = {
+ gen_helper_neon_add_u8,
+ gen_helper_neon_add_u16,
+ tcg_gen_add_i32,
};
- bool is_sub = (opcode == 0x12 && u); /* MLS */
- genfn = fns[size][is_sub];
read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
- genfn(tcg_res, tcg_op1, tcg_res);
+ fns[size](tcg_res, tcg_op1, tcg_res);
}
write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
@@ -10003,8 +10444,7 @@
return;
case 0x5: /* CNT, NOT, RBIT */
if (u && size == 0) {
- /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
- size = 3;
+ /* NOT */
break;
} else if (u && size == 1) {
/* RBIT */
@@ -10256,6 +10696,21 @@
tcg_rmode = NULL;
}
+ switch (opcode) {
+ case 0x5:
+ if (u && size == 0) { /* NOT */
+ gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
+ return;
+ }
+ break;
+ case 0xb:
+ if (u) { /* NEG */
+ gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
+ return;
+ }
+ break;
+ }
+
if (size == 3) {
/* All 64-bit element operations can be shared with scalar 2misc */
int pass;
@@ -11132,6 +11587,341 @@
tcg_temp_free_ptr(tcg_rn_ptr);
}
+/* Crypto three-reg SHA512
+ * 31 21 20 16 15 14 13 12 11 10 9 5 4 0
+ * +-----------------------+------+---+---+-----+--------+------+------+
+ * | 1 1 0 0 1 1 1 0 0 1 1 | Rm | 1 | O | 0 0 | opcode | Rn | Rd |
+ * +-----------------------+------+---+---+-----+--------+------+------+
+ */
+static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
+{
+ int opcode = extract32(insn, 10, 2);
+ int o = extract32(insn, 14, 1);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ int feature;
+ CryptoThreeOpFn *genfn;
+
+ if (o == 0) {
+ switch (opcode) {
+ case 0: /* SHA512H */
+ feature = ARM_FEATURE_V8_SHA512;
+ genfn = gen_helper_crypto_sha512h;
+ break;
+ case 1: /* SHA512H2 */
+ feature = ARM_FEATURE_V8_SHA512;
+ genfn = gen_helper_crypto_sha512h2;
+ break;
+ case 2: /* SHA512SU1 */
+ feature = ARM_FEATURE_V8_SHA512;
+ genfn = gen_helper_crypto_sha512su1;
+ break;
+ case 3: /* RAX1 */
+ feature = ARM_FEATURE_V8_SHA3;
+ genfn = NULL;
+ break;
+ }
+ } else {
+ switch (opcode) {
+ case 0: /* SM3PARTW1 */
+ feature = ARM_FEATURE_V8_SM3;
+ genfn = gen_helper_crypto_sm3partw1;
+ break;
+ case 1: /* SM3PARTW2 */
+ feature = ARM_FEATURE_V8_SM3;
+ genfn = gen_helper_crypto_sm3partw2;
+ break;
+ case 2: /* SM4EKEY */
+ feature = ARM_FEATURE_V8_SM4;
+ genfn = gen_helper_crypto_sm4ekey;
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+ }
+
+ if (!arm_dc_feature(s, feature)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ if (genfn) {
+ TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
+
+ tcg_rd_ptr = vec_full_reg_ptr(s, rd);
+ tcg_rn_ptr = vec_full_reg_ptr(s, rn);
+ tcg_rm_ptr = vec_full_reg_ptr(s, rm);
+
+ genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
+
+ tcg_temp_free_ptr(tcg_rd_ptr);
+ tcg_temp_free_ptr(tcg_rn_ptr);
+ tcg_temp_free_ptr(tcg_rm_ptr);
+ } else {
+ TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
+ int pass;
+
+ tcg_op1 = tcg_temp_new_i64();
+ tcg_op2 = tcg_temp_new_i64();
+ tcg_res[0] = tcg_temp_new_i64();
+ tcg_res[1] = tcg_temp_new_i64();
+
+ for (pass = 0; pass < 2; pass++) {
+ read_vec_element(s, tcg_op1, rn, pass, MO_64);
+ read_vec_element(s, tcg_op2, rm, pass, MO_64);
+
+ tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1);
+ tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
+ }
+ write_vec_element(s, tcg_res[0], rd, 0, MO_64);
+ write_vec_element(s, tcg_res[1], rd, 1, MO_64);
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ tcg_temp_free_i64(tcg_res[0]);
+ tcg_temp_free_i64(tcg_res[1]);
+ }
+}
+
+/* Crypto two-reg SHA512
+ * 31 12 11 10 9 5 4 0
+ * +-----------------------------------------+--------+------+------+
+ * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode | Rn | Rd |
+ * +-----------------------------------------+--------+------+------+
+ */
+static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
+{
+ int opcode = extract32(insn, 10, 2);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
+ int feature;
+ CryptoTwoOpFn *genfn;
+
+ switch (opcode) {
+ case 0: /* SHA512SU0 */
+ feature = ARM_FEATURE_V8_SHA512;
+ genfn = gen_helper_crypto_sha512su0;
+ break;
+ case 1: /* SM4E */
+ feature = ARM_FEATURE_V8_SM4;
+ genfn = gen_helper_crypto_sm4e;
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!arm_dc_feature(s, feature)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ tcg_rd_ptr = vec_full_reg_ptr(s, rd);
+ tcg_rn_ptr = vec_full_reg_ptr(s, rn);
+
+ genfn(tcg_rd_ptr, tcg_rn_ptr);
+
+ tcg_temp_free_ptr(tcg_rd_ptr);
+ tcg_temp_free_ptr(tcg_rn_ptr);
+}
+
+/* Crypto four-register
+ * 31 23 22 21 20 16 15 14 10 9 5 4 0
+ * +-------------------+-----+------+---+------+------+------+
+ * | 1 1 0 0 1 1 1 0 0 | Op0 | Rm | 0 | Ra | Rn | Rd |
+ * +-------------------+-----+------+---+------+------+------+
+ */
+static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
+{
+ int op0 = extract32(insn, 21, 2);
+ int rm = extract32(insn, 16, 5);
+ int ra = extract32(insn, 10, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ int feature;
+
+ switch (op0) {
+ case 0: /* EOR3 */
+ case 1: /* BCAX */
+ feature = ARM_FEATURE_V8_SHA3;
+ break;
+ case 2: /* SM3SS1 */
+ feature = ARM_FEATURE_V8_SM3;
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!arm_dc_feature(s, feature)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ if (op0 < 2) {
+ TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
+ int pass;
+
+ tcg_op1 = tcg_temp_new_i64();
+ tcg_op2 = tcg_temp_new_i64();
+ tcg_op3 = tcg_temp_new_i64();
+ tcg_res[0] = tcg_temp_new_i64();
+ tcg_res[1] = tcg_temp_new_i64();
+
+ for (pass = 0; pass < 2; pass++) {
+ read_vec_element(s, tcg_op1, rn, pass, MO_64);
+ read_vec_element(s, tcg_op2, rm, pass, MO_64);
+ read_vec_element(s, tcg_op3, ra, pass, MO_64);
+
+ if (op0 == 0) {
+ /* EOR3 */
+ tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
+ } else {
+ /* BCAX */
+ tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
+ }
+ tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
+ }
+ write_vec_element(s, tcg_res[0], rd, 0, MO_64);
+ write_vec_element(s, tcg_res[1], rd, 1, MO_64);
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ tcg_temp_free_i64(tcg_op3);
+ tcg_temp_free_i64(tcg_res[0]);
+ tcg_temp_free_i64(tcg_res[1]);
+ } else {
+ TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
+
+ tcg_op1 = tcg_temp_new_i32();
+ tcg_op2 = tcg_temp_new_i32();
+ tcg_op3 = tcg_temp_new_i32();
+ tcg_res = tcg_temp_new_i32();
+ tcg_zero = tcg_const_i32(0);
+
+ read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
+ read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
+ read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
+
+ tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
+ tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
+ tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
+ tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
+
+ write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
+ write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
+ write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
+ write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
+
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ tcg_temp_free_i32(tcg_op3);
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_zero);
+ }
+}
+
+/* Crypto XAR
+ * 31 21 20 16 15 10 9 5 4 0
+ * +-----------------------+------+--------+------+------+
+ * | 1 1 0 0 1 1 1 0 1 0 0 | Rm | imm6 | Rn | Rd |
+ * +-----------------------+------+--------+------+------+
+ */
+static void disas_crypto_xar(DisasContext *s, uint32_t insn)
+{
+ int rm = extract32(insn, 16, 5);
+ int imm6 = extract32(insn, 10, 6);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
+ int pass;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA3)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ tcg_op1 = tcg_temp_new_i64();
+ tcg_op2 = tcg_temp_new_i64();
+ tcg_res[0] = tcg_temp_new_i64();
+ tcg_res[1] = tcg_temp_new_i64();
+
+ for (pass = 0; pass < 2; pass++) {
+ read_vec_element(s, tcg_op1, rn, pass, MO_64);
+ read_vec_element(s, tcg_op2, rm, pass, MO_64);
+
+ tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
+ tcg_gen_rotri_i64(tcg_res[pass], tcg_res[pass], imm6);
+ }
+ write_vec_element(s, tcg_res[0], rd, 0, MO_64);
+ write_vec_element(s, tcg_res[1], rd, 1, MO_64);
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ tcg_temp_free_i64(tcg_res[0]);
+ tcg_temp_free_i64(tcg_res[1]);
+}
+
+/* Crypto three-reg imm2
+ * 31 21 20 16 15 14 13 12 11 10 9 5 4 0
+ * +-----------------------+------+-----+------+--------+------+------+
+ * | 1 1 0 0 1 1 1 0 0 1 0 | Rm | 1 0 | imm2 | opcode | Rn | Rd |
+ * +-----------------------+------+-----+------+--------+------+------+
+ */
+static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
+{
+ int opcode = extract32(insn, 10, 2);
+ int imm2 = extract32(insn, 12, 2);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
+ TCGv_i32 tcg_imm2, tcg_opcode;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_SM3)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ tcg_rd_ptr = vec_full_reg_ptr(s, rd);
+ tcg_rn_ptr = vec_full_reg_ptr(s, rn);
+ tcg_rm_ptr = vec_full_reg_ptr(s, rm);
+ tcg_imm2 = tcg_const_i32(imm2);
+ tcg_opcode = tcg_const_i32(opcode);
+
+ gen_helper_crypto_sm3tt(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2,
+ tcg_opcode);
+
+ tcg_temp_free_ptr(tcg_rd_ptr);
+ tcg_temp_free_ptr(tcg_rn_ptr);
+ tcg_temp_free_ptr(tcg_rm_ptr);
+ tcg_temp_free_i32(tcg_imm2);
+ tcg_temp_free_i32(tcg_opcode);
+}
+
/* C3.6 Data processing - SIMD, inc Crypto
*
* As the decode gets a little complex we are using a table based
@@ -11161,6 +11951,11 @@
{ 0x4e280800, 0xff3e0c00, disas_crypto_aes },
{ 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
{ 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
+ { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
+ { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
+ { 0xce000000, 0xff808000, disas_crypto_four_reg },
+ { 0xce800000, 0xffe00000, disas_crypto_xar },
+ { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
{ 0x00000000, 0x00000000, NULL }
};
@@ -11263,6 +12058,8 @@
dc->user = (dc->current_el == 0);
#endif
dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(dc->base.tb->flags);
+ dc->sve_excp_el = ARM_TBFLAG_SVEEXC_EL(dc->base.tb->flags);
+ dc->sve_len = (ARM_TBFLAG_ZCR_LEN(dc->base.tb->flags) + 1) * 16;
dc->vec_len = 0;
dc->vec_stride = 0;
dc->cp_regs = arm_cpu->cp_regs;
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 55826b7..1270022 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -1512,13 +1512,12 @@
}
}
-static inline long
-vfp_reg_offset (int dp, int reg)
+static inline long vfp_reg_offset(bool dp, unsigned reg)
{
if (dp) {
- return offsetof(CPUARMState, vfp.regs[reg]);
+ return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
} else {
- long ofs = offsetof(CPUARMState, vfp.regs[reg >> 1]);
+ long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
if (reg & 1) {
ofs += offsetof(CPU_DoubleU, l.upper);
} else {
@@ -9926,6 +9925,7 @@
tcg_temp_free_i32(addr);
tcg_temp_free_i32(op);
store_reg(s, rd, ttresp);
+ break;
}
goto illegal_op;
}
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 3f4df91..c47febf 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -29,6 +29,8 @@
bool tbi1; /* TBI1 for EL0/1, not used for EL2/3 */
bool ns; /* Use non-secure CPREG bank on access */
int fp_excp_el; /* FP exception EL or 0 if enabled */
+ int sve_excp_el; /* SVE exception EL or 0 if enabled */
+ int sve_len; /* SVE vector length in bytes */
/* Flag indicating that exceptions from secure mode are routed to EL3. */
bool secure_routed_to_el3;
bool vfp_enabled; /* FP enabled via FPSCR.EN */
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index 979469d..da7cb9c 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -100,7 +100,6 @@
{
S390CPU *cpu = S390_CPU(s);
CPUS390XState *env = &cpu->env;
- int i;
s390_cpu_reset(s);
/* initial reset does not clear everything! */
@@ -116,10 +115,6 @@
env->gbea = 1;
env->pfault_token = -1UL;
- for (i = 0; i < ARRAY_SIZE(env->io_index); i++) {
- env->io_index[i] = -1;
- }
- env->mchk_index = -1;
/* tininess for underflow is detected before rounding */
set_float_detect_tininess(float_tininess_before_rounding,
@@ -137,7 +132,6 @@
S390CPU *cpu = S390_CPU(s);
S390CPUClass *scc = S390_CPU_GET_CLASS(cpu);
CPUS390XState *env = &cpu->env;
- int i;
scc->parent_reset(s);
cpu->env.sigp_order = 0;
@@ -153,10 +147,6 @@
env->gbea = 1;
env->pfault_token = -1UL;
- for (i = 0; i < ARRAY_SIZE(env->io_index); i++) {
- env->io_index[i] = -1;
- }
- env->mchk_index = -1;
/* tininess for underflow is detected before rounding */
set_float_detect_tininess(float_tininess_before_rounding,
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index a1123ad..21ce40d 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -53,12 +53,6 @@
#define MMU_USER_IDX 0
-#define MAX_IO_QUEUE 16
-#define MAX_MCHK_QUEUE 16
-
-#define PSW_MCHK_MASK 0x0004000000000000
-#define PSW_IO_MASK 0x0200000000000000
-
#define S390_MAX_CPUS 248
typedef struct PSW {
@@ -66,17 +60,6 @@
uint64_t addr;
} PSW;
-typedef struct IOIntQueue {
- uint16_t id;
- uint16_t nr;
- uint32_t parm;
- uint32_t word;
-} IOIntQueue;
-
-typedef struct MchkQueue {
- uint16_t type;
-} MchkQueue;
-
struct CPUS390XState {
uint64_t regs[16]; /* GP registers */
/*
@@ -122,15 +105,9 @@
uint64_t cregs[16]; /* control registers */
- IOIntQueue io_queue[MAX_IO_QUEUE][8];
- MchkQueue mchk_queue[MAX_MCHK_QUEUE];
-
int pending_int;
- uint32_t service_param;
uint16_t external_call_addr;
DECLARE_BITMAP(emergency_signals, S390_MAX_CPUS);
- int io_index[8];
- int mchk_index;
uint64_t ckc;
uint64_t cputm;
@@ -409,9 +386,6 @@
#define EXCP_IO 7 /* I/O interrupt */
#define EXCP_MCHK 8 /* machine check */
-#define INTERRUPT_IO (1 << 0)
-#define INTERRUPT_MCHK (1 << 1)
-#define INTERRUPT_EXT_SERVICE (1 << 2)
#define INTERRUPT_EXT_CPU_TIMER (1 << 3)
#define INTERRUPT_EXT_CLOCK_COMPARATOR (1 << 4)
#define INTERRUPT_EXTERNAL_CALL (1 << 5)
@@ -452,62 +426,66 @@
}
/* STSI */
-#define STSI_LEVEL_MASK 0x00000000f0000000ULL
-#define STSI_LEVEL_CURRENT 0x0000000000000000ULL
-#define STSI_LEVEL_1 0x0000000010000000ULL
-#define STSI_LEVEL_2 0x0000000020000000ULL
-#define STSI_LEVEL_3 0x0000000030000000ULL
+#define STSI_R0_FC_MASK 0x00000000f0000000ULL
+#define STSI_R0_FC_CURRENT 0x0000000000000000ULL
+#define STSI_R0_FC_LEVEL_1 0x0000000010000000ULL
+#define STSI_R0_FC_LEVEL_2 0x0000000020000000ULL
+#define STSI_R0_FC_LEVEL_3 0x0000000030000000ULL
#define STSI_R0_RESERVED_MASK 0x000000000fffff00ULL
#define STSI_R0_SEL1_MASK 0x00000000000000ffULL
#define STSI_R1_RESERVED_MASK 0x00000000ffff0000ULL
#define STSI_R1_SEL2_MASK 0x000000000000ffffULL
/* Basic Machine Configuration */
-struct sysib_111 {
- uint32_t res1[8];
+typedef struct SysIB_111 {
+ uint8_t res1[32];
uint8_t manuf[16];
uint8_t type[4];
uint8_t res2[12];
uint8_t model[16];
uint8_t sequence[16];
uint8_t plant[4];
- uint8_t res3[156];
-};
+ uint8_t res3[3996];
+} SysIB_111;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_111) != 4096);
/* Basic Machine CPU */
-struct sysib_121 {
- uint32_t res1[80];
+typedef struct SysIB_121 {
+ uint8_t res1[80];
uint8_t sequence[16];
uint8_t plant[4];
uint8_t res2[2];
uint16_t cpu_addr;
- uint8_t res3[152];
-};
+ uint8_t res3[3992];
+} SysIB_121;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_121) != 4096);
/* Basic Machine CPUs */
-struct sysib_122 {
+typedef struct SysIB_122 {
uint8_t res1[32];
uint32_t capability;
uint16_t total_cpus;
- uint16_t active_cpus;
+ uint16_t conf_cpus;
uint16_t standby_cpus;
uint16_t reserved_cpus;
uint16_t adjustments[2026];
-};
+} SysIB_122;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_122) != 4096);
/* LPAR CPU */
-struct sysib_221 {
- uint32_t res1[80];
+typedef struct SysIB_221 {
+ uint8_t res1[80];
uint8_t sequence[16];
uint8_t plant[4];
uint16_t cpu_id;
uint16_t cpu_addr;
- uint8_t res3[152];
-};
+ uint8_t res3[3992];
+} SysIB_221;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_221) != 4096);
/* LPAR CPUs */
-struct sysib_222 {
- uint32_t res1[32];
+typedef struct SysIB_222 {
+ uint8_t res1[32];
uint16_t lpar_num;
uint8_t res2;
uint8_t lcpuc;
@@ -520,11 +498,12 @@
uint8_t res3[16];
uint16_t dedicated_cpus;
uint16_t shared_cpus;
- uint8_t res4[180];
-};
+ uint8_t res4[4020];
+} SysIB_222;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_222) != 4096);
/* VM CPUs */
-struct sysib_322 {
+typedef struct SysIB_322 {
uint8_t res1[31];
uint8_t count;
struct {
@@ -543,7 +522,18 @@
} vm[8];
uint8_t res4[1504];
uint8_t ext_names[8][256];
-};
+} SysIB_322;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
+
+typedef union SysIB {
+ SysIB_111 sysib_111;
+ SysIB_121 sysib_121;
+ SysIB_122 sysib_122;
+ SysIB_221 sysib_221;
+ SysIB_222 sysib_222;
+ SysIB_322 sysib_322;
+} SysIB;
+QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
/* MMU defines */
#define _ASCE_ORIGIN ~0xfffULL /* segment table origin */
@@ -718,6 +708,10 @@
return 0;
}
#endif /* CONFIG_USER_ONLY */
+static inline uint8_t s390_cpu_get_state(S390CPU *cpu)
+{
+ return cpu->env.cpu_state;
+}
/* cpu_models.c */
@@ -752,7 +746,6 @@
/* service interrupts are floating therefore we must not pass an cpustate */
void s390_sclp_extint(uint32_t parm);
-
/* mmu_helper.c */
int s390_cpu_virt_mem_rw(S390CPU *cpu, vaddr laddr, uint8_t ar, void *hostbuf,
int len, bool is_write);
diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
index 85d10b5..a5619f2 100644
--- a/target/s390x/cpu_features.c
+++ b/target/s390x/cpu_features.c
@@ -156,8 +156,12 @@
FEAT_INIT("ptff-qpc", S390_FEAT_TYPE_PTFF, 3, "PTFF Query Physical Clock"),
FEAT_INIT("ptff-qui", S390_FEAT_TYPE_PTFF, 4, "PTFF Query UTC Information"),
FEAT_INIT("ptff-qtou", S390_FEAT_TYPE_PTFF, 5, "PTFF Query TOD Offset User"),
+ FEAT_INIT("ptff-qsie", S390_FEAT_TYPE_PTFF, 10, "PTFF Query Steering Information Extended"),
+ FEAT_INIT("ptff-qtoue", S390_FEAT_TYPE_PTFF, 13, "PTFF Query TOD Offset User Extended"),
FEAT_INIT("ptff-sto", S390_FEAT_TYPE_PTFF, 65, "PTFF Set TOD Offset"),
FEAT_INIT("ptff-stou", S390_FEAT_TYPE_PTFF, 69, "PTFF Set TOD Offset User"),
+ FEAT_INIT("ptff-stoe", S390_FEAT_TYPE_PTFF, 73, "PTFF Set TOD Offset Extended"),
+ FEAT_INIT("ptff-stoue", S390_FEAT_TYPE_PTFF, 77, "PTFF Set TOD Offset User Extended"),
FEAT_INIT("kmac-dea", S390_FEAT_TYPE_KMAC, 1, "KMAC DEA"),
FEAT_INIT("kmac-tdea-128", S390_FEAT_TYPE_KMAC, 2, "KMAC TDEA-128"),
@@ -445,6 +449,7 @@
FEAT_GROUP_INIT("plo", PLO, "Perform-locked-operation facility"),
FEAT_GROUP_INIT("tods", TOD_CLOCK_STEERING, "Tod-clock-steering facility"),
FEAT_GROUP_INIT("gen13ptff", GEN13_PTFF, "PTFF enhancements introduced with z13"),
+ FEAT_GROUP_INIT("mepochptff", MULTIPLE_EPOCH_PTFF, "PTFF enhancements introduced with Multiple-epoch facility"),
FEAT_GROUP_INIT("msa", MSA, "Message-security-assist facility"),
FEAT_GROUP_INIT("msa1", MSA_EXT_1, "Message-security-assist-extension 1 facility"),
FEAT_GROUP_INIT("msa2", MSA_EXT_2, "Message-security-assist-extension 2 facility"),
diff --git a/target/s390x/cpu_features_def.h b/target/s390x/cpu_features_def.h
index 4d93087..7c5915c 100644
--- a/target/s390x/cpu_features_def.h
+++ b/target/s390x/cpu_features_def.h
@@ -151,8 +151,12 @@
S390_FEAT_PTFF_QPT,
S390_FEAT_PTFF_QUI,
S390_FEAT_PTFF_QTOU,
+ S390_FEAT_PTFF_QSIE,
+ S390_FEAT_PTFF_QTOUE,
S390_FEAT_PTFF_STO,
S390_FEAT_PTFF_STOU,
+ S390_FEAT_PTFF_STOE,
+ S390_FEAT_PTFF_STOUE,
/* KMAC */
S390_FEAT_KMAC_DEA,
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 584c409..1d5f0da 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -23,6 +23,7 @@
#include "qapi/qmp/qdict.h"
#ifndef CONFIG_USER_ONLY
#include "sysemu/arch_init.h"
+#include "hw/pci/pci.h"
#endif
#define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \
@@ -1271,6 +1272,11 @@
/* init all bitmaps from gnerated data initially */
s390_init_feat_bitmap(qemu_max_cpu_feat_init, qemu_max_cpu_feat);
+#ifndef CONFIG_USER_ONLY
+ if (!pci_available) {
+ clear_bit(S390_FEAT_ZPCI, qemu_max_cpu_feat);
+ }
+#endif
for (i = 0; i < ARRAY_SIZE(s390_cpu_defs); i++) {
s390_init_feat_bitmap(s390_cpu_defs[i].base_init,
s390_cpu_defs[i].base_feat);
diff --git a/target/s390x/excp_helper.c b/target/s390x/excp_helper.c
index 6967fbf..411051e 100644
--- a/target/s390x/excp_helper.c
+++ b/target/s390x/excp_helper.c
@@ -28,6 +28,7 @@
#include "exec/address-spaces.h"
#ifndef CONFIG_USER_ONLY
#include "sysemu/sysemu.h"
+#include "hw/s390x/s390_flic.h"
#endif
/* #define DEBUG_S390 */
@@ -236,6 +237,7 @@
static void do_ext_interrupt(CPUS390XState *env)
{
+ QEMUS390FLICState *flic = QEMU_S390_FLIC(s390_get_flic());
S390CPU *cpu = s390_env_get_cpu(env);
uint64_t mask, addr;
uint16_t cpu_addr;
@@ -272,17 +274,14 @@
lowcore->ext_int_code = cpu_to_be16(EXT_CPU_TIMER);
lowcore->cpu_addr = 0;
env->pending_int &= ~INTERRUPT_EXT_CPU_TIMER;
- } else if ((env->pending_int & INTERRUPT_EXT_SERVICE) &&
+ } else if (qemu_s390_flic_has_service(flic) &&
(env->cregs[0] & CR0_SERVICE_SC)) {
- /*
- * FIXME: floating IRQs should be considered by all CPUs and
- * shuld not get cleared by CPU reset.
- */
+ uint32_t param;
+
+ param = qemu_s390_flic_dequeue_service(flic);
lowcore->ext_int_code = cpu_to_be16(EXT_SERVICE);
- lowcore->ext_params = cpu_to_be32(env->service_param);
+ lowcore->ext_params = cpu_to_be32(param);
lowcore->cpu_addr = 0;
- env->service_param = 0;
- env->pending_int &= ~INTERRUPT_EXT_SERVICE;
} else {
g_assert_not_reached();
}
@@ -302,95 +301,46 @@
static void do_io_interrupt(CPUS390XState *env)
{
- S390CPU *cpu = s390_env_get_cpu(env);
+ QEMUS390FLICState *flic = QEMU_S390_FLIC(s390_get_flic());
+ uint64_t mask, addr;
+ QEMUS390FlicIO *io;
LowCore *lowcore;
- IOIntQueue *q;
- uint8_t isc;
- int disable = 1;
- int found = 0;
- if (!(env->psw.mask & PSW_MASK_IO)) {
- cpu_abort(CPU(cpu), "I/O int w/o I/O mask\n");
- }
+ g_assert(env->psw.mask & PSW_MASK_IO);
+ io = qemu_s390_flic_dequeue_io(flic, env->cregs[6]);
+ g_assert(io);
- for (isc = 0; isc < ARRAY_SIZE(env->io_index); isc++) {
- uint64_t isc_bits;
+ lowcore = cpu_map_lowcore(env);
- if (env->io_index[isc] < 0) {
- continue;
- }
- if (env->io_index[isc] >= MAX_IO_QUEUE) {
- cpu_abort(CPU(cpu), "I/O queue overrun for isc %d: %d\n",
- isc, env->io_index[isc]);
- }
+ lowcore->subchannel_id = cpu_to_be16(io->id);
+ lowcore->subchannel_nr = cpu_to_be16(io->nr);
+ lowcore->io_int_parm = cpu_to_be32(io->parm);
+ lowcore->io_int_word = cpu_to_be32(io->word);
+ lowcore->io_old_psw.mask = cpu_to_be64(get_psw_mask(env));
+ lowcore->io_old_psw.addr = cpu_to_be64(env->psw.addr);
+ mask = be64_to_cpu(lowcore->io_new_psw.mask);
+ addr = be64_to_cpu(lowcore->io_new_psw.addr);
- q = &env->io_queue[env->io_index[isc]][isc];
- isc_bits = ISC_TO_ISC_BITS(IO_INT_WORD_ISC(q->word));
- if (!(env->cregs[6] & isc_bits)) {
- disable = 0;
- continue;
- }
- if (!found) {
- uint64_t mask, addr;
+ cpu_unmap_lowcore(lowcore);
+ g_free(io);
- found = 1;
- lowcore = cpu_map_lowcore(env);
-
- lowcore->subchannel_id = cpu_to_be16(q->id);
- lowcore->subchannel_nr = cpu_to_be16(q->nr);
- lowcore->io_int_parm = cpu_to_be32(q->parm);
- lowcore->io_int_word = cpu_to_be32(q->word);
- lowcore->io_old_psw.mask = cpu_to_be64(get_psw_mask(env));
- lowcore->io_old_psw.addr = cpu_to_be64(env->psw.addr);
- mask = be64_to_cpu(lowcore->io_new_psw.mask);
- addr = be64_to_cpu(lowcore->io_new_psw.addr);
-
- cpu_unmap_lowcore(lowcore);
-
- env->io_index[isc]--;
-
- DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__,
- env->psw.mask, env->psw.addr);
- load_psw(env, mask, addr);
- }
- if (env->io_index[isc] >= 0) {
- disable = 0;
- }
- continue;
- }
-
- if (disable) {
- env->pending_int &= ~INTERRUPT_IO;
- }
-
+ DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__, env->psw.mask,
+ env->psw.addr);
+ load_psw(env, mask, addr);
}
static void do_mchk_interrupt(CPUS390XState *env)
{
- S390CPU *cpu = s390_env_get_cpu(env);
+ QEMUS390FLICState *flic = QEMU_S390_FLIC(s390_get_flic());
uint64_t mask, addr;
LowCore *lowcore;
- MchkQueue *q;
int i;
- if (!(env->psw.mask & PSW_MASK_MCHECK)) {
- cpu_abort(CPU(cpu), "Machine check w/o mchk mask\n");
- }
+ /* for now we only support channel report machine checks (floating) */
+ g_assert(env->psw.mask & PSW_MASK_MCHECK);
+ g_assert(env->cregs[14] & CR14_CHANNEL_REPORT_SC);
- if (env->mchk_index < 0 || env->mchk_index >= MAX_MCHK_QUEUE) {
- cpu_abort(CPU(cpu), "Mchk queue overrun: %d\n", env->mchk_index);
- }
-
- q = &env->mchk_queue[env->mchk_index];
-
- if (q->type != 1) {
- /* Don't know how to handle this... */
- cpu_abort(CPU(cpu), "Unknown machine check type %d\n", q->type);
- }
- if (!(env->cregs[14] & (1 << 28))) {
- /* CRW machine checks disabled */
- return;
- }
+ qemu_s390_flic_dequeue_crw_mchk(flic);
lowcore = cpu_map_lowcore(env);
@@ -417,11 +367,6 @@
cpu_unmap_lowcore(lowcore);
- env->mchk_index--;
- if (env->mchk_index == -1) {
- env->pending_int &= ~INTERRUPT_MCHK;
- }
-
DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__,
env->psw.mask, env->psw.addr);
@@ -430,12 +375,15 @@
void s390_cpu_do_interrupt(CPUState *cs)
{
+ QEMUS390FLICState *flic = QEMU_S390_FLIC(s390_get_flic());
S390CPU *cpu = S390_CPU(cs);
CPUS390XState *env = &cpu->env;
+ bool stopped = false;
qemu_log_mask(CPU_LOG_INT, "%s: %d at pc=%" PRIx64 "\n",
__func__, cs->exception_index, env->psw.addr);
+try_deliver:
/* handle machine checks */
if (cs->exception_index == -1 && s390_cpu_has_mcck_int(cpu)) {
cs->exception_index = EXCP_MCHK;
@@ -478,20 +426,30 @@
break;
case EXCP_STOP:
do_stop_interrupt(env);
+ stopped = true;
break;
}
- /* WAIT PSW during interrupt injection or STOP interrupt */
- if (cs->exception_index == EXCP_HLT) {
- /* don't trigger a cpu_loop_exit(), use an interrupt instead */
- cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HALT);
+ if (cs->exception_index != -1 && !stopped) {
+ /* check if there are more pending interrupts to deliver */
+ cs->exception_index = -1;
+ goto try_deliver;
}
cs->exception_index = -1;
/* we might still have pending interrupts, but not deliverable */
- if (!env->pending_int) {
+ if (!env->pending_int && !qemu_s390_flic_has_any(flic)) {
cs->interrupt_request &= ~CPU_INTERRUPT_HARD;
}
+
+ /* WAIT PSW during interrupt injection or STOP interrupt */
+ if ((env->psw.mask & PSW_MASK_WAIT) || stopped) {
+ /* don't trigger a cpu_loop_exit(), use an interrupt instead */
+ cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HALT);
+ } else if (cs->halted) {
+ /* unhalt if we had a WAIT PSW somehwere in our injection chain */
+ s390_cpu_unhalt(cpu);
+ }
}
bool s390_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
@@ -509,6 +467,11 @@
s390_cpu_do_interrupt(cs);
return true;
}
+ if (env->psw.mask & PSW_MASK_WAIT) {
+ /* Woken up because of a floating interrupt but it has already
+ * been delivered. Go back to sleep. */
+ cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HALT);
+ }
}
return false;
}
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index bd48333..0cdbc15 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -57,6 +57,12 @@
S390_FEAT_PTFF_QTOU, \
S390_FEAT_PTFF_STOU
+#define S390_FEAT_GROUP_MULTIPLE_EPOCH_PTFF \
+ S390_FEAT_PTFF_QSIE, \
+ S390_FEAT_PTFF_QTOUE, \
+ S390_FEAT_PTFF_STOE, \
+ S390_FEAT_PTFF_STOUE
+
#define S390_FEAT_GROUP_MSA \
S390_FEAT_MSA, \
S390_FEAT_KMAC_DEA, \
@@ -217,6 +223,9 @@
static uint16_t group_GEN13_PTFF[] = {
S390_FEAT_GROUP_GEN13_PTFF,
};
+static uint16_t group_MULTIPLE_EPOCH_PTFF[] = {
+ S390_FEAT_GROUP_MULTIPLE_EPOCH_PTFF,
+};
static uint16_t group_MSA[] = {
S390_FEAT_GROUP_MSA,
};
@@ -464,6 +473,7 @@
S390_FEAT_CMM_NT,
S390_FEAT_HPMA2,
S390_FEAT_SIE_KSS,
+ S390_FEAT_GROUP_MULTIPLE_EPOCH_PTFF,
};
/* Default features (in order of release)
@@ -570,8 +580,10 @@
S390_FEAT_STFLE_49,
S390_FEAT_LOCAL_TLB_CLEARING,
S390_FEAT_INTERLOCKED_ACCESS_2,
- S390_FEAT_MSA_EXT_4,
+ S390_FEAT_ADAPTER_EVENT_NOTIFICATION,
+ S390_FEAT_ADAPTER_INT_SUPPRESSION,
S390_FEAT_MSA_EXT_3,
+ S390_FEAT_MSA_EXT_4,
};
/* add all new definitions before this point */
@@ -580,6 +592,8 @@
S390_FEAT_STFLE_53,
/* generates a dependency warning, leave it out for now */
S390_FEAT_MSA_EXT_5,
+ /* only with CONFIG_PCI */
+ S390_FEAT_ZPCI,
};
/****** END FEATURE DEFS ******/
@@ -662,6 +676,7 @@
FEAT_GROUP_INITIALIZER(PLO),
FEAT_GROUP_INITIALIZER(TOD_CLOCK_STEERING),
FEAT_GROUP_INITIALIZER(GEN13_PTFF),
+ FEAT_GROUP_INITIALIZER(MULTIPLE_EPOCH_PTFF),
FEAT_GROUP_INITIALIZER(MSA),
FEAT_GROUP_INITIALIZER(MSA_EXT_1),
FEAT_GROUP_INITIALIZER(MSA_EXT_2),
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 59a1d98..59cba86 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -170,6 +170,16 @@
DEF_HELPER_3(ssch, void, env, i64, i64)
DEF_HELPER_2(stcrw, void, env, i64)
DEF_HELPER_3(stsch, void, env, i64, i64)
+DEF_HELPER_2(tpi, i32, env, i64)
DEF_HELPER_3(tsch, void, env, i64, i64)
DEF_HELPER_2(chsc, void, env, i64)
+
+DEF_HELPER_2(clp, void, env, i32)
+DEF_HELPER_3(pcilg, void, env, i32, i32)
+DEF_HELPER_3(pcistg, void, env, i32, i32)
+DEF_HELPER_4(stpcifc, void, env, i32, i64, i32)
+DEF_HELPER_3(sic, void, env, i64, i64)
+DEF_HELPER_3(rpcit, void, env, i32, i32)
+DEF_HELPER_5(pcistb, void, env, i32, i32, i64, i32)
+DEF_HELPER_4(mpcifc, void, env, i32, i64, i32)
#endif
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 11ee43d..621e10d 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1063,8 +1063,22 @@
C(0xb233, SSCH, S, Z, 0, insn, 0, 0, ssch, 0)
C(0xb239, STCRW, S, Z, 0, insn, 0, 0, stcrw, 0)
C(0xb234, STSCH, S, Z, 0, insn, 0, 0, stsch, 0)
+ C(0xb236, TPI , S, Z, la2, 0, 0, 0, tpi, 0)
C(0xb235, TSCH, S, Z, 0, insn, 0, 0, tsch, 0)
/* ??? Not listed in PoO ninth edition, but there's a linux driver that
uses it: "A CHSC subchannel is usually present on LPAR only." */
C(0xb25f, CHSC, RRE, Z, 0, insn, 0, 0, chsc, 0)
+
+/* zPCI Instructions */
+ /* None of these instructions are documented in the PoP, so this is all
+ based upon target/s390x/kvm.c and Linux code and likely incomplete */
+ C(0xebd0, PCISTB, RSY_a, PCI, la2, 0, 0, 0, pcistb, 0)
+ C(0xebd1, SIC, RSY_a, AIS, r1, r3, 0, 0, sic, 0)
+ C(0xb9a0, CLP, RRF_c, PCI, 0, 0, 0, 0, clp, 0)
+ C(0xb9d0, PCISTG, RRE, PCI, 0, 0, 0, 0, pcistg, 0)
+ C(0xb9d2, PCILG, RRE, PCI, 0, 0, 0, 0, pcilg, 0)
+ C(0xb9d3, RPCIT, RRE, PCI, 0, 0, 0, 0, rpcit, 0)
+ C(0xe3d0, MPCIFC, RXY_a, PCI, la2, 0, 0, 0, mpcifc, 0)
+ C(0xe3d4, STPCIFC, RXY_a, PCI, la2, 0, 0, 0, stpcifc, 0)
+
#endif /* CONFIG_USER_ONLY */
diff --git a/target/s390x/internal.h b/target/s390x/internal.h
index fea165f..d911e84 100644
--- a/target/s390x/internal.h
+++ b/target/s390x/internal.h
@@ -278,11 +278,6 @@
cpu_reset(cs);
}
-static inline uint8_t s390_cpu_get_state(S390CPU *cpu)
-{
- return cpu->env.cpu_state;
-}
-
/* arch_dump.c */
int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
diff --git a/target/s390x/interrupt.c b/target/s390x/interrupt.c
index 39c026b..25cfb3e 100644
--- a/target/s390x/interrupt.c
+++ b/target/s390x/interrupt.c
@@ -15,6 +15,9 @@
#include "exec/exec-all.h"
#include "sysemu/kvm.h"
#include "hw/s390x/ioinst.h"
+#if !defined(CONFIG_USER_ONLY)
+#include "hw/s390x/s390_flic.h"
+#endif
/* Ensure to exit the TB after this call! */
void trigger_pgm_exception(CPUS390XState *env, uint32_t code, uint32_t ilen)
@@ -55,17 +58,6 @@
}
#if !defined(CONFIG_USER_ONLY)
-static void cpu_inject_service(S390CPU *cpu, uint32_t param)
-{
- CPUS390XState *env = &cpu->env;
-
- /* multiplexing is good enough for sclp - kvm does it internally as well*/
- env->service_param |= param;
-
- env->pending_int |= INTERRUPT_EXT_SERVICE;
- cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HARD);
-}
-
void cpu_inject_clock_comparator(S390CPU *cpu)
{
CPUS390XState *env = &cpu->env;
@@ -134,48 +126,6 @@
cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HARD);
}
-static void cpu_inject_io(S390CPU *cpu, uint16_t subchannel_id,
- uint16_t subchannel_number,
- uint32_t io_int_parm, uint32_t io_int_word)
-{
- CPUS390XState *env = &cpu->env;
- int isc = IO_INT_WORD_ISC(io_int_word);
-
- if (env->io_index[isc] == MAX_IO_QUEUE - 1) {
- /* ugh - can't queue anymore. Let's drop. */
- return;
- }
-
- env->io_index[isc]++;
- assert(env->io_index[isc] < MAX_IO_QUEUE);
-
- env->io_queue[env->io_index[isc]][isc].id = subchannel_id;
- env->io_queue[env->io_index[isc]][isc].nr = subchannel_number;
- env->io_queue[env->io_index[isc]][isc].parm = io_int_parm;
- env->io_queue[env->io_index[isc]][isc].word = io_int_word;
-
- env->pending_int |= INTERRUPT_IO;
- cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HARD);
-}
-
-static void cpu_inject_crw_mchk(S390CPU *cpu)
-{
- CPUS390XState *env = &cpu->env;
-
- if (env->mchk_index == MAX_MCHK_QUEUE - 1) {
- /* ugh - can't queue anymore. Let's drop. */
- return;
- }
-
- env->mchk_index++;
- assert(env->mchk_index < MAX_MCHK_QUEUE);
-
- env->mchk_queue[env->mchk_index].type = 1;
-
- env->pending_int |= INTERRUPT_MCHK;
- cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HARD);
-}
-
/*
* All of the following interrupts are floating, i.e. not per-vcpu.
* We just need a dummy cpustate in order to be able to inject in the
@@ -183,53 +133,50 @@
*/
void s390_sclp_extint(uint32_t parm)
{
- if (kvm_enabled()) {
- kvm_s390_service_interrupt(parm);
- } else {
- S390CPU *dummy_cpu = s390_cpu_addr2state(0);
+ S390FLICState *fs = s390_get_flic();
+ S390FLICStateClass *fsc = s390_get_flic_class(fs);
- cpu_inject_service(dummy_cpu, parm);
- }
+ fsc->inject_service(fs, parm);
}
void s390_io_interrupt(uint16_t subchannel_id, uint16_t subchannel_nr,
uint32_t io_int_parm, uint32_t io_int_word)
{
- if (kvm_enabled()) {
- kvm_s390_io_interrupt(subchannel_id, subchannel_nr, io_int_parm,
- io_int_word);
- } else {
- S390CPU *dummy_cpu = s390_cpu_addr2state(0);
+ S390FLICState *fs = s390_get_flic();
+ S390FLICStateClass *fsc = s390_get_flic_class(fs);
- cpu_inject_io(dummy_cpu, subchannel_id, subchannel_nr, io_int_parm,
- io_int_word);
- }
+ fsc->inject_io(fs, subchannel_id, subchannel_nr, io_int_parm, io_int_word);
}
void s390_crw_mchk(void)
{
- if (kvm_enabled()) {
- kvm_s390_crw_mchk();
- } else {
- S390CPU *dummy_cpu = s390_cpu_addr2state(0);
+ S390FLICState *fs = s390_get_flic();
+ S390FLICStateClass *fsc = s390_get_flic_class(fs);
- cpu_inject_crw_mchk(dummy_cpu);
- }
+ fsc->inject_crw_mchk(fs);
}
bool s390_cpu_has_mcck_int(S390CPU *cpu)
{
+ QEMUS390FLICState *flic = s390_get_qemu_flic(s390_get_flic());
CPUS390XState *env = &cpu->env;
if (!(env->psw.mask & PSW_MASK_MCHECK)) {
return false;
}
- return env->pending_int & INTERRUPT_MCHK;
+ /* for now we only support channel report machine checks (floating) */
+ if (qemu_s390_flic_has_crw_mchk(flic) &&
+ (env->cregs[14] & CR14_CHANNEL_REPORT_SC)) {
+ return true;
+ }
+
+ return false;
}
bool s390_cpu_has_ext_int(S390CPU *cpu)
{
+ QEMUS390FLICState *flic = s390_get_qemu_flic(s390_get_flic());
CPUS390XState *env = &cpu->env;
if (!(env->psw.mask & PSW_MASK_EXT)) {
@@ -261,7 +208,7 @@
return true;
}
- if ((env->pending_int & INTERRUPT_EXT_SERVICE) &&
+ if (qemu_s390_flic_has_service(flic) &&
(env->cregs[0] & CR0_SERVICE_SC)) {
return true;
}
@@ -271,13 +218,14 @@
bool s390_cpu_has_io_int(S390CPU *cpu)
{
+ QEMUS390FLICState *flic = s390_get_qemu_flic(s390_get_flic());
CPUS390XState *env = &cpu->env;
if (!(env->psw.mask & PSW_MASK_IO)) {
return false;
}
- return env->pending_int & INTERRUPT_IO;
+ return qemu_s390_flic_has_io(flic, env->cregs[6]);
}
bool s390_cpu_has_restart_int(S390CPU *cpu)
diff --git a/target/s390x/kvm-stub.c b/target/s390x/kvm-stub.c
index 6bae3e9..8cdcf83 100644
--- a/target/s390x/kvm-stub.c
+++ b/target/s390x/kvm-stub.c
@@ -12,10 +12,6 @@
#include "cpu.h"
#include "kvm_s390x.h"
-void kvm_s390_service_interrupt(uint32_t parm)
-{
-}
-
void kvm_s390_access_exception(S390CPU *cpu, uint16_t code, uint64_t te_code)
{
}
@@ -30,15 +26,6 @@
{
}
-void kvm_s390_io_interrupt(uint16_t subchannel_id, uint16_t subchannel_nr,
- uint32_t io_int_parm, uint32_t io_int_word)
-{
-}
-
-void kvm_s390_crw_mchk(void)
-{
-}
-
int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state)
{
return -ENOSYS;
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 8fbbf72..0301e9d 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -1034,7 +1034,7 @@
inject_vcpu_irq_legacy(cs, irq);
}
-static void __kvm_s390_floating_interrupt(struct kvm_s390_irq *irq)
+void kvm_s390_floating_interrupt_legacy(struct kvm_s390_irq *irq)
{
struct kvm_s390_interrupt kvmint = {};
int r;
@@ -1052,33 +1052,6 @@
}
}
-void kvm_s390_floating_interrupt(struct kvm_s390_irq *irq)
-{
- static bool use_flic = true;
- int r;
-
- if (use_flic) {
- r = kvm_s390_inject_flic(irq);
- if (r == -ENOSYS) {
- use_flic = false;
- }
- if (!r) {
- return;
- }
- }
- __kvm_s390_floating_interrupt(irq);
-}
-
-void kvm_s390_service_interrupt(uint32_t parm)
-{
- struct kvm_s390_irq irq = {
- .type = KVM_S390_INT_SERVICE,
- .u.ext.ext_params = parm,
- };
-
- kvm_s390_floating_interrupt(&irq);
-}
-
void kvm_s390_program_interrupt(S390CPU *cpu, uint16_t code)
{
struct kvm_s390_irq irq = {
@@ -1690,10 +1663,10 @@
* If an I/O interrupt had been dequeued, we have to reinject it.
*/
if (run->s390_tsch.dequeued) {
- kvm_s390_io_interrupt(run->s390_tsch.subchannel_id,
- run->s390_tsch.subchannel_nr,
- run->s390_tsch.io_int_parm,
- run->s390_tsch.io_int_word);
+ s390_io_interrupt(run->s390_tsch.subchannel_id,
+ run->s390_tsch.subchannel_nr,
+ run->s390_tsch.io_int_parm,
+ run->s390_tsch.io_int_word);
}
ret = 0;
}
@@ -1702,7 +1675,7 @@
static void insert_stsi_3_2_2(S390CPU *cpu, __u64 addr, uint8_t ar)
{
- struct sysib_322 sysib;
+ SysIB_322 sysib;
int del;
if (s390_cpu_virt_mem_read(cpu, addr, ar, &sysib, sizeof(sysib))) {
@@ -1840,37 +1813,6 @@
return true;
}
-void kvm_s390_io_interrupt(uint16_t subchannel_id,
- uint16_t subchannel_nr, uint32_t io_int_parm,
- uint32_t io_int_word)
-{
- struct kvm_s390_irq irq = {
- .u.io.subchannel_id = subchannel_id,
- .u.io.subchannel_nr = subchannel_nr,
- .u.io.io_int_parm = io_int_parm,
- .u.io.io_int_word = io_int_word,
- };
-
- if (io_int_word & IO_INT_WORD_AI) {
- irq.type = KVM_S390_INT_IO(1, 0, 0, 0);
- } else {
- irq.type = KVM_S390_INT_IO(0, (subchannel_id & 0xff00) >> 8,
- (subchannel_id & 0x0006),
- subchannel_nr);
- }
- kvm_s390_floating_interrupt(&irq);
-}
-
-void kvm_s390_crw_mchk(void)
-{
- struct kvm_s390_irq irq = {
- .type = KVM_S390_MCHK,
- .u.mchk.cr14 = CR14_CHANNEL_REPORT_SC,
- .u.mchk.mcic = s390_build_validity_mcic() | MCIC_SC_CP,
- };
- kvm_s390_floating_interrupt(&irq);
-}
-
void kvm_s390_enable_css_support(S390CPU *cpu)
{
int r;
@@ -2279,6 +2221,14 @@
return;
}
+ /* PTFF subfunctions might be indicated although kernel support missing */
+ if (!test_bit(S390_FEAT_MULTIPLE_EPOCH, model->features)) {
+ clear_bit(S390_FEAT_PTFF_QSIE, model->features);
+ clear_bit(S390_FEAT_PTFF_QTOUE, model->features);
+ clear_bit(S390_FEAT_PTFF_STOE, model->features);
+ clear_bit(S390_FEAT_PTFF_STOUE, model->features);
+ }
+
/* with cpu model support, CMM is only indicated if really available */
if (kvm_s390_cmma_available()) {
set_bit(S390_FEAT_CMM, model->features);
diff --git a/target/s390x/kvm_s390x.h b/target/s390x/kvm_s390x.h
index 79b3594..7a3b862 100644
--- a/target/s390x/kvm_s390x.h
+++ b/target/s390x/kvm_s390x.h
@@ -12,17 +12,12 @@
struct kvm_s390_irq;
-void kvm_s390_floating_interrupt(struct kvm_s390_irq *irq);
-void kvm_s390_service_interrupt(uint32_t parm);
+void kvm_s390_floating_interrupt_legacy(struct kvm_s390_irq *irq);
void kvm_s390_vcpu_interrupt(S390CPU *cpu, struct kvm_s390_irq *irq);
void kvm_s390_access_exception(S390CPU *cpu, uint16_t code, uint64_t te_code);
int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf,
int len, bool is_write);
void kvm_s390_program_interrupt(S390CPU *cpu, uint16_t code);
-void kvm_s390_io_interrupt(uint16_t subchannel_id,
- uint16_t subchannel_nr, uint32_t io_int_parm,
- uint32_t io_int_word);
-void kvm_s390_crw_mchk(void);
int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state);
void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu);
int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu);
@@ -44,7 +39,4 @@
void kvm_s390_restart_interrupt(S390CPU *cpu);
void kvm_s390_stop_interrupt(S390CPU *cpu);
-/* implemented outside of target/s390x/ */
-int kvm_s390_inject_flic(struct kvm_s390_irq *irq);
-
#endif /* KVM_S390X_H */
diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index 86da6aa..e0b23c1 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -36,6 +36,10 @@
#include "hw/s390x/ebcdic.h"
#include "hw/s390x/s390-virtio-hcall.h"
#include "hw/s390x/sclp.h"
+#include "hw/s390x/s390_flic.h"
+#include "hw/s390x/ioinst.h"
+#include "hw/s390x/s390-pci-inst.h"
+#include "hw/boards.h"
#endif
/* #define DEBUG_HELPER */
@@ -194,130 +198,146 @@
}
/* Store System Information */
-uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0,
- uint64_t r0, uint64_t r1)
+uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0, uint64_t r0, uint64_t r1)
{
+ const uintptr_t ra = GETPC();
+ const uint32_t sel1 = r0 & STSI_R0_SEL1_MASK;
+ const uint32_t sel2 = r1 & STSI_R1_SEL2_MASK;
+ const MachineState *ms = MACHINE(qdev_get_machine());
+ uint16_t total_cpus = 0, conf_cpus = 0, reserved_cpus = 0;
S390CPU *cpu = s390_env_get_cpu(env);
- int cc = 0;
- int sel1, sel2;
+ SysIB sysib = { 0 };
+ int i, cc = 0;
- if ((r0 & STSI_LEVEL_MASK) <= STSI_LEVEL_3 &&
- ((r0 & STSI_R0_RESERVED_MASK) || (r1 & STSI_R1_RESERVED_MASK))) {
- /* valid function code, invalid reserved bits */
- s390_program_interrupt(env, PGM_SPECIFICATION, 4, GETPC());
+ if ((r0 & STSI_R0_FC_MASK) > STSI_R0_FC_LEVEL_3) {
+ /* invalid function code: no other checks are performed */
+ return 3;
}
- sel1 = r0 & STSI_R0_SEL1_MASK;
- sel2 = r1 & STSI_R1_SEL2_MASK;
+ if ((r0 & STSI_R0_RESERVED_MASK) || (r1 & STSI_R1_RESERVED_MASK)) {
+ s390_program_interrupt(env, PGM_SPECIFICATION, 4, ra);
+ }
- /* XXX: spec exception if sysib is not 4k-aligned */
+ if ((r0 & STSI_R0_FC_MASK) == STSI_R0_FC_CURRENT) {
+ /* query the current level: no further checks are performed */
+ env->regs[0] = STSI_R0_FC_LEVEL_3;
+ return 0;
+ }
- switch (r0 & STSI_LEVEL_MASK) {
- case STSI_LEVEL_1:
+ if (a0 & ~TARGET_PAGE_MASK) {
+ s390_program_interrupt(env, PGM_SPECIFICATION, 4, ra);
+ }
+
+ /* count the cpus and split them into configured and reserved ones */
+ for (i = 0; i < ms->possible_cpus->len; i++) {
+ total_cpus++;
+ if (ms->possible_cpus->cpus[i].cpu) {
+ conf_cpus++;
+ } else {
+ reserved_cpus++;
+ }
+ }
+
+ /*
+ * In theory, we could report Level 1 / Level 2 as current. However,
+ * the Linux kernel will detect this as running under LPAR and assume
+ * that we have a sclp linemode console (which is always present on
+ * LPAR, but not the default for QEMU), therefore not displaying boot
+ * messages and making booting a Linux kernel under TCG harder.
+ *
+ * For now we fake the same SMP configuration on all levels.
+ *
+ * TODO: We could later make the level configurable via the machine
+ * and change defaults (linemode console) based on machine type
+ * and accelerator.
+ */
+ switch (r0 & STSI_R0_FC_MASK) {
+ case STSI_R0_FC_LEVEL_1:
if ((sel1 == 1) && (sel2 == 1)) {
/* Basic Machine Configuration */
- struct sysib_111 sysib;
char type[5] = {};
- memset(&sysib, 0, sizeof(sysib));
- ebcdic_put(sysib.manuf, "QEMU ", 16);
+ ebcdic_put(sysib.sysib_111.manuf, "QEMU ", 16);
/* same as machine type number in STORE CPU ID, but in EBCDIC */
snprintf(type, ARRAY_SIZE(type), "%X", cpu->model->def->type);
- ebcdic_put(sysib.type, type, 4);
+ ebcdic_put(sysib.sysib_111.type, type, 4);
/* model number (not stored in STORE CPU ID for z/Architecure) */
- ebcdic_put(sysib.model, "QEMU ", 16);
- ebcdic_put(sysib.sequence, "QEMU ", 16);
- ebcdic_put(sysib.plant, "QEMU", 4);
- cpu_physical_memory_write(a0, &sysib, sizeof(sysib));
+ ebcdic_put(sysib.sysib_111.model, "QEMU ", 16);
+ ebcdic_put(sysib.sysib_111.sequence, "QEMU ", 16);
+ ebcdic_put(sysib.sysib_111.plant, "QEMU", 4);
} else if ((sel1 == 2) && (sel2 == 1)) {
/* Basic Machine CPU */
- struct sysib_121 sysib;
-
- memset(&sysib, 0, sizeof(sysib));
- /* XXX make different for different CPUs? */
- ebcdic_put(sysib.sequence, "QEMUQEMUQEMUQEMU", 16);
- ebcdic_put(sysib.plant, "QEMU", 4);
- stw_p(&sysib.cpu_addr, env->core_id);
- cpu_physical_memory_write(a0, &sysib, sizeof(sysib));
+ ebcdic_put(sysib.sysib_121.sequence, "QEMUQEMUQEMUQEMU", 16);
+ ebcdic_put(sysib.sysib_121.plant, "QEMU", 4);
+ sysib.sysib_121.cpu_addr = cpu_to_be16(env->core_id);
} else if ((sel1 == 2) && (sel2 == 2)) {
/* Basic Machine CPUs */
- struct sysib_122 sysib;
-
- memset(&sysib, 0, sizeof(sysib));
- stl_p(&sysib.capability, 0x443afc29);
- /* XXX change when SMP comes */
- stw_p(&sysib.total_cpus, 1);
- stw_p(&sysib.active_cpus, 1);
- stw_p(&sysib.standby_cpus, 0);
- stw_p(&sysib.reserved_cpus, 0);
- cpu_physical_memory_write(a0, &sysib, sizeof(sysib));
+ sysib.sysib_122.capability = cpu_to_be32(0x443afc29);
+ sysib.sysib_122.total_cpus = cpu_to_be16(total_cpus);
+ sysib.sysib_122.conf_cpus = cpu_to_be16(conf_cpus);
+ sysib.sysib_122.reserved_cpus = cpu_to_be16(reserved_cpus);
} else {
cc = 3;
}
break;
- case STSI_LEVEL_2:
- {
- if ((sel1 == 2) && (sel2 == 1)) {
- /* LPAR CPU */
- struct sysib_221 sysib;
-
- memset(&sysib, 0, sizeof(sysib));
- /* XXX make different for different CPUs? */
- ebcdic_put(sysib.sequence, "QEMUQEMUQEMUQEMU", 16);
- ebcdic_put(sysib.plant, "QEMU", 4);
- stw_p(&sysib.cpu_addr, env->core_id);
- stw_p(&sysib.cpu_id, 0);
- cpu_physical_memory_write(a0, &sysib, sizeof(sysib));
- } else if ((sel1 == 2) && (sel2 == 2)) {
- /* LPAR CPUs */
- struct sysib_222 sysib;
-
- memset(&sysib, 0, sizeof(sysib));
- stw_p(&sysib.lpar_num, 0);
- sysib.lcpuc = 0;
- /* XXX change when SMP comes */
- stw_p(&sysib.total_cpus, 1);
- stw_p(&sysib.conf_cpus, 1);
- stw_p(&sysib.standby_cpus, 0);
- stw_p(&sysib.reserved_cpus, 0);
- ebcdic_put(sysib.name, "QEMU ", 8);
- stl_p(&sysib.caf, 1000);
- stw_p(&sysib.dedicated_cpus, 0);
- stw_p(&sysib.shared_cpus, 0);
- cpu_physical_memory_write(a0, &sysib, sizeof(sysib));
- } else {
- cc = 3;
- }
- break;
+ case STSI_R0_FC_LEVEL_2:
+ if ((sel1 == 2) && (sel2 == 1)) {
+ /* LPAR CPU */
+ ebcdic_put(sysib.sysib_221.sequence, "QEMUQEMUQEMUQEMU", 16);
+ ebcdic_put(sysib.sysib_221.plant, "QEMU", 4);
+ sysib.sysib_221.cpu_addr = cpu_to_be16(env->core_id);
+ } else if ((sel1 == 2) && (sel2 == 2)) {
+ /* LPAR CPUs */
+ sysib.sysib_222.lcpuc = 0x80; /* dedicated */
+ sysib.sysib_222.total_cpus = cpu_to_be16(total_cpus);
+ sysib.sysib_222.conf_cpus = cpu_to_be16(conf_cpus);
+ sysib.sysib_222.reserved_cpus = cpu_to_be16(reserved_cpus);
+ ebcdic_put(sysib.sysib_222.name, "QEMU ", 8);
+ sysib.sysib_222.caf = cpu_to_be32(1000);
+ sysib.sysib_222.dedicated_cpus = cpu_to_be16(conf_cpus);
+ } else {
+ cc = 3;
}
- case STSI_LEVEL_3:
- {
- if ((sel1 == 2) && (sel2 == 2)) {
- /* VM CPUs */
- struct sysib_322 sysib;
+ break;
+ case STSI_R0_FC_LEVEL_3:
+ if ((sel1 == 2) && (sel2 == 2)) {
+ /* VM CPUs */
+ sysib.sysib_322.count = 1;
+ sysib.sysib_322.vm[0].total_cpus = cpu_to_be16(total_cpus);
+ sysib.sysib_322.vm[0].conf_cpus = cpu_to_be16(conf_cpus);
+ sysib.sysib_322.vm[0].reserved_cpus = cpu_to_be16(reserved_cpus);
+ sysib.sysib_322.vm[0].caf = cpu_to_be32(1000);
+ /* Linux kernel uses this to distinguish us from z/VM */
+ ebcdic_put(sysib.sysib_322.vm[0].cpi, "KVM/Linux ", 16);
+ sysib.sysib_322.vm[0].ext_name_encoding = 2; /* UTF-8 */
- memset(&sysib, 0, sizeof(sysib));
- sysib.count = 1;
- /* XXX change when SMP comes */
- stw_p(&sysib.vm[0].total_cpus, 1);
- stw_p(&sysib.vm[0].conf_cpus, 1);
- stw_p(&sysib.vm[0].standby_cpus, 0);
- stw_p(&sysib.vm[0].reserved_cpus, 0);
- ebcdic_put(sysib.vm[0].name, "KVMguest", 8);
- stl_p(&sysib.vm[0].caf, 1000);
- ebcdic_put(sysib.vm[0].cpi, "KVM/Linux ", 16);
- cpu_physical_memory_write(a0, &sysib, sizeof(sysib));
+ /* If our VM has a name, use the real name */
+ if (qemu_name) {
+ memset(sysib.sysib_322.vm[0].name, 0x40,
+ sizeof(sysib.sysib_322.vm[0].name));
+ ebcdic_put(sysib.sysib_322.vm[0].name, qemu_name,
+ MIN(sizeof(sysib.sysib_322.vm[0].name),
+ strlen(qemu_name)));
+ strncpy((char *)sysib.sysib_322.ext_names[0], qemu_name,
+ sizeof(sysib.sysib_322.ext_names[0]));
} else {
- cc = 3;
+ ebcdic_put(sysib.sysib_322.vm[0].name, "TCGguest", 8);
+ strcpy((char *)sysib.sysib_322.ext_names[0], "TCGguest");
}
- break;
+
+ /* add the uuid */
+ memcpy(sysib.sysib_322.vm[0].uuid, &qemu_uuid,
+ sizeof(sysib.sysib_322.vm[0].uuid));
+ } else {
+ cc = 3;
}
- case STSI_LEVEL_CURRENT:
- env->regs[0] = STSI_LEVEL_3;
break;
- default:
- cc = 3;
- break;
+ }
+
+ if (cc == 0) {
+ if (s390_cpu_virt_mem_write(cpu, a0, 0, &sysib, sizeof(sysib))) {
+ s390_cpu_virt_mem_handle_exc(cpu, ra);
+ }
}
return cc;
@@ -429,6 +449,59 @@
qemu_mutex_unlock_iothread();
}
+uint32_t HELPER(tpi)(CPUS390XState *env, uint64_t addr)
+{
+ const uintptr_t ra = GETPC();
+ S390CPU *cpu = s390_env_get_cpu(env);
+ QEMUS390FLICState *flic = s390_get_qemu_flic(s390_get_flic());
+ QEMUS390FlicIO *io = NULL;
+ LowCore *lowcore;
+
+ if (addr & 0x3) {
+ s390_program_interrupt(env, PGM_SPECIFICATION, 4, ra);
+ }
+
+ qemu_mutex_lock_iothread();
+ io = qemu_s390_flic_dequeue_io(flic, env->cregs[6]);
+ if (!io) {
+ qemu_mutex_unlock_iothread();
+ return 0;
+ }
+
+ if (addr) {
+ struct {
+ uint16_t id;
+ uint16_t nr;
+ uint32_t parm;
+ } intc = {
+ .id = cpu_to_be16(io->id),
+ .nr = cpu_to_be16(io->nr),
+ .parm = cpu_to_be32(io->parm),
+ };
+
+ if (s390_cpu_virt_mem_write(cpu, addr, 0, &intc, sizeof(intc))) {
+ /* writing failed, reinject and properly clean up */
+ s390_io_interrupt(io->id, io->nr, io->parm, io->word);
+ qemu_mutex_unlock_iothread();
+ g_free(io);
+ s390_cpu_virt_mem_handle_exc(cpu, ra);
+ return 0;
+ }
+ } else {
+ /* no protection applies */
+ lowcore = cpu_map_lowcore(env);
+ lowcore->subchannel_id = cpu_to_be16(io->id);
+ lowcore->subchannel_nr = cpu_to_be16(io->nr);
+ lowcore->io_int_parm = cpu_to_be32(io->parm);
+ lowcore->io_int_word = cpu_to_be32(io->word);
+ cpu_unmap_lowcore(lowcore);
+ }
+
+ g_free(io);
+ qemu_mutex_unlock_iothread();
+ return 1;
+}
+
void HELPER(tsch)(CPUS390XState *env, uint64_t r1, uint64_t inst)
{
S390CPU *cpu = s390_env_get_cpu(env);
@@ -560,3 +633,91 @@
env->regs[0] = deposit64(env->regs[0], 0, 8, (max_bytes / 8) - 1);
return count_bytes >= max_bytes ? 0 : 3;
}
+
+#ifndef CONFIG_USER_ONLY
+/*
+ * Note: we ignore any return code of the functions called for the pci
+ * instructions, as the only time they return !0 is when the stub is
+ * called, and in that case we didn't even offer the zpci facility.
+ * The only exception is SIC, where program checks need to be handled
+ * by the caller.
+ */
+void HELPER(clp)(CPUS390XState *env, uint32_t r2)
+{
+ S390CPU *cpu = s390_env_get_cpu(env);
+
+ qemu_mutex_lock_iothread();
+ clp_service_call(cpu, r2, GETPC());
+ qemu_mutex_unlock_iothread();
+}
+
+void HELPER(pcilg)(CPUS390XState *env, uint32_t r1, uint32_t r2)
+{
+ S390CPU *cpu = s390_env_get_cpu(env);
+
+ qemu_mutex_lock_iothread();
+ pcilg_service_call(cpu, r1, r2, GETPC());
+ qemu_mutex_unlock_iothread();
+}
+
+void HELPER(pcistg)(CPUS390XState *env, uint32_t r1, uint32_t r2)
+{
+ S390CPU *cpu = s390_env_get_cpu(env);
+
+ qemu_mutex_lock_iothread();
+ pcistg_service_call(cpu, r1, r2, GETPC());
+ qemu_mutex_unlock_iothread();
+}
+
+void HELPER(stpcifc)(CPUS390XState *env, uint32_t r1, uint64_t fiba,
+ uint32_t ar)
+{
+ S390CPU *cpu = s390_env_get_cpu(env);
+
+ qemu_mutex_lock_iothread();
+ stpcifc_service_call(cpu, r1, fiba, ar, GETPC());
+ qemu_mutex_unlock_iothread();
+}
+
+void HELPER(sic)(CPUS390XState *env, uint64_t r1, uint64_t r3)
+{
+ int r;
+
+ qemu_mutex_lock_iothread();
+ r = css_do_sic(env, (r3 >> 27) & 0x7, r1 & 0xffff);
+ qemu_mutex_unlock_iothread();
+ /* css_do_sic() may actually return a PGM_xxx value to inject */
+ if (r) {
+ s390_program_interrupt(env, -r, 4, GETPC());
+ }
+}
+
+void HELPER(rpcit)(CPUS390XState *env, uint32_t r1, uint32_t r2)
+{
+ S390CPU *cpu = s390_env_get_cpu(env);
+
+ qemu_mutex_lock_iothread();
+ rpcit_service_call(cpu, r1, r2, GETPC());
+ qemu_mutex_unlock_iothread();
+}
+
+void HELPER(pcistb)(CPUS390XState *env, uint32_t r1, uint32_t r3,
+ uint64_t gaddr, uint32_t ar)
+{
+ S390CPU *cpu = s390_env_get_cpu(env);
+
+ qemu_mutex_lock_iothread();
+ pcistb_service_call(cpu, r1, r3, gaddr, ar, GETPC());
+ qemu_mutex_unlock_iothread();
+}
+
+void HELPER(mpcifc)(CPUS390XState *env, uint32_t r1, uint64_t fiba,
+ uint32_t ar)
+{
+ S390CPU *cpu = s390_env_get_cpu(env);
+
+ qemu_mutex_lock_iothread();
+ mpcifc_service_call(cpu, r1, fiba, ar, GETPC());
+ qemu_mutex_unlock_iothread();
+}
+#endif
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index df0b416..b470d69 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -4199,6 +4199,14 @@
return NO_EXIT;
}
+static ExitStatus op_tpi(DisasContext *s, DisasOps *o)
+{
+ check_privileged(s);
+ gen_helper_tpi(cc_op, cpu_env, o->addr1);
+ set_cc_static(s);
+ return NO_EXIT;
+}
+
static ExitStatus op_tsch(DisasContext *s, DisasOps *o)
{
check_privileged(s);
@@ -4777,6 +4785,106 @@
return NO_EXIT;
}
+#ifndef CONFIG_USER_ONLY
+static ExitStatus op_clp(DisasContext *s, DisasOps *o)
+{
+ TCGv_i32 r2 = tcg_const_i32(get_field(s->fields, r2));
+
+ check_privileged(s);
+ gen_helper_clp(cpu_env, r2);
+ tcg_temp_free_i32(r2);
+ set_cc_static(s);
+ return NO_EXIT;
+}
+
+static ExitStatus op_pcilg(DisasContext *s, DisasOps *o)
+{
+ TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
+ TCGv_i32 r2 = tcg_const_i32(get_field(s->fields, r2));
+
+ check_privileged(s);
+ gen_helper_pcilg(cpu_env, r1, r2);
+ tcg_temp_free_i32(r1);
+ tcg_temp_free_i32(r2);
+ set_cc_static(s);
+ return NO_EXIT;
+}
+
+static ExitStatus op_pcistg(DisasContext *s, DisasOps *o)
+{
+ TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
+ TCGv_i32 r2 = tcg_const_i32(get_field(s->fields, r2));
+
+ check_privileged(s);
+ gen_helper_pcistg(cpu_env, r1, r2);
+ tcg_temp_free_i32(r1);
+ tcg_temp_free_i32(r2);
+ set_cc_static(s);
+ return NO_EXIT;
+}
+
+static ExitStatus op_stpcifc(DisasContext *s, DisasOps *o)
+{
+ TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
+ TCGv_i32 ar = tcg_const_i32(get_field(s->fields, b2));
+
+ check_privileged(s);
+ gen_helper_stpcifc(cpu_env, r1, o->addr1, ar);
+ tcg_temp_free_i32(ar);
+ tcg_temp_free_i32(r1);
+ set_cc_static(s);
+ return NO_EXIT;
+}
+
+static ExitStatus op_sic(DisasContext *s, DisasOps *o)
+{
+ check_privileged(s);
+ gen_helper_sic(cpu_env, o->in1, o->in2);
+ return NO_EXIT;
+}
+
+static ExitStatus op_rpcit(DisasContext *s, DisasOps *o)
+{
+ TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
+ TCGv_i32 r2 = tcg_const_i32(get_field(s->fields, r2));
+
+ check_privileged(s);
+ gen_helper_rpcit(cpu_env, r1, r2);
+ tcg_temp_free_i32(r1);
+ tcg_temp_free_i32(r2);
+ set_cc_static(s);
+ return NO_EXIT;
+}
+
+static ExitStatus op_pcistb(DisasContext *s, DisasOps *o)
+{
+ TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
+ TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3));
+ TCGv_i32 ar = tcg_const_i32(get_field(s->fields, b2));
+
+ check_privileged(s);
+ gen_helper_pcistb(cpu_env, r1, r3, o->addr1, ar);
+ tcg_temp_free_i32(ar);
+ tcg_temp_free_i32(r1);
+ tcg_temp_free_i32(r3);
+ set_cc_static(s);
+ return NO_EXIT;
+}
+
+static ExitStatus op_mpcifc(DisasContext *s, DisasOps *o)
+{
+ TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
+ TCGv_i32 ar = tcg_const_i32(get_field(s->fields, b2));
+
+ check_privileged(s);
+ gen_helper_mpcifc(cpu_env, r1, o->addr1, ar);
+ tcg_temp_free_i32(ar);
+ tcg_temp_free_i32(r1);
+ set_cc_static(s);
+ return NO_EXIT;
+}
+#endif
+
/* ====================================================================== */
/* The "Cc OUTput" generators. Given the generated output (and in some cases
the original inputs), update the various cc data structures in order to
@@ -5708,6 +5816,8 @@
#define FAC_MSA4 S390_FEAT_MSA_EXT_4 /* msa-extension-4 facility */
#define FAC_MSA5 S390_FEAT_MSA_EXT_5 /* msa-extension-5 facility */
#define FAC_ECT S390_FEAT_EXTRACT_CPU_TIME
+#define FAC_PCI S390_FEAT_ZPCI /* z/PCI facility */
+#define FAC_AIS S390_FEAT_ADAPTER_INT_SUPPRESSION
static const DisasInsn insn_info[] = {
#include "insn-data.def"
diff --git a/tcg/README b/tcg/README
index 03bfb6a..bb2ea51 100644
--- a/tcg/README
+++ b/tcg/README
@@ -503,6 +503,92 @@
For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
64-bit memory access specified in flags.
+********* Host vector operations
+
+All of the vector ops have two parameters, TCGOP_VECL & TCGOP_VECE.
+The former specifies the length of the vector in log2 64-bit units; the
+later specifies the length of the element (if applicable) in log2 8-bit units.
+E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32.
+
+* mov_vec v0, v1
+* ld_vec v0, t1
+* st_vec v0, t1
+
+ Move, load and store.
+
+* dup_vec v0, r1
+
+ Duplicate the low N bits of R1 into VECL/VECE copies across V0.
+
+* dupi_vec v0, c
+
+ Similarly, for a constant.
+ Smaller values will be replicated to host register size by the expanders.
+
+* dup2_vec v0, r1, r2
+
+ Duplicate r2:r1 into VECL/64 copies across V0. This opcode is
+ only present for 32-bit hosts.
+
+* add_vec v0, v1, v2
+
+ v0 = v1 + v2, in elements across the vector.
+
+* sub_vec v0, v1, v2
+
+ Similarly, v0 = v1 - v2.
+
+* mul_vec v0, v1, v2
+
+ Similarly, v0 = v1 * v2.
+
+* neg_vec v0, v1
+
+ Similarly, v0 = -v1.
+
+* and_vec v0, v1, v2
+* or_vec v0, v1, v2
+* xor_vec v0, v1, v2
+* andc_vec v0, v1, v2
+* orc_vec v0, v1, v2
+* not_vec v0, v1
+
+ Similarly, logical operations with and without compliment.
+ Note that VECE is unused.
+
+* shli_vec v0, v1, i2
+* shls_vec v0, v1, s2
+
+ Shift all elements from v1 by a scalar i2/s2. I.e.
+
+ for (i = 0; i < VECL/VECE; ++i) {
+ v0[i] = v1[i] << s2;
+ }
+
+* shri_vec v0, v1, i2
+* sari_vec v0, v1, i2
+* shrs_vec v0, v1, s2
+* sars_vec v0, v1, s2
+
+ Similarly for logical and arithmetic right shift.
+
+* shlv_vec v0, v1, v2
+
+ Shift elements from v1 by elements from v2. I.e.
+
+ for (i = 0; i < VECL/VECE; ++i) {
+ v0[i] = v1[i] << v2[i];
+ }
+
+* shrv_vec v0, v1, v2
+* sarv_vec v0, v1, v2
+
+ Similarly for logical and arithmetic right shift.
+
+* cmp_vec v0, v1, v2, cond
+
+ Compare vectors by element, storing -1 for true and 0 for false.
+
*********
Note 1: Some shortcuts are defined when the last operand is known to be
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index c252506..9aea1d1 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -31,13 +31,22 @@
TCG_REG_SP = 31,
TCG_REG_XZR = 31,
+ TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
+ TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
+ TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
+ TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
+ TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+ TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+ TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+ TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
+
/* Aliases. */
TCG_REG_FP = TCG_REG_X29,
TCG_REG_LR = TCG_REG_X30,
TCG_AREG0 = TCG_REG_X19,
} TCGReg;
-#define TCG_TARGET_NB_REGS 32
+#define TCG_TARGET_NB_REGS 64
/* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_SP
@@ -113,6 +122,20 @@
#define TCG_TARGET_HAS_mulsh_i64 1
#define TCG_TARGET_HAS_direct_jump 1
+#define TCG_TARGET_HAS_v64 1
+#define TCG_TARGET_HAS_v128 1
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec 1
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 0
+#define TCG_TARGET_HAS_cmp_vec 1
+#define TCG_TARGET_HAS_mul_vec 1
+
#define TCG_TARGET_DEFAULT_MO (0)
static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 150530f..be31920 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -20,10 +20,15 @@
#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
- "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
- "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
- "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
- "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
+ "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+ "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+ "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+ "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
+
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+ "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */
@@ -43,6 +48,14 @@
/* X19 reserved for AREG0 */
/* X29 reserved as fp */
/* X30 reserved as temporary */
+
+ TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
+ TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
+ /* V8 - V15 are call-saved, and skipped. */
+ TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+ TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+ TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+ TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};
static const int tcg_target_call_iarg_regs[8] = {
@@ -54,6 +67,7 @@
};
#define TCG_REG_TMP TCG_REG_X30
+#define TCG_VEC_TMP TCG_REG_V31
#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
@@ -119,9 +133,13 @@
const char *ct_str, TCGType type)
{
switch (*ct_str++) {
- case 'r':
+ case 'r': /* general registers */
ct->ct |= TCG_CT_REG;
- ct->u.regs = 0xffffffffu;
+ ct->u.regs |= 0xffffffffu;
+ break;
+ case 'w': /* advsimd registers */
+ ct->ct |= TCG_CT_REG;
+ ct->u.regs |= 0xffffffff00000000ull;
break;
case 'l': /* qemu_ld / qemu_st address, data_reg */
ct->ct |= TCG_CT_REG;
@@ -153,11 +171,13 @@
return ct_str;
}
+/* Match a constant valid for addition (12-bit, optionally shifted). */
static inline bool is_aimm(uint64_t val)
{
return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}
+/* Match a constant valid for logical operations. */
static inline bool is_limm(uint64_t val)
{
/* Taking a simplified view of the logical immediates for now, ignoring
@@ -178,6 +198,106 @@
return (val & (val - 1)) == 0;
}
+/* Match a constant that is valid for vectors. */
+static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8)
+{
+ int i;
+
+ *op = 0;
+ /* Match replication across 8 bits. */
+ if (v64 == dup_const(MO_8, v64)) {
+ *cmode = 0xe;
+ *imm8 = v64 & 0xff;
+ return true;
+ }
+ /* Match replication across 16 bits. */
+ if (v64 == dup_const(MO_16, v64)) {
+ uint16_t v16 = v64;
+
+ if (v16 == (v16 & 0xff)) {
+ *cmode = 0x8;
+ *imm8 = v16 & 0xff;
+ return true;
+ } else if (v16 == (v16 & 0xff00)) {
+ *cmode = 0xa;
+ *imm8 = v16 >> 8;
+ return true;
+ }
+ }
+ /* Match replication across 32 bits. */
+ if (v64 == dup_const(MO_32, v64)) {
+ uint32_t v32 = v64;
+
+ if (v32 == (v32 & 0xff)) {
+ *cmode = 0x0;
+ *imm8 = v32 & 0xff;
+ return true;
+ } else if (v32 == (v32 & 0xff00)) {
+ *cmode = 0x2;
+ *imm8 = (v32 >> 8) & 0xff;
+ return true;
+ } else if (v32 == (v32 & 0xff0000)) {
+ *cmode = 0x4;
+ *imm8 = (v32 >> 16) & 0xff;
+ return true;
+ } else if (v32 == (v32 & 0xff000000)) {
+ *cmode = 0x6;
+ *imm8 = v32 >> 24;
+ return true;
+ } else if ((v32 & 0xffff00ff) == 0xff) {
+ *cmode = 0xc;
+ *imm8 = (v32 >> 8) & 0xff;
+ return true;
+ } else if ((v32 & 0xff00ffff) == 0xffff) {
+ *cmode = 0xd;
+ *imm8 = (v32 >> 16) & 0xff;
+ return true;
+ }
+ /* Match forms of a float32. */
+ if (extract32(v32, 0, 19) == 0
+ && (extract32(v32, 25, 6) == 0x20
+ || extract32(v32, 25, 6) == 0x1f)) {
+ *cmode = 0xf;
+ *imm8 = (extract32(v32, 31, 1) << 7)
+ | (extract32(v32, 25, 1) << 6)
+ | extract32(v32, 19, 6);
+ return true;
+ }
+ }
+ /* Match forms of a float64. */
+ if (extract64(v64, 0, 48) == 0
+ && (extract64(v64, 54, 9) == 0x100
+ || extract64(v64, 54, 9) == 0x0ff)) {
+ *cmode = 0xf;
+ *op = 1;
+ *imm8 = (extract64(v64, 63, 1) << 7)
+ | (extract64(v64, 54, 1) << 6)
+ | extract64(v64, 48, 6);
+ return true;
+ }
+ /* Match bytes of 0x00 and 0xff. */
+ for (i = 0; i < 64; i += 8) {
+ uint64_t byte = extract64(v64, i, 8);
+ if (byte != 0 && byte != 0xff) {
+ break;
+ }
+ }
+ if (i == 64) {
+ *cmode = 0xe;
+ *op = 1;
+ *imm8 = (extract64(v64, 0, 1) << 0)
+ | (extract64(v64, 8, 1) << 1)
+ | (extract64(v64, 16, 1) << 2)
+ | (extract64(v64, 24, 1) << 3)
+ | (extract64(v64, 32, 1) << 4)
+ | (extract64(v64, 40, 1) << 5)
+ | (extract64(v64, 48, 1) << 6)
+ | (extract64(v64, 56, 1) << 7);
+ return true;
+ }
+ return false;
+}
+
static int tcg_target_const_match(tcg_target_long val, TCGType type,
const TCGArgConstraint *arg_ct)
{
@@ -271,6 +391,9 @@
/* Load literal for loading the address at pc-relative offset */
I3305_LDR = 0x58000000,
+ I3305_LDR_v64 = 0x5c000000,
+ I3305_LDR_v128 = 0x9c000000,
+
/* Load/store register. Described here as 3.3.12, but the helper
that emits them can transform to 3.3.10 or 3.3.13. */
I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
@@ -290,6 +413,15 @@
I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
+ I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
+ I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
+
+ I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
+ I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
+
+ I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
+ I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
+
I3312_TO_I3310 = 0x00200800,
I3312_TO_I3313 = 0x01000000,
@@ -374,8 +506,48 @@
I3510_EON = 0x4a200000,
I3510_ANDS = 0x6a000000,
- NOP = 0xd503201f,
+ /* AdvSIMD copy */
+ I3605_DUP = 0x0e000400,
+ I3605_INS = 0x4e001c00,
+ I3605_UMOV = 0x0e003c00,
+
+ /* AdvSIMD modified immediate */
+ I3606_MOVI = 0x0f000400,
+
+ /* AdvSIMD shift by immediate */
+ I3614_SSHR = 0x0f000400,
+ I3614_SSRA = 0x0f001400,
+ I3614_SHL = 0x0f005400,
+ I3614_USHR = 0x2f000400,
+ I3614_USRA = 0x2f001400,
+
+ /* AdvSIMD three same. */
+ I3616_ADD = 0x0e208400,
+ I3616_AND = 0x0e201c00,
+ I3616_BIC = 0x0e601c00,
+ I3616_EOR = 0x2e201c00,
+ I3616_MUL = 0x0e209c00,
+ I3616_ORR = 0x0ea01c00,
+ I3616_ORN = 0x0ee01c00,
+ I3616_SUB = 0x2e208400,
+ I3616_CMGT = 0x0e203400,
+ I3616_CMGE = 0x0e203c00,
+ I3616_CMTST = 0x0e208c00,
+ I3616_CMHI = 0x2e203400,
+ I3616_CMHS = 0x2e203c00,
+ I3616_CMEQ = 0x2e208c00,
+
+ /* AdvSIMD two-reg misc. */
+ I3617_CMGT0 = 0x0e208800,
+ I3617_CMEQ0 = 0x0e209800,
+ I3617_CMLT0 = 0x0e20a800,
+ I3617_CMGE0 = 0x2e208800,
+ I3617_CMLE0 = 0x2e20a800,
+ I3617_NOT = 0x2e205800,
+ I3617_NEG = 0x2e20b800,
+
/* System instructions. */
+ NOP = 0xd503201f,
DMB_ISH = 0xd50338bf,
DMB_LD = 0x00000100,
DMB_ST = 0x00000200,
@@ -520,26 +692,64 @@
tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}
+static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
+ TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
+{
+ /* Note that bit 11 set means general register input. Therefore
+ we can handle both register sets with one function. */
+ tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
+ | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
+}
+
+static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
+ TCGReg rd, bool op, int cmode, uint8_t imm8)
+{
+ tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
+ | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
+}
+
+static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
+ TCGReg rd, TCGReg rn, unsigned immhb)
+{
+ tcg_out32(s, insn | q << 30 | immhb << 16
+ | (rn & 0x1f) << 5 | (rd & 0x1f));
+}
+
+static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
+ unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
+{
+ tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
+ | (rn & 0x1f) << 5 | (rd & 0x1f));
+}
+
+static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
+ unsigned size, TCGReg rd, TCGReg rn)
+{
+ tcg_out32(s, insn | q << 30 | (size << 22)
+ | (rn & 0x1f) << 5 | (rd & 0x1f));
+}
+
static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
TCGReg rd, TCGReg base, TCGType ext,
TCGReg regoff)
{
/* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
- 0x4000 | ext << 13 | base << 5 | rd);
+ 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}
static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
TCGReg rd, TCGReg rn, intptr_t offset)
{
- tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
+ tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}
static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
/* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
- tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
+ tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
+ | rn << 5 | (rd & 0x1f));
}
/* Register to register move using ORR (shifted register with no shift). */
@@ -585,6 +795,22 @@
tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
+ TCGReg rd, uint64_t v64)
+{
+ int op, cmode, imm8;
+
+ if (is_fimm(v64, &op, &cmode, &imm8)) {
+ tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8);
+ } else if (type == TCG_TYPE_V128) {
+ new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
+ tcg_out_insn(s, 3305, LDR_v128, 0, rd);
+ } else {
+ new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
+ tcg_out_insn(s, 3305, LDR_v64, 0, rd);
+ }
+}
+
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
tcg_target_long value)
{
@@ -594,6 +820,22 @@
int s0, s1;
AArch64Insn opc;
+ switch (type) {
+ case TCG_TYPE_I32:
+ case TCG_TYPE_I64:
+ tcg_debug_assert(rd < 32);
+ break;
+
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ tcg_debug_assert(rd >= 32);
+ tcg_out_dupi_vec(s, type, rd, value);
+ return;
+
+ default:
+ g_assert_not_reached();
+ }
+
/* For 32-bit values, discard potential garbage in value. For 64-bit
values within [2**31, 2**32-1], we can create smaller sequences by
interpreting this as a negative 32-bit number, while ensuring that
@@ -669,15 +911,13 @@
/* Define something more legible for general use. */
#define tcg_out_ldst_r tcg_out_insn_3310
-static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
- TCGReg rd, TCGReg rn, intptr_t offset)
+static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
+ TCGReg rn, intptr_t offset, int lgsize)
{
- TCGMemOp size = (uint32_t)insn >> 30;
-
/* If the offset is naturally aligned and in range, then we can
use the scaled uimm12 encoding */
- if (offset >= 0 && !(offset & ((1 << size) - 1))) {
- uintptr_t scaled_uimm = offset >> size;
+ if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
+ uintptr_t scaled_uimm = offset >> lgsize;
if (scaled_uimm <= 0xfff) {
tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
return;
@@ -695,32 +935,102 @@
tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}
-static inline void tcg_out_mov(TCGContext *s,
- TCGType type, TCGReg ret, TCGReg arg)
+static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
- if (ret != arg) {
- tcg_out_movr(s, type, ret, arg);
+ if (ret == arg) {
+ return;
+ }
+ switch (type) {
+ case TCG_TYPE_I32:
+ case TCG_TYPE_I64:
+ if (ret < 32 && arg < 32) {
+ tcg_out_movr(s, type, ret, arg);
+ break;
+ } else if (ret < 32) {
+ tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
+ break;
+ } else if (arg < 32) {
+ tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
+ break;
+ }
+ /* FALLTHRU */
+
+ case TCG_TYPE_V64:
+ tcg_debug_assert(ret >= 32 && arg >= 32);
+ tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
+ break;
+ case TCG_TYPE_V128:
+ tcg_debug_assert(ret >= 32 && arg >= 32);
+ tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
+ break;
+
+ default:
+ g_assert_not_reached();
}
}
-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
- TCGReg arg1, intptr_t arg2)
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
+ TCGReg base, intptr_t ofs)
{
- tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
- arg, arg1, arg2);
+ AArch64Insn insn;
+ int lgsz;
+
+ switch (type) {
+ case TCG_TYPE_I32:
+ insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
+ lgsz = 2;
+ break;
+ case TCG_TYPE_I64:
+ insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
+ lgsz = 3;
+ break;
+ case TCG_TYPE_V64:
+ insn = I3312_LDRVD;
+ lgsz = 3;
+ break;
+ case TCG_TYPE_V128:
+ insn = I3312_LDRVQ;
+ lgsz = 4;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}
-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
- TCGReg arg1, intptr_t arg2)
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
+ TCGReg base, intptr_t ofs)
{
- tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
- arg, arg1, arg2);
+ AArch64Insn insn;
+ int lgsz;
+
+ switch (type) {
+ case TCG_TYPE_I32:
+ insn = (src < 32 ? I3312_STRW : I3312_STRVS);
+ lgsz = 2;
+ break;
+ case TCG_TYPE_I64:
+ insn = (src < 32 ? I3312_STRX : I3312_STRVD);
+ lgsz = 3;
+ break;
+ case TCG_TYPE_V64:
+ insn = I3312_STRVD;
+ lgsz = 3;
+ break;
+ case TCG_TYPE_V128:
+ insn = I3312_STRVQ;
+ lgsz = 4;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
TCGReg base, intptr_t ofs)
{
- if (val == 0) {
+ if (type <= TCG_TYPE_I64 && val == 0) {
tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
return true;
}
@@ -1210,14 +1520,15 @@
/* Merge "low bits" from tlb offset, load the tlb comparator into X0.
X0 = load [X2 + (tlb_offset & 0x000fff)] */
tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
- TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
+ TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff,
+ TARGET_LONG_BITS == 32 ? 2 : 3);
/* Load the tlb addend. Do that early to avoid stalling.
X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
(tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
(is_read ? offsetof(CPUTLBEntry, addr_read)
- : offsetof(CPUTLBEntry, addr_write)));
+ : offsetof(CPUTLBEntry, addr_write)), 3);
/* Perform the address comparison. */
tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
@@ -1435,49 +1746,49 @@
case INDEX_op_ld8u_i32:
case INDEX_op_ld8u_i64:
- tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
+ tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
break;
case INDEX_op_ld8s_i32:
- tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
+ tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
break;
case INDEX_op_ld8s_i64:
- tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
+ tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
break;
case INDEX_op_ld16u_i32:
case INDEX_op_ld16u_i64:
- tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
+ tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
break;
case INDEX_op_ld16s_i32:
- tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
+ tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
break;
case INDEX_op_ld16s_i64:
- tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
+ tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
break;
case INDEX_op_ld_i32:
case INDEX_op_ld32u_i64:
- tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
+ tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
break;
case INDEX_op_ld32s_i64:
- tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
+ tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
break;
case INDEX_op_ld_i64:
- tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
+ tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
break;
case INDEX_op_st8_i32:
case INDEX_op_st8_i64:
- tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
+ tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
break;
case INDEX_op_st16_i32:
case INDEX_op_st16_i64:
- tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
+ tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
break;
case INDEX_op_st_i32:
case INDEX_op_st32_i64:
- tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
+ tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
break;
case INDEX_op_st_i64:
- tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
+ tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
break;
case INDEX_op_add_i32:
@@ -1776,25 +2087,176 @@
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
case INDEX_op_mov_i64:
+ case INDEX_op_mov_vec:
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
case INDEX_op_movi_i64:
+ case INDEX_op_dupi_vec:
case INDEX_op_call: /* Always emitted via tcg_out_call. */
default:
- tcg_abort();
+ g_assert_not_reached();
}
#undef REG0
}
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+ unsigned vecl, unsigned vece,
+ const TCGArg *args, const int *const_args)
+{
+ static const AArch64Insn cmp_insn[16] = {
+ [TCG_COND_EQ] = I3616_CMEQ,
+ [TCG_COND_GT] = I3616_CMGT,
+ [TCG_COND_GE] = I3616_CMGE,
+ [TCG_COND_GTU] = I3616_CMHI,
+ [TCG_COND_GEU] = I3616_CMHS,
+ };
+ static const AArch64Insn cmp0_insn[16] = {
+ [TCG_COND_EQ] = I3617_CMEQ0,
+ [TCG_COND_GT] = I3617_CMGT0,
+ [TCG_COND_GE] = I3617_CMGE0,
+ [TCG_COND_LT] = I3617_CMLT0,
+ [TCG_COND_LE] = I3617_CMLE0,
+ };
+
+ TCGType type = vecl + TCG_TYPE_V64;
+ unsigned is_q = vecl;
+ TCGArg a0, a1, a2;
+
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+
+ switch (opc) {
+ case INDEX_op_ld_vec:
+ tcg_out_ld(s, type, a0, a1, a2);
+ break;
+ case INDEX_op_st_vec:
+ tcg_out_st(s, type, a0, a1, a2);
+ break;
+ case INDEX_op_add_vec:
+ tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
+ break;
+ case INDEX_op_sub_vec:
+ tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
+ break;
+ case INDEX_op_mul_vec:
+ tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
+ break;
+ case INDEX_op_neg_vec:
+ tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
+ break;
+ case INDEX_op_and_vec:
+ tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
+ break;
+ case INDEX_op_or_vec:
+ tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
+ break;
+ case INDEX_op_xor_vec:
+ tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
+ break;
+ case INDEX_op_andc_vec:
+ tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
+ break;
+ case INDEX_op_orc_vec:
+ tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
+ break;
+ case INDEX_op_not_vec:
+ tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
+ break;
+ case INDEX_op_dup_vec:
+ tcg_out_insn(s, 3605, DUP, is_q, a0, a1, 1 << vece, 0);
+ break;
+ case INDEX_op_shli_vec:
+ tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
+ break;
+ case INDEX_op_shri_vec:
+ tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
+ break;
+ case INDEX_op_sari_vec:
+ tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
+ break;
+ case INDEX_op_cmp_vec:
+ {
+ TCGCond cond = args[3];
+ AArch64Insn insn;
+
+ if (cond == TCG_COND_NE) {
+ if (const_args[2]) {
+ tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
+ } else {
+ tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
+ tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
+ }
+ } else {
+ if (const_args[2]) {
+ insn = cmp0_insn[cond];
+ if (insn) {
+ tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
+ break;
+ }
+ tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
+ a2 = TCG_VEC_TMP;
+ }
+ insn = cmp_insn[cond];
+ if (insn == 0) {
+ TCGArg t;
+ t = a1, a1 = a2, a2 = t;
+ cond = tcg_swap_cond(cond);
+ insn = cmp_insn[cond];
+ tcg_debug_assert(insn != 0);
+ }
+ tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
+ }
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+{
+ switch (opc) {
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_mul_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_orc_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_not_vec:
+ case INDEX_op_cmp_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+ TCGArg a0, ...)
+{
+}
+
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
{
static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
+ static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
+ static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
+ static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
+ static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
+ static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
@@ -1938,6 +2400,29 @@
case INDEX_op_sub2_i64:
return &add2;
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_mul_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_orc_vec:
+ return &w_w_w;
+ case INDEX_op_not_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
+ return &w_w;
+ case INDEX_op_ld_vec:
+ case INDEX_op_st_vec:
+ return &w_r;
+ case INDEX_op_dup_vec:
+ return &w_wr;
+ case INDEX_op_cmp_vec:
+ return &w_w_wZ;
+
default:
return NULL;
}
@@ -1947,8 +2432,10 @@
{
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
+ tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
+ tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
- tcg_target_call_clobber_regs = 0xfffffffu;
+ tcg_target_call_clobber_regs = -1ull;
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
@@ -1960,12 +2447,21 @@
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
}
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
diff --git a/tcg/aarch64/tcg-target.opc.h b/tcg/aarch64/tcg-target.opc.h
new file mode 100644
index 0000000..4816a6c
--- /dev/null
+++ b/tcg/aarch64/tcg-target.opc.h
@@ -0,0 +1,3 @@
+/* Target-specific opcodes for host vector expansion. These will be
+ emitted by tcg_expand_vec_op. For those familiar with GCC internals,
+ consider these to be UNSPEC with names. */
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index b89daba..9fdf37f 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -30,10 +30,10 @@
#ifdef __x86_64__
# define TCG_TARGET_REG_BITS 64
-# define TCG_TARGET_NB_REGS 16
+# define TCG_TARGET_NB_REGS 32
#else
# define TCG_TARGET_REG_BITS 32
-# define TCG_TARGET_NB_REGS 8
+# define TCG_TARGET_NB_REGS 24
#endif
typedef enum {
@@ -56,6 +56,26 @@
TCG_REG_R13,
TCG_REG_R14,
TCG_REG_R15,
+
+ TCG_REG_XMM0,
+ TCG_REG_XMM1,
+ TCG_REG_XMM2,
+ TCG_REG_XMM3,
+ TCG_REG_XMM4,
+ TCG_REG_XMM5,
+ TCG_REG_XMM6,
+ TCG_REG_XMM7,
+
+ /* 64-bit registers; likewise always define. */
+ TCG_REG_XMM8,
+ TCG_REG_XMM9,
+ TCG_REG_XMM10,
+ TCG_REG_XMM11,
+ TCG_REG_XMM12,
+ TCG_REG_XMM13,
+ TCG_REG_XMM14,
+ TCG_REG_XMM15,
+
TCG_REG_RAX = TCG_REG_EAX,
TCG_REG_RCX = TCG_REG_ECX,
TCG_REG_RDX = TCG_REG_EDX,
@@ -77,6 +97,8 @@
extern bool have_bmi1;
extern bool have_popcnt;
+extern bool have_avx1;
+extern bool have_avx2;
/* optional instructions */
#define TCG_TARGET_HAS_div2_i32 1
@@ -146,6 +168,21 @@
#define TCG_TARGET_HAS_mulsh_i64 0
#endif
+/* We do not support older SSE systems, only beginning with AVX1. */
+#define TCG_TARGET_HAS_v64 have_avx1
+#define TCG_TARGET_HAS_v128 have_avx1
+#define TCG_TARGET_HAS_v256 have_avx2
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec 0
+#define TCG_TARGET_HAS_not_vec 0
+#define TCG_TARGET_HAS_neg_vec 0
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 0
+#define TCG_TARGET_HAS_cmp_vec 1
+#define TCG_TARGET_HAS_mul_vec 1
+
#define TCG_TARGET_deposit_i32_valid(ofs, len) \
(((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
((ofs) == 0 && (len) == 16))
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 63d27f1..fc05909 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -28,10 +28,15 @@
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
"%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
- "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
"%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+ "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",
+#if TCG_TARGET_REG_BITS == 64
+ "%xmm8", "%xmm9", "%xmm10", "%xmm11",
+ "%xmm12", "%xmm13", "%xmm14", "%xmm15",
+#endif
};
#endif
@@ -61,6 +66,28 @@
TCG_REG_EDX,
TCG_REG_EAX,
#endif
+ TCG_REG_XMM0,
+ TCG_REG_XMM1,
+ TCG_REG_XMM2,
+ TCG_REG_XMM3,
+ TCG_REG_XMM4,
+ TCG_REG_XMM5,
+#ifndef _WIN64
+ /* The Win64 ABI has xmm6-xmm15 as caller-saves, and we do not save
+ any of them. Therefore only allow xmm0-xmm5 to be allocated. */
+ TCG_REG_XMM6,
+ TCG_REG_XMM7,
+#if TCG_TARGET_REG_BITS == 64
+ TCG_REG_XMM8,
+ TCG_REG_XMM9,
+ TCG_REG_XMM10,
+ TCG_REG_XMM11,
+ TCG_REG_XMM12,
+ TCG_REG_XMM13,
+ TCG_REG_XMM14,
+ TCG_REG_XMM15,
+#endif
+#endif
};
static const int tcg_target_call_iarg_regs[] = {
@@ -94,7 +121,7 @@
#define TCG_CT_CONST_I32 0x400
#define TCG_CT_CONST_WSZ 0x800
-/* Registers used with L constraint, which are the first argument
+/* Registers used with L constraint, which are the first argument
registers on x86_64, and two random call clobbered registers on
i386. */
#if TCG_TARGET_REG_BITS == 64
@@ -125,6 +152,8 @@
it there. Therefore we always define the variable. */
bool have_bmi1;
bool have_popcnt;
+bool have_avx1;
+bool have_avx2;
#ifdef CONFIG_CPUID_H
static bool have_movbe;
@@ -148,6 +177,8 @@
if (value != (int32_t)value) {
tcg_abort();
}
+ /* FALLTHRU */
+ case R_386_32:
tcg_patch32(code_ptr, value);
break;
case R_386_PC8:
@@ -162,6 +193,14 @@
}
}
+#if TCG_TARGET_REG_BITS == 64
+#define ALL_GENERAL_REGS 0x0000ffffu
+#define ALL_VECTOR_REGS 0xffff0000u
+#else
+#define ALL_GENERAL_REGS 0x000000ffu
+#define ALL_VECTOR_REGS 0x00ff0000u
+#endif
+
/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
const char *ct_str, TCGType type)
@@ -192,21 +231,29 @@
tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
break;
case 'q':
+ /* A register that can be used as a byte operand. */
ct->ct |= TCG_CT_REG;
ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
break;
case 'Q':
+ /* A register with an addressable second byte (e.g. %ah). */
ct->ct |= TCG_CT_REG;
ct->u.regs = 0xf;
break;
case 'r':
+ /* A general register. */
ct->ct |= TCG_CT_REG;
- ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
+ ct->u.regs |= ALL_GENERAL_REGS;
break;
case 'W':
/* With TZCNT/LZCNT, we can have operand-size as an input. */
ct->ct |= TCG_CT_CONST_WSZ;
break;
+ case 'x':
+ /* A vector register. */
+ ct->ct |= TCG_CT_REG;
+ ct->u.regs |= ALL_VECTOR_REGS;
+ break;
/* qemu_ld/st address constraint */
case 'L':
@@ -277,14 +324,17 @@
# define P_REXB_RM 0
# define P_GS 0
#endif
-#define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */
-#define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */
+#define P_EXT3A 0x10000 /* 0x0f 0x3a opcode prefix */
+#define P_SIMDF3 0x20000 /* 0xf3 opcode prefix */
+#define P_SIMDF2 0x40000 /* 0xf2 opcode prefix */
+#define P_VEXL 0x80000 /* Set VEX.L = 1 */
#define OPC_ARITH_EvIz (0x81)
#define OPC_ARITH_EvIb (0x83)
#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN (0xf2 | P_EXT38)
#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
+#define OPC_BLENDPS (0x0c | P_EXT3A | P_DATA16)
#define OPC_BSF (0xbc | P_EXT)
#define OPC_BSR (0xbd | P_EXT)
#define OPC_BSWAP (0xc8 | P_EXT)
@@ -310,11 +360,68 @@
#define OPC_MOVL_Iv (0xb8)
#define OPC_MOVBE_GyMy (0xf0 | P_EXT38)
#define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
+#define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16)
+#define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16)
+#define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2)
+#define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16)
+#define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16)
+#define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3)
+#define OPC_MOVDQU_WxVx (0x7f | P_EXT | P_SIMDF3)
+#define OPC_MOVQ_VqWq (0x7e | P_EXT | P_SIMDF3)
+#define OPC_MOVQ_WqVq (0xd6 | P_EXT | P_DATA16)
#define OPC_MOVSBL (0xbe | P_EXT)
#define OPC_MOVSWL (0xbf | P_EXT)
#define OPC_MOVSLQ (0x63 | P_REXW)
#define OPC_MOVZBL (0xb6 | P_EXT)
#define OPC_MOVZWL (0xb7 | P_EXT)
+#define OPC_PACKSSDW (0x6b | P_EXT | P_DATA16)
+#define OPC_PACKSSWB (0x63 | P_EXT | P_DATA16)
+#define OPC_PACKUSDW (0x2b | P_EXT38 | P_DATA16)
+#define OPC_PACKUSWB (0x67 | P_EXT | P_DATA16)
+#define OPC_PADDB (0xfc | P_EXT | P_DATA16)
+#define OPC_PADDW (0xfd | P_EXT | P_DATA16)
+#define OPC_PADDD (0xfe | P_EXT | P_DATA16)
+#define OPC_PADDQ (0xd4 | P_EXT | P_DATA16)
+#define OPC_PAND (0xdb | P_EXT | P_DATA16)
+#define OPC_PANDN (0xdf | P_EXT | P_DATA16)
+#define OPC_PBLENDW (0x0e | P_EXT3A | P_DATA16)
+#define OPC_PCMPEQB (0x74 | P_EXT | P_DATA16)
+#define OPC_PCMPEQW (0x75 | P_EXT | P_DATA16)
+#define OPC_PCMPEQD (0x76 | P_EXT | P_DATA16)
+#define OPC_PCMPEQQ (0x29 | P_EXT38 | P_DATA16)
+#define OPC_PCMPGTB (0x64 | P_EXT | P_DATA16)
+#define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16)
+#define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16)
+#define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16)
+#define OPC_PMOVSXBW (0x20 | P_EXT38 | P_DATA16)
+#define OPC_PMOVSXWD (0x23 | P_EXT38 | P_DATA16)
+#define OPC_PMOVSXDQ (0x25 | P_EXT38 | P_DATA16)
+#define OPC_PMOVZXBW (0x30 | P_EXT38 | P_DATA16)
+#define OPC_PMOVZXWD (0x33 | P_EXT38 | P_DATA16)
+#define OPC_PMOVZXDQ (0x35 | P_EXT38 | P_DATA16)
+#define OPC_PMULLW (0xd5 | P_EXT | P_DATA16)
+#define OPC_PMULLD (0x40 | P_EXT38 | P_DATA16)
+#define OPC_POR (0xeb | P_EXT | P_DATA16)
+#define OPC_PSHUFB (0x00 | P_EXT38 | P_DATA16)
+#define OPC_PSHUFD (0x70 | P_EXT | P_DATA16)
+#define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2)
+#define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3)
+#define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */
+#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */
+#define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */
+#define OPC_PSUBB (0xf8 | P_EXT | P_DATA16)
+#define OPC_PSUBW (0xf9 | P_EXT | P_DATA16)
+#define OPC_PSUBD (0xfa | P_EXT | P_DATA16)
+#define OPC_PSUBQ (0xfb | P_EXT | P_DATA16)
+#define OPC_PUNPCKLBW (0x60 | P_EXT | P_DATA16)
+#define OPC_PUNPCKLWD (0x61 | P_EXT | P_DATA16)
+#define OPC_PUNPCKLDQ (0x62 | P_EXT | P_DATA16)
+#define OPC_PUNPCKLQDQ (0x6c | P_EXT | P_DATA16)
+#define OPC_PUNPCKHBW (0x68 | P_EXT | P_DATA16)
+#define OPC_PUNPCKHWD (0x69 | P_EXT | P_DATA16)
+#define OPC_PUNPCKHDQ (0x6a | P_EXT | P_DATA16)
+#define OPC_PUNPCKHQDQ (0x6d | P_EXT | P_DATA16)
+#define OPC_PXOR (0xef | P_EXT | P_DATA16)
#define OPC_POP_r32 (0x58)
#define OPC_POPCNT (0xb8 | P_EXT | P_SIMDF3)
#define OPC_PUSH_r32 (0x50)
@@ -326,14 +433,26 @@
#define OPC_SHIFT_Ib (0xc1)
#define OPC_SHIFT_cl (0xd3)
#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3)
+#define OPC_SHUFPS (0xc6 | P_EXT)
#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL (0x85)
#define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3)
+#define OPC_UD2 (0x0b | P_EXT)
+#define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16)
+#define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16)
+#define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16)
+#define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16)
+#define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16)
+#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
+#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_REXW)
+#define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
+#define OPC_VZEROUPPER (0x77 | P_EXT)
#define OPC_XCHG_ax_r32 (0x90)
#define OPC_GRP3_Ev (0xf7)
#define OPC_GRP5 (0xff)
+#define OPC_GRP14 (0x73 | P_EXT | P_DATA16)
/* Group 1 opcode extensions for 0x80-0x83.
These are also used as modifiers for OPC_ARITH. */
@@ -439,10 +558,12 @@
tcg_out8(s, (uint8_t)(rex | 0x40));
}
- if (opc & (P_EXT | P_EXT38)) {
+ if (opc & (P_EXT | P_EXT38 | P_EXT3A)) {
tcg_out8(s, 0x0f);
if (opc & P_EXT38) {
tcg_out8(s, 0x38);
+ } else if (opc & P_EXT3A) {
+ tcg_out8(s, 0x3a);
}
}
@@ -459,10 +580,12 @@
} else if (opc & P_SIMDF2) {
tcg_out8(s, 0xf2);
}
- if (opc & (P_EXT | P_EXT38)) {
+ if (opc & (P_EXT | P_EXT38 | P_EXT3A)) {
tcg_out8(s, 0x0f);
if (opc & P_EXT38) {
tcg_out8(s, 0x38);
+ } else if (opc & P_EXT3A) {
+ tcg_out8(s, 0x3a);
}
}
tcg_out8(s, opc);
@@ -479,34 +602,42 @@
tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
-static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
+static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v,
+ int rm, int index)
{
int tmp;
- if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
+ /* Use the two byte form if possible, which cannot encode
+ VEX.W, VEX.B, VEX.X, or an m-mmmm field other than P_EXT. */
+ if ((opc & (P_EXT | P_EXT38 | P_EXT3A | P_REXW)) == P_EXT
+ && ((rm | index) & 8) == 0) {
+ /* Two byte VEX prefix. */
+ tcg_out8(s, 0xc5);
+
+ tmp = (r & 8 ? 0 : 0x80); /* VEX.R */
+ } else {
/* Three byte VEX prefix. */
tcg_out8(s, 0xc4);
/* VEX.m-mmmm */
- if (opc & P_EXT38) {
+ if (opc & P_EXT3A) {
+ tmp = 3;
+ } else if (opc & P_EXT38) {
tmp = 2;
} else if (opc & P_EXT) {
tmp = 1;
} else {
- tcg_abort();
+ g_assert_not_reached();
}
- tmp |= 0x40; /* VEX.X */
- tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */
- tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */
+ tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */
+ tmp |= (index & 8 ? 0 : 0x40); /* VEX.X */
+ tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */
tcg_out8(s, tmp);
- tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */
- } else {
- /* Two byte VEX prefix. */
- tcg_out8(s, 0xc5);
-
- tmp = (r & 8 ? 0 : 0x80); /* VEX.R */
+ tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */
}
+
+ tmp |= (opc & P_VEXL ? 0x04 : 0); /* VEX.L */
/* VEX.pp */
if (opc & P_DATA16) {
tmp |= 1; /* 0x66 */
@@ -518,6 +649,11 @@
tmp |= (~v & 15) << 3; /* VEX.vvvv */
tcg_out8(s, tmp);
tcg_out8(s, opc);
+}
+
+static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
+{
+ tcg_out_vex_opc(s, opc, r, v, rm, 0);
tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
@@ -526,8 +662,8 @@
mode for absolute addresses, ~RM is the size of the immediate operand
that will follow the instruction. */
-static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
- int index, int shift, intptr_t offset)
+static void tcg_out_sib_offset(TCGContext *s, int r, int rm, int index,
+ int shift, intptr_t offset)
{
int mod, len;
@@ -538,7 +674,6 @@
intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
intptr_t disp = offset - pc;
if (disp == (int32_t)disp) {
- tcg_out_opc(s, opc, r, 0, 0);
tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
tcg_out32(s, disp);
return;
@@ -548,7 +683,6 @@
use of the MODRM+SIB encoding and is therefore larger than
rip-relative addressing. */
if (offset == (int32_t)offset) {
- tcg_out_opc(s, opc, r, 0, 0);
tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
tcg_out8(s, (4 << 3) | 5);
tcg_out32(s, offset);
@@ -556,10 +690,9 @@
}
/* ??? The memory isn't directly addressable. */
- tcg_abort();
+ g_assert_not_reached();
} else {
/* Absolute address. */
- tcg_out_opc(s, opc, r, 0, 0);
tcg_out8(s, (r << 3) | 5);
tcg_out32(s, offset);
return;
@@ -582,7 +715,6 @@
that would be used for %esp is the escape to the two byte form. */
if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
/* Single byte MODRM format. */
- tcg_out_opc(s, opc, r, rm, 0);
tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
} else {
/* Two byte MODRM+SIB format. */
@@ -596,7 +728,6 @@
tcg_debug_assert(index != TCG_REG_ESP);
}
- tcg_out_opc(s, opc, r, rm, index);
tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
}
@@ -608,6 +739,21 @@
}
}
+static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
+ int index, int shift, intptr_t offset)
+{
+ tcg_out_opc(s, opc, r, rm < 0 ? 0 : rm, index < 0 ? 0 : index);
+ tcg_out_sib_offset(s, r, rm, index, shift, offset);
+}
+
+static void tcg_out_vex_modrm_sib_offset(TCGContext *s, int opc, int r, int v,
+ int rm, int index, int shift,
+ intptr_t offset)
+{
+ tcg_out_vex_opc(s, opc, r, v, rm < 0 ? 0 : rm, index < 0 ? 0 : index);
+ tcg_out_sib_offset(s, r, rm, index, shift, offset);
+}
+
/* A simplification of the above with no index or shift. */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
int rm, intptr_t offset)
@@ -615,6 +761,30 @@
tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}
+static inline void tcg_out_vex_modrm_offset(TCGContext *s, int opc, int r,
+ int v, int rm, intptr_t offset)
+{
+ tcg_out_vex_modrm_sib_offset(s, opc, r, v, rm, -1, 0, offset);
+}
+
+/* Output an opcode with an expected reference to the constant pool. */
+static inline void tcg_out_modrm_pool(TCGContext *s, int opc, int r)
+{
+ tcg_out_opc(s, opc, r, 0, 0);
+ /* Absolute for 32-bit, pc-relative for 64-bit. */
+ tcg_out8(s, LOWREGMASK(r) << 3 | 5);
+ tcg_out32(s, 0);
+}
+
+/* Output an opcode with an expected reference to the constant pool. */
+static inline void tcg_out_vex_modrm_pool(TCGContext *s, int opc, int r)
+{
+ tcg_out_vex_opc(s, opc, r, 0, 0, 0);
+ /* Absolute for 32-bit, pc-relative for 64-bit. */
+ tcg_out8(s, LOWREGMASK(r) << 3 | 5);
+ tcg_out32(s, 0);
+}
+
/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
@@ -625,12 +795,116 @@
tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}
-static inline void tcg_out_mov(TCGContext *s, TCGType type,
- TCGReg ret, TCGReg arg)
+static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
- if (arg != ret) {
- int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
- tcg_out_modrm(s, opc, ret, arg);
+ int rexw = 0;
+
+ if (arg == ret) {
+ return;
+ }
+ switch (type) {
+ case TCG_TYPE_I64:
+ rexw = P_REXW;
+ /* fallthru */
+ case TCG_TYPE_I32:
+ if (ret < 16) {
+ if (arg < 16) {
+ tcg_out_modrm(s, OPC_MOVL_GvEv + rexw, ret, arg);
+ } else {
+ tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, arg, 0, ret);
+ }
+ } else {
+ if (arg < 16) {
+ tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, ret, 0, arg);
+ } else {
+ tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg);
+ }
+ }
+ break;
+
+ case TCG_TYPE_V64:
+ tcg_debug_assert(ret >= 16 && arg >= 16);
+ tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg);
+ break;
+ case TCG_TYPE_V128:
+ tcg_debug_assert(ret >= 16 && arg >= 16);
+ tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx, ret, 0, arg);
+ break;
+ case TCG_TYPE_V256:
+ tcg_debug_assert(ret >= 16 && arg >= 16);
+ tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx | P_VEXL, ret, 0, arg);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg r, TCGReg a)
+{
+ if (have_avx2) {
+ static const int dup_insn[4] = {
+ OPC_VPBROADCASTB, OPC_VPBROADCASTW,
+ OPC_VPBROADCASTD, OPC_VPBROADCASTQ,
+ };
+ int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
+ tcg_out_vex_modrm(s, dup_insn[vece] + vex_l, r, 0, a);
+ } else {
+ switch (vece) {
+ case MO_8:
+ /* ??? With zero in a register, use PSHUFB. */
+ tcg_out_vex_modrm(s, OPC_PUNPCKLBW, r, 0, a);
+ a = r;
+ /* FALLTHRU */
+ case MO_16:
+ tcg_out_vex_modrm(s, OPC_PUNPCKLWD, r, 0, a);
+ a = r;
+ /* FALLTHRU */
+ case MO_32:
+ tcg_out_vex_modrm(s, OPC_PSHUFD, r, 0, a);
+ /* imm8 operand: all output lanes selected from input lane 0. */
+ tcg_out8(s, 0);
+ break;
+ case MO_64:
+ tcg_out_vex_modrm(s, OPC_PUNPCKLQDQ, r, 0, a);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+}
+
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg)
+{
+ int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
+
+ if (arg == 0) {
+ tcg_out_vex_modrm(s, OPC_PXOR, ret, ret, ret);
+ return;
+ }
+ if (arg == -1) {
+ tcg_out_vex_modrm(s, OPC_PCMPEQB + vex_l, ret, ret, ret);
+ return;
+ }
+
+ if (TCG_TARGET_REG_BITS == 64) {
+ if (type == TCG_TYPE_V64) {
+ tcg_out_vex_modrm_pool(s, OPC_MOVQ_VqWq, ret);
+ } else if (have_avx2) {
+ tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret);
+ } else {
+ tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret);
+ }
+ new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
+ } else if (have_avx2) {
+ tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
+ new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
+ } else {
+ tcg_out_vex_modrm_pool(s, OPC_MOVD_VyEy, ret);
+ new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
+ tcg_out_dup_vec(s, type, MO_32, ret, ret);
}
}
@@ -639,6 +913,25 @@
{
tcg_target_long diff;
+ switch (type) {
+ case TCG_TYPE_I32:
+#if TCG_TARGET_REG_BITS == 64
+ case TCG_TYPE_I64:
+#endif
+ if (ret < 16) {
+ break;
+ }
+ /* fallthru */
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ tcg_debug_assert(ret >= 16);
+ tcg_out_dupi_vec(s, type, ret, arg);
+ return;
+ default:
+ g_assert_not_reached();
+ }
+
if (arg == 0) {
tgen_arithr(s, ARITH_XOR, ret, ret);
return;
@@ -702,18 +995,74 @@
tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}
-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
- TCGReg arg1, intptr_t arg2)
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
+ TCGReg arg1, intptr_t arg2)
{
- int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
- tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (ret < 16) {
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv, ret, arg1, arg2);
+ } else {
+ tcg_out_vex_modrm_offset(s, OPC_MOVD_VyEy, ret, 0, arg1, arg2);
+ }
+ break;
+ case TCG_TYPE_I64:
+ if (ret < 16) {
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv | P_REXW, ret, arg1, arg2);
+ break;
+ }
+ /* FALLTHRU */
+ case TCG_TYPE_V64:
+ tcg_debug_assert(ret >= 16);
+ tcg_out_vex_modrm_offset(s, OPC_MOVQ_VqWq, ret, 0, arg1, arg2);
+ break;
+ case TCG_TYPE_V128:
+ tcg_debug_assert(ret >= 16);
+ tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx, ret, 0, arg1, arg2);
+ break;
+ case TCG_TYPE_V256:
+ tcg_debug_assert(ret >= 16);
+ tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx | P_VEXL,
+ ret, 0, arg1, arg2);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
- TCGReg arg1, intptr_t arg2)
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
{
- int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
- tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (arg < 16) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv, arg, arg1, arg2);
+ } else {
+ tcg_out_vex_modrm_offset(s, OPC_MOVD_EyVy, arg, 0, arg1, arg2);
+ }
+ break;
+ case TCG_TYPE_I64:
+ if (arg < 16) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_REXW, arg, arg1, arg2);
+ break;
+ }
+ /* FALLTHRU */
+ case TCG_TYPE_V64:
+ tcg_debug_assert(arg >= 16);
+ tcg_out_vex_modrm_offset(s, OPC_MOVQ_WqVq, arg, 0, arg1, arg2);
+ break;
+ case TCG_TYPE_V128:
+ tcg_debug_assert(arg >= 16);
+ tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx, arg, 0, arg1, arg2);
+ break;
+ case TCG_TYPE_V256:
+ tcg_debug_assert(arg >= 16);
+ tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx | P_VEXL,
+ arg, 0, arg1, arg2);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
@@ -725,6 +1074,8 @@
return false;
}
rexw = P_REXW;
+ } else if (type != TCG_TYPE_I32) {
+ return false;
}
tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
tcg_out32(s, val);
@@ -2259,8 +2610,10 @@
break;
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
case INDEX_op_mov_i64:
+ case INDEX_op_mov_vec:
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
case INDEX_op_movi_i64:
+ case INDEX_op_dupi_vec:
case INDEX_op_call: /* Always emitted via tcg_out_call. */
default:
tcg_abort();
@@ -2269,6 +2622,181 @@
#undef OP_32_64
}
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+ unsigned vecl, unsigned vece,
+ const TCGArg *args, const int *const_args)
+{
+ static int const add_insn[4] = {
+ OPC_PADDB, OPC_PADDW, OPC_PADDD, OPC_PADDQ
+ };
+ static int const sub_insn[4] = {
+ OPC_PSUBB, OPC_PSUBW, OPC_PSUBD, OPC_PSUBQ
+ };
+ static int const mul_insn[4] = {
+ OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_UD2
+ };
+ static int const shift_imm_insn[4] = {
+ OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
+ };
+ static int const cmpeq_insn[4] = {
+ OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
+ };
+ static int const cmpgt_insn[4] = {
+ OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
+ };
+ static int const punpckl_insn[4] = {
+ OPC_PUNPCKLBW, OPC_PUNPCKLWD, OPC_PUNPCKLDQ, OPC_PUNPCKLQDQ
+ };
+ static int const punpckh_insn[4] = {
+ OPC_PUNPCKHBW, OPC_PUNPCKHWD, OPC_PUNPCKHDQ, OPC_PUNPCKHQDQ
+ };
+ static int const packss_insn[4] = {
+ OPC_PACKSSWB, OPC_PACKSSDW, OPC_UD2, OPC_UD2
+ };
+ static int const packus_insn[4] = {
+ OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2
+ };
+
+ TCGType type = vecl + TCG_TYPE_V64;
+ int insn, sub;
+ TCGArg a0, a1, a2;
+
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+
+ switch (opc) {
+ case INDEX_op_add_vec:
+ insn = add_insn[vece];
+ goto gen_simd;
+ case INDEX_op_sub_vec:
+ insn = sub_insn[vece];
+ goto gen_simd;
+ case INDEX_op_mul_vec:
+ insn = mul_insn[vece];
+ goto gen_simd;
+ case INDEX_op_and_vec:
+ insn = OPC_PAND;
+ goto gen_simd;
+ case INDEX_op_or_vec:
+ insn = OPC_POR;
+ goto gen_simd;
+ case INDEX_op_xor_vec:
+ insn = OPC_PXOR;
+ goto gen_simd;
+ case INDEX_op_x86_punpckl_vec:
+ insn = punpckl_insn[vece];
+ goto gen_simd;
+ case INDEX_op_x86_punpckh_vec:
+ insn = punpckh_insn[vece];
+ goto gen_simd;
+ case INDEX_op_x86_packss_vec:
+ insn = packss_insn[vece];
+ goto gen_simd;
+ case INDEX_op_x86_packus_vec:
+ insn = packus_insn[vece];
+ goto gen_simd;
+ gen_simd:
+ tcg_debug_assert(insn != OPC_UD2);
+ if (type == TCG_TYPE_V256) {
+ insn |= P_VEXL;
+ }
+ tcg_out_vex_modrm(s, insn, a0, a1, a2);
+ break;
+
+ case INDEX_op_cmp_vec:
+ sub = args[3];
+ if (sub == TCG_COND_EQ) {
+ insn = cmpeq_insn[vece];
+ } else if (sub == TCG_COND_GT) {
+ insn = cmpgt_insn[vece];
+ } else {
+ g_assert_not_reached();
+ }
+ goto gen_simd;
+
+ case INDEX_op_andc_vec:
+ insn = OPC_PANDN;
+ if (type == TCG_TYPE_V256) {
+ insn |= P_VEXL;
+ }
+ tcg_out_vex_modrm(s, insn, a0, a2, a1);
+ break;
+
+ case INDEX_op_shli_vec:
+ sub = 6;
+ goto gen_shift;
+ case INDEX_op_shri_vec:
+ sub = 2;
+ goto gen_shift;
+ case INDEX_op_sari_vec:
+ tcg_debug_assert(vece != MO_64);
+ sub = 4;
+ gen_shift:
+ tcg_debug_assert(vece != MO_8);
+ insn = shift_imm_insn[vece];
+ if (type == TCG_TYPE_V256) {
+ insn |= P_VEXL;
+ }
+ tcg_out_vex_modrm(s, insn, sub, a0, a1);
+ tcg_out8(s, a2);
+ break;
+
+ case INDEX_op_ld_vec:
+ tcg_out_ld(s, type, a0, a1, a2);
+ break;
+ case INDEX_op_st_vec:
+ tcg_out_st(s, type, a0, a1, a2);
+ break;
+ case INDEX_op_dup_vec:
+ tcg_out_dup_vec(s, type, vece, a0, a1);
+ break;
+
+ case INDEX_op_x86_shufps_vec:
+ insn = OPC_SHUFPS;
+ sub = args[3];
+ goto gen_simd_imm8;
+ case INDEX_op_x86_blend_vec:
+ if (vece == MO_16) {
+ insn = OPC_PBLENDW;
+ } else if (vece == MO_32) {
+ insn = (have_avx2 ? OPC_VPBLENDD : OPC_BLENDPS);
+ } else {
+ g_assert_not_reached();
+ }
+ sub = args[3];
+ goto gen_simd_imm8;
+ case INDEX_op_x86_vperm2i128_vec:
+ insn = OPC_VPERM2I128;
+ sub = args[3];
+ goto gen_simd_imm8;
+ gen_simd_imm8:
+ if (type == TCG_TYPE_V256) {
+ insn |= P_VEXL;
+ }
+ tcg_out_vex_modrm(s, insn, a0, a1, a2);
+ tcg_out8(s, sub);
+ break;
+
+ case INDEX_op_x86_vpblendvb_vec:
+ insn = OPC_VPBLENDVB;
+ if (type == TCG_TYPE_V256) {
+ insn |= P_VEXL;
+ }
+ tcg_out_vex_modrm(s, insn, a0, a1, a2);
+ tcg_out8(s, args[3] << 4);
+ break;
+
+ case INDEX_op_x86_psrldq_vec:
+ tcg_out_vex_modrm(s, OPC_GRP14, 3, a0, a1);
+ tcg_out8(s, a2);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
{
static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
@@ -2292,6 +2820,11 @@
= { .args_ct_str = { "r", "r", "L", "L" } };
static const TCGTargetOpDef L_L_L_L
= { .args_ct_str = { "L", "L", "L", "L" } };
+ static const TCGTargetOpDef x_x = { .args_ct_str = { "x", "x" } };
+ static const TCGTargetOpDef x_x_x = { .args_ct_str = { "x", "x", "x" } };
+ static const TCGTargetOpDef x_x_x_x
+ = { .args_ct_str = { "x", "x", "x", "x" } };
+ static const TCGTargetOpDef x_r = { .args_ct_str = { "x", "r" } };
switch (op) {
case INDEX_op_goto_ptr:
@@ -2493,12 +3026,342 @@
return &s2;
}
+ case INDEX_op_ld_vec:
+ case INDEX_op_st_vec:
+ return &x_r;
+
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_mul_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_cmp_vec:
+ case INDEX_op_x86_shufps_vec:
+ case INDEX_op_x86_blend_vec:
+ case INDEX_op_x86_packss_vec:
+ case INDEX_op_x86_packus_vec:
+ case INDEX_op_x86_vperm2i128_vec:
+ case INDEX_op_x86_punpckl_vec:
+ case INDEX_op_x86_punpckh_vec:
+ return &x_x_x;
+ case INDEX_op_dup_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
+ case INDEX_op_x86_psrldq_vec:
+ return &x_x;
+ case INDEX_op_x86_vpblendvb_vec:
+ return &x_x_x_x;
+
default:
break;
}
return NULL;
}
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+{
+ switch (opc) {
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_andc_vec:
+ return 1;
+ case INDEX_op_cmp_vec:
+ return -1;
+
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ /* We must expand the operation for MO_8. */
+ return vece == MO_8 ? -1 : 1;
+
+ case INDEX_op_sari_vec:
+ /* We must expand the operation for MO_8. */
+ if (vece == MO_8) {
+ return -1;
+ }
+ /* We can emulate this for MO_64, but it does not pay off
+ unless we're producing at least 4 values. */
+ if (vece == MO_64) {
+ return type >= TCG_TYPE_V256 ? -1 : 0;
+ }
+ return 1;
+
+ case INDEX_op_mul_vec:
+ if (vece == MO_8) {
+ /* We can expand the operation for MO_8. */
+ return -1;
+ }
+ if (vece == MO_64) {
+ return 0;
+ }
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+ TCGArg a0, ...)
+{
+ va_list va;
+ TCGArg a1, a2;
+ TCGv_vec v0, t1, t2, t3, t4;
+
+ va_start(va, a0);
+ v0 = temp_tcgv_vec(arg_temp(a0));
+
+ switch (opc) {
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ tcg_debug_assert(vece == MO_8);
+ a1 = va_arg(va, TCGArg);
+ a2 = va_arg(va, TCGArg);
+ /* Unpack to W, shift, and repack. Tricky bits:
+ (1) Use punpck*bw x,x to produce DDCCBBAA,
+ i.e. duplicate in other half of the 16-bit lane.
+ (2) For right-shift, add 8 so that the high half of
+ the lane becomes zero. For left-shift, we must
+ shift up and down again.
+ (3) Step 2 leaves high half zero such that PACKUSWB
+ (pack with unsigned saturation) does not modify
+ the quantity. */
+ t1 = tcg_temp_new_vec(type);
+ t2 = tcg_temp_new_vec(type);
+ vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
+ tcgv_vec_arg(t1), a1, a1);
+ vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
+ tcgv_vec_arg(t2), a1, a1);
+ if (opc == INDEX_op_shri_vec) {
+ vec_gen_3(INDEX_op_shri_vec, type, MO_16,
+ tcgv_vec_arg(t1), tcgv_vec_arg(t1), a2 + 8);
+ vec_gen_3(INDEX_op_shri_vec, type, MO_16,
+ tcgv_vec_arg(t2), tcgv_vec_arg(t2), a2 + 8);
+ } else {
+ vec_gen_3(INDEX_op_shli_vec, type, MO_16,
+ tcgv_vec_arg(t1), tcgv_vec_arg(t1), a2 + 8);
+ vec_gen_3(INDEX_op_shli_vec, type, MO_16,
+ tcgv_vec_arg(t2), tcgv_vec_arg(t2), a2 + 8);
+ vec_gen_3(INDEX_op_shri_vec, type, MO_16,
+ tcgv_vec_arg(t1), tcgv_vec_arg(t1), 8);
+ vec_gen_3(INDEX_op_shri_vec, type, MO_16,
+ tcgv_vec_arg(t2), tcgv_vec_arg(t2), 8);
+ }
+ vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8,
+ a0, tcgv_vec_arg(t1), tcgv_vec_arg(t2));
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+ break;
+
+ case INDEX_op_sari_vec:
+ a1 = va_arg(va, TCGArg);
+ a2 = va_arg(va, TCGArg);
+ if (vece == MO_8) {
+ /* Unpack to W, shift, and repack, as above. */
+ t1 = tcg_temp_new_vec(type);
+ t2 = tcg_temp_new_vec(type);
+ vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
+ tcgv_vec_arg(t1), a1, a1);
+ vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
+ tcgv_vec_arg(t2), a1, a1);
+ vec_gen_3(INDEX_op_sari_vec, type, MO_16,
+ tcgv_vec_arg(t1), tcgv_vec_arg(t1), a2 + 8);
+ vec_gen_3(INDEX_op_sari_vec, type, MO_16,
+ tcgv_vec_arg(t2), tcgv_vec_arg(t2), a2 + 8);
+ vec_gen_3(INDEX_op_x86_packss_vec, type, MO_8,
+ a0, tcgv_vec_arg(t1), tcgv_vec_arg(t2));
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+ break;
+ }
+ tcg_debug_assert(vece == MO_64);
+ /* MO_64: If the shift is <= 32, we can emulate the sign extend by
+ performing an arithmetic 32-bit shift and overwriting the high
+ half of the result (note that the ISA says shift of 32 is valid). */
+ if (a2 <= 32) {
+ t1 = tcg_temp_new_vec(type);
+ vec_gen_3(INDEX_op_sari_vec, type, MO_32, tcgv_vec_arg(t1), a1, a2);
+ vec_gen_3(INDEX_op_shri_vec, type, MO_64, a0, a1, a2);
+ vec_gen_4(INDEX_op_x86_blend_vec, type, MO_32,
+ a0, a0, tcgv_vec_arg(t1), 0xaa);
+ tcg_temp_free_vec(t1);
+ break;
+ }
+ /* Otherwise we will need to use a compare vs 0 to produce the
+ sign-extend, shift and merge. */
+ t1 = tcg_temp_new_vec(type);
+ t2 = tcg_const_zeros_vec(type);
+ vec_gen_4(INDEX_op_cmp_vec, type, MO_64,
+ tcgv_vec_arg(t1), tcgv_vec_arg(t2), a1, TCG_COND_GT);
+ tcg_temp_free_vec(t2);
+ vec_gen_3(INDEX_op_shri_vec, type, MO_64, a0, a1, a2);
+ vec_gen_3(INDEX_op_shli_vec, type, MO_64,
+ tcgv_vec_arg(t1), tcgv_vec_arg(t1), 64 - a2);
+ vec_gen_3(INDEX_op_or_vec, type, MO_64, a0, a0, tcgv_vec_arg(t1));
+ tcg_temp_free_vec(t1);
+ break;
+
+ case INDEX_op_mul_vec:
+ tcg_debug_assert(vece == MO_8);
+ a1 = va_arg(va, TCGArg);
+ a2 = va_arg(va, TCGArg);
+ switch (type) {
+ case TCG_TYPE_V64:
+ t1 = tcg_temp_new_vec(TCG_TYPE_V128);
+ t2 = tcg_temp_new_vec(TCG_TYPE_V128);
+ tcg_gen_dup16i_vec(t2, 0);
+ vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
+ tcgv_vec_arg(t1), a1, tcgv_vec_arg(t2));
+ vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
+ tcgv_vec_arg(t2), tcgv_vec_arg(t2), a2);
+ tcg_gen_mul_vec(MO_16, t1, t1, t2);
+ tcg_gen_shri_vec(MO_16, t1, t1, 8);
+ vec_gen_3(INDEX_op_x86_packus_vec, TCG_TYPE_V128, MO_8,
+ a0, tcgv_vec_arg(t1), tcgv_vec_arg(t1));
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+ break;
+
+ case TCG_TYPE_V128:
+ t1 = tcg_temp_new_vec(TCG_TYPE_V128);
+ t2 = tcg_temp_new_vec(TCG_TYPE_V128);
+ t3 = tcg_temp_new_vec(TCG_TYPE_V128);
+ t4 = tcg_temp_new_vec(TCG_TYPE_V128);
+ tcg_gen_dup16i_vec(t4, 0);
+ vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
+ tcgv_vec_arg(t1), a1, tcgv_vec_arg(t4));
+ vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
+ tcgv_vec_arg(t2), tcgv_vec_arg(t4), a2);
+ vec_gen_3(INDEX_op_x86_punpckh_vec, TCG_TYPE_V128, MO_8,
+ tcgv_vec_arg(t3), a1, tcgv_vec_arg(t4));
+ vec_gen_3(INDEX_op_x86_punpckh_vec, TCG_TYPE_V128, MO_8,
+ tcgv_vec_arg(t4), tcgv_vec_arg(t4), a2);
+ tcg_gen_mul_vec(MO_16, t1, t1, t2);
+ tcg_gen_mul_vec(MO_16, t3, t3, t4);
+ tcg_gen_shri_vec(MO_16, t1, t1, 8);
+ tcg_gen_shri_vec(MO_16, t3, t3, 8);
+ vec_gen_3(INDEX_op_x86_packus_vec, TCG_TYPE_V128, MO_8,
+ a0, tcgv_vec_arg(t1), tcgv_vec_arg(t3));
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+ tcg_temp_free_vec(t3);
+ tcg_temp_free_vec(t4);
+ break;
+
+ case TCG_TYPE_V256:
+ t1 = tcg_temp_new_vec(TCG_TYPE_V256);
+ t2 = tcg_temp_new_vec(TCG_TYPE_V256);
+ t3 = tcg_temp_new_vec(TCG_TYPE_V256);
+ t4 = tcg_temp_new_vec(TCG_TYPE_V256);
+ tcg_gen_dup16i_vec(t4, 0);
+ /* a1: A[0-7] ... D[0-7]; a2: W[0-7] ... Z[0-7]
+ t1: extends of B[0-7], D[0-7]
+ t2: extends of X[0-7], Z[0-7]
+ t3: extends of A[0-7], C[0-7]
+ t4: extends of W[0-7], Y[0-7]. */
+ vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V256, MO_8,
+ tcgv_vec_arg(t1), a1, tcgv_vec_arg(t4));
+ vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V256, MO_8,
+ tcgv_vec_arg(t2), tcgv_vec_arg(t4), a2);
+ vec_gen_3(INDEX_op_x86_punpckh_vec, TCG_TYPE_V256, MO_8,
+ tcgv_vec_arg(t3), a1, tcgv_vec_arg(t4));
+ vec_gen_3(INDEX_op_x86_punpckh_vec, TCG_TYPE_V256, MO_8,
+ tcgv_vec_arg(t4), tcgv_vec_arg(t4), a2);
+ /* t1: BX DZ; t2: AW CY. */
+ tcg_gen_mul_vec(MO_16, t1, t1, t2);
+ tcg_gen_mul_vec(MO_16, t3, t3, t4);
+ tcg_gen_shri_vec(MO_16, t1, t1, 8);
+ tcg_gen_shri_vec(MO_16, t3, t3, 8);
+ /* a0: AW BX CY DZ. */
+ vec_gen_3(INDEX_op_x86_packus_vec, TCG_TYPE_V256, MO_8,
+ a0, tcgv_vec_arg(t1), tcgv_vec_arg(t3));
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+ tcg_temp_free_vec(t3);
+ tcg_temp_free_vec(t4);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ break;
+
+ case INDEX_op_cmp_vec:
+ {
+ enum {
+ NEED_SWAP = 1,
+ NEED_INV = 2,
+ NEED_BIAS = 4
+ };
+ static const uint8_t fixups[16] = {
+ [0 ... 15] = -1,
+ [TCG_COND_EQ] = 0,
+ [TCG_COND_NE] = NEED_INV,
+ [TCG_COND_GT] = 0,
+ [TCG_COND_LT] = NEED_SWAP,
+ [TCG_COND_LE] = NEED_INV,
+ [TCG_COND_GE] = NEED_SWAP | NEED_INV,
+ [TCG_COND_GTU] = NEED_BIAS,
+ [TCG_COND_LTU] = NEED_BIAS | NEED_SWAP,
+ [TCG_COND_LEU] = NEED_BIAS | NEED_INV,
+ [TCG_COND_GEU] = NEED_BIAS | NEED_SWAP | NEED_INV,
+ };
+
+ TCGCond cond;
+ uint8_t fixup;
+
+ a1 = va_arg(va, TCGArg);
+ a2 = va_arg(va, TCGArg);
+ cond = va_arg(va, TCGArg);
+ fixup = fixups[cond & 15];
+ tcg_debug_assert(fixup != 0xff);
+
+ if (fixup & NEED_INV) {
+ cond = tcg_invert_cond(cond);
+ }
+ if (fixup & NEED_SWAP) {
+ TCGArg t;
+ t = a1, a1 = a2, a2 = t;
+ cond = tcg_swap_cond(cond);
+ }
+
+ t1 = t2 = NULL;
+ if (fixup & NEED_BIAS) {
+ t1 = tcg_temp_new_vec(type);
+ t2 = tcg_temp_new_vec(type);
+ tcg_gen_dupi_vec(vece, t2, 1ull << ((8 << vece) - 1));
+ tcg_gen_sub_vec(vece, t1, temp_tcgv_vec(arg_temp(a1)), t2);
+ tcg_gen_sub_vec(vece, t2, temp_tcgv_vec(arg_temp(a2)), t2);
+ a1 = tcgv_vec_arg(t1);
+ a2 = tcgv_vec_arg(t2);
+ cond = tcg_signed_cond(cond);
+ }
+
+ tcg_debug_assert(cond == TCG_COND_EQ || cond == TCG_COND_GT);
+ vec_gen_4(INDEX_op_cmp_vec, type, vece, a0, a1, a2, cond);
+
+ if (fixup & NEED_BIAS) {
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+ }
+ if (fixup & NEED_INV) {
+ tcg_gen_not_vec(vece, v0, v0);
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ va_end(va);
+}
+
static const int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
TCG_REG_RBP,
@@ -2577,6 +3440,9 @@
tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
+ if (have_avx2) {
+ tcg_out_vex_opc(s, OPC_VZEROUPPER, 0, 0, 0, 0);
+ }
for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
tcg_out_pop(s, tcg_target_callee_save_regs[i]);
}
@@ -2598,9 +3464,16 @@
static void tcg_target_init(TCGContext *s)
{
#ifdef CONFIG_CPUID_H
- unsigned a, b, c, d;
+ unsigned a, b, c, d, b7 = 0;
int max = __get_cpuid_max(0, 0);
+ if (max >= 7) {
+ /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
+ __cpuid_count(7, 0, a, b7, c, d);
+ have_bmi1 = (b7 & bit_BMI) != 0;
+ have_bmi2 = (b7 & bit_BMI2) != 0;
+ }
+
if (max >= 1) {
__cpuid(1, a, b, c, d);
#ifndef have_cmov
@@ -2609,17 +3482,22 @@
available, we'll use a small forward branch. */
have_cmov = (d & bit_CMOV) != 0;
#endif
+
/* MOVBE is only available on Intel Atom and Haswell CPUs, so we
need to probe for it. */
have_movbe = (c & bit_MOVBE) != 0;
have_popcnt = (c & bit_POPCNT) != 0;
- }
- if (max >= 7) {
- /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
- __cpuid_count(7, 0, a, b, c, d);
- have_bmi1 = (b & bit_BMI) != 0;
- have_bmi2 = (b & bit_BMI2) != 0;
+ /* There are a number of things we must check before we can be
+ sure of not hitting invalid opcode. */
+ if (c & bit_OSXSAVE) {
+ unsigned xcrl, xcrh;
+ asm ("xgetbv" : "=a" (xcrl), "=d" (xcrh) : "c" (0));
+ if ((xcrl & 6) == 6) {
+ have_avx1 = (c & bit_AVX) != 0;
+ have_avx2 = (b7 & bit_AVX2) != 0;
+ }
+ }
}
max = __get_cpuid_max(0x8000000, 0);
@@ -2630,11 +3508,16 @@
}
#endif /* CONFIG_CPUID_H */
+ tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
if (TCG_TARGET_REG_BITS == 64) {
- tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
- tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
- } else {
- tcg_target_available_regs[TCG_TYPE_I32] = 0xff;
+ tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
+ }
+ if (have_avx1) {
+ tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
+ tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
+ }
+ if (have_avx2) {
+ tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
}
tcg_target_call_clobber_regs = 0;
diff --git a/tcg/i386/tcg-target.opc.h b/tcg/i386/tcg-target.opc.h
new file mode 100644
index 0000000..e5fa88b
--- /dev/null
+++ b/tcg/i386/tcg-target.opc.h
@@ -0,0 +1,13 @@
+/* Target-specific opcodes for host vector expansion. These will be
+ emitted by tcg_expand_vec_op. For those familiar with GCC internals,
+ consider these to be UNSPEC with names. */
+
+DEF(x86_shufps_vec, 1, 2, 1, IMPLVEC)
+DEF(x86_vpblendvb_vec, 1, 3, 0, IMPLVEC)
+DEF(x86_blend_vec, 1, 2, 1, IMPLVEC)
+DEF(x86_packss_vec, 1, 2, 0, IMPLVEC)
+DEF(x86_packus_vec, 1, 2, 0, IMPLVEC)
+DEF(x86_psrldq_vec, 1, 1, 1, IMPLVEC)
+DEF(x86_vperm2i128_vec, 1, 2, 1, IMPLVEC)
+DEF(x86_punpckl_vec, 1, 2, 0, IMPLVEC)
+DEF(x86_punpckh_vec, 1, 2, 0, IMPLVEC)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 2cbbeef..d4ea67e 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -32,6 +32,11 @@
glue(glue(case INDEX_op_, x), _i32): \
glue(glue(case INDEX_op_, x), _i64)
+#define CASE_OP_32_64_VEC(x) \
+ glue(glue(case INDEX_op_, x), _i32): \
+ glue(glue(case INDEX_op_, x), _i64): \
+ glue(glue(case INDEX_op_, x), _vec)
+
struct tcg_temp_info {
bool is_const;
TCGTemp *prev_copy;
@@ -108,40 +113,6 @@
init_ts_info(infos, temps_used, arg_temp(arg));
}
-static int op_bits(TCGOpcode op)
-{
- const TCGOpDef *def = &tcg_op_defs[op];
- return def->flags & TCG_OPF_64BIT ? 64 : 32;
-}
-
-static TCGOpcode op_to_mov(TCGOpcode op)
-{
- switch (op_bits(op)) {
- case 32:
- return INDEX_op_mov_i32;
- case 64:
- return INDEX_op_mov_i64;
- default:
- fprintf(stderr, "op_to_mov: unexpected return value of "
- "function op_bits.\n");
- tcg_abort();
- }
-}
-
-static TCGOpcode op_to_movi(TCGOpcode op)
-{
- switch (op_bits(op)) {
- case 32:
- return INDEX_op_movi_i32;
- case 64:
- return INDEX_op_movi_i64;
- default:
- fprintf(stderr, "op_to_movi: unexpected return value of "
- "function op_bits.\n");
- tcg_abort();
- }
-}
-
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
TCGTemp *i;
@@ -199,11 +170,23 @@
static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
{
- TCGOpcode new_op = op_to_movi(op->opc);
+ const TCGOpDef *def;
+ TCGOpcode new_op;
tcg_target_ulong mask;
struct tcg_temp_info *di = arg_info(dst);
+ def = &tcg_op_defs[op->opc];
+ if (def->flags & TCG_OPF_VECTOR) {
+ new_op = INDEX_op_dupi_vec;
+ } else if (def->flags & TCG_OPF_64BIT) {
+ new_op = INDEX_op_movi_i64;
+ } else {
+ new_op = INDEX_op_movi_i32;
+ }
op->opc = new_op;
+ /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
+ op->args[0] = dst;
+ op->args[1] = val;
reset_temp(dst);
di->is_const = true;
@@ -214,15 +197,13 @@
mask |= ~0xffffffffull;
}
di->mask = mask;
-
- op->args[0] = dst;
- op->args[1] = val;
}
static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
TCGTemp *dst_ts = arg_temp(dst);
TCGTemp *src_ts = arg_temp(src);
+ const TCGOpDef *def;
struct tcg_temp_info *di;
struct tcg_temp_info *si;
tcg_target_ulong mask;
@@ -236,9 +217,16 @@
reset_ts(dst_ts);
di = ts_info(dst_ts);
si = ts_info(src_ts);
- new_op = op_to_mov(op->opc);
-
+ def = &tcg_op_defs[op->opc];
+ if (def->flags & TCG_OPF_VECTOR) {
+ new_op = INDEX_op_mov_vec;
+ } else if (def->flags & TCG_OPF_64BIT) {
+ new_op = INDEX_op_mov_i64;
+ } else {
+ new_op = INDEX_op_mov_i32;
+ }
op->opc = new_op;
+ /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
op->args[0] = dst;
op->args[1] = src;
@@ -417,8 +405,9 @@
static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
+ const TCGOpDef *def = &tcg_op_defs[op];
TCGArg res = do_constant_folding_2(op, x, y);
- if (op_bits(op) == 32) {
+ if (!(def->flags & TCG_OPF_64BIT)) {
res = (int32_t)res;
}
return res;
@@ -508,13 +497,12 @@
tcg_target_ulong xv = arg_info(x)->val;
tcg_target_ulong yv = arg_info(y)->val;
if (arg_is_const(x) && arg_is_const(y)) {
- switch (op_bits(op)) {
- case 32:
- return do_constant_folding_cond_32(xv, yv, c);
- case 64:
+ const TCGOpDef *def = &tcg_op_defs[op];
+ tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
+ if (def->flags & TCG_OPF_64BIT) {
return do_constant_folding_cond_64(xv, yv, c);
- default:
- tcg_abort();
+ } else {
+ return do_constant_folding_cond_32(xv, yv, c);
}
} else if (args_are_copies(x, y)) {
return do_constant_folding_cond_eq(c);
@@ -653,11 +641,11 @@
/* For commutative operations make constant second argument */
switch (opc) {
- CASE_OP_32_64(add):
- CASE_OP_32_64(mul):
- CASE_OP_32_64(and):
- CASE_OP_32_64(or):
- CASE_OP_32_64(xor):
+ CASE_OP_32_64_VEC(add):
+ CASE_OP_32_64_VEC(mul):
+ CASE_OP_32_64_VEC(and):
+ CASE_OP_32_64_VEC(or):
+ CASE_OP_32_64_VEC(xor):
CASE_OP_32_64(eqv):
CASE_OP_32_64(nand):
CASE_OP_32_64(nor):
@@ -722,7 +710,7 @@
continue;
}
break;
- CASE_OP_32_64(sub):
+ CASE_OP_32_64_VEC(sub):
{
TCGOpcode neg_op;
bool have_neg;
@@ -734,9 +722,12 @@
if (opc == INDEX_op_sub_i32) {
neg_op = INDEX_op_neg_i32;
have_neg = TCG_TARGET_HAS_neg_i32;
- } else {
+ } else if (opc == INDEX_op_sub_i64) {
neg_op = INDEX_op_neg_i64;
have_neg = TCG_TARGET_HAS_neg_i64;
+ } else {
+ neg_op = INDEX_op_neg_vec;
+ have_neg = TCG_TARGET_HAS_neg_vec;
}
if (!have_neg) {
break;
@@ -750,7 +741,7 @@
}
}
break;
- CASE_OP_32_64(xor):
+ CASE_OP_32_64_VEC(xor):
CASE_OP_32_64(nand):
if (!arg_is_const(op->args[1])
&& arg_is_const(op->args[2])
@@ -767,7 +758,7 @@
goto try_not;
}
break;
- CASE_OP_32_64(andc):
+ CASE_OP_32_64_VEC(andc):
if (!arg_is_const(op->args[2])
&& arg_is_const(op->args[1])
&& arg_info(op->args[1])->val == -1) {
@@ -775,7 +766,7 @@
goto try_not;
}
break;
- CASE_OP_32_64(orc):
+ CASE_OP_32_64_VEC(orc):
CASE_OP_32_64(eqv):
if (!arg_is_const(op->args[2])
&& arg_is_const(op->args[1])
@@ -789,7 +780,10 @@
TCGOpcode not_op;
bool have_not;
- if (def->flags & TCG_OPF_64BIT) {
+ if (def->flags & TCG_OPF_VECTOR) {
+ not_op = INDEX_op_not_vec;
+ have_not = TCG_TARGET_HAS_not_vec;
+ } else if (def->flags & TCG_OPF_64BIT) {
not_op = INDEX_op_not_i64;
have_not = TCG_TARGET_HAS_not_i64;
} else {
@@ -810,16 +804,16 @@
/* Simplify expression for "op r, a, const => mov r, a" cases */
switch (opc) {
- CASE_OP_32_64(add):
- CASE_OP_32_64(sub):
+ CASE_OP_32_64_VEC(add):
+ CASE_OP_32_64_VEC(sub):
+ CASE_OP_32_64_VEC(or):
+ CASE_OP_32_64_VEC(xor):
+ CASE_OP_32_64_VEC(andc):
CASE_OP_32_64(shl):
CASE_OP_32_64(shr):
CASE_OP_32_64(sar):
CASE_OP_32_64(rotl):
CASE_OP_32_64(rotr):
- CASE_OP_32_64(or):
- CASE_OP_32_64(xor):
- CASE_OP_32_64(andc):
if (!arg_is_const(op->args[1])
&& arg_is_const(op->args[2])
&& arg_info(op->args[2])->val == 0) {
@@ -827,8 +821,8 @@
continue;
}
break;
- CASE_OP_32_64(and):
- CASE_OP_32_64(orc):
+ CASE_OP_32_64_VEC(and):
+ CASE_OP_32_64_VEC(orc):
CASE_OP_32_64(eqv):
if (!arg_is_const(op->args[1])
&& arg_is_const(op->args[2])
@@ -1042,8 +1036,8 @@
/* Simplify expression for "op r, a, 0 => movi r, 0" cases */
switch (opc) {
- CASE_OP_32_64(and):
- CASE_OP_32_64(mul):
+ CASE_OP_32_64_VEC(and):
+ CASE_OP_32_64_VEC(mul):
CASE_OP_32_64(muluh):
CASE_OP_32_64(mulsh):
if (arg_is_const(op->args[2])
@@ -1058,8 +1052,8 @@
/* Simplify expression for "op r, a, a => mov r, a" cases */
switch (opc) {
- CASE_OP_32_64(or):
- CASE_OP_32_64(and):
+ CASE_OP_32_64_VEC(or):
+ CASE_OP_32_64_VEC(and):
if (args_are_copies(op->args[1], op->args[2])) {
tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
continue;
@@ -1071,9 +1065,9 @@
/* Simplify expression for "op r, a, a => movi r, 0" cases */
switch (opc) {
- CASE_OP_32_64(andc):
- CASE_OP_32_64(sub):
- CASE_OP_32_64(xor):
+ CASE_OP_32_64_VEC(andc):
+ CASE_OP_32_64_VEC(sub):
+ CASE_OP_32_64_VEC(xor):
if (args_are_copies(op->args[1], op->args[2])) {
tcg_opt_gen_movi(s, op, op->args[0], 0);
continue;
@@ -1087,13 +1081,23 @@
folding. Constants will be substituted to arguments by register
allocator where needed and possible. Also detect copies. */
switch (opc) {
- CASE_OP_32_64(mov):
+ CASE_OP_32_64_VEC(mov):
tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
break;
CASE_OP_32_64(movi):
+ case INDEX_op_dupi_vec:
tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
break;
+ case INDEX_op_dup_vec:
+ if (arg_is_const(op->args[1])) {
+ tmp = arg_info(op->args[1])->val;
+ tmp = dup_const(TCGOP_VECE(op), tmp);
+ tcg_opt_gen_movi(s, op, op->args[0], tmp);
+ continue;
+ }
+ break;
+
CASE_OP_32_64(not):
CASE_OP_32_64(neg):
CASE_OP_32_64(ext8s):
diff --git a/tcg/tcg-gvec-desc.h b/tcg/tcg-gvec-desc.h
new file mode 100644
index 0000000..3b4c2d9
--- /dev/null
+++ b/tcg/tcg-gvec-desc.h
@@ -0,0 +1,49 @@
+/*
+ * Generic vector operation descriptor
+ *
+ * Copyright (c) 2018 Linaro
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* ??? These bit widths are set for ARM SVE, maxing out at 256 byte vectors. */
+#define SIMD_OPRSZ_SHIFT 0
+#define SIMD_OPRSZ_BITS 5
+
+#define SIMD_MAXSZ_SHIFT (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
+#define SIMD_MAXSZ_BITS 5
+
+#define SIMD_DATA_SHIFT (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
+#define SIMD_DATA_BITS (32 - SIMD_DATA_SHIFT)
+
+/* Create a descriptor from components. */
+uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data);
+
+/* Extract the operation size from a descriptor. */
+static inline intptr_t simd_oprsz(uint32_t desc)
+{
+ return (extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS) + 1) * 8;
+}
+
+/* Extract the max vector size from a descriptor. */
+static inline intptr_t simd_maxsz(uint32_t desc)
+{
+ return (extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) + 1) * 8;
+}
+
+/* Extract the operation-specific data from a descriptor. */
+static inline int32_t simd_data(uint32_t desc)
+{
+ return sextract32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS);
+}
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
new file mode 100644
index 0000000..bfe44bb
--- /dev/null
+++ b/tcg/tcg-op-gvec.c
@@ -0,0 +1,2216 @@
+/*
+ * Generic vector operation expansion
+ *
+ * Copyright (c) 2018 Linaro
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "tcg.h"
+#include "tcg-op.h"
+#include "tcg-op-gvec.h"
+#include "tcg-gvec-desc.h"
+
+#define MAX_UNROLL 4
+
+/* Verify vector size and alignment rules. OFS should be the OR of all
+ of the operand offsets so that we can check them all at once. */
+static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs)
+{
+ uint32_t opr_align = oprsz >= 16 ? 15 : 7;
+ uint32_t max_align = maxsz >= 16 || oprsz >= 16 ? 15 : 7;
+ tcg_debug_assert(oprsz > 0);
+ tcg_debug_assert(oprsz <= maxsz);
+ tcg_debug_assert((oprsz & opr_align) == 0);
+ tcg_debug_assert((maxsz & max_align) == 0);
+ tcg_debug_assert((ofs & max_align) == 0);
+}
+
+/* Verify vector overlap rules for two operands. */
+static void check_overlap_2(uint32_t d, uint32_t a, uint32_t s)
+{
+ tcg_debug_assert(d == a || d + s <= a || a + s <= d);
+}
+
+/* Verify vector overlap rules for three operands. */
+static void check_overlap_3(uint32_t d, uint32_t a, uint32_t b, uint32_t s)
+{
+ check_overlap_2(d, a, s);
+ check_overlap_2(d, b, s);
+ check_overlap_2(a, b, s);
+}
+
+/* Verify vector overlap rules for four operands. */
+static void check_overlap_4(uint32_t d, uint32_t a, uint32_t b,
+ uint32_t c, uint32_t s)
+{
+ check_overlap_2(d, a, s);
+ check_overlap_2(d, b, s);
+ check_overlap_2(d, c, s);
+ check_overlap_2(a, b, s);
+ check_overlap_2(a, c, s);
+ check_overlap_2(b, c, s);
+}
+
+/* Create a descriptor from components. */
+uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data)
+{
+ uint32_t desc = 0;
+
+ assert(oprsz % 8 == 0 && oprsz <= (8 << SIMD_OPRSZ_BITS));
+ assert(maxsz % 8 == 0 && maxsz <= (8 << SIMD_MAXSZ_BITS));
+ assert(data == sextract32(data, 0, SIMD_DATA_BITS));
+
+ oprsz = (oprsz / 8) - 1;
+ maxsz = (maxsz / 8) - 1;
+ desc = deposit32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS, oprsz);
+ desc = deposit32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS, maxsz);
+ desc = deposit32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS, data);
+
+ return desc;
+}
+
+/* Generate a call to a gvec-style helper with two vector operands. */
+void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz, int32_t data,
+ gen_helper_gvec_2 *fn)
+{
+ TCGv_ptr a0, a1;
+ TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+
+ fn(a0, a1, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+ tcg_temp_free_i32(desc);
+}
+
+/* Generate a call to a gvec-style helper with two vector operands
+ and one scalar operand. */
+void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c,
+ uint32_t oprsz, uint32_t maxsz, int32_t data,
+ gen_helper_gvec_2i *fn)
+{
+ TCGv_ptr a0, a1;
+ TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+
+ fn(a0, a1, c, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+ tcg_temp_free_i32(desc);
+}
+
+/* Generate a call to a gvec-style helper with three vector operands. */
+void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz, int32_t data,
+ gen_helper_gvec_3 *fn)
+{
+ TCGv_ptr a0, a1, a2;
+ TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+ a2 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+ tcg_gen_addi_ptr(a2, cpu_env, bofs);
+
+ fn(a0, a1, a2, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+ tcg_temp_free_ptr(a2);
+ tcg_temp_free_i32(desc);
+}
+
+/* Generate a call to a gvec-style helper with four vector operands. */
+void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, uint32_t oprsz, uint32_t maxsz,
+ int32_t data, gen_helper_gvec_4 *fn)
+{
+ TCGv_ptr a0, a1, a2, a3;
+ TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+ a2 = tcg_temp_new_ptr();
+ a3 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+ tcg_gen_addi_ptr(a2, cpu_env, bofs);
+ tcg_gen_addi_ptr(a3, cpu_env, cofs);
+
+ fn(a0, a1, a2, a3, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+ tcg_temp_free_ptr(a2);
+ tcg_temp_free_ptr(a3);
+ tcg_temp_free_i32(desc);
+}
+
+/* Generate a call to a gvec-style helper with five vector operands. */
+void tcg_gen_gvec_5_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, uint32_t xofs, uint32_t oprsz,
+ uint32_t maxsz, int32_t data, gen_helper_gvec_5 *fn)
+{
+ TCGv_ptr a0, a1, a2, a3, a4;
+ TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+ a2 = tcg_temp_new_ptr();
+ a3 = tcg_temp_new_ptr();
+ a4 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+ tcg_gen_addi_ptr(a2, cpu_env, bofs);
+ tcg_gen_addi_ptr(a3, cpu_env, cofs);
+ tcg_gen_addi_ptr(a4, cpu_env, xofs);
+
+ fn(a0, a1, a2, a3, a4, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+ tcg_temp_free_ptr(a2);
+ tcg_temp_free_ptr(a3);
+ tcg_temp_free_ptr(a4);
+ tcg_temp_free_i32(desc);
+}
+
+/* Generate a call to a gvec-style helper with three vector operands
+ and an extra pointer operand. */
+void tcg_gen_gvec_2_ptr(uint32_t dofs, uint32_t aofs,
+ TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
+ int32_t data, gen_helper_gvec_2_ptr *fn)
+{
+ TCGv_ptr a0, a1;
+ TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+
+ fn(a0, a1, ptr, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+ tcg_temp_free_i32(desc);
+}
+
+/* Generate a call to a gvec-style helper with three vector operands
+ and an extra pointer operand. */
+void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
+ int32_t data, gen_helper_gvec_3_ptr *fn)
+{
+ TCGv_ptr a0, a1, a2;
+ TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+ a2 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+ tcg_gen_addi_ptr(a2, cpu_env, bofs);
+
+ fn(a0, a1, a2, ptr, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+ tcg_temp_free_ptr(a2);
+ tcg_temp_free_i32(desc);
+}
+
+/* Generate a call to a gvec-style helper with four vector operands
+ and an extra pointer operand. */
+void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, TCGv_ptr ptr, uint32_t oprsz,
+ uint32_t maxsz, int32_t data,
+ gen_helper_gvec_4_ptr *fn)
+{
+ TCGv_ptr a0, a1, a2, a3;
+ TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+ a2 = tcg_temp_new_ptr();
+ a3 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+ tcg_gen_addi_ptr(a2, cpu_env, bofs);
+ tcg_gen_addi_ptr(a3, cpu_env, cofs);
+
+ fn(a0, a1, a2, a3, ptr, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+ tcg_temp_free_ptr(a2);
+ tcg_temp_free_ptr(a3);
+ tcg_temp_free_i32(desc);
+}
+
+/* Return true if we want to implement something of OPRSZ bytes
+ in units of LNSZ. This limits the expansion of inline code. */
+static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)
+{
+ uint32_t lnct = oprsz / lnsz;
+ return lnct >= 1 && lnct <= MAX_UNROLL;
+}
+
+static void expand_clr(uint32_t dofs, uint32_t maxsz);
+
+/* Duplicate C as per VECE. */
+uint64_t (dup_const)(unsigned vece, uint64_t c)
+{
+ switch (vece) {
+ case MO_8:
+ return 0x0101010101010101ull * (uint8_t)c;
+ case MO_16:
+ return 0x0001000100010001ull * (uint16_t)c;
+ case MO_32:
+ return 0x0000000100000001ull * (uint32_t)c;
+ case MO_64:
+ return c;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/* Duplicate IN into OUT as per VECE. */
+static void gen_dup_i32(unsigned vece, TCGv_i32 out, TCGv_i32 in)
+{
+ switch (vece) {
+ case MO_8:
+ tcg_gen_ext8u_i32(out, in);
+ tcg_gen_muli_i32(out, out, 0x01010101);
+ break;
+ case MO_16:
+ tcg_gen_deposit_i32(out, in, in, 16, 16);
+ break;
+ case MO_32:
+ tcg_gen_mov_i32(out, in);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in)
+{
+ switch (vece) {
+ case MO_8:
+ tcg_gen_ext8u_i64(out, in);
+ tcg_gen_muli_i64(out, out, 0x0101010101010101ull);
+ break;
+ case MO_16:
+ tcg_gen_ext16u_i64(out, in);
+ tcg_gen_muli_i64(out, out, 0x0001000100010001ull);
+ break;
+ case MO_32:
+ tcg_gen_deposit_i64(out, in, in, 32, 32);
+ break;
+ case MO_64:
+ tcg_gen_mov_i64(out, in);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/* Set OPRSZ bytes at DOFS to replications of IN_32, IN_64 or IN_C.
+ * Only one of IN_32 or IN_64 may be set;
+ * IN_C is used if IN_32 and IN_64 are unset.
+ */
+static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
+ uint32_t maxsz, TCGv_i32 in_32, TCGv_i64 in_64,
+ uint64_t in_c)
+{
+ TCGType type;
+ TCGv_i64 t_64;
+ TCGv_i32 t_32, t_desc;
+ TCGv_ptr t_ptr;
+ uint32_t i;
+
+ assert(vece <= (in_32 ? MO_32 : MO_64));
+ assert(in_32 == NULL || in_64 == NULL);
+
+ /* If we're storing 0, expand oprsz to maxsz. */
+ if (in_32 == NULL && in_64 == NULL) {
+ in_c = dup_const(vece, in_c);
+ if (in_c == 0) {
+ oprsz = maxsz;
+ }
+ }
+
+ type = 0;
+ if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32)) {
+ type = TCG_TYPE_V256;
+ } else if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16)) {
+ type = TCG_TYPE_V128;
+ } else if (TCG_TARGET_HAS_v64 && check_size_impl(oprsz, 8)
+ /* Prefer integer when 64-bit host and no variable dup. */
+ && !(TCG_TARGET_REG_BITS == 64 && in_32 == NULL
+ && (in_64 == NULL || vece == MO_64))) {
+ type = TCG_TYPE_V64;
+ }
+
+ /* Implement inline with a vector type, if possible. */
+ if (type != 0) {
+ TCGv_vec t_vec = tcg_temp_new_vec(type);
+
+ if (in_32) {
+ tcg_gen_dup_i32_vec(vece, t_vec, in_32);
+ } else if (in_64) {
+ tcg_gen_dup_i64_vec(vece, t_vec, in_64);
+ } else {
+ switch (vece) {
+ case MO_8:
+ tcg_gen_dup8i_vec(t_vec, in_c);
+ break;
+ case MO_16:
+ tcg_gen_dup16i_vec(t_vec, in_c);
+ break;
+ case MO_32:
+ tcg_gen_dup32i_vec(t_vec, in_c);
+ break;
+ default:
+ tcg_gen_dup64i_vec(t_vec, in_c);
+ break;
+ }
+ }
+
+ i = 0;
+ if (TCG_TARGET_HAS_v256) {
+ for (; i + 32 <= oprsz; i += 32) {
+ tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256);
+ }
+ }
+ if (TCG_TARGET_HAS_v128) {
+ for (; i + 16 <= oprsz; i += 16) {
+ tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128);
+ }
+ }
+ if (TCG_TARGET_HAS_v64) {
+ for (; i < oprsz; i += 8) {
+ tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64);
+ }
+ }
+ tcg_temp_free_vec(t_vec);
+ goto done;
+ }
+
+ /* Otherwise, inline with an integer type, unless "large". */
+ if (check_size_impl(oprsz, TCG_TARGET_REG_BITS / 8)) {
+ t_64 = NULL;
+ t_32 = NULL;
+
+ if (in_32) {
+ /* We are given a 32-bit variable input. For a 64-bit host,
+ use a 64-bit operation unless the 32-bit operation would
+ be simple enough. */
+ if (TCG_TARGET_REG_BITS == 64
+ && (vece != MO_32 || !check_size_impl(oprsz, 4))) {
+ t_64 = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(t_64, in_32);
+ gen_dup_i64(vece, t_64, t_64);
+ } else {
+ t_32 = tcg_temp_new_i32();
+ gen_dup_i32(vece, t_32, in_32);
+ }
+ } else if (in_64) {
+ /* We are given a 64-bit variable input. */
+ t_64 = tcg_temp_new_i64();
+ gen_dup_i64(vece, t_64, in_64);
+ } else {
+ /* We are given a constant input. */
+ /* For 64-bit hosts, use 64-bit constants for "simple" constants
+ or when we'd need too many 32-bit stores, or when a 64-bit
+ constant is really required. */
+ if (vece == MO_64
+ || (TCG_TARGET_REG_BITS == 64
+ && (in_c == 0 || in_c == -1
+ || !check_size_impl(oprsz, 4)))) {
+ t_64 = tcg_const_i64(in_c);
+ } else {
+ t_32 = tcg_const_i32(in_c);
+ }
+ }
+
+ /* Implement inline if we picked an implementation size above. */
+ if (t_32) {
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_st_i32(t_32, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t_32);
+ goto done;
+ }
+ if (t_64) {
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_st_i64(t_64, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t_64);
+ goto done;
+ }
+ }
+
+ /* Otherwise implement out of line. */
+ t_ptr = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(t_ptr, cpu_env, dofs);
+ t_desc = tcg_const_i32(simd_desc(oprsz, maxsz, 0));
+
+ if (vece == MO_64) {
+ if (in_64) {
+ gen_helper_gvec_dup64(t_ptr, t_desc, in_64);
+ } else {
+ t_64 = tcg_const_i64(in_c);
+ gen_helper_gvec_dup64(t_ptr, t_desc, t_64);
+ tcg_temp_free_i64(t_64);
+ }
+ } else {
+ typedef void dup_fn(TCGv_ptr, TCGv_i32, TCGv_i32);
+ static dup_fn * const fns[3] = {
+ gen_helper_gvec_dup8,
+ gen_helper_gvec_dup16,
+ gen_helper_gvec_dup32
+ };
+
+ if (in_32) {
+ fns[vece](t_ptr, t_desc, in_32);
+ } else {
+ t_32 = tcg_temp_new_i32();
+ if (in_64) {
+ tcg_gen_extrl_i64_i32(t_32, in_64);
+ } else if (vece == MO_8) {
+ tcg_gen_movi_i32(t_32, in_c & 0xff);
+ } else if (vece == MO_16) {
+ tcg_gen_movi_i32(t_32, in_c & 0xffff);
+ } else {
+ tcg_gen_movi_i32(t_32, in_c);
+ }
+ fns[vece](t_ptr, t_desc, t_32);
+ tcg_temp_free_i32(t_32);
+ }
+ }
+
+ tcg_temp_free_ptr(t_ptr);
+ tcg_temp_free_i32(t_desc);
+ return;
+
+ done:
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+/* Likewise, but with zero. */
+static void expand_clr(uint32_t dofs, uint32_t maxsz)
+{
+ do_dup(MO_8, dofs, maxsz, maxsz, NULL, NULL, 0);
+}
+
+/* Expand OPSZ bytes worth of two-operand operations using i32 elements. */
+static void expand_2_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ void (*fni)(TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+ fni(t0, t0);
+ tcg_gen_st_i32(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t0);
+}
+
+static void expand_2i_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ int32_t c, bool load_dest,
+ void (*fni)(TCGv_i32, TCGv_i32, int32_t))
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+ if (load_dest) {
+ tcg_gen_ld_i32(t1, cpu_env, dofs + i);
+ }
+ fni(t1, t0, c);
+ tcg_gen_st_i32(t1, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+}
+
+static void expand_2s_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ TCGv_i32 c, bool scalar_first,
+ void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+ if (scalar_first) {
+ fni(t1, c, t0);
+ } else {
+ fni(t1, t0, c);
+ }
+ tcg_gen_st_i32(t1, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+}
+
+/* Expand OPSZ bytes worth of three-operand operations using i32 elements. */
+static void expand_3_i32(uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, bool load_dest,
+ void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+ tcg_gen_ld_i32(t1, cpu_env, bofs + i);
+ if (load_dest) {
+ tcg_gen_ld_i32(t2, cpu_env, dofs + i);
+ }
+ fni(t2, t0, t1);
+ tcg_gen_st_i32(t2, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t2);
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t0);
+}
+
+/* Expand OPSZ bytes worth of three-operand operations using i32 elements. */
+static void expand_4_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, uint32_t oprsz,
+ void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+ TCGv_i32 t3 = tcg_temp_new_i32();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_ld_i32(t1, cpu_env, aofs + i);
+ tcg_gen_ld_i32(t2, cpu_env, bofs + i);
+ tcg_gen_ld_i32(t3, cpu_env, cofs + i);
+ fni(t0, t1, t2, t3);
+ tcg_gen_st_i32(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t3);
+ tcg_temp_free_i32(t2);
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t0);
+}
+
+/* Expand OPSZ bytes worth of two-operand operations using i64 elements. */
+static void expand_2_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ void (*fni)(TCGv_i64, TCGv_i64))
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+ fni(t0, t0);
+ tcg_gen_st_i64(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t0);
+}
+
+static void expand_2i_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ int64_t c, bool load_dest,
+ void (*fni)(TCGv_i64, TCGv_i64, int64_t))
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+ if (load_dest) {
+ tcg_gen_ld_i64(t1, cpu_env, dofs + i);
+ }
+ fni(t1, t0, c);
+ tcg_gen_st_i64(t1, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+}
+
+static void expand_2s_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ TCGv_i64 c, bool scalar_first,
+ void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+ if (scalar_first) {
+ fni(t1, c, t0);
+ } else {
+ fni(t1, t0, c);
+ }
+ tcg_gen_st_i64(t1, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+}
+
+/* Expand OPSZ bytes worth of three-operand operations using i64 elements. */
+static void expand_3_i64(uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, bool load_dest,
+ void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+ tcg_gen_ld_i64(t1, cpu_env, bofs + i);
+ if (load_dest) {
+ tcg_gen_ld_i64(t2, cpu_env, dofs + i);
+ }
+ fni(t2, t0, t1);
+ tcg_gen_st_i64(t2, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t2);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t0);
+}
+
+/* Expand OPSZ bytes worth of three-operand operations using i64 elements. */
+static void expand_4_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, uint32_t oprsz,
+ void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ TCGv_i64 t3 = tcg_temp_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t1, cpu_env, aofs + i);
+ tcg_gen_ld_i64(t2, cpu_env, bofs + i);
+ tcg_gen_ld_i64(t3, cpu_env, cofs + i);
+ fni(t0, t1, t2, t3);
+ tcg_gen_st_i64(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t3);
+ tcg_temp_free_i64(t2);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t0);
+}
+
+/* Expand OPSZ bytes worth of two-operand operations using host vectors. */
+static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t tysz, TCGType type,
+ void (*fni)(unsigned, TCGv_vec, TCGv_vec))
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t0, cpu_env, aofs + i);
+ fni(vece, t0, t0);
+ tcg_gen_st_vec(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(t0);
+}
+
+/* Expand OPSZ bytes worth of two-vector operands and an immediate operand
+ using host vectors. */
+static void expand_2i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t tysz, TCGType type,
+ int64_t c, bool load_dest,
+ void (*fni)(unsigned, TCGv_vec, TCGv_vec, int64_t))
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t0, cpu_env, aofs + i);
+ if (load_dest) {
+ tcg_gen_ld_vec(t1, cpu_env, dofs + i);
+ }
+ fni(vece, t1, t0, c);
+ tcg_gen_st_vec(t1, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(t0);
+ tcg_temp_free_vec(t1);
+}
+
+static void expand_2s_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t tysz, TCGType type,
+ TCGv_vec c, bool scalar_first,
+ void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t0, cpu_env, aofs + i);
+ if (scalar_first) {
+ fni(vece, t1, c, t0);
+ } else {
+ fni(vece, t1, t0, c);
+ }
+ tcg_gen_st_vec(t1, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(t0);
+ tcg_temp_free_vec(t1);
+}
+
+/* Expand OPSZ bytes worth of three-operand operations using host vectors. */
+static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz,
+ uint32_t tysz, TCGType type, bool load_dest,
+ void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ TCGv_vec t2 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t0, cpu_env, aofs + i);
+ tcg_gen_ld_vec(t1, cpu_env, bofs + i);
+ if (load_dest) {
+ tcg_gen_ld_vec(t2, cpu_env, dofs + i);
+ }
+ fni(vece, t2, t0, t1);
+ tcg_gen_st_vec(t2, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(t2);
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t0);
+}
+
+/* Expand OPSZ bytes worth of four-operand operations using host vectors. */
+static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t cofs, uint32_t oprsz,
+ uint32_t tysz, TCGType type,
+ void (*fni)(unsigned, TCGv_vec, TCGv_vec,
+ TCGv_vec, TCGv_vec))
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ TCGv_vec t2 = tcg_temp_new_vec(type);
+ TCGv_vec t3 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t1, cpu_env, aofs + i);
+ tcg_gen_ld_vec(t2, cpu_env, bofs + i);
+ tcg_gen_ld_vec(t3, cpu_env, cofs + i);
+ fni(vece, t0, t1, t2, t3);
+ tcg_gen_st_vec(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(t3);
+ tcg_temp_free_vec(t2);
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t0);
+}
+
+/* Expand a vector two-operand operation. */
+void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g)
+{
+ check_size_align(oprsz, maxsz, dofs | aofs);
+ check_overlap_2(dofs, aofs, maxsz);
+
+ /* Recall that ARM SVE allows vector sizes that are not a power of 2.
+ Expand with successively smaller host vector sizes. The intent is
+ that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */
+ /* ??? For maxsz > oprsz, the host may be able to use an opr-sized
+ operation, zeroing the balance of the register. We can then
+ use a max-sized store to implement the clearing without an extra
+ store operation. This is true for aarch64 and x86_64 hosts. */
+
+ if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32)
+ && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) {
+ uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv);
+ if (some == oprsz) {
+ goto done;
+ }
+ dofs += some;
+ aofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ }
+
+ if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16)
+ && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) {
+ expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv);
+ } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64
+ && g->fniv && check_size_impl(oprsz, 8)
+ && (!g->opc
+ || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) {
+ expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv);
+ } else if (g->fni8 && check_size_impl(oprsz, 8)) {
+ expand_2_i64(dofs, aofs, oprsz, g->fni8);
+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
+ expand_2_i32(dofs, aofs, oprsz, g->fni4);
+ } else {
+ assert(g->fno != NULL);
+ tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno);
+ return;
+ }
+
+ done:
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+/* Expand a vector operation with two vectors and an immediate. */
+void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ uint32_t maxsz, int64_t c, const GVecGen2i *g)
+{
+ check_size_align(oprsz, maxsz, dofs | aofs);
+ check_overlap_2(dofs, aofs, maxsz);
+
+ /* Recall that ARM SVE allows vector sizes that are not a power of 2.
+ Expand with successively smaller host vector sizes. The intent is
+ that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */
+
+ if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32)
+ && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) {
+ uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_2i_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
+ c, g->load_dest, g->fniv);
+ if (some == oprsz) {
+ goto done;
+ }
+ dofs += some;
+ aofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ }
+
+ if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16)
+ && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) {
+ expand_2i_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
+ c, g->load_dest, g->fniv);
+ } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64
+ && g->fniv && check_size_impl(oprsz, 8)
+ && (!g->opc
+ || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) {
+ expand_2i_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
+ c, g->load_dest, g->fniv);
+ } else if (g->fni8 && check_size_impl(oprsz, 8)) {
+ expand_2i_i64(dofs, aofs, oprsz, c, g->load_dest, g->fni8);
+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
+ expand_2i_i32(dofs, aofs, oprsz, c, g->load_dest, g->fni4);
+ } else {
+ if (g->fno) {
+ tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, c, g->fno);
+ } else {
+ TCGv_i64 tcg_c = tcg_const_i64(c);
+ tcg_gen_gvec_2i_ool(dofs, aofs, tcg_c, oprsz, maxsz, c, g->fnoi);
+ tcg_temp_free_i64(tcg_c);
+ }
+ return;
+ }
+
+ done:
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+/* Expand a vector operation with two vectors and a scalar. */
+void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ uint32_t maxsz, TCGv_i64 c, const GVecGen2s *g)
+{
+ TCGType type;
+
+ check_size_align(oprsz, maxsz, dofs | aofs);
+ check_overlap_2(dofs, aofs, maxsz);
+
+ type = 0;
+ if (g->fniv) {
+ if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32)) {
+ type = TCG_TYPE_V256;
+ } else if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16)) {
+ type = TCG_TYPE_V128;
+ } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64
+ && check_size_impl(oprsz, 8)) {
+ type = TCG_TYPE_V64;
+ }
+ }
+ if (type != 0) {
+ TCGv_vec t_vec = tcg_temp_new_vec(type);
+
+ tcg_gen_dup_i64_vec(g->vece, t_vec, c);
+
+ /* Recall that ARM SVE allows vector sizes that are not a power of 2.
+ Expand with successively smaller host vector sizes. The intent is
+ that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */
+ switch (type) {
+ case TCG_TYPE_V256:
+ {
+ uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_2s_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
+ t_vec, g->scalar_first, g->fniv);
+ if (some == oprsz) {
+ break;
+ }
+ dofs += some;
+ aofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ }
+ /* fallthru */
+
+ case TCG_TYPE_V128:
+ expand_2s_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
+ t_vec, g->scalar_first, g->fniv);
+ break;
+
+ case TCG_TYPE_V64:
+ expand_2s_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
+ t_vec, g->scalar_first, g->fniv);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ tcg_temp_free_vec(t_vec);
+ } else if (g->fni8 && check_size_impl(oprsz, 8)) {
+ TCGv_i64 t64 = tcg_temp_new_i64();
+
+ gen_dup_i64(g->vece, t64, c);
+ expand_2s_i64(dofs, aofs, oprsz, t64, g->scalar_first, g->fni8);
+ tcg_temp_free_i64(t64);
+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
+ TCGv_i32 t32 = tcg_temp_new_i32();
+
+ tcg_gen_extrl_i64_i32(t32, c);
+ gen_dup_i32(g->vece, t32, t32);
+ expand_2s_i32(dofs, aofs, oprsz, t32, g->scalar_first, g->fni4);
+ tcg_temp_free_i32(t32);
+ } else {
+ tcg_gen_gvec_2i_ool(dofs, aofs, c, oprsz, maxsz, 0, g->fno);
+ return;
+ }
+
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+/* Expand a vector three-operand operation. */
+void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g)
+{
+ check_size_align(oprsz, maxsz, dofs | aofs | bofs);
+ check_overlap_3(dofs, aofs, bofs, maxsz);
+
+ /* Recall that ARM SVE allows vector sizes that are not a power of 2.
+ Expand with successively smaller host vector sizes. The intent is
+ that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */
+
+ if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32)
+ && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) {
+ uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_3_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256,
+ g->load_dest, g->fniv);
+ if (some == oprsz) {
+ goto done;
+ }
+ dofs += some;
+ aofs += some;
+ bofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ }
+
+ if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16)
+ && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) {
+ expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128,
+ g->load_dest, g->fniv);
+ } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64
+ && g->fniv && check_size_impl(oprsz, 8)
+ && (!g->opc
+ || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) {
+ expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64,
+ g->load_dest, g->fniv);
+ } else if (g->fni8 && check_size_impl(oprsz, 8)) {
+ expand_3_i64(dofs, aofs, bofs, oprsz, g->load_dest, g->fni8);
+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
+ expand_3_i32(dofs, aofs, bofs, oprsz, g->load_dest, g->fni4);
+ } else {
+ assert(g->fno != NULL);
+ tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, g->data, g->fno);
+ }
+
+ done:
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+/* Expand a vector four-operand operation. */
+void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
+ uint32_t oprsz, uint32_t maxsz, const GVecGen4 *g)
+{
+ check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs);
+ check_overlap_4(dofs, aofs, bofs, cofs, maxsz);
+
+ /* Recall that ARM SVE allows vector sizes that are not a power of 2.
+ Expand with successively smaller host vector sizes. The intent is
+ that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */
+
+ if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32)
+ && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) {
+ uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_4_vec(g->vece, dofs, aofs, bofs, cofs, some,
+ 32, TCG_TYPE_V256, g->fniv);
+ if (some == oprsz) {
+ goto done;
+ }
+ dofs += some;
+ aofs += some;
+ bofs += some;
+ cofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ }
+
+ if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16)
+ && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) {
+ expand_4_vec(g->vece, dofs, aofs, bofs, cofs, oprsz,
+ 16, TCG_TYPE_V128, g->fniv);
+ } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64
+ && g->fniv && check_size_impl(oprsz, 8)
+ && (!g->opc
+ || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) {
+ expand_4_vec(g->vece, dofs, aofs, bofs, cofs, oprsz,
+ 8, TCG_TYPE_V64, g->fniv);
+ } else if (g->fni8 && check_size_impl(oprsz, 8)) {
+ expand_4_i64(dofs, aofs, bofs, cofs, oprsz, g->fni8);
+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
+ expand_4_i32(dofs, aofs, bofs, cofs, oprsz, g->fni4);
+ } else {
+ assert(g->fno != NULL);
+ tcg_gen_gvec_4_ool(dofs, aofs, bofs, cofs,
+ oprsz, maxsz, g->data, g->fno);
+ return;
+ }
+
+ done:
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+/*
+ * Expand specific vector operations.
+ */
+
+static void vec_mov2(unsigned vece, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_mov_vec(a, b);
+}
+
+void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2 g = {
+ .fni8 = tcg_gen_mov_i64,
+ .fniv = vec_mov2,
+ .fno = gen_helper_gvec_mov,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ if (dofs != aofs) {
+ tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g);
+ } else {
+ check_size_align(oprsz, maxsz, dofs);
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+ }
+}
+
+void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t oprsz,
+ uint32_t maxsz, TCGv_i32 in)
+{
+ check_size_align(oprsz, maxsz, dofs);
+ tcg_debug_assert(vece <= MO_32);
+ do_dup(vece, dofs, oprsz, maxsz, in, NULL, 0);
+}
+
+void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t oprsz,
+ uint32_t maxsz, TCGv_i64 in)
+{
+ check_size_align(oprsz, maxsz, dofs);
+ tcg_debug_assert(vece <= MO_64);
+ do_dup(vece, dofs, oprsz, maxsz, NULL, in, 0);
+}
+
+void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ if (vece <= MO_32) {
+ TCGv_i32 in = tcg_temp_new_i32();
+ switch (vece) {
+ case MO_8:
+ tcg_gen_ld8u_i32(in, cpu_env, aofs);
+ break;
+ case MO_16:
+ tcg_gen_ld16u_i32(in, cpu_env, aofs);
+ break;
+ case MO_32:
+ tcg_gen_ld_i32(in, cpu_env, aofs);
+ break;
+ }
+ tcg_gen_gvec_dup_i32(vece, dofs, oprsz, maxsz, in);
+ tcg_temp_free_i32(in);
+ } else if (vece == MO_64) {
+ TCGv_i64 in = tcg_temp_new_i64();
+ tcg_gen_ld_i64(in, cpu_env, aofs);
+ tcg_gen_gvec_dup_i64(MO_64, dofs, oprsz, maxsz, in);
+ tcg_temp_free_i64(in);
+ } else {
+ /* 128-bit duplicate. */
+ /* ??? Dup to 256-bit vector. */
+ int i;
+
+ tcg_debug_assert(vece == 4);
+ tcg_debug_assert(oprsz >= 16);
+ if (TCG_TARGET_HAS_v128) {
+ TCGv_vec in = tcg_temp_new_vec(TCG_TYPE_V128);
+
+ tcg_gen_ld_vec(in, cpu_env, aofs);
+ for (i = 0; i < oprsz; i += 16) {
+ tcg_gen_st_vec(in, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(in);
+ } else {
+ TCGv_i64 in0 = tcg_temp_new_i64();
+ TCGv_i64 in1 = tcg_temp_new_i64();
+
+ tcg_gen_ld_i64(in0, cpu_env, aofs);
+ tcg_gen_ld_i64(in1, cpu_env, aofs + 8);
+ for (i = 0; i < oprsz; i += 16) {
+ tcg_gen_st_i64(in0, cpu_env, dofs + i);
+ tcg_gen_st_i64(in1, cpu_env, dofs + i + 8);
+ }
+ tcg_temp_free_i64(in0);
+ tcg_temp_free_i64(in1);
+ }
+ }
+}
+
+void tcg_gen_gvec_dup64i(uint32_t dofs, uint32_t oprsz,
+ uint32_t maxsz, uint64_t x)
+{
+ check_size_align(oprsz, maxsz, dofs);
+ do_dup(MO_64, dofs, oprsz, maxsz, NULL, NULL, x);
+}
+
+void tcg_gen_gvec_dup32i(uint32_t dofs, uint32_t oprsz,
+ uint32_t maxsz, uint32_t x)
+{
+ check_size_align(oprsz, maxsz, dofs);
+ do_dup(MO_32, dofs, oprsz, maxsz, NULL, NULL, x);
+}
+
+void tcg_gen_gvec_dup16i(uint32_t dofs, uint32_t oprsz,
+ uint32_t maxsz, uint16_t x)
+{
+ check_size_align(oprsz, maxsz, dofs);
+ do_dup(MO_16, dofs, oprsz, maxsz, NULL, NULL, x);
+}
+
+void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t oprsz,
+ uint32_t maxsz, uint8_t x)
+{
+ check_size_align(oprsz, maxsz, dofs);
+ do_dup(MO_8, dofs, oprsz, maxsz, NULL, NULL, x);
+}
+
+void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2 g = {
+ .fni8 = tcg_gen_not_i64,
+ .fniv = tcg_gen_not_vec,
+ .fno = gen_helper_gvec_not,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g);
+}
+
+/* Perform a vector addition using normal addition and a mask. The mask
+ should be the sign bit of each lane. This 6-operation form is more
+ efficient than separate additions when there are 4 or more lanes in
+ the 64-bit operation. */
+static void gen_addv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ TCGv_i64 t3 = tcg_temp_new_i64();
+
+ tcg_gen_andc_i64(t1, a, m);
+ tcg_gen_andc_i64(t2, b, m);
+ tcg_gen_xor_i64(t3, a, b);
+ tcg_gen_add_i64(d, t1, t2);
+ tcg_gen_and_i64(t3, t3, m);
+ tcg_gen_xor_i64(d, d, t3);
+
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+ tcg_temp_free_i64(t3);
+}
+
+void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 m = tcg_const_i64(dup_const(MO_8, 0x80));
+ gen_addv_mask(d, a, b, m);
+ tcg_temp_free_i64(m);
+}
+
+void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 m = tcg_const_i64(dup_const(MO_16, 0x8000));
+ gen_addv_mask(d, a, b, m);
+ tcg_temp_free_i64(m);
+}
+
+void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t1, a, ~0xffffffffull);
+ tcg_gen_add_i64(t2, a, b);
+ tcg_gen_add_i64(t1, t1, b);
+ tcg_gen_deposit_i64(d, t1, t2, 0, 32);
+
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+}
+
+void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g[4] = {
+ { .fni8 = tcg_gen_vec_add8_i64,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_add8,
+ .opc = INDEX_op_add_vec,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_add16_i64,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_add16,
+ .opc = INDEX_op_add_vec,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_add_i32,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_add32,
+ .opc = INDEX_op_add_vec,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_add_i64,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_add64,
+ .opc = INDEX_op_add_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_adds(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2s g[4] = {
+ { .fni8 = tcg_gen_vec_add8_i64,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_adds8,
+ .opc = INDEX_op_add_vec,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_add16_i64,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_adds16,
+ .opc = INDEX_op_add_vec,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_add_i32,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_adds32,
+ .opc = INDEX_op_add_vec,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_add_i64,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_adds64,
+ .opc = INDEX_op_add_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g[vece]);
+}
+
+void tcg_gen_gvec_addi(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_const_i64(c);
+ tcg_gen_gvec_adds(vece, dofs, aofs, tmp, oprsz, maxsz);
+ tcg_temp_free_i64(tmp);
+}
+
+void tcg_gen_gvec_subs(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2s g[4] = {
+ { .fni8 = tcg_gen_vec_sub8_i64,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_subs8,
+ .opc = INDEX_op_sub_vec,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_sub16_i64,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_subs16,
+ .opc = INDEX_op_sub_vec,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_sub_i32,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_subs32,
+ .opc = INDEX_op_sub_vec,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_sub_i64,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_subs64,
+ .opc = INDEX_op_sub_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g[vece]);
+}
+
+/* Perform a vector subtraction using normal subtraction and a mask.
+ Compare gen_addv_mask above. */
+static void gen_subv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ TCGv_i64 t3 = tcg_temp_new_i64();
+
+ tcg_gen_or_i64(t1, a, m);
+ tcg_gen_andc_i64(t2, b, m);
+ tcg_gen_eqv_i64(t3, a, b);
+ tcg_gen_sub_i64(d, t1, t2);
+ tcg_gen_and_i64(t3, t3, m);
+ tcg_gen_xor_i64(d, d, t3);
+
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+ tcg_temp_free_i64(t3);
+}
+
+void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 m = tcg_const_i64(dup_const(MO_8, 0x80));
+ gen_subv_mask(d, a, b, m);
+ tcg_temp_free_i64(m);
+}
+
+void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 m = tcg_const_i64(dup_const(MO_16, 0x8000));
+ gen_subv_mask(d, a, b, m);
+ tcg_temp_free_i64(m);
+}
+
+void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t1, b, ~0xffffffffull);
+ tcg_gen_sub_i64(t2, a, b);
+ tcg_gen_sub_i64(t1, a, t1);
+ tcg_gen_deposit_i64(d, t1, t2, 0, 32);
+
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+}
+
+void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g[4] = {
+ { .fni8 = tcg_gen_vec_sub8_i64,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_sub8,
+ .opc = INDEX_op_sub_vec,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_sub16_i64,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_sub16,
+ .opc = INDEX_op_sub_vec,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_sub_i32,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_sub32,
+ .opc = INDEX_op_sub_vec,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_sub_i64,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_sub64,
+ .opc = INDEX_op_sub_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_mul(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_mul8,
+ .opc = INDEX_op_mul_vec,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_mul16,
+ .opc = INDEX_op_mul_vec,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_mul_i32,
+ .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_mul32,
+ .opc = INDEX_op_mul_vec,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_mul_i64,
+ .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_mul64,
+ .opc = INDEX_op_mul_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_muls(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2s g[4] = {
+ { .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_muls8,
+ .opc = INDEX_op_mul_vec,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_muls16,
+ .opc = INDEX_op_mul_vec,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_mul_i32,
+ .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_muls32,
+ .opc = INDEX_op_mul_vec,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_mul_i64,
+ .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_muls64,
+ .opc = INDEX_op_mul_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g[vece]);
+}
+
+void tcg_gen_gvec_muli(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_const_i64(c);
+ tcg_gen_gvec_muls(vece, dofs, aofs, tmp, oprsz, maxsz);
+ tcg_temp_free_i64(tmp);
+}
+
+void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g[4] = {
+ { .fno = gen_helper_gvec_ssadd8, .vece = MO_8 },
+ { .fno = gen_helper_gvec_ssadd16, .vece = MO_16 },
+ { .fno = gen_helper_gvec_ssadd32, .vece = MO_32 },
+ { .fno = gen_helper_gvec_ssadd64, .vece = MO_64 }
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_sssub(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g[4] = {
+ { .fno = gen_helper_gvec_sssub8, .vece = MO_8 },
+ { .fno = gen_helper_gvec_sssub16, .vece = MO_16 },
+ { .fno = gen_helper_gvec_sssub32, .vece = MO_32 },
+ { .fno = gen_helper_gvec_sssub64, .vece = MO_64 }
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+static void tcg_gen_vec_usadd32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 max = tcg_const_i32(-1);
+ tcg_gen_add_i32(d, a, b);
+ tcg_gen_movcond_i32(TCG_COND_LTU, d, d, a, max, d);
+ tcg_temp_free_i32(max);
+}
+
+static void tcg_gen_vec_usadd32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 max = tcg_const_i64(-1);
+ tcg_gen_add_i64(d, a, b);
+ tcg_gen_movcond_i64(TCG_COND_LTU, d, d, a, max, d);
+ tcg_temp_free_i64(max);
+}
+
+void tcg_gen_gvec_usadd(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g[4] = {
+ { .fno = gen_helper_gvec_usadd8, .vece = MO_8 },
+ { .fno = gen_helper_gvec_usadd16, .vece = MO_16 },
+ { .fni4 = tcg_gen_vec_usadd32_i32,
+ .fno = gen_helper_gvec_usadd32,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_vec_usadd32_i64,
+ .fno = gen_helper_gvec_usadd64,
+ .vece = MO_64 }
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+static void tcg_gen_vec_ussub32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 min = tcg_const_i32(0);
+ tcg_gen_sub_i32(d, a, b);
+ tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, min, d);
+ tcg_temp_free_i32(min);
+}
+
+static void tcg_gen_vec_ussub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 min = tcg_const_i64(0);
+ tcg_gen_sub_i64(d, a, b);
+ tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, min, d);
+ tcg_temp_free_i64(min);
+}
+
+void tcg_gen_gvec_ussub(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g[4] = {
+ { .fno = gen_helper_gvec_ussub8, .vece = MO_8 },
+ { .fno = gen_helper_gvec_ussub16, .vece = MO_16 },
+ { .fni4 = tcg_gen_vec_ussub32_i32,
+ .fno = gen_helper_gvec_ussub32,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_vec_ussub32_i64,
+ .fno = gen_helper_gvec_ussub64,
+ .vece = MO_64 }
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+/* Perform a vector negation using normal negation and a mask.
+ Compare gen_subv_mask above. */
+static void gen_negv_mask(TCGv_i64 d, TCGv_i64 b, TCGv_i64 m)
+{
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ TCGv_i64 t3 = tcg_temp_new_i64();
+
+ tcg_gen_andc_i64(t3, m, b);
+ tcg_gen_andc_i64(t2, b, m);
+ tcg_gen_sub_i64(d, m, t2);
+ tcg_gen_xor_i64(d, d, t3);
+
+ tcg_temp_free_i64(t2);
+ tcg_temp_free_i64(t3);
+}
+
+void tcg_gen_vec_neg8_i64(TCGv_i64 d, TCGv_i64 b)
+{
+ TCGv_i64 m = tcg_const_i64(dup_const(MO_8, 0x80));
+ gen_negv_mask(d, b, m);
+ tcg_temp_free_i64(m);
+}
+
+void tcg_gen_vec_neg16_i64(TCGv_i64 d, TCGv_i64 b)
+{
+ TCGv_i64 m = tcg_const_i64(dup_const(MO_16, 0x8000));
+ gen_negv_mask(d, b, m);
+ tcg_temp_free_i64(m);
+}
+
+void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 b)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t1, b, ~0xffffffffull);
+ tcg_gen_neg_i64(t2, b);
+ tcg_gen_neg_i64(t1, t1);
+ tcg_gen_deposit_i64(d, t1, t2, 0, 32);
+
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+}
+
+void tcg_gen_gvec_neg(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2 g[4] = {
+ { .fni8 = tcg_gen_vec_neg8_i64,
+ .fniv = tcg_gen_neg_vec,
+ .fno = gen_helper_gvec_neg8,
+ .opc = INDEX_op_neg_vec,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_neg16_i64,
+ .fniv = tcg_gen_neg_vec,
+ .fno = gen_helper_gvec_neg16,
+ .opc = INDEX_op_neg_vec,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_neg_i32,
+ .fniv = tcg_gen_neg_vec,
+ .fno = gen_helper_gvec_neg32,
+ .opc = INDEX_op_neg_vec,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_neg_i64,
+ .fniv = tcg_gen_neg_vec,
+ .fno = gen_helper_gvec_neg64,
+ .opc = INDEX_op_neg_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_and(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g = {
+ .fni8 = tcg_gen_and_i64,
+ .fniv = tcg_gen_and_vec,
+ .fno = gen_helper_gvec_and,
+ .opc = INDEX_op_and_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
+}
+
+void tcg_gen_gvec_or(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g = {
+ .fni8 = tcg_gen_or_i64,
+ .fniv = tcg_gen_or_vec,
+ .fno = gen_helper_gvec_or,
+ .opc = INDEX_op_or_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
+}
+
+void tcg_gen_gvec_xor(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g = {
+ .fni8 = tcg_gen_xor_i64,
+ .fniv = tcg_gen_xor_vec,
+ .fno = gen_helper_gvec_xor,
+ .opc = INDEX_op_xor_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
+}
+
+void tcg_gen_gvec_andc(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g = {
+ .fni8 = tcg_gen_andc_i64,
+ .fniv = tcg_gen_andc_vec,
+ .fno = gen_helper_gvec_andc,
+ .opc = INDEX_op_andc_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
+}
+
+void tcg_gen_gvec_orc(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g = {
+ .fni8 = tcg_gen_orc_i64,
+ .fniv = tcg_gen_orc_vec,
+ .fno = gen_helper_gvec_orc,
+ .opc = INDEX_op_orc_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
+}
+
+static const GVecGen2s gop_ands = {
+ .fni8 = tcg_gen_and_i64,
+ .fniv = tcg_gen_and_vec,
+ .fno = gen_helper_gvec_ands,
+ .opc = INDEX_op_and_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64
+};
+
+void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ gen_dup_i64(vece, tmp, c);
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
+ tcg_temp_free_i64(tmp);
+}
+
+void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_const_i64(dup_const(vece, c));
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
+ tcg_temp_free_i64(tmp);
+}
+
+static const GVecGen2s gop_xors = {
+ .fni8 = tcg_gen_xor_i64,
+ .fniv = tcg_gen_xor_vec,
+ .fno = gen_helper_gvec_xors,
+ .opc = INDEX_op_xor_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64
+};
+
+void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ gen_dup_i64(vece, tmp, c);
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_xors);
+ tcg_temp_free_i64(tmp);
+}
+
+void tcg_gen_gvec_xori(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_const_i64(dup_const(vece, c));
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_xors);
+ tcg_temp_free_i64(tmp);
+}
+
+static const GVecGen2s gop_ors = {
+ .fni8 = tcg_gen_or_i64,
+ .fniv = tcg_gen_or_vec,
+ .fno = gen_helper_gvec_ors,
+ .opc = INDEX_op_or_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64
+};
+
+void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ gen_dup_i64(vece, tmp, c);
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ors);
+ tcg_temp_free_i64(tmp);
+}
+
+void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_const_i64(dup_const(vece, c));
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ors);
+ tcg_temp_free_i64(tmp);
+}
+
+void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t mask = dup_const(MO_8, 0xff << c);
+ tcg_gen_shli_i64(d, a, c);
+ tcg_gen_andi_i64(d, d, mask);
+}
+
+void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff << c);
+ tcg_gen_shli_i64(d, a, c);
+ tcg_gen_andi_i64(d, d, mask);
+}
+
+void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2i g[4] = {
+ { .fni8 = tcg_gen_vec_shl8i_i64,
+ .fniv = tcg_gen_shli_vec,
+ .fno = gen_helper_gvec_shl8i,
+ .opc = INDEX_op_shli_vec,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_shl16i_i64,
+ .fniv = tcg_gen_shli_vec,
+ .fno = gen_helper_gvec_shl16i,
+ .opc = INDEX_op_shli_vec,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_shli_i32,
+ .fniv = tcg_gen_shli_vec,
+ .fno = gen_helper_gvec_shl32i,
+ .opc = INDEX_op_shli_vec,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_shli_i64,
+ .fniv = tcg_gen_shli_vec,
+ .fno = gen_helper_gvec_shl64i,
+ .opc = INDEX_op_shli_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(shift >= 0 && shift < (8 << vece));
+ if (shift == 0) {
+ tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
+ } else {
+ tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
+ }
+}
+
+void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t mask = dup_const(MO_8, 0xff >> c);
+ tcg_gen_shri_i64(d, a, c);
+ tcg_gen_andi_i64(d, d, mask);
+}
+
+void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff >> c);
+ tcg_gen_shri_i64(d, a, c);
+ tcg_gen_andi_i64(d, d, mask);
+}
+
+void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2i g[4] = {
+ { .fni8 = tcg_gen_vec_shr8i_i64,
+ .fniv = tcg_gen_shri_vec,
+ .fno = gen_helper_gvec_shr8i,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_shr16i_i64,
+ .fniv = tcg_gen_shri_vec,
+ .fno = gen_helper_gvec_shr16i,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_shri_i32,
+ .fniv = tcg_gen_shri_vec,
+ .fno = gen_helper_gvec_shr32i,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_shri_i64,
+ .fniv = tcg_gen_shri_vec,
+ .fno = gen_helper_gvec_shr64i,
+ .opc = INDEX_op_shri_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(shift >= 0 && shift < (8 << vece));
+ if (shift == 0) {
+ tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
+ } else {
+ tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
+ }
+}
+
+void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t s_mask = dup_const(MO_8, 0x80 >> c);
+ uint64_t c_mask = dup_const(MO_8, 0xff >> c);
+ TCGv_i64 s = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(d, a, c);
+ tcg_gen_andi_i64(s, d, s_mask); /* isolate (shifted) sign bit */
+ tcg_gen_muli_i64(s, s, (2 << c) - 2); /* replicate isolated signs */
+ tcg_gen_andi_i64(d, d, c_mask); /* clear out bits above sign */
+ tcg_gen_or_i64(d, d, s); /* include sign extension */
+ tcg_temp_free_i64(s);
+}
+
+void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t s_mask = dup_const(MO_16, 0x8000 >> c);
+ uint64_t c_mask = dup_const(MO_16, 0xffff >> c);
+ TCGv_i64 s = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(d, a, c);
+ tcg_gen_andi_i64(s, d, s_mask); /* isolate (shifted) sign bit */
+ tcg_gen_andi_i64(d, d, c_mask); /* clear out bits above sign */
+ tcg_gen_muli_i64(s, s, (2 << c) - 2); /* replicate isolated signs */
+ tcg_gen_or_i64(d, d, s); /* include sign extension */
+ tcg_temp_free_i64(s);
+}
+
+void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2i g[4] = {
+ { .fni8 = tcg_gen_vec_sar8i_i64,
+ .fniv = tcg_gen_sari_vec,
+ .fno = gen_helper_gvec_sar8i,
+ .opc = INDEX_op_sari_vec,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_sar16i_i64,
+ .fniv = tcg_gen_sari_vec,
+ .fno = gen_helper_gvec_sar16i,
+ .opc = INDEX_op_sari_vec,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_sari_i32,
+ .fniv = tcg_gen_sari_vec,
+ .fno = gen_helper_gvec_sar32i,
+ .opc = INDEX_op_sari_vec,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_sari_i64,
+ .fniv = tcg_gen_sari_vec,
+ .fno = gen_helper_gvec_sar64i,
+ .opc = INDEX_op_sari_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(shift >= 0 && shift < (8 << vece));
+ if (shift == 0) {
+ tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
+ } else {
+ tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
+ }
+}
+
+/* Expand OPSZ bytes worth of three-operand operations using i32 elements. */
+static void expand_cmp_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, TCGCond cond)
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+ tcg_gen_ld_i32(t1, cpu_env, bofs + i);
+ tcg_gen_setcond_i32(cond, t0, t0, t1);
+ tcg_gen_neg_i32(t0, t0);
+ tcg_gen_st_i32(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t0);
+}
+
+static void expand_cmp_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, TCGCond cond)
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+ tcg_gen_ld_i64(t1, cpu_env, bofs + i);
+ tcg_gen_setcond_i64(cond, t0, t0, t1);
+ tcg_gen_neg_i64(t0, t0);
+ tcg_gen_st_i64(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t0);
+}
+
+static void expand_cmp_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t tysz,
+ TCGType type, TCGCond cond)
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t0, cpu_env, aofs + i);
+ tcg_gen_ld_vec(t1, cpu_env, bofs + i);
+ tcg_gen_cmp_vec(cond, vece, t0, t0, t1);
+ tcg_gen_st_vec(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t0);
+}
+
+void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
+ uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ static gen_helper_gvec_3 * const eq_fn[4] = {
+ gen_helper_gvec_eq8, gen_helper_gvec_eq16,
+ gen_helper_gvec_eq32, gen_helper_gvec_eq64
+ };
+ static gen_helper_gvec_3 * const ne_fn[4] = {
+ gen_helper_gvec_ne8, gen_helper_gvec_ne16,
+ gen_helper_gvec_ne32, gen_helper_gvec_ne64
+ };
+ static gen_helper_gvec_3 * const lt_fn[4] = {
+ gen_helper_gvec_lt8, gen_helper_gvec_lt16,
+ gen_helper_gvec_lt32, gen_helper_gvec_lt64
+ };
+ static gen_helper_gvec_3 * const le_fn[4] = {
+ gen_helper_gvec_le8, gen_helper_gvec_le16,
+ gen_helper_gvec_le32, gen_helper_gvec_le64
+ };
+ static gen_helper_gvec_3 * const ltu_fn[4] = {
+ gen_helper_gvec_ltu8, gen_helper_gvec_ltu16,
+ gen_helper_gvec_ltu32, gen_helper_gvec_ltu64
+ };
+ static gen_helper_gvec_3 * const leu_fn[4] = {
+ gen_helper_gvec_leu8, gen_helper_gvec_leu16,
+ gen_helper_gvec_leu32, gen_helper_gvec_leu64
+ };
+ static gen_helper_gvec_3 * const * const fns[16] = {
+ [TCG_COND_EQ] = eq_fn,
+ [TCG_COND_NE] = ne_fn,
+ [TCG_COND_LT] = lt_fn,
+ [TCG_COND_LE] = le_fn,
+ [TCG_COND_LTU] = ltu_fn,
+ [TCG_COND_LEU] = leu_fn,
+ };
+
+ check_size_align(oprsz, maxsz, dofs | aofs | bofs);
+ check_overlap_3(dofs, aofs, bofs, maxsz);
+
+ if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
+ do_dup(MO_8, dofs, oprsz, maxsz,
+ NULL, NULL, -(cond == TCG_COND_ALWAYS));
+ return;
+ }
+
+ /* Recall that ARM SVE allows vector sizes that are not a power of 2.
+ Expand with successively smaller host vector sizes. The intent is
+ that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */
+
+ if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32)
+ && tcg_can_emit_vec_op(INDEX_op_cmp_vec, TCG_TYPE_V256, vece)) {
+ uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_cmp_vec(vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256, cond);
+ if (some == oprsz) {
+ goto done;
+ }
+ dofs += some;
+ aofs += some;
+ bofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ }
+
+ if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16)
+ && tcg_can_emit_vec_op(INDEX_op_cmp_vec, TCG_TYPE_V128, vece)) {
+ expand_cmp_vec(vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128, cond);
+ } else if (TCG_TARGET_HAS_v64
+ && check_size_impl(oprsz, 8)
+ && (TCG_TARGET_REG_BITS == 32 || vece != MO_64)
+ && tcg_can_emit_vec_op(INDEX_op_cmp_vec, TCG_TYPE_V64, vece)) {
+ expand_cmp_vec(vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64, cond);
+ } else if (vece == MO_64 && check_size_impl(oprsz, 8)) {
+ expand_cmp_i64(dofs, aofs, bofs, oprsz, cond);
+ } else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
+ expand_cmp_i32(dofs, aofs, bofs, oprsz, cond);
+ } else {
+ gen_helper_gvec_3 * const *fn = fns[cond];
+
+ if (fn == NULL) {
+ uint32_t tmp;
+ tmp = aofs, aofs = bofs, bofs = tmp;
+ cond = tcg_swap_cond(cond);
+ fn = fns[cond];
+ assert(fn != NULL);
+ }
+ tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, 0, fn[vece]);
+ return;
+ }
+
+ done:
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
diff --git a/tcg/tcg-op-gvec.h b/tcg/tcg-op-gvec.h
new file mode 100644
index 0000000..ff43a29
--- /dev/null
+++ b/tcg/tcg-op-gvec.h
@@ -0,0 +1,306 @@
+/*
+ * Generic vector operation expansion
+ *
+ * Copyright (c) 2018 Linaro
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * "Generic" vectors. All operands are given as offsets from ENV,
+ * and therefore cannot also be allocated via tcg_global_mem_new_*.
+ * OPRSZ is the byte size of the vector upon which the operation is performed.
+ * MAXSZ is the byte size of the full vector; bytes beyond OPSZ are cleared.
+ *
+ * All sizes must be 8 or any multiple of 16.
+ * When OPRSZ is 8, the alignment may be 8, otherwise must be 16.
+ * Operands may completely, but not partially, overlap.
+ */
+
+/* Expand a call to a gvec-style helper, with pointers to two vector
+ operands, and a descriptor (see tcg-gvec-desc.h). */
+typedef void gen_helper_gvec_2(TCGv_ptr, TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz, int32_t data,
+ gen_helper_gvec_2 *fn);
+
+/* Similarly, passing an extra data value. */
+typedef void gen_helper_gvec_2i(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
+void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c,
+ uint32_t oprsz, uint32_t maxsz, int32_t data,
+ gen_helper_gvec_2i *fn);
+
+/* Similarly, passing an extra pointer (e.g. env or float_status). */
+typedef void gen_helper_gvec_2_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_2_ptr(uint32_t dofs, uint32_t aofs,
+ TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
+ int32_t data, gen_helper_gvec_2_ptr *fn);
+
+/* Similarly, with three vector operands. */
+typedef void gen_helper_gvec_3(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz, int32_t data,
+ gen_helper_gvec_3 *fn);
+
+/* Similarly, with four vector operands. */
+typedef void gen_helper_gvec_4(TCGv_ptr, TCGv_ptr, TCGv_ptr,
+ TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, uint32_t oprsz, uint32_t maxsz,
+ int32_t data, gen_helper_gvec_4 *fn);
+
+/* Similarly, with five vector operands. */
+typedef void gen_helper_gvec_5(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr,
+ TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_5_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, uint32_t xofs, uint32_t oprsz,
+ uint32_t maxsz, int32_t data, gen_helper_gvec_5 *fn);
+
+typedef void gen_helper_gvec_3_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr,
+ TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
+ int32_t data, gen_helper_gvec_3_ptr *fn);
+
+typedef void gen_helper_gvec_4_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr,
+ TCGv_ptr, TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, TCGv_ptr ptr, uint32_t oprsz,
+ uint32_t maxsz, int32_t data,
+ gen_helper_gvec_4_ptr *fn);
+
+/* Expand a gvec operation. Either inline or out-of-line depending on
+ the actual vector size and the operations supported by the host. */
+typedef struct {
+ /* Expand inline as a 64-bit or 32-bit integer.
+ Only one of these will be non-NULL. */
+ void (*fni8)(TCGv_i64, TCGv_i64);
+ void (*fni4)(TCGv_i32, TCGv_i32);
+ /* Expand inline with a host vector type. */
+ void (*fniv)(unsigned, TCGv_vec, TCGv_vec);
+ /* Expand out-of-line helper w/descriptor. */
+ gen_helper_gvec_2 *fno;
+ /* The opcode, if any, to which this corresponds. */
+ TCGOpcode opc;
+ /* The data argument to the out-of-line helper. */
+ int32_t data;
+ /* The vector element size, if applicable. */
+ uint8_t vece;
+ /* Prefer i64 to v64. */
+ bool prefer_i64;
+} GVecGen2;
+
+typedef struct {
+ /* Expand inline as a 64-bit or 32-bit integer.
+ Only one of these will be non-NULL. */
+ void (*fni8)(TCGv_i64, TCGv_i64, int64_t);
+ void (*fni4)(TCGv_i32, TCGv_i32, int32_t);
+ /* Expand inline with a host vector type. */
+ void (*fniv)(unsigned, TCGv_vec, TCGv_vec, int64_t);
+ /* Expand out-of-line helper w/descriptor, data in descriptor. */
+ gen_helper_gvec_2 *fno;
+ /* Expand out-of-line helper w/descriptor, data as argument. */
+ gen_helper_gvec_2i *fnoi;
+ /* The opcode, if any, to which this corresponds. */
+ TCGOpcode opc;
+ /* The vector element size, if applicable. */
+ uint8_t vece;
+ /* Prefer i64 to v64. */
+ bool prefer_i64;
+ /* Load dest as a 3rd source operand. */
+ bool load_dest;
+} GVecGen2i;
+
+typedef struct {
+ /* Expand inline as a 64-bit or 32-bit integer.
+ Only one of these will be non-NULL. */
+ void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64);
+ void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32);
+ /* Expand inline with a host vector type. */
+ void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec);
+ /* Expand out-of-line helper w/descriptor. */
+ gen_helper_gvec_2i *fno;
+ /* The opcode, if any, to which this corresponds. */
+ TCGOpcode opc;
+ /* The data argument to the out-of-line helper. */
+ uint32_t data;
+ /* The vector element size, if applicable. */
+ uint8_t vece;
+ /* Prefer i64 to v64. */
+ bool prefer_i64;
+ /* Load scalar as 1st source operand. */
+ bool scalar_first;
+} GVecGen2s;
+
+typedef struct {
+ /* Expand inline as a 64-bit or 32-bit integer.
+ Only one of these will be non-NULL. */
+ void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64);
+ void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32);
+ /* Expand inline with a host vector type. */
+ void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec);
+ /* Expand out-of-line helper w/descriptor. */
+ gen_helper_gvec_3 *fno;
+ /* The opcode, if any, to which this corresponds. */
+ TCGOpcode opc;
+ /* The data argument to the out-of-line helper. */
+ int32_t data;
+ /* The vector element size, if applicable. */
+ uint8_t vece;
+ /* Prefer i64 to v64. */
+ bool prefer_i64;
+ /* Load dest as a 3rd source operand. */
+ bool load_dest;
+} GVecGen3;
+
+typedef struct {
+ /* Expand inline as a 64-bit or 32-bit integer.
+ Only one of these will be non-NULL. */
+ void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64);
+ void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
+ /* Expand inline with a host vector type. */
+ void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec);
+ /* Expand out-of-line helper w/descriptor. */
+ gen_helper_gvec_4 *fno;
+ /* The opcode, if any, to which this corresponds. */
+ TCGOpcode opc;
+ /* The data argument to the out-of-line helper. */
+ int32_t data;
+ /* The vector element size, if applicable. */
+ uint8_t vece;
+ /* Prefer i64 to v64. */
+ bool prefer_i64;
+} GVecGen4;
+
+void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz, const GVecGen2 *);
+void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ uint32_t maxsz, int64_t c, const GVecGen2i *);
+void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ uint32_t maxsz, TCGv_i64 c, const GVecGen2s *);
+void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz, const GVecGen3 *);
+void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
+ uint32_t oprsz, uint32_t maxsz, const GVecGen4 *);
+
+/* Expand a specific vector operation. */
+
+void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_neg(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_mul(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_addi(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_muli(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_adds(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_subs(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_muls(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+
+/* Saturated arithmetic. */
+void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_sssub(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_usadd(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_ussub(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_and(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_or(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_xor(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_andc(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_orc(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_xori(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t s, uint32_t m);
+void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s,
+ uint32_t m, TCGv_i32);
+void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s,
+ uint32_t m, TCGv_i64);
+
+void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t s, uint32_t m, uint8_t x);
+void tcg_gen_gvec_dup16i(uint32_t dofs, uint32_t s, uint32_t m, uint16_t x);
+void tcg_gen_gvec_dup32i(uint32_t dofs, uint32_t s, uint32_t m, uint32_t x);
+void tcg_gen_gvec_dup64i(uint32_t dofs, uint32_t s, uint32_t m, uint64_t x);
+
+void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
+ uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz);
+
+/*
+ * 64-bit vector operations. Use these when the register has been allocated
+ * with tcg_global_mem_new_i64, and so we cannot also address it via pointer.
+ * OPRSZ = MAXSZ = 8.
+ */
+
+void tcg_gen_vec_neg8_i64(TCGv_i64 d, TCGv_i64 a);
+void tcg_gen_vec_neg16_i64(TCGv_i64 d, TCGv_i64 a);
+void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 a);
+
+void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+
+void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+
+void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
new file mode 100644
index 0000000..70ec889
--- /dev/null
+++ b/tcg/tcg-op-vec.c
@@ -0,0 +1,389 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2018 Linaro, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "tcg.h"
+#include "tcg-op.h"
+#include "tcg-mo.h"
+
+/* Reduce the number of ifdefs below. This assumes that all uses of
+ TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
+ the compiler can eliminate. */
+#if TCG_TARGET_REG_BITS == 64
+extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
+extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
+#define TCGV_LOW TCGV_LOW_link_error
+#define TCGV_HIGH TCGV_HIGH_link_error
+#endif
+
+void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
+{
+ TCGOp *op = tcg_emit_op(opc);
+ TCGOP_VECL(op) = type - TCG_TYPE_V64;
+ TCGOP_VECE(op) = vece;
+ op->args[0] = r;
+ op->args[1] = a;
+}
+
+void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
+ TCGArg r, TCGArg a, TCGArg b)
+{
+ TCGOp *op = tcg_emit_op(opc);
+ TCGOP_VECL(op) = type - TCG_TYPE_V64;
+ TCGOP_VECE(op) = vece;
+ op->args[0] = r;
+ op->args[1] = a;
+ op->args[2] = b;
+}
+
+void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
+ TCGArg r, TCGArg a, TCGArg b, TCGArg c)
+{
+ TCGOp *op = tcg_emit_op(opc);
+ TCGOP_VECL(op) = type - TCG_TYPE_V64;
+ TCGOP_VECE(op) = vece;
+ op->args[0] = r;
+ op->args[1] = a;
+ op->args[2] = b;
+ op->args[3] = c;
+}
+
+static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ TCGTemp *at = tcgv_vec_temp(a);
+ TCGType type = rt->base_type;
+
+ /* Must enough inputs for the output. */
+ tcg_debug_assert(at->base_type >= type);
+ vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
+}
+
+static void vec_gen_op3(TCGOpcode opc, unsigned vece,
+ TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ TCGTemp *at = tcgv_vec_temp(a);
+ TCGTemp *bt = tcgv_vec_temp(b);
+ TCGType type = rt->base_type;
+
+ /* Must enough inputs for the output. */
+ tcg_debug_assert(at->base_type >= type);
+ tcg_debug_assert(bt->base_type >= type);
+ vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
+}
+
+void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
+{
+ if (r != a) {
+ vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
+ }
+}
+
+#define MO_REG (TCG_TARGET_REG_BITS == 64 ? MO_64 : MO_32)
+
+static void do_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ vec_gen_2(INDEX_op_dupi_vec, rt->base_type, vece, temp_arg(rt), a);
+}
+
+TCGv_vec tcg_const_zeros_vec(TCGType type)
+{
+ TCGv_vec ret = tcg_temp_new_vec(type);
+ do_dupi_vec(ret, MO_REG, 0);
+ return ret;
+}
+
+TCGv_vec tcg_const_ones_vec(TCGType type)
+{
+ TCGv_vec ret = tcg_temp_new_vec(type);
+ do_dupi_vec(ret, MO_REG, -1);
+ return ret;
+}
+
+TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
+{
+ TCGTemp *t = tcgv_vec_temp(m);
+ return tcg_const_zeros_vec(t->base_type);
+}
+
+TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
+{
+ TCGTemp *t = tcgv_vec_temp(m);
+ return tcg_const_ones_vec(t->base_type);
+}
+
+void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
+{
+ if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) {
+ do_dupi_vec(r, MO_32, a);
+ } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) {
+ do_dupi_vec(r, MO_64, a);
+ } else {
+ TCGv_i64 c = tcg_const_i64(a);
+ tcg_gen_dup_i64_vec(MO_64, r, c);
+ tcg_temp_free_i64(c);
+ }
+}
+
+void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a)
+{
+ do_dupi_vec(r, MO_REG, dup_const(MO_32, a));
+}
+
+void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a)
+{
+ do_dupi_vec(r, MO_REG, dup_const(MO_16, a));
+}
+
+void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
+{
+ do_dupi_vec(r, MO_REG, dup_const(MO_8, a));
+}
+
+void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
+{
+ do_dupi_vec(r, MO_REG, dup_const(vece, a));
+}
+
+void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
+{
+ TCGArg ri = tcgv_vec_arg(r);
+ TCGTemp *rt = arg_temp(ri);
+ TCGType type = rt->base_type;
+
+ if (TCG_TARGET_REG_BITS == 64) {
+ TCGArg ai = tcgv_i64_arg(a);
+ vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
+ } else if (vece == MO_64) {
+ TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
+ TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
+ vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
+ } else {
+ TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
+ vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
+ }
+}
+
+void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
+{
+ TCGArg ri = tcgv_vec_arg(r);
+ TCGArg ai = tcgv_i32_arg(a);
+ TCGTemp *rt = arg_temp(ri);
+ TCGType type = rt->base_type;
+
+ vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
+}
+
+static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
+{
+ TCGArg ri = tcgv_vec_arg(r);
+ TCGArg bi = tcgv_ptr_arg(b);
+ TCGTemp *rt = arg_temp(ri);
+ TCGType type = rt->base_type;
+
+ vec_gen_3(opc, type, 0, ri, bi, o);
+}
+
+void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
+{
+ vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
+}
+
+void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
+{
+ vec_gen_ldst(INDEX_op_st_vec, r, b, o);
+}
+
+void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
+{
+ TCGArg ri = tcgv_vec_arg(r);
+ TCGArg bi = tcgv_ptr_arg(b);
+ TCGTemp *rt = arg_temp(ri);
+ TCGType type = rt->base_type;
+
+ tcg_debug_assert(low_type >= TCG_TYPE_V64);
+ tcg_debug_assert(low_type <= type);
+ vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
+}
+
+void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ vec_gen_op3(INDEX_op_add_vec, vece, r, a, b);
+}
+
+void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ vec_gen_op3(INDEX_op_sub_vec, vece, r, a, b);
+}
+
+void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
+}
+
+void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
+}
+
+void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
+}
+
+void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ if (TCG_TARGET_HAS_andc_vec) {
+ vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
+ } else {
+ TCGv_vec t = tcg_temp_new_vec_matching(r);
+ tcg_gen_not_vec(0, t, b);
+ tcg_gen_and_vec(0, r, a, t);
+ tcg_temp_free_vec(t);
+ }
+}
+
+void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ if (TCG_TARGET_HAS_orc_vec) {
+ vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
+ } else {
+ TCGv_vec t = tcg_temp_new_vec_matching(r);
+ tcg_gen_not_vec(0, t, b);
+ tcg_gen_or_vec(0, r, a, t);
+ tcg_temp_free_vec(t);
+ }
+}
+
+void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
+{
+ if (TCG_TARGET_HAS_not_vec) {
+ vec_gen_op2(INDEX_op_not_vec, 0, r, a);
+ } else {
+ TCGv_vec t = tcg_const_ones_vec_matching(r);
+ tcg_gen_xor_vec(0, r, a, t);
+ tcg_temp_free_vec(t);
+ }
+}
+
+void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
+{
+ if (TCG_TARGET_HAS_neg_vec) {
+ vec_gen_op2(INDEX_op_neg_vec, vece, r, a);
+ } else {
+ TCGv_vec t = tcg_const_zeros_vec_matching(r);
+ tcg_gen_sub_vec(vece, r, t, a);
+ tcg_temp_free_vec(t);
+ }
+}
+
+static void do_shifti(TCGOpcode opc, unsigned vece,
+ TCGv_vec r, TCGv_vec a, int64_t i)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ TCGTemp *at = tcgv_vec_temp(a);
+ TCGArg ri = temp_arg(rt);
+ TCGArg ai = temp_arg(at);
+ TCGType type = rt->base_type;
+ int can;
+
+ tcg_debug_assert(at->base_type == type);
+ tcg_debug_assert(i >= 0 && i < (8 << vece));
+
+ if (i == 0) {
+ tcg_gen_mov_vec(r, a);
+ return;
+ }
+
+ can = tcg_can_emit_vec_op(opc, type, vece);
+ if (can > 0) {
+ vec_gen_3(opc, type, vece, ri, ai, i);
+ } else {
+ /* We leave the choice of expansion via scalar or vector shift
+ to the target. Often, but not always, dupi can feed a vector
+ shift easier than a scalar. */
+ tcg_debug_assert(can < 0);
+ tcg_expand_vec_op(opc, type, vece, ri, ai, i);
+ }
+}
+
+void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
+{
+ do_shifti(INDEX_op_shli_vec, vece, r, a, i);
+}
+
+void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
+{
+ do_shifti(INDEX_op_shri_vec, vece, r, a, i);
+}
+
+void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
+{
+ do_shifti(INDEX_op_sari_vec, vece, r, a, i);
+}
+
+void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
+ TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ TCGTemp *at = tcgv_vec_temp(a);
+ TCGTemp *bt = tcgv_vec_temp(b);
+ TCGArg ri = temp_arg(rt);
+ TCGArg ai = temp_arg(at);
+ TCGArg bi = temp_arg(bt);
+ TCGType type = rt->base_type;
+ int can;
+
+ tcg_debug_assert(at->base_type == type);
+ tcg_debug_assert(bt->base_type == type);
+ can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
+ if (can > 0) {
+ vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
+ } else {
+ tcg_debug_assert(can < 0);
+ tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
+ }
+}
+
+void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ TCGTemp *at = tcgv_vec_temp(a);
+ TCGTemp *bt = tcgv_vec_temp(b);
+ TCGArg ri = temp_arg(rt);
+ TCGArg ai = temp_arg(at);
+ TCGArg bi = temp_arg(bt);
+ TCGType type = rt->base_type;
+ int can;
+
+ tcg_debug_assert(at->base_type == type);
+ tcg_debug_assert(bt->base_type == type);
+ can = tcg_can_emit_vec_op(INDEX_op_mul_vec, type, vece);
+ if (can > 0) {
+ vec_gen_3(INDEX_op_mul_vec, type, vece, ri, ai, bi);
+ } else {
+ tcg_debug_assert(can < 0);
+ tcg_expand_vec_op(INDEX_op_mul_vec, type, vece, ri, ai, bi);
+ }
+}
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 0c509bf..3467787 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -140,7 +140,7 @@
}
}
-void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
+void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
TCGv_i32 t0;
/* Some cases can be optimized here. */
@@ -148,17 +148,17 @@
case 0:
tcg_gen_movi_i32(ret, 0);
return;
- case 0xffffffffu:
+ case -1:
tcg_gen_mov_i32(ret, arg1);
return;
- case 0xffu:
+ case 0xff:
/* Don't recurse with tcg_gen_ext8u_i32. */
if (TCG_TARGET_HAS_ext8u_i32) {
tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
return;
}
break;
- case 0xffffu:
+ case 0xffff:
if (TCG_TARGET_HAS_ext16u_i32) {
tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
return;
@@ -199,9 +199,9 @@
}
}
-void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
- tcg_debug_assert(arg2 < 32);
+ tcg_debug_assert(arg2 >= 0 && arg2 < 32);
if (arg2 == 0) {
tcg_gen_mov_i32(ret, arg1);
} else {
@@ -211,9 +211,9 @@
}
}
-void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
- tcg_debug_assert(arg2 < 32);
+ tcg_debug_assert(arg2 >= 0 && arg2 < 32);
if (arg2 == 0) {
tcg_gen_mov_i32(ret, arg1);
} else {
@@ -223,9 +223,9 @@
}
}
-void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
- tcg_debug_assert(arg2 < 32);
+ tcg_debug_assert(arg2 >= 0 && arg2 < 32);
if (arg2 == 0) {
tcg_gen_mov_i32(ret, arg1);
} else {
@@ -1201,7 +1201,7 @@
}
}
-void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
+void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
TCGv_i64 t0;
@@ -1216,23 +1216,23 @@
case 0:
tcg_gen_movi_i64(ret, 0);
return;
- case 0xffffffffffffffffull:
+ case -1:
tcg_gen_mov_i64(ret, arg1);
return;
- case 0xffull:
+ case 0xff:
/* Don't recurse with tcg_gen_ext8u_i64. */
if (TCG_TARGET_HAS_ext8u_i64) {
tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
return;
}
break;
- case 0xffffu:
+ case 0xffff:
if (TCG_TARGET_HAS_ext16u_i64) {
tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
return;
}
break;
- case 0xffffffffull:
+ case 0xffffffffu:
if (TCG_TARGET_HAS_ext32u_i64) {
tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
return;
@@ -1332,9 +1332,9 @@
}
}
-void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
- tcg_debug_assert(arg2 < 64);
+ tcg_debug_assert(arg2 >= 0 && arg2 < 64);
if (TCG_TARGET_REG_BITS == 32) {
tcg_gen_shifti_i64(ret, arg1, arg2, 0, 0);
} else if (arg2 == 0) {
@@ -1346,9 +1346,9 @@
}
}
-void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
- tcg_debug_assert(arg2 < 64);
+ tcg_debug_assert(arg2 >= 0 && arg2 < 64);
if (TCG_TARGET_REG_BITS == 32) {
tcg_gen_shifti_i64(ret, arg1, arg2, 1, 0);
} else if (arg2 == 0) {
@@ -1360,9 +1360,9 @@
}
}
-void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
- tcg_debug_assert(arg2 < 64);
+ tcg_debug_assert(arg2 >= 0 && arg2 < 64);
if (TCG_TARGET_REG_BITS == 32) {
tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
} else if (arg2 == 0) {
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index ca07b32..75bb55a 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -35,6 +35,10 @@
void tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
void tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
+void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg);
+void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg);
+void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg);
+
static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 a1)
{
tcg_gen_op1(opc, tcgv_i32_arg(a1));
@@ -265,12 +269,12 @@
void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2);
void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
-void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
+void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
-void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
-void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
-void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
+void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
@@ -454,12 +458,12 @@
void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2);
void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
-void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
+void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
-void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
-void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
-void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
+void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
@@ -903,6 +907,36 @@
void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
+void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
+void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
+void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec, TCGv_i64);
+void tcg_gen_dup8i_vec(TCGv_vec, uint32_t);
+void tcg_gen_dup16i_vec(TCGv_vec, uint32_t);
+void tcg_gen_dup32i_vec(TCGv_vec, uint32_t);
+void tcg_gen_dup64i_vec(TCGv_vec, uint64_t);
+void tcg_gen_dupi_vec(unsigned vece, TCGv_vec, uint64_t);
+void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
+void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
+
+void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
+void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
+void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
+
+void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r,
+ TCGv_vec a, TCGv_vec b);
+
+void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset);
+void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset);
+void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
+
#if TARGET_LONG_BITS == 64
#define tcg_gen_movi_tl tcg_gen_movi_i64
#define tcg_gen_mov_tl tcg_gen_mov_i64
@@ -1001,6 +1035,7 @@
#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i64
#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i64
#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i64
+#define tcg_gen_dup_tl_vec tcg_gen_dup_i64_vec
#else
#define tcg_gen_movi_tl tcg_gen_movi_i32
#define tcg_gen_mov_tl tcg_gen_mov_i32
@@ -1098,6 +1133,7 @@
#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i32
#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i32
#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i32
+#define tcg_gen_dup_tl_vec tcg_gen_dup_i32_vec
#endif
#if UINTPTR_MAX == UINT32_MAX
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 956fb1e..d81a6c4 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -204,8 +204,54 @@
DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
+/* Host vector support. */
+
+#define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)
+
+DEF(mov_vec, 1, 1, 0, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
+DEF(dupi_vec, 1, 0, 1, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
+
+DEF(dup_vec, 1, 1, 0, IMPLVEC)
+DEF(dup2_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_REG_BITS == 32))
+
+DEF(ld_vec, 1, 1, 1, IMPLVEC)
+DEF(st_vec, 0, 2, 1, IMPLVEC)
+
+DEF(add_vec, 1, 2, 0, IMPLVEC)
+DEF(sub_vec, 1, 2, 0, IMPLVEC)
+DEF(mul_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_mul_vec))
+DEF(neg_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec))
+
+DEF(and_vec, 1, 2, 0, IMPLVEC)
+DEF(or_vec, 1, 2, 0, IMPLVEC)
+DEF(xor_vec, 1, 2, 0, IMPLVEC)
+DEF(andc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec))
+DEF(orc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec))
+DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
+
+DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
+DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
+DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
+
+DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
+DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
+DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
+
+DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
+DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
+DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
+
+DEF(cmp_vec, 1, 2, 1, IMPLVEC)
+
+DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
+
+#if TCG_TARGET_MAYBE_vec
+#include "tcg-target.opc.h"
+#endif
+
#undef TLADDR_ARGS
#undef DATA64_ARGS
#undef IMPL
#undef IMPL64
+#undef IMPLVEC
#undef DEF
diff --git a/tcg/tcg-pool.inc.c b/tcg/tcg-pool.inc.c
index 8a85131..7af5513 100644
--- a/tcg/tcg-pool.inc.c
+++ b/tcg/tcg-pool.inc.c
@@ -22,39 +22,110 @@
typedef struct TCGLabelPoolData {
struct TCGLabelPoolData *next;
- tcg_target_ulong data;
tcg_insn_unit *label;
intptr_t addend;
- int type;
+ int rtype;
+ unsigned nlong;
+ tcg_target_ulong data[];
} TCGLabelPoolData;
-static void new_pool_label(TCGContext *s, tcg_target_ulong data, int type,
- tcg_insn_unit *label, intptr_t addend)
+static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
+ tcg_insn_unit *label, intptr_t addend)
{
- TCGLabelPoolData *n = tcg_malloc(sizeof(*n));
- TCGLabelPoolData *i, **pp;
+ TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
+ + sizeof(tcg_target_ulong) * nlong);
- n->data = data;
n->label = label;
- n->type = type;
n->addend = addend;
+ n->rtype = rtype;
+ n->nlong = nlong;
+ return n;
+}
+
+static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
+{
+ TCGLabelPoolData *i, **pp;
+ int nlong = n->nlong;
/* Insertion sort on the pool. */
- for (pp = &s->pool_labels; (i = *pp) && i->data < data; pp = &i->next) {
- continue;
+ for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
+ if (nlong > i->nlong) {
+ break;
+ }
+ if (nlong < i->nlong) {
+ continue;
+ }
+ if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
+ break;
+ }
}
n->next = *pp;
*pp = n;
}
+/* The "usual" for generic integer code. */
+static inline void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
+ tcg_insn_unit *label, intptr_t addend)
+{
+ TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
+ n->data[0] = d;
+ new_pool_insert(s, n);
+}
+
+/* For v64 or v128, depending on the host. */
+static inline void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
+ intptr_t addend, tcg_target_ulong d0,
+ tcg_target_ulong d1)
+{
+ TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
+ n->data[0] = d0;
+ n->data[1] = d1;
+ new_pool_insert(s, n);
+}
+
+/* For v128 or v256, depending on the host. */
+static inline void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
+ intptr_t addend, tcg_target_ulong d0,
+ tcg_target_ulong d1, tcg_target_ulong d2,
+ tcg_target_ulong d3)
+{
+ TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
+ n->data[0] = d0;
+ n->data[1] = d1;
+ n->data[2] = d2;
+ n->data[3] = d3;
+ new_pool_insert(s, n);
+}
+
+/* For v256, for 32-bit host. */
+static inline void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
+ intptr_t addend, tcg_target_ulong d0,
+ tcg_target_ulong d1, tcg_target_ulong d2,
+ tcg_target_ulong d3, tcg_target_ulong d4,
+ tcg_target_ulong d5, tcg_target_ulong d6,
+ tcg_target_ulong d7)
+{
+ TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
+ n->data[0] = d0;
+ n->data[1] = d1;
+ n->data[2] = d2;
+ n->data[3] = d3;
+ n->data[4] = d4;
+ n->data[5] = d5;
+ n->data[6] = d6;
+ n->data[7] = d7;
+ new_pool_insert(s, n);
+}
+
/* To be provided by cpu/tcg-target.inc.c. */
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
static bool tcg_out_pool_finalize(TCGContext *s)
{
TCGLabelPoolData *p = s->pool_labels;
- tcg_target_ulong d, *a;
+ TCGLabelPoolData *l = NULL;
+ void *a;
if (p == NULL) {
return true;
@@ -62,24 +133,24 @@
/* ??? Round up to qemu_icache_linesize, but then do not round
again when allocating the next TranslationBlock structure. */
- a = (void *)ROUND_UP((uintptr_t)s->code_ptr, sizeof(tcg_target_ulong));
+ a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
+ sizeof(tcg_target_ulong) * p->nlong);
tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
s->data_gen_ptr = a;
- /* Ensure the first comparison fails. */
- d = p->data + 1;
-
for (; p != NULL; p = p->next) {
- if (p->data != d) {
- d = p->data;
- if (unlikely((void *)a > s->code_gen_highwater)) {
+ size_t size = sizeof(tcg_target_ulong) * p->nlong;
+ if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
+ if (unlikely(a > s->code_gen_highwater)) {
return false;
}
- *a++ = d;
+ memcpy(a, p->data, size);
+ a += size;
+ l = p;
}
- patch_reloc(p->label, p->type, (intptr_t)(a - 1), p->addend);
+ patch_reloc(p->label, p->rtype, (intptr_t)a - size, p->addend);
}
- s->code_ptr = (void *)a;
+ s->code_ptr = a;
return true;
}
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 93caa0b..bb24526 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -106,6 +106,18 @@
TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
const int *const_args);
+#if TCG_TARGET_MAYBE_vec
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
+ unsigned vece, const TCGArg *args,
+ const int *const_args);
+#else
+static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
+ unsigned vece, const TCGArg *args,
+ const int *const_args)
+{
+ g_assert_not_reached();
+}
+#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
@@ -146,8 +158,7 @@
};
static struct tcg_region_state region;
-
-static TCGRegSet tcg_target_available_regs[2];
+static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
#if TCG_TARGET_INSN_UNIT_SIZE == 1
@@ -1026,6 +1037,41 @@
return temp_tcgv_i64(t);
}
+TCGv_vec tcg_temp_new_vec(TCGType type)
+{
+ TCGTemp *t;
+
+#ifdef CONFIG_DEBUG_TCG
+ switch (type) {
+ case TCG_TYPE_V64:
+ assert(TCG_TARGET_HAS_v64);
+ break;
+ case TCG_TYPE_V128:
+ assert(TCG_TARGET_HAS_v128);
+ break;
+ case TCG_TYPE_V256:
+ assert(TCG_TARGET_HAS_v256);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+#endif
+
+ t = tcg_temp_new_internal(type, 0);
+ return temp_tcgv_vec(t);
+}
+
+/* Create a new temp of the same type as an existing temp. */
+TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
+{
+ TCGTemp *t = tcgv_vec_temp(match);
+
+ tcg_debug_assert(t->temp_allocated != 0);
+
+ t = tcg_temp_new_internal(t->base_type, 0);
+ return temp_tcgv_vec(t);
+}
+
static void tcg_temp_free_internal(TCGTemp *ts)
{
TCGContext *s = tcg_ctx;
@@ -1057,6 +1103,11 @@
tcg_temp_free_internal(tcgv_i64_temp(arg));
}
+void tcg_temp_free_vec(TCGv_vec arg)
+{
+ tcg_temp_free_internal(tcgv_vec_temp(arg));
+}
+
TCGv_i32 tcg_const_i32(int32_t val)
{
TCGv_i32 t0;
@@ -1114,6 +1165,9 @@
Test the runtime variable that controls each opcode. */
bool tcg_op_supported(TCGOpcode op)
{
+ const bool have_vec
+ = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
+
switch (op) {
case INDEX_op_discard:
case INDEX_op_set_label:
@@ -1327,10 +1381,47 @@
case INDEX_op_mulsh_i64:
return TCG_TARGET_HAS_mulsh_i64;
- case NB_OPS:
- break;
+ case INDEX_op_mov_vec:
+ case INDEX_op_dup_vec:
+ case INDEX_op_dupi_vec:
+ case INDEX_op_ld_vec:
+ case INDEX_op_st_vec:
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_cmp_vec:
+ return have_vec;
+ case INDEX_op_dup2_vec:
+ return have_vec && TCG_TARGET_REG_BITS == 32;
+ case INDEX_op_not_vec:
+ return have_vec && TCG_TARGET_HAS_not_vec;
+ case INDEX_op_neg_vec:
+ return have_vec && TCG_TARGET_HAS_neg_vec;
+ case INDEX_op_andc_vec:
+ return have_vec && TCG_TARGET_HAS_andc_vec;
+ case INDEX_op_orc_vec:
+ return have_vec && TCG_TARGET_HAS_orc_vec;
+ case INDEX_op_mul_vec:
+ return have_vec && TCG_TARGET_HAS_mul_vec;
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
+ return have_vec && TCG_TARGET_HAS_shi_vec;
+ case INDEX_op_shls_vec:
+ case INDEX_op_shrs_vec:
+ case INDEX_op_sars_vec:
+ return have_vec && TCG_TARGET_HAS_shs_vec;
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ return have_vec && TCG_TARGET_HAS_shv_vec;
+
+ default:
+ tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
+ return true;
}
- g_assert_not_reached();
}
/* Note: we convert the 64 bit args to 32 bit and do some alignment
@@ -1661,6 +1752,11 @@
nb_iargs = def->nb_iargs;
nb_cargs = def->nb_cargs;
+ if (def->flags & TCG_OPF_VECTOR) {
+ col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
+ 8 << TCGOP_VECE(op));
+ }
+
k = 0;
for (i = 0; i < nb_oargs; i++) {
if (k != 0) {
@@ -1685,6 +1781,7 @@
case INDEX_op_brcond_i64:
case INDEX_op_setcond_i64:
case INDEX_op_movcond_i64:
+ case INDEX_op_cmp_vec:
if (op->args[k] < ARRAY_SIZE(cond_name)
&& cond_name[op->args[k]]) {
col += qemu_log(",%s", cond_name[op->args[k++]]);
@@ -2890,8 +2987,13 @@
}
/* emit instruction */
- tcg_out_op(s, op->opc, new_args, const_args);
-
+ if (def->flags & TCG_OPF_VECTOR) {
+ tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
+ new_args, const_args);
+ } else {
+ tcg_out_op(s, op->opc, new_args, const_args);
+ }
+
/* move the outputs in the correct register if needed */
for(i = 0; i < nb_oargs; i++) {
ts = arg_temp(op->args[i]);
@@ -3239,10 +3341,12 @@
switch (opc) {
case INDEX_op_mov_i32:
case INDEX_op_mov_i64:
+ case INDEX_op_mov_vec:
tcg_reg_alloc_mov(s, op);
break;
case INDEX_op_movi_i32:
case INDEX_op_movi_i64:
+ case INDEX_op_dupi_vec:
tcg_reg_alloc_movi(s, op);
break;
case INDEX_op_insn_start:
@@ -3645,3 +3749,10 @@
{
}
#endif /* ELF_HOST_MACHINE */
+
+#if !TCG_TARGET_MAYBE_vec
+void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
+{
+ g_assert_not_reached();
+}
+#endif
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 2ce497c..9e2d909 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -170,6 +170,31 @@
# error "Missing unsigned widening multiply"
#endif
+#if !defined(TCG_TARGET_HAS_v64) \
+ && !defined(TCG_TARGET_HAS_v128) \
+ && !defined(TCG_TARGET_HAS_v256)
+#define TCG_TARGET_MAYBE_vec 0
+#define TCG_TARGET_HAS_neg_vec 0
+#define TCG_TARGET_HAS_not_vec 0
+#define TCG_TARGET_HAS_andc_vec 0
+#define TCG_TARGET_HAS_orc_vec 0
+#define TCG_TARGET_HAS_shi_vec 0
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 0
+#define TCG_TARGET_HAS_mul_vec 0
+#else
+#define TCG_TARGET_MAYBE_vec 1
+#endif
+#ifndef TCG_TARGET_HAS_v64
+#define TCG_TARGET_HAS_v64 0
+#endif
+#ifndef TCG_TARGET_HAS_v128
+#define TCG_TARGET_HAS_v128 0
+#endif
+#ifndef TCG_TARGET_HAS_v256
+#define TCG_TARGET_HAS_v256 0
+#endif
+
#ifndef TARGET_INSN_START_EXTRA_WORDS
# define TARGET_INSN_START_WORDS 1
#else
@@ -246,6 +271,11 @@
typedef enum TCGType {
TCG_TYPE_I32,
TCG_TYPE_I64,
+
+ TCG_TYPE_V64,
+ TCG_TYPE_V128,
+ TCG_TYPE_V256,
+
TCG_TYPE_COUNT, /* number of different types */
/* An alias for the size of the host register. */
@@ -396,6 +426,8 @@
* TCGv_i32 : 32 bit integer type
* TCGv_i64 : 64 bit integer type
* TCGv_ptr : a host pointer type
+ * TCGv_vec : a host vector type; the exact size is not exposed
+ to the CPU front-end code.
* TCGv : an integer type the same size as target_ulong
(an alias for either TCGv_i32 or TCGv_i64)
The compiler's type checking will complain if you mix them
@@ -418,6 +450,7 @@
typedef struct TCGv_i32_d *TCGv_i32;
typedef struct TCGv_i64_d *TCGv_i64;
typedef struct TCGv_ptr_d *TCGv_ptr;
+typedef struct TCGv_vec_d *TCGv_vec;
typedef TCGv_ptr TCGv_env;
#if TARGET_LONG_BITS == 32
#define TCGv TCGv_i32
@@ -589,6 +622,9 @@
#define TCGOP_CALLI(X) (X)->param1
#define TCGOP_CALLO(X) (X)->param2
+#define TCGOP_VECL(X) (X)->param1
+#define TCGOP_VECE(X) (X)->param2
+
/* Make sure operands fit in the bitfields above. */
QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8));
@@ -726,6 +762,11 @@
return tcgv_i32_temp((TCGv_i32)v);
}
+static inline TCGTemp *tcgv_vec_temp(TCGv_vec v)
+{
+ return tcgv_i32_temp((TCGv_i32)v);
+}
+
static inline TCGArg tcgv_i32_arg(TCGv_i32 v)
{
return temp_arg(tcgv_i32_temp(v));
@@ -741,6 +782,11 @@
return temp_arg(tcgv_ptr_temp(v));
}
+static inline TCGArg tcgv_vec_arg(TCGv_vec v)
+{
+ return temp_arg(tcgv_vec_temp(v));
+}
+
static inline TCGv_i32 temp_tcgv_i32(TCGTemp *t)
{
(void)temp_idx(t); /* trigger embedded assert */
@@ -757,6 +803,11 @@
return (TCGv_ptr)temp_tcgv_i32(t);
}
+static inline TCGv_vec temp_tcgv_vec(TCGTemp *t)
+{
+ return (TCGv_vec)temp_tcgv_i32(t);
+}
+
#if TCG_TARGET_REG_BITS == 32
static inline TCGv_i32 TCGV_LOW(TCGv_i64 t)
{
@@ -832,9 +883,12 @@
TCGv_i32 tcg_temp_new_internal_i32(int temp_local);
TCGv_i64 tcg_temp_new_internal_i64(int temp_local);
+TCGv_vec tcg_temp_new_vec(TCGType type);
+TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match);
void tcg_temp_free_i32(TCGv_i32 arg);
void tcg_temp_free_i64(TCGv_i64 arg);
+void tcg_temp_free_vec(TCGv_vec arg);
static inline TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t offset,
const char *name)
@@ -916,6 +970,8 @@
/* Instruction is optional and not implemented by the host, or insn
is generic and should not be implemened by the host. */
TCG_OPF_NOT_PRESENT = 0x10,
+ /* Instruction operands are vectors. */
+ TCG_OPF_VECTOR = 0x20,
};
typedef struct TCGOpDef {
@@ -981,6 +1037,10 @@
TCGv_i64 tcg_const_i64(int64_t val);
TCGv_i32 tcg_const_local_i32(int32_t val);
TCGv_i64 tcg_const_local_i64(int64_t val);
+TCGv_vec tcg_const_zeros_vec(TCGType);
+TCGv_vec tcg_const_ones_vec(TCGType);
+TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec);
+TCGv_vec tcg_const_ones_vec_matching(TCGv_vec);
TCGLabel *gen_new_label(void);
@@ -1151,6 +1211,33 @@
void tcg_register_jit(void *buf, size_t buf_size);
+#if TCG_TARGET_MAYBE_vec
+/* Return zero if the tuple (opc, type, vece) is unsupportable;
+ return > 0 if it is directly supportable;
+ return < 0 if we must call tcg_expand_vec_op. */
+int tcg_can_emit_vec_op(TCGOpcode, TCGType, unsigned);
+#else
+static inline int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
+{
+ return 0;
+}
+#endif
+
+/* Expand the tuple (opc, type, vece) on the given arguments. */
+void tcg_expand_vec_op(TCGOpcode, TCGType, unsigned, TCGArg, ...);
+
+/* Replicate a constant C accoring to the log2 of the element size. */
+uint64_t dup_const(unsigned vece, uint64_t c);
+
+#define dup_const(VECE, C) \
+ (__builtin_constant_p(VECE) \
+ ? ( (VECE) == MO_8 ? 0x0101010101010101ull * (uint8_t)(C) \
+ : (VECE) == MO_16 ? 0x0001000100010001ull * (uint16_t)(C) \
+ : (VECE) == MO_32 ? 0x0000000100000001ull * (uint32_t)(C) \
+ : dup_const(VECE, C)) \
+ : dup_const(VECE, C))
+
+
/*
* Memory helpers that will be used by TCG generated code.
*/