Blame - target/arm/translate-neon.c.inc - qemu

blob: 9879731a521d32759b83ee5d7d4f7c75b6286a6a [file] [log] [blame]

Peter Maydell	625e3dd	2020-04-30 19:09:30 +0100	[diff] [blame]	1	/*
				2	* ARM translation: AArch32 Neon instructions
				3	*
				4	* Copyright (c) 2003 Fabrice Bellard
				5	* Copyright (c) 2005-2007 CodeSourcery
				6	* Copyright (c) 2007 OpenedHand, Ltd.
				7	* Copyright (c) 2020 Linaro, Ltd.
				8	*
				9	* This library is free software; you can redistribute it and/or
				10	* modify it under the terms of the GNU Lesser General Public
				11	* License as published by the Free Software Foundation; either
				12	* version 2 of the License, or (at your option) any later version.
				13	*
				14	* This library is distributed in the hope that it will be useful,
				15	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				16	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				17	* Lesser General Public License for more details.
				18	*
				19	* You should have received a copy of the GNU Lesser General Public
				20	* License along with this library; if not, see <http://www.gnu.org/licenses/>.
				21	*/
				22
				23	/*
				24	* This file is intended to be included from translate.c; it uses
				25	* some macros and definitions provided by that file.
				26	* It might be possible to convert it to a standalone .c file eventually.
				27	*/
				28
Peter Maydell	123ce4e	2020-04-30 19:09:40 +0100	[diff] [blame]	29	static inline int plus1(DisasContext *s, int x)
				30	{
				31	return x + 1;
				32	}
				33
Peter Maydell	66432d6	2020-05-22 15:55:13 +0100	[diff] [blame]	34	static inline int rsub_64(DisasContext *s, int x)
				35	{
				36	return 64 - x;
				37	}
				38
				39	static inline int rsub_32(DisasContext *s, int x)
				40	{
				41	return 32 - x;
				42	}
				43	static inline int rsub_16(DisasContext *s, int x)
				44	{
				45	return 16 - x;
				46	}
				47	static inline int rsub_8(DisasContext *s, int x)
				48	{
				49	return 8 - x;
				50	}
				51
Peter Maydell	625e3dd	2020-04-30 19:09:30 +0100	[diff] [blame]	52	/* Include the generated Neon decoder */
Paolo Bonzini	139c183	2020-02-04 12:41:01 +0100	[diff] [blame]	53	#include "decode-neon-dp.c.inc"
				54	#include "decode-neon-ls.c.inc"
				55	#include "decode-neon-shared.c.inc"
Peter Maydell	afff8de	2020-04-30 19:09:31 +0100	[diff] [blame]	56
Peter Maydell	6fb5787	2020-06-16 18:08:44 +0100	[diff] [blame]	57	/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
				58	* where 0 is the least significant end of the register.
				59	*/
				60	static inline long
				61	neon_element_offset(int reg, int element, MemOp size)
				62	{
				63	int element_size = 1 << size;
				64	int ofs = element * element_size;
				65	#ifdef HOST_WORDS_BIGENDIAN
				66	/* Calculate the offset assuming fully little-endian,
				67	* then XOR to account for the order of the 8-byte units.
				68	*/
				69	if (element_size < 8) {
				70	ofs ^= 8 - element_size;
				71	}
				72	#endif
				73	return neon_reg_offset(reg, 0) + ofs;
				74	}
				75
				76	static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
				77	{
				78	long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
				79
				80	switch (mop) {
				81	case MO_UB:
				82	tcg_gen_ld8u_i32(var, cpu_env, offset);
				83	break;
				84	case MO_UW:
				85	tcg_gen_ld16u_i32(var, cpu_env, offset);
				86	break;
				87	case MO_UL:
				88	tcg_gen_ld_i32(var, cpu_env, offset);
				89	break;
				90	default:
				91	g_assert_not_reached();
				92	}
				93	}
				94
				95	static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
				96	{
				97	long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
				98
				99	switch (mop) {
				100	case MO_UB:
				101	tcg_gen_ld8u_i64(var, cpu_env, offset);
				102	break;
				103	case MO_UW:
				104	tcg_gen_ld16u_i64(var, cpu_env, offset);
				105	break;
				106	case MO_UL:
				107	tcg_gen_ld32u_i64(var, cpu_env, offset);
				108	break;
				109	case MO_Q:
				110	tcg_gen_ld_i64(var, cpu_env, offset);
				111	break;
				112	default:
				113	g_assert_not_reached();
				114	}
				115	}
				116
				117	static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
				118	{
				119	long offset = neon_element_offset(reg, ele, size);
				120
				121	switch (size) {
				122	case MO_8:
				123	tcg_gen_st8_i32(var, cpu_env, offset);
				124	break;
				125	case MO_16:
				126	tcg_gen_st16_i32(var, cpu_env, offset);
				127	break;
				128	case MO_32:
				129	tcg_gen_st_i32(var, cpu_env, offset);
				130	break;
				131	default:
				132	g_assert_not_reached();
				133	}
				134	}
				135
				136	static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
				137	{
				138	long offset = neon_element_offset(reg, ele, size);
				139
				140	switch (size) {
				141	case MO_8:
				142	tcg_gen_st8_i64(var, cpu_env, offset);
				143	break;
				144	case MO_16:
				145	tcg_gen_st16_i64(var, cpu_env, offset);
				146	break;
				147	case MO_32:
				148	tcg_gen_st32_i64(var, cpu_env, offset);
				149	break;
				150	case MO_64:
				151	tcg_gen_st_i64(var, cpu_env, offset);
				152	break;
				153	default:
				154	g_assert_not_reached();
				155	}
				156	}
				157
Peter Maydell	afff8de	2020-04-30 19:09:31 +0100	[diff] [blame]	158	static bool trans_VCMLA(DisasContext s, arg_VCMLA a)
				159	{
				160	int opr_sz;
				161	TCGv_ptr fpst;
				162	gen_helper_gvec_3_ptr *fn_gvec_ptr;
				163
				164	if (!dc_isar_feature(aa32_vcma, s)
				165	\|\| (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
				166	return false;
				167	}
				168
				169	/* UNDEF accesses to D16-D31 if they don't exist. */
				170	if (!dc_isar_feature(aa32_simd_r32, s) &&
				171	((a->vd \| a->vn \| a->vm) & 0x10)) {
				172	return false;
				173	}
				174
				175	if ((a->vn \| a->vm \| a->vd) & a->q) {
				176	return false;
				177	}
				178
				179	if (!vfp_access_check(s)) {
				180	return true;
				181	}
				182
				183	opr_sz = (1 + a->q) * 8;
Peter Maydell	b34aa51	2020-08-06 11:44:53 +0100	[diff] [blame]	184	fpst = fpstatus_ptr(a->size == 0 ? FPST_STD_F16 : FPST_STD);
Peter Maydell	afff8de	2020-04-30 19:09:31 +0100	[diff] [blame]	185	fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
				186	tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
				187	vfp_reg_offset(1, a->vn),
				188	vfp_reg_offset(1, a->vm),
				189	fpst, opr_sz, opr_sz, a->rot,
				190	fn_gvec_ptr);
				191	tcg_temp_free_ptr(fpst);
				192	return true;
				193	}
Peter Maydell	94d5eb7	2020-04-30 19:09:32 +0100	[diff] [blame]	194
				195	static bool trans_VCADD(DisasContext s, arg_VCADD a)
				196	{
				197	int opr_sz;
				198	TCGv_ptr fpst;
				199	gen_helper_gvec_3_ptr *fn_gvec_ptr;
				200
				201	if (!dc_isar_feature(aa32_vcma, s)
				202	\|\| (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
				203	return false;
				204	}
				205
				206	/* UNDEF accesses to D16-D31 if they don't exist. */
				207	if (!dc_isar_feature(aa32_simd_r32, s) &&
				208	((a->vd \| a->vn \| a->vm) & 0x10)) {
				209	return false;
				210	}
				211
				212	if ((a->vn \| a->vm \| a->vd) & a->q) {
				213	return false;
				214	}
				215
				216	if (!vfp_access_check(s)) {
				217	return true;
				218	}
				219
				220	opr_sz = (1 + a->q) * 8;
Peter Maydell	b34aa51	2020-08-06 11:44:53 +0100	[diff] [blame]	221	fpst = fpstatus_ptr(a->size == 0 ? FPST_STD_F16 : FPST_STD);
Peter Maydell	94d5eb7	2020-04-30 19:09:32 +0100	[diff] [blame]	222	fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
				223	tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
				224	vfp_reg_offset(1, a->vn),
				225	vfp_reg_offset(1, a->vm),
				226	fpst, opr_sz, opr_sz, a->rot,
				227	fn_gvec_ptr);
				228	tcg_temp_free_ptr(fpst);
				229	return true;
				230	}
Peter Maydell	32da0e3	2020-04-30 19:09:33 +0100	[diff] [blame]	231
				232	static bool trans_VDOT(DisasContext s, arg_VDOT a)
				233	{
				234	int opr_sz;
				235	gen_helper_gvec_3 *fn_gvec;
				236
				237	if (!dc_isar_feature(aa32_dp, s)) {
				238	return false;
				239	}
				240
				241	/* UNDEF accesses to D16-D31 if they don't exist. */
				242	if (!dc_isar_feature(aa32_simd_r32, s) &&
				243	((a->vd \| a->vn \| a->vm) & 0x10)) {
				244	return false;
				245	}
				246
				247	if ((a->vn \| a->vm \| a->vd) & a->q) {
				248	return false;
				249	}
				250
				251	if (!vfp_access_check(s)) {
				252	return true;
				253	}
				254
				255	opr_sz = (1 + a->q) * 8;
				256	fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
				257	tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
				258	vfp_reg_offset(1, a->vn),
				259	vfp_reg_offset(1, a->vm),
				260	opr_sz, opr_sz, 0, fn_gvec);
				261	return true;
				262	}
Peter Maydell	9a107e7	2020-04-30 19:09:34 +0100	[diff] [blame]	263
				264	static bool trans_VFML(DisasContext s, arg_VFML a)
				265	{
				266	int opr_sz;
				267
				268	if (!dc_isar_feature(aa32_fhm, s)) {
				269	return false;
				270	}
				271
				272	/* UNDEF accesses to D16-D31 if they don't exist. */
				273	if (!dc_isar_feature(aa32_simd_r32, s) &&
				274	(a->vd & 0x10)) {
				275	return false;
				276	}
				277
				278	if (a->vd & a->q) {
				279	return false;
				280	}
				281
				282	if (!vfp_access_check(s)) {
				283	return true;
				284	}
				285
				286	opr_sz = (1 + a->q) * 8;
				287	tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
				288	vfp_reg_offset(a->q, a->vn),
				289	vfp_reg_offset(a->q, a->vm),
				290	cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
				291	gen_helper_gvec_fmlal_a32);
				292	return true;
				293	}
Peter Maydell	7e1b5d6	2020-04-30 19:09:35 +0100	[diff] [blame]	294
				295	static bool trans_VCMLA_scalar(DisasContext s, arg_VCMLA_scalar a)
				296	{
				297	gen_helper_gvec_3_ptr *fn_gvec_ptr;
				298	int opr_sz;
				299	TCGv_ptr fpst;
				300
				301	if (!dc_isar_feature(aa32_vcma, s)) {
				302	return false;
				303	}
				304	if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) {
				305	return false;
				306	}
				307
				308	/* UNDEF accesses to D16-D31 if they don't exist. */
				309	if (!dc_isar_feature(aa32_simd_r32, s) &&
				310	((a->vd \| a->vn \| a->vm) & 0x10)) {
				311	return false;
				312	}
				313
				314	if ((a->vd \| a->vn) & a->q) {
				315	return false;
				316	}
				317
				318	if (!vfp_access_check(s)) {
				319	return true;
				320	}
				321
				322	fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx
				323	: gen_helper_gvec_fcmlah_idx);
				324	opr_sz = (1 + a->q) * 8;
Peter Maydell	b34aa51	2020-08-06 11:44:53 +0100	[diff] [blame]	325	fpst = fpstatus_ptr(a->size == 0 ? FPST_STD_F16 : FPST_STD);
Peter Maydell	7e1b5d6	2020-04-30 19:09:35 +0100	[diff] [blame]	326	tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
				327	vfp_reg_offset(1, a->vn),
				328	vfp_reg_offset(1, a->vm),
				329	fpst, opr_sz, opr_sz,
				330	(a->index << 2) \| a->rot, fn_gvec_ptr);
				331	tcg_temp_free_ptr(fpst);
				332	return true;
				333	}
Peter Maydell	35f5d4d	2020-04-30 19:09:36 +0100	[diff] [blame]	334
				335	static bool trans_VDOT_scalar(DisasContext s, arg_VDOT_scalar a)
				336	{
				337	gen_helper_gvec_3 *fn_gvec;
				338	int opr_sz;
				339	TCGv_ptr fpst;
				340
				341	if (!dc_isar_feature(aa32_dp, s)) {
				342	return false;
				343	}
				344
				345	/* UNDEF accesses to D16-D31 if they don't exist. */
				346	if (!dc_isar_feature(aa32_simd_r32, s) &&
				347	((a->vd \| a->vn) & 0x10)) {
				348	return false;
				349	}
				350
				351	if ((a->vd \| a->vn) & a->q) {
				352	return false;
				353	}
				354
				355	if (!vfp_access_check(s)) {
				356	return true;
				357	}
				358
				359	fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
				360	opr_sz = (1 + a->q) * 8;
Peter Maydell	a84d1d1	2020-08-06 11:44:51 +0100	[diff] [blame]	361	fpst = fpstatus_ptr(FPST_STD);
Peter Maydell	35f5d4d	2020-04-30 19:09:36 +0100	[diff] [blame]	362	tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
				363	vfp_reg_offset(1, a->vn),
				364	vfp_reg_offset(1, a->rm),
				365	opr_sz, opr_sz, a->index, fn_gvec);
				366	tcg_temp_free_ptr(fpst);
				367	return true;
				368	}
Peter Maydell	d27e82f	2020-04-30 19:09:37 +0100	[diff] [blame]	369
				370	static bool trans_VFML_scalar(DisasContext s, arg_VFML_scalar a)
				371	{
				372	int opr_sz;
				373
				374	if (!dc_isar_feature(aa32_fhm, s)) {
				375	return false;
				376	}
				377
				378	/* UNDEF accesses to D16-D31 if they don't exist. */
				379	if (!dc_isar_feature(aa32_simd_r32, s) &&
				380	((a->vd & 0x10) \|\| (a->q && (a->vn & 0x10)))) {
				381	return false;
				382	}
				383
				384	if (a->vd & a->q) {
				385	return false;
				386	}
				387
				388	if (!vfp_access_check(s)) {
				389	return true;
				390	}
				391
				392	opr_sz = (1 + a->q) * 8;
				393	tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
				394	vfp_reg_offset(a->q, a->vn),
				395	vfp_reg_offset(a->q, a->rm),
				396	cpu_env, opr_sz, opr_sz,
				397	(a->index << 2) \| a->s, /* is_2 == 0 */
				398	gen_helper_gvec_fmlal_idx_a32);
				399	return true;
				400	}
Peter Maydell	a27b463	2020-04-30 19:09:38 +0100	[diff] [blame]	401
				402	static struct {
				403	int nregs;
				404	int interleave;
				405	int spacing;
				406	} const neon_ls_element_type[11] = {
				407	{1, 4, 1},
				408	{1, 4, 2},
				409	{4, 1, 1},
				410	{2, 2, 2},
				411	{1, 3, 1},
				412	{1, 3, 2},
				413	{3, 1, 1},
				414	{1, 1, 1},
				415	{1, 2, 1},
				416	{1, 2, 2},
				417	{2, 1, 1}
				418	};
				419
				420	static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
				421	int stride)
				422	{
				423	if (rm != 15) {
				424	TCGv_i32 base;
				425
				426	base = load_reg(s, rn);
				427	if (rm == 13) {
				428	tcg_gen_addi_i32(base, base, stride);
				429	} else {
				430	TCGv_i32 index;
				431	index = load_reg(s, rm);
				432	tcg_gen_add_i32(base, base, index);
				433	tcg_temp_free_i32(index);
				434	}
				435	store_reg(s, rn, base);
				436	}
				437	}
				438
				439	static bool trans_VLDST_multiple(DisasContext s, arg_VLDST_multiple a)
				440	{
				441	/* Neon load/store multiple structures */
				442	int nregs, interleave, spacing, reg, n;
				443	MemOp endian = s->be_data;
				444	int mmu_idx = get_mem_index(s);
				445	int size = a->size;
				446	TCGv_i64 tmp64;
				447	TCGv_i32 addr, tmp;
				448
				449	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				450	return false;
				451	}
				452
				453	/* UNDEF accesses to D16-D31 if they don't exist */
				454	if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
				455	return false;
				456	}
				457	if (a->itype > 10) {
				458	return false;
				459	}
				460	/* Catch UNDEF cases for bad values of align field */
				461	switch (a->itype & 0xc) {
				462	case 4:
				463	if (a->align >= 2) {
				464	return false;
				465	}
				466	break;
				467	case 8:
				468	if (a->align == 3) {
				469	return false;
				470	}
				471	break;
				472	default:
				473	break;
				474	}
				475	nregs = neon_ls_element_type[a->itype].nregs;
				476	interleave = neon_ls_element_type[a->itype].interleave;
				477	spacing = neon_ls_element_type[a->itype].spacing;
				478	if (size == 3 && (interleave \| spacing) != 1) {
				479	return false;
				480	}
				481
				482	if (!vfp_access_check(s)) {
				483	return true;
				484	}
				485
				486	/* For our purposes, bytes are always little-endian. */
				487	if (size == 0) {
				488	endian = MO_LE;
				489	}
				490	/*
				491	* Consecutive little-endian elements from a single register
				492	* can be promoted to a larger little-endian operation.
				493	*/
				494	if (interleave == 1 && endian == MO_LE) {
				495	size = 3;
				496	}
				497	tmp64 = tcg_temp_new_i64();
				498	addr = tcg_temp_new_i32();
				499	tmp = tcg_const_i32(1 << size);
				500	load_reg_var(s, addr, a->rn);
				501	for (reg = 0; reg < nregs; reg++) {
				502	for (n = 0; n < 8 >> size; n++) {
				503	int xs;
				504	for (xs = 0; xs < interleave; xs++) {
				505	int tt = a->vd + reg + spacing * xs;
				506
				507	if (a->l) {
				508	gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian \| size);
				509	neon_store_element64(tt, n, size, tmp64);
				510	} else {
				511	neon_load_element64(tmp64, tt, n, size);
				512	gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian \| size);
				513	}
				514	tcg_gen_add_i32(addr, addr, tmp);
				515	}
				516	}
				517	}
				518	tcg_temp_free_i32(addr);
				519	tcg_temp_free_i32(tmp);
				520	tcg_temp_free_i64(tmp64);
				521
				522	gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
				523	return true;
				524	}
Peter Maydell	3698747	2020-04-30 19:09:39 +0100	[diff] [blame]	525
				526	static bool trans_VLD_all_lanes(DisasContext s, arg_VLD_all_lanes a)
				527	{
				528	/* Neon load single structure to all lanes */
				529	int reg, stride, vec_size;
				530	int vd = a->vd;
				531	int size = a->size;
				532	int nregs = a->n + 1;
				533	TCGv_i32 addr, tmp;
				534
				535	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				536	return false;
				537	}
				538
				539	/* UNDEF accesses to D16-D31 if they don't exist */
				540	if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
				541	return false;
				542	}
				543
				544	if (size == 3) {
				545	if (nregs != 4 \|\| a->a == 0) {
				546	return false;
				547	}
				548	/* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
				549	size = 2;
				550	}
				551	if (nregs == 1 && a->a == 1 && size == 0) {
				552	return false;
				553	}
				554	if (nregs == 3 && a->a == 1) {
				555	return false;
				556	}
				557
				558	if (!vfp_access_check(s)) {
				559	return true;
				560	}
				561
				562	/*
				563	* VLD1 to all lanes: T bit indicates how many Dregs to write.
				564	* VLD2/3/4 to all lanes: T bit indicates register stride.
				565	*/
				566	stride = a->t ? 2 : 1;
				567	vec_size = nregs == 1 ? stride * 8 : 8;
				568
				569	tmp = tcg_temp_new_i32();
				570	addr = tcg_temp_new_i32();
				571	load_reg_var(s, addr, a->rn);
				572	for (reg = 0; reg < nregs; reg++) {
				573	gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
				574	s->be_data \| size);
				575	if ((vd & 1) && vec_size == 16) {
				576	/*
				577	* We cannot write 16 bytes at once because the
				578	* destination is unaligned.
				579	*/
				580	tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
				581	8, 8, tmp);
				582	tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0),
				583	neon_reg_offset(vd, 0), 8, 8);
				584	} else {
				585	tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
				586	vec_size, vec_size, tmp);
				587	}
				588	tcg_gen_addi_i32(addr, addr, 1 << size);
				589	vd += stride;
				590	}
				591	tcg_temp_free_i32(tmp);
				592	tcg_temp_free_i32(addr);
				593
				594	gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);
				595
				596	return true;
				597	}
Peter Maydell	123ce4e	2020-04-30 19:09:40 +0100	[diff] [blame]	598
				599	static bool trans_VLDST_single(DisasContext s, arg_VLDST_single a)
				600	{
				601	/* Neon load/store single structure to one lane */
				602	int reg;
				603	int nregs = a->n + 1;
				604	int vd = a->vd;
				605	TCGv_i32 addr, tmp;
				606
				607	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				608	return false;
				609	}
				610
				611	/* UNDEF accesses to D16-D31 if they don't exist */
				612	if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
				613	return false;
				614	}
				615
				616	/* Catch the UNDEF cases. This is unavoidably a bit messy. */
				617	switch (nregs) {
				618	case 1:
				619	if (((a->align & (1 << a->size)) != 0) \|\|
				620	(a->size == 2 && ((a->align & 3) == 1 \|\| (a->align & 3) == 2))) {
				621	return false;
				622	}
				623	break;
				624	case 3:
				625	if ((a->align & 1) != 0) {
				626	return false;
				627	}
				628	/* fall through */
				629	case 2:
				630	if (a->size == 2 && (a->align & 2) != 0) {
				631	return false;
				632	}
				633	break;
				634	case 4:
				635	if ((a->size == 2) && ((a->align & 3) == 3)) {
				636	return false;
				637	}
				638	break;
				639	default:
				640	abort();
				641	}
				642	if ((vd + a->stride * (nregs - 1)) > 31) {
				643	/*
				644	* Attempts to write off the end of the register file are
				645	* UNPREDICTABLE; we choose to UNDEF because otherwise we would
				646	* access off the end of the array that holds the register data.
				647	*/
				648	return false;
				649	}
				650
				651	if (!vfp_access_check(s)) {
				652	return true;
				653	}
				654
				655	tmp = tcg_temp_new_i32();
				656	addr = tcg_temp_new_i32();
				657	load_reg_var(s, addr, a->rn);
				658	/*
				659	* TODO: if we implemented alignment exceptions, we should check
				660	* addr against the alignment encoded in a->align here.
				661	*/
				662	for (reg = 0; reg < nregs; reg++) {
				663	if (a->l) {
				664	gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
				665	s->be_data \| a->size);
				666	neon_store_element(vd, a->reg_idx, a->size, tmp);
				667	} else { /* Store */
				668	neon_load_element(tmp, vd, a->reg_idx, a->size);
				669	gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
				670	s->be_data \| a->size);
				671	}
				672	vd += a->stride;
				673	tcg_gen_addi_i32(addr, addr, 1 << a->size);
				674	}
				675	tcg_temp_free_i32(addr);
				676	tcg_temp_free_i32(tmp);
				677
				678	gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);
				679
				680	return true;
				681	}
Peter Maydell	a4e143a	2020-04-30 19:09:41 +0100	[diff] [blame]	682
				683	static bool do_3same(DisasContext s, arg_3same a, GVecGen3Fn fn)
				684	{
				685	int vec_size = a->q ? 16 : 8;
				686	int rd_ofs = neon_reg_offset(a->vd, 0);
				687	int rn_ofs = neon_reg_offset(a->vn, 0);
				688	int rm_ofs = neon_reg_offset(a->vm, 0);
				689
				690	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				691	return false;
				692	}
				693
				694	/* UNDEF accesses to D16-D31 if they don't exist. */
				695	if (!dc_isar_feature(aa32_simd_r32, s) &&
				696	((a->vd \| a->vn \| a->vm) & 0x10)) {
				697	return false;
				698	}
				699
				700	if ((a->vn \| a->vm \| a->vd) & a->q) {
				701	return false;
				702	}
				703
				704	if (!vfp_access_check(s)) {
				705	return true;
				706	}
				707
				708	fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
				709	return true;
				710	}
				711
				712	#define DO_3SAME(INSN, FUNC) \
				713	static bool trans_##INSN##_3s(DisasContext s, arg_3same a) \
				714	{ \
				715	return do_3same(s, a, FUNC); \
				716	}
				717
				718	DO_3SAME(VADD, tcg_gen_gvec_add)
				719	DO_3SAME(VSUB, tcg_gen_gvec_sub)
Peter Maydell	35a548e	2020-04-30 19:09:42 +0100	[diff] [blame]	720	DO_3SAME(VAND, tcg_gen_gvec_and)
				721	DO_3SAME(VBIC, tcg_gen_gvec_andc)
				722	DO_3SAME(VORR, tcg_gen_gvec_or)
				723	DO_3SAME(VORN, tcg_gen_gvec_orc)
				724	DO_3SAME(VEOR, tcg_gen_gvec_xor)
Richard Henderson	8161b75	2020-05-13 09:32:38 -0700	[diff] [blame]	725	DO_3SAME(VSHL_S, gen_gvec_sshl)
				726	DO_3SAME(VSHL_U, gen_gvec_ushl)
Richard Henderson	c7715b6	2020-05-13 09:32:39 -0700	[diff] [blame]	727	DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
				728	DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
				729	DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
				730	DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
Peter Maydell	35a548e	2020-04-30 19:09:42 +0100	[diff] [blame]	731
				732	/* These insns are all gvec_bitsel but with the inputs in various orders. */
				733	#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
				734	static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
				735	uint32_t rn_ofs, uint32_t rm_ofs, \
				736	uint32_t oprsz, uint32_t maxsz) \
				737	{ \
				738	tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \
				739	} \
				740	DO_3SAME(INSN, gen_##INSN##_3s)
				741
				742	DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
				743	DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
				744	DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
Peter Maydell	36b5931	2020-04-30 19:09:43 +0100	[diff] [blame]	745
				746	#define DO_3SAME_NO_SZ_3(INSN, FUNC) \
				747	static bool trans_##INSN##_3s(DisasContext s, arg_3same a) \
				748	{ \
				749	if (a->size == 3) { \
				750	return false; \
				751	} \
				752	return do_3same(s, a, FUNC); \
				753	}
				754
				755	DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
				756	DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
				757	DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
				758	DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
Peter Maydell	0de34fd	2020-04-30 19:09:46 +0100	[diff] [blame]	759	DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
Richard Henderson	2710632	2020-05-13 09:32:36 -0700	[diff] [blame]	760	DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
				761	DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
Richard Henderson	8161b75	2020-05-13 09:32:38 -0700	[diff] [blame]	762	DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
Peter Maydell	7715098	2020-05-12 17:38:52 +0100	[diff] [blame]	763	DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
				764	DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
				765	DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
				766	DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
Peter Maydell	02bd0cd	2020-04-30 19:09:44 +0100	[diff] [blame]	767
				768	#define DO_3SAME_CMP(INSN, COND) \
				769	static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
				770	uint32_t rn_ofs, uint32_t rm_ofs, \
				771	uint32_t oprsz, uint32_t maxsz) \
				772	{ \
				773	tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
				774	} \
				775	DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
				776
				777	DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
				778	DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
				779	DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
				780	DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
				781	DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
				782
Richard Henderson	effa992	2020-05-14 14:28:29 -0700	[diff] [blame]	783	#define WRAP_OOL_FN(WRAPNAME, FUNC) \
				784	static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \
				785	uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \
				786	{ \
				787	tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
				788	}
				789
				790	WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)
Peter Maydell	0de34fd	2020-04-30 19:09:46 +0100	[diff] [blame]	791
				792	static bool trans_VMUL_p_3s(DisasContext s, arg_3same a)
				793	{
				794	if (a->size != 0) {
				795	return false;
				796	}
				797	return do_3same(s, a, gen_VMUL_p_3s);
				798	}
Peter Maydell	a063569	2020-05-12 17:38:48 +0100	[diff] [blame]	799
				800	#define DO_VQRDMLAH(INSN, FUNC) \
				801	static bool trans_##INSN##_3s(DisasContext s, arg_3same a) \
				802	{ \
				803	if (!dc_isar_feature(aa32_rdm, s)) { \
				804	return false; \
				805	} \
				806	if (a->size != 1 && a->size != 2) { \
				807	return false; \
				808	} \
				809	return do_3same(s, a, FUNC); \
				810	}
				811
				812	DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
				813	DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
Peter Maydell	21290ed	2020-05-12 17:38:49 +0100	[diff] [blame]	814
Richard Henderson	afc8b7d	2020-05-14 14:28:30 -0700	[diff] [blame]	815	#define DO_SHA1(NAME, FUNC) \
				816	WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
				817	static bool trans_##NAME##_3s(DisasContext s, arg_3same a) \
				818	{ \
				819	if (!dc_isar_feature(aa32_sha1, s)) { \
				820	return false; \
				821	} \
				822	return do_3same(s, a, gen_##NAME##_3s); \
Peter Maydell	21290ed	2020-05-12 17:38:49 +0100	[diff] [blame]	823	}
				824
Richard Henderson	afc8b7d	2020-05-14 14:28:30 -0700	[diff] [blame]	825	DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
				826	DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
				827	DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
				828	DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)
Peter Maydell	21290ed	2020-05-12 17:38:49 +0100	[diff] [blame]	829
Richard Henderson	effa992	2020-05-14 14:28:29 -0700	[diff] [blame]	830	#define DO_SHA2(NAME, FUNC) \
				831	WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
				832	static bool trans_##NAME##_3s(DisasContext s, arg_3same a) \
				833	{ \
				834	if (!dc_isar_feature(aa32_sha2, s)) { \
				835	return false; \
				836	} \
				837	return do_3same(s, a, gen_##NAME##_3s); \
Peter Maydell	21290ed	2020-05-12 17:38:49 +0100	[diff] [blame]	838	}
				839
Richard Henderson	effa992	2020-05-14 14:28:29 -0700	[diff] [blame]	840	DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
				841	DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
				842	DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)
Peter Maydell	35d4352	2020-05-12 17:38:50 +0100	[diff] [blame]	843
				844	#define DO_3SAME_64(INSN, FUNC) \
				845	static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
				846	uint32_t rn_ofs, uint32_t rm_ofs, \
				847	uint32_t oprsz, uint32_t maxsz) \
				848	{ \
				849	static const GVecGen3 op = { .fni8 = FUNC }; \
				850	tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \
				851	} \
				852	DO_3SAME(INSN, gen_##INSN##_3s)
				853
				854	#define DO_3SAME_64_ENV(INSN, FUNC) \
				855	static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \
				856	{ \
				857	FUNC(d, cpu_env, n, m); \
				858	} \
				859	DO_3SAME_64(INSN, gen_##INSN##_elt)
				860
				861	DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
				862	DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
				863	DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
				864	DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
				865	DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
				866	DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
Peter Maydell	cb294bc	2020-05-12 17:38:51 +0100	[diff] [blame]	867
				868	#define DO_3SAME_32(INSN, FUNC) \
				869	static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
				870	uint32_t rn_ofs, uint32_t rm_ofs, \
				871	uint32_t oprsz, uint32_t maxsz) \
				872	{ \
				873	static const GVecGen3 ops[4] = { \
				874	{ .fni4 = gen_helper_neon_##FUNC##8 }, \
				875	{ .fni4 = gen_helper_neon_##FUNC##16 }, \
				876	{ .fni4 = gen_helper_neon_##FUNC##32 }, \
				877	{ 0 }, \
				878	}; \
				879	tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
				880	} \
				881	static bool trans_##INSN##_3s(DisasContext s, arg_3same a) \
				882	{ \
				883	if (a->size > 2) { \
				884	return false; \
				885	} \
				886	return do_3same(s, a, gen_##INSN##_3s); \
				887	}
				888
/*
 * Some helper functions need to be passed the cpu_env. In order
 * to use those with the gvec APIs like tcg_gen_gvec_3() we need
 * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
 * and which call a NeonGenTwoOpEnvFn().
 *
 * WRAP_ENV_FN defines WRAPNAME(d, n, m) as FUNC(d, cpu_env, n, m).
 */
#define WRAP_ENV_FN(WRAPNAME, FUNC)                                     \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m)            \
    {                                                                   \
        FUNC(d, cpu_env, n, m);                                         \
    }
				900
				901	#define DO_3SAME_32_ENV(INSN, FUNC) \
				902	WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \
				903	WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \
				904	WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \
				905	static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
				906	uint32_t rn_ofs, uint32_t rm_ofs, \
				907	uint32_t oprsz, uint32_t maxsz) \
				908	{ \
				909	static const GVecGen3 ops[4] = { \
				910	{ .fni4 = gen_##INSN##_tramp8 }, \
				911	{ .fni4 = gen_##INSN##_tramp16 }, \
				912	{ .fni4 = gen_##INSN##_tramp32 }, \
				913	{ 0 }, \
				914	}; \
				915	tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
				916	} \
				917	static bool trans_##INSN##_3s(DisasContext s, arg_3same a) \
				918	{ \
				919	if (a->size > 2) { \
				920	return false; \
				921	} \
				922	return do_3same(s, a, gen_##INSN##_3s); \
				923	}
				924
Peter Maydell	cb294bc	2020-05-12 17:38:51 +0100	[diff] [blame]	925	DO_3SAME_32(VHADD_S, hadd_s)
				926	DO_3SAME_32(VHADD_U, hadd_u)
Peter Maydell	8e44d03	2020-05-12 17:38:53 +0100	[diff] [blame]	927	DO_3SAME_32(VHSUB_S, hsub_s)
				928	DO_3SAME_32(VHSUB_U, hsub_u)
				929	DO_3SAME_32(VRHADD_S, rhadd_s)
				930	DO_3SAME_32(VRHADD_U, rhadd_u)
Peter Maydell	6812dfd	2020-05-12 17:38:54 +0100	[diff] [blame]	931	DO_3SAME_32(VRSHL_S, rshl_s)
				932	DO_3SAME_32(VRSHL_U, rshl_u)
				933
				934	DO_3SAME_32_ENV(VQSHL_S, qshl_s)
				935	DO_3SAME_32_ENV(VQSHL_U, qshl_u)
				936	DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
				937	DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
Peter Maydell	059c239	2020-05-12 17:38:55 +0100	[diff] [blame]	938
				939	static bool do_3same_pair(DisasContext s, arg_3same a, NeonGenTwoOpFn *fn)
				940	{
				941	/* Operations handled pairwise 32 bits at a time */
				942	TCGv_i32 tmp, tmp2, tmp3;
				943
				944	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				945	return false;
				946	}
				947
				948	/* UNDEF accesses to D16-D31 if they don't exist. */
				949	if (!dc_isar_feature(aa32_simd_r32, s) &&
				950	((a->vd \| a->vn \| a->vm) & 0x10)) {
				951	return false;
				952	}
				953
				954	if (a->size == 3) {
				955	return false;
				956	}
				957
				958	if (!vfp_access_check(s)) {
				959	return true;
				960	}
				961
				962	assert(a->q == 0); /* enforced by decode patterns */
				963
				964	/*
				965	* Note that we have to be careful not to clobber the source operands
				966	* in the "vm == vd" case by storing the result of the first pass too
				967	* early. Since Q is 0 there are always just two passes, so instead
				968	* of a complicated loop over each pass we just unroll.
				969	*/
				970	tmp = neon_load_reg(a->vn, 0);
				971	tmp2 = neon_load_reg(a->vn, 1);
				972	fn(tmp, tmp, tmp2);
				973	tcg_temp_free_i32(tmp2);
				974
				975	tmp3 = neon_load_reg(a->vm, 0);
				976	tmp2 = neon_load_reg(a->vm, 1);
				977	fn(tmp3, tmp3, tmp2);
				978	tcg_temp_free_i32(tmp2);
				979
				980	neon_store_reg(a->vd, 0, tmp);
				981	neon_store_reg(a->vd, 1, tmp3);
				982	return true;
				983	}
				984
				985	#define DO_3SAME_PAIR(INSN, func) \
				986	static bool trans_##INSN##_3s(DisasContext s, arg_3same a) \
				987	{ \
				988	static NeonGenTwoOpFn * const fns[] = { \
				989	gen_helper_neon_##func##8, \
				990	gen_helper_neon_##func##16, \
				991	gen_helper_neon_##func##32, \
				992	}; \
				993	if (a->size > 2) { \
				994	return false; \
				995	} \
				996	return do_3same_pair(s, a, fns[a->size]); \
				997	}
				998
				999	/* 32-bit pairwise ops end up the same as the elementwise versions. */
				1000	#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
				1001	#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
				1002	#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
				1003	#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
Peter Maydell	fa22827	2020-05-12 17:38:56 +0100	[diff] [blame]	1004	#define gen_helper_neon_padd_u32 tcg_gen_add_i32
Peter Maydell	059c239	2020-05-12 17:38:55 +0100	[diff] [blame]	1005
				1006	DO_3SAME_PAIR(VPMAX_S, pmax_s)
				1007	DO_3SAME_PAIR(VPMIN_S, pmin_s)
				1008	DO_3SAME_PAIR(VPMAX_U, pmax_u)
				1009	DO_3SAME_PAIR(VPMIN_U, pmin_u)
Peter Maydell	fa22827	2020-05-12 17:38:56 +0100	[diff] [blame]	1010	DO_3SAME_PAIR(VPADD, padd_u)
Peter Maydell	7ecc28b	2020-05-12 17:38:57 +0100	[diff] [blame]	1011
				1012	#define DO_3SAME_VQDMULH(INSN, FUNC) \
				1013	WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \
				1014	WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \
				1015	static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
				1016	uint32_t rn_ofs, uint32_t rm_ofs, \
				1017	uint32_t oprsz, uint32_t maxsz) \
				1018	{ \
				1019	static const GVecGen3 ops[2] = { \
				1020	{ .fni4 = gen_##INSN##_tramp16 }, \
				1021	{ .fni4 = gen_##INSN##_tramp32 }, \
				1022	}; \
				1023	tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
				1024	} \
				1025	static bool trans_##INSN##_3s(DisasContext s, arg_3same a) \
				1026	{ \
				1027	if (a->size != 1 && a->size != 2) { \
				1028	return false; \
				1029	} \
				1030	return do_3same(s, a, gen_##INSN##_3s); \
				1031	}
				1032
				1033	DO_3SAME_VQDMULH(VQDMULH, qdmulh)
				1034	DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
Peter Maydell	a26a352	2020-05-12 17:38:58 +0100	[diff] [blame]	1035
Peter Maydell	8aa71ea	2020-05-12 17:39:00 +0100	[diff] [blame]	1036	static bool do_3same_fp(DisasContext s, arg_3same a, VFPGen3OpSPFn *fn,
				1037	bool reads_vd)
				1038	{
				1039	/*
				1040	* FP operations handled elementwise 32 bits at a time.
				1041	* If reads_vd is true then the old value of Vd will be
				1042	* loaded before calling the callback function. This is
				1043	* used for multiply-accumulate type operations.
				1044	*/
				1045	TCGv_i32 tmp, tmp2;
				1046	int pass;
				1047
				1048	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				1049	return false;
				1050	}
				1051
				1052	/* UNDEF accesses to D16-D31 if they don't exist. */
				1053	if (!dc_isar_feature(aa32_simd_r32, s) &&
				1054	((a->vd \| a->vn \| a->vm) & 0x10)) {
				1055	return false;
				1056	}
				1057
				1058	if ((a->vn \| a->vm \| a->vd) & a->q) {
				1059	return false;
				1060	}
				1061
				1062	if (!vfp_access_check(s)) {
				1063	return true;
				1064	}
				1065
Peter Maydell	a84d1d1	2020-08-06 11:44:51 +0100	[diff] [blame]	1066	TCGv_ptr fpstatus = fpstatus_ptr(FPST_STD);
Peter Maydell	8aa71ea	2020-05-12 17:39:00 +0100	[diff] [blame]	1067	for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
				1068	tmp = neon_load_reg(a->vn, pass);
				1069	tmp2 = neon_load_reg(a->vm, pass);
				1070	if (reads_vd) {
				1071	TCGv_i32 tmp_rd = neon_load_reg(a->vd, pass);
				1072	fn(tmp_rd, tmp, tmp2, fpstatus);
				1073	neon_store_reg(a->vd, pass, tmp_rd);
				1074	tcg_temp_free_i32(tmp);
				1075	} else {
				1076	fn(tmp, tmp, tmp2, fpstatus);
				1077	neon_store_reg(a->vd, pass, tmp);
				1078	}
				1079	tcg_temp_free_i32(tmp2);
				1080	}
				1081	tcg_temp_free_ptr(fpstatus);
				1082	return true;
				1083	}
				1084
Peter Maydell	a26a352	2020-05-12 17:38:58 +0100	[diff] [blame]	1085	/*
				1086	* For all the functions using this macro, size == 1 means fp16,
				1087	* which is an architecture extension we don't implement yet.
				1088	*/
				1089	#define DO_3S_FP_GVEC(INSN,FUNC) \
				1090	static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
				1091	uint32_t rn_ofs, uint32_t rm_ofs, \
				1092	uint32_t oprsz, uint32_t maxsz) \
				1093	{ \
Peter Maydell	a84d1d1	2020-08-06 11:44:51 +0100	[diff] [blame]	1094	TCGv_ptr fpst = fpstatus_ptr(FPST_STD); \
Peter Maydell	a26a352	2020-05-12 17:38:58 +0100	[diff] [blame]	1095	tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \
				1096	oprsz, maxsz, 0, FUNC); \
				1097	tcg_temp_free_ptr(fpst); \
				1098	} \
				1099	static bool trans_##INSN##_fp_3s(DisasContext s, arg_3same a) \
				1100	{ \
				1101	if (a->size != 0) { \
				1102	/* TODO fp16 support */ \
				1103	return false; \
				1104	} \
				1105	return do_3same(s, a, gen_##INSN##_3s); \
				1106	}
				1107
				1108
				1109	DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s)
				1110	DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s)
				1111	DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s)
Peter Maydell	8aa71ea	2020-05-12 17:39:00 +0100	[diff] [blame]	1112	DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s)
				1113
				1114	/*
				1115	* For all the functions using this macro, size == 1 means fp16,
				1116	* which is an architecture extension we don't implement yet.
				1117	*/
				1118	#define DO_3S_FP(INSN,FUNC,READS_VD) \
				1119	static bool trans_##INSN##_fp_3s(DisasContext s, arg_3same a) \
				1120	{ \
				1121	if (a->size != 0) { \
				1122	/* TODO fp16 support */ \
				1123	return false; \
				1124	} \
				1125	return do_3same_fp(s, a, FUNC, READS_VD); \
				1126	}
				1127
Peter Maydell	727ff1d	2020-05-12 17:39:01 +0100	[diff] [blame]	1128	DO_3S_FP(VCEQ, gen_helper_neon_ceq_f32, false)
				1129	DO_3S_FP(VCGE, gen_helper_neon_cge_f32, false)
				1130	DO_3S_FP(VCGT, gen_helper_neon_cgt_f32, false)
				1131	DO_3S_FP(VACGE, gen_helper_neon_acge_f32, false)
				1132	DO_3S_FP(VACGT, gen_helper_neon_acgt_f32, false)
Peter Maydell	d5fdf9e	2020-05-12 17:39:03 +0100	[diff] [blame]	1133	DO_3S_FP(VMAX, gen_helper_vfp_maxs, false)
				1134	DO_3S_FP(VMIN, gen_helper_vfp_mins, false)
Peter Maydell	727ff1d	2020-05-12 17:39:01 +0100	[diff] [blame]	1135
Peter Maydell	8aa71ea	2020-05-12 17:39:00 +0100	[diff] [blame]	1136	static void gen_VMLA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
				1137	TCGv_ptr fpstatus)
				1138	{
				1139	gen_helper_vfp_muls(vn, vn, vm, fpstatus);
				1140	gen_helper_vfp_adds(vd, vd, vn, fpstatus);
				1141	}
				1142
				1143	static void gen_VMLS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
				1144	TCGv_ptr fpstatus)
				1145	{
				1146	gen_helper_vfp_muls(vn, vn, vm, fpstatus);
				1147	gen_helper_vfp_subs(vd, vd, vn, fpstatus);
				1148	}
				1149
				1150	DO_3S_FP(VMLA, gen_VMLA_fp_3s, true)
				1151	DO_3S_FP(VMLS, gen_VMLS_fp_3s, true)
Peter Maydell	ab97833	2020-05-12 17:38:59 +0100	[diff] [blame]	1152
Peter Maydell	d5fdf9e	2020-05-12 17:39:03 +0100	[diff] [blame]	1153	static bool trans_VMAXNM_fp_3s(DisasContext s, arg_3same a)
				1154	{
				1155	if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
				1156	return false;
				1157	}
				1158
				1159	if (a->size != 0) {
				1160	/* TODO fp16 support */
				1161	return false;
				1162	}
				1163
				1164	return do_3same_fp(s, a, gen_helper_vfp_maxnums, false);
				1165	}
				1166
				1167	static bool trans_VMINNM_fp_3s(DisasContext s, arg_3same a)
				1168	{
				1169	if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
				1170	return false;
				1171	}
				1172
				1173	if (a->size != 0) {
				1174	/* TODO fp16 support */
				1175	return false;
				1176	}
				1177
				1178	return do_3same_fp(s, a, gen_helper_vfp_minnums, false);
				1179	}
				1180
				1181	WRAP_ENV_FN(gen_VRECPS_tramp, gen_helper_recps_f32)
				1182
				1183	static void gen_VRECPS_fp_3s(unsigned vece, uint32_t rd_ofs,
				1184	uint32_t rn_ofs, uint32_t rm_ofs,
				1185	uint32_t oprsz, uint32_t maxsz)
				1186	{
				1187	static const GVecGen3 ops = { .fni4 = gen_VRECPS_tramp };
				1188	tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops);
				1189	}
				1190
				1191	static bool trans_VRECPS_fp_3s(DisasContext s, arg_3same a)
				1192	{
				1193	if (a->size != 0) {
				1194	/* TODO fp16 support */
				1195	return false;
				1196	}
				1197
				1198	return do_3same(s, a, gen_VRECPS_fp_3s);
				1199	}
				1200
				1201	WRAP_ENV_FN(gen_VRSQRTS_tramp, gen_helper_rsqrts_f32)
				1202
				1203	static void gen_VRSQRTS_fp_3s(unsigned vece, uint32_t rd_ofs,
				1204	uint32_t rn_ofs, uint32_t rm_ofs,
				1205	uint32_t oprsz, uint32_t maxsz)
				1206	{
				1207	static const GVecGen3 ops = { .fni4 = gen_VRSQRTS_tramp };
				1208	tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops);
				1209	}
				1210
				1211	static bool trans_VRSQRTS_fp_3s(DisasContext s, arg_3same a)
				1212	{
				1213	if (a->size != 0) {
				1214	/* TODO fp16 support */
				1215	return false;
				1216	}
				1217
				1218	return do_3same(s, a, gen_VRSQRTS_fp_3s);
				1219	}
				1220
Peter Maydell	e95485f	2020-05-12 17:39:04 +0100	[diff] [blame]	1221	static void gen_VFMA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
				1222	TCGv_ptr fpstatus)
				1223	{
				1224	gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
				1225	}
				1226
				1227	static bool trans_VFMA_fp_3s(DisasContext s, arg_3same a)
				1228	{
				1229	if (!dc_isar_feature(aa32_simdfmac, s)) {
				1230	return false;
				1231	}
				1232
				1233	if (a->size != 0) {
				1234	/* TODO fp16 support */
				1235	return false;
				1236	}
				1237
				1238	return do_3same_fp(s, a, gen_VFMA_fp_3s, true);
				1239	}
				1240
				1241	static void gen_VFMS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
				1242	TCGv_ptr fpstatus)
				1243	{
				1244	gen_helper_vfp_negs(vn, vn);
				1245	gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
				1246	}
				1247
				1248	static bool trans_VFMS_fp_3s(DisasContext s, arg_3same a)
				1249	{
				1250	if (!dc_isar_feature(aa32_simdfmac, s)) {
				1251	return false;
				1252	}
				1253
				1254	if (a->size != 0) {
				1255	/* TODO fp16 support */
				1256	return false;
				1257	}
				1258
				1259	return do_3same_fp(s, a, gen_VFMS_fp_3s, true);
				1260	}
				1261
Peter Maydell	ab97833	2020-05-12 17:38:59 +0100	[diff] [blame]	1262	static bool do_3same_fp_pair(DisasContext s, arg_3same a, VFPGen3OpSPFn *fn)
				1263	{
				1264	/* FP operations handled pairwise 32 bits at a time */
				1265	TCGv_i32 tmp, tmp2, tmp3;
				1266	TCGv_ptr fpstatus;
				1267
				1268	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				1269	return false;
				1270	}
				1271
				1272	/* UNDEF accesses to D16-D31 if they don't exist. */
				1273	if (!dc_isar_feature(aa32_simd_r32, s) &&
				1274	((a->vd \| a->vn \| a->vm) & 0x10)) {
				1275	return false;
				1276	}
				1277
				1278	if (!vfp_access_check(s)) {
				1279	return true;
				1280	}
				1281
				1282	assert(a->q == 0); /* enforced by decode patterns */
				1283
				1284	/*
				1285	* Note that we have to be careful not to clobber the source operands
				1286	* in the "vm == vd" case by storing the result of the first pass too
				1287	* early. Since Q is 0 there are always just two passes, so instead
				1288	* of a complicated loop over each pass we just unroll.
				1289	*/
Peter Maydell	a84d1d1	2020-08-06 11:44:51 +0100	[diff] [blame]	1290	fpstatus = fpstatus_ptr(FPST_STD);
Peter Maydell	ab97833	2020-05-12 17:38:59 +0100	[diff] [blame]	1291	tmp = neon_load_reg(a->vn, 0);
				1292	tmp2 = neon_load_reg(a->vn, 1);
				1293	fn(tmp, tmp, tmp2, fpstatus);
				1294	tcg_temp_free_i32(tmp2);
				1295
				1296	tmp3 = neon_load_reg(a->vm, 0);
				1297	tmp2 = neon_load_reg(a->vm, 1);
				1298	fn(tmp3, tmp3, tmp2, fpstatus);
				1299	tcg_temp_free_i32(tmp2);
				1300	tcg_temp_free_ptr(fpstatus);
				1301
				1302	neon_store_reg(a->vd, 0, tmp);
				1303	neon_store_reg(a->vd, 1, tmp3);
				1304	return true;
				1305	}
				1306
				1307	/*
				1308	* For all the functions using this macro, size == 1 means fp16,
				1309	* which is an architecture extension we don't implement yet.
				1310	*/
				1311	#define DO_3S_FP_PAIR(INSN,FUNC) \
				1312	static bool trans_##INSN##_fp_3s(DisasContext s, arg_3same a) \
				1313	{ \
				1314	if (a->size != 0) { \
				1315	/* TODO fp16 support */ \
				1316	return false; \
				1317	} \
				1318	return do_3same_fp_pair(s, a, FUNC); \
				1319	}
				1320
				1321	DO_3S_FP_PAIR(VPADD, gen_helper_vfp_adds)
				1322	DO_3S_FP_PAIR(VPMAX, gen_helper_vfp_maxs)
				1323	DO_3S_FP_PAIR(VPMIN, gen_helper_vfp_mins)
Peter Maydell	d3c8c73	2020-05-22 15:55:12 +0100	[diff] [blame]	1324
				1325	static bool do_vector_2sh(DisasContext s, arg_2reg_shift a, GVecGen2iFn *fn)
				1326	{
				1327	/* Handle a 2-reg-shift insn which can be vectorized. */
				1328	int vec_size = a->q ? 16 : 8;
				1329	int rd_ofs = neon_reg_offset(a->vd, 0);
				1330	int rm_ofs = neon_reg_offset(a->vm, 0);
				1331
				1332	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				1333	return false;
				1334	}
				1335
				1336	/* UNDEF accesses to D16-D31 if they don't exist. */
				1337	if (!dc_isar_feature(aa32_simd_r32, s) &&
				1338	((a->vd \| a->vm) & 0x10)) {
				1339	return false;
				1340	}
				1341
				1342	if ((a->vm \| a->vd) & a->q) {
				1343	return false;
				1344	}
				1345
				1346	if (!vfp_access_check(s)) {
				1347	return true;
				1348	}
				1349
				1350	fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
				1351	return true;
				1352	}
				1353
				1354	#define DO_2SH(INSN, FUNC) \
				1355	static bool trans_##INSN##_2sh(DisasContext s, arg_2reg_shift a) \
				1356	{ \
				1357	return do_vector_2sh(s, a, FUNC); \
				1358	} \
				1359
				1360	DO_2SH(VSHL, tcg_gen_gvec_shli)
				1361	DO_2SH(VSLI, gen_gvec_sli)
Peter Maydell	434f71e	2020-05-22 15:55:14 +0100	[diff] [blame]	1362	DO_2SH(VSRI, gen_gvec_sri)
				1363	DO_2SH(VSRA_S, gen_gvec_ssra)
				1364	DO_2SH(VSRA_U, gen_gvec_usra)
				1365	DO_2SH(VRSHR_S, gen_gvec_srshr)
				1366	DO_2SH(VRSHR_U, gen_gvec_urshr)
				1367	DO_2SH(VRSRA_S, gen_gvec_srsra)
				1368	DO_2SH(VRSRA_U, gen_gvec_ursra)
Peter Maydell	66432d6	2020-05-22 15:55:13 +0100	[diff] [blame]	1369
				1370	static bool trans_VSHR_S_2sh(DisasContext s, arg_2reg_shift a)
				1371	{
				1372	/* Signed shift out of range results in all-sign-bits */
				1373	a->shift = MIN(a->shift, (8 << a->size) - 1);
				1374	return do_vector_2sh(s, a, tcg_gen_gvec_sari);
				1375	}
				1376
				1377	static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
				1378	int64_t shift, uint32_t oprsz, uint32_t maxsz)
				1379	{
				1380	tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
				1381	}
				1382
				1383	static bool trans_VSHR_U_2sh(DisasContext s, arg_2reg_shift a)
				1384	{
				1385	/* Shift out of range is architecturally valid and results in zero. */
				1386	if (a->shift >= (8 << a->size)) {
				1387	return do_vector_2sh(s, a, gen_zero_rd_2sh);
				1388	} else {
				1389	return do_vector_2sh(s, a, tcg_gen_gvec_shri);
				1390	}
				1391	}
Peter Maydell	37bfce8	2020-05-22 15:55:15 +0100	[diff] [blame]	1392
				1393	static bool do_2shift_env_64(DisasContext s, arg_2reg_shift a,
				1394	NeonGenTwo64OpEnvFn *fn)
				1395	{
				1396	/*
				1397	* 2-reg-and-shift operations, size == 3 case, where the
				1398	* function needs to be passed cpu_env.
				1399	*/
				1400	TCGv_i64 constimm;
				1401	int pass;
				1402
				1403	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				1404	return false;
				1405	}
				1406
				1407	/* UNDEF accesses to D16-D31 if they don't exist. */
				1408	if (!dc_isar_feature(aa32_simd_r32, s) &&
				1409	((a->vd \| a->vm) & 0x10)) {
				1410	return false;
				1411	}
				1412
				1413	if ((a->vm \| a->vd) & a->q) {
				1414	return false;
				1415	}
				1416
				1417	if (!vfp_access_check(s)) {
				1418	return true;
				1419	}
				1420
				1421	/*
				1422	* To avoid excessive duplication of ops we implement shift
				1423	* by immediate using the variable shift operations.
				1424	*/
				1425	constimm = tcg_const_i64(dup_const(a->size, a->shift));
				1426
				1427	for (pass = 0; pass < a->q + 1; pass++) {
				1428	TCGv_i64 tmp = tcg_temp_new_i64();
				1429
				1430	neon_load_reg64(tmp, a->vm + pass);
				1431	fn(tmp, cpu_env, tmp, constimm);
				1432	neon_store_reg64(tmp, a->vd + pass);
Peter Maydell	a4f67e1	2020-06-16 10:32:26 +0100	[diff] [blame]	1433	tcg_temp_free_i64(tmp);
Peter Maydell	37bfce8	2020-05-22 15:55:15 +0100	[diff] [blame]	1434	}
				1435	tcg_temp_free_i64(constimm);
				1436	return true;
				1437	}
				1438
				1439	static bool do_2shift_env_32(DisasContext s, arg_2reg_shift a,
				1440	NeonGenTwoOpEnvFn *fn)
				1441	{
				1442	/*
				1443	* 2-reg-and-shift operations, size < 3 case, where the
				1444	* helper needs to be passed cpu_env.
				1445	*/
				1446	TCGv_i32 constimm;
				1447	int pass;
				1448
				1449	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				1450	return false;
				1451	}
				1452
				1453	/* UNDEF accesses to D16-D31 if they don't exist. */
				1454	if (!dc_isar_feature(aa32_simd_r32, s) &&
				1455	((a->vd \| a->vm) & 0x10)) {
				1456	return false;
				1457	}
				1458
				1459	if ((a->vm \| a->vd) & a->q) {
				1460	return false;
				1461	}
				1462
				1463	if (!vfp_access_check(s)) {
				1464	return true;
				1465	}
				1466
				1467	/*
				1468	* To avoid excessive duplication of ops we implement shift
				1469	* by immediate using the variable shift operations.
				1470	*/
				1471	constimm = tcg_const_i32(dup_const(a->size, a->shift));
				1472
				1473	for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
				1474	TCGv_i32 tmp = neon_load_reg(a->vm, pass);
				1475	fn(tmp, cpu_env, tmp, constimm);
				1476	neon_store_reg(a->vd, pass, tmp);
				1477	}
				1478	tcg_temp_free_i32(constimm);
				1479	return true;
				1480	}
				1481
				1482	#define DO_2SHIFT_ENV(INSN, FUNC) \
				1483	static bool trans_##INSN##_64_2sh(DisasContext s, arg_2reg_shift a) \
				1484	{ \
				1485	return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \
				1486	} \
				1487	static bool trans_##INSN##_2sh(DisasContext s, arg_2reg_shift a) \
				1488	{ \
				1489	static NeonGenTwoOpEnvFn * const fns[] = { \
				1490	gen_helper_neon_##FUNC##8, \
				1491	gen_helper_neon_##FUNC##16, \
				1492	gen_helper_neon_##FUNC##32, \
				1493	}; \
				1494	assert(a->size < ARRAY_SIZE(fns)); \
				1495	return do_2shift_env_32(s, a, fns[a->size]); \
				1496	}
				1497
				1498	DO_2SHIFT_ENV(VQSHLU, qshlu_s)
				1499	DO_2SHIFT_ENV(VQSHL_U, qshl_u)
				1500	DO_2SHIFT_ENV(VQSHL_S, qshl_s)
Peter Maydell	712182d	2020-05-22 15:55:16 +0100	[diff] [blame]	1501
				1502	static bool do_2shift_narrow_64(DisasContext s, arg_2reg_shift a,
				1503	NeonGenTwo64OpFn *shiftfn,
				1504	NeonGenNarrowEnvFn *narrowfn)
				1505	{
				1506	/* 2-reg-and-shift narrowing-shift operations, size == 3 case */
				1507	TCGv_i64 constimm, rm1, rm2;
				1508	TCGv_i32 rd;
				1509
				1510	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				1511	return false;
				1512	}
				1513
				1514	/* UNDEF accesses to D16-D31 if they don't exist. */
				1515	if (!dc_isar_feature(aa32_simd_r32, s) &&
				1516	((a->vd \| a->vm) & 0x10)) {
				1517	return false;
				1518	}
				1519
				1520	if (a->vm & 1) {
				1521	return false;
				1522	}
				1523
				1524	if (!vfp_access_check(s)) {
				1525	return true;
				1526	}
				1527
				1528	/*
				1529	* This is always a right shift, and the shiftfn is always a
				1530	* left-shift helper, which thus needs the negated shift count.
				1531	*/
				1532	constimm = tcg_const_i64(-a->shift);
				1533	rm1 = tcg_temp_new_i64();
				1534	rm2 = tcg_temp_new_i64();
				1535
				1536	/* Load both inputs first to avoid potential overwrite if rm == rd */
				1537	neon_load_reg64(rm1, a->vm);
				1538	neon_load_reg64(rm2, a->vm + 1);
				1539
				1540	shiftfn(rm1, rm1, constimm);
				1541	rd = tcg_temp_new_i32();
				1542	narrowfn(rd, cpu_env, rm1);
				1543	neon_store_reg(a->vd, 0, rd);
				1544
				1545	shiftfn(rm2, rm2, constimm);
				1546	rd = tcg_temp_new_i32();
				1547	narrowfn(rd, cpu_env, rm2);
				1548	neon_store_reg(a->vd, 1, rd);
				1549
				1550	tcg_temp_free_i64(rm1);
				1551	tcg_temp_free_i64(rm2);
				1552	tcg_temp_free_i64(constimm);
				1553
				1554	return true;
				1555	}
				1556
				1557	static bool do_2shift_narrow_32(DisasContext s, arg_2reg_shift a,
				1558	NeonGenTwoOpFn *shiftfn,
				1559	NeonGenNarrowEnvFn *narrowfn)
				1560	{
				1561	/* 2-reg-and-shift narrowing-shift operations, size < 3 case */
				1562	TCGv_i32 constimm, rm1, rm2, rm3, rm4;
				1563	TCGv_i64 rtmp;
				1564	uint32_t imm;
				1565
				1566	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				1567	return false;
				1568	}
				1569
				1570	/* UNDEF accesses to D16-D31 if they don't exist. */
				1571	if (!dc_isar_feature(aa32_simd_r32, s) &&
				1572	((a->vd \| a->vm) & 0x10)) {
				1573	return false;
				1574	}
				1575
				1576	if (a->vm & 1) {
				1577	return false;
				1578	}
				1579
				1580	if (!vfp_access_check(s)) {
				1581	return true;
				1582	}
				1583
				1584	/*
				1585	* This is always a right shift, and the shiftfn is always a
				1586	* left-shift helper, which thus needs the negated shift count
				1587	* duplicated into each lane of the immediate value.
				1588	*/
				1589	if (a->size == 1) {
				1590	imm = (uint16_t)(-a->shift);
				1591	imm \|= imm << 16;
				1592	} else {
				1593	/* size == 2 */
				1594	imm = -a->shift;
				1595	}
				1596	constimm = tcg_const_i32(imm);
				1597
				1598	/* Load all inputs first to avoid potential overwrite */
				1599	rm1 = neon_load_reg(a->vm, 0);
				1600	rm2 = neon_load_reg(a->vm, 1);
				1601	rm3 = neon_load_reg(a->vm + 1, 0);
				1602	rm4 = neon_load_reg(a->vm + 1, 1);
				1603	rtmp = tcg_temp_new_i64();
				1604
				1605	shiftfn(rm1, rm1, constimm);
				1606	shiftfn(rm2, rm2, constimm);
				1607
				1608	tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
				1609	tcg_temp_free_i32(rm2);
				1610
				1611	narrowfn(rm1, cpu_env, rtmp);
				1612	neon_store_reg(a->vd, 0, rm1);
				1613
				1614	shiftfn(rm3, rm3, constimm);
				1615	shiftfn(rm4, rm4, constimm);
				1616	tcg_temp_free_i32(constimm);
				1617
				1618	tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
				1619	tcg_temp_free_i32(rm4);
				1620
				1621	narrowfn(rm3, cpu_env, rtmp);
				1622	tcg_temp_free_i64(rtmp);
				1623	neon_store_reg(a->vd, 1, rm3);
				1624	return true;
				1625	}
				1626
				1627	#define DO_2SN_64(INSN, FUNC, NARROWFUNC) \
				1628	static bool trans_##INSN##_2sh(DisasContext s, arg_2reg_shift a) \
				1629	{ \
				1630	return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \
				1631	}
				1632	#define DO_2SN_32(INSN, FUNC, NARROWFUNC) \
				1633	static bool trans_##INSN##_2sh(DisasContext s, arg_2reg_shift a) \
				1634	{ \
				1635	return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \
				1636	}
				1637
				1638	static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
				1639	{
				1640	tcg_gen_extrl_i64_i32(dest, src);
				1641	}
				1642
				1643	static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
				1644	{
				1645	gen_helper_neon_narrow_u16(dest, src);
				1646	}
				1647
				1648	static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
				1649	{
				1650	gen_helper_neon_narrow_u8(dest, src);
				1651	}
				1652
				1653	DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
				1654	DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
				1655	DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)
				1656
				1657	DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
				1658	DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
				1659	DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)
				1660
				1661	DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
				1662	DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
				1663	DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)
				1664
				1665	DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
				1666	DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
				1667	DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
Peter Maydell	b4a3a77	2020-05-22 15:55:17 +0100	[diff] [blame]	1668	DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
				1669	DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
				1670	DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)
				1671
				1672	DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
				1673	DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
				1674	DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)
				1675
				1676	DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
				1677	DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
				1678	DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)
				1679
				1680	DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
				1681	DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
				1682	DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
Peter Maydell	968bf84	2020-05-22 15:55:18 +0100	[diff] [blame]	1683
				1684	static bool do_vshll_2sh(DisasContext s, arg_2reg_shift a,
				1685	NeonGenWidenFn *widenfn, bool u)
				1686	{
				1687	TCGv_i64 tmp;
				1688	TCGv_i32 rm0, rm1;
				1689	uint64_t widen_mask = 0;
				1690
				1691	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				1692	return false;
				1693	}
				1694
				1695	/* UNDEF accesses to D16-D31 if they don't exist. */
				1696	if (!dc_isar_feature(aa32_simd_r32, s) &&
				1697	((a->vd \| a->vm) & 0x10)) {
				1698	return false;
				1699	}
				1700
				1701	if (a->vd & 1) {
				1702	return false;
				1703	}
				1704
				1705	if (!vfp_access_check(s)) {
				1706	return true;
				1707	}
				1708
				1709	/*
				1710	* This is a widen-and-shift operation. The shift is always less
				1711	* than the width of the source type, so after widening the input
				1712	* vector we can simply shift the whole 64-bit widened register,
				1713	* and then clear the potential overflow bits resulting from left
				1714	* bits of the narrow input appearing as right bits of the left
				1715	* neighbour narrow input. Calculate a mask of bits to clear.
				1716	*/
				1717	if ((a->shift != 0) && (a->size < 2 \|\| u)) {
				1718	int esize = 8 << a->size;
				1719	widen_mask = MAKE_64BIT_MASK(0, esize);
				1720	widen_mask >>= esize - a->shift;
				1721	widen_mask = dup_const(a->size + 1, widen_mask);
				1722	}
				1723
				1724	rm0 = neon_load_reg(a->vm, 0);
				1725	rm1 = neon_load_reg(a->vm, 1);
				1726	tmp = tcg_temp_new_i64();
				1727
				1728	widenfn(tmp, rm0);
Peter Maydell	9593a39	2020-06-16 10:32:25 +0100	[diff] [blame]	1729	tcg_temp_free_i32(rm0);
Peter Maydell	968bf84	2020-05-22 15:55:18 +0100	[diff] [blame]	1730	if (a->shift != 0) {
				1731	tcg_gen_shli_i64(tmp, tmp, a->shift);
				1732	tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
				1733	}
				1734	neon_store_reg64(tmp, a->vd);
				1735
				1736	widenfn(tmp, rm1);
Peter Maydell	9593a39	2020-06-16 10:32:25 +0100	[diff] [blame]	1737	tcg_temp_free_i32(rm1);
Peter Maydell	968bf84	2020-05-22 15:55:18 +0100	[diff] [blame]	1738	if (a->shift != 0) {
				1739	tcg_gen_shli_i64(tmp, tmp, a->shift);
				1740	tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
				1741	}
				1742	neon_store_reg64(tmp, a->vd + 1);
				1743	tcg_temp_free_i64(tmp);
				1744	return true;
				1745	}
				1746
				1747	static bool trans_VSHLL_S_2sh(DisasContext s, arg_2reg_shift a)
				1748	{
Peter Maydell	448f0e5	2020-06-16 10:32:26 +0100	[diff] [blame]	1749	static NeonGenWidenFn * const widenfn[] = {
Peter Maydell	968bf84	2020-05-22 15:55:18 +0100	[diff] [blame]	1750	gen_helper_neon_widen_s8,
				1751	gen_helper_neon_widen_s16,
				1752	tcg_gen_ext_i32_i64,
				1753	};
				1754	return do_vshll_2sh(s, a, widenfn[a->size], false);
				1755	}
				1756
				1757	static bool trans_VSHLL_U_2sh(DisasContext s, arg_2reg_shift a)
				1758	{
Peter Maydell	448f0e5	2020-06-16 10:32:26 +0100	[diff] [blame]	1759	static NeonGenWidenFn * const widenfn[] = {
Peter Maydell	968bf84	2020-05-22 15:55:18 +0100	[diff] [blame]	1760	gen_helper_neon_widen_u8,
				1761	gen_helper_neon_widen_u16,
				1762	tcg_gen_extu_i32_i64,
				1763	};
				1764	return do_vshll_2sh(s, a, widenfn[a->size], true);
				1765	}
Peter Maydell	3da26f1	2020-05-22 15:55:19 +0100	[diff] [blame]	1766
				1767	static bool do_fp_2sh(DisasContext s, arg_2reg_shift a,
Peter Maydell	5de3fd0	2020-06-16 18:08:33 +0100	[diff] [blame]	1768	NeonGenTwoSingleOpFn *fn)
Peter Maydell	3da26f1	2020-05-22 15:55:19 +0100	[diff] [blame]	1769	{
				1770	/* FP operations in 2-reg-and-shift group */
				1771	TCGv_i32 tmp, shiftv;
				1772	TCGv_ptr fpstatus;
				1773	int pass;
				1774
				1775	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				1776	return false;
				1777	}
				1778
				1779	/* UNDEF accesses to D16-D31 if they don't exist. */
				1780	if (!dc_isar_feature(aa32_simd_r32, s) &&
				1781	((a->vd \| a->vm) & 0x10)) {
				1782	return false;
				1783	}
				1784
				1785	if ((a->vm \| a->vd) & a->q) {
				1786	return false;
				1787	}
				1788
				1789	if (!vfp_access_check(s)) {
				1790	return true;
				1791	}
				1792
Peter Maydell	a84d1d1	2020-08-06 11:44:51 +0100	[diff] [blame]	1793	fpstatus = fpstatus_ptr(FPST_STD);
Peter Maydell	3da26f1	2020-05-22 15:55:19 +0100	[diff] [blame]	1794	shiftv = tcg_const_i32(a->shift);
				1795	for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
				1796	tmp = neon_load_reg(a->vm, pass);
				1797	fn(tmp, tmp, shiftv, fpstatus);
				1798	neon_store_reg(a->vd, pass, tmp);
				1799	}
				1800	tcg_temp_free_ptr(fpstatus);
				1801	tcg_temp_free_i32(shiftv);
				1802	return true;
				1803	}
				1804
				1805	#define DO_FP_2SH(INSN, FUNC) \
				1806	static bool trans_##INSN##_2sh(DisasContext s, arg_2reg_shift a) \
				1807	{ \
				1808	return do_fp_2sh(s, a, FUNC); \
				1809	}
				1810
				1811	DO_FP_2SH(VCVT_SF, gen_helper_vfp_sltos)
				1812	DO_FP_2SH(VCVT_UF, gen_helper_vfp_ultos)
				1813	DO_FP_2SH(VCVT_FS, gen_helper_vfp_tosls_round_to_zero)
				1814	DO_FP_2SH(VCVT_FU, gen_helper_vfp_touls_round_to_zero)
Peter Maydell	2c35a39	2020-05-22 15:55:20 +0100	[diff] [blame]	1815
				1816	static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
				1817	{
				1818	/*
				1819	* Expand the encoded constant.
				1820	* Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
				1821	* We choose to not special-case this and will behave as if a
				1822	* valid constant encoding of 0 had been given.
				1823	* cmode = 15 op = 1 must UNDEF; we assume decode has handled that.
				1824	*/
				1825	switch (cmode) {
				1826	case 0: case 1:
				1827	/* no-op */
				1828	break;
				1829	case 2: case 3:
				1830	imm <<= 8;
				1831	break;
				1832	case 4: case 5:
				1833	imm <<= 16;
				1834	break;
				1835	case 6: case 7:
				1836	imm <<= 24;
				1837	break;
				1838	case 8: case 9:
				1839	imm \|= imm << 16;
				1840	break;
				1841	case 10: case 11:
				1842	imm = (imm << 8) \| (imm << 24);
				1843	break;
				1844	case 12:
				1845	imm = (imm << 8) \| 0xff;
				1846	break;
				1847	case 13:
				1848	imm = (imm << 16) \| 0xffff;
				1849	break;
				1850	case 14:
				1851	if (op) {
				1852	/*
				1853	* This is the only case where the top and bottom 32 bits
				1854	* of the encoded constant differ.
				1855	*/
				1856	uint64_t imm64 = 0;
				1857	int n;
				1858
				1859	for (n = 0; n < 8; n++) {
				1860	if (imm & (1 << n)) {
				1861	imm64 \|= (0xffULL << (n * 8));
				1862	}
				1863	}
				1864	return imm64;
				1865	}
				1866	imm \|= (imm << 8) \| (imm << 16) \| (imm << 24);
				1867	break;
				1868	case 15:
				1869	imm = ((imm & 0x80) << 24) \| ((imm & 0x3f) << 19)
				1870	\| ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
				1871	break;
				1872	}
				1873	if (op) {
				1874	imm = ~imm;
				1875	}
				1876	return dup_const(MO_32, imm);
				1877	}
				1878
				1879	static bool do_1reg_imm(DisasContext s, arg_1reg_imm a,
				1880	GVecGen2iFn *fn)
				1881	{
				1882	uint64_t imm;
				1883	int reg_ofs, vec_size;
				1884
				1885	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				1886	return false;
				1887	}
				1888
				1889	/* UNDEF accesses to D16-D31 if they don't exist. */
				1890	if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
				1891	return false;
				1892	}
				1893
				1894	if (a->vd & a->q) {
				1895	return false;
				1896	}
				1897
				1898	if (!vfp_access_check(s)) {
				1899	return true;
				1900	}
				1901
				1902	reg_ofs = neon_reg_offset(a->vd, 0);
				1903	vec_size = a->q ? 16 : 8;
				1904	imm = asimd_imm_const(a->imm, a->cmode, a->op);
				1905
				1906	fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
				1907	return true;
				1908	}
				1909
				1910	static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
				1911	int64_t c, uint32_t oprsz, uint32_t maxsz)
				1912	{
				1913	tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
				1914	}
				1915
				1916	static bool trans_Vimm_1r(DisasContext s, arg_1reg_imm a)
				1917	{
				1918	/* Handle decode of cmode/op here between VORR/VBIC/VMOV */
				1919	GVecGen2iFn *fn;
				1920
				1921	if ((a->cmode & 1) && a->cmode < 12) {
				1922	/* for op=1, the imm will be inverted, so BIC becomes AND. */
				1923	fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
				1924	} else {
				1925	/* There is one unallocated cmode/op combination in this space */
				1926	if (a->cmode == 15 && a->op == 1) {
				1927	return false;
				1928	}
				1929	fn = gen_VMOV_1r;
				1930	}
				1931	return do_1reg_imm(s, a, fn);
				1932	}
Peter Maydell	b28be09	2020-06-16 10:32:25 +0100	[diff] [blame]	1933
				1934	static bool do_prewiden_3d(DisasContext s, arg_3diff a,
				1935	NeonGenWidenFn *widenfn,
				1936	NeonGenTwo64OpFn *opfn,
				1937	bool src1_wide)
				1938	{
				1939	/* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */
				1940	TCGv_i64 rn0_64, rn1_64, rm_64;
				1941	TCGv_i32 rm;
				1942
				1943	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				1944	return false;
				1945	}
				1946
				1947	/* UNDEF accesses to D16-D31 if they don't exist. */
				1948	if (!dc_isar_feature(aa32_simd_r32, s) &&
				1949	((a->vd \| a->vn \| a->vm) & 0x10)) {
				1950	return false;
				1951	}
				1952
				1953	if (!widenfn \|\| !opfn) {
				1954	/* size == 3 case, which is an entirely different insn group */
				1955	return false;
				1956	}
				1957
				1958	if ((a->vd & 1) \|\| (src1_wide && (a->vn & 1))) {
				1959	return false;
				1960	}
				1961
				1962	if (!vfp_access_check(s)) {
				1963	return true;
				1964	}
				1965
				1966	rn0_64 = tcg_temp_new_i64();
				1967	rn1_64 = tcg_temp_new_i64();
				1968	rm_64 = tcg_temp_new_i64();
				1969
				1970	if (src1_wide) {
				1971	neon_load_reg64(rn0_64, a->vn);
				1972	} else {
				1973	TCGv_i32 tmp = neon_load_reg(a->vn, 0);
				1974	widenfn(rn0_64, tmp);
				1975	tcg_temp_free_i32(tmp);
				1976	}
				1977	rm = neon_load_reg(a->vm, 0);
				1978
				1979	widenfn(rm_64, rm);
				1980	tcg_temp_free_i32(rm);
				1981	opfn(rn0_64, rn0_64, rm_64);
				1982
				1983	/*
				1984	* Load second pass inputs before storing the first pass result, to
				1985	* avoid incorrect results if a narrow input overlaps with the result.
				1986	*/
				1987	if (src1_wide) {
				1988	neon_load_reg64(rn1_64, a->vn + 1);
				1989	} else {
				1990	TCGv_i32 tmp = neon_load_reg(a->vn, 1);
				1991	widenfn(rn1_64, tmp);
				1992	tcg_temp_free_i32(tmp);
				1993	}
				1994	rm = neon_load_reg(a->vm, 1);
				1995
				1996	neon_store_reg64(rn0_64, a->vd);
				1997
				1998	widenfn(rm_64, rm);
				1999	tcg_temp_free_i32(rm);
				2000	opfn(rn1_64, rn1_64, rm_64);
				2001	neon_store_reg64(rn1_64, a->vd + 1);
				2002
				2003	tcg_temp_free_i64(rn0_64);
				2004	tcg_temp_free_i64(rn1_64);
				2005	tcg_temp_free_i64(rm_64);
				2006
				2007	return true;
				2008	}
				2009
				2010	#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
				2011	static bool trans_##INSN##_3d(DisasContext s, arg_3diff a) \
				2012	{ \
				2013	static NeonGenWidenFn * const widenfn[] = { \
				2014	gen_helper_neon_widen_##S##8, \
				2015	gen_helper_neon_widen_##S##16, \
				2016	tcg_gen_##EXT##_i32_i64, \
				2017	NULL, \
				2018	}; \
				2019	static NeonGenTwo64OpFn * const addfn[] = { \
				2020	gen_helper_neon_##OP##l_u16, \
				2021	gen_helper_neon_##OP##l_u32, \
				2022	tcg_gen_##OP##_i64, \
				2023	NULL, \
				2024	}; \
				2025	return do_prewiden_3d(s, a, widenfn[a->size], \
				2026	addfn[a->size], SRC1WIDE); \
				2027	}
				2028
				2029	DO_PREWIDEN(VADDL_S, s, ext, add, false)
				2030	DO_PREWIDEN(VADDL_U, u, extu, add, false)
				2031	DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
				2032	DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
				2033	DO_PREWIDEN(VADDW_S, s, ext, add, true)
				2034	DO_PREWIDEN(VADDW_U, u, extu, add, true)
				2035	DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
				2036	DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
Peter Maydell	0fa1ab0	2020-06-16 10:32:25 +0100	[diff] [blame]	2037
				2038	static bool do_narrow_3d(DisasContext s, arg_3diff a,
				2039	NeonGenTwo64OpFn opfn, NeonGenNarrowFn narrowfn)
				2040	{
				2041	/* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
				2042	TCGv_i64 rn_64, rm_64;
				2043	TCGv_i32 rd0, rd1;
				2044
				2045	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				2046	return false;
				2047	}
				2048
				2049	/* UNDEF accesses to D16-D31 if they don't exist. */
				2050	if (!dc_isar_feature(aa32_simd_r32, s) &&
				2051	((a->vd \| a->vn \| a->vm) & 0x10)) {
				2052	return false;
				2053	}
				2054
				2055	if (!opfn \|\| !narrowfn) {
				2056	/* size == 3 case, which is an entirely different insn group */
				2057	return false;
				2058	}
				2059
				2060	if ((a->vn \| a->vm) & 1) {
				2061	return false;
				2062	}
				2063
				2064	if (!vfp_access_check(s)) {
				2065	return true;
				2066	}
				2067
				2068	rn_64 = tcg_temp_new_i64();
				2069	rm_64 = tcg_temp_new_i64();
				2070	rd0 = tcg_temp_new_i32();
				2071	rd1 = tcg_temp_new_i32();
				2072
				2073	neon_load_reg64(rn_64, a->vn);
				2074	neon_load_reg64(rm_64, a->vm);
				2075
				2076	opfn(rn_64, rn_64, rm_64);
				2077
				2078	narrowfn(rd0, rn_64);
				2079
				2080	neon_load_reg64(rn_64, a->vn + 1);
				2081	neon_load_reg64(rm_64, a->vm + 1);
				2082
				2083	opfn(rn_64, rn_64, rm_64);
				2084
				2085	narrowfn(rd1, rn_64);
				2086
				2087	neon_store_reg(a->vd, 0, rd0);
				2088	neon_store_reg(a->vd, 1, rd1);
				2089
				2090	tcg_temp_free_i64(rn_64);
				2091	tcg_temp_free_i64(rm_64);
				2092
				2093	return true;
				2094	}
				2095
				2096	#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \
				2097	static bool trans_##INSN##_3d(DisasContext s, arg_3diff a) \
				2098	{ \
				2099	static NeonGenTwo64OpFn * const addfn[] = { \
				2100	gen_helper_neon_##OP##l_u16, \
				2101	gen_helper_neon_##OP##l_u32, \
				2102	tcg_gen_##OP##_i64, \
				2103	NULL, \
				2104	}; \
				2105	static NeonGenNarrowFn * const narrowfn[] = { \
				2106	gen_helper_neon_##NARROWTYPE##_high_u8, \
				2107	gen_helper_neon_##NARROWTYPE##_high_u16, \
				2108	EXTOP, \
				2109	NULL, \
				2110	}; \
				2111	return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \
				2112	}
				2113
				2114	static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
				2115	{
				2116	tcg_gen_addi_i64(rn, rn, 1u << 31);
				2117	tcg_gen_extrh_i64_i32(rd, rn);
				2118	}
				2119
				2120	DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
				2121	DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
				2122	DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
				2123	DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
Peter Maydell	f5b2840	2020-06-16 10:32:25 +0100	[diff] [blame]	2124
				2125	static bool do_long_3d(DisasContext s, arg_3diff a,
				2126	NeonGenTwoOpWidenFn *opfn,
				2127	NeonGenTwo64OpFn *accfn)
				2128	{
				2129	/*
				2130	* 3-regs different lengths, long operations.
				2131	* These perform an operation on two inputs that returns a double-width
				2132	* result, and then possibly perform an accumulation operation of
				2133	* that result into the double-width destination.
				2134	*/
				2135	TCGv_i64 rd0, rd1, tmp;
				2136	TCGv_i32 rn, rm;
				2137
				2138	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				2139	return false;
				2140	}
				2141
				2142	/* UNDEF accesses to D16-D31 if they don't exist. */
				2143	if (!dc_isar_feature(aa32_simd_r32, s) &&
				2144	((a->vd \| a->vn \| a->vm) & 0x10)) {
				2145	return false;
				2146	}
				2147
				2148	if (!opfn) {
				2149	/* size == 3 case, which is an entirely different insn group */
				2150	return false;
				2151	}
				2152
				2153	if (a->vd & 1) {
				2154	return false;
				2155	}
				2156
				2157	if (!vfp_access_check(s)) {
				2158	return true;
				2159	}
				2160
				2161	rd0 = tcg_temp_new_i64();
				2162	rd1 = tcg_temp_new_i64();
				2163
				2164	rn = neon_load_reg(a->vn, 0);
				2165	rm = neon_load_reg(a->vm, 0);
				2166	opfn(rd0, rn, rm);
				2167	tcg_temp_free_i32(rn);
				2168	tcg_temp_free_i32(rm);
				2169
				2170	rn = neon_load_reg(a->vn, 1);
				2171	rm = neon_load_reg(a->vm, 1);
				2172	opfn(rd1, rn, rm);
				2173	tcg_temp_free_i32(rn);
				2174	tcg_temp_free_i32(rm);
				2175
				2176	/* Don't store results until after all loads: they might overlap */
				2177	if (accfn) {
				2178	tmp = tcg_temp_new_i64();
				2179	neon_load_reg64(tmp, a->vd);
				2180	accfn(tmp, tmp, rd0);
				2181	neon_store_reg64(tmp, a->vd);
				2182	neon_load_reg64(tmp, a->vd + 1);
				2183	accfn(tmp, tmp, rd1);
				2184	neon_store_reg64(tmp, a->vd + 1);
				2185	tcg_temp_free_i64(tmp);
				2186	} else {
				2187	neon_store_reg64(rd0, a->vd);
				2188	neon_store_reg64(rd1, a->vd + 1);
				2189	}
				2190
				2191	tcg_temp_free_i64(rd0);
				2192	tcg_temp_free_i64(rd1);
				2193
				2194	return true;
				2195	}
				2196
				2197	static bool trans_VABDL_S_3d(DisasContext s, arg_3diff a)
				2198	{
				2199	static NeonGenTwoOpWidenFn * const opfn[] = {
				2200	gen_helper_neon_abdl_s16,
				2201	gen_helper_neon_abdl_s32,
				2202	gen_helper_neon_abdl_s64,
				2203	NULL,
				2204	};
				2205
				2206	return do_long_3d(s, a, opfn[a->size], NULL);
				2207	}
				2208
				2209	static bool trans_VABDL_U_3d(DisasContext s, arg_3diff a)
				2210	{
				2211	static NeonGenTwoOpWidenFn * const opfn[] = {
				2212	gen_helper_neon_abdl_u16,
				2213	gen_helper_neon_abdl_u32,
				2214	gen_helper_neon_abdl_u64,
				2215	NULL,
				2216	};
				2217
				2218	return do_long_3d(s, a, opfn[a->size], NULL);
				2219	}
				2220
				2221	static bool trans_VABAL_S_3d(DisasContext s, arg_3diff a)
				2222	{
				2223	static NeonGenTwoOpWidenFn * const opfn[] = {
				2224	gen_helper_neon_abdl_s16,
				2225	gen_helper_neon_abdl_s32,
				2226	gen_helper_neon_abdl_s64,
				2227	NULL,
				2228	};
				2229	static NeonGenTwo64OpFn * const addfn[] = {
				2230	gen_helper_neon_addl_u16,
				2231	gen_helper_neon_addl_u32,
				2232	tcg_gen_add_i64,
				2233	NULL,
				2234	};
				2235
				2236	return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
				2237	}
				2238
				2239	static bool trans_VABAL_U_3d(DisasContext s, arg_3diff a)
				2240	{
				2241	static NeonGenTwoOpWidenFn * const opfn[] = {
				2242	gen_helper_neon_abdl_u16,
				2243	gen_helper_neon_abdl_u32,
				2244	gen_helper_neon_abdl_u64,
				2245	NULL,
				2246	};
				2247	static NeonGenTwo64OpFn * const addfn[] = {
				2248	gen_helper_neon_addl_u16,
				2249	gen_helper_neon_addl_u32,
				2250	tcg_gen_add_i64,
				2251	NULL,
				2252	};
				2253
				2254	return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
				2255	}
Peter Maydell	3a1d9eb	2020-06-16 10:32:26 +0100	[diff] [blame]	2256
				2257	static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
				2258	{
				2259	TCGv_i32 lo = tcg_temp_new_i32();
				2260	TCGv_i32 hi = tcg_temp_new_i32();
				2261
				2262	tcg_gen_muls2_i32(lo, hi, rn, rm);
				2263	tcg_gen_concat_i32_i64(rd, lo, hi);
				2264
				2265	tcg_temp_free_i32(lo);
				2266	tcg_temp_free_i32(hi);
				2267	}
				2268
				2269	static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
				2270	{
				2271	TCGv_i32 lo = tcg_temp_new_i32();
				2272	TCGv_i32 hi = tcg_temp_new_i32();
				2273
				2274	tcg_gen_mulu2_i32(lo, hi, rn, rm);
				2275	tcg_gen_concat_i32_i64(rd, lo, hi);
				2276
				2277	tcg_temp_free_i32(lo);
				2278	tcg_temp_free_i32(hi);
				2279	}
				2280
				2281	static bool trans_VMULL_S_3d(DisasContext s, arg_3diff a)
				2282	{
				2283	static NeonGenTwoOpWidenFn * const opfn[] = {
				2284	gen_helper_neon_mull_s8,
				2285	gen_helper_neon_mull_s16,
				2286	gen_mull_s32,
				2287	NULL,
				2288	};
				2289
				2290	return do_long_3d(s, a, opfn[a->size], NULL);
				2291	}
				2292
				2293	static bool trans_VMULL_U_3d(DisasContext s, arg_3diff a)
				2294	{
				2295	static NeonGenTwoOpWidenFn * const opfn[] = {
				2296	gen_helper_neon_mull_u8,
				2297	gen_helper_neon_mull_u16,
				2298	gen_mull_u32,
				2299	NULL,
				2300	};
				2301
				2302	return do_long_3d(s, a, opfn[a->size], NULL);
				2303	}
				2304
				2305	#define DO_VMLAL(INSN,MULL,ACC) \
				2306	static bool trans_##INSN##_3d(DisasContext s, arg_3diff a) \
				2307	{ \
				2308	static NeonGenTwoOpWidenFn * const opfn[] = { \
				2309	gen_helper_neon_##MULL##8, \
				2310	gen_helper_neon_##MULL##16, \
				2311	gen_##MULL##32, \
				2312	NULL, \
				2313	}; \
				2314	static NeonGenTwo64OpFn * const accfn[] = { \
				2315	gen_helper_neon_##ACC##l_u16, \
				2316	gen_helper_neon_##ACC##l_u32, \
				2317	tcg_gen_##ACC##_i64, \
				2318	NULL, \
				2319	}; \
				2320	return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \
				2321	}
				2322
				2323	DO_VMLAL(VMLAL_S,mull_s,add)
				2324	DO_VMLAL(VMLAL_U,mull_u,add)
				2325	DO_VMLAL(VMLSL_S,mull_s,sub)
				2326	DO_VMLAL(VMLSL_U,mull_u,sub)
Peter Maydell	9546ca5	2020-06-16 10:32:26 +0100	[diff] [blame]	2327
				2328	static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
				2329	{
				2330	gen_helper_neon_mull_s16(rd, rn, rm);
				2331	gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
				2332	}
				2333
				2334	static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
				2335	{
				2336	gen_mull_s32(rd, rn, rm);
				2337	gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
				2338	}
				2339
				2340	static bool trans_VQDMULL_3d(DisasContext s, arg_3diff a)
				2341	{
				2342	static NeonGenTwoOpWidenFn * const opfn[] = {
				2343	NULL,
				2344	gen_VQDMULL_16,
				2345	gen_VQDMULL_32,
				2346	NULL,
				2347	};
				2348
				2349	return do_long_3d(s, a, opfn[a->size], NULL);
				2350	}
				2351
				2352	static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
				2353	{
				2354	gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
				2355	}
				2356
				2357	static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
				2358	{
				2359	gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
				2360	}
				2361
				2362	static bool trans_VQDMLAL_3d(DisasContext s, arg_3diff a)
				2363	{
				2364	static NeonGenTwoOpWidenFn * const opfn[] = {
				2365	NULL,
				2366	gen_VQDMULL_16,
				2367	gen_VQDMULL_32,
				2368	NULL,
				2369	};
				2370	static NeonGenTwo64OpFn * const accfn[] = {
				2371	NULL,
				2372	gen_VQDMLAL_acc_16,
				2373	gen_VQDMLAL_acc_32,
				2374	NULL,
				2375	};
				2376
				2377	return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
				2378	}
				2379
				2380	static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
				2381	{
				2382	gen_helper_neon_negl_u32(rm, rm);
				2383	gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
				2384	}
				2385
				2386	static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
				2387	{
				2388	tcg_gen_neg_i64(rm, rm);
				2389	gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
				2390	}
				2391
				2392	static bool trans_VQDMLSL_3d(DisasContext s, arg_3diff a)
				2393	{
				2394	static NeonGenTwoOpWidenFn * const opfn[] = {
				2395	NULL,
				2396	gen_VQDMULL_16,
				2397	gen_VQDMULL_32,
				2398	NULL,
				2399	};
				2400	static NeonGenTwo64OpFn * const accfn[] = {
				2401	NULL,
				2402	gen_VQDMLSL_acc_16,
				2403	gen_VQDMLSL_acc_32,
				2404	NULL,
				2405	};
				2406
				2407	return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
				2408	}
Peter Maydell	18fb58d	2020-06-16 10:32:26 +0100	[diff] [blame]	2409
				2410	static bool trans_VMULL_P_3d(DisasContext s, arg_3diff a)
				2411	{
				2412	gen_helper_gvec_3 *fn_gvec;
				2413
				2414	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				2415	return false;
				2416	}
				2417
				2418	/* UNDEF accesses to D16-D31 if they don't exist. */
				2419	if (!dc_isar_feature(aa32_simd_r32, s) &&
				2420	((a->vd \| a->vn \| a->vm) & 0x10)) {
				2421	return false;
				2422	}
				2423
				2424	if (a->vd & 1) {
				2425	return false;
				2426	}
				2427
				2428	switch (a->size) {
				2429	case 0:
				2430	fn_gvec = gen_helper_neon_pmull_h;
				2431	break;
				2432	case 2:
				2433	if (!dc_isar_feature(aa32_pmull, s)) {
				2434	return false;
				2435	}
				2436	fn_gvec = gen_helper_gvec_pmull_q;
				2437	break;
				2438	default:
				2439	return false;
				2440	}
				2441
				2442	if (!vfp_access_check(s)) {
				2443	return true;
				2444	}
				2445
				2446	tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0),
				2447	neon_reg_offset(a->vn, 0),
				2448	neon_reg_offset(a->vm, 0),
				2449	16, 16, 0, fn_gvec);
				2450	return true;
				2451	}
Peter Maydell	96fc80f	2020-06-16 10:32:27 +0100	[diff] [blame]	2452
				2453	static void gen_neon_dup_low16(TCGv_i32 var)
				2454	{
				2455	TCGv_i32 tmp = tcg_temp_new_i32();
				2456	tcg_gen_ext16u_i32(var, var);
				2457	tcg_gen_shli_i32(tmp, var, 16);
				2458	tcg_gen_or_i32(var, var, tmp);
				2459	tcg_temp_free_i32(tmp);
				2460	}
				2461
				2462	static void gen_neon_dup_high16(TCGv_i32 var)
				2463	{
				2464	TCGv_i32 tmp = tcg_temp_new_i32();
				2465	tcg_gen_andi_i32(var, var, 0xffff0000);
				2466	tcg_gen_shri_i32(tmp, var, 16);
				2467	tcg_gen_or_i32(var, var, tmp);
				2468	tcg_temp_free_i32(tmp);
				2469	}
				2470
				2471	static inline TCGv_i32 neon_get_scalar(int size, int reg)
				2472	{
				2473	TCGv_i32 tmp;
				2474	if (size == 1) {
				2475	tmp = neon_load_reg(reg & 7, reg >> 4);
				2476	if (reg & 8) {
				2477	gen_neon_dup_high16(tmp);
				2478	} else {
				2479	gen_neon_dup_low16(tmp);
				2480	}
				2481	} else {
				2482	tmp = neon_load_reg(reg & 15, reg >> 4);
				2483	}
				2484	return tmp;
				2485	}
				2486
				2487	static bool do_2scalar(DisasContext s, arg_2scalar a,
				2488	NeonGenTwoOpFn opfn, NeonGenTwoOpFn accfn)
				2489	{
				2490	/*
				2491	* Two registers and a scalar: perform an operation between
				2492	* the input elements and the scalar, and then possibly
				2493	* perform an accumulation operation of that result into the
				2494	* destination.
				2495	*/
				2496	TCGv_i32 scalar;
				2497	int pass;
				2498
				2499	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				2500	return false;
				2501	}
				2502
				2503	/* UNDEF accesses to D16-D31 if they don't exist. */
				2504	if (!dc_isar_feature(aa32_simd_r32, s) &&
				2505	((a->vd \| a->vn \| a->vm) & 0x10)) {
				2506	return false;
				2507	}
				2508
				2509	if (!opfn) {
				2510	/* Bad size (including size == 3, which is a different insn group) */
				2511	return false;
				2512	}
				2513
				2514	if (a->q && ((a->vd \| a->vn) & 1)) {
				2515	return false;
				2516	}
				2517
				2518	if (!vfp_access_check(s)) {
				2519	return true;
				2520	}
				2521
				2522	scalar = neon_get_scalar(a->size, a->vm);
				2523
				2524	for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
				2525	TCGv_i32 tmp = neon_load_reg(a->vn, pass);
				2526	opfn(tmp, tmp, scalar);
				2527	if (accfn) {
				2528	TCGv_i32 rd = neon_load_reg(a->vd, pass);
				2529	accfn(tmp, rd, tmp);
				2530	tcg_temp_free_i32(rd);
				2531	}
				2532	neon_store_reg(a->vd, pass, tmp);
				2533	}
				2534	tcg_temp_free_i32(scalar);
				2535	return true;
				2536	}
				2537
				2538	static bool trans_VMUL_2sc(DisasContext s, arg_2scalar a)
				2539	{
				2540	static NeonGenTwoOpFn * const opfn[] = {
				2541	NULL,
				2542	gen_helper_neon_mul_u16,
				2543	tcg_gen_mul_i32,
				2544	NULL,
				2545	};
				2546
				2547	return do_2scalar(s, a, opfn[a->size], NULL);
				2548	}
				2549
				2550	static bool trans_VMLA_2sc(DisasContext s, arg_2scalar a)
				2551	{
				2552	static NeonGenTwoOpFn * const opfn[] = {
				2553	NULL,
				2554	gen_helper_neon_mul_u16,
				2555	tcg_gen_mul_i32,
				2556	NULL,
				2557	};
				2558	static NeonGenTwoOpFn * const accfn[] = {
				2559	NULL,
				2560	gen_helper_neon_add_u16,
				2561	tcg_gen_add_i32,
				2562	NULL,
				2563	};
				2564
				2565	return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
				2566	}
				2567
				2568	static bool trans_VMLS_2sc(DisasContext s, arg_2scalar a)
				2569	{
				2570	static NeonGenTwoOpFn * const opfn[] = {
				2571	NULL,
				2572	gen_helper_neon_mul_u16,
				2573	tcg_gen_mul_i32,
				2574	NULL,
				2575	};
				2576	static NeonGenTwoOpFn * const accfn[] = {
				2577	NULL,
				2578	gen_helper_neon_sub_u16,
				2579	tcg_gen_sub_i32,
				2580	NULL,
				2581	};
				2582
				2583	return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
				2584	}
Peter Maydell	85ac9ae	2020-06-16 10:32:27 +0100	[diff] [blame]	2585
				2586	/*
				2587	* Rather than have a float-specific version of do_2scalar just for
				2588	* three insns, we wrap a NeonGenTwoSingleOpFn to turn it into
				2589	* a NeonGenTwoOpFn.
				2590	*/
				2591	#define WRAP_FP_FN(WRAPNAME, FUNC) \
				2592	static void WRAPNAME(TCGv_i32 rd, TCGv_i32 rn, TCGv_i32 rm) \
				2593	{ \
Peter Maydell	a84d1d1	2020-08-06 11:44:51 +0100	[diff] [blame]	2594	TCGv_ptr fpstatus = fpstatus_ptr(FPST_STD); \
Peter Maydell	85ac9ae	2020-06-16 10:32:27 +0100	[diff] [blame]	2595	FUNC(rd, rn, rm, fpstatus); \
				2596	tcg_temp_free_ptr(fpstatus); \
				2597	}
				2598
				2599	WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls)
				2600	WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds)
				2601	WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs)
				2602
				2603	static bool trans_VMUL_F_2sc(DisasContext s, arg_2scalar a)
				2604	{
				2605	static NeonGenTwoOpFn * const opfn[] = {
				2606	NULL,
				2607	NULL, /* TODO: fp16 support */
				2608	gen_VMUL_F_mul,
				2609	NULL,
				2610	};
				2611
				2612	return do_2scalar(s, a, opfn[a->size], NULL);
				2613	}
				2614
				2615	static bool trans_VMLA_F_2sc(DisasContext s, arg_2scalar a)
				2616	{
				2617	static NeonGenTwoOpFn * const opfn[] = {
				2618	NULL,
				2619	NULL, /* TODO: fp16 support */
				2620	gen_VMUL_F_mul,
				2621	NULL,
				2622	};
				2623	static NeonGenTwoOpFn * const accfn[] = {
				2624	NULL,
				2625	NULL, /* TODO: fp16 support */
				2626	gen_VMUL_F_add,
				2627	NULL,
				2628	};
				2629
				2630	return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
				2631	}
				2632
				2633	static bool trans_VMLS_F_2sc(DisasContext s, arg_2scalar a)
				2634	{
				2635	static NeonGenTwoOpFn * const opfn[] = {
				2636	NULL,
				2637	NULL, /* TODO: fp16 support */
				2638	gen_VMUL_F_mul,
				2639	NULL,
				2640	};
				2641	static NeonGenTwoOpFn * const accfn[] = {
				2642	NULL,
				2643	NULL, /* TODO: fp16 support */
				2644	gen_VMUL_F_sub,
				2645	NULL,
				2646	};
				2647
				2648	return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
				2649	}
Peter Maydell	b2fc7be	2020-06-16 10:32:27 +0100	[diff] [blame]	2650
				2651	WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
				2652	WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
				2653	WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
				2654	WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
				2655
				2656	static bool trans_VQDMULH_2sc(DisasContext s, arg_2scalar a)
				2657	{
				2658	static NeonGenTwoOpFn * const opfn[] = {
				2659	NULL,
				2660	gen_VQDMULH_16,
				2661	gen_VQDMULH_32,
				2662	NULL,
				2663	};
				2664
				2665	return do_2scalar(s, a, opfn[a->size], NULL);
				2666	}
				2667
				2668	static bool trans_VQRDMULH_2sc(DisasContext s, arg_2scalar a)
				2669	{
				2670	static NeonGenTwoOpFn * const opfn[] = {
				2671	NULL,
				2672	gen_VQRDMULH_16,
				2673	gen_VQRDMULH_32,
				2674	NULL,
				2675	};
				2676
				2677	return do_2scalar(s, a, opfn[a->size], NULL);
				2678	}
Peter Maydell	aa318f5	2020-06-16 10:32:27 +0100	[diff] [blame]	2679
				2680	static bool do_vqrdmlah_2sc(DisasContext s, arg_2scalar a,
				2681	NeonGenThreeOpEnvFn *opfn)
				2682	{
				2683	/*
				2684	* VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
				2685	* performs a kind of fused op-then-accumulate using a helper
				2686	* function that takes all of rd, rn and the scalar at once.
				2687	*/
				2688	TCGv_i32 scalar;
				2689	int pass;
				2690
				2691	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				2692	return false;
				2693	}
				2694
				2695	if (!dc_isar_feature(aa32_rdm, s)) {
				2696	return false;
				2697	}
				2698
				2699	/* UNDEF accesses to D16-D31 if they don't exist. */
				2700	if (!dc_isar_feature(aa32_simd_r32, s) &&
				2701	((a->vd \| a->vn \| a->vm) & 0x10)) {
				2702	return false;
				2703	}
				2704
				2705	if (!opfn) {
				2706	/* Bad size (including size == 3, which is a different insn group) */
				2707	return false;
				2708	}
				2709
				2710	if (a->q && ((a->vd \| a->vn) & 1)) {
				2711	return false;
				2712	}
				2713
				2714	if (!vfp_access_check(s)) {
				2715	return true;
				2716	}
				2717
				2718	scalar = neon_get_scalar(a->size, a->vm);
				2719
				2720	for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
				2721	TCGv_i32 rn = neon_load_reg(a->vn, pass);
				2722	TCGv_i32 rd = neon_load_reg(a->vd, pass);
				2723	opfn(rd, cpu_env, rn, scalar, rd);
				2724	tcg_temp_free_i32(rn);
				2725	neon_store_reg(a->vd, pass, rd);
				2726	}
				2727	tcg_temp_free_i32(scalar);
				2728
				2729	return true;
				2730	}
				2731
				2732	static bool trans_VQRDMLAH_2sc(DisasContext s, arg_2scalar a)
				2733	{
				2734	static NeonGenThreeOpEnvFn *opfn[] = {
				2735	NULL,
				2736	gen_helper_neon_qrdmlah_s16,
				2737	gen_helper_neon_qrdmlah_s32,
				2738	NULL,
				2739	};
				2740	return do_vqrdmlah_2sc(s, a, opfn[a->size]);
				2741	}
				2742
				2743	static bool trans_VQRDMLSH_2sc(DisasContext s, arg_2scalar a)
				2744	{
				2745	static NeonGenThreeOpEnvFn *opfn[] = {
				2746	NULL,
				2747	gen_helper_neon_qrdmlsh_s16,
				2748	gen_helper_neon_qrdmlsh_s32,
				2749	NULL,
				2750	};
				2751	return do_vqrdmlah_2sc(s, a, opfn[a->size]);
				2752	}
Peter Maydell	77e576a	2020-06-16 10:32:28 +0100	[diff] [blame]	2753
				2754	static bool do_2scalar_long(DisasContext s, arg_2scalar a,
				2755	NeonGenTwoOpWidenFn *opfn,
				2756	NeonGenTwo64OpFn *accfn)
				2757	{
				2758	/*
				2759	* Two registers and a scalar, long operations: perform an
				2760	* operation on the input elements and the scalar which produces
				2761	* a double-width result, and then possibly perform an accumulation
				2762	* operation of that result into the destination.
				2763	*/
				2764	TCGv_i32 scalar, rn;
				2765	TCGv_i64 rn0_64, rn1_64;
				2766
				2767	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				2768	return false;
				2769	}
				2770
				2771	/* UNDEF accesses to D16-D31 if they don't exist. */
				2772	if (!dc_isar_feature(aa32_simd_r32, s) &&
				2773	((a->vd \| a->vn \| a->vm) & 0x10)) {
				2774	return false;
				2775	}
				2776
				2777	if (!opfn) {
				2778	/* Bad size (including size == 3, which is a different insn group) */
				2779	return false;
				2780	}
				2781
				2782	if (a->vd & 1) {
				2783	return false;
				2784	}
				2785
				2786	if (!vfp_access_check(s)) {
				2787	return true;
				2788	}
				2789
				2790	scalar = neon_get_scalar(a->size, a->vm);
				2791
				2792	/* Load all inputs before writing any outputs, in case of overlap */
				2793	rn = neon_load_reg(a->vn, 0);
				2794	rn0_64 = tcg_temp_new_i64();
				2795	opfn(rn0_64, rn, scalar);
				2796	tcg_temp_free_i32(rn);
				2797
				2798	rn = neon_load_reg(a->vn, 1);
				2799	rn1_64 = tcg_temp_new_i64();
				2800	opfn(rn1_64, rn, scalar);
				2801	tcg_temp_free_i32(rn);
				2802	tcg_temp_free_i32(scalar);
				2803
				2804	if (accfn) {
				2805	TCGv_i64 t64 = tcg_temp_new_i64();
				2806	neon_load_reg64(t64, a->vd);
				2807	accfn(t64, t64, rn0_64);
				2808	neon_store_reg64(t64, a->vd);
				2809	neon_load_reg64(t64, a->vd + 1);
				2810	accfn(t64, t64, rn1_64);
				2811	neon_store_reg64(t64, a->vd + 1);
				2812	tcg_temp_free_i64(t64);
				2813	} else {
				2814	neon_store_reg64(rn0_64, a->vd);
				2815	neon_store_reg64(rn1_64, a->vd + 1);
				2816	}
				2817	tcg_temp_free_i64(rn0_64);
				2818	tcg_temp_free_i64(rn1_64);
				2819	return true;
				2820	}
				2821
				2822	static bool trans_VMULL_S_2sc(DisasContext s, arg_2scalar a)
				2823	{
				2824	static NeonGenTwoOpWidenFn * const opfn[] = {
				2825	NULL,
				2826	gen_helper_neon_mull_s16,
				2827	gen_mull_s32,
				2828	NULL,
				2829	};
				2830
				2831	return do_2scalar_long(s, a, opfn[a->size], NULL);
				2832	}
				2833
				2834	static bool trans_VMULL_U_2sc(DisasContext s, arg_2scalar a)
				2835	{
				2836	static NeonGenTwoOpWidenFn * const opfn[] = {
				2837	NULL,
				2838	gen_helper_neon_mull_u16,
				2839	gen_mull_u32,
				2840	NULL,
				2841	};
				2842
				2843	return do_2scalar_long(s, a, opfn[a->size], NULL);
				2844	}
				2845
				2846	#define DO_VMLAL_2SC(INSN, MULL, ACC) \
				2847	static bool trans_##INSN##_2sc(DisasContext s, arg_2scalar a) \
				2848	{ \
				2849	static NeonGenTwoOpWidenFn * const opfn[] = { \
				2850	NULL, \
				2851	gen_helper_neon_##MULL##16, \
				2852	gen_##MULL##32, \
				2853	NULL, \
				2854	}; \
				2855	static NeonGenTwo64OpFn * const accfn[] = { \
				2856	NULL, \
				2857	gen_helper_neon_##ACC##l_u32, \
				2858	tcg_gen_##ACC##_i64, \
				2859	NULL, \
				2860	}; \
				2861	return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \
				2862	}
				2863
				2864	DO_VMLAL_2SC(VMLAL_S, mull_s, add)
				2865	DO_VMLAL_2SC(VMLAL_U, mull_u, add)
				2866	DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
				2867	DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
				2868
				2869	static bool trans_VQDMULL_2sc(DisasContext s, arg_2scalar a)
				2870	{
				2871	static NeonGenTwoOpWidenFn * const opfn[] = {
				2872	NULL,
				2873	gen_VQDMULL_16,
				2874	gen_VQDMULL_32,
				2875	NULL,
				2876	};
				2877
				2878	return do_2scalar_long(s, a, opfn[a->size], NULL);
				2879	}
				2880
				2881	static bool trans_VQDMLAL_2sc(DisasContext s, arg_2scalar a)
				2882	{
				2883	static NeonGenTwoOpWidenFn * const opfn[] = {
				2884	NULL,
				2885	gen_VQDMULL_16,
				2886	gen_VQDMULL_32,
				2887	NULL,
				2888	};
				2889	static NeonGenTwo64OpFn * const accfn[] = {
				2890	NULL,
				2891	gen_VQDMLAL_acc_16,
				2892	gen_VQDMLAL_acc_32,
				2893	NULL,
				2894	};
				2895
				2896	return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
				2897	}
				2898
				2899	static bool trans_VQDMLSL_2sc(DisasContext s, arg_2scalar a)
				2900	{
				2901	static NeonGenTwoOpWidenFn * const opfn[] = {
				2902	NULL,
				2903	gen_VQDMULL_16,
				2904	gen_VQDMULL_32,
				2905	NULL,
				2906	};
				2907	static NeonGenTwo64OpFn * const accfn[] = {
				2908	NULL,
				2909	gen_VQDMLSL_acc_16,
				2910	gen_VQDMLSL_acc_32,
				2911	NULL,
				2912	};
				2913
				2914	return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
				2915	}
Peter Maydell	0aad761	2020-06-16 10:32:28 +0100	[diff] [blame]	2916
				2917	static bool trans_VEXT(DisasContext s, arg_VEXT a)
				2918	{
				2919	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				2920	return false;
				2921	}
				2922
				2923	/* UNDEF accesses to D16-D31 if they don't exist. */
				2924	if (!dc_isar_feature(aa32_simd_r32, s) &&
				2925	((a->vd \| a->vn \| a->vm) & 0x10)) {
				2926	return false;
				2927	}
				2928
				2929	if ((a->vn \| a->vm \| a->vd) & a->q) {
				2930	return false;
				2931	}
				2932
				2933	if (a->imm > 7 && !a->q) {
				2934	return false;
				2935	}
				2936
				2937	if (!vfp_access_check(s)) {
				2938	return true;
				2939	}
				2940
				2941	if (!a->q) {
				2942	/* Extract 64 bits from <Vm:Vn> */
				2943	TCGv_i64 left, right, dest;
				2944
				2945	left = tcg_temp_new_i64();
				2946	right = tcg_temp_new_i64();
				2947	dest = tcg_temp_new_i64();
				2948
				2949	neon_load_reg64(right, a->vn);
				2950	neon_load_reg64(left, a->vm);
				2951	tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
				2952	neon_store_reg64(dest, a->vd);
				2953
				2954	tcg_temp_free_i64(left);
				2955	tcg_temp_free_i64(right);
				2956	tcg_temp_free_i64(dest);
				2957	} else {
				2958	/* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
				2959	TCGv_i64 left, middle, right, destleft, destright;
				2960
				2961	left = tcg_temp_new_i64();
				2962	middle = tcg_temp_new_i64();
				2963	right = tcg_temp_new_i64();
				2964	destleft = tcg_temp_new_i64();
				2965	destright = tcg_temp_new_i64();
				2966
				2967	if (a->imm < 8) {
				2968	neon_load_reg64(right, a->vn);
				2969	neon_load_reg64(middle, a->vn + 1);
				2970	tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
				2971	neon_load_reg64(left, a->vm);
				2972	tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
				2973	} else {
				2974	neon_load_reg64(right, a->vn + 1);
				2975	neon_load_reg64(middle, a->vm);
				2976	tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
				2977	neon_load_reg64(left, a->vm + 1);
				2978	tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
				2979	}
				2980
				2981	neon_store_reg64(destright, a->vd);
				2982	neon_store_reg64(destleft, a->vd + 1);
				2983
				2984	tcg_temp_free_i64(destright);
				2985	tcg_temp_free_i64(destleft);
				2986	tcg_temp_free_i64(right);
				2987	tcg_temp_free_i64(middle);
				2988	tcg_temp_free_i64(left);
				2989	}
				2990	return true;
				2991	}
Peter Maydell	54e96c7	2020-06-16 10:32:28 +0100	[diff] [blame]	2992
				2993	static bool trans_VTBL(DisasContext s, arg_VTBL a)
				2994	{
				2995	int n;
				2996	TCGv_i32 tmp, tmp2, tmp3, tmp4;
				2997	TCGv_ptr ptr1;
				2998
				2999	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				3000	return false;
				3001	}
				3002
				3003	/* UNDEF accesses to D16-D31 if they don't exist. */
				3004	if (!dc_isar_feature(aa32_simd_r32, s) &&
				3005	((a->vd \| a->vn \| a->vm) & 0x10)) {
				3006	return false;
				3007	}
				3008
				3009	if (!vfp_access_check(s)) {
				3010	return true;
				3011	}
				3012
				3013	n = a->len + 1;
				3014	if ((a->vn + n) > 32) {
				3015	/*
				3016	* This is UNPREDICTABLE; we choose to UNDEF to avoid the
				3017	* helper function running off the end of the register file.
				3018	*/
				3019	return false;
				3020	}
				3021	n <<= 3;
				3022	if (a->op) {
				3023	tmp = neon_load_reg(a->vd, 0);
				3024	} else {
				3025	tmp = tcg_temp_new_i32();
				3026	tcg_gen_movi_i32(tmp, 0);
				3027	}
				3028	tmp2 = neon_load_reg(a->vm, 0);
				3029	ptr1 = vfp_reg_ptr(true, a->vn);
				3030	tmp4 = tcg_const_i32(n);
				3031	gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4);
				3032	tcg_temp_free_i32(tmp);
				3033	if (a->op) {
				3034	tmp = neon_load_reg(a->vd, 1);
				3035	} else {
				3036	tmp = tcg_temp_new_i32();
				3037	tcg_gen_movi_i32(tmp, 0);
				3038	}
				3039	tmp3 = neon_load_reg(a->vm, 1);
				3040	gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4);
				3041	tcg_temp_free_i32(tmp4);
				3042	tcg_temp_free_ptr(ptr1);
				3043	neon_store_reg(a->vd, 0, tmp2);
				3044	neon_store_reg(a->vd, 1, tmp3);
				3045	tcg_temp_free_i32(tmp);
				3046	return true;
				3047	}
Peter Maydell	9aaa23c	2020-06-16 10:32:28 +0100	[diff] [blame]	3048
				3049	static bool trans_VDUP_scalar(DisasContext s, arg_VDUP_scalar a)
				3050	{
				3051	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				3052	return false;
				3053	}
				3054
				3055	/* UNDEF accesses to D16-D31 if they don't exist. */
				3056	if (!dc_isar_feature(aa32_simd_r32, s) &&
				3057	((a->vd \| a->vm) & 0x10)) {
				3058	return false;
				3059	}
				3060
				3061	if (a->vd & a->q) {
				3062	return false;
				3063	}
				3064
				3065	if (!vfp_access_check(s)) {
				3066	return true;
				3067	}
				3068
				3069	tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0),
				3070	neon_element_offset(a->vm, a->index, a->size),
				3071	a->q ? 16 : 8, a->q ? 16 : 8);
				3072	return true;
				3073	}
Peter Maydell	353d2b8	2020-06-16 18:08:24 +0100	[diff] [blame]	3074
				3075	static bool trans_VREV64(DisasContext s, arg_VREV64 a)
				3076	{
				3077	int pass, half;
				3078
				3079	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				3080	return false;
				3081	}
				3082
				3083	/* UNDEF accesses to D16-D31 if they don't exist. */
				3084	if (!dc_isar_feature(aa32_simd_r32, s) &&
				3085	((a->vd \| a->vm) & 0x10)) {
				3086	return false;
				3087	}
				3088
				3089	if ((a->vd \| a->vm) & a->q) {
				3090	return false;
				3091	}
				3092
				3093	if (a->size == 3) {
				3094	return false;
				3095	}
				3096
				3097	if (!vfp_access_check(s)) {
				3098	return true;
				3099	}
				3100
				3101	for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
				3102	TCGv_i32 tmp[2];
				3103
				3104	for (half = 0; half < 2; half++) {
				3105	tmp[half] = neon_load_reg(a->vm, pass * 2 + half);
				3106	switch (a->size) {
				3107	case 0:
				3108	tcg_gen_bswap32_i32(tmp[half], tmp[half]);
				3109	break;
				3110	case 1:
Peter Maydell	8ec3de7	2020-06-16 18:08:34 +0100	[diff] [blame]	3111	gen_swap_half(tmp[half], tmp[half]);
Peter Maydell	353d2b8	2020-06-16 18:08:24 +0100	[diff] [blame]	3112	break;
				3113	case 2:
				3114	break;
				3115	default:
				3116	g_assert_not_reached();
				3117	}
				3118	}
				3119	neon_store_reg(a->vd, pass * 2, tmp[1]);
				3120	neon_store_reg(a->vd, pass * 2 + 1, tmp[0]);
				3121	}
				3122	return true;
				3123	}
Peter Maydell	6106af3	2020-06-16 18:08:25 +0100	[diff] [blame]	3124
				3125	static bool do_2misc_pairwise(DisasContext s, arg_2misc a,
				3126	NeonGenWidenFn *widenfn,
				3127	NeonGenTwo64OpFn *opfn,
				3128	NeonGenTwo64OpFn *accfn)
				3129	{
				3130	/*
				3131	* Pairwise long operations: widen both halves of the pair,
				3132	* combine the pairs with the opfn, and then possibly accumulate
				3133	* into the destination with the accfn.
				3134	*/
				3135	int pass;
				3136
				3137	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				3138	return false;
				3139	}
				3140
				3141	/* UNDEF accesses to D16-D31 if they don't exist. */
				3142	if (!dc_isar_feature(aa32_simd_r32, s) &&
				3143	((a->vd \| a->vm) & 0x10)) {
				3144	return false;
				3145	}
				3146
				3147	if ((a->vd \| a->vm) & a->q) {
				3148	return false;
				3149	}
				3150
				3151	if (!widenfn) {
				3152	return false;
				3153	}
				3154
				3155	if (!vfp_access_check(s)) {
				3156	return true;
				3157	}
				3158
				3159	for (pass = 0; pass < a->q + 1; pass++) {
				3160	TCGv_i32 tmp;
				3161	TCGv_i64 rm0_64, rm1_64, rd_64;
				3162
				3163	rm0_64 = tcg_temp_new_i64();
				3164	rm1_64 = tcg_temp_new_i64();
				3165	rd_64 = tcg_temp_new_i64();
				3166	tmp = neon_load_reg(a->vm, pass * 2);
				3167	widenfn(rm0_64, tmp);
				3168	tcg_temp_free_i32(tmp);
				3169	tmp = neon_load_reg(a->vm, pass * 2 + 1);
				3170	widenfn(rm1_64, tmp);
				3171	tcg_temp_free_i32(tmp);
				3172	opfn(rd_64, rm0_64, rm1_64);
				3173	tcg_temp_free_i64(rm0_64);
				3174	tcg_temp_free_i64(rm1_64);
				3175
				3176	if (accfn) {
				3177	TCGv_i64 tmp64 = tcg_temp_new_i64();
				3178	neon_load_reg64(tmp64, a->vd + pass);
				3179	accfn(rd_64, tmp64, rd_64);
				3180	tcg_temp_free_i64(tmp64);
				3181	}
				3182	neon_store_reg64(rd_64, a->vd + pass);
				3183	tcg_temp_free_i64(rd_64);
				3184	}
				3185	return true;
				3186	}
				3187
				3188	static bool trans_VPADDL_S(DisasContext s, arg_2misc a)
				3189	{
				3190	static NeonGenWidenFn * const widenfn[] = {
				3191	gen_helper_neon_widen_s8,
				3192	gen_helper_neon_widen_s16,
				3193	tcg_gen_ext_i32_i64,
				3194	NULL,
				3195	};
				3196	static NeonGenTwo64OpFn * const opfn[] = {
				3197	gen_helper_neon_paddl_u16,
				3198	gen_helper_neon_paddl_u32,
				3199	tcg_gen_add_i64,
				3200	NULL,
				3201	};
				3202
				3203	return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
				3204	}
				3205
				3206	static bool trans_VPADDL_U(DisasContext s, arg_2misc a)
				3207	{
				3208	static NeonGenWidenFn * const widenfn[] = {
				3209	gen_helper_neon_widen_u8,
				3210	gen_helper_neon_widen_u16,
				3211	tcg_gen_extu_i32_i64,
				3212	NULL,
				3213	};
				3214	static NeonGenTwo64OpFn * const opfn[] = {
				3215	gen_helper_neon_paddl_u16,
				3216	gen_helper_neon_paddl_u32,
				3217	tcg_gen_add_i64,
				3218	NULL,
				3219	};
				3220
				3221	return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
				3222	}
				3223
				3224	static bool trans_VPADAL_S(DisasContext s, arg_2misc a)
				3225	{
				3226	static NeonGenWidenFn * const widenfn[] = {
				3227	gen_helper_neon_widen_s8,
				3228	gen_helper_neon_widen_s16,
				3229	tcg_gen_ext_i32_i64,
				3230	NULL,
				3231	};
				3232	static NeonGenTwo64OpFn * const opfn[] = {
				3233	gen_helper_neon_paddl_u16,
				3234	gen_helper_neon_paddl_u32,
				3235	tcg_gen_add_i64,
				3236	NULL,
				3237	};
				3238	static NeonGenTwo64OpFn * const accfn[] = {
				3239	gen_helper_neon_addl_u16,
				3240	gen_helper_neon_addl_u32,
				3241	tcg_gen_add_i64,
				3242	NULL,
				3243	};
				3244
				3245	return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
				3246	accfn[a->size]);
				3247	}
				3248
				3249	static bool trans_VPADAL_U(DisasContext s, arg_2misc a)
				3250	{
				3251	static NeonGenWidenFn * const widenfn[] = {
				3252	gen_helper_neon_widen_u8,
				3253	gen_helper_neon_widen_u16,
				3254	tcg_gen_extu_i32_i64,
				3255	NULL,
				3256	};
				3257	static NeonGenTwo64OpFn * const opfn[] = {
				3258	gen_helper_neon_paddl_u16,
				3259	gen_helper_neon_paddl_u32,
				3260	tcg_gen_add_i64,
				3261	NULL,
				3262	};
				3263	static NeonGenTwo64OpFn * const accfn[] = {
				3264	gen_helper_neon_addl_u16,
				3265	gen_helper_neon_addl_u32,
				3266	tcg_gen_add_i64,
				3267	NULL,
				3268	};
				3269
				3270	return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
				3271	accfn[a->size]);
				3272	}
Peter Maydell	567663a	2020-06-16 18:08:26 +0100	[diff] [blame]	3273
				3274	typedef void ZipFn(TCGv_ptr, TCGv_ptr);
				3275
				3276	static bool do_zip_uzp(DisasContext s, arg_2misc a,
				3277	ZipFn *fn)
				3278	{
				3279	TCGv_ptr pd, pm;
				3280
				3281	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				3282	return false;
				3283	}
				3284
				3285	/* UNDEF accesses to D16-D31 if they don't exist. */
				3286	if (!dc_isar_feature(aa32_simd_r32, s) &&
				3287	((a->vd \| a->vm) & 0x10)) {
				3288	return false;
				3289	}
				3290
				3291	if ((a->vd \| a->vm) & a->q) {
				3292	return false;
				3293	}
				3294
				3295	if (!fn) {
				3296	/* Bad size or size/q combination */
				3297	return false;
				3298	}
				3299
				3300	if (!vfp_access_check(s)) {
				3301	return true;
				3302	}
				3303
				3304	pd = vfp_reg_ptr(true, a->vd);
				3305	pm = vfp_reg_ptr(true, a->vm);
				3306	fn(pd, pm);
				3307	tcg_temp_free_ptr(pd);
				3308	tcg_temp_free_ptr(pm);
				3309	return true;
				3310	}
				3311
				3312	static bool trans_VUZP(DisasContext s, arg_2misc a)
				3313	{
				3314	static ZipFn * const fn[2][4] = {
				3315	{
				3316	gen_helper_neon_unzip8,
				3317	gen_helper_neon_unzip16,
				3318	NULL,
				3319	NULL,
				3320	}, {
				3321	gen_helper_neon_qunzip8,
				3322	gen_helper_neon_qunzip16,
				3323	gen_helper_neon_qunzip32,
				3324	NULL,
				3325	}
				3326	};
				3327	return do_zip_uzp(s, a, fn[a->q][a->size]);
				3328	}
				3329
				3330	static bool trans_VZIP(DisasContext s, arg_2misc a)
				3331	{
				3332	static ZipFn * const fn[2][4] = {
				3333	{
				3334	gen_helper_neon_zip8,
				3335	gen_helper_neon_zip16,
				3336	NULL,
				3337	NULL,
				3338	}, {
				3339	gen_helper_neon_qzip8,
				3340	gen_helper_neon_qzip16,
				3341	gen_helper_neon_qzip32,
				3342	NULL,
				3343	}
				3344	};
				3345	return do_zip_uzp(s, a, fn[a->q][a->size]);
				3346	}
Peter Maydell	3882bda	2020-06-16 18:08:27 +0100	[diff] [blame]	3347
				3348	static bool do_vmovn(DisasContext s, arg_2misc a,
				3349	NeonGenNarrowEnvFn *narrowfn)
				3350	{
				3351	TCGv_i64 rm;
				3352	TCGv_i32 rd0, rd1;
				3353
				3354	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				3355	return false;
				3356	}
				3357
				3358	/* UNDEF accesses to D16-D31 if they don't exist. */
				3359	if (!dc_isar_feature(aa32_simd_r32, s) &&
				3360	((a->vd \| a->vm) & 0x10)) {
				3361	return false;
				3362	}
				3363
				3364	if (a->vm & 1) {
				3365	return false;
				3366	}
				3367
				3368	if (!narrowfn) {
				3369	return false;
				3370	}
				3371
				3372	if (!vfp_access_check(s)) {
				3373	return true;
				3374	}
				3375
				3376	rm = tcg_temp_new_i64();
				3377	rd0 = tcg_temp_new_i32();
				3378	rd1 = tcg_temp_new_i32();
				3379
				3380	neon_load_reg64(rm, a->vm);
				3381	narrowfn(rd0, cpu_env, rm);
				3382	neon_load_reg64(rm, a->vm + 1);
				3383	narrowfn(rd1, cpu_env, rm);
				3384	neon_store_reg(a->vd, 0, rd0);
				3385	neon_store_reg(a->vd, 1, rd1);
				3386	tcg_temp_free_i64(rm);
				3387	return true;
				3388	}
				3389
				3390	#define DO_VMOVN(INSN, FUNC) \
				3391	static bool trans_##INSN(DisasContext s, arg_2misc a) \
				3392	{ \
				3393	static NeonGenNarrowEnvFn * const narrowfn[] = { \
				3394	FUNC##8, \
				3395	FUNC##16, \
				3396	FUNC##32, \
				3397	NULL, \
				3398	}; \
				3399	return do_vmovn(s, a, narrowfn[a->size]); \
				3400	}
				3401
				3402	DO_VMOVN(VMOVN, gen_neon_narrow_u)
				3403	DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
				3404	DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
				3405	DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)
Peter Maydell	749e2be	2020-06-16 18:08:28 +0100	[diff] [blame]	3406
				3407	static bool trans_VSHLL(DisasContext s, arg_2misc a)
				3408	{
				3409	TCGv_i32 rm0, rm1;
				3410	TCGv_i64 rd;
				3411	static NeonGenWidenFn * const widenfns[] = {
				3412	gen_helper_neon_widen_u8,
				3413	gen_helper_neon_widen_u16,
				3414	tcg_gen_extu_i32_i64,
				3415	NULL,
				3416	};
				3417	NeonGenWidenFn *widenfn = widenfns[a->size];
				3418
				3419	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				3420	return false;
				3421	}
				3422
				3423	/* UNDEF accesses to D16-D31 if they don't exist. */
				3424	if (!dc_isar_feature(aa32_simd_r32, s) &&
				3425	((a->vd \| a->vm) & 0x10)) {
				3426	return false;
				3427	}
				3428
				3429	if (a->vd & 1) {
				3430	return false;
				3431	}
				3432
				3433	if (!widenfn) {
				3434	return false;
				3435	}
				3436
				3437	if (!vfp_access_check(s)) {
				3438	return true;
				3439	}
				3440
				3441	rd = tcg_temp_new_i64();
				3442
				3443	rm0 = neon_load_reg(a->vm, 0);
				3444	rm1 = neon_load_reg(a->vm, 1);
				3445
				3446	widenfn(rd, rm0);
				3447	tcg_gen_shli_i64(rd, rd, 8 << a->size);
				3448	neon_store_reg64(rd, a->vd);
				3449	widenfn(rd, rm1);
				3450	tcg_gen_shli_i64(rd, rd, 8 << a->size);
				3451	neon_store_reg64(rd, a->vd + 1);
				3452
				3453	tcg_temp_free_i64(rd);
				3454	tcg_temp_free_i32(rm0);
				3455	tcg_temp_free_i32(rm1);
				3456	return true;
				3457	}
Peter Maydell	654a517	2020-06-16 18:08:29 +0100	[diff] [blame]	3458
				3459	static bool trans_VCVT_F16_F32(DisasContext s, arg_2misc a)
				3460	{
				3461	TCGv_ptr fpst;
				3462	TCGv_i32 ahp, tmp, tmp2, tmp3;
				3463
				3464	if (!arm_dc_feature(s, ARM_FEATURE_NEON) \|\|
				3465	!dc_isar_feature(aa32_fp16_spconv, s)) {
				3466	return false;
				3467	}
				3468
				3469	/* UNDEF accesses to D16-D31 if they don't exist. */
				3470	if (!dc_isar_feature(aa32_simd_r32, s) &&
				3471	((a->vd \| a->vm) & 0x10)) {
				3472	return false;
				3473	}
				3474
				3475	if ((a->vm & 1) \|\| (a->size != 1)) {
				3476	return false;
				3477	}
				3478
				3479	if (!vfp_access_check(s)) {
				3480	return true;
				3481	}
				3482
Peter Maydell	a84d1d1	2020-08-06 11:44:51 +0100	[diff] [blame]	3483	fpst = fpstatus_ptr(FPST_STD);
Peter Maydell	654a517	2020-06-16 18:08:29 +0100	[diff] [blame]	3484	ahp = get_ahp_flag();
				3485	tmp = neon_load_reg(a->vm, 0);
				3486	gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
				3487	tmp2 = neon_load_reg(a->vm, 1);
				3488	gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
				3489	tcg_gen_shli_i32(tmp2, tmp2, 16);
				3490	tcg_gen_or_i32(tmp2, tmp2, tmp);
				3491	tcg_temp_free_i32(tmp);
				3492	tmp = neon_load_reg(a->vm, 2);
				3493	gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
				3494	tmp3 = neon_load_reg(a->vm, 3);
				3495	neon_store_reg(a->vd, 0, tmp2);
				3496	gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
				3497	tcg_gen_shli_i32(tmp3, tmp3, 16);
				3498	tcg_gen_or_i32(tmp3, tmp3, tmp);
				3499	neon_store_reg(a->vd, 1, tmp3);
				3500	tcg_temp_free_i32(tmp);
				3501	tcg_temp_free_i32(ahp);
				3502	tcg_temp_free_ptr(fpst);
				3503
				3504	return true;
				3505	}
				3506
				3507	static bool trans_VCVT_F32_F16(DisasContext s, arg_2misc a)
				3508	{
				3509	TCGv_ptr fpst;
				3510	TCGv_i32 ahp, tmp, tmp2, tmp3;
				3511
				3512	if (!arm_dc_feature(s, ARM_FEATURE_NEON) \|\|
				3513	!dc_isar_feature(aa32_fp16_spconv, s)) {
				3514	return false;
				3515	}
				3516
				3517	/* UNDEF accesses to D16-D31 if they don't exist. */
				3518	if (!dc_isar_feature(aa32_simd_r32, s) &&
				3519	((a->vd \| a->vm) & 0x10)) {
				3520	return false;
				3521	}
				3522
				3523	if ((a->vd & 1) \|\| (a->size != 1)) {
				3524	return false;
				3525	}
				3526
				3527	if (!vfp_access_check(s)) {
				3528	return true;
				3529	}
				3530
Peter Maydell	a84d1d1	2020-08-06 11:44:51 +0100	[diff] [blame]	3531	fpst = fpstatus_ptr(FPST_STD);
Peter Maydell	654a517	2020-06-16 18:08:29 +0100	[diff] [blame]	3532	ahp = get_ahp_flag();
				3533	tmp3 = tcg_temp_new_i32();
				3534	tmp = neon_load_reg(a->vm, 0);
				3535	tmp2 = neon_load_reg(a->vm, 1);
				3536	tcg_gen_ext16u_i32(tmp3, tmp);
				3537	gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
				3538	neon_store_reg(a->vd, 0, tmp3);
				3539	tcg_gen_shri_i32(tmp, tmp, 16);
				3540	gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
				3541	neon_store_reg(a->vd, 1, tmp);
				3542	tmp3 = tcg_temp_new_i32();
				3543	tcg_gen_ext16u_i32(tmp3, tmp2);
				3544	gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
				3545	neon_store_reg(a->vd, 2, tmp3);
				3546	tcg_gen_shri_i32(tmp2, tmp2, 16);
				3547	gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
				3548	neon_store_reg(a->vd, 3, tmp2);
				3549	tcg_temp_free_i32(ahp);
				3550	tcg_temp_free_ptr(fpst);
				3551
				3552	return true;
				3553	}
Peter Maydell	7515317	2020-06-16 18:08:30 +0100	[diff] [blame]	3554
				3555	static bool do_2misc_vec(DisasContext s, arg_2misc a, GVecGen2Fn *fn)
				3556	{
				3557	int vec_size = a->q ? 16 : 8;
				3558	int rd_ofs = neon_reg_offset(a->vd, 0);
				3559	int rm_ofs = neon_reg_offset(a->vm, 0);
				3560
				3561	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				3562	return false;
				3563	}
				3564
				3565	/* UNDEF accesses to D16-D31 if they don't exist. */
				3566	if (!dc_isar_feature(aa32_simd_r32, s) &&
				3567	((a->vd \| a->vm) & 0x10)) {
				3568	return false;
				3569	}
				3570
				3571	if (a->size == 3) {
				3572	return false;
				3573	}
				3574
				3575	if ((a->vd \| a->vm) & a->q) {
				3576	return false;
				3577	}
				3578
				3579	if (!vfp_access_check(s)) {
				3580	return true;
				3581	}
				3582
				3583	fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);
				3584
				3585	return true;
				3586	}
				3587
				3588	#define DO_2MISC_VEC(INSN, FN) \
				3589	static bool trans_##INSN(DisasContext s, arg_2misc a) \
				3590	{ \
				3591	return do_2misc_vec(s, a, FN); \
				3592	}
				3593
				3594	DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
				3595	DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
				3596	DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
				3597	DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
				3598	DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
				3599	DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
				3600	DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
				3601
				3602	static bool trans_VMVN(DisasContext s, arg_2misc a)
				3603	{
				3604	if (a->size != 0) {
				3605	return false;
				3606	}
				3607	return do_2misc_vec(s, a, tcg_gen_gvec_not);
				3608	}
Peter Maydell	0b30dd5	2020-06-16 18:08:31 +0100	[diff] [blame]	3609
				3610	#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \
				3611	static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
				3612	uint32_t rm_ofs, uint32_t oprsz, \
				3613	uint32_t maxsz) \
				3614	{ \
				3615	tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz, \
				3616	DATA, FUNC); \
				3617	}
				3618
				3619	#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA) \
				3620	static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
				3621	uint32_t rm_ofs, uint32_t oprsz, \
				3622	uint32_t maxsz) \
				3623	{ \
				3624	tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC); \
				3625	}
				3626
				3627	WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
				3628	WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1)
				3629	WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
				3630	WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1)
				3631	WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
				3632	WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
				3633	WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)
				3634
				3635	#define DO_2M_CRYPTO(INSN, FEATURE, SIZE) \
				3636	static bool trans_##INSN(DisasContext s, arg_2misc a) \
				3637	{ \
				3638	if (!dc_isar_feature(FEATURE, s) \|\| a->size != SIZE) { \
				3639	return false; \
				3640	} \
				3641	return do_2misc_vec(s, a, gen_##INSN); \
				3642	}
				3643
				3644	DO_2M_CRYPTO(AESE, aa32_aes, 0)
				3645	DO_2M_CRYPTO(AESD, aa32_aes, 0)
				3646	DO_2M_CRYPTO(AESMC, aa32_aes, 0)
				3647	DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
				3648	DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
				3649	DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
				3650	DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
Peter Maydell	8966808	2020-06-16 18:08:35 +0100	[diff] [blame]	3651
				3652	static bool do_2misc(DisasContext s, arg_2misc a, NeonGenOneOpFn *fn)
				3653	{
				3654	int pass;
				3655
				3656	/* Handle a 2-reg-misc operation by iterating 32 bits at a time */
				3657	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				3658	return false;
				3659	}
				3660
				3661	/* UNDEF accesses to D16-D31 if they don't exist. */
				3662	if (!dc_isar_feature(aa32_simd_r32, s) &&
				3663	((a->vd \| a->vm) & 0x10)) {
				3664	return false;
				3665	}
				3666
				3667	if (!fn) {
				3668	return false;
				3669	}
				3670
				3671	if ((a->vd \| a->vm) & a->q) {
				3672	return false;
				3673	}
				3674
				3675	if (!vfp_access_check(s)) {
				3676	return true;
				3677	}
				3678
				3679	for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
				3680	TCGv_i32 tmp = neon_load_reg(a->vm, pass);
				3681	fn(tmp, tmp);
				3682	neon_store_reg(a->vd, pass, tmp);
				3683	}
				3684
				3685	return true;
				3686	}
				3687
				3688	static bool trans_VREV32(DisasContext s, arg_2misc a)
				3689	{
				3690	static NeonGenOneOpFn * const fn[] = {
				3691	tcg_gen_bswap32_i32,
				3692	gen_swap_half,
				3693	NULL,
				3694	NULL,
				3695	};
				3696	return do_2misc(s, a, fn[a->size]);
				3697	}
				3698
				3699	static bool trans_VREV16(DisasContext s, arg_2misc a)
				3700	{
				3701	if (a->size != 0) {
				3702	return false;
				3703	}
				3704	return do_2misc(s, a, gen_rev16);
				3705	}
Peter Maydell	84eae77	2020-06-16 18:08:36 +0100	[diff] [blame]	3706
				3707	static bool trans_VCLS(DisasContext s, arg_2misc a)
				3708	{
				3709	static NeonGenOneOpFn * const fn[] = {
				3710	gen_helper_neon_cls_s8,
				3711	gen_helper_neon_cls_s16,
				3712	gen_helper_neon_cls_s32,
				3713	NULL,
				3714	};
				3715	return do_2misc(s, a, fn[a->size]);
				3716	}
				3717
				3718	static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
				3719	{
				3720	tcg_gen_clzi_i32(rd, rm, 32);
				3721	}
				3722
				3723	static bool trans_VCLZ(DisasContext s, arg_2misc a)
				3724	{
				3725	static NeonGenOneOpFn * const fn[] = {
				3726	gen_helper_neon_clz_u8,
				3727	gen_helper_neon_clz_u16,
				3728	do_VCLZ_32,
				3729	NULL,
				3730	};
				3731	return do_2misc(s, a, fn[a->size]);
				3732	}
				3733
				3734	static bool trans_VCNT(DisasContext s, arg_2misc a)
				3735	{
				3736	if (a->size != 0) {
				3737	return false;
				3738	}
				3739	return do_2misc(s, a, gen_helper_neon_cnt_u8);
				3740	}
				3741
				3742	static bool trans_VABS_F(DisasContext s, arg_2misc a)
				3743	{
				3744	if (a->size != 2) {
				3745	return false;
				3746	}
				3747	/* TODO: FP16 : size == 1 */
				3748	return do_2misc(s, a, gen_helper_vfp_abss);
				3749	}
				3750
				3751	static bool trans_VNEG_F(DisasContext s, arg_2misc a)
				3752	{
				3753	if (a->size != 2) {
				3754	return false;
				3755	}
				3756	/* TODO: FP16 : size == 1 */
				3757	return do_2misc(s, a, gen_helper_vfp_negs);
				3758	}
				3759
				3760	static bool trans_VRECPE(DisasContext s, arg_2misc a)
				3761	{
				3762	if (a->size != 2) {
				3763	return false;
				3764	}
				3765	return do_2misc(s, a, gen_helper_recpe_u32);
				3766	}
				3767
				3768	static bool trans_VRSQRTE(DisasContext s, arg_2misc a)
				3769	{
				3770	if (a->size != 2) {
				3771	return false;
				3772	}
				3773	return do_2misc(s, a, gen_helper_rsqrte_u32);
				3774	}
Peter Maydell	4936f38	2020-06-16 18:08:37 +0100	[diff] [blame]	3775
				3776	#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \
				3777	static void WRAPNAME(TCGv_i32 d, TCGv_i32 m) \
				3778	{ \
				3779	FUNC(d, cpu_env, m); \
				3780	}
				3781
				3782	WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
				3783	WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
				3784	WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
				3785	WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
				3786	WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
				3787	WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)
				3788
				3789	static bool trans_VQABS(DisasContext s, arg_2misc a)
				3790	{
				3791	static NeonGenOneOpFn * const fn[] = {
				3792	gen_VQABS_s8,
				3793	gen_VQABS_s16,
				3794	gen_VQABS_s32,
				3795	NULL,
				3796	};
				3797	return do_2misc(s, a, fn[a->size]);
				3798	}
				3799
				3800	static bool trans_VQNEG(DisasContext s, arg_2misc a)
				3801	{
				3802	static NeonGenOneOpFn * const fn[] = {
				3803	gen_VQNEG_s8,
				3804	gen_VQNEG_s16,
				3805	gen_VQNEG_s32,
				3806	NULL,
				3807	};
				3808	return do_2misc(s, a, fn[a->size]);
				3809	}
Peter Maydell	3e96b20	2020-06-16 18:08:38 +0100	[diff] [blame]	3810
				3811	static bool do_2misc_fp(DisasContext s, arg_2misc a,
				3812	NeonGenOneSingleOpFn *fn)
				3813	{
				3814	int pass;
				3815	TCGv_ptr fpst;
				3816
				3817	/* Handle a 2-reg-misc operation by iterating 32 bits at a time */
				3818	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				3819	return false;
				3820	}
				3821
				3822	/* UNDEF accesses to D16-D31 if they don't exist. */
				3823	if (!dc_isar_feature(aa32_simd_r32, s) &&
				3824	((a->vd \| a->vm) & 0x10)) {
				3825	return false;
				3826	}
				3827
				3828	if (a->size != 2) {
				3829	/* TODO: FP16 will be the size == 1 case */
				3830	return false;
				3831	}
				3832
				3833	if ((a->vd \| a->vm) & a->q) {
				3834	return false;
				3835	}
				3836
				3837	if (!vfp_access_check(s)) {
				3838	return true;
				3839	}
				3840
Peter Maydell	a84d1d1	2020-08-06 11:44:51 +0100	[diff] [blame]	3841	fpst = fpstatus_ptr(FPST_STD);
Peter Maydell	3e96b20	2020-06-16 18:08:38 +0100	[diff] [blame]	3842	for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
				3843	TCGv_i32 tmp = neon_load_reg(a->vm, pass);
				3844	fn(tmp, tmp, fpst);
				3845	neon_store_reg(a->vd, pass, tmp);
				3846	}
				3847	tcg_temp_free_ptr(fpst);
				3848
				3849	return true;
				3850	}
				3851
				3852	#define DO_2MISC_FP(INSN, FUNC) \
				3853	static bool trans_##INSN(DisasContext s, arg_2misc a) \
				3854	{ \
				3855	return do_2misc_fp(s, a, FUNC); \
				3856	}
				3857
				3858	DO_2MISC_FP(VRECPE_F, gen_helper_recpe_f32)
				3859	DO_2MISC_FP(VRSQRTE_F, gen_helper_rsqrte_f32)
				3860	DO_2MISC_FP(VCVT_FS, gen_helper_vfp_sitos)
				3861	DO_2MISC_FP(VCVT_FU, gen_helper_vfp_uitos)
				3862	DO_2MISC_FP(VCVT_SF, gen_helper_vfp_tosizs)
				3863	DO_2MISC_FP(VCVT_UF, gen_helper_vfp_touizs)
				3864
				3865	static bool trans_VRINTX(DisasContext s, arg_2misc a)
				3866	{
				3867	if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
				3868	return false;
				3869	}
				3870	return do_2misc_fp(s, a, gen_helper_rints_exact);
				3871	}
Peter Maydell	baa59323	2020-06-16 18:08:39 +0100	[diff] [blame]	3872
				3873	#define WRAP_FP_CMP0_FWD(WRAPNAME, FUNC) \
				3874	static void WRAPNAME(TCGv_i32 d, TCGv_i32 m, TCGv_ptr fpst) \
				3875	{ \
				3876	TCGv_i32 zero = tcg_const_i32(0); \
				3877	FUNC(d, m, zero, fpst); \
				3878	tcg_temp_free_i32(zero); \
				3879	}
				3880	#define WRAP_FP_CMP0_REV(WRAPNAME, FUNC) \
				3881	static void WRAPNAME(TCGv_i32 d, TCGv_i32 m, TCGv_ptr fpst) \
				3882	{ \
				3883	TCGv_i32 zero = tcg_const_i32(0); \
				3884	FUNC(d, zero, m, fpst); \
				3885	tcg_temp_free_i32(zero); \
				3886	}
				3887
				3888	#define DO_FP_CMP0(INSN, FUNC, REV) \
				3889	WRAP_FP_CMP0_##REV(gen_##INSN, FUNC) \
				3890	static bool trans_##INSN(DisasContext s, arg_2misc a) \
				3891	{ \
				3892	return do_2misc_fp(s, a, gen_##INSN); \
				3893	}
				3894
				3895	DO_FP_CMP0(VCGT0_F, gen_helper_neon_cgt_f32, FWD)
				3896	DO_FP_CMP0(VCGE0_F, gen_helper_neon_cge_f32, FWD)
				3897	DO_FP_CMP0(VCEQ0_F, gen_helper_neon_ceq_f32, FWD)
				3898	DO_FP_CMP0(VCLE0_F, gen_helper_neon_cge_f32, REV)
				3899	DO_FP_CMP0(VCLT0_F, gen_helper_neon_cgt_f32, REV)
Peter Maydell	128123e	2020-06-16 18:08:40 +0100	[diff] [blame]	3900
				3901	static bool do_vrint(DisasContext s, arg_2misc a, int rmode)
				3902	{
				3903	/*
				3904	* Handle a VRINT* operation by iterating 32 bits at a time,
				3905	* with a specified rounding mode in operation.
				3906	*/
				3907	int pass;
				3908	TCGv_ptr fpst;
				3909	TCGv_i32 tcg_rmode;
				3910
				3911	if (!arm_dc_feature(s, ARM_FEATURE_NEON) \|\|
				3912	!arm_dc_feature(s, ARM_FEATURE_V8)) {
				3913	return false;
				3914	}
				3915
				3916	/* UNDEF accesses to D16-D31 if they don't exist. */
				3917	if (!dc_isar_feature(aa32_simd_r32, s) &&
				3918	((a->vd \| a->vm) & 0x10)) {
				3919	return false;
				3920	}
				3921
				3922	if (a->size != 2) {
				3923	/* TODO: FP16 will be the size == 1 case */
				3924	return false;
				3925	}
				3926
				3927	if ((a->vd \| a->vm) & a->q) {
				3928	return false;
				3929	}
				3930
				3931	if (!vfp_access_check(s)) {
				3932	return true;
				3933	}
				3934
Peter Maydell	a84d1d1	2020-08-06 11:44:51 +0100	[diff] [blame]	3935	fpst = fpstatus_ptr(FPST_STD);
Peter Maydell	128123e	2020-06-16 18:08:40 +0100	[diff] [blame]	3936	tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
				3937	gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
				3938	for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
				3939	TCGv_i32 tmp = neon_load_reg(a->vm, pass);
				3940	gen_helper_rints(tmp, tmp, fpst);
				3941	neon_store_reg(a->vd, pass, tmp);
				3942	}
				3943	gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
				3944	tcg_temp_free_i32(tcg_rmode);
				3945	tcg_temp_free_ptr(fpst);
				3946
				3947	return true;
				3948	}
				3949
				3950	#define DO_VRINT(INSN, RMODE) \
				3951	static bool trans_##INSN(DisasContext s, arg_2misc a) \
				3952	{ \
				3953	return do_vrint(s, a, RMODE); \
				3954	}
				3955
				3956	DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
				3957	DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
				3958	DO_VRINT(VRINTZ, FPROUNDING_ZERO)
				3959	DO_VRINT(VRINTM, FPROUNDING_NEGINF)
				3960	DO_VRINT(VRINTP, FPROUNDING_POSINF)
Peter Maydell	a183d5f	2020-06-16 18:08:41 +0100	[diff] [blame]	3961
				3962	static bool do_vcvt(DisasContext s, arg_2misc a, int rmode, bool is_signed)
				3963	{
				3964	/*
				3965	* Handle a VCVT* operation by iterating 32 bits at a time,
				3966	* with a specified rounding mode in operation.
				3967	*/
				3968	int pass;
				3969	TCGv_ptr fpst;
				3970	TCGv_i32 tcg_rmode, tcg_shift;
				3971
				3972	if (!arm_dc_feature(s, ARM_FEATURE_NEON) \|\|
				3973	!arm_dc_feature(s, ARM_FEATURE_V8)) {
				3974	return false;
				3975	}
				3976
				3977	/* UNDEF accesses to D16-D31 if they don't exist. */
				3978	if (!dc_isar_feature(aa32_simd_r32, s) &&
				3979	((a->vd \| a->vm) & 0x10)) {
				3980	return false;
				3981	}
				3982
				3983	if (a->size != 2) {
				3984	/* TODO: FP16 will be the size == 1 case */
				3985	return false;
				3986	}
				3987
				3988	if ((a->vd \| a->vm) & a->q) {
				3989	return false;
				3990	}
				3991
				3992	if (!vfp_access_check(s)) {
				3993	return true;
				3994	}
				3995
Peter Maydell	a84d1d1	2020-08-06 11:44:51 +0100	[diff] [blame]	3996	fpst = fpstatus_ptr(FPST_STD);
Peter Maydell	a183d5f	2020-06-16 18:08:41 +0100	[diff] [blame]	3997	tcg_shift = tcg_const_i32(0);
				3998	tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
				3999	gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
				4000	for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
				4001	TCGv_i32 tmp = neon_load_reg(a->vm, pass);
				4002	if (is_signed) {
				4003	gen_helper_vfp_tosls(tmp, tmp, tcg_shift, fpst);
				4004	} else {
				4005	gen_helper_vfp_touls(tmp, tmp, tcg_shift, fpst);
				4006	}
				4007	neon_store_reg(a->vd, pass, tmp);
				4008	}
				4009	gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
				4010	tcg_temp_free_i32(tcg_rmode);
				4011	tcg_temp_free_i32(tcg_shift);
				4012	tcg_temp_free_ptr(fpst);
				4013
				4014	return true;
				4015	}
				4016
				4017	#define DO_VCVT(INSN, RMODE, SIGNED) \
				4018	static bool trans_##INSN(DisasContext s, arg_2misc a) \
				4019	{ \
				4020	return do_vcvt(s, a, RMODE, SIGNED); \
				4021	}
				4022
				4023	DO_VCVT(VCVTAU, FPROUNDING_TIEAWAY, false)
				4024	DO_VCVT(VCVTAS, FPROUNDING_TIEAWAY, true)
				4025	DO_VCVT(VCVTNU, FPROUNDING_TIEEVEN, false)
				4026	DO_VCVT(VCVTNS, FPROUNDING_TIEEVEN, true)
				4027	DO_VCVT(VCVTPU, FPROUNDING_POSINF, false)
				4028	DO_VCVT(VCVTPS, FPROUNDING_POSINF, true)
				4029	DO_VCVT(VCVTMU, FPROUNDING_NEGINF, false)
				4030	DO_VCVT(VCVTMS, FPROUNDING_NEGINF, true)
Peter Maydell	8ab3a22	2020-06-16 18:08:42 +0100	[diff] [blame]	4031
				4032	static bool trans_VSWP(DisasContext s, arg_2misc a)
				4033	{
				4034	TCGv_i64 rm, rd;
				4035	int pass;
				4036
				4037	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				4038	return false;
				4039	}
				4040
				4041	/* UNDEF accesses to D16-D31 if they don't exist. */
				4042	if (!dc_isar_feature(aa32_simd_r32, s) &&
				4043	((a->vd \| a->vm) & 0x10)) {
				4044	return false;
				4045	}
				4046
				4047	if (a->size != 0) {
				4048	return false;
				4049	}
				4050
				4051	if ((a->vd \| a->vm) & a->q) {
				4052	return false;
				4053	}
				4054
				4055	if (!vfp_access_check(s)) {
				4056	return true;
				4057	}
				4058
				4059	rm = tcg_temp_new_i64();
				4060	rd = tcg_temp_new_i64();
				4061	for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
				4062	neon_load_reg64(rm, a->vm + pass);
				4063	neon_load_reg64(rd, a->vd + pass);
				4064	neon_store_reg64(rm, a->vd + pass);
				4065	neon_store_reg64(rd, a->vm + pass);
				4066	}
				4067	tcg_temp_free_i64(rm);
				4068	tcg_temp_free_i64(rd);
				4069
				4070	return true;
				4071	}
Peter Maydell	d436619	2020-06-16 18:08:43 +0100	[diff] [blame]	4072	static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
				4073	{
				4074	TCGv_i32 rd, tmp;
				4075
				4076	rd = tcg_temp_new_i32();
				4077	tmp = tcg_temp_new_i32();
				4078
				4079	tcg_gen_shli_i32(rd, t0, 8);
				4080	tcg_gen_andi_i32(rd, rd, 0xff00ff00);
				4081	tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
				4082	tcg_gen_or_i32(rd, rd, tmp);
				4083
				4084	tcg_gen_shri_i32(t1, t1, 8);
				4085	tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
				4086	tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
				4087	tcg_gen_or_i32(t1, t1, tmp);
				4088	tcg_gen_mov_i32(t0, rd);
				4089
				4090	tcg_temp_free_i32(tmp);
				4091	tcg_temp_free_i32(rd);
				4092	}
				4093
				4094	static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
				4095	{
				4096	TCGv_i32 rd, tmp;
				4097
				4098	rd = tcg_temp_new_i32();
				4099	tmp = tcg_temp_new_i32();
				4100
				4101	tcg_gen_shli_i32(rd, t0, 16);
				4102	tcg_gen_andi_i32(tmp, t1, 0xffff);
				4103	tcg_gen_or_i32(rd, rd, tmp);
				4104	tcg_gen_shri_i32(t1, t1, 16);
				4105	tcg_gen_andi_i32(tmp, t0, 0xffff0000);
				4106	tcg_gen_or_i32(t1, t1, tmp);
				4107	tcg_gen_mov_i32(t0, rd);
				4108
				4109	tcg_temp_free_i32(tmp);
				4110	tcg_temp_free_i32(rd);
				4111	}
				4112
				4113	static bool trans_VTRN(DisasContext s, arg_2misc a)
				4114	{
				4115	TCGv_i32 tmp, tmp2;
				4116	int pass;
				4117
				4118	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
				4119	return false;
				4120	}
				4121
				4122	/* UNDEF accesses to D16-D31 if they don't exist. */
				4123	if (!dc_isar_feature(aa32_simd_r32, s) &&
				4124	((a->vd \| a->vm) & 0x10)) {
				4125	return false;
				4126	}
				4127
				4128	if ((a->vd \| a->vm) & a->q) {
				4129	return false;
				4130	}
				4131
				4132	if (a->size == 3) {
				4133	return false;
				4134	}
				4135
				4136	if (!vfp_access_check(s)) {
				4137	return true;
				4138	}
				4139
				4140	if (a->size == 2) {
				4141	for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
				4142	tmp = neon_load_reg(a->vm, pass);
				4143	tmp2 = neon_load_reg(a->vd, pass + 1);
				4144	neon_store_reg(a->vm, pass, tmp2);
				4145	neon_store_reg(a->vd, pass + 1, tmp);
				4146	}
				4147	} else {
				4148	for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
				4149	tmp = neon_load_reg(a->vm, pass);
				4150	tmp2 = neon_load_reg(a->vd, pass);
				4151	if (a->size == 0) {
				4152	gen_neon_trn_u8(tmp, tmp2);
				4153	} else {
				4154	gen_neon_trn_u16(tmp, tmp2);
				4155	}
				4156	neon_store_reg(a->vm, pass, tmp2);
				4157	neon_store_reg(a->vd, pass, tmp);
				4158	}
				4159	}
				4160	return true;
				4161	}