Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Tiny Code Generator for QEMU |
| 3 | * |
| 4 | * Copyright (c) 2018 Linaro, Inc. |
| 5 | * |
| 6 | * This library is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU Lesser General Public |
| 8 | * License as published by the Free Software Foundation; either |
Thomas Huth | fb0343d | 2019-01-23 15:08:56 +0100 | [diff] [blame] | 9 | * version 2.1 of the License, or (at your option) any later version. |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 10 | * |
| 11 | * This library is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | * Lesser General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU Lesser General Public |
| 17 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
| 18 | */ |
| 19 | |
| 20 | #include "qemu/osdep.h" |
Philippe Mathieu-Daudé | dcb32f1 | 2020-01-01 12:23:00 +0100 | [diff] [blame] | 21 | #include "tcg/tcg.h" |
Richard Henderson | 47f7313 | 2023-02-24 22:45:43 -1000 | [diff] [blame] | 22 | #include "tcg/tcg-temp-internal.h" |
Richard Henderson | ad3d0e4 | 2023-03-28 18:17:24 -0700 | [diff] [blame] | 23 | #include "tcg/tcg-op-common.h" |
Philippe Mathieu-Daudé | dcb32f1 | 2020-01-01 12:23:00 +0100 | [diff] [blame] | 24 | #include "tcg/tcg-mo.h" |
Richard Henderson | d56fea7 | 2022-10-17 11:07:39 +1000 | [diff] [blame] | 25 | #include "tcg-internal.h" |
| 26 | |
/*
 * Vector optional opcode tracking.
 * Except for the basic logical operations (and, or, xor), and
 * data movement (mov, ld, st, dupi), many vector opcodes are
 * optional and may not be supported on the host.  Thank Intel
 * for the irregularity in their instruction set.
 *
 * The gvec expanders allow custom vector operations to be composed,
 * generally via the .fniv callback in the GVecGen* structures.  At
 * the same time, in deciding whether to use this hook we need to
 * know if the host supports the required operations.  This is
 * presented as an array of opcodes, terminated by 0.  Each opcode
 * is assumed to be expanded with the given VECE.
 *
 * For debugging, we want to validate this array.  Therefore, when
 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
 * will validate that their opcode is present in the list.
 */
Philippe Mathieu-Daudé | ec2297b | 2023-06-29 11:11:07 +0200 | [diff] [blame] | 45 | static void tcg_assert_listed_vecop(TCGOpcode op) |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 46 | { |
Philippe Mathieu-Daudé | ec2297b | 2023-06-29 11:11:07 +0200 | [diff] [blame] | 47 | #ifdef CONFIG_DEBUG_TCG |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 48 | const TCGOpcode *p = tcg_ctx->vecop_list; |
| 49 | if (p) { |
| 50 | for (; *p; ++p) { |
| 51 | if (*p == op) { |
| 52 | return; |
| 53 | } |
| 54 | } |
| 55 | g_assert_not_reached(); |
| 56 | } |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 57 | #endif |
Philippe Mathieu-Daudé | ec2297b | 2023-06-29 11:11:07 +0200 | [diff] [blame] | 58 | } |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 59 | |
| 60 | bool tcg_can_emit_vecop_list(const TCGOpcode *list, |
| 61 | TCGType type, unsigned vece) |
| 62 | { |
| 63 | if (list == NULL) { |
| 64 | return true; |
| 65 | } |
| 66 | |
| 67 | for (; *list; ++list) { |
| 68 | TCGOpcode opc = *list; |
| 69 | |
| 70 | #ifdef CONFIG_DEBUG_TCG |
| 71 | switch (opc) { |
| 72 | case INDEX_op_and_vec: |
| 73 | case INDEX_op_or_vec: |
| 74 | case INDEX_op_xor_vec: |
| 75 | case INDEX_op_mov_vec: |
| 76 | case INDEX_op_dup_vec: |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 77 | case INDEX_op_dup2_vec: |
| 78 | case INDEX_op_ld_vec: |
| 79 | case INDEX_op_st_vec: |
Richard Henderson | 38dc129 | 2019-04-30 11:02:23 -0700 | [diff] [blame] | 80 | case INDEX_op_bitsel_vec: |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 81 | /* These opcodes are mandatory and should not be listed. */ |
| 82 | g_assert_not_reached(); |
Richard Henderson | 11978f6 | 2019-06-27 17:34:47 +0000 | [diff] [blame] | 83 | case INDEX_op_not_vec: |
| 84 | /* These opcodes have generic expansions using the above. */ |
| 85 | g_assert_not_reached(); |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 86 | default: |
| 87 | break; |
| 88 | } |
| 89 | #endif |
| 90 | |
| 91 | if (tcg_can_emit_vec_op(opc, type, vece)) { |
| 92 | continue; |
| 93 | } |
| 94 | |
| 95 | /* |
| 96 | * The opcode list is created by front ends based on what they |
| 97 | * actually invoke. We must mirror the logic in the routines |
| 98 | * below for generic expansions using other opcodes. |
| 99 | */ |
| 100 | switch (opc) { |
| 101 | case INDEX_op_neg_vec: |
| 102 | if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) { |
| 103 | continue; |
| 104 | } |
| 105 | break; |
Richard Henderson | bcefc90 | 2019-04-17 13:53:02 -1000 | [diff] [blame] | 106 | case INDEX_op_abs_vec: |
| 107 | if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece) |
| 108 | && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0 |
| 109 | || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0 |
| 110 | || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) { |
| 111 | continue; |
| 112 | } |
| 113 | break; |
Richard Henderson | 2552d60 | 2020-09-14 19:25:58 -0700 | [diff] [blame] | 114 | case INDEX_op_usadd_vec: |
| 115 | if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) || |
| 116 | tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) { |
| 117 | continue; |
| 118 | } |
| 119 | break; |
| 120 | case INDEX_op_ussub_vec: |
| 121 | if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) || |
| 122 | tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) { |
| 123 | continue; |
| 124 | } |
| 125 | break; |
Richard Henderson | f75da29 | 2019-04-30 13:01:12 -0700 | [diff] [blame] | 126 | case INDEX_op_cmpsel_vec: |
Richard Henderson | 72b4c79 | 2019-04-20 03:26:09 +0000 | [diff] [blame] | 127 | case INDEX_op_smin_vec: |
| 128 | case INDEX_op_smax_vec: |
| 129 | case INDEX_op_umin_vec: |
| 130 | case INDEX_op_umax_vec: |
Richard Henderson | f75da29 | 2019-04-30 13:01:12 -0700 | [diff] [blame] | 131 | if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) { |
| 132 | continue; |
| 133 | } |
| 134 | break; |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 135 | default: |
| 136 | break; |
| 137 | } |
| 138 | return false; |
| 139 | } |
| 140 | return true; |
| 141 | } |
| 142 | |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 143 | void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a) |
| 144 | { |
Philippe Mathieu-Daudé | d447894 | 2022-12-18 22:18:31 +0100 | [diff] [blame] | 145 | TCGOp *op = tcg_emit_op(opc, 2); |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 146 | TCGOP_VECL(op) = type - TCG_TYPE_V64; |
| 147 | TCGOP_VECE(op) = vece; |
| 148 | op->args[0] = r; |
| 149 | op->args[1] = a; |
| 150 | } |
| 151 | |
| 152 | void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece, |
| 153 | TCGArg r, TCGArg a, TCGArg b) |
| 154 | { |
Philippe Mathieu-Daudé | d447894 | 2022-12-18 22:18:31 +0100 | [diff] [blame] | 155 | TCGOp *op = tcg_emit_op(opc, 3); |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 156 | TCGOP_VECL(op) = type - TCG_TYPE_V64; |
| 157 | TCGOP_VECE(op) = vece; |
| 158 | op->args[0] = r; |
| 159 | op->args[1] = a; |
| 160 | op->args[2] = b; |
| 161 | } |
| 162 | |
| 163 | void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece, |
| 164 | TCGArg r, TCGArg a, TCGArg b, TCGArg c) |
| 165 | { |
Philippe Mathieu-Daudé | d447894 | 2022-12-18 22:18:31 +0100 | [diff] [blame] | 166 | TCGOp *op = tcg_emit_op(opc, 4); |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 167 | TCGOP_VECL(op) = type - TCG_TYPE_V64; |
| 168 | TCGOP_VECE(op) = vece; |
| 169 | op->args[0] = r; |
| 170 | op->args[1] = a; |
| 171 | op->args[2] = b; |
| 172 | op->args[3] = c; |
| 173 | } |
| 174 | |
Richard Henderson | f75da29 | 2019-04-30 13:01:12 -0700 | [diff] [blame] | 175 | static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, |
| 176 | TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e) |
| 177 | { |
Philippe Mathieu-Daudé | d447894 | 2022-12-18 22:18:31 +0100 | [diff] [blame] | 178 | TCGOp *op = tcg_emit_op(opc, 6); |
Richard Henderson | f75da29 | 2019-04-30 13:01:12 -0700 | [diff] [blame] | 179 | TCGOP_VECL(op) = type - TCG_TYPE_V64; |
| 180 | TCGOP_VECE(op) = vece; |
| 181 | op->args[0] = r; |
| 182 | op->args[1] = a; |
| 183 | op->args[2] = b; |
| 184 | op->args[3] = c; |
| 185 | op->args[4] = d; |
| 186 | op->args[5] = e; |
| 187 | } |
| 188 | |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 189 | static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a) |
| 190 | { |
| 191 | TCGTemp *rt = tcgv_vec_temp(r); |
| 192 | TCGTemp *at = tcgv_vec_temp(a); |
| 193 | TCGType type = rt->base_type; |
| 194 | |
Richard Henderson | db43267 | 2017-09-15 14:11:45 -0700 | [diff] [blame] | 195 | /* Must enough inputs for the output. */ |
| 196 | tcg_debug_assert(at->base_type >= type); |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 197 | vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at)); |
| 198 | } |
| 199 | |
| 200 | static void vec_gen_op3(TCGOpcode opc, unsigned vece, |
| 201 | TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 202 | { |
| 203 | TCGTemp *rt = tcgv_vec_temp(r); |
| 204 | TCGTemp *at = tcgv_vec_temp(a); |
| 205 | TCGTemp *bt = tcgv_vec_temp(b); |
| 206 | TCGType type = rt->base_type; |
| 207 | |
Richard Henderson | db43267 | 2017-09-15 14:11:45 -0700 | [diff] [blame] | 208 | /* Must enough inputs for the output. */ |
| 209 | tcg_debug_assert(at->base_type >= type); |
| 210 | tcg_debug_assert(bt->base_type >= type); |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 211 | vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt)); |
| 212 | } |
| 213 | |
| 214 | void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a) |
| 215 | { |
| 216 | if (r != a) { |
| 217 | vec_gen_op2(INDEX_op_mov_vec, 0, r, a); |
| 218 | } |
| 219 | } |
| 220 | |
Richard Henderson | db43267 | 2017-09-15 14:11:45 -0700 | [diff] [blame] | 221 | void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a) |
| 222 | { |
Richard Henderson | 0b4286d | 2020-09-06 17:33:18 -0700 | [diff] [blame] | 223 | TCGTemp *rt = tcgv_vec_temp(r); |
| 224 | tcg_gen_mov_vec(r, tcg_constant_vec(rt->base_type, vece, a)); |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 225 | } |
| 226 | |
| 227 | void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a) |
| 228 | { |
| 229 | TCGArg ri = tcgv_vec_arg(r); |
| 230 | TCGTemp *rt = arg_temp(ri); |
| 231 | TCGType type = rt->base_type; |
| 232 | |
| 233 | if (TCG_TARGET_REG_BITS == 64) { |
| 234 | TCGArg ai = tcgv_i64_arg(a); |
Richard Henderson | db43267 | 2017-09-15 14:11:45 -0700 | [diff] [blame] | 235 | vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai); |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 236 | } else if (vece == MO_64) { |
| 237 | TCGArg al = tcgv_i32_arg(TCGV_LOW(a)); |
| 238 | TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a)); |
| 239 | vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah); |
| 240 | } else { |
| 241 | TCGArg ai = tcgv_i32_arg(TCGV_LOW(a)); |
Richard Henderson | db43267 | 2017-09-15 14:11:45 -0700 | [diff] [blame] | 242 | vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai); |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 243 | } |
| 244 | } |
| 245 | |
| 246 | void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a) |
| 247 | { |
| 248 | TCGArg ri = tcgv_vec_arg(r); |
| 249 | TCGArg ai = tcgv_i32_arg(a); |
| 250 | TCGTemp *rt = arg_temp(ri); |
| 251 | TCGType type = rt->base_type; |
| 252 | |
| 253 | vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai); |
| 254 | } |
| 255 | |
Richard Henderson | 37ee55a | 2019-03-17 01:55:22 +0000 | [diff] [blame] | 256 | void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b, |
| 257 | tcg_target_long ofs) |
| 258 | { |
| 259 | TCGArg ri = tcgv_vec_arg(r); |
| 260 | TCGArg bi = tcgv_ptr_arg(b); |
| 261 | TCGTemp *rt = arg_temp(ri); |
| 262 | TCGType type = rt->base_type; |
| 263 | |
| 264 | vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs); |
| 265 | } |
| 266 | |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 267 | static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o) |
| 268 | { |
| 269 | TCGArg ri = tcgv_vec_arg(r); |
| 270 | TCGArg bi = tcgv_ptr_arg(b); |
| 271 | TCGTemp *rt = arg_temp(ri); |
| 272 | TCGType type = rt->base_type; |
| 273 | |
| 274 | vec_gen_3(opc, type, 0, ri, bi, o); |
| 275 | } |
| 276 | |
| 277 | void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o) |
| 278 | { |
| 279 | vec_gen_ldst(INDEX_op_ld_vec, r, b, o); |
| 280 | } |
| 281 | |
| 282 | void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o) |
| 283 | { |
| 284 | vec_gen_ldst(INDEX_op_st_vec, r, b, o); |
| 285 | } |
| 286 | |
| 287 | void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type) |
| 288 | { |
| 289 | TCGArg ri = tcgv_vec_arg(r); |
| 290 | TCGArg bi = tcgv_ptr_arg(b); |
| 291 | TCGTemp *rt = arg_temp(ri); |
| 292 | TCGType type = rt->base_type; |
| 293 | |
| 294 | tcg_debug_assert(low_type >= TCG_TYPE_V64); |
| 295 | tcg_debug_assert(low_type <= type); |
| 296 | vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o); |
| 297 | } |
| 298 | |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 299 | void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 300 | { |
| 301 | vec_gen_op3(INDEX_op_and_vec, 0, r, a, b); |
| 302 | } |
| 303 | |
| 304 | void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 305 | { |
| 306 | vec_gen_op3(INDEX_op_or_vec, 0, r, a, b); |
| 307 | } |
| 308 | |
| 309 | void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 310 | { |
| 311 | vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b); |
| 312 | } |
| 313 | |
| 314 | void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 315 | { |
| 316 | if (TCG_TARGET_HAS_andc_vec) { |
| 317 | vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b); |
| 318 | } else { |
| 319 | TCGv_vec t = tcg_temp_new_vec_matching(r); |
| 320 | tcg_gen_not_vec(0, t, b); |
| 321 | tcg_gen_and_vec(0, r, a, t); |
| 322 | tcg_temp_free_vec(t); |
| 323 | } |
| 324 | } |
| 325 | |
| 326 | void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 327 | { |
| 328 | if (TCG_TARGET_HAS_orc_vec) { |
| 329 | vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b); |
| 330 | } else { |
| 331 | TCGv_vec t = tcg_temp_new_vec_matching(r); |
| 332 | tcg_gen_not_vec(0, t, b); |
| 333 | tcg_gen_or_vec(0, r, a, t); |
| 334 | tcg_temp_free_vec(t); |
| 335 | } |
| 336 | } |
| 337 | |
Richard Henderson | f550805 | 2018-12-17 13:22:06 -0800 | [diff] [blame] | 338 | void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 339 | { |
Richard Henderson | ed52347 | 2021-12-16 11:17:46 -0800 | [diff] [blame] | 340 | if (TCG_TARGET_HAS_nand_vec) { |
| 341 | vec_gen_op3(INDEX_op_nand_vec, 0, r, a, b); |
| 342 | } else { |
| 343 | tcg_gen_and_vec(0, r, a, b); |
| 344 | tcg_gen_not_vec(0, r, r); |
| 345 | } |
Richard Henderson | f550805 | 2018-12-17 13:22:06 -0800 | [diff] [blame] | 346 | } |
| 347 | |
| 348 | void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 349 | { |
Richard Henderson | ed52347 | 2021-12-16 11:17:46 -0800 | [diff] [blame] | 350 | if (TCG_TARGET_HAS_nor_vec) { |
| 351 | vec_gen_op3(INDEX_op_nor_vec, 0, r, a, b); |
| 352 | } else { |
| 353 | tcg_gen_or_vec(0, r, a, b); |
| 354 | tcg_gen_not_vec(0, r, r); |
| 355 | } |
Richard Henderson | f550805 | 2018-12-17 13:22:06 -0800 | [diff] [blame] | 356 | } |
| 357 | |
| 358 | void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 359 | { |
Richard Henderson | ed52347 | 2021-12-16 11:17:46 -0800 | [diff] [blame] | 360 | if (TCG_TARGET_HAS_eqv_vec) { |
| 361 | vec_gen_op3(INDEX_op_eqv_vec, 0, r, a, b); |
| 362 | } else { |
| 363 | tcg_gen_xor_vec(0, r, a, b); |
| 364 | tcg_gen_not_vec(0, r, r); |
| 365 | } |
Richard Henderson | f550805 | 2018-12-17 13:22:06 -0800 | [diff] [blame] | 366 | } |
| 367 | |
Richard Henderson | ce27c5d | 2019-03-16 21:44:56 +0000 | [diff] [blame] | 368 | static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc) |
| 369 | { |
| 370 | TCGTemp *rt = tcgv_vec_temp(r); |
| 371 | TCGTemp *at = tcgv_vec_temp(a); |
| 372 | TCGArg ri = temp_arg(rt); |
| 373 | TCGArg ai = temp_arg(at); |
| 374 | TCGType type = rt->base_type; |
| 375 | int can; |
| 376 | |
| 377 | tcg_debug_assert(at->base_type >= type); |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 378 | tcg_assert_listed_vecop(opc); |
Richard Henderson | ce27c5d | 2019-03-16 21:44:56 +0000 | [diff] [blame] | 379 | can = tcg_can_emit_vec_op(opc, type, vece); |
| 380 | if (can > 0) { |
| 381 | vec_gen_2(opc, type, vece, ri, ai); |
| 382 | } else if (can < 0) { |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 383 | const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); |
Richard Henderson | ce27c5d | 2019-03-16 21:44:56 +0000 | [diff] [blame] | 384 | tcg_expand_vec_op(opc, type, vece, ri, ai); |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 385 | tcg_swap_vecop_list(hold_list); |
Richard Henderson | ce27c5d | 2019-03-16 21:44:56 +0000 | [diff] [blame] | 386 | } else { |
| 387 | return false; |
| 388 | } |
| 389 | return true; |
| 390 | } |
| 391 | |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 392 | void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a) |
| 393 | { |
Richard Henderson | 77fafcb | 2023-08-28 12:15:35 -0700 | [diff] [blame] | 394 | if (TCG_TARGET_HAS_not_vec) { |
| 395 | vec_gen_op2(INDEX_op_not_vec, 0, r, a); |
| 396 | } else { |
Richard Henderson | f6ff9c2 | 2023-02-26 13:57:36 -1000 | [diff] [blame] | 397 | tcg_gen_xor_vec(0, r, a, tcg_constant_vec_matching(r, 0, -1)); |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 398 | } |
| 399 | } |
| 400 | |
| 401 | void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a) |
| 402 | { |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 403 | const TCGOpcode *hold_list; |
| 404 | |
| 405 | tcg_assert_listed_vecop(INDEX_op_neg_vec); |
| 406 | hold_list = tcg_swap_vecop_list(NULL); |
| 407 | |
Richard Henderson | ce27c5d | 2019-03-16 21:44:56 +0000 | [diff] [blame] | 408 | if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) { |
Richard Henderson | f6ff9c2 | 2023-02-26 13:57:36 -1000 | [diff] [blame] | 409 | tcg_gen_sub_vec(vece, r, tcg_constant_vec_matching(r, vece, 0), a); |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 410 | } |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 411 | tcg_swap_vecop_list(hold_list); |
Richard Henderson | d2fd745 | 2017-09-14 13:53:46 -0700 | [diff] [blame] | 412 | } |
Richard Henderson | d0ec979 | 2017-11-17 14:35:11 +0100 | [diff] [blame] | 413 | |
Richard Henderson | bcefc90 | 2019-04-17 13:53:02 -1000 | [diff] [blame] | 414 | void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a) |
| 415 | { |
| 416 | const TCGOpcode *hold_list; |
| 417 | |
| 418 | tcg_assert_listed_vecop(INDEX_op_abs_vec); |
| 419 | hold_list = tcg_swap_vecop_list(NULL); |
| 420 | |
| 421 | if (!do_op2(vece, r, a, INDEX_op_abs_vec)) { |
| 422 | TCGType type = tcgv_vec_temp(r)->base_type; |
| 423 | TCGv_vec t = tcg_temp_new_vec(type); |
| 424 | |
| 425 | tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)); |
| 426 | if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) { |
| 427 | tcg_gen_neg_vec(vece, t, a); |
| 428 | tcg_gen_smax_vec(vece, r, a, t); |
| 429 | } else { |
| 430 | if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) { |
| 431 | tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1); |
| 432 | } else { |
Richard Henderson | 0b4286d | 2020-09-06 17:33:18 -0700 | [diff] [blame] | 433 | tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a, |
| 434 | tcg_constant_vec(type, vece, 0)); |
Richard Henderson | bcefc90 | 2019-04-17 13:53:02 -1000 | [diff] [blame] | 435 | } |
| 436 | tcg_gen_xor_vec(vece, r, a, t); |
| 437 | tcg_gen_sub_vec(vece, r, r, t); |
| 438 | } |
| 439 | |
| 440 | tcg_temp_free_vec(t); |
| 441 | } |
| 442 | tcg_swap_vecop_list(hold_list); |
| 443 | } |
| 444 | |
Richard Henderson | d0ec979 | 2017-11-17 14:35:11 +0100 | [diff] [blame] | 445 | static void do_shifti(TCGOpcode opc, unsigned vece, |
| 446 | TCGv_vec r, TCGv_vec a, int64_t i) |
| 447 | { |
| 448 | TCGTemp *rt = tcgv_vec_temp(r); |
| 449 | TCGTemp *at = tcgv_vec_temp(a); |
| 450 | TCGArg ri = temp_arg(rt); |
| 451 | TCGArg ai = temp_arg(at); |
| 452 | TCGType type = rt->base_type; |
| 453 | int can; |
| 454 | |
| 455 | tcg_debug_assert(at->base_type == type); |
| 456 | tcg_debug_assert(i >= 0 && i < (8 << vece)); |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 457 | tcg_assert_listed_vecop(opc); |
Richard Henderson | d0ec979 | 2017-11-17 14:35:11 +0100 | [diff] [blame] | 458 | |
| 459 | if (i == 0) { |
| 460 | tcg_gen_mov_vec(r, a); |
| 461 | return; |
| 462 | } |
| 463 | |
| 464 | can = tcg_can_emit_vec_op(opc, type, vece); |
| 465 | if (can > 0) { |
| 466 | vec_gen_3(opc, type, vece, ri, ai, i); |
| 467 | } else { |
| 468 | /* We leave the choice of expansion via scalar or vector shift |
| 469 | to the target. Often, but not always, dupi can feed a vector |
| 470 | shift easier than a scalar. */ |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 471 | const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); |
Richard Henderson | d0ec979 | 2017-11-17 14:35:11 +0100 | [diff] [blame] | 472 | tcg_debug_assert(can < 0); |
| 473 | tcg_expand_vec_op(opc, type, vece, ri, ai, i); |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 474 | tcg_swap_vecop_list(hold_list); |
Richard Henderson | d0ec979 | 2017-11-17 14:35:11 +0100 | [diff] [blame] | 475 | } |
| 476 | } |
| 477 | |
| 478 | void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) |
| 479 | { |
| 480 | do_shifti(INDEX_op_shli_vec, vece, r, a, i); |
| 481 | } |
| 482 | |
| 483 | void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) |
| 484 | { |
| 485 | do_shifti(INDEX_op_shri_vec, vece, r, a, i); |
| 486 | } |
| 487 | |
| 488 | void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) |
| 489 | { |
| 490 | do_shifti(INDEX_op_sari_vec, vece, r, a, i); |
| 491 | } |
Richard Henderson | 212be17 | 2017-11-17 20:47:42 +0100 | [diff] [blame] | 492 | |
Richard Henderson | b0f7e74 | 2020-04-19 18:01:52 -0700 | [diff] [blame] | 493 | void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) |
| 494 | { |
| 495 | do_shifti(INDEX_op_rotli_vec, vece, r, a, i); |
| 496 | } |
| 497 | |
| 498 | void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) |
| 499 | { |
| 500 | int bits = 8 << vece; |
| 501 | tcg_debug_assert(i >= 0 && i < bits); |
| 502 | do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1)); |
| 503 | } |
| 504 | |
Richard Henderson | 212be17 | 2017-11-17 20:47:42 +0100 | [diff] [blame] | 505 | void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, |
| 506 | TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 507 | { |
| 508 | TCGTemp *rt = tcgv_vec_temp(r); |
| 509 | TCGTemp *at = tcgv_vec_temp(a); |
| 510 | TCGTemp *bt = tcgv_vec_temp(b); |
Richard Henderson | 6975cc4 | 2024-05-15 15:08:16 +0200 | [diff] [blame] | 511 | TCGTemp *tt = NULL; |
Richard Henderson | 212be17 | 2017-11-17 20:47:42 +0100 | [diff] [blame] | 512 | TCGArg ri = temp_arg(rt); |
| 513 | TCGArg ai = temp_arg(at); |
| 514 | TCGArg bi = temp_arg(bt); |
Richard Henderson | 6975cc4 | 2024-05-15 15:08:16 +0200 | [diff] [blame] | 515 | TCGArg ti; |
Richard Henderson | 212be17 | 2017-11-17 20:47:42 +0100 | [diff] [blame] | 516 | TCGType type = rt->base_type; |
| 517 | int can; |
| 518 | |
Richard Henderson | 9a938d8 | 2018-04-17 11:35:42 -1000 | [diff] [blame] | 519 | tcg_debug_assert(at->base_type >= type); |
| 520 | tcg_debug_assert(bt->base_type >= type); |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 521 | tcg_assert_listed_vecop(INDEX_op_cmp_vec); |
Richard Henderson | 212be17 | 2017-11-17 20:47:42 +0100 | [diff] [blame] | 522 | can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece); |
Richard Henderson | 6975cc4 | 2024-05-15 15:08:16 +0200 | [diff] [blame] | 523 | |
| 524 | if (!TCG_TARGET_HAS_tst_vec && is_tst_cond(cond)) { |
| 525 | tt = tcg_temp_new_internal(type, TEMP_EBB); |
| 526 | ti = temp_arg(tt); |
| 527 | vec_gen_3(INDEX_op_and_vec, type, 0, ti, ai, bi); |
| 528 | at = tt; |
| 529 | ai = ti; |
| 530 | bt = tcg_constant_internal(type, 0); |
| 531 | bi = temp_arg(bt); |
| 532 | cond = tcg_tst_eqne_cond(cond); |
| 533 | } |
| 534 | |
Richard Henderson | 212be17 | 2017-11-17 20:47:42 +0100 | [diff] [blame] | 535 | if (can > 0) { |
| 536 | vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond); |
| 537 | } else { |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 538 | const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); |
Richard Henderson | 212be17 | 2017-11-17 20:47:42 +0100 | [diff] [blame] | 539 | tcg_debug_assert(can < 0); |
| 540 | tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond); |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 541 | tcg_swap_vecop_list(hold_list); |
Richard Henderson | 212be17 | 2017-11-17 20:47:42 +0100 | [diff] [blame] | 542 | } |
Richard Henderson | 6975cc4 | 2024-05-15 15:08:16 +0200 | [diff] [blame] | 543 | |
| 544 | if (tt) { |
| 545 | tcg_temp_free_internal(tt); |
| 546 | } |
Richard Henderson | 212be17 | 2017-11-17 20:47:42 +0100 | [diff] [blame] | 547 | } |
Richard Henderson | 3774030 | 2017-11-21 10:11:14 +0100 | [diff] [blame] | 548 | |
Richard Henderson | 17f7994 | 2019-04-20 03:13:26 +0000 | [diff] [blame] | 549 | static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a, |
Richard Henderson | 8afaf05 | 2018-12-17 18:01:47 -0800 | [diff] [blame] | 550 | TCGv_vec b, TCGOpcode opc) |
Richard Henderson | 3774030 | 2017-11-21 10:11:14 +0100 | [diff] [blame] | 551 | { |
| 552 | TCGTemp *rt = tcgv_vec_temp(r); |
| 553 | TCGTemp *at = tcgv_vec_temp(a); |
| 554 | TCGTemp *bt = tcgv_vec_temp(b); |
| 555 | TCGArg ri = temp_arg(rt); |
| 556 | TCGArg ai = temp_arg(at); |
| 557 | TCGArg bi = temp_arg(bt); |
| 558 | TCGType type = rt->base_type; |
| 559 | int can; |
| 560 | |
Richard Henderson | 9a938d8 | 2018-04-17 11:35:42 -1000 | [diff] [blame] | 561 | tcg_debug_assert(at->base_type >= type); |
| 562 | tcg_debug_assert(bt->base_type >= type); |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 563 | tcg_assert_listed_vecop(opc); |
Richard Henderson | 8afaf05 | 2018-12-17 18:01:47 -0800 | [diff] [blame] | 564 | can = tcg_can_emit_vec_op(opc, type, vece); |
Richard Henderson | 3774030 | 2017-11-21 10:11:14 +0100 | [diff] [blame] | 565 | if (can > 0) { |
Richard Henderson | 8afaf05 | 2018-12-17 18:01:47 -0800 | [diff] [blame] | 566 | vec_gen_3(opc, type, vece, ri, ai, bi); |
Richard Henderson | 17f7994 | 2019-04-20 03:13:26 +0000 | [diff] [blame] | 567 | } else if (can < 0) { |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 568 | const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); |
Richard Henderson | 8afaf05 | 2018-12-17 18:01:47 -0800 | [diff] [blame] | 569 | tcg_expand_vec_op(opc, type, vece, ri, ai, bi); |
Richard Henderson | 53229a7 | 2019-03-17 00:27:29 +0000 | [diff] [blame] | 570 | tcg_swap_vecop_list(hold_list); |
Richard Henderson | 17f7994 | 2019-04-20 03:13:26 +0000 | [diff] [blame] | 571 | } else { |
| 572 | return false; |
Richard Henderson | 3774030 | 2017-11-21 10:11:14 +0100 | [diff] [blame] | 573 | } |
Richard Henderson | 17f7994 | 2019-04-20 03:13:26 +0000 | [diff] [blame] | 574 | return true; |
| 575 | } |
| 576 | |
| 577 | static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a, |
| 578 | TCGv_vec b, TCGOpcode opc) |
| 579 | { |
| 580 | bool ok = do_op3(vece, r, a, b, opc); |
| 581 | tcg_debug_assert(ok); |
Richard Henderson | 3774030 | 2017-11-21 10:11:14 +0100 | [diff] [blame] | 582 | } |
Richard Henderson | 8afaf05 | 2018-12-17 18:01:47 -0800 | [diff] [blame] | 583 | |
Richard Henderson | ce27c5d | 2019-03-16 21:44:56 +0000 | [diff] [blame] | 584 | void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 585 | { |
Richard Henderson | 17f7994 | 2019-04-20 03:13:26 +0000 | [diff] [blame] | 586 | do_op3_nofail(vece, r, a, b, INDEX_op_add_vec); |
Richard Henderson | ce27c5d | 2019-03-16 21:44:56 +0000 | [diff] [blame] | 587 | } |
| 588 | |
| 589 | void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 590 | { |
Richard Henderson | 17f7994 | 2019-04-20 03:13:26 +0000 | [diff] [blame] | 591 | do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec); |
Richard Henderson | ce27c5d | 2019-03-16 21:44:56 +0000 | [diff] [blame] | 592 | } |
| 593 | |
Richard Henderson | 8afaf05 | 2018-12-17 18:01:47 -0800 | [diff] [blame] | 594 | void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 595 | { |
Richard Henderson | 17f7994 | 2019-04-20 03:13:26 +0000 | [diff] [blame] | 596 | do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec); |
Richard Henderson | 8afaf05 | 2018-12-17 18:01:47 -0800 | [diff] [blame] | 597 | } |
| 598 | |
| 599 | void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 600 | { |
Richard Henderson | 17f7994 | 2019-04-20 03:13:26 +0000 | [diff] [blame] | 601 | do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec); |
Richard Henderson | 8afaf05 | 2018-12-17 18:01:47 -0800 | [diff] [blame] | 602 | } |
| 603 | |
| 604 | void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 605 | { |
Richard Henderson | 2552d60 | 2020-09-14 19:25:58 -0700 | [diff] [blame] | 606 | if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) { |
| 607 | const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); |
| 608 | TCGv_vec t = tcg_temp_new_vec_matching(r); |
| 609 | |
| 610 | /* usadd(a, b) = min(a, ~b) + b */ |
| 611 | tcg_gen_not_vec(vece, t, b); |
| 612 | tcg_gen_umin_vec(vece, t, t, a); |
| 613 | tcg_gen_add_vec(vece, r, t, b); |
| 614 | |
| 615 | tcg_temp_free_vec(t); |
| 616 | tcg_swap_vecop_list(hold_list); |
| 617 | } |
Richard Henderson | 8afaf05 | 2018-12-17 18:01:47 -0800 | [diff] [blame] | 618 | } |
| 619 | |
| 620 | void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 621 | { |
Richard Henderson | 17f7994 | 2019-04-20 03:13:26 +0000 | [diff] [blame] | 622 | do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec); |
Richard Henderson | 8afaf05 | 2018-12-17 18:01:47 -0800 | [diff] [blame] | 623 | } |
| 624 | |
| 625 | void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 626 | { |
Richard Henderson | 2552d60 | 2020-09-14 19:25:58 -0700 | [diff] [blame] | 627 | if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) { |
| 628 | const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); |
| 629 | TCGv_vec t = tcg_temp_new_vec_matching(r); |
| 630 | |
| 631 | /* ussub(a, b) = max(a, b) - b */ |
| 632 | tcg_gen_umax_vec(vece, t, a, b); |
| 633 | tcg_gen_sub_vec(vece, r, t, b); |
| 634 | |
| 635 | tcg_temp_free_vec(t); |
| 636 | tcg_swap_vecop_list(hold_list); |
| 637 | } |
Richard Henderson | 8afaf05 | 2018-12-17 18:01:47 -0800 | [diff] [blame] | 638 | } |
Richard Henderson | dd0a0fc | 2018-12-17 19:35:46 -0800 | [diff] [blame] | 639 | |
Richard Henderson | 72b4c79 | 2019-04-20 03:26:09 +0000 | [diff] [blame] | 640 | static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a, |
| 641 | TCGv_vec b, TCGOpcode opc, TCGCond cond) |
| 642 | { |
| 643 | if (!do_op3(vece, r, a, b, opc)) { |
Richard Henderson | 69c918d | 2020-06-09 16:32:09 -0700 | [diff] [blame] | 644 | const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); |
Richard Henderson | 72b4c79 | 2019-04-20 03:26:09 +0000 | [diff] [blame] | 645 | tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b); |
Richard Henderson | 69c918d | 2020-06-09 16:32:09 -0700 | [diff] [blame] | 646 | tcg_swap_vecop_list(hold_list); |
Richard Henderson | 72b4c79 | 2019-04-20 03:26:09 +0000 | [diff] [blame] | 647 | } |
| 648 | } |
| 649 | |
Richard Henderson | dd0a0fc | 2018-12-17 19:35:46 -0800 | [diff] [blame] | 650 | void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 651 | { |
Richard Henderson | 72b4c79 | 2019-04-20 03:26:09 +0000 | [diff] [blame] | 652 | do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT); |
Richard Henderson | dd0a0fc | 2018-12-17 19:35:46 -0800 | [diff] [blame] | 653 | } |
| 654 | |
| 655 | void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 656 | { |
Richard Henderson | 72b4c79 | 2019-04-20 03:26:09 +0000 | [diff] [blame] | 657 | do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU); |
Richard Henderson | dd0a0fc | 2018-12-17 19:35:46 -0800 | [diff] [blame] | 658 | } |
| 659 | |
| 660 | void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 661 | { |
Richard Henderson | 72b4c79 | 2019-04-20 03:26:09 +0000 | [diff] [blame] | 662 | do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT); |
Richard Henderson | dd0a0fc | 2018-12-17 19:35:46 -0800 | [diff] [blame] | 663 | } |
| 664 | |
| 665 | void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 666 | { |
Richard Henderson | 72b4c79 | 2019-04-20 03:26:09 +0000 | [diff] [blame] | 667 | do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU); |
Richard Henderson | dd0a0fc | 2018-12-17 19:35:46 -0800 | [diff] [blame] | 668 | } |
Richard Henderson | 5ee5c14 | 2019-04-13 20:42:37 -1000 | [diff] [blame] | 669 | |
| 670 | void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 671 | { |
Richard Henderson | 17f7994 | 2019-04-20 03:13:26 +0000 | [diff] [blame] | 672 | do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec); |
Richard Henderson | 5ee5c14 | 2019-04-13 20:42:37 -1000 | [diff] [blame] | 673 | } |
| 674 | |
| 675 | void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 676 | { |
Richard Henderson | 17f7994 | 2019-04-20 03:13:26 +0000 | [diff] [blame] | 677 | do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec); |
Richard Henderson | 5ee5c14 | 2019-04-13 20:42:37 -1000 | [diff] [blame] | 678 | } |
| 679 | |
| 680 | void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 681 | { |
Richard Henderson | 17f7994 | 2019-04-20 03:13:26 +0000 | [diff] [blame] | 682 | do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec); |
Richard Henderson | 5ee5c14 | 2019-04-13 20:42:37 -1000 | [diff] [blame] | 683 | } |
Richard Henderson | b4578cd | 2019-04-18 18:19:38 -1000 | [diff] [blame] | 684 | |
Richard Henderson | 5d0ceda | 2020-04-19 19:47:59 -0700 | [diff] [blame] | 685 | void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 686 | { |
| 687 | do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec); |
| 688 | } |
| 689 | |
| 690 | void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) |
| 691 | { |
| 692 | do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec); |
| 693 | } |
| 694 | |
Richard Henderson | b4578cd | 2019-04-18 18:19:38 -1000 | [diff] [blame] | 695 | static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a, |
Richard Henderson | 3d5bb2e | 2020-04-20 07:56:36 -0700 | [diff] [blame] | 696 | TCGv_i32 s, TCGOpcode opc) |
Richard Henderson | b4578cd | 2019-04-18 18:19:38 -1000 | [diff] [blame] | 697 | { |
| 698 | TCGTemp *rt = tcgv_vec_temp(r); |
| 699 | TCGTemp *at = tcgv_vec_temp(a); |
| 700 | TCGTemp *st = tcgv_i32_temp(s); |
| 701 | TCGArg ri = temp_arg(rt); |
| 702 | TCGArg ai = temp_arg(at); |
| 703 | TCGArg si = temp_arg(st); |
| 704 | TCGType type = rt->base_type; |
Richard Henderson | b4578cd | 2019-04-18 18:19:38 -1000 | [diff] [blame] | 705 | int can; |
| 706 | |
| 707 | tcg_debug_assert(at->base_type >= type); |
Richard Henderson | 3d5bb2e | 2020-04-20 07:56:36 -0700 | [diff] [blame] | 708 | tcg_assert_listed_vecop(opc); |
| 709 | can = tcg_can_emit_vec_op(opc, type, vece); |
Richard Henderson | b4578cd | 2019-04-18 18:19:38 -1000 | [diff] [blame] | 710 | if (can > 0) { |
Richard Henderson | 3d5bb2e | 2020-04-20 07:56:36 -0700 | [diff] [blame] | 711 | vec_gen_3(opc, type, vece, ri, ai, si); |
Richard Henderson | b4578cd | 2019-04-18 18:19:38 -1000 | [diff] [blame] | 712 | } else if (can < 0) { |
Richard Henderson | 3d5bb2e | 2020-04-20 07:56:36 -0700 | [diff] [blame] | 713 | const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); |
| 714 | tcg_expand_vec_op(opc, type, vece, ri, ai, si); |
| 715 | tcg_swap_vecop_list(hold_list); |
Richard Henderson | b4578cd | 2019-04-18 18:19:38 -1000 | [diff] [blame] | 716 | } else { |
Richard Henderson | 3d5bb2e | 2020-04-20 07:56:36 -0700 | [diff] [blame] | 717 | g_assert_not_reached(); |
Richard Henderson | b4578cd | 2019-04-18 18:19:38 -1000 | [diff] [blame] | 718 | } |
Richard Henderson | b4578cd | 2019-04-18 18:19:38 -1000 | [diff] [blame] | 719 | } |
| 720 | |
| 721 | void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) |
| 722 | { |
Richard Henderson | 3d5bb2e | 2020-04-20 07:56:36 -0700 | [diff] [blame] | 723 | do_shifts(vece, r, a, b, INDEX_op_shls_vec); |
Richard Henderson | b4578cd | 2019-04-18 18:19:38 -1000 | [diff] [blame] | 724 | } |
| 725 | |
| 726 | void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) |
| 727 | { |
Richard Henderson | 3d5bb2e | 2020-04-20 07:56:36 -0700 | [diff] [blame] | 728 | do_shifts(vece, r, a, b, INDEX_op_shrs_vec); |
Richard Henderson | b4578cd | 2019-04-18 18:19:38 -1000 | [diff] [blame] | 729 | } |
| 730 | |
| 731 | void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) |
| 732 | { |
Richard Henderson | 3d5bb2e | 2020-04-20 07:56:36 -0700 | [diff] [blame] | 733 | do_shifts(vece, r, a, b, INDEX_op_sars_vec); |
Richard Henderson | b4578cd | 2019-04-18 18:19:38 -1000 | [diff] [blame] | 734 | } |
Richard Henderson | 38dc129 | 2019-04-30 11:02:23 -0700 | [diff] [blame] | 735 | |
Richard Henderson | 23850a7 | 2020-04-20 08:22:44 -0700 | [diff] [blame] | 736 | void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s) |
| 737 | { |
| 738 | do_shifts(vece, r, a, s, INDEX_op_rotls_vec); |
| 739 | } |
| 740 | |
Richard Henderson | 38dc129 | 2019-04-30 11:02:23 -0700 | [diff] [blame] | 741 | void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a, |
| 742 | TCGv_vec b, TCGv_vec c) |
| 743 | { |
| 744 | TCGTemp *rt = tcgv_vec_temp(r); |
| 745 | TCGTemp *at = tcgv_vec_temp(a); |
| 746 | TCGTemp *bt = tcgv_vec_temp(b); |
| 747 | TCGTemp *ct = tcgv_vec_temp(c); |
| 748 | TCGType type = rt->base_type; |
| 749 | |
| 750 | tcg_debug_assert(at->base_type >= type); |
| 751 | tcg_debug_assert(bt->base_type >= type); |
| 752 | tcg_debug_assert(ct->base_type >= type); |
| 753 | |
| 754 | if (TCG_TARGET_HAS_bitsel_vec) { |
| 755 | vec_gen_4(INDEX_op_bitsel_vec, type, MO_8, |
| 756 | temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct)); |
| 757 | } else { |
| 758 | TCGv_vec t = tcg_temp_new_vec(type); |
| 759 | tcg_gen_and_vec(MO_8, t, a, b); |
| 760 | tcg_gen_andc_vec(MO_8, r, c, a); |
| 761 | tcg_gen_or_vec(MO_8, r, r, t); |
| 762 | tcg_temp_free_vec(t); |
| 763 | } |
| 764 | } |
Richard Henderson | f75da29 | 2019-04-30 13:01:12 -0700 | [diff] [blame] | 765 | |
| 766 | void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r, |
| 767 | TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d) |
| 768 | { |
| 769 | TCGTemp *rt = tcgv_vec_temp(r); |
| 770 | TCGTemp *at = tcgv_vec_temp(a); |
| 771 | TCGTemp *bt = tcgv_vec_temp(b); |
| 772 | TCGTemp *ct = tcgv_vec_temp(c); |
| 773 | TCGTemp *dt = tcgv_vec_temp(d); |
| 774 | TCGArg ri = temp_arg(rt); |
| 775 | TCGArg ai = temp_arg(at); |
| 776 | TCGArg bi = temp_arg(bt); |
| 777 | TCGArg ci = temp_arg(ct); |
| 778 | TCGArg di = temp_arg(dt); |
| 779 | TCGType type = rt->base_type; |
| 780 | const TCGOpcode *hold_list; |
| 781 | int can; |
| 782 | |
| 783 | tcg_debug_assert(at->base_type >= type); |
| 784 | tcg_debug_assert(bt->base_type >= type); |
| 785 | tcg_debug_assert(ct->base_type >= type); |
| 786 | tcg_debug_assert(dt->base_type >= type); |
| 787 | |
| 788 | tcg_assert_listed_vecop(INDEX_op_cmpsel_vec); |
| 789 | hold_list = tcg_swap_vecop_list(NULL); |
| 790 | can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece); |
| 791 | |
| 792 | if (can > 0) { |
| 793 | vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond); |
| 794 | } else if (can < 0) { |
| 795 | tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece, |
| 796 | ri, ai, bi, ci, di, cond); |
| 797 | } else { |
| 798 | TCGv_vec t = tcg_temp_new_vec(type); |
| 799 | tcg_gen_cmp_vec(cond, vece, t, a, b); |
| 800 | tcg_gen_bitsel_vec(vece, r, t, c, d); |
| 801 | tcg_temp_free_vec(t); |
| 802 | } |
| 803 | tcg_swap_vecop_list(hold_list); |
| 804 | } |