bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Tiny Code Generator for QEMU |
| 3 | * |
| 4 | * Copyright (c) 2008 Fabrice Bellard |
| 5 | * |
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 7 | * of this software and associated documentation files (the "Software"), to deal |
| 8 | * in the Software without restriction, including without limitation the rights |
| 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 10 | * copies of the Software, and to permit persons to whom the Software is |
| 11 | * furnished to do so, subject to the following conditions: |
| 12 | * |
| 13 | * The above copyright notice and this permission notice shall be included in |
| 14 | * all copies or substantial portions of the Software. |
| 15 | * |
| 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 22 | * THE SOFTWARE. |
| 23 | */ |
blueswir1 | d4a9eb1 | 2008-10-05 09:59:14 +0000 | [diff] [blame] | 24 | |
Richard Henderson | b1ee3c6 | 2021-07-27 19:42:35 -1000 | [diff] [blame] | 25 | #include "../tcg-ldst.c.inc" |
Paolo Bonzini | 139c183 | 2020-02-04 12:41:01 +0100 | [diff] [blame] | 26 | #include "../tcg-pool.c.inc" |
Richard Henderson | 4e45f23 | 2017-07-20 19:56:42 -1000 | [diff] [blame] | 27 | |
#ifdef CONFIG_DEBUG_TCG
/*
 * Human-readable register names, indexed by TCGReg, used only for
 * debug dumps.  Indices 0-7 are the low GPRs (64- or 32-bit spelling
 * depending on host word size), 8-15 the REX-extended GPRs, then the
 * SSE/AVX vector registers.  xmm8-xmm15 exist only on 64-bit hosts.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
    "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",
#if TCG_TARGET_REG_BITS == 64
    "%xmm8", "%xmm9", "%xmm10", "%xmm11",
    "%xmm12", "%xmm13", "%xmm14", "%xmm15",
#endif
};
#endif
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 43 | |
/*
 * Order in which the register allocator prefers registers: earlier
 * entries are tried first.  (Presumably the call-saved GPRs are listed
 * ahead of the call-clobbered ones so allocated values survive calls
 * -- NOTE(review): inferred from the ordering, confirm against tcg.c.)
 * RSP/ESP is deliberately absent: it remains the host stack pointer.
 */
static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
    TCG_REG_XMM0,
    TCG_REG_XMM1,
    TCG_REG_XMM2,
    TCG_REG_XMM3,
    TCG_REG_XMM4,
    TCG_REG_XMM5,
#ifndef _WIN64
    /* The Win64 ABI has xmm6-xmm15 as caller-saves, and we do not save
       any of them.  Therefore only allow xmm0-xmm5 to be allocated.  */
    TCG_REG_XMM6,
    TCG_REG_XMM7,
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_XMM8,
    TCG_REG_XMM9,
    TCG_REG_XMM10,
    TCG_REG_XMM11,
    TCG_REG_XMM12,
    TCG_REG_XMM13,
    TCG_REG_XMM14,
    TCG_REG_XMM15,
#endif
#endif
};
| 93 | |
/* Vector register reserved as a scratch temporary for the backend. */
#define TCG_TMP_VEC  TCG_REG_XMM5

/*
 * Integer argument registers for the host calling convention:
 * RCX, RDX, R8, R9 on Win64; RDI, RSI, RDX, RCX, R8, R9 on SysV
 * x86_64.  Empty on 32-bit hosts, where arguments go on the stack.
 */
static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32 bit mode uses stack based calling convention (GCC default). */
#endif
};
| 113 | |
/*
 * Return the host register that holds call return-value 'slot' for the
 * given return kind.  Normal returns use EAX for slot 0 and EDX for
 * slot 1 (the EDX:EAX pair); on Win64, values returned by vector come
 * back in XMM0 (single slot only).  Any other kind is a bug.
 */
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    switch (kind) {
    case TCG_CALL_RET_NORMAL:
        tcg_debug_assert(slot >= 0 && slot <= 1);
        return slot ? TCG_REG_EDX : TCG_REG_EAX;
#ifdef _WIN64
    case TCG_CALL_RET_BY_VEC:
        tcg_debug_assert(slot == 0);
        return TCG_REG_XMM0;
#endif
    default:
        g_assert_not_reached();
    }
}
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 129 | |
/* Constants we accept.  Extra constraint bits beyond the generic
   TCG_CT_CONST: each names a class of immediate operand. */
#define TCG_CT_CONST_S32 0x100   /* value fits in a sign-extended 32-bit imm */
#define TCG_CT_CONST_U32 0x200   /* value fits in a zero-extended 32-bit imm */
#define TCG_CT_CONST_I32 0x400   /* inverted value fits in a 32-bit imm */
#define TCG_CT_CONST_WSZ 0x800   /* value is exactly the operation width */

/* Registers used with L constraint, which are the first argument
   registers on x86_64, and two random call clobbered registers on
   i386. */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

/*
 * Register-set masks used by operand constraints.  BYTEH/BYTEL
 * distinguish registers addressable as a byte: on i386 only
 * EAX/ECX/EDX/EBX have byte forms, while with a REX prefix on x86_64
 * all GPRs do.
 */
#define ALL_BYTEH_REGS         0x0000000fu
#if TCG_TARGET_REG_BITS == 64
# define ALL_GENERAL_REGS      0x0000ffffu
# define ALL_VECTOR_REGS       0xffff0000u
# define ALL_BYTEL_REGS        ALL_GENERAL_REGS
#else
# define ALL_GENERAL_REGS      0x000000ffu
# define ALL_VECTOR_REGS       0x00ff0000u
# define ALL_BYTEL_REGS        ALL_BYTEH_REGS
#endif
#ifdef CONFIG_SOFTMMU
/* L0/L1 are reserved for the softmmu slow path and excluded from
   allocation for L-constrained operands. */
# define SOFTMMU_RESERVE_REGS  ((1 << TCG_REG_L0) | (1 << TCG_REG_L1))
#else
# define SOFTMMU_RESERVE_REGS  0
#endif

/* For 64-bit, we always know that CMOV is available.  */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov      true
#else
# define have_cmov      (cpuinfo & CPUINFO_CMOV)
#endif
#define have_bmi2       (cpuinfo & CPUINFO_BMI2)
#define have_lzcnt      (cpuinfo & CPUINFO_LZCNT)

/* Address of the epilogue's return sequence; jumped to on TB exit. */
static const tcg_insn_unit *tb_ret_addr;
bellard | b03cce8 | 2008-05-10 10:52:05 +0000 | [diff] [blame] | 173 | |
/*
 * Resolve a relocation recorded against the code at code_ptr.
 * 'value' is the symbol value and 'addend' the relocation addend;
 * for the PC-relative types the address of code_ptr (translated from
 * the writable to the executable mapping for split-wx) is subtracted
 * before patching.  Returns false if the resulting displacement does
 * not fit the relocation's field (32 bits for R_386_PC32, 8 bits for
 * R_386_PC8), so the caller can retry with a longer encoding.
 */
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr);
        if (value != (int32_t)value) {
            return false;
        }
        /* FALLTHRU */
    case R_386_32:
        tcg_patch32(code_ptr, value);
        break;
    case R_386_PC8:
        value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr);
        if (value != (int8_t)value) {
            return false;
        }
        tcg_patch8(code_ptr, value);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
| 200 | |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 201 | /* test if a constant matches the constraint */ |
Richard Henderson | a4fbbd7 | 2021-05-03 16:47:37 -0700 | [diff] [blame] | 202 | static bool tcg_target_const_match(int64_t val, TCGType type, int ct) |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 203 | { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 204 | if (ct & TCG_CT_CONST) { |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 205 | return 1; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 206 | } |
Richard Henderson | c7c778b | 2020-10-16 14:26:30 -0700 | [diff] [blame] | 207 | if (type == TCG_TYPE_I32) { |
| 208 | if (ct & (TCG_CT_CONST_S32 | TCG_CT_CONST_U32 | TCG_CT_CONST_I32)) { |
| 209 | return 1; |
| 210 | } |
| 211 | } else { |
| 212 | if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { |
| 213 | return 1; |
| 214 | } |
| 215 | if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { |
| 216 | return 1; |
| 217 | } |
| 218 | if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) { |
| 219 | return 1; |
| 220 | } |
Richard Henderson | 9d2eec2 | 2014-01-27 21:49:17 -0800 | [diff] [blame] | 221 | } |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 222 | if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) { |
| 223 | return 1; |
| 224 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 225 | return 0; |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 226 | } |
| 227 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 228 | # define LOWREGMASK(x) ((x) & 7) |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 229 | |
Richard Henderson | 96b4cf3 | 2010-05-21 09:03:00 -0700 | [diff] [blame] | 230 | #define P_EXT 0x100 /* 0x0f opcode prefix */ |
Aurelien Jarno | 2a11377 | 2013-11-06 19:49:08 +0100 | [diff] [blame] | 231 | #define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */ |
| 232 | #define P_DATA16 0x400 /* 0x66 opcode prefix */ |
Richard Henderson | fc88a52 | 2021-08-10 16:17:04 -0700 | [diff] [blame] | 233 | #define P_VEXW 0x1000 /* Set VEX.W = 1 */ |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 234 | #if TCG_TARGET_REG_BITS == 64 |
Richard Henderson | fc88a52 | 2021-08-10 16:17:04 -0700 | [diff] [blame] | 235 | # define P_REXW P_VEXW /* Set REX.W = 1; match VEXW */ |
Aurelien Jarno | 2a11377 | 2013-11-06 19:49:08 +0100 | [diff] [blame] | 236 | # define P_REXB_R 0x2000 /* REG field as byte register */ |
| 237 | # define P_REXB_RM 0x4000 /* R/M field as byte register */ |
| 238 | # define P_GS 0x8000 /* gs segment override */ |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 239 | #else |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 240 | # define P_REXW 0 |
| 241 | # define P_REXB_R 0 |
| 242 | # define P_REXB_RM 0 |
Richard Henderson | 44b37ac | 2012-10-22 12:11:07 +1000 | [diff] [blame] | 243 | # define P_GS 0 |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 244 | #endif |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 245 | #define P_EXT3A 0x10000 /* 0x0f 0x3a opcode prefix */ |
| 246 | #define P_SIMDF3 0x20000 /* 0xf3 opcode prefix */ |
| 247 | #define P_SIMDF2 0x40000 /* 0xf2 opcode prefix */ |
| 248 | #define P_VEXL 0x80000 /* Set VEX.L = 1 */ |
Richard Henderson | 08b032f | 2021-12-15 20:43:00 -0800 | [diff] [blame] | 249 | #define P_EVEX 0x100000 /* Requires EVEX encoding */ |
Richard Henderson | fcb5dac | 2010-04-28 10:31:18 -0700 | [diff] [blame] | 250 | |
Richard Henderson | a369a70 | 2010-05-21 08:30:23 -0700 | [diff] [blame] | 251 | #define OPC_ARITH_EvIz (0x81) |
| 252 | #define OPC_ARITH_EvIb (0x83) |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 253 | #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ |
Richard Henderson | 9d2eec2 | 2014-01-27 21:49:17 -0800 | [diff] [blame] | 254 | #define OPC_ANDN (0xf2 | P_EXT38) |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 255 | #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) |
Emilio G. Cota | 54eaf40 | 2019-01-16 12:01:14 -0500 | [diff] [blame] | 256 | #define OPC_AND_GvEv (OPC_ARITH_GvEv | (ARITH_AND << 3)) |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 257 | #define OPC_BLENDPS (0x0c | P_EXT3A | P_DATA16) |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 258 | #define OPC_BSF (0xbc | P_EXT) |
| 259 | #define OPC_BSR (0xbd | P_EXT) |
Richard Henderson | fcb5dac | 2010-04-28 10:31:18 -0700 | [diff] [blame] | 260 | #define OPC_BSWAP (0xc8 | P_EXT) |
Richard Henderson | aadb21a | 2010-05-21 08:30:27 -0700 | [diff] [blame] | 261 | #define OPC_CALL_Jz (0xe8) |
Richard Henderson | d0a1629 | 2012-09-21 10:13:36 -0700 | [diff] [blame] | 262 | #define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */ |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 263 | #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) |
| 264 | #define OPC_DEC_r32 (0x48) |
Richard Henderson | 0566d38 | 2010-05-21 08:30:31 -0700 | [diff] [blame] | 265 | #define OPC_IMUL_GvEv (0xaf | P_EXT) |
| 266 | #define OPC_IMUL_GvEvIb (0x6b) |
| 267 | #define OPC_IMUL_GvEvIz (0x69) |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 268 | #define OPC_INC_r32 (0x40) |
Richard Henderson | da441cf | 2010-04-14 08:26:50 -0700 | [diff] [blame] | 269 | #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */ |
| 270 | #define OPC_JCC_short (0x70) /* ... plus condition code */ |
| 271 | #define OPC_JMP_long (0xe9) |
| 272 | #define OPC_JMP_short (0xeb) |
Richard Henderson | 34a6d0b | 2010-05-21 08:30:33 -0700 | [diff] [blame] | 273 | #define OPC_LEA (0x8d) |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 274 | #define OPC_LZCNT (0xbd | P_EXT | P_SIMDF3) |
Richard Henderson | af26608 | 2010-04-14 07:58:59 -0700 | [diff] [blame] | 275 | #define OPC_MOVB_EvGv (0x88) /* stores, more or less */ |
| 276 | #define OPC_MOVL_EvGv (0x89) /* stores, more or less */ |
| 277 | #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ |
Aurelien Jarno | 5c2d2a9 | 2012-09-10 13:56:24 +0200 | [diff] [blame] | 278 | #define OPC_MOVB_EvIz (0xc6) |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 279 | #define OPC_MOVL_EvIz (0xc7) |
Richard Henderson | ef10b10 | 2010-05-21 08:30:25 -0700 | [diff] [blame] | 280 | #define OPC_MOVL_Iv (0xb8) |
Aurelien Jarno | 085bb5b | 2013-11-06 19:51:21 +0100 | [diff] [blame] | 281 | #define OPC_MOVBE_GyMy (0xf0 | P_EXT38) |
| 282 | #define OPC_MOVBE_MyGy (0xf1 | P_EXT38) |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 283 | #define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16) |
| 284 | #define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16) |
Richard Henderson | 7b60ef3 | 2019-05-16 15:11:17 -0700 | [diff] [blame] | 285 | #define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2) |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 286 | #define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16) |
| 287 | #define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16) |
| 288 | #define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3) |
| 289 | #define OPC_MOVDQU_WxVx (0x7f | P_EXT | P_SIMDF3) |
| 290 | #define OPC_MOVQ_VqWq (0x7e | P_EXT | P_SIMDF3) |
| 291 | #define OPC_MOVQ_WqVq (0xd6 | P_EXT | P_DATA16) |
Richard Henderson | 6817c35 | 2010-05-21 08:30:22 -0700 | [diff] [blame] | 292 | #define OPC_MOVSBL (0xbe | P_EXT) |
| 293 | #define OPC_MOVSWL (0xbf | P_EXT) |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 294 | #define OPC_MOVSLQ (0x63 | P_REXW) |
Richard Henderson | 55e082a | 2010-05-21 08:30:21 -0700 | [diff] [blame] | 295 | #define OPC_MOVZBL (0xb6 | P_EXT) |
| 296 | #define OPC_MOVZWL (0xb7 | P_EXT) |
Richard Henderson | 18f9b65 | 2019-04-17 15:54:20 -1000 | [diff] [blame] | 297 | #define OPC_PABSB (0x1c | P_EXT38 | P_DATA16) |
| 298 | #define OPC_PABSW (0x1d | P_EXT38 | P_DATA16) |
| 299 | #define OPC_PABSD (0x1e | P_EXT38 | P_DATA16) |
Richard Henderson | dac1648 | 2021-12-16 06:54:26 -0800 | [diff] [blame] | 300 | #define OPC_VPABSQ (0x1f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 301 | #define OPC_PACKSSDW (0x6b | P_EXT | P_DATA16) |
| 302 | #define OPC_PACKSSWB (0x63 | P_EXT | P_DATA16) |
| 303 | #define OPC_PACKUSDW (0x2b | P_EXT38 | P_DATA16) |
| 304 | #define OPC_PACKUSWB (0x67 | P_EXT | P_DATA16) |
| 305 | #define OPC_PADDB (0xfc | P_EXT | P_DATA16) |
| 306 | #define OPC_PADDW (0xfd | P_EXT | P_DATA16) |
| 307 | #define OPC_PADDD (0xfe | P_EXT | P_DATA16) |
| 308 | #define OPC_PADDQ (0xd4 | P_EXT | P_DATA16) |
Richard Henderson | 8ffafbc | 2018-12-17 19:00:41 -0800 | [diff] [blame] | 309 | #define OPC_PADDSB (0xec | P_EXT | P_DATA16) |
| 310 | #define OPC_PADDSW (0xed | P_EXT | P_DATA16) |
| 311 | #define OPC_PADDUB (0xdc | P_EXT | P_DATA16) |
| 312 | #define OPC_PADDUW (0xdd | P_EXT | P_DATA16) |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 313 | #define OPC_PAND (0xdb | P_EXT | P_DATA16) |
| 314 | #define OPC_PANDN (0xdf | P_EXT | P_DATA16) |
| 315 | #define OPC_PBLENDW (0x0e | P_EXT3A | P_DATA16) |
| 316 | #define OPC_PCMPEQB (0x74 | P_EXT | P_DATA16) |
| 317 | #define OPC_PCMPEQW (0x75 | P_EXT | P_DATA16) |
| 318 | #define OPC_PCMPEQD (0x76 | P_EXT | P_DATA16) |
| 319 | #define OPC_PCMPEQQ (0x29 | P_EXT38 | P_DATA16) |
| 320 | #define OPC_PCMPGTB (0x64 | P_EXT | P_DATA16) |
| 321 | #define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16) |
| 322 | #define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16) |
| 323 | #define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16) |
Richard Henderson | 098d0fc | 2023-04-17 10:16:28 +0200 | [diff] [blame] | 324 | #define OPC_PEXTRD (0x16 | P_EXT3A | P_DATA16) |
| 325 | #define OPC_PINSRD (0x22 | P_EXT3A | P_DATA16) |
Richard Henderson | bc37faf | 2018-12-17 20:17:56 -0800 | [diff] [blame] | 326 | #define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16) |
| 327 | #define OPC_PMAXSW (0xee | P_EXT | P_DATA16) |
| 328 | #define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16) |
Richard Henderson | dac1648 | 2021-12-16 06:54:26 -0800 | [diff] [blame] | 329 | #define OPC_VPMAXSQ (0x3d | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) |
Richard Henderson | bc37faf | 2018-12-17 20:17:56 -0800 | [diff] [blame] | 330 | #define OPC_PMAXUB (0xde | P_EXT | P_DATA16) |
| 331 | #define OPC_PMAXUW (0x3e | P_EXT38 | P_DATA16) |
| 332 | #define OPC_PMAXUD (0x3f | P_EXT38 | P_DATA16) |
Richard Henderson | dac1648 | 2021-12-16 06:54:26 -0800 | [diff] [blame] | 333 | #define OPC_VPMAXUQ (0x3f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) |
Richard Henderson | bc37faf | 2018-12-17 20:17:56 -0800 | [diff] [blame] | 334 | #define OPC_PMINSB (0x38 | P_EXT38 | P_DATA16) |
| 335 | #define OPC_PMINSW (0xea | P_EXT | P_DATA16) |
| 336 | #define OPC_PMINSD (0x39 | P_EXT38 | P_DATA16) |
Richard Henderson | dac1648 | 2021-12-16 06:54:26 -0800 | [diff] [blame] | 337 | #define OPC_VPMINSQ (0x39 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) |
Richard Henderson | bc37faf | 2018-12-17 20:17:56 -0800 | [diff] [blame] | 338 | #define OPC_PMINUB (0xda | P_EXT | P_DATA16) |
| 339 | #define OPC_PMINUW (0x3a | P_EXT38 | P_DATA16) |
| 340 | #define OPC_PMINUD (0x3b | P_EXT38 | P_DATA16) |
Richard Henderson | dac1648 | 2021-12-16 06:54:26 -0800 | [diff] [blame] | 341 | #define OPC_VPMINUQ (0x3b | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 342 | #define OPC_PMOVSXBW (0x20 | P_EXT38 | P_DATA16) |
| 343 | #define OPC_PMOVSXWD (0x23 | P_EXT38 | P_DATA16) |
| 344 | #define OPC_PMOVSXDQ (0x25 | P_EXT38 | P_DATA16) |
| 345 | #define OPC_PMOVZXBW (0x30 | P_EXT38 | P_DATA16) |
| 346 | #define OPC_PMOVZXWD (0x33 | P_EXT38 | P_DATA16) |
| 347 | #define OPC_PMOVZXDQ (0x35 | P_EXT38 | P_DATA16) |
| 348 | #define OPC_PMULLW (0xd5 | P_EXT | P_DATA16) |
| 349 | #define OPC_PMULLD (0x40 | P_EXT38 | P_DATA16) |
Richard Henderson | 4c8b968 | 2021-12-16 07:14:24 -0800 | [diff] [blame] | 350 | #define OPC_VPMULLQ (0x40 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 351 | #define OPC_POR (0xeb | P_EXT | P_DATA16) |
| 352 | #define OPC_PSHUFB (0x00 | P_EXT38 | P_DATA16) |
| 353 | #define OPC_PSHUFD (0x70 | P_EXT | P_DATA16) |
| 354 | #define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2) |
| 355 | #define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3) |
| 356 | #define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */ |
Richard Henderson | 4e73f84 | 2021-12-17 22:02:57 -0800 | [diff] [blame] | 357 | #define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */ |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 358 | #define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */ |
Richard Henderson | 0a8d7a3 | 2019-04-18 19:19:31 -1000 | [diff] [blame] | 359 | #define OPC_PSLLW (0xf1 | P_EXT | P_DATA16) |
| 360 | #define OPC_PSLLD (0xf2 | P_EXT | P_DATA16) |
| 361 | #define OPC_PSLLQ (0xf3 | P_EXT | P_DATA16) |
| 362 | #define OPC_PSRAW (0xe1 | P_EXT | P_DATA16) |
| 363 | #define OPC_PSRAD (0xe2 | P_EXT | P_DATA16) |
Richard Henderson | 3cc18d1 | 2022-06-01 16:22:19 -0700 | [diff] [blame] | 364 | #define OPC_VPSRAQ (0xe2 | P_EXT | P_DATA16 | P_VEXW | P_EVEX) |
Richard Henderson | 0a8d7a3 | 2019-04-18 19:19:31 -1000 | [diff] [blame] | 365 | #define OPC_PSRLW (0xd1 | P_EXT | P_DATA16) |
| 366 | #define OPC_PSRLD (0xd2 | P_EXT | P_DATA16) |
| 367 | #define OPC_PSRLQ (0xd3 | P_EXT | P_DATA16) |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 368 | #define OPC_PSUBB (0xf8 | P_EXT | P_DATA16) |
| 369 | #define OPC_PSUBW (0xf9 | P_EXT | P_DATA16) |
| 370 | #define OPC_PSUBD (0xfa | P_EXT | P_DATA16) |
| 371 | #define OPC_PSUBQ (0xfb | P_EXT | P_DATA16) |
Richard Henderson | 8ffafbc | 2018-12-17 19:00:41 -0800 | [diff] [blame] | 372 | #define OPC_PSUBSB (0xe8 | P_EXT | P_DATA16) |
| 373 | #define OPC_PSUBSW (0xe9 | P_EXT | P_DATA16) |
| 374 | #define OPC_PSUBUB (0xd8 | P_EXT | P_DATA16) |
| 375 | #define OPC_PSUBUW (0xd9 | P_EXT | P_DATA16) |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 376 | #define OPC_PUNPCKLBW (0x60 | P_EXT | P_DATA16) |
| 377 | #define OPC_PUNPCKLWD (0x61 | P_EXT | P_DATA16) |
| 378 | #define OPC_PUNPCKLDQ (0x62 | P_EXT | P_DATA16) |
| 379 | #define OPC_PUNPCKLQDQ (0x6c | P_EXT | P_DATA16) |
| 380 | #define OPC_PUNPCKHBW (0x68 | P_EXT | P_DATA16) |
| 381 | #define OPC_PUNPCKHWD (0x69 | P_EXT | P_DATA16) |
| 382 | #define OPC_PUNPCKHDQ (0x6a | P_EXT | P_DATA16) |
| 383 | #define OPC_PUNPCKHQDQ (0x6d | P_EXT | P_DATA16) |
| 384 | #define OPC_PXOR (0xef | P_EXT | P_DATA16) |
Richard Henderson | 6858614 | 2010-05-21 08:30:26 -0700 | [diff] [blame] | 385 | #define OPC_POP_r32 (0x58) |
Richard Henderson | 993508e | 2016-11-22 14:15:04 +0100 | [diff] [blame] | 386 | #define OPC_POPCNT (0xb8 | P_EXT | P_SIMDF3) |
Richard Henderson | 6858614 | 2010-05-21 08:30:26 -0700 | [diff] [blame] | 387 | #define OPC_PUSH_r32 (0x50) |
| 388 | #define OPC_PUSH_Iv (0x68) |
| 389 | #define OPC_PUSH_Ib (0x6a) |
Richard Henderson | 3c3accc | 2010-05-21 08:30:28 -0700 | [diff] [blame] | 390 | #define OPC_RET (0xc3) |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 391 | #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */ |
Richard Henderson | f53dba0 | 2010-04-28 10:38:04 -0700 | [diff] [blame] | 392 | #define OPC_SHIFT_1 (0xd1) |
| 393 | #define OPC_SHIFT_Ib (0xc1) |
| 394 | #define OPC_SHIFT_cl (0xd3) |
Richard Henderson | 6399ab3 | 2014-01-28 11:39:49 -0800 | [diff] [blame] | 395 | #define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3) |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 396 | #define OPC_SHUFPS (0xc6 | P_EXT) |
Richard Henderson | 6399ab3 | 2014-01-28 11:39:49 -0800 | [diff] [blame] | 397 | #define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) |
| 398 | #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) |
Richard Henderson | c6fb8c0 | 2019-02-25 11:42:35 -0800 | [diff] [blame] | 399 | #define OPC_SHRD_Ib (0xac | P_EXT) |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 400 | #define OPC_TESTL (0x85) |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 401 | #define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3) |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 402 | #define OPC_UD2 (0x0b | P_EXT) |
| 403 | #define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16) |
| 404 | #define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16) |
Richard Henderson | 1e262b4 | 2019-03-18 12:02:54 -0700 | [diff] [blame] | 405 | #define OPC_VPINSRB (0x20 | P_EXT3A | P_DATA16) |
| 406 | #define OPC_VPINSRW (0xc4 | P_EXT | P_DATA16) |
| 407 | #define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16) |
| 408 | #define OPC_VBROADCASTSD (0x19 | P_EXT38 | P_DATA16) |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 409 | #define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16) |
| 410 | #define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16) |
| 411 | #define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16) |
| 412 | #define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16) |
Richard Henderson | fc88a52 | 2021-08-10 16:17:04 -0700 | [diff] [blame] | 413 | #define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW) |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 414 | #define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL) |
Richard Henderson | 102cd35 | 2021-12-18 09:15:29 -0800 | [diff] [blame] | 415 | #define OPC_VPROLVD (0x15 | P_EXT38 | P_DATA16 | P_EVEX) |
/* EVEX-only variable-count rotates (note P_EVEX on every form). */
#define OPC_VPROLVQ (0x15 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPRORVD (0x14 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPRORVQ (0x14 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
/* Concatenate-and-shift, immediate count (0f3a map, EVEX-only). */
#define OPC_VPSHLDW (0x70 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHLDD (0x71 | P_EXT3A | P_DATA16 | P_EVEX)
#define OPC_VPSHLDQ (0x71 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
/* Concatenate-and-shift, variable count (0f38 map, EVEX-only). */
#define OPC_VPSHLDVW (0x70 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHLDVD (0x71 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPSHLDVQ (0x71 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHRDVW (0x72 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSHRDVD (0x73 | P_EXT38 | P_DATA16 | P_EVEX)
#define OPC_VPSHRDVQ (0x73 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
/* Per-lane variable shifts; the D/Q forms are VEX-encodable,
   the W forms carry P_EVEX and need the EVEX prefix. */
#define OPC_VPSLLVW (0x12 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16)
#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW)
#define OPC_VPSRAVW (0x11 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16)
#define OPC_VPSRAVQ (0x46 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSRLVW (0x10 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16)
#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW)
/* Ternary logic: result lanes chosen by an imm8 truth table (EVEX-only). */
#define OPC_VPTERNLOGQ (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VZEROUPPER (0x77 | P_EXT)
#define OPC_XCHG_ax_r32 (0x90)
#define OPC_XCHG_EvGv (0x87)

/* Opcode-group escape bytes; the actual operation is selected by
   the reg field of the following ModRM byte (see EXT3_*/EXT5_*). */
#define OPC_GRP3_Eb (0xf6)
#define OPC_GRP3_Ev (0xf7)
#define OPC_GRP5 (0xff)
#define OPC_GRP14 (0x73 | P_EXT | P_DATA16)
Richard Henderson | 9363ded | 2010-05-21 08:30:30 -0700 | [diff] [blame] | 446 | |
/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH. */
#define ARITH_ADD 0
#define ARITH_OR 1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.
   Values 2, 3 and 6 are not used by TCG. */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
#define EXT3_TESTi 0
#define EXT3_NOT 2
#define EXT3_NEG 3
#define EXT3_MUL 4
#define EXT3_IMUL 5
#define EXT3_DIV 6
#define EXT3_IDIV 7

/* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
#define EXT5_INC_Ev 0
#define EXT5_DEC_Ev 1
#define EXT5_CALLN_Ev 2
#define EXT5_JMPN_Ev 4

/* Condition codes to be added to OPC_JCC_{long,short}.
   NOTE(review): JCC_JMP (-1) appears to be a pseudo-condition meaning
   "unconditional" — confirm at the branch-emission use sites. */
#define JCC_JMP (-1)
#define JCC_JO 0x0
#define JCC_JNO 0x1
#define JCC_JB 0x2
#define JCC_JAE 0x3
#define JCC_JE 0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA 0x7
#define JCC_JS 0x8
#define JCC_JNS 0x9
#define JCC_JP 0xa
#define JCC_JNP 0xb
#define JCC_JL 0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG 0xf
| 498 | |
Richard Henderson | 0aed257 | 2012-09-24 14:21:40 -0700 | [diff] [blame] | 499 | static const uint8_t tcg_cond_to_jcc[] = { |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 500 | [TCG_COND_EQ] = JCC_JE, |
| 501 | [TCG_COND_NE] = JCC_JNE, |
| 502 | [TCG_COND_LT] = JCC_JL, |
| 503 | [TCG_COND_GE] = JCC_JGE, |
| 504 | [TCG_COND_LE] = JCC_JLE, |
| 505 | [TCG_COND_GT] = JCC_JG, |
| 506 | [TCG_COND_LTU] = JCC_JB, |
| 507 | [TCG_COND_GEU] = JCC_JAE, |
| 508 | [TCG_COND_LEU] = JCC_JBE, |
| 509 | [TCG_COND_GTU] = JCC_JA, |
| 510 | }; |
| 511 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 512 | #if TCG_TARGET_REG_BITS == 64 |
| 513 | static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) |
| 514 | { |
| 515 | int rex; |
| 516 | |
Richard Henderson | 44b37ac | 2012-10-22 12:11:07 +1000 | [diff] [blame] | 517 | if (opc & P_GS) { |
| 518 | tcg_out8(s, 0x65); |
| 519 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 520 | if (opc & P_DATA16) { |
| 521 | /* We should never be asking for both 16 and 64-bit operation. */ |
Aurelien Jarno | eabb7b9 | 2016-04-21 10:48:49 +0200 | [diff] [blame] | 522 | tcg_debug_assert((opc & P_REXW) == 0); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 523 | tcg_out8(s, 0x66); |
| 524 | } |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 525 | if (opc & P_SIMDF3) { |
| 526 | tcg_out8(s, 0xf3); |
| 527 | } else if (opc & P_SIMDF2) { |
| 528 | tcg_out8(s, 0xf2); |
| 529 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 530 | |
| 531 | rex = 0; |
Aurelien Jarno | c9d7821 | 2013-11-06 18:32:23 +0100 | [diff] [blame] | 532 | rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */ |
Richard Henderson | ecc7e84 | 2014-01-27 21:19:40 -0800 | [diff] [blame] | 533 | rex |= (r & 8) >> 1; /* REX.R */ |
| 534 | rex |= (x & 8) >> 2; /* REX.X */ |
| 535 | rex |= (rm & 8) >> 3; /* REX.B */ |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 536 | |
| 537 | /* P_REXB_{R,RM} indicates that the given register is the low byte. |
| 538 | For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do, |
| 539 | as otherwise the encoding indicates %[abcd]h. Note that the values |
| 540 | that are ORed in merely indicate that the REX byte must be present; |
| 541 | those bits get discarded in output. */ |
| 542 | rex |= opc & (r >= 4 ? P_REXB_R : 0); |
| 543 | rex |= opc & (rm >= 4 ? P_REXB_RM : 0); |
| 544 | |
| 545 | if (rex) { |
| 546 | tcg_out8(s, (uint8_t)(rex | 0x40)); |
| 547 | } |
| 548 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 549 | if (opc & (P_EXT | P_EXT38 | P_EXT3A)) { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 550 | tcg_out8(s, 0x0f); |
Aurelien Jarno | 2a11377 | 2013-11-06 19:49:08 +0100 | [diff] [blame] | 551 | if (opc & P_EXT38) { |
| 552 | tcg_out8(s, 0x38); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 553 | } else if (opc & P_EXT3A) { |
| 554 | tcg_out8(s, 0x3a); |
Aurelien Jarno | 2a11377 | 2013-11-06 19:49:08 +0100 | [diff] [blame] | 555 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 556 | } |
Aurelien Jarno | 2a11377 | 2013-11-06 19:49:08 +0100 | [diff] [blame] | 557 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 558 | tcg_out8(s, opc); |
| 559 | } |
| 560 | #else |
| 561 | static void tcg_out_opc(TCGContext *s, int opc) |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 562 | { |
Richard Henderson | 96b4cf3 | 2010-05-21 09:03:00 -0700 | [diff] [blame] | 563 | if (opc & P_DATA16) { |
| 564 | tcg_out8(s, 0x66); |
| 565 | } |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 566 | if (opc & P_SIMDF3) { |
| 567 | tcg_out8(s, 0xf3); |
| 568 | } else if (opc & P_SIMDF2) { |
| 569 | tcg_out8(s, 0xf2); |
| 570 | } |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 571 | if (opc & (P_EXT | P_EXT38 | P_EXT3A)) { |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 572 | tcg_out8(s, 0x0f); |
Aurelien Jarno | 2a11377 | 2013-11-06 19:49:08 +0100 | [diff] [blame] | 573 | if (opc & P_EXT38) { |
| 574 | tcg_out8(s, 0x38); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 575 | } else if (opc & P_EXT3A) { |
| 576 | tcg_out8(s, 0x3a); |
Aurelien Jarno | 2a11377 | 2013-11-06 19:49:08 +0100 | [diff] [blame] | 577 | } |
Richard Henderson | 96b4cf3 | 2010-05-21 09:03:00 -0700 | [diff] [blame] | 578 | } |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 579 | tcg_out8(s, opc); |
| 580 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 581 | /* Discard the register arguments to tcg_out_opc early, so as not to penalize |
| 582 | the 32-bit compilation paths. This method works with all versions of gcc, |
| 583 | whereas relying on optimization may not be able to exclude them. */ |
| 584 | #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc) |
| 585 | #endif |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 586 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 587 | static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm) |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 588 | { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 589 | tcg_out_opc(s, opc, r, rm, 0); |
| 590 | tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 591 | } |
| 592 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 593 | static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v, |
| 594 | int rm, int index) |
Richard Henderson | ecc7e84 | 2014-01-27 21:19:40 -0800 | [diff] [blame] | 595 | { |
| 596 | int tmp; |
| 597 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 598 | /* Use the two byte form if possible, which cannot encode |
| 599 | VEX.W, VEX.B, VEX.X, or an m-mmmm field other than P_EXT. */ |
Richard Henderson | fc88a52 | 2021-08-10 16:17:04 -0700 | [diff] [blame] | 600 | if ((opc & (P_EXT | P_EXT38 | P_EXT3A | P_VEXW)) == P_EXT |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 601 | && ((rm | index) & 8) == 0) { |
| 602 | /* Two byte VEX prefix. */ |
| 603 | tcg_out8(s, 0xc5); |
| 604 | |
| 605 | tmp = (r & 8 ? 0 : 0x80); /* VEX.R */ |
| 606 | } else { |
Richard Henderson | ecc7e84 | 2014-01-27 21:19:40 -0800 | [diff] [blame] | 607 | /* Three byte VEX prefix. */ |
| 608 | tcg_out8(s, 0xc4); |
| 609 | |
| 610 | /* VEX.m-mmmm */ |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 611 | if (opc & P_EXT3A) { |
| 612 | tmp = 3; |
| 613 | } else if (opc & P_EXT38) { |
Richard Henderson | ecc7e84 | 2014-01-27 21:19:40 -0800 | [diff] [blame] | 614 | tmp = 2; |
| 615 | } else if (opc & P_EXT) { |
| 616 | tmp = 1; |
| 617 | } else { |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 618 | g_assert_not_reached(); |
Richard Henderson | ecc7e84 | 2014-01-27 21:19:40 -0800 | [diff] [blame] | 619 | } |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 620 | tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */ |
| 621 | tmp |= (index & 8 ? 0 : 0x40); /* VEX.X */ |
| 622 | tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */ |
Richard Henderson | ecc7e84 | 2014-01-27 21:19:40 -0800 | [diff] [blame] | 623 | tcg_out8(s, tmp); |
| 624 | |
Richard Henderson | fc88a52 | 2021-08-10 16:17:04 -0700 | [diff] [blame] | 625 | tmp = (opc & P_VEXW ? 0x80 : 0); /* VEX.W */ |
Richard Henderson | ecc7e84 | 2014-01-27 21:19:40 -0800 | [diff] [blame] | 626 | } |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 627 | |
| 628 | tmp |= (opc & P_VEXL ? 0x04 : 0); /* VEX.L */ |
Richard Henderson | 6399ab3 | 2014-01-28 11:39:49 -0800 | [diff] [blame] | 629 | /* VEX.pp */ |
| 630 | if (opc & P_DATA16) { |
| 631 | tmp |= 1; /* 0x66 */ |
| 632 | } else if (opc & P_SIMDF3) { |
| 633 | tmp |= 2; /* 0xf3 */ |
| 634 | } else if (opc & P_SIMDF2) { |
| 635 | tmp |= 3; /* 0xf2 */ |
| 636 | } |
Richard Henderson | ecc7e84 | 2014-01-27 21:19:40 -0800 | [diff] [blame] | 637 | tmp |= (~v & 15) << 3; /* VEX.vvvv */ |
| 638 | tcg_out8(s, tmp); |
| 639 | tcg_out8(s, opc); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 640 | } |
| 641 | |
Richard Henderson | 08b032f | 2021-12-15 20:43:00 -0800 | [diff] [blame] | 642 | static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v, |
| 643 | int rm, int index) |
| 644 | { |
| 645 | /* The entire 4-byte evex prefix; with R' and V' set. */ |
| 646 | uint32_t p = 0x08041062; |
| 647 | int mm, pp; |
| 648 | |
| 649 | tcg_debug_assert(have_avx512vl); |
| 650 | |
| 651 | /* EVEX.mm */ |
| 652 | if (opc & P_EXT3A) { |
| 653 | mm = 3; |
| 654 | } else if (opc & P_EXT38) { |
| 655 | mm = 2; |
| 656 | } else if (opc & P_EXT) { |
| 657 | mm = 1; |
| 658 | } else { |
| 659 | g_assert_not_reached(); |
| 660 | } |
| 661 | |
| 662 | /* EVEX.pp */ |
| 663 | if (opc & P_DATA16) { |
| 664 | pp = 1; /* 0x66 */ |
| 665 | } else if (opc & P_SIMDF3) { |
| 666 | pp = 2; /* 0xf3 */ |
| 667 | } else if (opc & P_SIMDF2) { |
| 668 | pp = 3; /* 0xf2 */ |
| 669 | } else { |
| 670 | pp = 0; |
| 671 | } |
| 672 | |
| 673 | p = deposit32(p, 8, 2, mm); |
| 674 | p = deposit32(p, 13, 1, (rm & 8) == 0); /* EVEX.RXB.B */ |
| 675 | p = deposit32(p, 14, 1, (index & 8) == 0); /* EVEX.RXB.X */ |
| 676 | p = deposit32(p, 15, 1, (r & 8) == 0); /* EVEX.RXB.R */ |
| 677 | p = deposit32(p, 16, 2, pp); |
| 678 | p = deposit32(p, 19, 4, ~v); |
| 679 | p = deposit32(p, 23, 1, (opc & P_VEXW) != 0); |
| 680 | p = deposit32(p, 29, 2, (opc & P_VEXL) != 0); |
| 681 | |
| 682 | tcg_out32(s, p); |
| 683 | tcg_out8(s, opc); |
| 684 | } |
| 685 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 686 | static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm) |
| 687 | { |
Richard Henderson | 08b032f | 2021-12-15 20:43:00 -0800 | [diff] [blame] | 688 | if (opc & P_EVEX) { |
| 689 | tcg_out_evex_opc(s, opc, r, v, rm, 0); |
| 690 | } else { |
| 691 | tcg_out_vex_opc(s, opc, r, v, rm, 0); |
| 692 | } |
Richard Henderson | ecc7e84 | 2014-01-27 21:19:40 -0800 | [diff] [blame] | 693 | tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); |
| 694 | } |
| 695 | |
Richard Henderson | 34a6d0b | 2010-05-21 08:30:33 -0700 | [diff] [blame] | 696 | /* Output an opcode with a full "rm + (index<<shift) + offset" address mode. |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 697 | We handle either RM and INDEX missing with a negative value. In 64-bit |
| 698 | mode for absolute addresses, ~RM is the size of the immediate operand |
| 699 | that will follow the instruction. */ |
Richard Henderson | 34a6d0b | 2010-05-21 08:30:33 -0700 | [diff] [blame] | 700 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 701 | static void tcg_out_sib_offset(TCGContext *s, int r, int rm, int index, |
| 702 | int shift, intptr_t offset) |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 703 | { |
Richard Henderson | 34a6d0b | 2010-05-21 08:30:33 -0700 | [diff] [blame] | 704 | int mod, len; |
| 705 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 706 | if (index < 0 && rm < 0) { |
| 707 | if (TCG_TARGET_REG_BITS == 64) { |
| 708 | /* Try for a rip-relative addressing mode. This has replaced |
| 709 | the 32-bit-mode absolute addressing encoding. */ |
Richard Henderson | 357e3d8 | 2013-08-20 15:37:16 -0700 | [diff] [blame] | 710 | intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm; |
| 711 | intptr_t disp = offset - pc; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 712 | if (disp == (int32_t)disp) { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 713 | tcg_out8(s, (LOWREGMASK(r) << 3) | 5); |
| 714 | tcg_out32(s, disp); |
| 715 | return; |
| 716 | } |
Richard Henderson | 34a6d0b | 2010-05-21 08:30:33 -0700 | [diff] [blame] | 717 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 718 | /* Try for an absolute address encoding. This requires the |
| 719 | use of the MODRM+SIB encoding and is therefore larger than |
| 720 | rip-relative addressing. */ |
| 721 | if (offset == (int32_t)offset) { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 722 | tcg_out8(s, (LOWREGMASK(r) << 3) | 4); |
| 723 | tcg_out8(s, (4 << 3) | 5); |
| 724 | tcg_out32(s, offset); |
| 725 | return; |
| 726 | } |
| 727 | |
| 728 | /* ??? The memory isn't directly addressable. */ |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 729 | g_assert_not_reached(); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 730 | } else { |
| 731 | /* Absolute address. */ |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 732 | tcg_out8(s, (r << 3) | 5); |
| 733 | tcg_out32(s, offset); |
| 734 | return; |
| 735 | } |
| 736 | } |
Richard Henderson | 34a6d0b | 2010-05-21 08:30:33 -0700 | [diff] [blame] | 737 | |
| 738 | /* Find the length of the immediate addend. Note that the encoding |
| 739 | that would be used for (%ebp) indicates absolute addressing. */ |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 740 | if (rm < 0) { |
Richard Henderson | 34a6d0b | 2010-05-21 08:30:33 -0700 | [diff] [blame] | 741 | mod = 0, len = 4, rm = 5; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 742 | } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) { |
Richard Henderson | 34a6d0b | 2010-05-21 08:30:33 -0700 | [diff] [blame] | 743 | mod = 0, len = 0; |
| 744 | } else if (offset == (int8_t)offset) { |
| 745 | mod = 0x40, len = 1; |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 746 | } else { |
Richard Henderson | 34a6d0b | 2010-05-21 08:30:33 -0700 | [diff] [blame] | 747 | mod = 0x80, len = 4; |
| 748 | } |
| 749 | |
| 750 | /* Use a single byte MODRM format if possible. Note that the encoding |
| 751 | that would be used for %esp is the escape to the two byte form. */ |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 752 | if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) { |
Richard Henderson | 34a6d0b | 2010-05-21 08:30:33 -0700 | [diff] [blame] | 753 | /* Single byte MODRM format. */ |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 754 | tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); |
Richard Henderson | 34a6d0b | 2010-05-21 08:30:33 -0700 | [diff] [blame] | 755 | } else { |
| 756 | /* Two byte MODRM+SIB format. */ |
| 757 | |
| 758 | /* Note that the encoding that would place %esp into the index |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 759 | field indicates no index register. In 64-bit mode, the REX.X |
| 760 | bit counts, so %r12 can be used as the index. */ |
| 761 | if (index < 0) { |
Richard Henderson | 34a6d0b | 2010-05-21 08:30:33 -0700 | [diff] [blame] | 762 | index = 4; |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 763 | } else { |
Aurelien Jarno | eabb7b9 | 2016-04-21 10:48:49 +0200 | [diff] [blame] | 764 | tcg_debug_assert(index != TCG_REG_ESP); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 765 | } |
Richard Henderson | 34a6d0b | 2010-05-21 08:30:33 -0700 | [diff] [blame] | 766 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 767 | tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4); |
| 768 | tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm)); |
Richard Henderson | 34a6d0b | 2010-05-21 08:30:33 -0700 | [diff] [blame] | 769 | } |
| 770 | |
| 771 | if (len == 1) { |
| 772 | tcg_out8(s, offset); |
| 773 | } else if (len == 4) { |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 774 | tcg_out32(s, offset); |
| 775 | } |
| 776 | } |
| 777 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 778 | static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm, |
| 779 | int index, int shift, intptr_t offset) |
| 780 | { |
| 781 | tcg_out_opc(s, opc, r, rm < 0 ? 0 : rm, index < 0 ? 0 : index); |
| 782 | tcg_out_sib_offset(s, r, rm, index, shift, offset); |
| 783 | } |
| 784 | |
| 785 | static void tcg_out_vex_modrm_sib_offset(TCGContext *s, int opc, int r, int v, |
| 786 | int rm, int index, int shift, |
| 787 | intptr_t offset) |
| 788 | { |
| 789 | tcg_out_vex_opc(s, opc, r, v, rm < 0 ? 0 : rm, index < 0 ? 0 : index); |
| 790 | tcg_out_sib_offset(s, r, rm, index, shift, offset); |
| 791 | } |
| 792 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 793 | /* A simplification of the above with no index or shift. */ |
| 794 | static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, |
Richard Henderson | 357e3d8 | 2013-08-20 15:37:16 -0700 | [diff] [blame] | 795 | int rm, intptr_t offset) |
Richard Henderson | 34a6d0b | 2010-05-21 08:30:33 -0700 | [diff] [blame] | 796 | { |
| 797 | tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset); |
| 798 | } |
| 799 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 800 | static inline void tcg_out_vex_modrm_offset(TCGContext *s, int opc, int r, |
| 801 | int v, int rm, intptr_t offset) |
| 802 | { |
| 803 | tcg_out_vex_modrm_sib_offset(s, opc, r, v, rm, -1, 0, offset); |
| 804 | } |
| 805 | |
| 806 | /* Output an opcode with an expected reference to the constant pool. */ |
| 807 | static inline void tcg_out_modrm_pool(TCGContext *s, int opc, int r) |
| 808 | { |
| 809 | tcg_out_opc(s, opc, r, 0, 0); |
| 810 | /* Absolute for 32-bit, pc-relative for 64-bit. */ |
| 811 | tcg_out8(s, LOWREGMASK(r) << 3 | 5); |
| 812 | tcg_out32(s, 0); |
| 813 | } |
| 814 | |
| 815 | /* Output an opcode with an expected reference to the constant pool. */ |
| 816 | static inline void tcg_out_vex_modrm_pool(TCGContext *s, int opc, int r) |
| 817 | { |
| 818 | tcg_out_vex_opc(s, opc, r, 0, 0, 0); |
| 819 | /* Absolute for 32-bit, pc-relative for 64-bit. */ |
| 820 | tcg_out8(s, LOWREGMASK(r) << 3 | 5); |
| 821 | tcg_out32(s, 0); |
| 822 | } |
| 823 | |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 824 | /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */ |
| 825 | static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src) |
| 826 | { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 827 | /* Propagate an opcode prefix, such as P_REXW. */ |
| 828 | int ext = subop & ~0x7; |
| 829 | subop &= 0x7; |
| 830 | |
| 831 | tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src); |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 832 | } |
| 833 | |
Richard Henderson | 78113e8 | 2019-03-16 17:48:18 +0000 | [diff] [blame] | 834 | static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 835 | { |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 836 | int rexw = 0; |
| 837 | |
| 838 | if (arg == ret) { |
Richard Henderson | 78113e8 | 2019-03-16 17:48:18 +0000 | [diff] [blame] | 839 | return true; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 840 | } |
| 841 | switch (type) { |
| 842 | case TCG_TYPE_I64: |
| 843 | rexw = P_REXW; |
| 844 | /* fallthru */ |
| 845 | case TCG_TYPE_I32: |
| 846 | if (ret < 16) { |
| 847 | if (arg < 16) { |
| 848 | tcg_out_modrm(s, OPC_MOVL_GvEv + rexw, ret, arg); |
| 849 | } else { |
| 850 | tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, arg, 0, ret); |
| 851 | } |
| 852 | } else { |
| 853 | if (arg < 16) { |
| 854 | tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, ret, 0, arg); |
| 855 | } else { |
| 856 | tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg); |
| 857 | } |
| 858 | } |
| 859 | break; |
| 860 | |
| 861 | case TCG_TYPE_V64: |
| 862 | tcg_debug_assert(ret >= 16 && arg >= 16); |
| 863 | tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg); |
| 864 | break; |
| 865 | case TCG_TYPE_V128: |
| 866 | tcg_debug_assert(ret >= 16 && arg >= 16); |
| 867 | tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx, ret, 0, arg); |
| 868 | break; |
| 869 | case TCG_TYPE_V256: |
| 870 | tcg_debug_assert(ret >= 16 && arg >= 16); |
| 871 | tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx | P_VEXL, ret, 0, arg); |
| 872 | break; |
| 873 | |
| 874 | default: |
| 875 | g_assert_not_reached(); |
| 876 | } |
Richard Henderson | 78113e8 | 2019-03-16 17:48:18 +0000 | [diff] [blame] | 877 | return true; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 878 | } |
| 879 | |
Richard Henderson | 1e262b4 | 2019-03-18 12:02:54 -0700 | [diff] [blame] | 880 | static const int avx2_dup_insn[4] = { |
| 881 | OPC_VPBROADCASTB, OPC_VPBROADCASTW, |
| 882 | OPC_VPBROADCASTD, OPC_VPBROADCASTQ, |
| 883 | }; |
| 884 | |
Richard Henderson | e7632cf | 2019-03-18 15:32:44 +0000 | [diff] [blame] | 885 | static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 886 | TCGReg r, TCGReg a) |
| 887 | { |
| 888 | if (have_avx2) { |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 889 | int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0); |
Richard Henderson | 1e262b4 | 2019-03-18 12:02:54 -0700 | [diff] [blame] | 890 | tcg_out_vex_modrm(s, avx2_dup_insn[vece] + vex_l, r, 0, a); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 891 | } else { |
| 892 | switch (vece) { |
| 893 | case MO_8: |
| 894 | /* ??? With zero in a register, use PSHUFB. */ |
Peter Maydell | 7eb30ef | 2018-05-04 16:34:31 +0100 | [diff] [blame] | 895 | tcg_out_vex_modrm(s, OPC_PUNPCKLBW, r, a, a); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 896 | a = r; |
| 897 | /* FALLTHRU */ |
| 898 | case MO_16: |
Peter Maydell | 7eb30ef | 2018-05-04 16:34:31 +0100 | [diff] [blame] | 899 | tcg_out_vex_modrm(s, OPC_PUNPCKLWD, r, a, a); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 900 | a = r; |
| 901 | /* FALLTHRU */ |
| 902 | case MO_32: |
| 903 | tcg_out_vex_modrm(s, OPC_PSHUFD, r, 0, a); |
| 904 | /* imm8 operand: all output lanes selected from input lane 0. */ |
| 905 | tcg_out8(s, 0); |
| 906 | break; |
| 907 | case MO_64: |
Peter Maydell | 7eb30ef | 2018-05-04 16:34:31 +0100 | [diff] [blame] | 908 | tcg_out_vex_modrm(s, OPC_PUNPCKLQDQ, r, a, a); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 909 | break; |
| 910 | default: |
| 911 | g_assert_not_reached(); |
| 912 | } |
| 913 | } |
Richard Henderson | e7632cf | 2019-03-18 15:32:44 +0000 | [diff] [blame] | 914 | return true; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 915 | } |
| 916 | |
Richard Henderson | d6ecb4a | 2019-03-18 12:00:39 -0700 | [diff] [blame] | 917 | static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, |
| 918 | TCGReg r, TCGReg base, intptr_t offset) |
| 919 | { |
Richard Henderson | 1e262b4 | 2019-03-18 12:02:54 -0700 | [diff] [blame] | 920 | if (have_avx2) { |
| 921 | int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0); |
| 922 | tcg_out_vex_modrm_offset(s, avx2_dup_insn[vece] + vex_l, |
| 923 | r, 0, base, offset); |
| 924 | } else { |
| 925 | switch (vece) { |
| 926 | case MO_64: |
Richard Henderson | 7b60ef3 | 2019-05-16 15:11:17 -0700 | [diff] [blame] | 927 | tcg_out_vex_modrm_offset(s, OPC_MOVDDUP, r, 0, base, offset); |
Richard Henderson | 1e262b4 | 2019-03-18 12:02:54 -0700 | [diff] [blame] | 928 | break; |
| 929 | case MO_32: |
| 930 | tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset); |
| 931 | break; |
| 932 | case MO_16: |
| 933 | tcg_out_vex_modrm_offset(s, OPC_VPINSRW, r, r, base, offset); |
| 934 | tcg_out8(s, 0); /* imm8 */ |
| 935 | tcg_out_dup_vec(s, type, vece, r, r); |
| 936 | break; |
| 937 | case MO_8: |
| 938 | tcg_out_vex_modrm_offset(s, OPC_VPINSRB, r, r, base, offset); |
| 939 | tcg_out8(s, 0); /* imm8 */ |
| 940 | tcg_out_dup_vec(s, type, vece, r, r); |
| 941 | break; |
| 942 | default: |
| 943 | g_assert_not_reached(); |
| 944 | } |
| 945 | } |
| 946 | return true; |
Richard Henderson | d6ecb4a | 2019-03-18 12:00:39 -0700 | [diff] [blame] | 947 | } |
| 948 | |
Richard Henderson | 4e18617 | 2020-03-31 01:02:08 -0700 | [diff] [blame] | 949 | static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, |
| 950 | TCGReg ret, int64_t arg) |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 951 | { |
| 952 | int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0); |
| 953 | |
| 954 | if (arg == 0) { |
| 955 | tcg_out_vex_modrm(s, OPC_PXOR, ret, ret, ret); |
| 956 | return; |
| 957 | } |
| 958 | if (arg == -1) { |
| 959 | tcg_out_vex_modrm(s, OPC_PCMPEQB + vex_l, ret, ret, ret); |
| 960 | return; |
| 961 | } |
| 962 | |
Richard Henderson | 4e18617 | 2020-03-31 01:02:08 -0700 | [diff] [blame] | 963 | if (TCG_TARGET_REG_BITS == 32 && vece < MO_64) { |
| 964 | if (have_avx2) { |
| 965 | tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret); |
| 966 | } else { |
| 967 | tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret); |
| 968 | } |
| 969 | new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0); |
| 970 | } else { |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 971 | if (type == TCG_TYPE_V64) { |
| 972 | tcg_out_vex_modrm_pool(s, OPC_MOVQ_VqWq, ret); |
| 973 | } else if (have_avx2) { |
| 974 | tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret); |
| 975 | } else { |
Richard Henderson | 7b60ef3 | 2019-05-16 15:11:17 -0700 | [diff] [blame] | 976 | tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 977 | } |
Richard Henderson | 4e18617 | 2020-03-31 01:02:08 -0700 | [diff] [blame] | 978 | if (TCG_TARGET_REG_BITS == 64) { |
| 979 | new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4); |
Richard Henderson | 1e262b4 | 2019-03-18 12:02:54 -0700 | [diff] [blame] | 980 | } else { |
Richard Henderson | 4e18617 | 2020-03-31 01:02:08 -0700 | [diff] [blame] | 981 | new_pool_l2(s, R_386_32, s->code_ptr - 4, 0, arg, arg >> 32); |
Richard Henderson | 1e262b4 | 2019-03-18 12:02:54 -0700 | [diff] [blame] | 982 | } |
Richard Henderson | af26608 | 2010-04-14 07:58:59 -0700 | [diff] [blame] | 983 | } |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 984 | } |
| 985 | |
Richard Henderson | 0a6a8bc | 2020-03-31 05:43:23 -0700 | [diff] [blame] | 986 | static void tcg_out_movi_vec(TCGContext *s, TCGType type, |
| 987 | TCGReg ret, tcg_target_long arg) |
| 988 | { |
| 989 | if (arg == 0) { |
| 990 | tcg_out_vex_modrm(s, OPC_PXOR, ret, ret, ret); |
| 991 | return; |
| 992 | } |
| 993 | if (arg == -1) { |
| 994 | tcg_out_vex_modrm(s, OPC_PCMPEQB, ret, ret, ret); |
| 995 | return; |
| 996 | } |
| 997 | |
| 998 | int rexw = (type == TCG_TYPE_I32 ? 0 : P_REXW); |
| 999 | tcg_out_vex_modrm_pool(s, OPC_MOVD_VyEy + rexw, ret); |
| 1000 | if (TCG_TARGET_REG_BITS == 64) { |
| 1001 | new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4); |
| 1002 | } else { |
| 1003 | new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0); |
| 1004 | } |
| 1005 | } |
| 1006 | |
/*
 * Load the immediate ARG into general register RET, choosing the
 * smallest encoding: xor for zero, 5-byte movl for 32-bit values,
 * 7-byte sign-extending movq or pc-relative lea, else 10-byte movq.
 */
static void tcg_out_movi_int(TCGContext *s, TCGType type,
                             TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

    if (arg == 0) {
        /* xor r,r: shortest encoding and no immediate at all. */
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        /* Zero-extended 32-bit immediate: plain movl suffices. */
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        /* Sign-extended 32-bit immediate: movq with imm32. */
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }

    /* Try a 7 byte pc-relative lea before the 10 byte movq. */
    diff = tcg_pcrel_diff(s, (const void *)arg) - 7;
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        /* mod=00, rm=101: rip-relative addressing. */
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    /* Full 64-bit immediate load. */
    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}
| 1039 | |
Richard Henderson | 0a6a8bc | 2020-03-31 05:43:23 -0700 | [diff] [blame] | 1040 | static void tcg_out_movi(TCGContext *s, TCGType type, |
| 1041 | TCGReg ret, tcg_target_long arg) |
| 1042 | { |
| 1043 | switch (type) { |
| 1044 | case TCG_TYPE_I32: |
| 1045 | #if TCG_TARGET_REG_BITS == 64 |
| 1046 | case TCG_TYPE_I64: |
| 1047 | #endif |
| 1048 | if (ret < 16) { |
| 1049 | tcg_out_movi_int(s, type, ret, arg); |
| 1050 | } else { |
| 1051 | tcg_out_movi_vec(s, type, ret, arg); |
| 1052 | } |
| 1053 | break; |
| 1054 | default: |
| 1055 | g_assert_not_reached(); |
| 1056 | } |
| 1057 | } |
| 1058 | |
Richard Henderson | 767c250 | 2023-04-05 21:39:54 -0700 | [diff] [blame] | 1059 | static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) |
| 1060 | { |
| 1061 | int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; |
| 1062 | tcg_out_modrm(s, OPC_XCHG_EvGv + rexw, r1, r2); |
| 1063 | return true; |
| 1064 | } |
| 1065 | |
Richard Henderson | 6a6d772 | 2022-10-18 21:28:04 +1000 | [diff] [blame] | 1066 | static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, |
| 1067 | tcg_target_long imm) |
| 1068 | { |
| 1069 | /* This function is only used for passing structs by reference. */ |
Richard Henderson | 7d9e1ee | 2023-04-07 12:00:22 -0700 | [diff] [blame] | 1070 | tcg_debug_assert(imm == (int32_t)imm); |
Richard Henderson | 9889985 | 2023-05-12 18:12:43 +0100 | [diff] [blame] | 1071 | tcg_out_modrm_offset(s, OPC_LEA | P_REXW, rd, rs, imm); |
Richard Henderson | 6a6d772 | 2022-10-18 21:28:04 +1000 | [diff] [blame] | 1072 | } |
| 1073 | |
Richard Henderson | 6858614 | 2010-05-21 08:30:26 -0700 | [diff] [blame] | 1074 | static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) |
| 1075 | { |
| 1076 | if (val == (int8_t)val) { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1077 | tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0); |
Richard Henderson | 6858614 | 2010-05-21 08:30:26 -0700 | [diff] [blame] | 1078 | tcg_out8(s, val); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1079 | } else if (val == (int32_t)val) { |
| 1080 | tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0); |
Richard Henderson | 6858614 | 2010-05-21 08:30:26 -0700 | [diff] [blame] | 1081 | tcg_out32(s, val); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1082 | } else { |
Richard Henderson | 732e89f | 2023-04-05 12:09:14 -0700 | [diff] [blame] | 1083 | g_assert_not_reached(); |
Richard Henderson | 6858614 | 2010-05-21 08:30:26 -0700 | [diff] [blame] | 1084 | } |
| 1085 | } |
| 1086 | |
Pranith Kumar | a7d00d4 | 2016-07-14 16:20:14 -0400 | [diff] [blame] | 1087 | static inline void tcg_out_mb(TCGContext *s, TCGArg a0) |
| 1088 | { |
| 1089 | /* Given the strength of x86 memory ordering, we only need care for |
| 1090 | store-load ordering. Experimentally, "lock orl $0,0(%esp)" is |
| 1091 | faster than "mfence", so don't bother with the sse insn. */ |
| 1092 | if (a0 & TCG_MO_ST_LD) { |
| 1093 | tcg_out8(s, 0xf0); |
| 1094 | tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0); |
| 1095 | tcg_out8(s, 0); |
| 1096 | } |
| 1097 | } |
| 1098 | |
Richard Henderson | 6858614 | 2010-05-21 08:30:26 -0700 | [diff] [blame] | 1099 | static inline void tcg_out_push(TCGContext *s, int reg) |
| 1100 | { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1101 | tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0); |
Richard Henderson | 6858614 | 2010-05-21 08:30:26 -0700 | [diff] [blame] | 1102 | } |
| 1103 | |
| 1104 | static inline void tcg_out_pop(TCGContext *s, int reg) |
| 1105 | { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1106 | tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0); |
Richard Henderson | 6858614 | 2010-05-21 08:30:26 -0700 | [diff] [blame] | 1107 | } |
| 1108 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 1109 | static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, |
| 1110 | TCGReg arg1, intptr_t arg2) |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1111 | { |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 1112 | switch (type) { |
| 1113 | case TCG_TYPE_I32: |
| 1114 | if (ret < 16) { |
| 1115 | tcg_out_modrm_offset(s, OPC_MOVL_GvEv, ret, arg1, arg2); |
| 1116 | } else { |
| 1117 | tcg_out_vex_modrm_offset(s, OPC_MOVD_VyEy, ret, 0, arg1, arg2); |
| 1118 | } |
| 1119 | break; |
| 1120 | case TCG_TYPE_I64: |
| 1121 | if (ret < 16) { |
| 1122 | tcg_out_modrm_offset(s, OPC_MOVL_GvEv | P_REXW, ret, arg1, arg2); |
| 1123 | break; |
| 1124 | } |
| 1125 | /* FALLTHRU */ |
| 1126 | case TCG_TYPE_V64: |
Richard Henderson | 11e2bfe | 2019-05-18 11:37:02 -0700 | [diff] [blame] | 1127 | /* There is no instruction that can validate 8-byte alignment. */ |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 1128 | tcg_debug_assert(ret >= 16); |
| 1129 | tcg_out_vex_modrm_offset(s, OPC_MOVQ_VqWq, ret, 0, arg1, arg2); |
| 1130 | break; |
| 1131 | case TCG_TYPE_V128: |
Richard Henderson | 11e2bfe | 2019-05-18 11:37:02 -0700 | [diff] [blame] | 1132 | /* |
| 1133 | * The gvec infrastructure is asserts that v128 vector loads |
| 1134 | * and stores use a 16-byte aligned offset. Validate that the |
| 1135 | * final pointer is aligned by using an insn that will SIGSEGV. |
| 1136 | */ |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 1137 | tcg_debug_assert(ret >= 16); |
Richard Henderson | 11e2bfe | 2019-05-18 11:37:02 -0700 | [diff] [blame] | 1138 | tcg_out_vex_modrm_offset(s, OPC_MOVDQA_VxWx, ret, 0, arg1, arg2); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 1139 | break; |
| 1140 | case TCG_TYPE_V256: |
Richard Henderson | 11e2bfe | 2019-05-18 11:37:02 -0700 | [diff] [blame] | 1141 | /* |
| 1142 | * The gvec infrastructure only requires 16-byte alignment, |
| 1143 | * so here we must use an unaligned load. |
| 1144 | */ |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 1145 | tcg_debug_assert(ret >= 16); |
| 1146 | tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx | P_VEXL, |
| 1147 | ret, 0, arg1, arg2); |
| 1148 | break; |
| 1149 | default: |
| 1150 | g_assert_not_reached(); |
| 1151 | } |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1152 | } |
| 1153 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 1154 | static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, |
| 1155 | TCGReg arg1, intptr_t arg2) |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1156 | { |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 1157 | switch (type) { |
| 1158 | case TCG_TYPE_I32: |
| 1159 | if (arg < 16) { |
| 1160 | tcg_out_modrm_offset(s, OPC_MOVL_EvGv, arg, arg1, arg2); |
| 1161 | } else { |
| 1162 | tcg_out_vex_modrm_offset(s, OPC_MOVD_EyVy, arg, 0, arg1, arg2); |
| 1163 | } |
| 1164 | break; |
| 1165 | case TCG_TYPE_I64: |
| 1166 | if (arg < 16) { |
| 1167 | tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_REXW, arg, arg1, arg2); |
| 1168 | break; |
| 1169 | } |
| 1170 | /* FALLTHRU */ |
| 1171 | case TCG_TYPE_V64: |
Richard Henderson | 11e2bfe | 2019-05-18 11:37:02 -0700 | [diff] [blame] | 1172 | /* There is no instruction that can validate 8-byte alignment. */ |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 1173 | tcg_debug_assert(arg >= 16); |
| 1174 | tcg_out_vex_modrm_offset(s, OPC_MOVQ_WqVq, arg, 0, arg1, arg2); |
| 1175 | break; |
| 1176 | case TCG_TYPE_V128: |
Richard Henderson | 11e2bfe | 2019-05-18 11:37:02 -0700 | [diff] [blame] | 1177 | /* |
| 1178 | * The gvec infrastructure is asserts that v128 vector loads |
| 1179 | * and stores use a 16-byte aligned offset. Validate that the |
| 1180 | * final pointer is aligned by using an insn that will SIGSEGV. |
Richard Henderson | c4f4a00 | 2022-10-21 10:16:28 +1000 | [diff] [blame] | 1181 | * |
| 1182 | * This specific instance is also used by TCG_CALL_RET_BY_VEC, |
| 1183 | * for _WIN64, which must have SSE2 but may not have AVX. |
Richard Henderson | 11e2bfe | 2019-05-18 11:37:02 -0700 | [diff] [blame] | 1184 | */ |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 1185 | tcg_debug_assert(arg >= 16); |
Richard Henderson | c4f4a00 | 2022-10-21 10:16:28 +1000 | [diff] [blame] | 1186 | if (have_avx1) { |
| 1187 | tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2); |
| 1188 | } else { |
| 1189 | tcg_out_modrm_offset(s, OPC_MOVDQA_WxVx, arg, arg1, arg2); |
| 1190 | } |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 1191 | break; |
| 1192 | case TCG_TYPE_V256: |
Richard Henderson | 11e2bfe | 2019-05-18 11:37:02 -0700 | [diff] [blame] | 1193 | /* |
| 1194 | * The gvec infrastructure only requires 16-byte alignment, |
| 1195 | * so here we must use an unaligned store. |
| 1196 | */ |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 1197 | tcg_debug_assert(arg >= 16); |
| 1198 | tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx | P_VEXL, |
| 1199 | arg, 0, arg1, arg2); |
| 1200 | break; |
| 1201 | default: |
| 1202 | g_assert_not_reached(); |
| 1203 | } |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1204 | } |
| 1205 | |
Richard Henderson | 59d7c14 | 2016-06-19 22:59:13 -0700 | [diff] [blame] | 1206 | static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, |
| 1207 | TCGReg base, intptr_t ofs) |
Richard Henderson | c6f29ff | 2013-07-25 06:33:33 -1000 | [diff] [blame] | 1208 | { |
Richard Henderson | 59d7c14 | 2016-06-19 22:59:13 -0700 | [diff] [blame] | 1209 | int rexw = 0; |
| 1210 | if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) { |
| 1211 | if (val != (int32_t)val) { |
| 1212 | return false; |
| 1213 | } |
| 1214 | rexw = P_REXW; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 1215 | } else if (type != TCG_TYPE_I32) { |
| 1216 | return false; |
Richard Henderson | 59d7c14 | 2016-06-19 22:59:13 -0700 | [diff] [blame] | 1217 | } |
| 1218 | tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs); |
Richard Henderson | c6f29ff | 2013-07-25 06:33:33 -1000 | [diff] [blame] | 1219 | tcg_out32(s, val); |
Richard Henderson | 59d7c14 | 2016-06-19 22:59:13 -0700 | [diff] [blame] | 1220 | return true; |
Richard Henderson | c6f29ff | 2013-07-25 06:33:33 -1000 | [diff] [blame] | 1221 | } |
| 1222 | |
Richard Henderson | f53dba0 | 2010-04-28 10:38:04 -0700 | [diff] [blame] | 1223 | static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count) |
| 1224 | { |
Richard Henderson | 96b4cf3 | 2010-05-21 09:03:00 -0700 | [diff] [blame] | 1225 | /* Propagate an opcode prefix, such as P_DATA16. */ |
| 1226 | int ext = subopc & ~0x7; |
| 1227 | subopc &= 0x7; |
| 1228 | |
Richard Henderson | f53dba0 | 2010-04-28 10:38:04 -0700 | [diff] [blame] | 1229 | if (count == 1) { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1230 | tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg); |
Richard Henderson | f53dba0 | 2010-04-28 10:38:04 -0700 | [diff] [blame] | 1231 | } else { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1232 | tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg); |
Richard Henderson | f53dba0 | 2010-04-28 10:38:04 -0700 | [diff] [blame] | 1233 | tcg_out8(s, count); |
| 1234 | } |
| 1235 | } |
| 1236 | |
Richard Henderson | fcb5dac | 2010-04-28 10:31:18 -0700 | [diff] [blame] | 1237 | static inline void tcg_out_bswap32(TCGContext *s, int reg) |
| 1238 | { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1239 | tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0); |
Richard Henderson | fcb5dac | 2010-04-28 10:31:18 -0700 | [diff] [blame] | 1240 | } |
| 1241 | |
| 1242 | static inline void tcg_out_rolw_8(TCGContext *s, int reg) |
| 1243 | { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1244 | tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8); |
Richard Henderson | fcb5dac | 2010-04-28 10:31:18 -0700 | [diff] [blame] | 1245 | } |
| 1246 | |
Richard Henderson | d0e66c8 | 2023-04-05 13:26:51 -0700 | [diff] [blame] | 1247 | static void tcg_out_ext8u(TCGContext *s, TCGReg dest, TCGReg src) |
Richard Henderson | 55e082a | 2010-05-21 08:30:21 -0700 | [diff] [blame] | 1248 | { |
| 1249 | /* movzbl */ |
Aurelien Jarno | eabb7b9 | 2016-04-21 10:48:49 +0200 | [diff] [blame] | 1250 | tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1251 | tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src); |
Richard Henderson | 55e082a | 2010-05-21 08:30:21 -0700 | [diff] [blame] | 1252 | } |
| 1253 | |
Richard Henderson | 678155b | 2023-04-05 11:17:01 -0700 | [diff] [blame] | 1254 | static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) |
Richard Henderson | 6817c35 | 2010-05-21 08:30:22 -0700 | [diff] [blame] | 1255 | { |
Richard Henderson | 678155b | 2023-04-05 11:17:01 -0700 | [diff] [blame] | 1256 | int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; |
Richard Henderson | 6817c35 | 2010-05-21 08:30:22 -0700 | [diff] [blame] | 1257 | /* movsbl */ |
Aurelien Jarno | eabb7b9 | 2016-04-21 10:48:49 +0200 | [diff] [blame] | 1258 | tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1259 | tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src); |
Richard Henderson | 6817c35 | 2010-05-21 08:30:22 -0700 | [diff] [blame] | 1260 | } |
| 1261 | |
Richard Henderson | 379afdf | 2023-04-05 16:25:22 -0700 | [diff] [blame] | 1262 | static void tcg_out_ext16u(TCGContext *s, TCGReg dest, TCGReg src) |
Richard Henderson | 55e082a | 2010-05-21 08:30:21 -0700 | [diff] [blame] | 1263 | { |
| 1264 | /* movzwl */ |
| 1265 | tcg_out_modrm(s, OPC_MOVZWL, dest, src); |
| 1266 | } |
| 1267 | |
Richard Henderson | 753e42e | 2023-04-05 14:49:59 -0700 | [diff] [blame] | 1268 | static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) |
Richard Henderson | 6817c35 | 2010-05-21 08:30:22 -0700 | [diff] [blame] | 1269 | { |
Richard Henderson | 753e42e | 2023-04-05 14:49:59 -0700 | [diff] [blame] | 1270 | int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1271 | /* movsw[lq] */ |
| 1272 | tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src); |
Richard Henderson | 6817c35 | 2010-05-21 08:30:22 -0700 | [diff] [blame] | 1273 | } |
| 1274 | |
Richard Henderson | 9ecf5f6 | 2023-04-05 18:07:05 -0700 | [diff] [blame] | 1275 | static void tcg_out_ext32u(TCGContext *s, TCGReg dest, TCGReg src) |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1276 | { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1277 | /* 32-bit mov zero extends. */ |
| 1278 | tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src); |
| 1279 | } |
| 1280 | |
Richard Henderson | 52bf339 | 2023-04-05 17:50:09 -0700 | [diff] [blame] | 1281 | static void tcg_out_ext32s(TCGContext *s, TCGReg dest, TCGReg src) |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1282 | { |
Richard Henderson | 52bf339 | 2023-04-05 17:50:09 -0700 | [diff] [blame] | 1283 | tcg_debug_assert(TCG_TARGET_REG_BITS == 64); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1284 | tcg_out_modrm(s, OPC_MOVSLQ, dest, src); |
| 1285 | } |
| 1286 | |
Richard Henderson | 9c6aa27 | 2023-04-05 18:30:56 -0700 | [diff] [blame] | 1287 | static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dest, TCGReg src) |
| 1288 | { |
| 1289 | tcg_out_ext32s(s, dest, src); |
| 1290 | } |
| 1291 | |
Richard Henderson | b9bfe00 | 2023-04-05 18:56:28 -0700 | [diff] [blame] | 1292 | static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src) |
| 1293 | { |
Richard Henderson | b248553 | 2023-04-05 19:00:43 -0700 | [diff] [blame] | 1294 | if (dest != src) { |
| 1295 | tcg_out_ext32u(s, dest, src); |
| 1296 | } |
Richard Henderson | b9bfe00 | 2023-04-05 18:56:28 -0700 | [diff] [blame] | 1297 | } |
| 1298 | |
Richard Henderson | b8b94ac | 2023-04-05 19:58:35 -0700 | [diff] [blame] | 1299 | static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg dest, TCGReg src) |
| 1300 | { |
| 1301 | tcg_out_ext32u(s, dest, src); |
| 1302 | } |
| 1303 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1304 | static inline void tcg_out_bswap64(TCGContext *s, int reg) |
| 1305 | { |
| 1306 | tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0); |
| 1307 | } |
| 1308 | |
/*
 * Emit "op $val, r0" for the arithmetic group (ARITH_ADD, ARITH_SUB,
 * ARITH_AND, ...).  C may have REX.W folded into it on 64-bit hosts.
 * A nonzero CF means the flags result is required, which forbids the
 * INC/DEC shortcut below (INC/DEC do not write the carry flag).
 */
static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        /* Low 3 bits select the operation; the rest are prefixes. */
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals. */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding. */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        /* AND with a zero-extension mask can use the shorter movz forms. */
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation. */
                rexw = 0;
            }
        }
        /* movzbl needs a byte-addressable register on 32-bit hosts. */
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        /* Sign-extended imm8 form. */
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        /* imm32 form (sign-extended under REX.W). */
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    /* No encoding exists for a true 64-bit immediate operand. */
    g_assert_not_reached();
}
| 1369 | |
aurel32 | 3e9a474 | 2008-12-14 17:29:58 +0000 | [diff] [blame] | 1370 | static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1371 | { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1372 | if (val != 0) { |
| 1373 | tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0); |
| 1374 | } |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1375 | } |
| 1376 | |
Richard Henderson | 1a05755 | 2023-04-05 12:08:46 -0700 | [diff] [blame] | 1377 | /* Set SMALL to force a short forward branch. */ |
| 1378 | static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, bool small) |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1379 | { |
| 1380 | int32_t val, val1; |
Richard Henderson | 7868652 | 2010-05-21 08:30:35 -0700 | [diff] [blame] | 1381 | |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1382 | if (l->has_value) { |
Richard Henderson | f6bff89 | 2014-04-01 08:34:03 -0700 | [diff] [blame] | 1383 | val = tcg_pcrel_diff(s, l->u.value_ptr); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1384 | val1 = val - 2; |
| 1385 | if ((int8_t)val1 == val1) { |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1386 | if (opc == -1) { |
Richard Henderson | da441cf | 2010-04-14 08:26:50 -0700 | [diff] [blame] | 1387 | tcg_out8(s, OPC_JMP_short); |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1388 | } else { |
Richard Henderson | da441cf | 2010-04-14 08:26:50 -0700 | [diff] [blame] | 1389 | tcg_out8(s, OPC_JCC_short + opc); |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1390 | } |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1391 | tcg_out8(s, val1); |
| 1392 | } else { |
Richard Henderson | 1a05755 | 2023-04-05 12:08:46 -0700 | [diff] [blame] | 1393 | tcg_debug_assert(!small); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1394 | if (opc == -1) { |
Richard Henderson | da441cf | 2010-04-14 08:26:50 -0700 | [diff] [blame] | 1395 | tcg_out8(s, OPC_JMP_long); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1396 | tcg_out32(s, val - 5); |
| 1397 | } else { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1398 | tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1399 | tcg_out32(s, val - 6); |
| 1400 | } |
| 1401 | } |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1402 | } else if (small) { |
| 1403 | if (opc == -1) { |
Richard Henderson | da441cf | 2010-04-14 08:26:50 -0700 | [diff] [blame] | 1404 | tcg_out8(s, OPC_JMP_short); |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1405 | } else { |
Richard Henderson | da441cf | 2010-04-14 08:26:50 -0700 | [diff] [blame] | 1406 | tcg_out8(s, OPC_JCC_short + opc); |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1407 | } |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1408 | tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1); |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1409 | s->code_ptr += 1; |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1410 | } else { |
| 1411 | if (opc == -1) { |
Richard Henderson | da441cf | 2010-04-14 08:26:50 -0700 | [diff] [blame] | 1412 | tcg_out8(s, OPC_JMP_long); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1413 | } else { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1414 | tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1415 | } |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1416 | tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4); |
pbrook | 623e265 | 2008-02-10 14:09:09 +0000 | [diff] [blame] | 1417 | s->code_ptr += 4; |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1418 | } |
| 1419 | } |
| 1420 | |
Richard Henderson | 1d2699a | 2009-12-19 10:46:38 -0800 | [diff] [blame] | 1421 | static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2, |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1422 | int const_arg2, int rexw) |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1423 | { |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1424 | if (const_arg2) { |
| 1425 | if (arg2 == 0) { |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1426 | /* test r, r */ |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1427 | tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1428 | } else { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1429 | tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1430 | } |
| 1431 | } else { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1432 | tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1433 | } |
Richard Henderson | 1d2699a | 2009-12-19 10:46:38 -0800 | [diff] [blame] | 1434 | } |
| 1435 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1436 | static void tcg_out_brcond32(TCGContext *s, TCGCond cond, |
| 1437 | TCGArg arg1, TCGArg arg2, int const_arg2, |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1438 | TCGLabel *label, int small) |
Richard Henderson | 1d2699a | 2009-12-19 10:46:38 -0800 | [diff] [blame] | 1439 | { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1440 | tcg_out_cmp(s, arg1, arg2, const_arg2, 0); |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1441 | tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1442 | } |
| 1443 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1444 | #if TCG_TARGET_REG_BITS == 64 |
| 1445 | static void tcg_out_brcond64(TCGContext *s, TCGCond cond, |
| 1446 | TCGArg arg1, TCGArg arg2, int const_arg2, |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1447 | TCGLabel *label, int small) |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1448 | { |
| 1449 | tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1450 | tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1451 | } |
| 1452 | #else |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1453 | /* XXX: we implement it at the target level to avoid having to |
| 1454 | handle cross basic blocks temporaries */ |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1455 | static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, |
| 1456 | const int *const_args, int small) |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1457 | { |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1458 | TCGLabel *label_next = gen_new_label(); |
| 1459 | TCGLabel *label_this = arg_label(args[5]); |
Richard Henderson | 42a268c | 2015-02-13 12:51:55 -0800 | [diff] [blame] | 1460 | |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1461 | switch(args[4]) { |
| 1462 | case TCG_COND_EQ: |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1463 | tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], |
| 1464 | label_next, 1); |
| 1465 | tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1466 | label_this, small); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1467 | break; |
| 1468 | case TCG_COND_NE: |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1469 | tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1470 | label_this, small); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1471 | tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1472 | label_this, small); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1473 | break; |
| 1474 | case TCG_COND_LT: |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1475 | tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1476 | label_this, small); |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1477 | tcg_out_jxx(s, JCC_JNE, label_next, 1); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1478 | tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1479 | label_this, small); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1480 | break; |
| 1481 | case TCG_COND_LE: |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1482 | tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1483 | label_this, small); |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1484 | tcg_out_jxx(s, JCC_JNE, label_next, 1); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1485 | tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1486 | label_this, small); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1487 | break; |
| 1488 | case TCG_COND_GT: |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1489 | tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1490 | label_this, small); |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1491 | tcg_out_jxx(s, JCC_JNE, label_next, 1); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1492 | tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1493 | label_this, small); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1494 | break; |
| 1495 | case TCG_COND_GE: |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1496 | tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1497 | label_this, small); |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1498 | tcg_out_jxx(s, JCC_JNE, label_next, 1); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1499 | tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1500 | label_this, small); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1501 | break; |
| 1502 | case TCG_COND_LTU: |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1503 | tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1504 | label_this, small); |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1505 | tcg_out_jxx(s, JCC_JNE, label_next, 1); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1506 | tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1507 | label_this, small); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1508 | break; |
| 1509 | case TCG_COND_LEU: |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1510 | tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1511 | label_this, small); |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1512 | tcg_out_jxx(s, JCC_JNE, label_next, 1); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1513 | tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1514 | label_this, small); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1515 | break; |
| 1516 | case TCG_COND_GTU: |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1517 | tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1518 | label_this, small); |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1519 | tcg_out_jxx(s, JCC_JNE, label_next, 1); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1520 | tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1521 | label_this, small); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1522 | break; |
| 1523 | case TCG_COND_GEU: |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1524 | tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1525 | label_this, small); |
Richard Henderson | f75b56c | 2010-02-06 11:47:58 -0800 | [diff] [blame] | 1526 | tcg_out_jxx(s, JCC_JNE, label_next, 1); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1527 | tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1528 | label_this, small); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1529 | break; |
| 1530 | default: |
Richard Henderson | 732e89f | 2023-04-05 12:09:14 -0700 | [diff] [blame] | 1531 | g_assert_not_reached(); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1532 | } |
Richard Henderson | 92ab8e7 | 2020-10-28 18:55:50 -0700 | [diff] [blame] | 1533 | tcg_out_label(s, label_next); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1534 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1535 | #endif |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 1536 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1537 | static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest, |
| 1538 | TCGArg arg1, TCGArg arg2, int const_arg2) |
Richard Henderson | 1d2699a | 2009-12-19 10:46:38 -0800 | [diff] [blame] | 1539 | { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1540 | tcg_out_cmp(s, arg1, arg2, const_arg2, 0); |
Richard Henderson | 32a8ffb | 2010-05-21 08:30:29 -0700 | [diff] [blame] | 1541 | tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); |
Richard Henderson | a369a70 | 2010-05-21 08:30:23 -0700 | [diff] [blame] | 1542 | tcg_out_ext8u(s, dest, dest); |
Richard Henderson | 1d2699a | 2009-12-19 10:46:38 -0800 | [diff] [blame] | 1543 | } |
| 1544 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1545 | #if TCG_TARGET_REG_BITS == 64 |
/*
 * Emit setcond for 64-bit operands: dest = (arg1 cond arg2) ? 1 : 0.
 * As tcg_out_setcond32, but the comparison uses a REX.W prefix.
 */
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
| 1553 | #else |
Richard Henderson | 1d2699a | 2009-12-19 10:46:38 -0800 | [diff] [blame] | 1554 | static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, |
| 1555 | const int *const_args) |
| 1556 | { |
| 1557 | TCGArg new_args[6]; |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1558 | TCGLabel *label_true, *label_over; |
Richard Henderson | 1d2699a | 2009-12-19 10:46:38 -0800 | [diff] [blame] | 1559 | |
| 1560 | memcpy(new_args, args+1, 5*sizeof(TCGArg)); |
| 1561 | |
| 1562 | if (args[0] == args[1] || args[0] == args[2] |
| 1563 | || (!const_args[3] && args[0] == args[3]) |
| 1564 | || (!const_args[4] && args[0] == args[4])) { |
| 1565 | /* When the destination overlaps with one of the argument |
| 1566 | registers, don't do anything tricky. */ |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1567 | label_true = gen_new_label(); |
| 1568 | label_over = gen_new_label(); |
Richard Henderson | 1d2699a | 2009-12-19 10:46:38 -0800 | [diff] [blame] | 1569 | |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1570 | new_args[5] = label_arg(label_true); |
Richard Henderson | 1d2699a | 2009-12-19 10:46:38 -0800 | [diff] [blame] | 1571 | tcg_out_brcond2(s, new_args, const_args+1, 1); |
| 1572 | |
| 1573 | tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); |
| 1574 | tcg_out_jxx(s, JCC_JMP, label_over, 1); |
Richard Henderson | 92ab8e7 | 2020-10-28 18:55:50 -0700 | [diff] [blame] | 1575 | tcg_out_label(s, label_true); |
Richard Henderson | 1d2699a | 2009-12-19 10:46:38 -0800 | [diff] [blame] | 1576 | |
| 1577 | tcg_out_movi(s, TCG_TYPE_I32, args[0], 1); |
Richard Henderson | 92ab8e7 | 2020-10-28 18:55:50 -0700 | [diff] [blame] | 1578 | tcg_out_label(s, label_over); |
Richard Henderson | 1d2699a | 2009-12-19 10:46:38 -0800 | [diff] [blame] | 1579 | } else { |
| 1580 | /* When the destination does not overlap one of the arguments, |
| 1581 | clear the destination first, jump if cond false, and emit an |
| 1582 | increment in the true case. This results in smaller code. */ |
| 1583 | |
| 1584 | tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); |
| 1585 | |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1586 | label_over = gen_new_label(); |
Richard Henderson | 1d2699a | 2009-12-19 10:46:38 -0800 | [diff] [blame] | 1587 | new_args[4] = tcg_invert_cond(new_args[4]); |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1588 | new_args[5] = label_arg(label_over); |
Richard Henderson | 1d2699a | 2009-12-19 10:46:38 -0800 | [diff] [blame] | 1589 | tcg_out_brcond2(s, new_args, const_args+1, 1); |
| 1590 | |
| 1591 | tgen_arithi(s, ARITH_ADD, args[0], 1, 0); |
Richard Henderson | 92ab8e7 | 2020-10-28 18:55:50 -0700 | [diff] [blame] | 1592 | tcg_out_label(s, label_over); |
Richard Henderson | 1d2699a | 2009-12-19 10:46:38 -0800 | [diff] [blame] | 1593 | } |
| 1594 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1595 | #endif |
Richard Henderson | 1d2699a | 2009-12-19 10:46:38 -0800 | [diff] [blame] | 1596 | |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 1597 | static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw, |
| 1598 | TCGReg dest, TCGReg v1) |
Richard Henderson | d0a1629 | 2012-09-21 10:13:36 -0700 | [diff] [blame] | 1599 | { |
Richard Henderson | 76a347e | 2012-12-28 14:17:02 -0800 | [diff] [blame] | 1600 | if (have_cmov) { |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 1601 | tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1); |
Richard Henderson | 76a347e | 2012-12-28 14:17:02 -0800 | [diff] [blame] | 1602 | } else { |
Richard Henderson | bec1631 | 2015-02-13 13:39:54 -0800 | [diff] [blame] | 1603 | TCGLabel *over = gen_new_label(); |
Richard Henderson | 76a347e | 2012-12-28 14:17:02 -0800 | [diff] [blame] | 1604 | tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1); |
| 1605 | tcg_out_mov(s, TCG_TYPE_I32, dest, v1); |
Richard Henderson | 92ab8e7 | 2020-10-28 18:55:50 -0700 | [diff] [blame] | 1606 | tcg_out_label(s, over); |
Richard Henderson | 76a347e | 2012-12-28 14:17:02 -0800 | [diff] [blame] | 1607 | } |
Richard Henderson | d0a1629 | 2012-09-21 10:13:36 -0700 | [diff] [blame] | 1608 | } |
| 1609 | |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 1610 | static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest, |
| 1611 | TCGReg c1, TCGArg c2, int const_c2, |
| 1612 | TCGReg v1) |
| 1613 | { |
| 1614 | tcg_out_cmp(s, c1, c2, const_c2, 0); |
| 1615 | tcg_out_cmov(s, cond, 0, dest, v1); |
| 1616 | } |
| 1617 | |
Richard Henderson | d0a1629 | 2012-09-21 10:13:36 -0700 | [diff] [blame] | 1618 | #if TCG_TARGET_REG_BITS == 64 |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 1619 | static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest, |
| 1620 | TCGReg c1, TCGArg c2, int const_c2, |
| 1621 | TCGReg v1) |
Richard Henderson | d0a1629 | 2012-09-21 10:13:36 -0700 | [diff] [blame] | 1622 | { |
| 1623 | tcg_out_cmp(s, c1, c2, const_c2, P_REXW); |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 1624 | tcg_out_cmov(s, cond, P_REXW, dest, v1); |
Richard Henderson | d0a1629 | 2012-09-21 10:13:36 -0700 | [diff] [blame] | 1625 | } |
| 1626 | #endif |
| 1627 | |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 1628 | static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, |
| 1629 | TCGArg arg2, bool const_a2) |
| 1630 | { |
Richard Henderson | 39f099e | 2017-01-17 12:02:08 -0800 | [diff] [blame] | 1631 | if (have_bmi1) { |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 1632 | tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1); |
Richard Henderson | 39f099e | 2017-01-17 12:02:08 -0800 | [diff] [blame] | 1633 | if (const_a2) { |
| 1634 | tcg_debug_assert(arg2 == (rexw ? 64 : 32)); |
| 1635 | } else { |
| 1636 | tcg_debug_assert(dest != arg2); |
| 1637 | tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2); |
| 1638 | } |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 1639 | } else { |
Richard Henderson | 9bf3830 | 2017-01-17 11:38:22 -0800 | [diff] [blame] | 1640 | tcg_debug_assert(dest != arg2); |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 1641 | tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1); |
Richard Henderson | 9bf3830 | 2017-01-17 11:38:22 -0800 | [diff] [blame] | 1642 | tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2); |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 1643 | } |
| 1644 | } |
| 1645 | |
/*
 * Emit count-leading-zeros: dest = clz(arg1), with arg2 as the result
 * for a zero input (constant or register, per const_a2).
 */
static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
                        TCGArg arg2, bool const_a2)
{
    if (have_lzcnt) {
        tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1);
        if (const_a2) {
            /* LZCNT already yields the operand width for a zero input. */
            tcg_debug_assert(arg2 == (rexw ? 64 : 32));
        } else {
            /* LZCNT sets C for a zero input; LTU (JB) tests that flag. */
            tcg_debug_assert(dest != arg2);
            tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
        }
    } else {
        tcg_debug_assert(!const_a2);
        tcg_debug_assert(dest != arg1);
        tcg_debug_assert(dest != arg2);

        /* Recall that the output of BSR is the index not the count. */
        tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1);
        /* Index to count: c = (width - 1) - index, i.e. XOR with 31/63. */
        tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0);

        /* Since we have destroyed the flags from BSR, we have to re-test. */
        tcg_out_cmp(s, arg1, 0, 1, rexw);
        tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
    }
}
| 1671 | |
Richard Henderson | 2be7d76 | 2020-10-28 15:29:04 -0700 | [diff] [blame] | 1672 | static void tcg_out_branch(TCGContext *s, int call, const tcg_insn_unit *dest) |
Richard Henderson | aadb21a | 2010-05-21 08:30:27 -0700 | [diff] [blame] | 1673 | { |
Richard Henderson | f6bff89 | 2014-04-01 08:34:03 -0700 | [diff] [blame] | 1674 | intptr_t disp = tcg_pcrel_diff(s, dest) - 5; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1675 | |
| 1676 | if (disp == (int32_t)disp) { |
| 1677 | tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0); |
| 1678 | tcg_out32(s, disp); |
| 1679 | } else { |
Richard Henderson | 4e45f23 | 2017-07-20 19:56:42 -1000 | [diff] [blame] | 1680 | /* rip-relative addressing into the constant pool. |
| 1681 | This is 6 + 8 = 14 bytes, as compared to using an |
Daniel P. Berrangé | 7a21bee | 2022-07-07 17:37:15 +0100 | [diff] [blame] | 1682 | immediate load 10 + 6 = 16 bytes, plus we may |
Richard Henderson | 4e45f23 | 2017-07-20 19:56:42 -1000 | [diff] [blame] | 1683 | be able to re-use the pool constant for more calls. */ |
| 1684 | tcg_out_opc(s, OPC_GRP5, 0, 0, 0); |
| 1685 | tcg_out8(s, (call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev) << 3 | 5); |
| 1686 | new_pool_label(s, (uintptr_t)dest, R_386_PC32, s->code_ptr, -4); |
| 1687 | tcg_out32(s, 0); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1688 | } |
| 1689 | } |
| 1690 | |
Richard Henderson | cee44b0 | 2022-10-18 17:51:41 +1000 | [diff] [blame] | 1691 | static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest, |
| 1692 | const TCGHelperInfo *info) |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1693 | { |
| 1694 | tcg_out_branch(s, 1, dest); |
Richard Henderson | c4f4a00 | 2022-10-21 10:16:28 +1000 | [diff] [blame] | 1695 | |
| 1696 | #ifndef _WIN32 |
| 1697 | if (TCG_TARGET_REG_BITS == 32 && info->out_kind == TCG_CALL_RET_BY_REF) { |
| 1698 | /* |
| 1699 | * The sysv i386 abi for struct return places a reference as the |
| 1700 | * first argument of the stack, and pops that argument with the |
| 1701 | * return statement. Since we want to retain the aligned stack |
| 1702 | * pointer for the callee, we do not want to actually push that |
| 1703 | * argument before the call but rely on the normal store to the |
| 1704 | * stack slot. But we do need to compensate for the pop in order |
| 1705 | * to reset our correct stack pointer value. |
| 1706 | * Pushing a garbage value back onto the stack is quickest. |
| 1707 | */ |
| 1708 | tcg_out_push(s, TCG_REG_EAX); |
| 1709 | } |
| 1710 | #endif |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1711 | } |
| 1712 | |
Richard Henderson | 705ed47 | 2020-10-28 23:42:12 -0700 | [diff] [blame] | 1713 | static void tcg_out_jmp(TCGContext *s, const tcg_insn_unit *dest) |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 1714 | { |
| 1715 | tcg_out_branch(s, 0, dest); |
Richard Henderson | aadb21a | 2010-05-21 08:30:27 -0700 | [diff] [blame] | 1716 | } |
| 1717 | |
Sergey Fedorov | 0d07abf | 2016-04-22 19:08:47 +0300 | [diff] [blame] | 1718 | static void tcg_out_nopn(TCGContext *s, int n) |
| 1719 | { |
| 1720 | int i; |
| 1721 | /* Emit 1 or 2 operand size prefixes for the standard one byte nop, |
| 1722 | * "xchg %eax,%eax", forming "xchg %ax,%ax". All cores accept the |
| 1723 | * duplicate prefix, and all of the interesting recent cores can |
| 1724 | * decode and discard the duplicates in a single cycle. |
| 1725 | */ |
| 1726 | tcg_debug_assert(n >= 1); |
| 1727 | for (i = 1; i < n; ++i) { |
| 1728 | tcg_out8(s, 0x66); |
| 1729 | } |
| 1730 | tcg_out8(s, 0x90); |
| 1731 | } |
| 1732 | |
Richard Henderson | a48f1c7 | 2022-11-08 14:30:27 +1100 | [diff] [blame] | 1733 | /* Test register R vs immediate bits I, setting Z flag for EQ/NE. */ |
| 1734 | static void __attribute__((unused)) |
| 1735 | tcg_out_testi(TCGContext *s, TCGReg r, uint32_t i) |
| 1736 | { |
| 1737 | /* |
| 1738 | * This is used for testing alignment, so we can usually use testb. |
| 1739 | * For i686, we have to use testl for %esi/%edi. |
| 1740 | */ |
| 1741 | if (i <= 0xff && (TCG_TARGET_REG_BITS == 64 || r < 4)) { |
| 1742 | tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, r); |
| 1743 | tcg_out8(s, i); |
| 1744 | } else { |
| 1745 | tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_TESTi, r); |
| 1746 | tcg_out32(s, i); |
| 1747 | } |
| 1748 | } |
| 1749 | |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 1750 | typedef struct { |
| 1751 | TCGReg base; |
| 1752 | int index; |
| 1753 | int ofs; |
| 1754 | int seg; |
Richard Henderson | 1c5322d | 2023-04-17 09:33:08 +0200 | [diff] [blame] | 1755 | TCGAtomAlign aa; |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 1756 | } HostAddress; |
| 1757 | |
Richard Henderson | 7b88010 | 2023-04-19 12:43:17 +0200 | [diff] [blame] | 1758 | bool tcg_target_has_memory_bswap(MemOp memop) |
| 1759 | { |
Richard Henderson | 098d0fc | 2023-04-17 10:16:28 +0200 | [diff] [blame] | 1760 | TCGAtomAlign aa; |
| 1761 | |
| 1762 | if (!have_movbe) { |
| 1763 | return false; |
| 1764 | } |
| 1765 | if ((memop & MO_SIZE) < MO_128) { |
| 1766 | return true; |
| 1767 | } |
| 1768 | |
| 1769 | /* |
| 1770 | * Reject 16-byte memop with 16-byte atomicity, i.e. VMOVDQA, |
| 1771 | * but do allow a pair of 64-bit operations, i.e. MOVBEQ. |
| 1772 | */ |
| 1773 | aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true); |
| 1774 | return aa.atom < MO_128; |
Richard Henderson | 7b88010 | 2023-04-19 12:43:17 +0200 | [diff] [blame] | 1775 | } |
| 1776 | |
Yeongkyoon Lee | b76f0d8 | 2012-10-31 16:04:25 +0900 | [diff] [blame] | 1777 | /* |
Richard Henderson | da8ab70 | 2023-04-09 23:03:55 -0700 | [diff] [blame] | 1778 | * Because i686 has no register parameters and because x86_64 has xchg |
| 1779 | * to handle addr/data register overlap, we have placed all input arguments |
 * before we might need a scratch reg.
| 1781 | * |
| 1782 | * Even then, a scratch is only needed for l->raddr. Rather than expose |
| 1783 | * a general-purpose scratch when we don't actually know it's available, |
| 1784 | * use the ra_gen hook to load into RAX if needed. |
| 1785 | */ |
#if TCG_TARGET_REG_BITS == 64
/*
 * ra_gen hook: load the slow-path return address l->raddr into ARG,
 * defaulting to RAX when no free argument register is offered (arg < 0).
 */
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
{
    if (arg < 0) {
        arg = TCG_REG_RAX;
    }
    tcg_out_movi(s, TCG_TYPE_PTR, arg, (uintptr_t)l->raddr);
    return arg;
}
static const TCGLdstHelperParam ldst_helper_param = {
    .ra_gen = ldst_ra_gen
};
#else
/* i686: use the default helper parameter handling (no ra_gen hook). */
static const TCGLdstHelperParam ldst_helper_param = { };
#endif
| 1801 | |
Richard Henderson | 098d0fc | 2023-04-17 10:16:28 +0200 | [diff] [blame] | 1802 | static void tcg_out_vec_to_pair(TCGContext *s, TCGType type, |
| 1803 | TCGReg l, TCGReg h, TCGReg v) |
| 1804 | { |
| 1805 | int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; |
| 1806 | |
| 1807 | /* vpmov{d,q} %v, %l */ |
| 1808 | tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, v, 0, l); |
| 1809 | /* vpextr{d,q} $1, %v, %h */ |
| 1810 | tcg_out_vex_modrm(s, OPC_PEXTRD + rexw, v, 0, h); |
| 1811 | tcg_out8(s, 1); |
| 1812 | } |
| 1813 | |
/*
 * Merge the scalar pair L (element 0) and H (element 1) into vector
 * register V, each element of size TYPE (I32 or I64).
 */
static void tcg_out_pair_to_vec(TCGContext *s, TCGType type,
                                TCGReg v, TCGReg l, TCGReg h)
{
    int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;

    /* vmov{d,q} %l, %v */
    tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, v, 0, l);
    /* vpinsr{d,q} $1, %h, %v, %v */
    tcg_out_vex_modrm(s, OPC_PINSRD + rexw, v, v, h);
    tcg_out8(s, 1);
}
| 1825 | |
Richard Henderson | da8ab70 | 2023-04-09 23:03:55 -0700 | [diff] [blame] | 1826 | /* |
Yeongkyoon Lee | b76f0d8 | 2012-10-31 16:04:25 +0900 | [diff] [blame] | 1827 | * Generate code for the slow path for a load at the end of block |
| 1828 | */ |
Richard Henderson | aeee05f | 2019-04-21 14:51:00 -0700 | [diff] [blame] | 1829 | static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) |
Yeongkyoon Lee | b76f0d8 | 2012-10-31 16:04:25 +0900 | [diff] [blame] | 1830 | { |
Richard Henderson | da8ab70 | 2023-04-09 23:03:55 -0700 | [diff] [blame] | 1831 | MemOp opc = get_memop(l->oi); |
Richard Henderson | f6bff89 | 2014-04-01 08:34:03 -0700 | [diff] [blame] | 1832 | tcg_insn_unit **label_ptr = &l->label_ptr[0]; |
Yeongkyoon Lee | b76f0d8 | 2012-10-31 16:04:25 +0900 | [diff] [blame] | 1833 | |
| 1834 | /* resolve label address */ |
Peter Maydell | 5c53bb8 | 2014-03-28 15:29:48 +0000 | [diff] [blame] | 1835 | tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); |
Richard Henderson | 30cc7a7 | 2022-11-07 20:51:56 +1100 | [diff] [blame] | 1836 | if (label_ptr[1]) { |
Peter Maydell | 5c53bb8 | 2014-03-28 15:29:48 +0000 | [diff] [blame] | 1837 | tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); |
Yeongkyoon Lee | b76f0d8 | 2012-10-31 16:04:25 +0900 | [diff] [blame] | 1838 | } |
| 1839 | |
Richard Henderson | da8ab70 | 2023-04-09 23:03:55 -0700 | [diff] [blame] | 1840 | tcg_out_ld_helper_args(s, l, &ldst_helper_param); |
Richard Henderson | 0cadc1e | 2022-11-01 12:51:04 +1100 | [diff] [blame] | 1841 | tcg_out_branch(s, 1, qemu_ld_helpers[opc & MO_SIZE]); |
Richard Henderson | da8ab70 | 2023-04-09 23:03:55 -0700 | [diff] [blame] | 1842 | tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param); |
Yeongkyoon Lee | b76f0d8 | 2012-10-31 16:04:25 +0900 | [diff] [blame] | 1843 | |
Richard Henderson | f6bff89 | 2014-04-01 08:34:03 -0700 | [diff] [blame] | 1844 | tcg_out_jmp(s, l->raddr); |
Richard Henderson | aeee05f | 2019-04-21 14:51:00 -0700 | [diff] [blame] | 1845 | return true; |
Yeongkyoon Lee | b76f0d8 | 2012-10-31 16:04:25 +0900 | [diff] [blame] | 1846 | } |
| 1847 | |
| 1848 | /* |
| 1849 | * Generate code for the slow path for a store at the end of block |
| 1850 | */ |
Richard Henderson | aeee05f | 2019-04-21 14:51:00 -0700 | [diff] [blame] | 1851 | static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) |
Yeongkyoon Lee | b76f0d8 | 2012-10-31 16:04:25 +0900 | [diff] [blame] | 1852 | { |
Richard Henderson | 0036e54 | 2023-04-09 23:08:12 -0700 | [diff] [blame] | 1853 | MemOp opc = get_memop(l->oi); |
Richard Henderson | f6bff89 | 2014-04-01 08:34:03 -0700 | [diff] [blame] | 1854 | tcg_insn_unit **label_ptr = &l->label_ptr[0]; |
Yeongkyoon Lee | b76f0d8 | 2012-10-31 16:04:25 +0900 | [diff] [blame] | 1855 | |
| 1856 | /* resolve label address */ |
Peter Maydell | 5c53bb8 | 2014-03-28 15:29:48 +0000 | [diff] [blame] | 1857 | tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); |
Richard Henderson | 30cc7a7 | 2022-11-07 20:51:56 +1100 | [diff] [blame] | 1858 | if (label_ptr[1]) { |
Peter Maydell | 5c53bb8 | 2014-03-28 15:29:48 +0000 | [diff] [blame] | 1859 | tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); |
Yeongkyoon Lee | b76f0d8 | 2012-10-31 16:04:25 +0900 | [diff] [blame] | 1860 | } |
| 1861 | |
Richard Henderson | 0036e54 | 2023-04-09 23:08:12 -0700 | [diff] [blame] | 1862 | tcg_out_st_helper_args(s, l, &ldst_helper_param); |
Richard Henderson | 0cadc1e | 2022-11-01 12:51:04 +1100 | [diff] [blame] | 1863 | tcg_out_branch(s, 1, qemu_st_helpers[opc & MO_SIZE]); |
Richard Henderson | c6f29ff | 2013-07-25 06:33:33 -1000 | [diff] [blame] | 1864 | |
Richard Henderson | 0036e54 | 2023-04-09 23:08:12 -0700 | [diff] [blame] | 1865 | tcg_out_jmp(s, l->raddr); |
Richard Henderson | aeee05f | 2019-04-21 14:51:00 -0700 | [diff] [blame] | 1866 | return true; |
Yeongkyoon Lee | b76f0d8 | 2012-10-31 16:04:25 +0900 | [diff] [blame] | 1867 | } |
Richard Henderson | b1ee3c6 | 2021-07-27 19:42:35 -1000 | [diff] [blame] | 1868 | |
Richard Henderson | 30cc7a7 | 2022-11-07 20:51:56 +1100 | [diff] [blame] | 1869 | #ifndef CONFIG_SOFTMMU |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 1870 | static HostAddress x86_guest_base = { |
| 1871 | .index = -1 |
| 1872 | }; |
| 1873 | |
| 1874 | #if defined(__x86_64__) && defined(__linux__) |
| 1875 | # include <asm/prctl.h> |
| 1876 | # include <sys/prctl.h> |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 1877 | int arch_prctl(int code, unsigned long addr); |
Richard Henderson | 913c2bd | 2018-12-03 09:22:57 -0600 | [diff] [blame] | 1878 | static inline int setup_guest_base_seg(void) |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 1879 | { |
Laurent Vivier | b76f21a | 2015-08-24 14:53:54 +0200 | [diff] [blame] | 1880 | if (arch_prctl(ARCH_SET_GS, guest_base) == 0) { |
Richard Henderson | 913c2bd | 2018-12-03 09:22:57 -0600 | [diff] [blame] | 1881 | return P_GS; |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 1882 | } |
Richard Henderson | 913c2bd | 2018-12-03 09:22:57 -0600 | [diff] [blame] | 1883 | return 0; |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 1884 | } |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 1885 | #elif defined(__x86_64__) && \ |
| 1886 | (defined (__FreeBSD__) || defined (__FreeBSD_kernel__)) |
| 1887 | # include <machine/sysarch.h> |
Richard Henderson | 5785c17 | 2018-12-03 09:25:10 -0600 | [diff] [blame] | 1888 | static inline int setup_guest_base_seg(void) |
| 1889 | { |
| 1890 | if (sysarch(AMD64_SET_GSBASE, &guest_base) == 0) { |
| 1891 | return P_GS; |
| 1892 | } |
| 1893 | return 0; |
| 1894 | } |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 1895 | #else |
Richard Henderson | 913c2bd | 2018-12-03 09:22:57 -0600 | [diff] [blame] | 1896 | static inline int setup_guest_base_seg(void) |
| 1897 | { |
| 1898 | return 0; |
| 1899 | } |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 1900 | #endif /* setup_guest_base_seg */ |
Richard Henderson | 30cc7a7 | 2022-11-07 20:51:56 +1100 | [diff] [blame] | 1901 | #endif /* !SOFTMMU */ |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 1902 | |
Richard Henderson | d0a9bb5 | 2023-03-27 16:07:15 -0700 | [diff] [blame] | 1903 | #define MIN_TLB_MASK_TABLE_OFS INT_MIN |
| 1904 | |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 1905 | /* |
| 1906 | * For softmmu, perform the TLB load and compare. |
| 1907 | * For useronly, perform any required alignment tests. |
| 1908 | * In both cases, return a TCGLabelQemuLdst structure if the slow path |
| 1909 | * is required and fill in @h with the host address for the fast path. |
| 1910 | */ |
| 1911 | static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, |
| 1912 | TCGReg addrlo, TCGReg addrhi, |
| 1913 | MemOpIdx oi, bool is_ld) |
| 1914 | { |
| 1915 | TCGLabelQemuLdst *ldst = NULL; |
| 1916 | MemOp opc = get_memop(oi); |
Richard Henderson | 098d0fc | 2023-04-17 10:16:28 +0200 | [diff] [blame] | 1917 | MemOp s_bits = opc & MO_SIZE; |
Richard Henderson | 1c5322d | 2023-04-17 09:33:08 +0200 | [diff] [blame] | 1918 | unsigned a_mask; |
| 1919 | |
| 1920 | #ifdef CONFIG_SOFTMMU |
| 1921 | h->index = TCG_REG_L0; |
| 1922 | h->ofs = 0; |
| 1923 | h->seg = 0; |
| 1924 | #else |
| 1925 | *h = x86_guest_base; |
| 1926 | #endif |
| 1927 | h->base = addrlo; |
Richard Henderson | 098d0fc | 2023-04-17 10:16:28 +0200 | [diff] [blame] | 1928 | h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128); |
Richard Henderson | 1c5322d | 2023-04-17 09:33:08 +0200 | [diff] [blame] | 1929 | a_mask = (1 << h->aa.align) - 1; |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 1930 | |
| 1931 | #ifdef CONFIG_SOFTMMU |
| 1932 | int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read) |
| 1933 | : offsetof(CPUTLBEntry, addr_write); |
| 1934 | TCGType ttype = TCG_TYPE_I32; |
| 1935 | TCGType tlbtype = TCG_TYPE_I32; |
| 1936 | int trexw = 0, hrexw = 0, tlbrexw = 0; |
| 1937 | unsigned mem_index = get_mmuidx(oi); |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 1938 | unsigned s_mask = (1 << s_bits) - 1; |
Richard Henderson | d0a9bb5 | 2023-03-27 16:07:15 -0700 | [diff] [blame] | 1939 | int fast_ofs = tlb_mask_table_ofs(s, mem_index); |
Richard Henderson | c60ad6e | 2023-03-20 09:36:31 -0700 | [diff] [blame] | 1940 | int tlb_mask; |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 1941 | |
| 1942 | ldst = new_ldst_label(s); |
| 1943 | ldst->is_ld = is_ld; |
| 1944 | ldst->oi = oi; |
| 1945 | ldst->addrlo_reg = addrlo; |
| 1946 | ldst->addrhi_reg = addrhi; |
| 1947 | |
| 1948 | if (TCG_TARGET_REG_BITS == 64) { |
Richard Henderson | 63f4da9 | 2023-04-27 13:55:11 +0100 | [diff] [blame] | 1949 | ttype = s->addr_type; |
| 1950 | trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW); |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 1951 | if (TCG_TYPE_PTR == TCG_TYPE_I64) { |
| 1952 | hrexw = P_REXW; |
Richard Henderson | a66efde | 2023-04-02 10:07:57 -0700 | [diff] [blame] | 1953 | if (s->page_bits + s->tlb_dyn_max_bits > 32) { |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 1954 | tlbtype = TCG_TYPE_I64; |
| 1955 | tlbrexw = P_REXW; |
| 1956 | } |
| 1957 | } |
| 1958 | } |
| 1959 | |
| 1960 | tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo); |
| 1961 | tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0, |
Richard Henderson | aece72b | 2023-03-23 21:06:22 -0700 | [diff] [blame] | 1962 | s->page_bits - CPU_TLB_ENTRY_BITS); |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 1963 | |
| 1964 | tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0, |
Richard Henderson | d0a9bb5 | 2023-03-27 16:07:15 -0700 | [diff] [blame] | 1965 | fast_ofs + offsetof(CPUTLBDescFast, mask)); |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 1966 | |
| 1967 | tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0, |
Richard Henderson | d0a9bb5 | 2023-03-27 16:07:15 -0700 | [diff] [blame] | 1968 | fast_ofs + offsetof(CPUTLBDescFast, table)); |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 1969 | |
| 1970 | /* |
| 1971 | * If the required alignment is at least as large as the access, simply |
| 1972 | * copy the address and mask. For lesser alignments, check that we don't |
| 1973 | * cross pages for the complete access. |
| 1974 | */ |
Richard Henderson | 1c5322d | 2023-04-17 09:33:08 +0200 | [diff] [blame] | 1975 | if (a_mask >= s_mask) { |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 1976 | tcg_out_mov(s, ttype, TCG_REG_L1, addrlo); |
| 1977 | } else { |
| 1978 | tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1, |
| 1979 | addrlo, s_mask - a_mask); |
| 1980 | } |
Richard Henderson | aece72b | 2023-03-23 21:06:22 -0700 | [diff] [blame] | 1981 | tlb_mask = s->page_mask | a_mask; |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 1982 | tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0); |
| 1983 | |
| 1984 | /* cmp 0(TCG_REG_L0), TCG_REG_L1 */ |
| 1985 | tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, |
| 1986 | TCG_REG_L1, TCG_REG_L0, cmp_ofs); |
| 1987 | |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 1988 | /* jne slow_path */ |
| 1989 | tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); |
| 1990 | ldst->label_ptr[0] = s->code_ptr; |
| 1991 | s->code_ptr += 4; |
| 1992 | |
Richard Henderson | 63f4da9 | 2023-04-27 13:55:11 +0100 | [diff] [blame] | 1993 | if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) { |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 1994 | /* cmp 4(TCG_REG_L0), addrhi */ |
| 1995 | tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4); |
| 1996 | |
| 1997 | /* jne slow_path */ |
| 1998 | tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); |
| 1999 | ldst->label_ptr[1] = s->code_ptr; |
| 2000 | s->code_ptr += 4; |
| 2001 | } |
| 2002 | |
| 2003 | /* TLB Hit. */ |
Richard Henderson | 1fac464 | 2023-04-16 09:00:18 +0200 | [diff] [blame] | 2004 | tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0, |
| 2005 | offsetof(CPUTLBEntry, addend)); |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 2006 | #else |
Richard Henderson | 1c5322d | 2023-04-17 09:33:08 +0200 | [diff] [blame] | 2007 | if (a_mask) { |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 2008 | ldst = new_ldst_label(s); |
| 2009 | |
| 2010 | ldst->is_ld = is_ld; |
| 2011 | ldst->oi = oi; |
| 2012 | ldst->addrlo_reg = addrlo; |
| 2013 | ldst->addrhi_reg = addrhi; |
| 2014 | |
| 2015 | tcg_out_testi(s, addrlo, a_mask); |
| 2016 | /* jne slow_path */ |
| 2017 | tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); |
| 2018 | ldst->label_ptr[0] = s->code_ptr; |
| 2019 | s->code_ptr += 4; |
| 2020 | } |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 2021 | #endif |
| 2022 | |
| 2023 | return ldst; |
| 2024 | } |
| 2025 | |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2026 | static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2027 | HostAddress h, TCGType type, MemOp memop) |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2028 | { |
Richard Henderson | d2ef1b8 | 2018-11-20 10:26:40 +0100 | [diff] [blame] | 2029 | bool use_movbe = false; |
Richard Henderson | bf12e22 | 2023-04-06 12:42:40 -0700 | [diff] [blame] | 2030 | int rexw = (type == TCG_TYPE_I32 ? 0 : P_REXW); |
Aurelien Jarno | 085bb5b | 2013-11-06 19:51:21 +0100 | [diff] [blame] | 2031 | int movop = OPC_MOVL_GvEv; |
| 2032 | |
Richard Henderson | d2ef1b8 | 2018-11-20 10:26:40 +0100 | [diff] [blame] | 2033 | /* Do big-endian loads with movbe. */ |
| 2034 | if (memop & MO_BSWAP) { |
| 2035 | tcg_debug_assert(have_movbe); |
| 2036 | use_movbe = true; |
Aurelien Jarno | 085bb5b | 2013-11-06 19:51:21 +0100 | [diff] [blame] | 2037 | movop = OPC_MOVBE_GyMy; |
| 2038 | } |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2039 | |
| 2040 | switch (memop & MO_SSIZE) { |
| 2041 | case MO_UB: |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2042 | tcg_out_modrm_sib_offset(s, OPC_MOVZBL + h.seg, datalo, |
| 2043 | h.base, h.index, 0, h.ofs); |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2044 | break; |
| 2045 | case MO_SB: |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2046 | tcg_out_modrm_sib_offset(s, OPC_MOVSBL + rexw + h.seg, datalo, |
| 2047 | h.base, h.index, 0, h.ofs); |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2048 | break; |
| 2049 | case MO_UW: |
Richard Henderson | d2ef1b8 | 2018-11-20 10:26:40 +0100 | [diff] [blame] | 2050 | if (use_movbe) { |
| 2051 | /* There is no extending movbe; only low 16-bits are modified. */ |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2052 | if (datalo != h.base && datalo != h.index) { |
Richard Henderson | d2ef1b8 | 2018-11-20 10:26:40 +0100 | [diff] [blame] | 2053 | /* XOR breaks dependency chains. */ |
| 2054 | tgen_arithr(s, ARITH_XOR, datalo, datalo); |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2055 | tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + h.seg, |
| 2056 | datalo, h.base, h.index, 0, h.ofs); |
Aurelien Jarno | 085bb5b | 2013-11-06 19:51:21 +0100 | [diff] [blame] | 2057 | } else { |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2058 | tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + h.seg, |
| 2059 | datalo, h.base, h.index, 0, h.ofs); |
Richard Henderson | d2ef1b8 | 2018-11-20 10:26:40 +0100 | [diff] [blame] | 2060 | tcg_out_ext16u(s, datalo, datalo); |
Aurelien Jarno | 085bb5b | 2013-11-06 19:51:21 +0100 | [diff] [blame] | 2061 | } |
Richard Henderson | d2ef1b8 | 2018-11-20 10:26:40 +0100 | [diff] [blame] | 2062 | } else { |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2063 | tcg_out_modrm_sib_offset(s, OPC_MOVZWL + h.seg, datalo, |
| 2064 | h.base, h.index, 0, h.ofs); |
Richard Henderson | d2ef1b8 | 2018-11-20 10:26:40 +0100 | [diff] [blame] | 2065 | } |
| 2066 | break; |
| 2067 | case MO_SW: |
| 2068 | if (use_movbe) { |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2069 | tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + h.seg, |
| 2070 | datalo, h.base, h.index, 0, h.ofs); |
Richard Henderson | 753e42e | 2023-04-05 14:49:59 -0700 | [diff] [blame] | 2071 | tcg_out_ext16s(s, type, datalo, datalo); |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2072 | } else { |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2073 | tcg_out_modrm_sib_offset(s, OPC_MOVSWL + rexw + h.seg, |
| 2074 | datalo, h.base, h.index, 0, h.ofs); |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2075 | } |
| 2076 | break; |
| 2077 | case MO_UL: |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2078 | tcg_out_modrm_sib_offset(s, movop + h.seg, datalo, |
| 2079 | h.base, h.index, 0, h.ofs); |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2080 | break; |
| 2081 | #if TCG_TARGET_REG_BITS == 64 |
| 2082 | case MO_SL: |
Richard Henderson | d2ef1b8 | 2018-11-20 10:26:40 +0100 | [diff] [blame] | 2083 | if (use_movbe) { |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2084 | tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + h.seg, datalo, |
| 2085 | h.base, h.index, 0, h.ofs); |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2086 | tcg_out_ext32s(s, datalo, datalo); |
| 2087 | } else { |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2088 | tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + h.seg, datalo, |
| 2089 | h.base, h.index, 0, h.ofs); |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2090 | } |
| 2091 | break; |
| 2092 | #endif |
Frédéric Pétrot | fc313c6 | 2022-01-06 22:00:51 +0100 | [diff] [blame] | 2093 | case MO_UQ: |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2094 | if (TCG_TARGET_REG_BITS == 64) { |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2095 | tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo, |
| 2096 | h.base, h.index, 0, h.ofs); |
Richard Henderson | 3174941 | 2023-04-16 15:56:41 +0200 | [diff] [blame] | 2097 | break; |
| 2098 | } |
| 2099 | if (use_movbe) { |
| 2100 | TCGReg t = datalo; |
| 2101 | datalo = datahi; |
| 2102 | datahi = t; |
| 2103 | } |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2104 | if (h.base == datalo || h.index == datalo) { |
| 2105 | tcg_out_modrm_sib_offset(s, OPC_LEA, datahi, |
| 2106 | h.base, h.index, 0, h.ofs); |
| 2107 | tcg_out_modrm_offset(s, movop + h.seg, datalo, datahi, 0); |
| 2108 | tcg_out_modrm_offset(s, movop + h.seg, datahi, datahi, 4); |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2109 | } else { |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2110 | tcg_out_modrm_sib_offset(s, movop + h.seg, datalo, |
| 2111 | h.base, h.index, 0, h.ofs); |
| 2112 | tcg_out_modrm_sib_offset(s, movop + h.seg, datahi, |
| 2113 | h.base, h.index, 0, h.ofs + 4); |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2114 | } |
| 2115 | break; |
Richard Henderson | 098d0fc | 2023-04-17 10:16:28 +0200 | [diff] [blame] | 2116 | |
| 2117 | case MO_128: |
| 2118 | tcg_debug_assert(TCG_TARGET_REG_BITS == 64); |
| 2119 | |
| 2120 | /* |
| 2121 | * Without 16-byte atomicity, use integer regs. |
| 2122 | * That is where we want the data, and it allows bswaps. |
| 2123 | */ |
| 2124 | if (h.aa.atom < MO_128) { |
| 2125 | if (use_movbe) { |
| 2126 | TCGReg t = datalo; |
| 2127 | datalo = datahi; |
| 2128 | datahi = t; |
| 2129 | } |
| 2130 | if (h.base == datalo || h.index == datalo) { |
| 2131 | tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, datahi, |
| 2132 | h.base, h.index, 0, h.ofs); |
| 2133 | tcg_out_modrm_offset(s, movop + P_REXW + h.seg, |
| 2134 | datalo, datahi, 0); |
| 2135 | tcg_out_modrm_offset(s, movop + P_REXW + h.seg, |
| 2136 | datahi, datahi, 8); |
| 2137 | } else { |
| 2138 | tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo, |
| 2139 | h.base, h.index, 0, h.ofs); |
| 2140 | tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi, |
| 2141 | h.base, h.index, 0, h.ofs + 8); |
| 2142 | } |
| 2143 | break; |
| 2144 | } |
| 2145 | |
| 2146 | /* |
| 2147 | * With 16-byte atomicity, a vector load is required. |
| 2148 | * If we already have 16-byte alignment, then VMOVDQA always works. |
| 2149 | * Else if VMOVDQU has atomicity with dynamic alignment, use that. |
| 2150 | * Else use we require a runtime test for alignment for VMOVDQA; |
| 2151 | * use VMOVDQU on the unaligned nonatomic path for simplicity. |
| 2152 | */ |
| 2153 | if (h.aa.align >= MO_128) { |
| 2154 | tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg, |
| 2155 | TCG_TMP_VEC, 0, |
| 2156 | h.base, h.index, 0, h.ofs); |
| 2157 | } else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) { |
| 2158 | tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg, |
| 2159 | TCG_TMP_VEC, 0, |
| 2160 | h.base, h.index, 0, h.ofs); |
| 2161 | } else { |
| 2162 | TCGLabel *l1 = gen_new_label(); |
| 2163 | TCGLabel *l2 = gen_new_label(); |
| 2164 | |
| 2165 | tcg_out_testi(s, h.base, 15); |
| 2166 | tcg_out_jxx(s, JCC_JNE, l1, true); |
| 2167 | |
| 2168 | tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg, |
| 2169 | TCG_TMP_VEC, 0, |
| 2170 | h.base, h.index, 0, h.ofs); |
| 2171 | tcg_out_jxx(s, JCC_JMP, l2, true); |
| 2172 | |
| 2173 | tcg_out_label(s, l1); |
| 2174 | tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg, |
| 2175 | TCG_TMP_VEC, 0, |
| 2176 | h.base, h.index, 0, h.ofs); |
| 2177 | tcg_out_label(s, l2); |
| 2178 | } |
| 2179 | tcg_out_vec_to_pair(s, TCG_TYPE_I64, datalo, datahi, TCG_TMP_VEC); |
| 2180 | break; |
| 2181 | |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2182 | default: |
Richard Henderson | d2ef1b8 | 2018-11-20 10:26:40 +0100 | [diff] [blame] | 2183 | g_assert_not_reached(); |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2184 | } |
| 2185 | } |
| 2186 | |
Richard Henderson | bf12e22 | 2023-04-06 12:42:40 -0700 | [diff] [blame] | 2187 | static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, |
| 2188 | TCGReg addrlo, TCGReg addrhi, |
| 2189 | MemOpIdx oi, TCGType data_type) |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2190 | { |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 2191 | TCGLabelQemuLdst *ldst; |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2192 | HostAddress h; |
Richard Henderson | bf12e22 | 2023-04-06 12:42:40 -0700 | [diff] [blame] | 2193 | |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 2194 | ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true); |
| 2195 | tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, get_memop(oi)); |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2196 | |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 2197 | if (ldst) { |
| 2198 | ldst->type = data_type; |
| 2199 | ldst->datalo_reg = datalo; |
| 2200 | ldst->datahi_reg = datahi; |
| 2201 | ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); |
Richard Henderson | b1ee3c6 | 2021-07-27 19:42:35 -1000 | [diff] [blame] | 2202 | } |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2203 | } |
| 2204 | |
/*
 * Emit the fast-path guest memory store of datalo (and datahi for MO_64
 * on a 32-bit host, or MO_128 on a 64-bit host) to the resolved host
 * address H.  Any TLB lookup / slow-path branch has already been emitted
 * by the caller; this routine emits only the access itself.
 */
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   HostAddress h, MemOp memop)
{
    bool use_movbe = false;
    int movop = OPC_MOVL_EvGv;

    /*
     * Do big-endian stores with movbe or softmmu.
     * User-only without movbe will have its swapping done generically.
     */
    if (memop & MO_BSWAP) {
        tcg_debug_assert(have_movbe);
        use_movbe = true;
        movop = OPC_MOVBE_MyGy;
    }

    switch (memop & MO_SIZE) {
    case MO_8:
        /* This is handled with constraints on INDEX_op_qemu_st8_i32. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || datalo < 4);
        tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + h.seg,
                                 datalo, h.base, h.index, 0, h.ofs);
        break;
    case MO_16:
        tcg_out_modrm_sib_offset(s, movop + P_DATA16 + h.seg, datalo,
                                 h.base, h.index, 0, h.ofs);
        break;
    case MO_32:
        tcg_out_modrm_sib_offset(s, movop + h.seg, datalo,
                                 h.base, h.index, 0, h.ofs);
        break;
    case MO_64:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
                                     h.base, h.index, 0, h.ofs);
        } else {
            /* 32-bit host: two 32-bit stores; bswap exchanges the halves. */
            if (use_movbe) {
                TCGReg t = datalo;
                datalo = datahi;
                datahi = t;
            }
            tcg_out_modrm_sib_offset(s, movop + h.seg, datalo,
                                     h.base, h.index, 0, h.ofs);
            tcg_out_modrm_sib_offset(s, movop + h.seg, datahi,
                                     h.base, h.index, 0, h.ofs + 4);
        }
        break;

    case MO_128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);

        /*
         * Without 16-byte atomicity, use integer regs.
         * That is where we have the data, and it allows bswaps.
         */
        if (h.aa.atom < MO_128) {
            if (use_movbe) {
                /* Byte-swapping a 128-bit value also exchanges the halves. */
                TCGReg t = datalo;
                datalo = datahi;
                datahi = t;
            }
            tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
                                     h.base, h.index, 0, h.ofs);
            tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
                                     h.base, h.index, 0, h.ofs + 8);
            break;
        }

        /*
         * With 16-byte atomicity, a vector store is required.
         * If we already have 16-byte alignment, then VMOVDQA always works.
         * Else if VMOVDQU has atomicity with dynamic alignment, use that.
         * Else we require a runtime test for alignment for VMOVDQA;
         * use VMOVDQU on the unaligned nonatomic path for simplicity.
         */
        tcg_out_pair_to_vec(s, TCG_TYPE_I64, TCG_TMP_VEC, datalo, datahi);
        if (h.aa.align >= MO_128) {
            tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
                                         TCG_TMP_VEC, 0,
                                         h.base, h.index, 0, h.ofs);
        } else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) {
            tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg,
                                         TCG_TMP_VEC, 0,
                                         h.base, h.index, 0, h.ofs);
        } else {
            TCGLabel *l1 = gen_new_label();
            TCGLabel *l2 = gen_new_label();

            /* Branch to the unaligned path if base is not 16-byte aligned. */
            tcg_out_testi(s, h.base, 15);
            tcg_out_jxx(s, JCC_JNE, l1, true);

            tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
                                         TCG_TMP_VEC, 0,
                                         h.base, h.index, 0, h.ofs);
            tcg_out_jxx(s, JCC_JMP, l2, true);

            tcg_out_label(s, l1);
            tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg,
                                         TCG_TMP_VEC, 0,
                                         h.base, h.index, 0, h.ofs);
            tcg_out_label(s, l2);
        }
        break;

    default:
        g_assert_not_reached();
    }
}
| 2313 | |
Richard Henderson | bf12e22 | 2023-04-06 12:42:40 -0700 | [diff] [blame] | 2314 | static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, |
| 2315 | TCGReg addrlo, TCGReg addrhi, |
| 2316 | MemOpIdx oi, TCGType data_type) |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2317 | { |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 2318 | TCGLabelQemuLdst *ldst; |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 2319 | HostAddress h; |
Richard Henderson | bf12e22 | 2023-04-06 12:42:40 -0700 | [diff] [blame] | 2320 | |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 2321 | ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false); |
| 2322 | tcg_out_qemu_st_direct(s, datalo, datahi, h, get_memop(oi)); |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2323 | |
Richard Henderson | 530074c | 2023-04-19 19:10:27 +0200 | [diff] [blame] | 2324 | if (ldst) { |
| 2325 | ldst->type = data_type; |
| 2326 | ldst->datalo_reg = datalo; |
| 2327 | ldst->datahi_reg = datahi; |
| 2328 | ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); |
Richard Henderson | b1ee3c6 | 2021-07-27 19:42:35 -1000 | [diff] [blame] | 2329 | } |
Richard Henderson | 7352ee5 | 2013-09-04 08:13:42 -0700 | [diff] [blame] | 2330 | } |
Yeongkyoon Lee | b76f0d8 | 2012-10-31 16:04:25 +0900 | [diff] [blame] | 2331 | |
Richard Henderson | b55a8d9 | 2022-11-26 12:42:06 -0800 | [diff] [blame] | 2332 | static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) |
| 2333 | { |
| 2334 | /* Reuse the zeroing that exists for goto_ptr. */ |
| 2335 | if (a0 == 0) { |
| 2336 | tcg_out_jmp(s, tcg_code_gen_epilogue); |
| 2337 | } else { |
| 2338 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0); |
| 2339 | tcg_out_jmp(s, tb_ret_addr); |
| 2340 | } |
| 2341 | } |
| 2342 | |
Richard Henderson | cf7d6b8 | 2022-11-26 17:14:05 -0800 | [diff] [blame] | 2343 | static void tcg_out_goto_tb(TCGContext *s, int which) |
| 2344 | { |
| 2345 | /* |
| 2346 | * Jump displacement must be aligned for atomic patching; |
| 2347 | * see if we need to add extra nops before jump |
| 2348 | */ |
| 2349 | int gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr; |
| 2350 | if (gap != 1) { |
| 2351 | tcg_out_nopn(s, gap - 1); |
| 2352 | } |
| 2353 | tcg_out8(s, OPC_JMP_long); /* jmp im */ |
| 2354 | set_jmp_insn_offset(s, which); |
| 2355 | tcg_out32(s, 0); |
| 2356 | set_jmp_reset_offset(s, which); |
| 2357 | } |
| 2358 | |
Richard Henderson | 0fe1c98 | 2022-12-05 11:31:20 -0600 | [diff] [blame] | 2359 | void tb_target_set_jmp_target(const TranslationBlock *tb, int n, |
| 2360 | uintptr_t jmp_rx, uintptr_t jmp_rw) |
| 2361 | { |
| 2362 | /* patch the branch destination */ |
| 2363 | uintptr_t addr = tb->jmp_target_addr[n]; |
| 2364 | qatomic_set((int32_t *)jmp_rw, addr - (jmp_rx + 4)); |
| 2365 | /* no need to flush icache explicitly */ |
| 2366 | } |
| 2367 | |
Richard Henderson | a975160 | 2010-03-19 11:12:29 -0700 | [diff] [blame] | 2368 | static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, |
Miroslav Rezanina | 5e8892d | 2021-03-12 13:14:18 +0100 | [diff] [blame] | 2369 | const TCGArg args[TCG_MAX_OP_ARGS], |
| 2370 | const int const_args[TCG_MAX_OP_ARGS]) |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2371 | { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2372 | TCGArg a0, a1, a2; |
| 2373 | int c, const_a2, vexop, rexw = 0; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2374 | |
| 2375 | #if TCG_TARGET_REG_BITS == 64 |
| 2376 | # define OP_32_64(x) \ |
| 2377 | case glue(glue(INDEX_op_, x), _i64): \ |
| 2378 | rexw = P_REXW; /* FALLTHRU */ \ |
| 2379 | case glue(glue(INDEX_op_, x), _i32) |
| 2380 | #else |
| 2381 | # define OP_32_64(x) \ |
| 2382 | case glue(glue(INDEX_op_, x), _i32) |
| 2383 | #endif |
Richard Henderson | 7868652 | 2010-05-21 08:30:35 -0700 | [diff] [blame] | 2384 | |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2385 | /* Hoist the loads of the most common arguments. */ |
| 2386 | a0 = args[0]; |
| 2387 | a1 = args[1]; |
| 2388 | a2 = args[2]; |
| 2389 | const_a2 = const_args[2]; |
| 2390 | |
| 2391 | switch (opc) { |
Emilio G. Cota | 5cb4ef8 | 2017-04-26 23:29:18 -0400 | [diff] [blame] | 2392 | case INDEX_op_goto_ptr: |
| 2393 | /* jmp to the given host address (could be epilogue) */ |
| 2394 | tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0); |
| 2395 | break; |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2396 | case INDEX_op_br: |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2397 | tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2398 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2399 | OP_32_64(ld8u): |
| 2400 | /* Note that we can ignore REXW for the zero-extend to 64-bit. */ |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2401 | tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2402 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2403 | OP_32_64(ld8s): |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2404 | tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, a0, a1, a2); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2405 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2406 | OP_32_64(ld16u): |
| 2407 | /* Note that we can ignore REXW for the zero-extend to 64-bit. */ |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2408 | tcg_out_modrm_offset(s, OPC_MOVZWL, a0, a1, a2); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2409 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2410 | OP_32_64(ld16s): |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2411 | tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, a0, a1, a2); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2412 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2413 | #if TCG_TARGET_REG_BITS == 64 |
| 2414 | case INDEX_op_ld32u_i64: |
| 2415 | #endif |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2416 | case INDEX_op_ld_i32: |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2417 | tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2418 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2419 | |
| 2420 | OP_32_64(st8): |
Aurelien Jarno | 5c2d2a9 | 2012-09-10 13:56:24 +0200 | [diff] [blame] | 2421 | if (const_args[0]) { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2422 | tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2); |
| 2423 | tcg_out8(s, a0); |
Aurelien Jarno | 5c2d2a9 | 2012-09-10 13:56:24 +0200 | [diff] [blame] | 2424 | } else { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2425 | tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, a0, a1, a2); |
Aurelien Jarno | 5c2d2a9 | 2012-09-10 13:56:24 +0200 | [diff] [blame] | 2426 | } |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2427 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2428 | OP_32_64(st16): |
Aurelien Jarno | 5c2d2a9 | 2012-09-10 13:56:24 +0200 | [diff] [blame] | 2429 | if (const_args[0]) { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2430 | tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, a1, a2); |
| 2431 | tcg_out16(s, a0); |
Aurelien Jarno | 5c2d2a9 | 2012-09-10 13:56:24 +0200 | [diff] [blame] | 2432 | } else { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2433 | tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, a0, a1, a2); |
Aurelien Jarno | 5c2d2a9 | 2012-09-10 13:56:24 +0200 | [diff] [blame] | 2434 | } |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2435 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2436 | #if TCG_TARGET_REG_BITS == 64 |
| 2437 | case INDEX_op_st32_i64: |
| 2438 | #endif |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2439 | case INDEX_op_st_i32: |
Aurelien Jarno | 5c2d2a9 | 2012-09-10 13:56:24 +0200 | [diff] [blame] | 2440 | if (const_args[0]) { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2441 | tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, a1, a2); |
| 2442 | tcg_out32(s, a0); |
Aurelien Jarno | 5c2d2a9 | 2012-09-10 13:56:24 +0200 | [diff] [blame] | 2443 | } else { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2444 | tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2); |
Aurelien Jarno | 5c2d2a9 | 2012-09-10 13:56:24 +0200 | [diff] [blame] | 2445 | } |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2446 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2447 | |
| 2448 | OP_32_64(add): |
Richard Henderson | 5d1e4e8 | 2010-05-21 08:30:34 -0700 | [diff] [blame] | 2449 | /* For 3-operand addition, use LEA. */ |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2450 | if (a0 != a1) { |
| 2451 | TCGArg c3 = 0; |
| 2452 | if (const_a2) { |
Richard Henderson | 5d1e4e8 | 2010-05-21 08:30:34 -0700 | [diff] [blame] | 2453 | c3 = a2, a2 = -1; |
| 2454 | } else if (a0 == a2) { |
| 2455 | /* Watch out for dest = src + dest, since we've removed |
| 2456 | the matching constraint on the add. */ |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2457 | tgen_arithr(s, ARITH_ADD + rexw, a0, a1); |
Richard Henderson | 5d1e4e8 | 2010-05-21 08:30:34 -0700 | [diff] [blame] | 2458 | break; |
| 2459 | } |
| 2460 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2461 | tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3); |
Richard Henderson | 5d1e4e8 | 2010-05-21 08:30:34 -0700 | [diff] [blame] | 2462 | break; |
| 2463 | } |
| 2464 | c = ARITH_ADD; |
| 2465 | goto gen_arith; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2466 | OP_32_64(sub): |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2467 | c = ARITH_SUB; |
| 2468 | goto gen_arith; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2469 | OP_32_64(and): |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2470 | c = ARITH_AND; |
| 2471 | goto gen_arith; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2472 | OP_32_64(or): |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2473 | c = ARITH_OR; |
| 2474 | goto gen_arith; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2475 | OP_32_64(xor): |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2476 | c = ARITH_XOR; |
| 2477 | goto gen_arith; |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2478 | gen_arith: |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2479 | if (const_a2) { |
| 2480 | tgen_arithi(s, c + rexw, a0, a2, 0); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2481 | } else { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2482 | tgen_arithr(s, c + rexw, a0, a2); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2483 | } |
| 2484 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2485 | |
Richard Henderson | 9d2eec2 | 2014-01-27 21:49:17 -0800 | [diff] [blame] | 2486 | OP_32_64(andc): |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2487 | if (const_a2) { |
| 2488 | tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1); |
| 2489 | tgen_arithi(s, ARITH_AND + rexw, a0, ~a2, 0); |
Richard Henderson | 9d2eec2 | 2014-01-27 21:49:17 -0800 | [diff] [blame] | 2490 | } else { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2491 | tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1); |
Richard Henderson | 9d2eec2 | 2014-01-27 21:49:17 -0800 | [diff] [blame] | 2492 | } |
| 2493 | break; |
| 2494 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2495 | OP_32_64(mul): |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2496 | if (const_a2) { |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2497 | int32_t val; |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2498 | val = a2; |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2499 | if (val == (int8_t)val) { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2500 | tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2501 | tcg_out8(s, val); |
| 2502 | } else { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2503 | tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2504 | tcg_out32(s, val); |
| 2505 | } |
| 2506 | } else { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2507 | tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2508 | } |
| 2509 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2510 | |
| 2511 | OP_32_64(div2): |
| 2512 | tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]); |
| 2513 | break; |
| 2514 | OP_32_64(divu2): |
| 2515 | tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]); |
| 2516 | break; |
| 2517 | |
| 2518 | OP_32_64(shl): |
Richard Henderson | 6a5aed4 | 2016-11-18 14:18:41 +0100 | [diff] [blame] | 2519 | /* For small constant 3-operand shift, use LEA. */ |
| 2520 | if (const_a2 && a0 != a1 && (a2 - 1) < 3) { |
| 2521 | if (a2 - 1 == 0) { |
| 2522 | /* shl $1,a1,a0 -> lea (a1,a1),a0 */ |
| 2523 | tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0); |
| 2524 | } else { |
| 2525 | /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */ |
| 2526 | tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0); |
| 2527 | } |
| 2528 | break; |
| 2529 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2530 | c = SHIFT_SHL; |
Richard Henderson | 6399ab3 | 2014-01-28 11:39:49 -0800 | [diff] [blame] | 2531 | vexop = OPC_SHLX; |
| 2532 | goto gen_shift_maybe_vex; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2533 | OP_32_64(shr): |
| 2534 | c = SHIFT_SHR; |
Richard Henderson | 6399ab3 | 2014-01-28 11:39:49 -0800 | [diff] [blame] | 2535 | vexop = OPC_SHRX; |
| 2536 | goto gen_shift_maybe_vex; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2537 | OP_32_64(sar): |
| 2538 | c = SHIFT_SAR; |
Richard Henderson | 6399ab3 | 2014-01-28 11:39:49 -0800 | [diff] [blame] | 2539 | vexop = OPC_SARX; |
| 2540 | goto gen_shift_maybe_vex; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2541 | OP_32_64(rotl): |
| 2542 | c = SHIFT_ROL; |
| 2543 | goto gen_shift; |
| 2544 | OP_32_64(rotr): |
| 2545 | c = SHIFT_ROR; |
| 2546 | goto gen_shift; |
Richard Henderson | 6399ab3 | 2014-01-28 11:39:49 -0800 | [diff] [blame] | 2547 | gen_shift_maybe_vex: |
Richard Henderson | 6a5aed4 | 2016-11-18 14:18:41 +0100 | [diff] [blame] | 2548 | if (have_bmi2) { |
| 2549 | if (!const_a2) { |
| 2550 | tcg_out_vex_modrm(s, vexop + rexw, a0, a2, a1); |
| 2551 | break; |
| 2552 | } |
| 2553 | tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1); |
Richard Henderson | 6399ab3 | 2014-01-28 11:39:49 -0800 | [diff] [blame] | 2554 | } |
| 2555 | /* FALLTHRU */ |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2556 | gen_shift: |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2557 | if (const_a2) { |
| 2558 | tcg_out_shifti(s, c + rexw, a0, a2); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2559 | } else { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2560 | tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, a0); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2561 | } |
| 2562 | break; |
| 2563 | |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 2564 | OP_32_64(ctz): |
| 2565 | tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]); |
| 2566 | break; |
| 2567 | OP_32_64(clz): |
| 2568 | tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]); |
| 2569 | break; |
Richard Henderson | 993508e | 2016-11-22 14:15:04 +0100 | [diff] [blame] | 2570 | OP_32_64(ctpop): |
| 2571 | tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1); |
| 2572 | break; |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 2573 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2574 | case INDEX_op_brcond_i32: |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2575 | tcg_out_brcond32(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2576 | break; |
| 2577 | case INDEX_op_setcond_i32: |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2578 | tcg_out_setcond32(s, args[3], a0, a1, a2, const_a2); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2579 | break; |
Richard Henderson | d0a1629 | 2012-09-21 10:13:36 -0700 | [diff] [blame] | 2580 | case INDEX_op_movcond_i32: |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2581 | tcg_out_movcond32(s, args[5], a0, a1, a2, const_a2, args[3]); |
Richard Henderson | d0a1629 | 2012-09-21 10:13:36 -0700 | [diff] [blame] | 2582 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2583 | |
| 2584 | OP_32_64(bswap16): |
Richard Henderson | 7335a3d | 2021-06-12 22:42:13 -0700 | [diff] [blame] | 2585 | if (a2 & TCG_BSWAP_OS) { |
| 2586 | /* Output must be sign-extended. */ |
| 2587 | if (rexw) { |
| 2588 | tcg_out_bswap64(s, a0); |
| 2589 | tcg_out_shifti(s, SHIFT_SAR + rexw, a0, 48); |
| 2590 | } else { |
| 2591 | tcg_out_bswap32(s, a0); |
| 2592 | tcg_out_shifti(s, SHIFT_SAR, a0, 16); |
| 2593 | } |
| 2594 | } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { |
| 2595 | /* Output must be zero-extended, but input isn't. */ |
| 2596 | tcg_out_bswap32(s, a0); |
| 2597 | tcg_out_shifti(s, SHIFT_SHR, a0, 16); |
| 2598 | } else { |
| 2599 | tcg_out_rolw_8(s, a0); |
| 2600 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2601 | break; |
| 2602 | OP_32_64(bswap32): |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2603 | tcg_out_bswap32(s, a0); |
Richard Henderson | 7335a3d | 2021-06-12 22:42:13 -0700 | [diff] [blame] | 2604 | if (rexw && (a2 & TCG_BSWAP_OS)) { |
| 2605 | tcg_out_ext32s(s, a0, a0); |
| 2606 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2607 | break; |
| 2608 | |
| 2609 | OP_32_64(neg): |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2610 | tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2611 | break; |
| 2612 | OP_32_64(not): |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2613 | tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2614 | break; |
| 2615 | |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 2616 | case INDEX_op_qemu_ld_a64_i32: |
| 2617 | if (TCG_TARGET_REG_BITS == 32) { |
Richard Henderson | bf12e22 | 2023-04-06 12:42:40 -0700 | [diff] [blame] | 2618 | tcg_out_qemu_ld(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32); |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 2619 | break; |
Richard Henderson | bf12e22 | 2023-04-06 12:42:40 -0700 | [diff] [blame] | 2620 | } |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 2621 | /* fall through */ |
| 2622 | case INDEX_op_qemu_ld_a32_i32: |
| 2623 | tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2624 | break; |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 2625 | case INDEX_op_qemu_ld_a32_i64: |
Richard Henderson | bf12e22 | 2023-04-06 12:42:40 -0700 | [diff] [blame] | 2626 | if (TCG_TARGET_REG_BITS == 64) { |
| 2627 | tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64); |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 2628 | } else { |
Richard Henderson | bf12e22 | 2023-04-06 12:42:40 -0700 | [diff] [blame] | 2629 | tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64); |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 2630 | } |
| 2631 | break; |
| 2632 | case INDEX_op_qemu_ld_a64_i64: |
| 2633 | if (TCG_TARGET_REG_BITS == 64) { |
| 2634 | tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64); |
Richard Henderson | bf12e22 | 2023-04-06 12:42:40 -0700 | [diff] [blame] | 2635 | } else { |
| 2636 | tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64); |
| 2637 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2638 | break; |
Richard Henderson | 098d0fc | 2023-04-17 10:16:28 +0200 | [diff] [blame] | 2639 | case INDEX_op_qemu_ld_a32_i128: |
| 2640 | case INDEX_op_qemu_ld_a64_i128: |
| 2641 | tcg_debug_assert(TCG_TARGET_REG_BITS == 64); |
| 2642 | tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128); |
| 2643 | break; |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 2644 | |
| 2645 | case INDEX_op_qemu_st_a64_i32: |
| 2646 | case INDEX_op_qemu_st8_a64_i32: |
| 2647 | if (TCG_TARGET_REG_BITS == 32) { |
Richard Henderson | bf12e22 | 2023-04-06 12:42:40 -0700 | [diff] [blame] | 2648 | tcg_out_qemu_st(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32); |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 2649 | break; |
Richard Henderson | bf12e22 | 2023-04-06 12:42:40 -0700 | [diff] [blame] | 2650 | } |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 2651 | /* fall through */ |
| 2652 | case INDEX_op_qemu_st_a32_i32: |
| 2653 | case INDEX_op_qemu_st8_a32_i32: |
| 2654 | tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2655 | break; |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 2656 | case INDEX_op_qemu_st_a32_i64: |
Richard Henderson | bf12e22 | 2023-04-06 12:42:40 -0700 | [diff] [blame] | 2657 | if (TCG_TARGET_REG_BITS == 64) { |
| 2658 | tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64); |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 2659 | } else { |
Richard Henderson | bf12e22 | 2023-04-06 12:42:40 -0700 | [diff] [blame] | 2660 | tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64); |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 2661 | } |
| 2662 | break; |
| 2663 | case INDEX_op_qemu_st_a64_i64: |
| 2664 | if (TCG_TARGET_REG_BITS == 64) { |
| 2665 | tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64); |
Richard Henderson | bf12e22 | 2023-04-06 12:42:40 -0700 | [diff] [blame] | 2666 | } else { |
| 2667 | tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64); |
| 2668 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2669 | break; |
Richard Henderson | 098d0fc | 2023-04-17 10:16:28 +0200 | [diff] [blame] | 2670 | case INDEX_op_qemu_st_a32_i128: |
| 2671 | case INDEX_op_qemu_st_a64_i128: |
| 2672 | tcg_debug_assert(TCG_TARGET_REG_BITS == 64); |
| 2673 | tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128); |
| 2674 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2675 | |
Richard Henderson | 624988a | 2013-02-19 23:51:57 -0800 | [diff] [blame] | 2676 | OP_32_64(mulu2): |
| 2677 | tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2678 | break; |
Richard Henderson | 624988a | 2013-02-19 23:51:57 -0800 | [diff] [blame] | 2679 | OP_32_64(muls2): |
| 2680 | tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]); |
| 2681 | break; |
| 2682 | OP_32_64(add2): |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 2683 | if (const_args[4]) { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2684 | tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1); |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 2685 | } else { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2686 | tgen_arithr(s, ARITH_ADD + rexw, a0, args[4]); |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 2687 | } |
| 2688 | if (const_args[5]) { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2689 | tgen_arithi(s, ARITH_ADC + rexw, a1, args[5], 1); |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 2690 | } else { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2691 | tgen_arithr(s, ARITH_ADC + rexw, a1, args[5]); |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 2692 | } |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2693 | break; |
Richard Henderson | 624988a | 2013-02-19 23:51:57 -0800 | [diff] [blame] | 2694 | OP_32_64(sub2): |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 2695 | if (const_args[4]) { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2696 | tgen_arithi(s, ARITH_SUB + rexw, a0, args[4], 1); |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 2697 | } else { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2698 | tgen_arithr(s, ARITH_SUB + rexw, a0, args[4]); |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 2699 | } |
| 2700 | if (const_args[5]) { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2701 | tgen_arithi(s, ARITH_SBB + rexw, a1, args[5], 1); |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 2702 | } else { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2703 | tgen_arithr(s, ARITH_SBB + rexw, a1, args[5]); |
Richard Henderson | 81570a7 | 2010-05-21 08:30:24 -0700 | [diff] [blame] | 2704 | } |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2705 | break; |
Richard Henderson | bbc863b | 2013-02-19 23:51:50 -0800 | [diff] [blame] | 2706 | |
| 2707 | #if TCG_TARGET_REG_BITS == 32 |
| 2708 | case INDEX_op_brcond2_i32: |
| 2709 | tcg_out_brcond2(s, args, const_args, 0); |
| 2710 | break; |
| 2711 | case INDEX_op_setcond2_i32: |
| 2712 | tcg_out_setcond2(s, args, const_args); |
| 2713 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2714 | #else /* TCG_TARGET_REG_BITS == 64 */ |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2715 | case INDEX_op_ld32s_i64: |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2716 | tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2717 | break; |
| 2718 | case INDEX_op_ld_i64: |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2719 | tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2720 | break; |
| 2721 | case INDEX_op_st_i64: |
Aurelien Jarno | 5c2d2a9 | 2012-09-10 13:56:24 +0200 | [diff] [blame] | 2722 | if (const_args[0]) { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2723 | tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2); |
| 2724 | tcg_out32(s, a0); |
Aurelien Jarno | 5c2d2a9 | 2012-09-10 13:56:24 +0200 | [diff] [blame] | 2725 | } else { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2726 | tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2); |
Aurelien Jarno | 5c2d2a9 | 2012-09-10 13:56:24 +0200 | [diff] [blame] | 2727 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2728 | break; |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2729 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2730 | case INDEX_op_brcond_i64: |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2731 | tcg_out_brcond64(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0); |
aurel32 | 5d40cd6 | 2009-03-13 09:35:49 +0000 | [diff] [blame] | 2732 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2733 | case INDEX_op_setcond_i64: |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2734 | tcg_out_setcond64(s, args[3], a0, a1, a2, const_a2); |
aurel32 | 9619376 | 2009-03-10 19:37:46 +0000 | [diff] [blame] | 2735 | break; |
Richard Henderson | d0a1629 | 2012-09-21 10:13:36 -0700 | [diff] [blame] | 2736 | case INDEX_op_movcond_i64: |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2737 | tcg_out_movcond64(s, args[5], a0, a1, a2, const_a2, args[3]); |
Richard Henderson | d0a1629 | 2012-09-21 10:13:36 -0700 | [diff] [blame] | 2738 | break; |
aurel32 | 9619376 | 2009-03-10 19:37:46 +0000 | [diff] [blame] | 2739 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2740 | case INDEX_op_bswap64_i64: |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2741 | tcg_out_bswap64(s, a0); |
aurel32 | 9619376 | 2009-03-10 19:37:46 +0000 | [diff] [blame] | 2742 | break; |
Richard Henderson | 7547827 | 2018-11-30 16:31:15 -0800 | [diff] [blame] | 2743 | case INDEX_op_extrh_i64_i32: |
| 2744 | tcg_out_shifti(s, SHIFT_SHR + P_REXW, a0, 32); |
| 2745 | break; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2746 | #endif |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2747 | |
Jan Kiszka | a477332 | 2011-09-29 18:52:11 +0200 | [diff] [blame] | 2748 | OP_32_64(deposit): |
| 2749 | if (args[3] == 0 && args[4] == 8) { |
| 2750 | /* load bits 0..7 */ |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2751 | tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0); |
Jan Kiszka | a477332 | 2011-09-29 18:52:11 +0200 | [diff] [blame] | 2752 | } else if (args[3] == 8 && args[4] == 8) { |
| 2753 | /* load bits 8..15 */ |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2754 | tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4); |
Jan Kiszka | a477332 | 2011-09-29 18:52:11 +0200 | [diff] [blame] | 2755 | } else if (args[3] == 0 && args[4] == 16) { |
| 2756 | /* load bits 0..15 */ |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2757 | tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0); |
Jan Kiszka | a477332 | 2011-09-29 18:52:11 +0200 | [diff] [blame] | 2758 | } else { |
Richard Henderson | 732e89f | 2023-04-05 12:09:14 -0700 | [diff] [blame] | 2759 | g_assert_not_reached(); |
Jan Kiszka | a477332 | 2011-09-29 18:52:11 +0200 | [diff] [blame] | 2760 | } |
| 2761 | break; |
| 2762 | |
Richard Henderson | 78fdbfb | 2016-10-14 14:08:13 -0500 | [diff] [blame] | 2763 | case INDEX_op_extract_i64: |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2764 | if (a2 + args[3] == 32) { |
Richard Henderson | 78fdbfb | 2016-10-14 14:08:13 -0500 | [diff] [blame] | 2765 | /* This is a 32-bit zero-extending right shift. */ |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2766 | tcg_out_mov(s, TCG_TYPE_I32, a0, a1); |
| 2767 | tcg_out_shifti(s, SHIFT_SHR, a0, a2); |
Richard Henderson | 78fdbfb | 2016-10-14 14:08:13 -0500 | [diff] [blame] | 2768 | break; |
| 2769 | } |
| 2770 | /* FALLTHRU */ |
| 2771 | case INDEX_op_extract_i32: |
| 2772 | /* On the off-chance that we can use the high-byte registers. |
| 2773 | Otherwise we emit the same ext16 + shift pattern that we |
| 2774 | would have gotten from the normal tcg-op.c expansion. */ |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2775 | tcg_debug_assert(a2 == 8 && args[3] == 8); |
| 2776 | if (a1 < 4 && a0 < 8) { |
| 2777 | tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4); |
Richard Henderson | 78fdbfb | 2016-10-14 14:08:13 -0500 | [diff] [blame] | 2778 | } else { |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2779 | tcg_out_ext16u(s, a0, a1); |
| 2780 | tcg_out_shifti(s, SHIFT_SHR, a0, 8); |
Richard Henderson | 78fdbfb | 2016-10-14 14:08:13 -0500 | [diff] [blame] | 2781 | } |
| 2782 | break; |
| 2783 | |
| 2784 | case INDEX_op_sextract_i32: |
| 2785 | /* We don't implement sextract_i64, as we cannot sign-extend to |
| 2786 | 64-bits without using the REX prefix that explicitly excludes |
| 2787 | access to the high-byte registers. */ |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2788 | tcg_debug_assert(a2 == 8 && args[3] == 8); |
| 2789 | if (a1 < 4 && a0 < 8) { |
| 2790 | tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4); |
Richard Henderson | 78fdbfb | 2016-10-14 14:08:13 -0500 | [diff] [blame] | 2791 | } else { |
Richard Henderson | 753e42e | 2023-04-05 14:49:59 -0700 | [diff] [blame] | 2792 | tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1); |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2793 | tcg_out_shifti(s, SHIFT_SAR, a0, 8); |
Richard Henderson | 78fdbfb | 2016-10-14 14:08:13 -0500 | [diff] [blame] | 2794 | } |
| 2795 | break; |
| 2796 | |
Richard Henderson | c6fb8c0 | 2019-02-25 11:42:35 -0800 | [diff] [blame] | 2797 | OP_32_64(extract2): |
| 2798 | /* Note that SHRD outputs to the r/m operand. */ |
| 2799 | tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0); |
| 2800 | tcg_out8(s, args[3]); |
| 2801 | break; |
| 2802 | |
Pranith Kumar | a7d00d4 | 2016-07-14 16:20:14 -0400 | [diff] [blame] | 2803 | case INDEX_op_mb: |
Richard Henderson | 42d5b51 | 2016-11-18 12:50:50 +0100 | [diff] [blame] | 2804 | tcg_out_mb(s, a0); |
Pranith Kumar | a7d00d4 | 2016-07-14 16:20:14 -0400 | [diff] [blame] | 2805 | break; |
Richard Henderson | 96d0ee7 | 2014-04-25 15:19:33 -0400 | [diff] [blame] | 2806 | case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ |
| 2807 | case INDEX_op_mov_i64: |
Richard Henderson | 96d0ee7 | 2014-04-25 15:19:33 -0400 | [diff] [blame] | 2808 | case INDEX_op_call: /* Always emitted via tcg_out_call. */ |
Richard Henderson | b55a8d9 | 2022-11-26 12:42:06 -0800 | [diff] [blame] | 2809 | case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ |
Richard Henderson | cf7d6b8 | 2022-11-26 17:14:05 -0800 | [diff] [blame] | 2810 | case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ |
Richard Henderson | 678155b | 2023-04-05 11:17:01 -0700 | [diff] [blame] | 2811 | case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ |
| 2812 | case INDEX_op_ext8s_i64: |
Richard Henderson | d0e66c8 | 2023-04-05 13:26:51 -0700 | [diff] [blame] | 2813 | case INDEX_op_ext8u_i32: |
| 2814 | case INDEX_op_ext8u_i64: |
Richard Henderson | 753e42e | 2023-04-05 14:49:59 -0700 | [diff] [blame] | 2815 | case INDEX_op_ext16s_i32: |
| 2816 | case INDEX_op_ext16s_i64: |
Richard Henderson | 379afdf | 2023-04-05 16:25:22 -0700 | [diff] [blame] | 2817 | case INDEX_op_ext16u_i32: |
| 2818 | case INDEX_op_ext16u_i64: |
Richard Henderson | 52bf339 | 2023-04-05 17:50:09 -0700 | [diff] [blame] | 2819 | case INDEX_op_ext32s_i64: |
Richard Henderson | 9ecf5f6 | 2023-04-05 18:07:05 -0700 | [diff] [blame] | 2820 | case INDEX_op_ext32u_i64: |
Richard Henderson | 9c6aa27 | 2023-04-05 18:30:56 -0700 | [diff] [blame] | 2821 | case INDEX_op_ext_i32_i64: |
Richard Henderson | b9bfe00 | 2023-04-05 18:56:28 -0700 | [diff] [blame] | 2822 | case INDEX_op_extu_i32_i64: |
Richard Henderson | b8b94ac | 2023-04-05 19:58:35 -0700 | [diff] [blame] | 2823 | case INDEX_op_extrl_i64_i32: |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2824 | default: |
Richard Henderson | 732e89f | 2023-04-05 12:09:14 -0700 | [diff] [blame] | 2825 | g_assert_not_reached(); |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2826 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 2827 | |
| 2828 | #undef OP_32_64 |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 2829 | } |
| 2830 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 2831 | static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, |
| 2832 | unsigned vecl, unsigned vece, |
Miroslav Rezanina | 5e8892d | 2021-03-12 13:14:18 +0100 | [diff] [blame] | 2833 | const TCGArg args[TCG_MAX_OP_ARGS], |
| 2834 | const int const_args[TCG_MAX_OP_ARGS]) |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 2835 | { |
| 2836 | static int const add_insn[4] = { |
| 2837 | OPC_PADDB, OPC_PADDW, OPC_PADDD, OPC_PADDQ |
| 2838 | }; |
Richard Henderson | 8ffafbc | 2018-12-17 19:00:41 -0800 | [diff] [blame] | 2839 | static int const ssadd_insn[4] = { |
| 2840 | OPC_PADDSB, OPC_PADDSW, OPC_UD2, OPC_UD2 |
| 2841 | }; |
| 2842 | static int const usadd_insn[4] = { |
Mark Cave-Ayland | 3115584 | 2019-02-07 22:42:58 +0000 | [diff] [blame] | 2843 | OPC_PADDUB, OPC_PADDUW, OPC_UD2, OPC_UD2 |
Richard Henderson | 8ffafbc | 2018-12-17 19:00:41 -0800 | [diff] [blame] | 2844 | }; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 2845 | static int const sub_insn[4] = { |
| 2846 | OPC_PSUBB, OPC_PSUBW, OPC_PSUBD, OPC_PSUBQ |
| 2847 | }; |
Richard Henderson | 8ffafbc | 2018-12-17 19:00:41 -0800 | [diff] [blame] | 2848 | static int const sssub_insn[4] = { |
| 2849 | OPC_PSUBSB, OPC_PSUBSW, OPC_UD2, OPC_UD2 |
| 2850 | }; |
| 2851 | static int const ussub_insn[4] = { |
Mark Cave-Ayland | 3115584 | 2019-02-07 22:42:58 +0000 | [diff] [blame] | 2852 | OPC_PSUBUB, OPC_PSUBUW, OPC_UD2, OPC_UD2 |
Richard Henderson | 8ffafbc | 2018-12-17 19:00:41 -0800 | [diff] [blame] | 2853 | }; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 2854 | static int const mul_insn[4] = { |
Richard Henderson | 4c8b968 | 2021-12-16 07:14:24 -0800 | [diff] [blame] | 2855 | OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_VPMULLQ |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 2856 | }; |
| 2857 | static int const shift_imm_insn[4] = { |
| 2858 | OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib |
| 2859 | }; |
| 2860 | static int const cmpeq_insn[4] = { |
| 2861 | OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ |
| 2862 | }; |
| 2863 | static int const cmpgt_insn[4] = { |
| 2864 | OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ |
| 2865 | }; |
| 2866 | static int const punpckl_insn[4] = { |
| 2867 | OPC_PUNPCKLBW, OPC_PUNPCKLWD, OPC_PUNPCKLDQ, OPC_PUNPCKLQDQ |
| 2868 | }; |
| 2869 | static int const punpckh_insn[4] = { |
| 2870 | OPC_PUNPCKHBW, OPC_PUNPCKHWD, OPC_PUNPCKHDQ, OPC_PUNPCKHQDQ |
| 2871 | }; |
| 2872 | static int const packss_insn[4] = { |
| 2873 | OPC_PACKSSWB, OPC_PACKSSDW, OPC_UD2, OPC_UD2 |
| 2874 | }; |
| 2875 | static int const packus_insn[4] = { |
| 2876 | OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2 |
| 2877 | }; |
Richard Henderson | bc37faf | 2018-12-17 20:17:56 -0800 | [diff] [blame] | 2878 | static int const smin_insn[4] = { |
Richard Henderson | dac1648 | 2021-12-16 06:54:26 -0800 | [diff] [blame] | 2879 | OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_VPMINSQ |
Richard Henderson | bc37faf | 2018-12-17 20:17:56 -0800 | [diff] [blame] | 2880 | }; |
| 2881 | static int const smax_insn[4] = { |
Richard Henderson | dac1648 | 2021-12-16 06:54:26 -0800 | [diff] [blame] | 2882 | OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_VPMAXSQ |
Richard Henderson | bc37faf | 2018-12-17 20:17:56 -0800 | [diff] [blame] | 2883 | }; |
| 2884 | static int const umin_insn[4] = { |
Richard Henderson | dac1648 | 2021-12-16 06:54:26 -0800 | [diff] [blame] | 2885 | OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ |
Richard Henderson | bc37faf | 2018-12-17 20:17:56 -0800 | [diff] [blame] | 2886 | }; |
| 2887 | static int const umax_insn[4] = { |
Richard Henderson | dac1648 | 2021-12-16 06:54:26 -0800 | [diff] [blame] | 2888 | OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ |
Richard Henderson | bc37faf | 2018-12-17 20:17:56 -0800 | [diff] [blame] | 2889 | }; |
Richard Henderson | 102cd35 | 2021-12-18 09:15:29 -0800 | [diff] [blame] | 2890 | static int const rotlv_insn[4] = { |
| 2891 | OPC_UD2, OPC_UD2, OPC_VPROLVD, OPC_VPROLVQ |
| 2892 | }; |
| 2893 | static int const rotrv_insn[4] = { |
| 2894 | OPC_UD2, OPC_UD2, OPC_VPRORVD, OPC_VPRORVQ |
| 2895 | }; |
Richard Henderson | a2ce146 | 2019-04-14 09:13:21 -1000 | [diff] [blame] | 2896 | static int const shlv_insn[4] = { |
Richard Henderson | ef77ce0 | 2021-12-15 21:18:48 -0800 | [diff] [blame] | 2897 | OPC_UD2, OPC_VPSLLVW, OPC_VPSLLVD, OPC_VPSLLVQ |
Richard Henderson | a2ce146 | 2019-04-14 09:13:21 -1000 | [diff] [blame] | 2898 | }; |
| 2899 | static int const shrv_insn[4] = { |
Richard Henderson | ef77ce0 | 2021-12-15 21:18:48 -0800 | [diff] [blame] | 2900 | OPC_UD2, OPC_VPSRLVW, OPC_VPSRLVD, OPC_VPSRLVQ |
Richard Henderson | a2ce146 | 2019-04-14 09:13:21 -1000 | [diff] [blame] | 2901 | }; |
| 2902 | static int const sarv_insn[4] = { |
Richard Henderson | ef77ce0 | 2021-12-15 21:18:48 -0800 | [diff] [blame] | 2903 | OPC_UD2, OPC_VPSRAVW, OPC_VPSRAVD, OPC_VPSRAVQ |
Richard Henderson | a2ce146 | 2019-04-14 09:13:21 -1000 | [diff] [blame] | 2904 | }; |
Richard Henderson | 0a8d7a3 | 2019-04-18 19:19:31 -1000 | [diff] [blame] | 2905 | static int const shls_insn[4] = { |
| 2906 | OPC_UD2, OPC_PSLLW, OPC_PSLLD, OPC_PSLLQ |
| 2907 | }; |
| 2908 | static int const shrs_insn[4] = { |
| 2909 | OPC_UD2, OPC_PSRLW, OPC_PSRLD, OPC_PSRLQ |
| 2910 | }; |
| 2911 | static int const sars_insn[4] = { |
Richard Henderson | 47b331b | 2021-12-16 06:33:47 -0800 | [diff] [blame] | 2912 | OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_VPSRAQ |
Richard Henderson | 0a8d7a3 | 2019-04-18 19:19:31 -1000 | [diff] [blame] | 2913 | }; |
Richard Henderson | 965d5d0 | 2021-12-18 10:48:43 -0800 | [diff] [blame] | 2914 | static int const vpshldi_insn[4] = { |
| 2915 | OPC_UD2, OPC_VPSHLDW, OPC_VPSHLDD, OPC_VPSHLDQ |
| 2916 | }; |
| 2917 | static int const vpshldv_insn[4] = { |
| 2918 | OPC_UD2, OPC_VPSHLDVW, OPC_VPSHLDVD, OPC_VPSHLDVQ |
| 2919 | }; |
| 2920 | static int const vpshrdv_insn[4] = { |
| 2921 | OPC_UD2, OPC_VPSHRDVW, OPC_VPSHRDVD, OPC_VPSHRDVQ |
| 2922 | }; |
Richard Henderson | 18f9b65 | 2019-04-17 15:54:20 -1000 | [diff] [blame] | 2923 | static int const abs_insn[4] = { |
Richard Henderson | dac1648 | 2021-12-16 06:54:26 -0800 | [diff] [blame] | 2924 | OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_VPABSQ |
Richard Henderson | 18f9b65 | 2019-04-17 15:54:20 -1000 | [diff] [blame] | 2925 | }; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 2926 | |
| 2927 | TCGType type = vecl + TCG_TYPE_V64; |
| 2928 | int insn, sub; |
Richard Henderson | cf32076 | 2021-12-16 08:06:33 -0800 | [diff] [blame] | 2929 | TCGArg a0, a1, a2, a3; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 2930 | |
| 2931 | a0 = args[0]; |
| 2932 | a1 = args[1]; |
| 2933 | a2 = args[2]; |
| 2934 | |
| 2935 | switch (opc) { |
| 2936 | case INDEX_op_add_vec: |
| 2937 | insn = add_insn[vece]; |
| 2938 | goto gen_simd; |
Richard Henderson | 8ffafbc | 2018-12-17 19:00:41 -0800 | [diff] [blame] | 2939 | case INDEX_op_ssadd_vec: |
| 2940 | insn = ssadd_insn[vece]; |
| 2941 | goto gen_simd; |
| 2942 | case INDEX_op_usadd_vec: |
| 2943 | insn = usadd_insn[vece]; |
| 2944 | goto gen_simd; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 2945 | case INDEX_op_sub_vec: |
| 2946 | insn = sub_insn[vece]; |
| 2947 | goto gen_simd; |
Richard Henderson | 8ffafbc | 2018-12-17 19:00:41 -0800 | [diff] [blame] | 2948 | case INDEX_op_sssub_vec: |
| 2949 | insn = sssub_insn[vece]; |
| 2950 | goto gen_simd; |
| 2951 | case INDEX_op_ussub_vec: |
| 2952 | insn = ussub_insn[vece]; |
| 2953 | goto gen_simd; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 2954 | case INDEX_op_mul_vec: |
| 2955 | insn = mul_insn[vece]; |
| 2956 | goto gen_simd; |
| 2957 | case INDEX_op_and_vec: |
| 2958 | insn = OPC_PAND; |
| 2959 | goto gen_simd; |
| 2960 | case INDEX_op_or_vec: |
| 2961 | insn = OPC_POR; |
| 2962 | goto gen_simd; |
| 2963 | case INDEX_op_xor_vec: |
| 2964 | insn = OPC_PXOR; |
| 2965 | goto gen_simd; |
Richard Henderson | bc37faf | 2018-12-17 20:17:56 -0800 | [diff] [blame] | 2966 | case INDEX_op_smin_vec: |
| 2967 | insn = smin_insn[vece]; |
| 2968 | goto gen_simd; |
| 2969 | case INDEX_op_umin_vec: |
| 2970 | insn = umin_insn[vece]; |
| 2971 | goto gen_simd; |
| 2972 | case INDEX_op_smax_vec: |
| 2973 | insn = smax_insn[vece]; |
| 2974 | goto gen_simd; |
| 2975 | case INDEX_op_umax_vec: |
| 2976 | insn = umax_insn[vece]; |
| 2977 | goto gen_simd; |
Richard Henderson | a2ce146 | 2019-04-14 09:13:21 -1000 | [diff] [blame] | 2978 | case INDEX_op_shlv_vec: |
| 2979 | insn = shlv_insn[vece]; |
| 2980 | goto gen_simd; |
| 2981 | case INDEX_op_shrv_vec: |
| 2982 | insn = shrv_insn[vece]; |
| 2983 | goto gen_simd; |
| 2984 | case INDEX_op_sarv_vec: |
| 2985 | insn = sarv_insn[vece]; |
| 2986 | goto gen_simd; |
Richard Henderson | 102cd35 | 2021-12-18 09:15:29 -0800 | [diff] [blame] | 2987 | case INDEX_op_rotlv_vec: |
| 2988 | insn = rotlv_insn[vece]; |
| 2989 | goto gen_simd; |
| 2990 | case INDEX_op_rotrv_vec: |
| 2991 | insn = rotrv_insn[vece]; |
| 2992 | goto gen_simd; |
Richard Henderson | 0a8d7a3 | 2019-04-18 19:19:31 -1000 | [diff] [blame] | 2993 | case INDEX_op_shls_vec: |
| 2994 | insn = shls_insn[vece]; |
| 2995 | goto gen_simd; |
| 2996 | case INDEX_op_shrs_vec: |
| 2997 | insn = shrs_insn[vece]; |
| 2998 | goto gen_simd; |
| 2999 | case INDEX_op_sars_vec: |
| 3000 | insn = sars_insn[vece]; |
| 3001 | goto gen_simd; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3002 | case INDEX_op_x86_punpckl_vec: |
| 3003 | insn = punpckl_insn[vece]; |
| 3004 | goto gen_simd; |
| 3005 | case INDEX_op_x86_punpckh_vec: |
| 3006 | insn = punpckh_insn[vece]; |
| 3007 | goto gen_simd; |
| 3008 | case INDEX_op_x86_packss_vec: |
| 3009 | insn = packss_insn[vece]; |
| 3010 | goto gen_simd; |
| 3011 | case INDEX_op_x86_packus_vec: |
| 3012 | insn = packus_insn[vece]; |
| 3013 | goto gen_simd; |
Richard Henderson | 965d5d0 | 2021-12-18 10:48:43 -0800 | [diff] [blame] | 3014 | case INDEX_op_x86_vpshldv_vec: |
| 3015 | insn = vpshldv_insn[vece]; |
| 3016 | a1 = a2; |
| 3017 | a2 = args[3]; |
| 3018 | goto gen_simd; |
| 3019 | case INDEX_op_x86_vpshrdv_vec: |
| 3020 | insn = vpshrdv_insn[vece]; |
| 3021 | a1 = a2; |
| 3022 | a2 = args[3]; |
| 3023 | goto gen_simd; |
Richard Henderson | 7f34ed4 | 2018-02-21 12:56:24 -0800 | [diff] [blame] | 3024 | #if TCG_TARGET_REG_BITS == 32 |
| 3025 | case INDEX_op_dup2_vec: |
Richard Henderson | e20cb81 | 2020-03-28 18:16:10 -0700 | [diff] [blame] | 3026 | /* First merge the two 32-bit inputs to a single 64-bit element. */ |
| 3027 | tcg_out_vex_modrm(s, OPC_PUNPCKLDQ, a0, a1, a2); |
| 3028 | /* Then replicate the 64-bit elements across the rest of the vector. */ |
| 3029 | if (type != TCG_TYPE_V64) { |
| 3030 | tcg_out_dup_vec(s, type, MO_64, a0, a0); |
| 3031 | } |
| 3032 | break; |
Richard Henderson | 7f34ed4 | 2018-02-21 12:56:24 -0800 | [diff] [blame] | 3033 | #endif |
Richard Henderson | 18f9b65 | 2019-04-17 15:54:20 -1000 | [diff] [blame] | 3034 | case INDEX_op_abs_vec: |
| 3035 | insn = abs_insn[vece]; |
| 3036 | a2 = a1; |
| 3037 | a1 = 0; |
| 3038 | goto gen_simd; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3039 | gen_simd: |
| 3040 | tcg_debug_assert(insn != OPC_UD2); |
| 3041 | if (type == TCG_TYPE_V256) { |
| 3042 | insn |= P_VEXL; |
| 3043 | } |
| 3044 | tcg_out_vex_modrm(s, insn, a0, a1, a2); |
| 3045 | break; |
| 3046 | |
| 3047 | case INDEX_op_cmp_vec: |
| 3048 | sub = args[3]; |
| 3049 | if (sub == TCG_COND_EQ) { |
| 3050 | insn = cmpeq_insn[vece]; |
| 3051 | } else if (sub == TCG_COND_GT) { |
| 3052 | insn = cmpgt_insn[vece]; |
| 3053 | } else { |
| 3054 | g_assert_not_reached(); |
| 3055 | } |
| 3056 | goto gen_simd; |
| 3057 | |
| 3058 | case INDEX_op_andc_vec: |
| 3059 | insn = OPC_PANDN; |
| 3060 | if (type == TCG_TYPE_V256) { |
| 3061 | insn |= P_VEXL; |
| 3062 | } |
| 3063 | tcg_out_vex_modrm(s, insn, a0, a2, a1); |
| 3064 | break; |
| 3065 | |
| 3066 | case INDEX_op_shli_vec: |
Richard Henderson | 264e418 | 2021-12-17 20:16:43 -0800 | [diff] [blame] | 3067 | insn = shift_imm_insn[vece]; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3068 | sub = 6; |
| 3069 | goto gen_shift; |
| 3070 | case INDEX_op_shri_vec: |
Richard Henderson | 264e418 | 2021-12-17 20:16:43 -0800 | [diff] [blame] | 3071 | insn = shift_imm_insn[vece]; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3072 | sub = 2; |
| 3073 | goto gen_shift; |
| 3074 | case INDEX_op_sari_vec: |
Richard Henderson | 264e418 | 2021-12-17 20:16:43 -0800 | [diff] [blame] | 3075 | if (vece == MO_64) { |
| 3076 | insn = OPC_PSHIFTD_Ib | P_VEXW | P_EVEX; |
| 3077 | } else { |
| 3078 | insn = shift_imm_insn[vece]; |
| 3079 | } |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3080 | sub = 4; |
Richard Henderson | 4e73f84 | 2021-12-17 22:02:57 -0800 | [diff] [blame] | 3081 | goto gen_shift; |
| 3082 | case INDEX_op_rotli_vec: |
| 3083 | insn = OPC_PSHIFTD_Ib | P_EVEX; /* VPROL[DQ] */ |
| 3084 | if (vece == MO_64) { |
| 3085 | insn |= P_VEXW; |
| 3086 | } |
| 3087 | sub = 1; |
| 3088 | goto gen_shift; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3089 | gen_shift: |
| 3090 | tcg_debug_assert(vece != MO_8); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3091 | if (type == TCG_TYPE_V256) { |
| 3092 | insn |= P_VEXL; |
| 3093 | } |
| 3094 | tcg_out_vex_modrm(s, insn, sub, a0, a1); |
| 3095 | tcg_out8(s, a2); |
| 3096 | break; |
| 3097 | |
| 3098 | case INDEX_op_ld_vec: |
| 3099 | tcg_out_ld(s, type, a0, a1, a2); |
| 3100 | break; |
| 3101 | case INDEX_op_st_vec: |
| 3102 | tcg_out_st(s, type, a0, a1, a2); |
| 3103 | break; |
Richard Henderson | 37ee55a | 2019-03-17 01:55:22 +0000 | [diff] [blame] | 3104 | case INDEX_op_dupm_vec: |
| 3105 | tcg_out_dupm_vec(s, type, vece, a0, a1, a2); |
| 3106 | break; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3107 | |
| 3108 | case INDEX_op_x86_shufps_vec: |
| 3109 | insn = OPC_SHUFPS; |
| 3110 | sub = args[3]; |
| 3111 | goto gen_simd_imm8; |
| 3112 | case INDEX_op_x86_blend_vec: |
| 3113 | if (vece == MO_16) { |
| 3114 | insn = OPC_PBLENDW; |
| 3115 | } else if (vece == MO_32) { |
| 3116 | insn = (have_avx2 ? OPC_VPBLENDD : OPC_BLENDPS); |
| 3117 | } else { |
| 3118 | g_assert_not_reached(); |
| 3119 | } |
| 3120 | sub = args[3]; |
| 3121 | goto gen_simd_imm8; |
| 3122 | case INDEX_op_x86_vperm2i128_vec: |
| 3123 | insn = OPC_VPERM2I128; |
| 3124 | sub = args[3]; |
| 3125 | goto gen_simd_imm8; |
Richard Henderson | 965d5d0 | 2021-12-18 10:48:43 -0800 | [diff] [blame] | 3126 | case INDEX_op_x86_vpshldi_vec: |
| 3127 | insn = vpshldi_insn[vece]; |
| 3128 | sub = args[3]; |
| 3129 | goto gen_simd_imm8; |
Richard Henderson | 3143767 | 2021-12-16 07:37:02 -0800 | [diff] [blame] | 3130 | |
| 3131 | case INDEX_op_not_vec: |
| 3132 | insn = OPC_VPTERNLOGQ; |
| 3133 | a2 = a1; |
| 3134 | sub = 0x33; /* !B */ |
| 3135 | goto gen_simd_imm8; |
| 3136 | case INDEX_op_nor_vec: |
| 3137 | insn = OPC_VPTERNLOGQ; |
| 3138 | sub = 0x11; /* norCB */ |
| 3139 | goto gen_simd_imm8; |
| 3140 | case INDEX_op_nand_vec: |
| 3141 | insn = OPC_VPTERNLOGQ; |
| 3142 | sub = 0x77; /* nandCB */ |
| 3143 | goto gen_simd_imm8; |
| 3144 | case INDEX_op_eqv_vec: |
| 3145 | insn = OPC_VPTERNLOGQ; |
| 3146 | sub = 0x99; /* xnorCB */ |
| 3147 | goto gen_simd_imm8; |
| 3148 | case INDEX_op_orc_vec: |
| 3149 | insn = OPC_VPTERNLOGQ; |
| 3150 | sub = 0xdd; /* orB!C */ |
| 3151 | goto gen_simd_imm8; |
| 3152 | |
Richard Henderson | cf32076 | 2021-12-16 08:06:33 -0800 | [diff] [blame] | 3153 | case INDEX_op_bitsel_vec: |
| 3154 | insn = OPC_VPTERNLOGQ; |
| 3155 | a3 = args[3]; |
| 3156 | if (a0 == a1) { |
| 3157 | a1 = a2; |
| 3158 | a2 = a3; |
| 3159 | sub = 0xca; /* A?B:C */ |
| 3160 | } else if (a0 == a2) { |
| 3161 | a2 = a3; |
| 3162 | sub = 0xe2; /* B?A:C */ |
| 3163 | } else { |
| 3164 | tcg_out_mov(s, type, a0, a3); |
| 3165 | sub = 0xb8; /* B?C:A */ |
| 3166 | } |
| 3167 | goto gen_simd_imm8; |
| 3168 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3169 | gen_simd_imm8: |
Richard Henderson | 965d5d0 | 2021-12-18 10:48:43 -0800 | [diff] [blame] | 3170 | tcg_debug_assert(insn != OPC_UD2); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3171 | if (type == TCG_TYPE_V256) { |
| 3172 | insn |= P_VEXL; |
| 3173 | } |
| 3174 | tcg_out_vex_modrm(s, insn, a0, a1, a2); |
| 3175 | tcg_out8(s, sub); |
| 3176 | break; |
| 3177 | |
| 3178 | case INDEX_op_x86_vpblendvb_vec: |
| 3179 | insn = OPC_VPBLENDVB; |
| 3180 | if (type == TCG_TYPE_V256) { |
| 3181 | insn |= P_VEXL; |
| 3182 | } |
| 3183 | tcg_out_vex_modrm(s, insn, a0, a1, a2); |
| 3184 | tcg_out8(s, args[3] << 4); |
| 3185 | break; |
| 3186 | |
| 3187 | case INDEX_op_x86_psrldq_vec: |
| 3188 | tcg_out_vex_modrm(s, OPC_GRP14, 3, a0, a1); |
| 3189 | tcg_out8(s, a2); |
| 3190 | break; |
| 3191 | |
Richard Henderson | bab1671 | 2019-03-18 11:20:27 -0700 | [diff] [blame] | 3192 | case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ |
Richard Henderson | bab1671 | 2019-03-18 11:20:27 -0700 | [diff] [blame] | 3193 | case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3194 | default: |
| 3195 | g_assert_not_reached(); |
| 3196 | } |
| 3197 | } |
| 3198 | |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3199 | static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) |
Richard Henderson | f69d277 | 2016-11-18 09:31:40 +0100 | [diff] [blame] | 3200 | { |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3201 | switch (op) { |
Emilio G. Cota | 5cb4ef8 | 2017-04-26 23:29:18 -0400 | [diff] [blame] | 3202 | case INDEX_op_goto_ptr: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3203 | return C_O0_I1(r); |
Emilio G. Cota | 5cb4ef8 | 2017-04-26 23:29:18 -0400 | [diff] [blame] | 3204 | |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3205 | case INDEX_op_ld8u_i32: |
| 3206 | case INDEX_op_ld8u_i64: |
| 3207 | case INDEX_op_ld8s_i32: |
| 3208 | case INDEX_op_ld8s_i64: |
| 3209 | case INDEX_op_ld16u_i32: |
| 3210 | case INDEX_op_ld16u_i64: |
| 3211 | case INDEX_op_ld16s_i32: |
| 3212 | case INDEX_op_ld16s_i64: |
| 3213 | case INDEX_op_ld_i32: |
| 3214 | case INDEX_op_ld32u_i64: |
| 3215 | case INDEX_op_ld32s_i64: |
| 3216 | case INDEX_op_ld_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3217 | return C_O1_I1(r, r); |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3218 | |
| 3219 | case INDEX_op_st8_i32: |
| 3220 | case INDEX_op_st8_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3221 | return C_O0_I2(qi, r); |
| 3222 | |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3223 | case INDEX_op_st16_i32: |
| 3224 | case INDEX_op_st16_i64: |
| 3225 | case INDEX_op_st_i32: |
| 3226 | case INDEX_op_st32_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3227 | return C_O0_I2(ri, r); |
| 3228 | |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3229 | case INDEX_op_st_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3230 | return C_O0_I2(re, r); |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3231 | |
| 3232 | case INDEX_op_add_i32: |
| 3233 | case INDEX_op_add_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3234 | return C_O1_I2(r, r, re); |
| 3235 | |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3236 | case INDEX_op_sub_i32: |
| 3237 | case INDEX_op_sub_i64: |
| 3238 | case INDEX_op_mul_i32: |
| 3239 | case INDEX_op_mul_i64: |
| 3240 | case INDEX_op_or_i32: |
| 3241 | case INDEX_op_or_i64: |
| 3242 | case INDEX_op_xor_i32: |
| 3243 | case INDEX_op_xor_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3244 | return C_O1_I2(r, 0, re); |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3245 | |
| 3246 | case INDEX_op_and_i32: |
| 3247 | case INDEX_op_and_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3248 | return C_O1_I2(r, 0, reZ); |
| 3249 | |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3250 | case INDEX_op_andc_i32: |
| 3251 | case INDEX_op_andc_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3252 | return C_O1_I2(r, r, rI); |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3253 | |
| 3254 | case INDEX_op_shl_i32: |
| 3255 | case INDEX_op_shl_i64: |
| 3256 | case INDEX_op_shr_i32: |
| 3257 | case INDEX_op_shr_i64: |
| 3258 | case INDEX_op_sar_i32: |
| 3259 | case INDEX_op_sar_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3260 | return have_bmi2 ? C_O1_I2(r, r, ri) : C_O1_I2(r, 0, ci); |
| 3261 | |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3262 | case INDEX_op_rotl_i32: |
| 3263 | case INDEX_op_rotl_i64: |
| 3264 | case INDEX_op_rotr_i32: |
| 3265 | case INDEX_op_rotr_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3266 | return C_O1_I2(r, 0, ci); |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3267 | |
| 3268 | case INDEX_op_brcond_i32: |
| 3269 | case INDEX_op_brcond_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3270 | return C_O0_I2(r, re); |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3271 | |
| 3272 | case INDEX_op_bswap16_i32: |
| 3273 | case INDEX_op_bswap16_i64: |
| 3274 | case INDEX_op_bswap32_i32: |
| 3275 | case INDEX_op_bswap32_i64: |
| 3276 | case INDEX_op_bswap64_i64: |
| 3277 | case INDEX_op_neg_i32: |
| 3278 | case INDEX_op_neg_i64: |
| 3279 | case INDEX_op_not_i32: |
| 3280 | case INDEX_op_not_i64: |
Richard Henderson | 7547827 | 2018-11-30 16:31:15 -0800 | [diff] [blame] | 3281 | case INDEX_op_extrh_i64_i32: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3282 | return C_O1_I1(r, 0); |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3283 | |
| 3284 | case INDEX_op_ext8s_i32: |
| 3285 | case INDEX_op_ext8s_i64: |
| 3286 | case INDEX_op_ext8u_i32: |
| 3287 | case INDEX_op_ext8u_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3288 | return C_O1_I1(r, q); |
| 3289 | |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3290 | case INDEX_op_ext16s_i32: |
| 3291 | case INDEX_op_ext16s_i64: |
| 3292 | case INDEX_op_ext16u_i32: |
| 3293 | case INDEX_op_ext16u_i64: |
| 3294 | case INDEX_op_ext32s_i64: |
| 3295 | case INDEX_op_ext32u_i64: |
| 3296 | case INDEX_op_ext_i32_i64: |
| 3297 | case INDEX_op_extu_i32_i64: |
Richard Henderson | 7547827 | 2018-11-30 16:31:15 -0800 | [diff] [blame] | 3298 | case INDEX_op_extrl_i64_i32: |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3299 | case INDEX_op_extract_i32: |
| 3300 | case INDEX_op_extract_i64: |
| 3301 | case INDEX_op_sextract_i32: |
Richard Henderson | 993508e | 2016-11-22 14:15:04 +0100 | [diff] [blame] | 3302 | case INDEX_op_ctpop_i32: |
| 3303 | case INDEX_op_ctpop_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3304 | return C_O1_I1(r, r); |
| 3305 | |
Richard Henderson | c6fb8c0 | 2019-02-25 11:42:35 -0800 | [diff] [blame] | 3306 | case INDEX_op_extract2_i32: |
| 3307 | case INDEX_op_extract2_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3308 | return C_O1_I2(r, 0, r); |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3309 | |
| 3310 | case INDEX_op_deposit_i32: |
| 3311 | case INDEX_op_deposit_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3312 | return C_O1_I2(Q, 0, Q); |
| 3313 | |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3314 | case INDEX_op_setcond_i32: |
| 3315 | case INDEX_op_setcond_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3316 | return C_O1_I2(q, r, re); |
| 3317 | |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3318 | case INDEX_op_movcond_i32: |
| 3319 | case INDEX_op_movcond_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3320 | return C_O1_I4(r, r, re, r, 0); |
| 3321 | |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3322 | case INDEX_op_div2_i32: |
| 3323 | case INDEX_op_div2_i64: |
| 3324 | case INDEX_op_divu2_i32: |
| 3325 | case INDEX_op_divu2_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3326 | return C_O2_I3(a, d, 0, 1, r); |
| 3327 | |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3328 | case INDEX_op_mulu2_i32: |
| 3329 | case INDEX_op_mulu2_i64: |
| 3330 | case INDEX_op_muls2_i32: |
| 3331 | case INDEX_op_muls2_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3332 | return C_O2_I2(a, d, a, r); |
| 3333 | |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3334 | case INDEX_op_add2_i32: |
| 3335 | case INDEX_op_add2_i64: |
| 3336 | case INDEX_op_sub2_i32: |
| 3337 | case INDEX_op_sub2_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3338 | return C_O2_I4(r, r, 0, 1, re, re); |
| 3339 | |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 3340 | case INDEX_op_ctz_i32: |
| 3341 | case INDEX_op_ctz_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3342 | return have_bmi1 ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); |
| 3343 | |
Richard Henderson | bbf25f9 | 2016-11-16 12:22:54 +0100 | [diff] [blame] | 3344 | case INDEX_op_clz_i32: |
| 3345 | case INDEX_op_clz_i64: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3346 | return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3347 | |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 3348 | case INDEX_op_qemu_ld_a32_i32: |
| 3349 | return C_O1_I1(r, L); |
| 3350 | case INDEX_op_qemu_ld_a64_i32: |
| 3351 | return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O1_I2(r, L, L); |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3352 | |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 3353 | case INDEX_op_qemu_st_a32_i32: |
| 3354 | return C_O0_I2(L, L); |
| 3355 | case INDEX_op_qemu_st_a64_i32: |
| 3356 | return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L); |
| 3357 | case INDEX_op_qemu_st8_a32_i32: |
| 3358 | return C_O0_I2(s, L); |
| 3359 | case INDEX_op_qemu_st8_a64_i32: |
| 3360 | return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(s, L) : C_O0_I3(s, L, L); |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3361 | |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 3362 | case INDEX_op_qemu_ld_a32_i64: |
| 3363 | return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I1(r, r, L); |
| 3364 | case INDEX_op_qemu_ld_a64_i64: |
| 3365 | return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I2(r, r, L, L); |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3366 | |
Richard Henderson | fecccfc | 2023-05-16 20:07:20 -0700 | [diff] [blame] | 3367 | case INDEX_op_qemu_st_a32_i64: |
| 3368 | return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L); |
| 3369 | case INDEX_op_qemu_st_a64_i64: |
| 3370 | return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I4(L, L, L, L); |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3371 | |
Richard Henderson | 098d0fc | 2023-04-17 10:16:28 +0200 | [diff] [blame] | 3372 | case INDEX_op_qemu_ld_a32_i128: |
| 3373 | case INDEX_op_qemu_ld_a64_i128: |
| 3374 | tcg_debug_assert(TCG_TARGET_REG_BITS == 64); |
| 3375 | return C_O2_I1(r, r, L); |
| 3376 | case INDEX_op_qemu_st_a32_i128: |
| 3377 | case INDEX_op_qemu_st_a64_i128: |
| 3378 | tcg_debug_assert(TCG_TARGET_REG_BITS == 64); |
| 3379 | return C_O0_I3(L, L, L); |
| 3380 | |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3381 | case INDEX_op_brcond2_i32: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3382 | return C_O0_I4(r, r, ri, ri); |
| 3383 | |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3384 | case INDEX_op_setcond2_i32: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3385 | return C_O1_I4(r, r, r, ri, ri); |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3386 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3387 | case INDEX_op_ld_vec: |
Richard Henderson | 37ee55a | 2019-03-17 01:55:22 +0000 | [diff] [blame] | 3388 | case INDEX_op_dupm_vec: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3389 | return C_O1_I1(x, r); |
| 3390 | |
| 3391 | case INDEX_op_st_vec: |
| 3392 | return C_O0_I2(x, r); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3393 | |
| 3394 | case INDEX_op_add_vec: |
| 3395 | case INDEX_op_sub_vec: |
| 3396 | case INDEX_op_mul_vec: |
| 3397 | case INDEX_op_and_vec: |
| 3398 | case INDEX_op_or_vec: |
| 3399 | case INDEX_op_xor_vec: |
| 3400 | case INDEX_op_andc_vec: |
Richard Henderson | 3143767 | 2021-12-16 07:37:02 -0800 | [diff] [blame] | 3401 | case INDEX_op_orc_vec: |
| 3402 | case INDEX_op_nand_vec: |
| 3403 | case INDEX_op_nor_vec: |
| 3404 | case INDEX_op_eqv_vec: |
Richard Henderson | 8ffafbc | 2018-12-17 19:00:41 -0800 | [diff] [blame] | 3405 | case INDEX_op_ssadd_vec: |
| 3406 | case INDEX_op_usadd_vec: |
| 3407 | case INDEX_op_sssub_vec: |
| 3408 | case INDEX_op_ussub_vec: |
Richard Henderson | bc37faf | 2018-12-17 20:17:56 -0800 | [diff] [blame] | 3409 | case INDEX_op_smin_vec: |
| 3410 | case INDEX_op_umin_vec: |
| 3411 | case INDEX_op_smax_vec: |
| 3412 | case INDEX_op_umax_vec: |
Richard Henderson | a2ce146 | 2019-04-14 09:13:21 -1000 | [diff] [blame] | 3413 | case INDEX_op_shlv_vec: |
| 3414 | case INDEX_op_shrv_vec: |
| 3415 | case INDEX_op_sarv_vec: |
Richard Henderson | 102cd35 | 2021-12-18 09:15:29 -0800 | [diff] [blame] | 3416 | case INDEX_op_rotlv_vec: |
| 3417 | case INDEX_op_rotrv_vec: |
Richard Henderson | 0a8d7a3 | 2019-04-18 19:19:31 -1000 | [diff] [blame] | 3418 | case INDEX_op_shls_vec: |
| 3419 | case INDEX_op_shrs_vec: |
| 3420 | case INDEX_op_sars_vec: |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3421 | case INDEX_op_cmp_vec: |
| 3422 | case INDEX_op_x86_shufps_vec: |
| 3423 | case INDEX_op_x86_blend_vec: |
| 3424 | case INDEX_op_x86_packss_vec: |
| 3425 | case INDEX_op_x86_packus_vec: |
| 3426 | case INDEX_op_x86_vperm2i128_vec: |
| 3427 | case INDEX_op_x86_punpckl_vec: |
| 3428 | case INDEX_op_x86_punpckh_vec: |
Richard Henderson | 965d5d0 | 2021-12-18 10:48:43 -0800 | [diff] [blame] | 3429 | case INDEX_op_x86_vpshldi_vec: |
Richard Henderson | 7f34ed4 | 2018-02-21 12:56:24 -0800 | [diff] [blame] | 3430 | #if TCG_TARGET_REG_BITS == 32 |
| 3431 | case INDEX_op_dup2_vec: |
| 3432 | #endif |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3433 | return C_O1_I2(x, x, x); |
| 3434 | |
Richard Henderson | 18f9b65 | 2019-04-17 15:54:20 -1000 | [diff] [blame] | 3435 | case INDEX_op_abs_vec: |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3436 | case INDEX_op_dup_vec: |
Richard Henderson | 3143767 | 2021-12-16 07:37:02 -0800 | [diff] [blame] | 3437 | case INDEX_op_not_vec: |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3438 | case INDEX_op_shli_vec: |
| 3439 | case INDEX_op_shri_vec: |
| 3440 | case INDEX_op_sari_vec: |
Richard Henderson | 4e73f84 | 2021-12-17 22:02:57 -0800 | [diff] [blame] | 3441 | case INDEX_op_rotli_vec: |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3442 | case INDEX_op_x86_psrldq_vec: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3443 | return C_O1_I1(x, x); |
| 3444 | |
Richard Henderson | 965d5d0 | 2021-12-18 10:48:43 -0800 | [diff] [blame] | 3445 | case INDEX_op_x86_vpshldv_vec: |
| 3446 | case INDEX_op_x86_vpshrdv_vec: |
| 3447 | return C_O1_I3(x, 0, x, x); |
| 3448 | |
Richard Henderson | cf32076 | 2021-12-16 08:06:33 -0800 | [diff] [blame] | 3449 | case INDEX_op_bitsel_vec: |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3450 | case INDEX_op_x86_vpblendvb_vec: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3451 | return C_O1_I3(x, x, x, x); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3452 | |
Richard Henderson | cd26449 | 2016-11-18 11:55:41 +0100 | [diff] [blame] | 3453 | default: |
Richard Henderson | 4c22e84 | 2020-10-16 22:20:55 -0700 | [diff] [blame] | 3454 | g_assert_not_reached(); |
Richard Henderson | f69d277 | 2016-11-18 09:31:40 +0100 | [diff] [blame] | 3455 | } |
Richard Henderson | f69d277 | 2016-11-18 09:31:40 +0100 | [diff] [blame] | 3456 | } |
| 3457 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3458 | int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) |
| 3459 | { |
| 3460 | switch (opc) { |
| 3461 | case INDEX_op_add_vec: |
| 3462 | case INDEX_op_sub_vec: |
| 3463 | case INDEX_op_and_vec: |
| 3464 | case INDEX_op_or_vec: |
| 3465 | case INDEX_op_xor_vec: |
| 3466 | case INDEX_op_andc_vec: |
Richard Henderson | 3143767 | 2021-12-16 07:37:02 -0800 | [diff] [blame] | 3467 | case INDEX_op_orc_vec: |
| 3468 | case INDEX_op_nand_vec: |
| 3469 | case INDEX_op_nor_vec: |
| 3470 | case INDEX_op_eqv_vec: |
| 3471 | case INDEX_op_not_vec: |
Richard Henderson | cf32076 | 2021-12-16 08:06:33 -0800 | [diff] [blame] | 3472 | case INDEX_op_bitsel_vec: |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3473 | return 1; |
| 3474 | case INDEX_op_cmp_vec: |
Richard Henderson | 904c5e1 | 2019-04-19 10:13:33 -1000 | [diff] [blame] | 3475 | case INDEX_op_cmpsel_vec: |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3476 | return -1; |
| 3477 | |
Richard Henderson | 4e73f84 | 2021-12-17 22:02:57 -0800 | [diff] [blame] | 3478 | case INDEX_op_rotli_vec: |
| 3479 | return have_avx512vl && vece >= MO_32 ? 1 : -1; |
| 3480 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3481 | case INDEX_op_shli_vec: |
| 3482 | case INDEX_op_shri_vec: |
| 3483 | /* We must expand the operation for MO_8. */ |
| 3484 | return vece == MO_8 ? -1 : 1; |
| 3485 | |
| 3486 | case INDEX_op_sari_vec: |
Richard Henderson | 264e418 | 2021-12-17 20:16:43 -0800 | [diff] [blame] | 3487 | switch (vece) { |
| 3488 | case MO_8: |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3489 | return -1; |
Richard Henderson | 264e418 | 2021-12-17 20:16:43 -0800 | [diff] [blame] | 3490 | case MO_16: |
| 3491 | case MO_32: |
| 3492 | return 1; |
| 3493 | case MO_64: |
| 3494 | if (have_avx512vl) { |
| 3495 | return 1; |
| 3496 | } |
| 3497 | /* |
| 3498 | * We can emulate this for MO_64, but it does not pay off |
| 3499 | * unless we're producing at least 4 values. |
| 3500 | */ |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3501 | return type >= TCG_TYPE_V256 ? -1 : 0; |
| 3502 | } |
Richard Henderson | 264e418 | 2021-12-17 20:16:43 -0800 | [diff] [blame] | 3503 | return 0; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3504 | |
Richard Henderson | 0a8d7a3 | 2019-04-18 19:19:31 -1000 | [diff] [blame] | 3505 | case INDEX_op_shls_vec: |
| 3506 | case INDEX_op_shrs_vec: |
| 3507 | return vece >= MO_16; |
| 3508 | case INDEX_op_sars_vec: |
Richard Henderson | 47b331b | 2021-12-16 06:33:47 -0800 | [diff] [blame] | 3509 | switch (vece) { |
| 3510 | case MO_16: |
| 3511 | case MO_32: |
| 3512 | return 1; |
| 3513 | case MO_64: |
| 3514 | return have_avx512vl; |
| 3515 | } |
| 3516 | return 0; |
Richard Henderson | 885b170 | 2020-04-19 19:14:51 -0700 | [diff] [blame] | 3517 | case INDEX_op_rotls_vec: |
| 3518 | return vece >= MO_16 ? -1 : 0; |
Richard Henderson | 0a8d7a3 | 2019-04-18 19:19:31 -1000 | [diff] [blame] | 3519 | |
Richard Henderson | a2ce146 | 2019-04-14 09:13:21 -1000 | [diff] [blame] | 3520 | case INDEX_op_shlv_vec: |
| 3521 | case INDEX_op_shrv_vec: |
Richard Henderson | ef77ce0 | 2021-12-15 21:18:48 -0800 | [diff] [blame] | 3522 | switch (vece) { |
| 3523 | case MO_16: |
| 3524 | return have_avx512bw; |
| 3525 | case MO_32: |
| 3526 | case MO_64: |
| 3527 | return have_avx2; |
| 3528 | } |
| 3529 | return 0; |
Richard Henderson | a2ce146 | 2019-04-14 09:13:21 -1000 | [diff] [blame] | 3530 | case INDEX_op_sarv_vec: |
Richard Henderson | ef77ce0 | 2021-12-15 21:18:48 -0800 | [diff] [blame] | 3531 | switch (vece) { |
| 3532 | case MO_16: |
| 3533 | return have_avx512bw; |
| 3534 | case MO_32: |
| 3535 | return have_avx2; |
| 3536 | case MO_64: |
| 3537 | return have_avx512vl; |
| 3538 | } |
| 3539 | return 0; |
Richard Henderson | 885b170 | 2020-04-19 19:14:51 -0700 | [diff] [blame] | 3540 | case INDEX_op_rotlv_vec: |
| 3541 | case INDEX_op_rotrv_vec: |
Richard Henderson | 102cd35 | 2021-12-18 09:15:29 -0800 | [diff] [blame] | 3542 | switch (vece) { |
Richard Henderson | 786c7ef | 2021-12-18 11:04:34 -0800 | [diff] [blame] | 3543 | case MO_16: |
| 3544 | return have_avx512vbmi2 ? -1 : 0; |
Richard Henderson | 102cd35 | 2021-12-18 09:15:29 -0800 | [diff] [blame] | 3545 | case MO_32: |
| 3546 | case MO_64: |
| 3547 | return have_avx512vl ? 1 : have_avx2 ? -1 : 0; |
| 3548 | } |
| 3549 | return 0; |
Richard Henderson | a2ce146 | 2019-04-14 09:13:21 -1000 | [diff] [blame] | 3550 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3551 | case INDEX_op_mul_vec: |
Richard Henderson | 4c8b968 | 2021-12-16 07:14:24 -0800 | [diff] [blame] | 3552 | switch (vece) { |
| 3553 | case MO_8: |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3554 | return -1; |
Richard Henderson | 4c8b968 | 2021-12-16 07:14:24 -0800 | [diff] [blame] | 3555 | case MO_64: |
| 3556 | return have_avx512dq; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3557 | } |
| 3558 | return 1; |
| 3559 | |
Richard Henderson | 8ffafbc | 2018-12-17 19:00:41 -0800 | [diff] [blame] | 3560 | case INDEX_op_ssadd_vec: |
| 3561 | case INDEX_op_usadd_vec: |
| 3562 | case INDEX_op_sssub_vec: |
| 3563 | case INDEX_op_ussub_vec: |
| 3564 | return vece <= MO_16; |
Richard Henderson | bc37faf | 2018-12-17 20:17:56 -0800 | [diff] [blame] | 3565 | case INDEX_op_smin_vec: |
| 3566 | case INDEX_op_smax_vec: |
| 3567 | case INDEX_op_umin_vec: |
| 3568 | case INDEX_op_umax_vec: |
Richard Henderson | 18f9b65 | 2019-04-17 15:54:20 -1000 | [diff] [blame] | 3569 | case INDEX_op_abs_vec: |
Richard Henderson | dac1648 | 2021-12-16 06:54:26 -0800 | [diff] [blame] | 3570 | return vece <= MO_32 || have_avx512vl; |
Richard Henderson | 8ffafbc | 2018-12-17 19:00:41 -0800 | [diff] [blame] | 3571 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3572 | default: |
| 3573 | return 0; |
| 3574 | } |
| 3575 | } |
| 3576 | |
Richard Henderson | 885b170 | 2020-04-19 19:14:51 -0700 | [diff] [blame] | 3577 | static void expand_vec_shi(TCGType type, unsigned vece, TCGOpcode opc, |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3578 | TCGv_vec v0, TCGv_vec v1, TCGArg imm) |
| 3579 | { |
| 3580 | TCGv_vec t1, t2; |
| 3581 | |
| 3582 | tcg_debug_assert(vece == MO_8); |
| 3583 | |
| 3584 | t1 = tcg_temp_new_vec(type); |
| 3585 | t2 = tcg_temp_new_vec(type); |
| 3586 | |
Richard Henderson | 885b170 | 2020-04-19 19:14:51 -0700 | [diff] [blame] | 3587 | /* |
| 3588 | * Unpack to W, shift, and repack. Tricky bits: |
| 3589 | * (1) Use punpck*bw x,x to produce DDCCBBAA, |
| 3590 | * i.e. duplicate in other half of the 16-bit lane. |
| 3591 | * (2) For right-shift, add 8 so that the high half of the lane |
| 3592 | * becomes zero. For left-shift, and left-rotate, we must |
| 3593 | * shift up and down again. |
| 3594 | * (3) Step 2 leaves high half zero such that PACKUSWB |
| 3595 | * (pack with unsigned saturation) does not modify |
| 3596 | * the quantity. |
| 3597 | */ |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3598 | vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8, |
| 3599 | tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1)); |
| 3600 | vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8, |
| 3601 | tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1)); |
| 3602 | |
Richard Henderson | 885b170 | 2020-04-19 19:14:51 -0700 | [diff] [blame] | 3603 | if (opc != INDEX_op_rotli_vec) { |
| 3604 | imm += 8; |
| 3605 | } |
| 3606 | if (opc == INDEX_op_shri_vec) { |
| 3607 | tcg_gen_shri_vec(MO_16, t1, t1, imm); |
| 3608 | tcg_gen_shri_vec(MO_16, t2, t2, imm); |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3609 | } else { |
Richard Henderson | 885b170 | 2020-04-19 19:14:51 -0700 | [diff] [blame] | 3610 | tcg_gen_shli_vec(MO_16, t1, t1, imm); |
| 3611 | tcg_gen_shli_vec(MO_16, t2, t2, imm); |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3612 | tcg_gen_shri_vec(MO_16, t1, t1, 8); |
| 3613 | tcg_gen_shri_vec(MO_16, t2, t2, 8); |
| 3614 | } |
| 3615 | |
| 3616 | vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8, |
| 3617 | tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t2)); |
| 3618 | tcg_temp_free_vec(t1); |
| 3619 | tcg_temp_free_vec(t2); |
| 3620 | } |
| 3621 | |
/*
 * Expand an immediate arithmetic right shift for the element sizes x86
 * cannot encode directly: MO_8 (no byte shifts) and MO_64 (no PSRAQ
 * outside of AVX512).
 */
static void expand_vec_sari(TCGType type, unsigned vece,
                            TCGv_vec v0, TCGv_vec v1, TCGArg imm)
{
    TCGv_vec t1, t2;

    switch (vece) {
    case MO_8:
        /* Unpack to W, shift, and repack, as in expand_vec_shi.  */
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
                  tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
        vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
                  tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
        /*
         * The byte now sits in the high half of each 16-bit lane, so an
         * arithmetic word shift by imm+8 shifts in copies of the sign bit.
         * Shifting by >= 8 leaves each word in [-128, 127], so the signed
         * saturating pack (PACKSSWB) below does not modify the values.
         */
        tcg_gen_sari_vec(MO_16, t1, t1, imm + 8);
        tcg_gen_sari_vec(MO_16, t2, t2, imm + 8);
        vec_gen_3(INDEX_op_x86_packss_vec, type, MO_8,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    case MO_64:
        t1 = tcg_temp_new_vec(type);
        if (imm <= 32) {
            /*
             * We can emulate a small sign extend by performing an arithmetic
             * 32-bit shift and overwriting the high half of a 64-bit logical
             * shift.  Note that the ISA says shift of 32 is valid, but TCG
             * does not, so we have to bound the smaller shift -- we get the
             * same result in the high half either way.
             */
            tcg_gen_sari_vec(MO_32, t1, v1, MIN(imm, 31));
            tcg_gen_shri_vec(MO_64, v0, v1, imm);
            /* Blend mask 0xaa takes the odd (high) 32-bit elements from t1. */
            vec_gen_4(INDEX_op_x86_blend_vec, type, MO_32,
                      tcgv_vec_arg(v0), tcgv_vec_arg(v0),
                      tcgv_vec_arg(t1), 0xaa);
        } else {
            /* Otherwise we will need to use a compare vs 0 to produce
             * the sign-extend, shift and merge.
             */
            tcg_gen_cmp_vec(TCG_COND_GT, MO_64, t1,
                            tcg_constant_vec(type, MO_64, 0), v1);
            tcg_gen_shri_vec(MO_64, v0, v1, imm);
            tcg_gen_shli_vec(MO_64, t1, t1, 64 - imm);
            tcg_gen_or_vec(MO_64, v0, v0, t1);
        }
        tcg_temp_free_vec(t1);
        break;

    default:
        g_assert_not_reached();
    }
}
| 3676 | |
Richard Henderson | 885b170 | 2020-04-19 19:14:51 -0700 | [diff] [blame] | 3677 | static void expand_vec_rotli(TCGType type, unsigned vece, |
| 3678 | TCGv_vec v0, TCGv_vec v1, TCGArg imm) |
| 3679 | { |
| 3680 | TCGv_vec t; |
| 3681 | |
| 3682 | if (vece == MO_8) { |
| 3683 | expand_vec_shi(type, vece, INDEX_op_rotli_vec, v0, v1, imm); |
| 3684 | return; |
| 3685 | } |
| 3686 | |
Richard Henderson | 786c7ef | 2021-12-18 11:04:34 -0800 | [diff] [blame] | 3687 | if (have_avx512vbmi2) { |
| 3688 | vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece, |
| 3689 | tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm); |
| 3690 | return; |
| 3691 | } |
| 3692 | |
Richard Henderson | 885b170 | 2020-04-19 19:14:51 -0700 | [diff] [blame] | 3693 | t = tcg_temp_new_vec(type); |
| 3694 | tcg_gen_shli_vec(vece, t, v1, imm); |
| 3695 | tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm); |
| 3696 | tcg_gen_or_vec(vece, v0, v0, t); |
| 3697 | tcg_temp_free_vec(t); |
| 3698 | } |
| 3699 | |
Richard Henderson | 885b170 | 2020-04-19 19:14:51 -0700 | [diff] [blame] | 3700 | static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0, |
| 3701 | TCGv_vec v1, TCGv_vec sh, bool right) |
| 3702 | { |
Richard Henderson | 786c7ef | 2021-12-18 11:04:34 -0800 | [diff] [blame] | 3703 | TCGv_vec t; |
Richard Henderson | 885b170 | 2020-04-19 19:14:51 -0700 | [diff] [blame] | 3704 | |
Richard Henderson | 786c7ef | 2021-12-18 11:04:34 -0800 | [diff] [blame] | 3705 | if (have_avx512vbmi2) { |
| 3706 | vec_gen_4(right ? INDEX_op_x86_vpshrdv_vec : INDEX_op_x86_vpshldv_vec, |
| 3707 | type, vece, tcgv_vec_arg(v0), tcgv_vec_arg(v1), |
| 3708 | tcgv_vec_arg(v1), tcgv_vec_arg(sh)); |
| 3709 | return; |
| 3710 | } |
| 3711 | |
| 3712 | t = tcg_temp_new_vec(type); |
Richard Henderson | 885b170 | 2020-04-19 19:14:51 -0700 | [diff] [blame] | 3713 | tcg_gen_dupi_vec(vece, t, 8 << vece); |
| 3714 | tcg_gen_sub_vec(vece, t, t, sh); |
| 3715 | if (right) { |
| 3716 | tcg_gen_shlv_vec(vece, t, v1, t); |
| 3717 | tcg_gen_shrv_vec(vece, v0, v1, sh); |
| 3718 | } else { |
| 3719 | tcg_gen_shrv_vec(vece, t, v1, t); |
| 3720 | tcg_gen_shlv_vec(vece, v0, v1, sh); |
| 3721 | } |
| 3722 | tcg_gen_or_vec(vece, v0, v0, t); |
| 3723 | tcg_temp_free_vec(t); |
| 3724 | } |
| 3725 | |
Richard Henderson | 1d442e4 | 2021-12-18 11:25:58 -0800 | [diff] [blame] | 3726 | static void expand_vec_rotls(TCGType type, unsigned vece, |
| 3727 | TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh) |
| 3728 | { |
| 3729 | TCGv_vec t = tcg_temp_new_vec(type); |
| 3730 | |
| 3731 | tcg_debug_assert(vece != MO_8); |
| 3732 | |
| 3733 | if (vece >= MO_32 ? have_avx512vl : have_avx512vbmi2) { |
| 3734 | tcg_gen_dup_i32_vec(vece, t, lsh); |
| 3735 | if (vece >= MO_32) { |
| 3736 | tcg_gen_rotlv_vec(vece, v0, v1, t); |
| 3737 | } else { |
| 3738 | expand_vec_rotv(type, vece, v0, v1, t, false); |
| 3739 | } |
| 3740 | } else { |
| 3741 | TCGv_i32 rsh = tcg_temp_new_i32(); |
| 3742 | |
| 3743 | tcg_gen_neg_i32(rsh, lsh); |
| 3744 | tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1); |
| 3745 | tcg_gen_shls_vec(vece, t, v1, lsh); |
| 3746 | tcg_gen_shrs_vec(vece, v0, v1, rsh); |
| 3747 | tcg_gen_or_vec(vece, v0, v0, t); |
| 3748 | |
| 3749 | tcg_temp_free_i32(rsh); |
| 3750 | } |
| 3751 | |
| 3752 | tcg_temp_free_vec(t); |
| 3753 | } |
| 3754 | |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3755 | static void expand_vec_mul(TCGType type, unsigned vece, |
| 3756 | TCGv_vec v0, TCGv_vec v1, TCGv_vec v2) |
| 3757 | { |
Richard Henderson | 9739a05 | 2020-03-31 20:03:16 -0700 | [diff] [blame] | 3758 | TCGv_vec t1, t2, t3, t4, zero; |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3759 | |
| 3760 | tcg_debug_assert(vece == MO_8); |
| 3761 | |
| 3762 | /* |
| 3763 | * Unpack v1 bytes to words, 0 | x. |
| 3764 | * Unpack v2 bytes to words, y | 0. |
| 3765 | * This leaves the 8-bit result, x * y, with 8 bits of right padding. |
| 3766 | * Shift logical right by 8 bits to clear the high 8 bytes before |
| 3767 | * using an unsigned saturated pack. |
| 3768 | * |
| 3769 | * The difference between the V64, V128 and V256 cases is merely how |
| 3770 | * we distribute the expansion between temporaries. |
| 3771 | */ |
| 3772 | switch (type) { |
| 3773 | case TCG_TYPE_V64: |
| 3774 | t1 = tcg_temp_new_vec(TCG_TYPE_V128); |
| 3775 | t2 = tcg_temp_new_vec(TCG_TYPE_V128); |
Richard Henderson | 9739a05 | 2020-03-31 20:03:16 -0700 | [diff] [blame] | 3776 | zero = tcg_constant_vec(TCG_TYPE_V128, MO_8, 0); |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3777 | vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8, |
Richard Henderson | 9739a05 | 2020-03-31 20:03:16 -0700 | [diff] [blame] | 3778 | tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(zero)); |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3779 | vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8, |
Richard Henderson | 9739a05 | 2020-03-31 20:03:16 -0700 | [diff] [blame] | 3780 | tcgv_vec_arg(t2), tcgv_vec_arg(zero), tcgv_vec_arg(v2)); |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3781 | tcg_gen_mul_vec(MO_16, t1, t1, t2); |
| 3782 | tcg_gen_shri_vec(MO_16, t1, t1, 8); |
| 3783 | vec_gen_3(INDEX_op_x86_packus_vec, TCG_TYPE_V128, MO_8, |
| 3784 | tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t1)); |
| 3785 | tcg_temp_free_vec(t1); |
| 3786 | tcg_temp_free_vec(t2); |
| 3787 | break; |
| 3788 | |
| 3789 | case TCG_TYPE_V128: |
| 3790 | case TCG_TYPE_V256: |
| 3791 | t1 = tcg_temp_new_vec(type); |
| 3792 | t2 = tcg_temp_new_vec(type); |
| 3793 | t3 = tcg_temp_new_vec(type); |
| 3794 | t4 = tcg_temp_new_vec(type); |
Richard Henderson | 9739a05 | 2020-03-31 20:03:16 -0700 | [diff] [blame] | 3795 | zero = tcg_constant_vec(TCG_TYPE_V128, MO_8, 0); |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3796 | vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8, |
Richard Henderson | 9739a05 | 2020-03-31 20:03:16 -0700 | [diff] [blame] | 3797 | tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(zero)); |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3798 | vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8, |
Richard Henderson | 9739a05 | 2020-03-31 20:03:16 -0700 | [diff] [blame] | 3799 | tcgv_vec_arg(t2), tcgv_vec_arg(zero), tcgv_vec_arg(v2)); |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3800 | vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8, |
Richard Henderson | 9739a05 | 2020-03-31 20:03:16 -0700 | [diff] [blame] | 3801 | tcgv_vec_arg(t3), tcgv_vec_arg(v1), tcgv_vec_arg(zero)); |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3802 | vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8, |
Richard Henderson | 9739a05 | 2020-03-31 20:03:16 -0700 | [diff] [blame] | 3803 | tcgv_vec_arg(t4), tcgv_vec_arg(zero), tcgv_vec_arg(v2)); |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3804 | tcg_gen_mul_vec(MO_16, t1, t1, t2); |
| 3805 | tcg_gen_mul_vec(MO_16, t3, t3, t4); |
| 3806 | tcg_gen_shri_vec(MO_16, t1, t1, 8); |
| 3807 | tcg_gen_shri_vec(MO_16, t3, t3, 8); |
| 3808 | vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8, |
| 3809 | tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t3)); |
| 3810 | tcg_temp_free_vec(t1); |
| 3811 | tcg_temp_free_vec(t2); |
| 3812 | tcg_temp_free_vec(t3); |
| 3813 | tcg_temp_free_vec(t4); |
| 3814 | break; |
| 3815 | |
| 3816 | default: |
| 3817 | g_assert_not_reached(); |
| 3818 | } |
| 3819 | } |
| 3820 | |
Richard Henderson | 904c5e1 | 2019-04-19 10:13:33 -1000 | [diff] [blame] | 3821 | static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0, |
| 3822 | TCGv_vec v1, TCGv_vec v2, TCGCond cond) |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3823 | { |
| 3824 | enum { |
Richard Henderson | ebcfb91 | 2019-04-30 20:45:16 -0700 | [diff] [blame] | 3825 | NEED_INV = 1, |
| 3826 | NEED_SWAP = 2, |
| 3827 | NEED_BIAS = 4, |
| 3828 | NEED_UMIN = 8, |
| 3829 | NEED_UMAX = 16, |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3830 | }; |
Richard Henderson | 9739a05 | 2020-03-31 20:03:16 -0700 | [diff] [blame] | 3831 | TCGv_vec t1, t2, t3; |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3832 | uint8_t fixup; |
| 3833 | |
Richard Henderson | ebcfb91 | 2019-04-30 20:45:16 -0700 | [diff] [blame] | 3834 | switch (cond) { |
| 3835 | case TCG_COND_EQ: |
| 3836 | case TCG_COND_GT: |
| 3837 | fixup = 0; |
| 3838 | break; |
| 3839 | case TCG_COND_NE: |
| 3840 | case TCG_COND_LE: |
| 3841 | fixup = NEED_INV; |
| 3842 | break; |
| 3843 | case TCG_COND_LT: |
| 3844 | fixup = NEED_SWAP; |
| 3845 | break; |
| 3846 | case TCG_COND_GE: |
| 3847 | fixup = NEED_SWAP | NEED_INV; |
| 3848 | break; |
| 3849 | case TCG_COND_LEU: |
Richard Henderson | 54e2d65 | 2021-12-16 08:48:57 -0800 | [diff] [blame] | 3850 | if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) { |
Richard Henderson | ebcfb91 | 2019-04-30 20:45:16 -0700 | [diff] [blame] | 3851 | fixup = NEED_UMIN; |
| 3852 | } else { |
| 3853 | fixup = NEED_BIAS | NEED_INV; |
| 3854 | } |
| 3855 | break; |
| 3856 | case TCG_COND_GTU: |
Richard Henderson | 54e2d65 | 2021-12-16 08:48:57 -0800 | [diff] [blame] | 3857 | if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) { |
Richard Henderson | ebcfb91 | 2019-04-30 20:45:16 -0700 | [diff] [blame] | 3858 | fixup = NEED_UMIN | NEED_INV; |
| 3859 | } else { |
| 3860 | fixup = NEED_BIAS; |
| 3861 | } |
| 3862 | break; |
| 3863 | case TCG_COND_GEU: |
Richard Henderson | 54e2d65 | 2021-12-16 08:48:57 -0800 | [diff] [blame] | 3864 | if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) { |
Richard Henderson | ebcfb91 | 2019-04-30 20:45:16 -0700 | [diff] [blame] | 3865 | fixup = NEED_UMAX; |
| 3866 | } else { |
| 3867 | fixup = NEED_BIAS | NEED_SWAP | NEED_INV; |
| 3868 | } |
| 3869 | break; |
| 3870 | case TCG_COND_LTU: |
Richard Henderson | 54e2d65 | 2021-12-16 08:48:57 -0800 | [diff] [blame] | 3871 | if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) { |
Richard Henderson | ebcfb91 | 2019-04-30 20:45:16 -0700 | [diff] [blame] | 3872 | fixup = NEED_UMAX | NEED_INV; |
| 3873 | } else { |
| 3874 | fixup = NEED_BIAS | NEED_SWAP; |
| 3875 | } |
| 3876 | break; |
| 3877 | default: |
| 3878 | g_assert_not_reached(); |
| 3879 | } |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3880 | |
| 3881 | if (fixup & NEED_INV) { |
| 3882 | cond = tcg_invert_cond(cond); |
| 3883 | } |
| 3884 | if (fixup & NEED_SWAP) { |
| 3885 | t1 = v1, v1 = v2, v2 = t1; |
| 3886 | cond = tcg_swap_cond(cond); |
| 3887 | } |
| 3888 | |
| 3889 | t1 = t2 = NULL; |
Richard Henderson | ebcfb91 | 2019-04-30 20:45:16 -0700 | [diff] [blame] | 3890 | if (fixup & (NEED_UMIN | NEED_UMAX)) { |
| 3891 | t1 = tcg_temp_new_vec(type); |
| 3892 | if (fixup & NEED_UMIN) { |
| 3893 | tcg_gen_umin_vec(vece, t1, v1, v2); |
| 3894 | } else { |
| 3895 | tcg_gen_umax_vec(vece, t1, v1, v2); |
| 3896 | } |
| 3897 | v2 = t1; |
| 3898 | cond = TCG_COND_EQ; |
| 3899 | } else if (fixup & NEED_BIAS) { |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3900 | t1 = tcg_temp_new_vec(type); |
| 3901 | t2 = tcg_temp_new_vec(type); |
Richard Henderson | 9739a05 | 2020-03-31 20:03:16 -0700 | [diff] [blame] | 3902 | t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1)); |
| 3903 | tcg_gen_sub_vec(vece, t1, v1, t3); |
| 3904 | tcg_gen_sub_vec(vece, t2, v2, t3); |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3905 | v1 = t1; |
| 3906 | v2 = t2; |
| 3907 | cond = tcg_signed_cond(cond); |
| 3908 | } |
| 3909 | |
| 3910 | tcg_debug_assert(cond == TCG_COND_EQ || cond == TCG_COND_GT); |
| 3911 | /* Expand directly; do not recurse. */ |
| 3912 | vec_gen_4(INDEX_op_cmp_vec, type, vece, |
| 3913 | tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond); |
| 3914 | |
| 3915 | if (t1) { |
| 3916 | tcg_temp_free_vec(t1); |
| 3917 | if (t2) { |
| 3918 | tcg_temp_free_vec(t2); |
| 3919 | } |
| 3920 | } |
Richard Henderson | 904c5e1 | 2019-04-19 10:13:33 -1000 | [diff] [blame] | 3921 | return fixup & NEED_INV; |
| 3922 | } |
| 3923 | |
| 3924 | static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, |
| 3925 | TCGv_vec v1, TCGv_vec v2, TCGCond cond) |
| 3926 | { |
| 3927 | if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) { |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3928 | tcg_gen_not_vec(vece, v0, v0); |
| 3929 | } |
| 3930 | } |
| 3931 | |
Richard Henderson | 904c5e1 | 2019-04-19 10:13:33 -1000 | [diff] [blame] | 3932 | static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0, |
| 3933 | TCGv_vec c1, TCGv_vec c2, |
| 3934 | TCGv_vec v3, TCGv_vec v4, TCGCond cond) |
| 3935 | { |
| 3936 | TCGv_vec t = tcg_temp_new_vec(type); |
| 3937 | |
| 3938 | if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) { |
| 3939 | /* Invert the sense of the compare by swapping arguments. */ |
| 3940 | TCGv_vec x; |
| 3941 | x = v3, v3 = v4, v4 = x; |
| 3942 | } |
| 3943 | vec_gen_4(INDEX_op_x86_vpblendvb_vec, type, vece, |
| 3944 | tcgv_vec_arg(v0), tcgv_vec_arg(v4), |
| 3945 | tcgv_vec_arg(v3), tcgv_vec_arg(t)); |
| 3946 | tcg_temp_free_vec(t); |
| 3947 | } |
| 3948 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3949 | void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, |
| 3950 | TCGArg a0, ...) |
| 3951 | { |
| 3952 | va_list va; |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3953 | TCGArg a2; |
Richard Henderson | 904c5e1 | 2019-04-19 10:13:33 -1000 | [diff] [blame] | 3954 | TCGv_vec v0, v1, v2, v3, v4; |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3955 | |
| 3956 | va_start(va, a0); |
| 3957 | v0 = temp_tcgv_vec(arg_temp(a0)); |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3958 | v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); |
| 3959 | a2 = va_arg(va, TCGArg); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3960 | |
| 3961 | switch (opc) { |
| 3962 | case INDEX_op_shli_vec: |
| 3963 | case INDEX_op_shri_vec: |
Richard Henderson | 885b170 | 2020-04-19 19:14:51 -0700 | [diff] [blame] | 3964 | expand_vec_shi(type, vece, opc, v0, v1, a2); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3965 | break; |
| 3966 | |
| 3967 | case INDEX_op_sari_vec: |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3968 | expand_vec_sari(type, vece, v0, v1, a2); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3969 | break; |
| 3970 | |
Richard Henderson | 885b170 | 2020-04-19 19:14:51 -0700 | [diff] [blame] | 3971 | case INDEX_op_rotli_vec: |
| 3972 | expand_vec_rotli(type, vece, v0, v1, a2); |
| 3973 | break; |
| 3974 | |
| 3975 | case INDEX_op_rotls_vec: |
| 3976 | expand_vec_rotls(type, vece, v0, v1, temp_tcgv_i32(arg_temp(a2))); |
| 3977 | break; |
| 3978 | |
| 3979 | case INDEX_op_rotlv_vec: |
| 3980 | v2 = temp_tcgv_vec(arg_temp(a2)); |
| 3981 | expand_vec_rotv(type, vece, v0, v1, v2, false); |
| 3982 | break; |
| 3983 | case INDEX_op_rotrv_vec: |
| 3984 | v2 = temp_tcgv_vec(arg_temp(a2)); |
| 3985 | expand_vec_rotv(type, vece, v0, v1, v2, true); |
| 3986 | break; |
| 3987 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3988 | case INDEX_op_mul_vec: |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3989 | v2 = temp_tcgv_vec(arg_temp(a2)); |
| 3990 | expand_vec_mul(type, vece, v0, v1, v2); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3991 | break; |
| 3992 | |
| 3993 | case INDEX_op_cmp_vec: |
Richard Henderson | 44f1441 | 2018-12-18 13:56:00 -0800 | [diff] [blame] | 3994 | v2 = temp_tcgv_vec(arg_temp(a2)); |
| 3995 | expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 3996 | break; |
| 3997 | |
Richard Henderson | 904c5e1 | 2019-04-19 10:13:33 -1000 | [diff] [blame] | 3998 | case INDEX_op_cmpsel_vec: |
| 3999 | v2 = temp_tcgv_vec(arg_temp(a2)); |
| 4000 | v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); |
| 4001 | v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); |
| 4002 | expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg)); |
| 4003 | break; |
| 4004 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 4005 | default: |
| 4006 | break; |
| 4007 | } |
| 4008 | |
| 4009 | va_end(va); |
| 4010 | } |
| 4011 | |
Emilio G. Cota | e268f4c | 2017-07-05 18:12:56 -0400 | [diff] [blame] | 4012 | static const int tcg_target_callee_save_regs[] = { |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4013 | #if TCG_TARGET_REG_BITS == 64 |
| 4014 | TCG_REG_RBP, |
| 4015 | TCG_REG_RBX, |
Stefan Weil | 8d91871 | 2012-04-12 20:46:32 +0200 | [diff] [blame] | 4016 | #if defined(_WIN64) |
| 4017 | TCG_REG_RDI, |
| 4018 | TCG_REG_RSI, |
| 4019 | #endif |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4020 | TCG_REG_R12, |
| 4021 | TCG_REG_R13, |
Blue Swirl | cea5f9a | 2011-05-15 16:03:25 +0000 | [diff] [blame] | 4022 | TCG_REG_R14, /* Currently used for the global env. */ |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4023 | TCG_REG_R15, |
| 4024 | #else |
Blue Swirl | cea5f9a | 2011-05-15 16:03:25 +0000 | [diff] [blame] | 4025 | TCG_REG_EBP, /* Currently used for the global env. */ |
bellard | b03cce8 | 2008-05-10 10:52:05 +0000 | [diff] [blame] | 4026 | TCG_REG_EBX, |
| 4027 | TCG_REG_ESI, |
| 4028 | TCG_REG_EDI, |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4029 | #endif |
bellard | b03cce8 | 2008-05-10 10:52:05 +0000 | [diff] [blame] | 4030 | }; |
| 4031 | |
Richard Henderson | 813da62 | 2012-03-19 12:25:11 -0700 | [diff] [blame] | 4032 | /* Compute frame size via macros, to share between tcg_target_qemu_prologue |
| 4033 | and tcg_register_jit. */ |
| 4034 | |
| 4035 | #define PUSH_SIZE \ |
| 4036 | ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \ |
| 4037 | * (TCG_TARGET_REG_BITS / 8)) |
| 4038 | |
| 4039 | #define FRAME_SIZE \ |
| 4040 | ((PUSH_SIZE \ |
| 4041 | + TCG_STATIC_CALL_ARGS_SIZE \ |
| 4042 | + CPU_TEMP_BUF_NLONGS * sizeof(long) \ |
| 4043 | + TCG_TARGET_STACK_ALIGN - 1) \ |
| 4044 | & ~(TCG_TARGET_STACK_ALIGN - 1)) |
| 4045 | |
bellard | b03cce8 | 2008-05-10 10:52:05 +0000 | [diff] [blame] | 4046 | /* Generate global QEMU prologue and epilogue code */ |
Richard Henderson | e4d58b4 | 2010-06-02 17:26:56 -0700 | [diff] [blame] | 4047 | static void tcg_target_qemu_prologue(TCGContext *s) |
bellard | b03cce8 | 2008-05-10 10:52:05 +0000 | [diff] [blame] | 4048 | { |
Richard Henderson | 813da62 | 2012-03-19 12:25:11 -0700 | [diff] [blame] | 4049 | int i, stack_addend; |
Richard Henderson | 7868652 | 2010-05-21 08:30:35 -0700 | [diff] [blame] | 4050 | |
bellard | b03cce8 | 2008-05-10 10:52:05 +0000 | [diff] [blame] | 4051 | /* TB prologue */ |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4052 | |
Blue Swirl | ac0275d | 2011-05-14 13:14:45 +0000 | [diff] [blame] | 4053 | /* Reserve some stack space, also for TCG temps. */ |
Richard Henderson | 813da62 | 2012-03-19 12:25:11 -0700 | [diff] [blame] | 4054 | stack_addend = FRAME_SIZE - PUSH_SIZE; |
Blue Swirl | ac0275d | 2011-05-14 13:14:45 +0000 | [diff] [blame] | 4055 | tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, |
| 4056 | CPU_TEMP_BUF_NLONGS * sizeof(long)); |
| 4057 | |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4058 | /* Save all callee saved registers. */ |
| 4059 | for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { |
bellard | b03cce8 | 2008-05-10 10:52:05 +0000 | [diff] [blame] | 4060 | tcg_out_push(s, tcg_target_callee_save_regs[i]); |
| 4061 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4062 | |
Blue Swirl | 6a18ae2 | 2012-02-15 18:02:32 +0000 | [diff] [blame] | 4063 | #if TCG_TARGET_REG_BITS == 32 |
| 4064 | tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, |
| 4065 | (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); |
Stefan Weil | b18212c | 2012-09-13 19:37:44 +0200 | [diff] [blame] | 4066 | tcg_out_addi(s, TCG_REG_ESP, -stack_addend); |
| 4067 | /* jmp *tb. */ |
| 4068 | tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, |
Paolo Bonzini | 7d37435 | 2018-12-13 23:37:37 +0100 | [diff] [blame] | 4069 | (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 |
| 4070 | + stack_addend); |
Blue Swirl | 6a18ae2 | 2012-02-15 18:02:32 +0000 | [diff] [blame] | 4071 | #else |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 4072 | # if !defined(CONFIG_SOFTMMU) |
Richard Henderson | 913c2bd | 2018-12-03 09:22:57 -0600 | [diff] [blame] | 4073 | if (guest_base) { |
| 4074 | int seg = setup_guest_base_seg(); |
| 4075 | if (seg != 0) { |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 4076 | x86_guest_base.seg = seg; |
Richard Henderson | 913c2bd | 2018-12-03 09:22:57 -0600 | [diff] [blame] | 4077 | } else if (guest_base == (int32_t)guest_base) { |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 4078 | x86_guest_base.ofs = guest_base; |
Richard Henderson | 913c2bd | 2018-12-03 09:22:57 -0600 | [diff] [blame] | 4079 | } else { |
| 4080 | /* Choose R12 because, as a base, it requires a SIB byte. */ |
Richard Henderson | 61713c2 | 2023-04-19 18:29:14 +0200 | [diff] [blame] | 4081 | x86_guest_base.index = TCG_REG_R12; |
| 4082 | tcg_out_movi(s, TCG_TYPE_PTR, x86_guest_base.index, guest_base); |
| 4083 | tcg_regset_set_reg(s->reserved_regs, x86_guest_base.index); |
Richard Henderson | 913c2bd | 2018-12-03 09:22:57 -0600 | [diff] [blame] | 4084 | } |
| 4085 | } |
| 4086 | # endif |
Blue Swirl | cea5f9a | 2011-05-15 16:03:25 +0000 | [diff] [blame] | 4087 | tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); |
Blue Swirl | 6a18ae2 | 2012-02-15 18:02:32 +0000 | [diff] [blame] | 4088 | tcg_out_addi(s, TCG_REG_ESP, -stack_addend); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4089 | /* jmp *tb. */ |
Blue Swirl | cea5f9a | 2011-05-15 16:03:25 +0000 | [diff] [blame] | 4090 | tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); |
Stefan Weil | b18212c | 2012-09-13 19:37:44 +0200 | [diff] [blame] | 4091 | #endif |
Richard Henderson | 7868652 | 2010-05-21 08:30:35 -0700 | [diff] [blame] | 4092 | |
Emilio G. Cota | 5cb4ef8 | 2017-04-26 23:29:18 -0400 | [diff] [blame] | 4093 | /* |
| 4094 | * Return path for goto_ptr. Set return value to 0, a-la exit_tb, |
| 4095 | * and fall through to the rest of the epilogue. |
| 4096 | */ |
Richard Henderson | c8bc116 | 2020-11-05 15:41:38 -0800 | [diff] [blame] | 4097 | tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); |
Emilio G. Cota | 5cb4ef8 | 2017-04-26 23:29:18 -0400 | [diff] [blame] | 4098 | tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0); |
| 4099 | |
bellard | b03cce8 | 2008-05-10 10:52:05 +0000 | [diff] [blame] | 4100 | /* TB epilogue */ |
Richard Henderson | 705ed47 | 2020-10-28 23:42:12 -0700 | [diff] [blame] | 4101 | tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4102 | |
Blue Swirl | e83c80f | 2011-05-28 06:51:52 +0000 | [diff] [blame] | 4103 | tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4104 | |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 4105 | if (have_avx2) { |
| 4106 | tcg_out_vex_opc(s, OPC_VZEROUPPER, 0, 0, 0, 0); |
| 4107 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4108 | for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) { |
bellard | b03cce8 | 2008-05-10 10:52:05 +0000 | [diff] [blame] | 4109 | tcg_out_pop(s, tcg_target_callee_save_regs[i]); |
| 4110 | } |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4111 | tcg_out_opc(s, OPC_RET, 0, 0, 0); |
bellard | b03cce8 | 2008-05-10 10:52:05 +0000 | [diff] [blame] | 4112 | } |
| 4113 | |
Richard Henderson | 4e45f23 | 2017-07-20 19:56:42 -1000 | [diff] [blame] | 4114 | static void tcg_out_nop_fill(tcg_insn_unit *p, int count) |
| 4115 | { |
| 4116 | memset(p, 0x90, count); |
| 4117 | } |
| 4118 | |
Richard Henderson | e4d58b4 | 2010-06-02 17:26:56 -0700 | [diff] [blame] | 4119 | static void tcg_target_init(TCGContext *s) |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 4120 | { |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 4121 | tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4122 | if (TCG_TARGET_REG_BITS == 64) { |
Richard Henderson | 770c2fc | 2017-08-17 14:47:43 -0700 | [diff] [blame] | 4123 | tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS; |
| 4124 | } |
| 4125 | if (have_avx1) { |
| 4126 | tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS; |
| 4127 | tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS; |
| 4128 | } |
| 4129 | if (have_avx2) { |
| 4130 | tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS; |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4131 | } |
Richard Henderson | 4ab50cc | 2010-04-13 15:26:17 -0700 | [diff] [blame] | 4132 | |
Richard Henderson | 672189c | 2018-07-22 15:28:02 -0700 | [diff] [blame] | 4133 | tcg_target_call_clobber_regs = ALL_VECTOR_REGS; |
Richard Henderson | 4ab50cc | 2010-04-13 15:26:17 -0700 | [diff] [blame] | 4134 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX); |
| 4135 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX); |
| 4136 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX); |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4137 | if (TCG_TARGET_REG_BITS == 64) { |
Stefan Weil | 8d91871 | 2012-04-12 20:46:32 +0200 | [diff] [blame] | 4138 | #if !defined(_WIN64) |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4139 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI); |
| 4140 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI); |
Stefan Weil | 8d91871 | 2012-04-12 20:46:32 +0200 | [diff] [blame] | 4141 | #endif |
Richard Henderson | 5d8a4f8 | 2010-06-03 17:35:17 -0700 | [diff] [blame] | 4142 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); |
| 4143 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); |
| 4144 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); |
| 4145 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); |
| 4146 | } |
Richard Henderson | 4ab50cc | 2010-04-13 15:26:17 -0700 | [diff] [blame] | 4147 | |
Richard Henderson | ccb1bb6 | 2017-09-11 11:25:55 -0700 | [diff] [blame] | 4148 | s->reserved_regs = 0; |
Blue Swirl | e83c80f | 2011-05-28 06:51:52 +0000 | [diff] [blame] | 4149 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); |
Richard Henderson | 098d0fc | 2023-04-17 10:16:28 +0200 | [diff] [blame] | 4150 | tcg_regset_set_reg(s->reserved_regs, TCG_TMP_VEC); |
Richard Henderson | 6b258e7 | 2022-11-08 08:39:26 +1100 | [diff] [blame] | 4151 | #ifdef _WIN64 |
| 4152 | /* These are call saved, and we don't save them, so don't use them. */ |
| 4153 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM6); |
| 4154 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM7); |
| 4155 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM8); |
| 4156 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM9); |
| 4157 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM10); |
| 4158 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM11); |
| 4159 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM12); |
| 4160 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM13); |
| 4161 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM14); |
| 4162 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM15); |
| 4163 | #endif |
bellard | c896fe2 | 2008-02-01 10:05:41 +0000 | [diff] [blame] | 4164 | } |
Richard Henderson | 813da62 | 2012-03-19 12:25:11 -0700 | [diff] [blame] | 4165 | |
/*
 * DWARF call-frame information describing the prologue emitted above,
 * handed to the JIT debug interface by tcg_register_jit() below.
 */
typedef struct {
    DebugFrameHeader h;        /* common CIE + FDE header (shared TCG code) */
    uint8_t fde_def_cfa[4];    /* DW_CFA_def_cfa: CFA reg + 2-byte uleb128 size */
    uint8_t fde_reg_ofs[14];   /* DW_CFA_offset records for saved registers;
                                  sized for the 64-bit variant — the 32-bit
                                  variant fills fewer bytes and the zero tail
                                  reads as DW_CFA_nop padding */
} DebugFrame;
| 4171 | |
Richard Henderson | b5cc476 | 2013-06-05 07:56:29 -0700 | [diff] [blame] | 4172 | /* We're expecting a 2 byte uleb128 encoded value. */ |
| 4173 | QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); |
| 4174 | |
Stefan Weil | c170cb6 | 2012-04-06 08:06:41 +0200 | [diff] [blame] | 4175 | #if !defined(__ELF__) |
| 4176 | /* Host machine without ELF. */ |
| 4177 | #elif TCG_TARGET_REG_BITS == 64 |
Richard Henderson | 813da62 | 2012-03-19 12:25:11 -0700 | [diff] [blame] | 4178 | #define ELF_HOST_MACHINE EM_X86_64 |
Richard Henderson | e9a9a5b | 2014-05-15 12:48:43 -0700 | [diff] [blame] | 4179 | static const DebugFrame debug_frame = { |
| 4180 | .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ |
| 4181 | .h.cie.id = -1, |
| 4182 | .h.cie.version = 1, |
| 4183 | .h.cie.code_align = 1, |
| 4184 | .h.cie.data_align = 0x78, /* sleb128 -8 */ |
| 4185 | .h.cie.return_column = 16, |
Richard Henderson | 813da62 | 2012-03-19 12:25:11 -0700 | [diff] [blame] | 4186 | |
Richard Henderson | 497a22e | 2013-06-05 07:39:57 -0700 | [diff] [blame] | 4187 | /* Total FDE size does not include the "len" member. */ |
Richard Henderson | e9a9a5b | 2014-05-15 12:48:43 -0700 | [diff] [blame] | 4188 | .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), |
Richard Henderson | 497a22e | 2013-06-05 07:39:57 -0700 | [diff] [blame] | 4189 | |
| 4190 | .fde_def_cfa = { |
Richard Henderson | 813da62 | 2012-03-19 12:25:11 -0700 | [diff] [blame] | 4191 | 12, 7, /* DW_CFA_def_cfa %rsp, ... */ |
| 4192 | (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ |
| 4193 | (FRAME_SIZE >> 7) |
| 4194 | }, |
Richard Henderson | 497a22e | 2013-06-05 07:39:57 -0700 | [diff] [blame] | 4195 | .fde_reg_ofs = { |
Richard Henderson | 813da62 | 2012-03-19 12:25:11 -0700 | [diff] [blame] | 4196 | 0x90, 1, /* DW_CFA_offset, %rip, -8 */ |
| 4197 | /* The following ordering must match tcg_target_callee_save_regs. */ |
| 4198 | 0x86, 2, /* DW_CFA_offset, %rbp, -16 */ |
| 4199 | 0x83, 3, /* DW_CFA_offset, %rbx, -24 */ |
| 4200 | 0x8c, 4, /* DW_CFA_offset, %r12, -32 */ |
| 4201 | 0x8d, 5, /* DW_CFA_offset, %r13, -40 */ |
| 4202 | 0x8e, 6, /* DW_CFA_offset, %r14, -48 */ |
| 4203 | 0x8f, 7, /* DW_CFA_offset, %r15, -56 */ |
| 4204 | } |
| 4205 | }; |
| 4206 | #else |
| 4207 | #define ELF_HOST_MACHINE EM_386 |
Richard Henderson | e9a9a5b | 2014-05-15 12:48:43 -0700 | [diff] [blame] | 4208 | static const DebugFrame debug_frame = { |
| 4209 | .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ |
| 4210 | .h.cie.id = -1, |
| 4211 | .h.cie.version = 1, |
| 4212 | .h.cie.code_align = 1, |
| 4213 | .h.cie.data_align = 0x7c, /* sleb128 -4 */ |
| 4214 | .h.cie.return_column = 8, |
Richard Henderson | 813da62 | 2012-03-19 12:25:11 -0700 | [diff] [blame] | 4215 | |
Richard Henderson | 497a22e | 2013-06-05 07:39:57 -0700 | [diff] [blame] | 4216 | /* Total FDE size does not include the "len" member. */ |
Richard Henderson | e9a9a5b | 2014-05-15 12:48:43 -0700 | [diff] [blame] | 4217 | .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), |
Richard Henderson | 497a22e | 2013-06-05 07:39:57 -0700 | [diff] [blame] | 4218 | |
| 4219 | .fde_def_cfa = { |
Richard Henderson | 813da62 | 2012-03-19 12:25:11 -0700 | [diff] [blame] | 4220 | 12, 4, /* DW_CFA_def_cfa %esp, ... */ |
| 4221 | (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ |
| 4222 | (FRAME_SIZE >> 7) |
| 4223 | }, |
Richard Henderson | 497a22e | 2013-06-05 07:39:57 -0700 | [diff] [blame] | 4224 | .fde_reg_ofs = { |
Richard Henderson | 813da62 | 2012-03-19 12:25:11 -0700 | [diff] [blame] | 4225 | 0x88, 1, /* DW_CFA_offset, %eip, -4 */ |
| 4226 | /* The following ordering must match tcg_target_callee_save_regs. */ |
| 4227 | 0x85, 2, /* DW_CFA_offset, %ebp, -8 */ |
| 4228 | 0x83, 3, /* DW_CFA_offset, %ebx, -12 */ |
| 4229 | 0x86, 4, /* DW_CFA_offset, %esi, -16 */ |
| 4230 | 0x87, 5, /* DW_CFA_offset, %edi, -20 */ |
| 4231 | } |
| 4232 | }; |
| 4233 | #endif |
| 4234 | |
Stefan Weil | c170cb6 | 2012-04-06 08:06:41 +0200 | [diff] [blame] | 4235 | #if defined(ELF_HOST_MACHINE) |
Richard Henderson | 755bf9e | 2020-10-29 09:17:30 -0700 | [diff] [blame] | 4236 | void tcg_register_jit(const void *buf, size_t buf_size) |
Richard Henderson | 813da62 | 2012-03-19 12:25:11 -0700 | [diff] [blame] | 4237 | { |
Richard Henderson | 813da62 | 2012-03-19 12:25:11 -0700 | [diff] [blame] | 4238 | tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); |
| 4239 | } |
Stefan Weil | c170cb6 | 2012-04-06 08:06:41 +0200 | [diff] [blame] | 4240 | #endif |