/*
 * AArch64 specific helpers
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/gdbstub.h"
#include "exec/helper-proto.h"
#include "qemu/host-utils.h"
#include "qemu/log.h"
#include "sysemu/sysemu.h"
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "tcg.h"
#include "fpu/softfloat.h"
#include <zlib.h> /* For crc32 */
Alexander Graf | 8220e91 | 2013-12-17 19:42:34 +0000 | [diff] [blame] | 37 | |
| 38 | /* C2.4.7 Multiply and divide */ |
| 39 | /* special cases for 0 and LLONG_MIN are mandated by the standard */ |
| 40 | uint64_t HELPER(udiv64)(uint64_t num, uint64_t den) |
| 41 | { |
| 42 | if (den == 0) { |
| 43 | return 0; |
| 44 | } |
| 45 | return num / den; |
| 46 | } |
| 47 | |
| 48 | int64_t HELPER(sdiv64)(int64_t num, int64_t den) |
| 49 | { |
| 50 | if (den == 0) { |
| 51 | return 0; |
| 52 | } |
| 53 | if (num == LLONG_MIN && den == -1) { |
| 54 | return LLONG_MIN; |
| 55 | } |
| 56 | return num / den; |
| 57 | } |
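
/*
 * Illustrative examples (not part of the helper API): the AArch64
 * divide instructions never trap, so dividing anything by zero yields
 * 0, and the one overflowing signed case wraps:
 *   helper_sdiv64(LLONG_MIN, -1) == LLONG_MIN
 * (helper_sdiv64 being the expansion of HELPER(sdiv64)), whereas the
 * plain C expression LLONG_MIN / -1 would be undefined behaviour on
 * the host, hence the explicit checks above.
 */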

uint64_t HELPER(rbit64)(uint64_t x)
{
    return revbit64(x);
}

void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm)
{
    update_spsel(env, imm);
}

static void daif_check(CPUARMState *env, uint32_t op,
                       uint32_t imm, uintptr_t ra)
{
    /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */
    if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_UMA)) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, extract32(op, 0, 3),
                                               extract32(op, 3, 3), 4,
                                               imm, 0x1f, 0),
                           exception_target_el(env), ra);
    }
}

void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1e, imm, GETPC());
    env->daif |= (imm << 6) & PSTATE_DAIF;
}

void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1f, imm, GETPC());
    env->daif &= ~((imm << 6) & PSTATE_DAIF);
}
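
/*
 * Illustrative note: the 4-bit immediate carries one bit per DAIF flag
 * (D, A, I, F from MSB to LSB), and "imm << 6" lines it up with
 * PSTATE bits 9..6. For example, MSR DAIFSet, #0b0011 sets PSTATE.I
 * and PSTATE.F, masking IRQs and FIQs.
 */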

/* Convert a softfloat float_relation_ (as returned by
 * the float*_compare functions) to the correct ARM
 * NZCV flag state.
 */
static inline uint32_t float_rel_to_flags(int res)
{
    uint64_t flags;
    switch (res) {
    case float_relation_equal:
        flags = PSTATE_Z | PSTATE_C;
        break;
    case float_relation_less:
        flags = PSTATE_N;
        break;
    case float_relation_greater:
        flags = PSTATE_C;
        break;
    case float_relation_unordered:
    default:
        flags = PSTATE_C | PSTATE_V;
        break;
    }
    return flags;
}
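
/*
 * For example (illustrative): comparing 1.0 against a NaN is
 * "unordered", which this maps to NZCV = 0011 (C and V set), the
 * result FCMP is architected to produce for unordered operands.
 */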

uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status)
{
    return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status)
{
    return float_rel_to_flags(float16_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare(x, y, fp_status));
}

float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    if ((float32_is_zero(a) && float32_is_infinity(b)) ||
        (float32_is_infinity(a) && float32_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float32((1U << 30) |
                            ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
    }
    return float32_mul(a, b, fpst);
}

float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
        (float64_is_infinity(a) && float64_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float64((1ULL << 62) |
                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
    }
    return float64_mul(a, b, fpst);
}
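
/*
 * Illustrative example: FMULX differs from FMUL only in the 0 * inf
 * case, where an ordinary multiply would raise Invalid Operation and
 * return a NaN. FMULX instead returns +/-2.0: for +0.0 (0x00000000)
 * times -inf (0xff800000), helper_vfp_mulxs() returns 0xc0000000,
 * i.e. -2.0, since 1U << 30 is the exponent pattern of 2.0 and the
 * sign is sign(a) XOR sign(b).
 */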

uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
                          uint32_t rn, uint32_t numregs)
{
    /* Helper function for SIMD TBL and TBX. We have to do the table
     * lookup part for the 64 bits worth of indices we're passed in.
     * result is the initial results vector (either zeroes for TBL
     * or some guest values for TBX), rn the register number where
     * the table starts, and numregs the number of registers in the table.
     * We return the results of the lookups.
     */
    int shift;

    for (shift = 0; shift < 64; shift += 8) {
        int index = extract64(indices, shift, 8);
        if (index < 16 * numregs) {
            /* Convert index (a byte offset into the virtual table
             * which is a series of 128-bit vectors concatenated)
             * into the correct register element plus a bit offset
             * into that element, bearing in mind that the table
             * can wrap around from V31 to V0.
             */
            int elt = (rn * 2 + (index >> 3)) % 64;
            int bitidx = (index & 7) * 8;
            uint64_t *q = aa64_vfp_qreg(env, elt >> 1);
            uint64_t val = extract64(q[elt & 1], bitidx, 8);

            result = deposit64(result, shift, 8, val);
        }
    }
    return result;
}
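
/*
 * Worked example (illustrative): for a table starting at V30 with
 * numregs = 2 and a lookup index of 20, elt = (30 * 2 + 2) % 64 = 62,
 * so the byte lives in 64-bit element 0 of V31 (elt >> 1 == 31,
 * elt & 1 == 0) at bit offset (20 & 7) * 8 = 32. An index of 32 or
 * more (16 * numregs) is out of range, so TBL writes zeroes there and
 * TBX leaves the destination byte unchanged.
 */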

/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_eq_quiet(a, b, fpst);
}

uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_le(b, a, fpst);
}

uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_lt(b, a, fpst);
}

/* Reciprocal step and sqrt step. Note that unlike the A32/T32
 * versions, these do a fully fused multiply-add or
 * multiply-add-and-halve.
 */
#define float16_two make_float16(0x4000)
#define float16_three make_float16(0x4200)
#define float16_one_point_five make_float16(0x3e00)

#define float32_two make_float32(0x40000000)
#define float32_three make_float32(0x40400000)
#define float32_one_point_five make_float32(0x3fc00000)

#define float64_two make_float64(0x4000000000000000ULL)
#define float64_three make_float64(0x4008000000000000ULL)
#define float64_one_point_five make_float64(0x3FF8000000000000ULL)

uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_two;
    }
    return float16_muladd(a, b, float16_two, 0, fpst);
}

float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_two;
    }
    return float32_muladd(a, b, float32_two, 0, fpst);
}

float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_two;
    }
    return float64_muladd(a, b, float64_two, 0, fpst);
}
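
/*
 * Illustrative: FRECPS computes 2 - a * b in one fused step (the
 * Newton-Raphson iteration step for 1/x). For instance
 * helper_recpsf_f32(1.0, 3.0) negates a and evaluates
 * fma(-1.0, 3.0, 2.0) = -1.0 with a single rounding; the inf * 0
 * special case returns exactly 2.0 instead of raising Invalid
 * Operation.
 */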

uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_one_point_five;
    }
    return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
}

float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_one_point_five;
    }
    return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
}

float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_one_point_five;
    }
    return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}
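
/*
 * Illustrative: FRSQRTS computes (3 - a * b) / 2, the Newton-Raphson
 * step for 1/sqrt(x), with the halving folded into the fused
 * multiply-add via float_muladd_halve_result. For example
 * helper_rsqrtsf_f32(1.0, 3.0) evaluates (3 - 3) / 2 = 0.0 with a
 * single rounding; the inf * 0 case returns exactly 1.5.
 */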

/* Pairwise long add: add pairs of adjacent elements into
 * double-width elements in the result (eg _s8 is an 8x8->16 op)
 */
uint64_t HELPER(neon_addlp_s8)(uint64_t a)
{
    uint64_t nsignmask = 0x0080008000800080ULL;
    uint64_t wsignmask = 0x8000800080008000ULL;
    uint64_t elementmask = 0x00ff00ff00ff00ffULL;
    uint64_t tmp1, tmp2;
    uint64_t res, signres;

    /* Extract odd elements, sign extend each to a 16 bit field */
    tmp1 = a & elementmask;
    tmp1 ^= nsignmask;
    tmp1 |= wsignmask;
    tmp1 = (tmp1 - nsignmask) ^ wsignmask;
    /* Ditto for the even elements */
    tmp2 = (a >> 8) & elementmask;
    tmp2 ^= nsignmask;
    tmp2 |= wsignmask;
    tmp2 = (tmp2 - nsignmask) ^ wsignmask;

    /* calculate the result by summing bits 0..14, 16..30, etc,
     * and then adjusting the sign bits 15, 31, etc manually.
     * This ensures the addition can't overflow the 16 bit field.
     */
    signres = (tmp1 ^ tmp2) & wsignmask;
    res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
    res ^= signres;

    return res;
}
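
/*
 * Worked example of the sign-extension trick (illustrative): for the
 * byte 0xff (-1), the lane value goes 0xff ^ 0x0080 = 0x007f, then
 * | 0x8000 = 0x807f, then - 0x0080 = 0x7fff, then ^ 0x8000 = 0xffff,
 * which is -1 as a 16-bit value. All four 16-bit lanes of each
 * operand are handled this way in parallel, with no per-byte loop.
 */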

uint64_t HELPER(neon_addlp_u8)(uint64_t a)
{
    uint64_t tmp;

    tmp = a & 0x00ff00ff00ff00ffULL;
    tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
    return tmp;
}

uint64_t HELPER(neon_addlp_s16)(uint64_t a)
{
    int32_t reslo, reshi;

    reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
    reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);

    return (uint32_t)reslo | (((uint64_t)reshi) << 32);
}

uint64_t HELPER(neon_addlp_u16)(uint64_t a)
{
    uint64_t tmp;

    tmp = a & 0x0000ffff0000ffffULL;
    tmp += (a >> 16) & 0x0000ffff0000ffffULL;
    return tmp;
}

/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint16_t val16, sbit;
    int16_t exp;

    if (float16_is_any_nan(a)) {
        float16 nan = a;
        if (float16_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            nan = float16_silence_nan(a, fpst);
        }
        if (fpst->default_nan_mode) {
            nan = float16_default_nan(fpst);
        }
        return nan;
    }

    a = float16_squash_input_denormal(a, fpst);

    val16 = float16_val(a);
    sbit = 0x8000 & val16;
    exp = extract32(val16, 10, 5);

    if (exp == 0) {
        return make_float16(deposit32(sbit, 10, 5, 0x1e));
    } else {
        return make_float16(deposit32(sbit, 10, 5, ~exp));
    }
}

float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint32_t val32, sbit;
    int32_t exp;

    if (float32_is_any_nan(a)) {
        float32 nan = a;
        if (float32_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            nan = float32_silence_nan(a, fpst);
        }
        if (fpst->default_nan_mode) {
            nan = float32_default_nan(fpst);
        }
        return nan;
    }

    a = float32_squash_input_denormal(a, fpst);

    val32 = float32_val(a);
    sbit = 0x80000000ULL & val32;
    exp = extract32(val32, 23, 8);

    if (exp == 0) {
        return make_float32(sbit | (0xfe << 23));
    } else {
        return make_float32(sbit | (~exp & 0xff) << 23);
    }
}

float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint64_t val64, sbit;
    int64_t exp;

    if (float64_is_any_nan(a)) {
        float64 nan = a;
        if (float64_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            nan = float64_silence_nan(a, fpst);
        }
        if (fpst->default_nan_mode) {
            nan = float64_default_nan(fpst);
        }
        return nan;
    }

    a = float64_squash_input_denormal(a, fpst);

    val64 = float64_val(a);
    sbit = 0x8000000000000000ULL & val64;
    exp = extract64(float64_val(a), 52, 11);

    if (exp == 0) {
        return make_float64(sbit | (0x7feULL << 52));
    } else {
        return make_float64(sbit | (~exp & 0x7ffULL) << 52);
    }
}
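
/*
 * Illustrative: FRECPX simply inverts the biased exponent field and
 * zeroes the fraction, giving a cheap scaled reciprocal estimate.
 * For example helper_frecpx_f32(2.0) takes 0x40000000 (exponent field
 * 0x80), inverts it to 0x7f and returns 0x3f800000, i.e. 1.0. A zero
 * exponent maps to the largest finite exponent (0xfe for single
 * precision) rather than the all-ones inverse, so the result is never
 * infinite.
 */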

float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
{
    /* Von Neumann rounding is implemented by using round-to-zero
     * and then setting the LSB of the result if Inexact was raised.
     */
    float32 r;
    float_status *fpst = &env->vfp.fp_status;
    float_status tstat = *fpst;
    int exflags;

    set_float_rounding_mode(float_round_to_zero, &tstat);
    set_float_exception_flags(0, &tstat);
    r = float64_to_float32(a, &tstat);
    exflags = get_float_exception_flags(&tstat);
    if (exflags & float_flag_inexact) {
        r = make_float32(float32_val(r) | 1);
    }
    exflags |= get_float_exception_flags(fpst);
    set_float_exception_flags(exflags, fpst);
    return r;
}
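
/*
 * Illustrative: converting the double 1.0000000001 truncates to
 * float32 1.0 (0x3f800000) and raises Inexact, so the LSB is forced
 * on and FCVTXN returns 0x3f800001. This "round to odd" preserves
 * enough information for a subsequent narrowing round to come out
 * correctly.
 */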

/* 64-bit versions of the CRC helpers. Note that although the operation
 * (and the prototypes of crc32c() and crc32()) means that only the bottom
 * 32 bits of the accumulator and result are used, we pass and return
 * uint64_t for convenience of the generated code. Unlike the 32-bit
 * instruction set versions, val may genuinely have 64 bits of data in it.
 * The upper bytes of val (above the number specified by 'bytes') must have
 * been zeroed out by the caller.
 */
uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* zlib crc32 converts the accumulator and output to one's complement. */
    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* Linux crc32c converts the output to one's complement. */
    return crc32c(acc, buf, bytes) ^ 0xffffffff;
}
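
/*
 * Illustrative usage: the XOR with 0xffffffff on entry and exit undoes
 * the pre- and post-inversion that zlib's crc32() applies internally,
 * because the CRC32X instruction is defined without that conditioning.
 * A guest checksumming 16 bytes issues two CRC32X instructions, which
 * reach this code as two chained calls (variable names hypothetical):
 *   acc = helper_crc32_64(acc, first_8_bytes, 8);
 *   acc = helper_crc32_64(acc, next_8_bytes, 8);
 */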

uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
                                     uint64_t new_lo, uint64_t new_hi)
{
    Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
    Int128 newv = int128_make128(new_lo, new_hi);
    Int128 oldv;
    uintptr_t ra = GETPC();
    uint64_t o0, o1;
    bool success;

#ifdef CONFIG_USER_ONLY
    /* ??? Enforce alignment. */
    uint64_t *haddr = g2h(addr);

    helper_retaddr = ra;
    o0 = ldq_le_p(haddr + 0);
    o1 = ldq_le_p(haddr + 1);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        stq_le_p(haddr + 0, int128_getlo(newv));
        stq_le_p(haddr + 1, int128_gethi(newv));
    }
    helper_retaddr = 0;
#else
    int mem_idx = cpu_mmu_index(env, false);
    TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
    TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);

    o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
    o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
        helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
    }
#endif

    return !success;
}

uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
                                              uint64_t new_lo, uint64_t new_hi)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    bool success;
    int mem_idx;
    TCGMemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);

    cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
    newv = int128_make128(new_lo, new_hi);
    oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);

    success = int128_eq(oldv, cmpv);
    return !success;
}

uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
                                     uint64_t new_lo, uint64_t new_hi)
{
    /*
     * High and low need to be switched here because this is not actually a
     * 128bit store but two doublewords stored consecutively
     */
    Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
    Int128 newv = int128_make128(new_hi, new_lo);
    Int128 oldv;
    uintptr_t ra = GETPC();
    uint64_t o0, o1;
    bool success;

#ifdef CONFIG_USER_ONLY
    /* ??? Enforce alignment. */
    uint64_t *haddr = g2h(addr);

    helper_retaddr = ra;
    o1 = ldq_be_p(haddr + 0);
    o0 = ldq_be_p(haddr + 1);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        stq_be_p(haddr + 0, int128_gethi(newv));
        stq_be_p(haddr + 1, int128_getlo(newv));
    }
    helper_retaddr = 0;
#else
    int mem_idx = cpu_mmu_index(env, false);
    TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
    TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);

    o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
    o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
        helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
    }
#endif

    return !success;
}

uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
                                              uint64_t new_lo, uint64_t new_hi)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    bool success;
    int mem_idx;
    TCGMemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);

    /*
     * High and low need to be switched here because this is not actually a
     * 128bit store but two doublewords stored consecutively
     */
    cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
    newv = int128_make128(new_hi, new_lo);
    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);

    success = int128_eq(oldv, cmpv);
    return !success;
}

/* Writes back the old data into Rs. */
void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                              uint64_t new_lo, uint64_t new_hi)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    int mem_idx;
    TCGMemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);

    cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
    newv = int128_make128(new_lo, new_hi);
    oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);

    env->xregs[rs] = int128_getlo(oldv);
    env->xregs[rs + 1] = int128_gethi(oldv);
}

void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                              uint64_t new_hi, uint64_t new_lo)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    int mem_idx;
    TCGMemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);

    cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
    newv = int128_make128(new_lo, new_hi);
    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);

    env->xregs[rs + 1] = int128_getlo(oldv);
    env->xregs[rs] = int128_gethi(oldv);
}

/*
 * AdvSIMD half-precision
 */

#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))

#define ADVSIMD_HALFOP(name) \
uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \
{ \
    float_status *fpst = fpstp; \
    return float16_ ## name(a, b, fpst); \
}

ADVSIMD_HALFOP(add)
ADVSIMD_HALFOP(sub)
ADVSIMD_HALFOP(mul)
ADVSIMD_HALFOP(div)
ADVSIMD_HALFOP(min)
ADVSIMD_HALFOP(max)
ADVSIMD_HALFOP(minnum)
ADVSIMD_HALFOP(maxnum)

#define ADVSIMD_TWOHALFOP(name) \
uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
{ \
    float16 a1, a2, b1, b2; \
    uint32_t r1, r2; \
    float_status *fpst = fpstp; \
    a1 = extract32(two_a, 0, 16); \
    a2 = extract32(two_a, 16, 16); \
    b1 = extract32(two_b, 0, 16); \
    b2 = extract32(two_b, 16, 16); \
    r1 = float16_ ## name(a1, b1, fpst); \
    r2 = float16_ ## name(a2, b2, fpst); \
    return deposit32(r1, 16, 16, r2); \
}

ADVSIMD_TWOHALFOP(add)
ADVSIMD_TWOHALFOP(sub)
ADVSIMD_TWOHALFOP(mul)
ADVSIMD_TWOHALFOP(div)
ADVSIMD_TWOHALFOP(min)
ADVSIMD_TWOHALFOP(max)
ADVSIMD_TWOHALFOP(minnum)
ADVSIMD_TWOHALFOP(maxnum)
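
/*
 * Illustrative expansion: ADVSIMD_TWOHALFOP(add) defines
 * helper_advsimd_add2h(), which operates on two packed float16 lanes
 * at once. For example, with two_a = 0x40003c00 (2.0 in the high
 * lane, 1.0 in the low) and two_b = 0x3c003c00 (1.0 in both lanes),
 * the result is 0x42004000: 3.0 packed above 2.0.
 */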

/* Data processing - scalar floating-point and advanced SIMD */
static float16 float16_mulx(float16 a, float16 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    if ((float16_is_zero(a) && float16_is_infinity(b)) ||
        (float16_is_infinity(a) && float16_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float16((1U << 14) |
                            ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
    }
    return float16_mul(a, b, fpst);
}

ADVSIMD_HALFOP(mulx)
ADVSIMD_TWOHALFOP(mulx)

/* fused multiply-accumulate */
uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
                                 void *fpstp)
{
    float_status *fpst = fpstp;
    return float16_muladd(a, b, c, 0, fpst);
}

uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
                                  uint32_t two_c, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 a1, a2, b1, b2, c1, c2;
    uint32_t r1, r2;
    a1 = extract32(two_a, 0, 16);
    a2 = extract32(two_a, 16, 16);
    b1 = extract32(two_b, 0, 16);
    b2 = extract32(two_b, 16, 16);
    c1 = extract32(two_c, 0, 16);
    c2 = extract32(two_c, 16, 16);
    r1 = float16_muladd(a1, b1, c1, 0, fpst);
    r2 = float16_muladd(a2, b2, c2, 0, fpst);
    return deposit32(r1, 16, 16, r2);
}

/*
 * Floating point comparisons produce an integer result. Softfloat
 * routines return float_relation types which we convert to the 0/-1
 * results that Neon requires.
 */

#define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0

uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare_quiet(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

/* round to integral */
uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status)
{
    return float16_round_to_int(x, fp_status);
}

uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
{
    int old_flags = get_float_exception_flags(fp_status), new_flags;
    float16 ret;

    ret = float16_round_to_int(x, fp_status);

    /* Suppress any inexact exceptions the conversion produced */
    if (!(old_flags & float_flag_inexact)) {
        new_flags = get_float_exception_flags(fp_status);
        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
    }

    return ret;
}

/*
 * Half-precision floating point conversion functions
 *
 * There are a multitude of conversion functions with various
 * different rounding modes. This is dealt with by the calling code
 * setting the mode appropriately before calling the helper.
 */

uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;

    /* Invalid if we are passed a NaN */
    if (float16_is_any_nan(a)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float16_to_int16(a, fpst);
}

uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;

    /* Invalid if we are passed a NaN */
    if (float16_is_any_nan(a)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float16_to_uint16(a, fpst);
}

static int el_from_spsr(uint32_t spsr)
{
    /* Return the exception level that this SPSR is requesting a return to,
     * or -1 if it is invalid (an illegal return)
     */
    if (spsr & PSTATE_nRW) {
        switch (spsr & CPSR_M) {
        case ARM_CPU_MODE_USR:
            return 0;
        case ARM_CPU_MODE_HYP:
            return 2;
        case ARM_CPU_MODE_FIQ:
        case ARM_CPU_MODE_IRQ:
        case ARM_CPU_MODE_SVC:
        case ARM_CPU_MODE_ABT:
        case ARM_CPU_MODE_UND:
        case ARM_CPU_MODE_SYS:
            return 1;
        case ARM_CPU_MODE_MON:
            /* Returning to Mon from AArch64 is never possible,
             * so this is an illegal return.
             */
        default:
            return -1;
        }
    } else {
        if (extract32(spsr, 1, 1)) {
            /* Return with reserved M[1] bit set */
            return -1;
        }
        if (extract32(spsr, 0, 4) == 1) {
            /* return to EL0 with M[0] bit set */
            return -1;
        }
        return extract32(spsr, 2, 2);
    }
}
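
/*
 * Illustrative: in the AArch64 case (PSTATE.nRW clear), SPSR.M[3:0]
 * encodes the target EL in bits [3:2] and the SP selection in bit 0.
 * So M = 0b0101 (EL1h) yields extract32(spsr, 2, 2) = 1, a return to
 * EL1 using SP_EL1, while M = 0b0001 (EL0 with M[0] set) is rejected
 * above as an illegal return.
 */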

void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
{
    int cur_el = arm_current_el(env);
    unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
    uint32_t spsr = env->banked_spsr[spsr_idx];
    int new_el;
    bool return_to_aa64 = (spsr & PSTATE_nRW) == 0;

    aarch64_save_sp(env, cur_el);

    arm_clear_exclusive(env);

    /* We must squash the PSTATE.SS bit to zero unless both of the
     * following hold:
     * 1. debug exceptions are currently disabled
     * 2. singlestep will be active in the EL we return to
     * We check 1 here and 2 after we've done the pstate/cpsr write() to
     * transition to the EL we're going to.
     */
    if (arm_generate_debug_exceptions(env)) {
        spsr &= ~PSTATE_SS;
    }

    new_el = el_from_spsr(spsr);
    if (new_el == -1) {
        goto illegal_return;
    }
    if (new_el > cur_el
        || (new_el == 2 && !arm_feature(env, ARM_FEATURE_EL2))) {
        /* Disallow return to an EL which is unimplemented or higher
         * than the current one.
         */
        goto illegal_return;
    }

    if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) {
        /* Return to an EL which is configured for a different register width */
        goto illegal_return;
    }

    if (new_el == 2 && arm_is_secure_below_el3(env)) {
        /* Return to the non-existent secure-EL2 */
        goto illegal_return;
    }

    if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        goto illegal_return;
    }

    qemu_mutex_lock_iothread();
    arm_call_pre_el_change_hook(env_archcpu(env));
    qemu_mutex_unlock_iothread();

    if (!return_to_aa64) {
        env->aarch64 = 0;
        /* We do a raw CPSR write because aarch64_sync_64_to_32()
         * will sort the register banks out for us, and we've already
         * caught all the bad-mode cases in el_from_spsr().
         */
        cpsr_write(env, spsr, ~0, CPSRWriteRaw);
        if (!arm_singlestep_active(env)) {
            env->uncached_cpsr &= ~PSTATE_SS;
        }
        aarch64_sync_64_to_32(env);

        if (spsr & CPSR_T) {
            env->regs[15] = new_pc & ~0x1;
        } else {
            env->regs[15] = new_pc & ~0x3;
        }
        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch32 EL%d PC 0x%" PRIx32 "\n",
                      cur_el, new_el, env->regs[15]);
    } else {
        env->aarch64 = 1;
        pstate_write(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_restore_sp(env, new_el);
        env->pc = new_pc;
        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch64 EL%d PC 0x%" PRIx64 "\n",
                      cur_el, new_el, env->pc);
    }
    /*
     * Note that cur_el can never be 0. If new_el is 0, then
     * el0_a64 is return_to_aa64, else el0_a64 is ignored.
     */
    aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);

    qemu_mutex_lock_iothread();
    arm_call_el_change_hook(env_archcpu(env));
    qemu_mutex_unlock_iothread();

    return;

illegal_return:
    /* Illegal return events of various kinds have architecturally
     * mandated behaviour:
     * restore NZCV and DAIF from SPSR_ELx
     * set PSTATE.IL
     * restore PC from ELR_ELx
     * no change to exception level, execution state or stack pointer
     */
    env->pstate |= PSTATE_IL;
    env->pc = new_pc;
    spsr &= PSTATE_NZCV | PSTATE_DAIF;
    spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF);
    pstate_write(env, spsr);
    if (!arm_singlestep_active(env)) {
        env->pstate &= ~PSTATE_SS;
    }
    qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
                  "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
}

/*
 * Square Root and Reciprocal square root
 */

uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
{
    float_status *s = fpstp;

    return float16_sqrt(a, s);
}