/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * QEMU LoongArch vector helper functions.
 *
 * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
 */
Song Gao | c037fbc | 2023-05-04 20:27:34 +0800 | [diff] [blame] | 7 | |
| 8 | #include "qemu/osdep.h" |
| 9 | #include "cpu.h" |
| 10 | #include "exec/exec-all.h" |
| 11 | #include "exec/helper-proto.h" |
Song Gao | aca6747 | 2023-05-04 20:27:59 +0800 | [diff] [blame] | 12 | #include "fpu/softfloat.h" |
| 13 | #include "internals.h" |
Song Gao | d0dfa19 | 2023-05-04 20:28:03 +0800 | [diff] [blame] | 14 | #include "tcg/tcg.h" |
Song Gao | 008a3b1 | 2023-09-14 10:25:59 +0800 | [diff] [blame] | 15 | #include "vec.h" |
Song Gao | 64cf6b9 | 2023-09-14 10:26:07 +0800 | [diff] [blame] | 16 | #include "tcg/tcg-gvec-desc.h" |
Song Gao | c037fbc | 2023-05-04 20:27:34 +0800 | [diff] [blame] | 17 | |
/*
 * Define HELPER(NAME): a widening three-operand helper.
 *
 * The element count is simd_oprsz(desc) / (BIT / 8).  Destination element n
 * (accessor E1) is DO_OP applied to source element 2 * n + 1 of vj and
 * source element 2 * n of vk (accessor E2), each converted to the
 * destination element type before the operation.
 */
#define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP)                         \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)        \
{                                                                     \
    VReg *dst = vd;                                                   \
    VReg *src_j = vj;                                                 \
    VReg *src_k = vk;                                                 \
    typedef __typeof(dst->E1(0)) TD;                                  \
    const int nelem = simd_oprsz(desc) / (BIT / 8);                   \
                                                                      \
    for (int n = 0; n < nelem; n++) {                                 \
        TD odd_j = src_j->E2(2 * n + 1);                              \
        TD even_k = src_k->E2(2 * n);                                 \
                                                                      \
        dst->E1(n) = DO_OP(odd_j, even_k);                            \
    }                                                                 \
}
| 32 | |
| 33 | DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD) |
| 34 | DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD) |
| 35 | DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD) |
| 36 | |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 37 | void HELPER(vhaddw_q_d)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | c037fbc | 2023-05-04 20:27:34 +0800 | [diff] [blame] | 38 | { |
Song Gao | 64cf6b9 | 2023-09-14 10:26:07 +0800 | [diff] [blame] | 39 | int i; |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 40 | VReg *Vd = (VReg *)vd; |
| 41 | VReg *Vj = (VReg *)vj; |
| 42 | VReg *Vk = (VReg *)vk; |
Song Gao | 64cf6b9 | 2023-09-14 10:26:07 +0800 | [diff] [blame] | 43 | int oprsz = simd_oprsz(desc); |
Song Gao | c037fbc | 2023-05-04 20:27:34 +0800 | [diff] [blame] | 44 | |
Song Gao | 64cf6b9 | 2023-09-14 10:26:07 +0800 | [diff] [blame] | 45 | for (i = 0; i < oprsz / 16 ; i++) { |
| 46 | Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)), |
| 47 | int128_makes64(Vk->D(2 * i))); |
| 48 | } |
Song Gao | c037fbc | 2023-05-04 20:27:34 +0800 | [diff] [blame] | 49 | } |
| 50 | |
| 51 | DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB) |
| 52 | DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB) |
| 53 | DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB) |
| 54 | |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 55 | void HELPER(vhsubw_q_d)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | c037fbc | 2023-05-04 20:27:34 +0800 | [diff] [blame] | 56 | { |
Song Gao | 64cf6b9 | 2023-09-14 10:26:07 +0800 | [diff] [blame] | 57 | int i; |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 58 | VReg *Vd = (VReg *)vd; |
| 59 | VReg *Vj = (VReg *)vj; |
| 60 | VReg *Vk = (VReg *)vk; |
Song Gao | 64cf6b9 | 2023-09-14 10:26:07 +0800 | [diff] [blame] | 61 | int oprsz = simd_oprsz(desc); |
Song Gao | c037fbc | 2023-05-04 20:27:34 +0800 | [diff] [blame] | 62 | |
Song Gao | 64cf6b9 | 2023-09-14 10:26:07 +0800 | [diff] [blame] | 63 | for (i = 0; i < oprsz / 16; i++) { |
| 64 | Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)), |
| 65 | int128_makes64(Vk->D(2 * i))); |
| 66 | } |
Song Gao | c037fbc | 2023-05-04 20:27:34 +0800 | [diff] [blame] | 67 | } |
| 68 | |
| 69 | DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD) |
| 70 | DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD) |
| 71 | DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD) |
| 72 | |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 73 | void HELPER(vhaddw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | c037fbc | 2023-05-04 20:27:34 +0800 | [diff] [blame] | 74 | { |
Song Gao | 64cf6b9 | 2023-09-14 10:26:07 +0800 | [diff] [blame] | 75 | int i; |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 76 | VReg *Vd = (VReg *)vd; |
| 77 | VReg *Vj = (VReg *)vj; |
| 78 | VReg *Vk = (VReg *)vk; |
Song Gao | 64cf6b9 | 2023-09-14 10:26:07 +0800 | [diff] [blame] | 79 | int oprsz = simd_oprsz(desc); |
Song Gao | c037fbc | 2023-05-04 20:27:34 +0800 | [diff] [blame] | 80 | |
Song Gao | 64cf6b9 | 2023-09-14 10:26:07 +0800 | [diff] [blame] | 81 | for (i = 0; i < oprsz / 16; i ++) { |
| 82 | Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), |
| 83 | int128_make64(Vk->UD(2 * i))); |
| 84 | } |
Song Gao | c037fbc | 2023-05-04 20:27:34 +0800 | [diff] [blame] | 85 | } |
| 86 | |
| 87 | DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB) |
| 88 | DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB) |
| 89 | DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB) |
| 90 | |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 91 | void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | c037fbc | 2023-05-04 20:27:34 +0800 | [diff] [blame] | 92 | { |
Song Gao | 64cf6b9 | 2023-09-14 10:26:07 +0800 | [diff] [blame] | 93 | int i; |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 94 | VReg *Vd = (VReg *)vd; |
| 95 | VReg *Vj = (VReg *)vj; |
| 96 | VReg *Vk = (VReg *)vk; |
Song Gao | 64cf6b9 | 2023-09-14 10:26:07 +0800 | [diff] [blame] | 97 | int oprsz = simd_oprsz(desc); |
Song Gao | c037fbc | 2023-05-04 20:27:34 +0800 | [diff] [blame] | 98 | |
Song Gao | 64cf6b9 | 2023-09-14 10:26:07 +0800 | [diff] [blame] | 99 | for (i = 0; i < oprsz / 16; i++) { |
| 100 | Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)), |
| 101 | int128_make64(Vk->UD(2 * i))); |
| 102 | } |
Song Gao | c037fbc | 2023-05-04 20:27:34 +0800 | [diff] [blame] | 103 | } |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 104 | |
/*
 * Define HELPER(NAME): a widening helper on even source elements.
 *
 * The element count is simd_oprsz(desc) / (BIT / 8).  Destination element n
 * (accessor E1) is DO_OP applied to source element 2 * n of vj and source
 * element 2 * n of vk (accessor E2), each converted to the destination
 * element type before the operation.
 */
#define DO_EVEN(NAME, BIT, E1, E2, DO_OP)                             \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)        \
{                                                                     \
    VReg *dst = vd;                                                   \
    VReg *src_j = vj;                                                 \
    VReg *src_k = vk;                                                 \
    typedef __typeof(dst->E1(0)) TD;                                  \
    const int nelem = simd_oprsz(desc) / (BIT / 8);                   \
                                                                      \
    for (int n = 0; n < nelem; n++) {                                 \
        TD even_j = src_j->E2(2 * n);                                 \
        TD even_k = src_k->E2(2 * n);                                 \
                                                                      \
        dst->E1(n) = DO_OP(even_j, even_k);                           \
    }                                                                 \
}
| 119 | |
/*
 * Define HELPER(NAME): a widening helper on odd source elements.
 *
 * The element count is simd_oprsz(desc) / (BIT / 8).  Destination element n
 * (accessor E1) is DO_OP applied to source element 2 * n + 1 of vj and
 * source element 2 * n + 1 of vk (accessor E2), each converted to the
 * destination element type before the operation.
 */
#define DO_ODD(NAME, BIT, E1, E2, DO_OP)                              \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)        \
{                                                                     \
    VReg *dst = vd;                                                   \
    VReg *src_j = vj;                                                 \
    VReg *src_k = vk;                                                 \
    typedef __typeof(dst->E1(0)) TD;                                  \
    const int nelem = simd_oprsz(desc) / (BIT / 8);                   \
                                                                      \
    for (int n = 0; n < nelem; n++) {                                 \
        TD odd_j = src_j->E2(2 * n + 1);                              \
        TD odd_k = src_k->E2(2 * n + 1);                              \
                                                                      \
        dst->E1(n) = DO_OP(odd_j, odd_k);                             \
    }                                                                 \
}
| 134 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 135 | void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 136 | { |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 137 | int i; |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 138 | VReg *Vd = (VReg *)vd; |
| 139 | VReg *Vj = (VReg *)vj; |
| 140 | VReg *Vk = (VReg *)vk; |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 141 | int oprsz = simd_oprsz(desc); |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 142 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 143 | for (i = 0; i < oprsz / 16; i++) { |
| 144 | Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i)), |
| 145 | int128_makes64(Vk->D(2 * i))); |
| 146 | } |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 147 | } |
| 148 | |
| 149 | DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD) |
| 150 | DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD) |
| 151 | DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD) |
| 152 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 153 | void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 154 | { |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 155 | int i; |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 156 | VReg *Vd = (VReg *)vd; |
| 157 | VReg *Vj = (VReg *)vj; |
| 158 | VReg *Vk = (VReg *)vk; |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 159 | int oprsz = simd_oprsz(desc); |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 160 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 161 | for (i = 0; i < oprsz / 16; i++) { |
| 162 | Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i +1)), |
| 163 | int128_makes64(Vk->D(2 * i +1))); |
| 164 | } |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 165 | } |
| 166 | |
| 167 | DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD) |
| 168 | DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD) |
| 169 | DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD) |
| 170 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 171 | void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 172 | { |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 173 | int i; |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 174 | VReg *Vd = (VReg *)vd; |
| 175 | VReg *Vj = (VReg *)vj; |
| 176 | VReg *Vk = (VReg *)vk; |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 177 | int oprsz = simd_oprsz(desc); |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 178 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 179 | for (i = 0; i < oprsz / 16; i++) { |
| 180 | Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i)), |
| 181 | int128_makes64(Vk->D(2 * i))); |
| 182 | } |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 183 | } |
| 184 | |
| 185 | DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB) |
| 186 | DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB) |
| 187 | DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB) |
| 188 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 189 | void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 190 | { |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 191 | int i; |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 192 | VReg *Vd = (VReg *)vd; |
| 193 | VReg *Vj = (VReg *)vj; |
| 194 | VReg *Vk = (VReg *)vk; |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 195 | int oprsz = simd_oprsz(desc); |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 196 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 197 | for (i = 0; i < oprsz / 16; i++) { |
| 198 | Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)), |
| 199 | int128_makes64(Vk->D(2 * i + 1))); |
| 200 | } |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 201 | } |
| 202 | |
| 203 | DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB) |
| 204 | DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB) |
| 205 | DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB) |
| 206 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 207 | void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 208 | { |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 209 | int i; |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 210 | VReg *Vd = (VReg *)vd; |
| 211 | VReg *Vj = (VReg *)vj; |
| 212 | VReg *Vk = (VReg *)vk; |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 213 | int oprsz = simd_oprsz(desc); |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 214 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 215 | for (i = 0; i < oprsz / 16; i++) { |
| 216 | Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)), |
| 217 | int128_make64(Vk->UD(2 * i))); |
| 218 | } |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 219 | } |
| 220 | |
| 221 | DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD) |
| 222 | DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD) |
| 223 | DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD) |
| 224 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 225 | void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 226 | { |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 227 | int i; |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 228 | VReg *Vd = (VReg *)vd; |
| 229 | VReg *Vj = (VReg *)vj; |
| 230 | VReg *Vk = (VReg *)vk; |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 231 | int oprsz = simd_oprsz(desc); |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 232 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 233 | for (i = 0; i < oprsz / 16; i++) { |
| 234 | Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), |
| 235 | int128_make64(Vk->UD(2 * i + 1))); |
| 236 | } |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 237 | } |
| 238 | |
| 239 | DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD) |
| 240 | DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD) |
| 241 | DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD) |
| 242 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 243 | void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 244 | { |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 245 | int i; |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 246 | VReg *Vd = (VReg *)vd; |
| 247 | VReg *Vj = (VReg *)vj; |
| 248 | VReg *Vk = (VReg *)vk; |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 249 | int oprsz = simd_oprsz(desc); |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 250 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 251 | for (i = 0; i < oprsz / 16; i++) { |
| 252 | Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i)), |
| 253 | int128_make64(Vk->UD(2 * i))); |
| 254 | } |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 255 | } |
| 256 | |
| 257 | DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB) |
| 258 | DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB) |
| 259 | DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB) |
| 260 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 261 | void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 262 | { |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 263 | int i; |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 264 | VReg *Vd = (VReg *)vd; |
| 265 | VReg *Vj = (VReg *)vj; |
| 266 | VReg *Vk = (VReg *)vk; |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 267 | int oprsz = simd_oprsz(desc); |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 268 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 269 | for (i = 0; i < oprsz / 16; i++) { |
| 270 | Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)), |
| 271 | int128_make64(Vk->UD(2 * i + 1))); |
| 272 | } |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 273 | } |
| 274 | |
| 275 | DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB) |
| 276 | DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB) |
| 277 | DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB) |
| 278 | |
/*
 * Define HELPER(NAME): a widening helper on even source elements with
 * mixed accessors.
 *
 * The element count is simd_oprsz(desc) / (BIT / 8).  The vj operand is
 * read through EU2 and converted to the EU1 element type; the vk operand is
 * read through ES2 and converted to the ES1 element type.  Destination
 * element n (accessor ES1) is DO_OP of source elements 2 * n.
 */
#define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)             \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)        \
{                                                                     \
    VReg *dst = vd;                                                   \
    VReg *src_j = vj;                                                 \
    VReg *src_k = vk;                                                 \
    typedef __typeof(dst->ES1(0)) TDS;                                \
    typedef __typeof(dst->EU1(0)) TDU;                                \
    const int nelem = simd_oprsz(desc) / (BIT / 8);                   \
                                                                      \
    for (int n = 0; n < nelem; n++) {                                 \
        TDU even_j = src_j->EU2(2 * n);                               \
        TDS even_k = src_k->ES2(2 * n);                               \
                                                                      \
        dst->ES1(n) = DO_OP(even_j, even_k);                          \
    }                                                                 \
}
| 294 | |
/*
 * Define HELPER(NAME): a widening helper on odd source elements with mixed
 * accessors.
 *
 * The element count is simd_oprsz(desc) / (BIT / 8).  The vj operand is
 * read through EU2 and converted to the EU1 element type; the vk operand is
 * read through ES2 and converted to the ES1 element type.  Destination
 * element n (accessor ES1) is DO_OP of source elements 2 * n + 1.
 */
#define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP)              \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)        \
{                                                                     \
    VReg *dst = vd;                                                   \
    VReg *src_j = vj;                                                 \
    VReg *src_k = vk;                                                 \
    typedef __typeof(dst->ES1(0)) TDS;                                \
    typedef __typeof(dst->EU1(0)) TDU;                                \
    const int nelem = simd_oprsz(desc) / (BIT / 8);                   \
                                                                      \
    for (int n = 0; n < nelem; n++) {                                 \
        TDU odd_j = src_j->EU2(2 * n + 1);                            \
        TDS odd_k = src_k->ES2(2 * n + 1);                            \
                                                                      \
        dst->ES1(n) = DO_OP(odd_j, odd_k);                            \
    }                                                                 \
}
| 310 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 311 | void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 312 | { |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 313 | int i; |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 314 | VReg *Vd = (VReg *)vd; |
| 315 | VReg *Vj = (VReg *)vj; |
| 316 | VReg *Vk = (VReg *)vk; |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 317 | int oprsz = simd_oprsz(desc); |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 318 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 319 | for (i = 0; i < oprsz / 16; i++) { |
| 320 | Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)), |
| 321 | int128_makes64(Vk->D(2 * i))); |
| 322 | } |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 323 | } |
| 324 | |
| 325 | DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD) |
| 326 | DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD) |
| 327 | DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD) |
| 328 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 329 | void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 330 | { |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 331 | int i; |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 332 | VReg *Vd = (VReg *)vd; |
| 333 | VReg *Vj = (VReg *)vj; |
| 334 | VReg *Vk = (VReg *)vk; |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 335 | int oprsz = simd_oprsz(desc); |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 336 | |
Song Gao | 85995f0 | 2023-09-14 10:26:08 +0800 | [diff] [blame] | 337 | for (i = 0; i < oprsz / 16; i++) { |
| 338 | Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), |
| 339 | int128_makes64(Vk->D(2 * i + 1))); |
| 340 | } |
Song Gao | 2d5f950 | 2023-05-04 20:27:35 +0800 | [diff] [blame] | 341 | } |
| 342 | |
| 343 | DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD) |
| 344 | DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD) |
| 345 | DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD) |
Song Gao | 39e9b0a | 2023-05-04 20:27:36 +0800 | [diff] [blame] | 346 | |
/*
 * Define HELPER(NAME): an element-wise three-operand helper.
 *
 * The element count is simd_oprsz(desc) / (BIT / 8).  Element n of vd is
 * DO_OP applied to element n of vj and element n of vk, all accessed
 * through E.
 */
#define DO_3OP(NAME, BIT, E, DO_OP)                                   \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)        \
{                                                                     \
    VReg *dst = vd;                                                   \
    VReg *src_j = vj;                                                 \
    VReg *src_k = vk;                                                 \
    const int nelem = simd_oprsz(desc) / (BIT / 8);                   \
                                                                      \
    for (int n = 0; n < nelem; n++) {                                 \
        dst->E(n) = DO_OP(src_j->E(n), src_k->E(n));                  \
    }                                                                 \
}
| 360 | |
| 361 | DO_3OP(vavg_b, 8, B, DO_VAVG) |
| 362 | DO_3OP(vavg_h, 16, H, DO_VAVG) |
| 363 | DO_3OP(vavg_w, 32, W, DO_VAVG) |
| 364 | DO_3OP(vavg_d, 64, D, DO_VAVG) |
| 365 | DO_3OP(vavgr_b, 8, B, DO_VAVGR) |
| 366 | DO_3OP(vavgr_h, 16, H, DO_VAVGR) |
| 367 | DO_3OP(vavgr_w, 32, W, DO_VAVGR) |
| 368 | DO_3OP(vavgr_d, 64, D, DO_VAVGR) |
| 369 | DO_3OP(vavg_bu, 8, UB, DO_VAVG) |
| 370 | DO_3OP(vavg_hu, 16, UH, DO_VAVG) |
| 371 | DO_3OP(vavg_wu, 32, UW, DO_VAVG) |
| 372 | DO_3OP(vavg_du, 64, UD, DO_VAVG) |
| 373 | DO_3OP(vavgr_bu, 8, UB, DO_VAVGR) |
| 374 | DO_3OP(vavgr_hu, 16, UH, DO_VAVGR) |
| 375 | DO_3OP(vavgr_wu, 32, UW, DO_VAVGR) |
| 376 | DO_3OP(vavgr_du, 64, UD, DO_VAVGR) |
Song Gao | 4972565 | 2023-05-04 20:27:37 +0800 | [diff] [blame] | 377 | |
Song Gao | 4972565 | 2023-05-04 20:27:37 +0800 | [diff] [blame] | 378 | DO_3OP(vabsd_b, 8, B, DO_VABSD) |
| 379 | DO_3OP(vabsd_h, 16, H, DO_VABSD) |
| 380 | DO_3OP(vabsd_w, 32, W, DO_VABSD) |
| 381 | DO_3OP(vabsd_d, 64, D, DO_VABSD) |
| 382 | DO_3OP(vabsd_bu, 8, UB, DO_VABSD) |
| 383 | DO_3OP(vabsd_hu, 16, UH, DO_VABSD) |
| 384 | DO_3OP(vabsd_wu, 32, UW, DO_VABSD) |
| 385 | DO_3OP(vabsd_du, 64, UD, DO_VABSD) |
Song Gao | af448cb | 2023-05-04 20:27:38 +0800 | [diff] [blame] | 386 | |
/*
 * Define HELPER(NAME): element n of vd is DO_VABS of element n of vj plus
 * DO_VABS of element n of vk, all accessed through E.  The element count is
 * simd_oprsz(desc) / (BIT / 8).
 */
#define DO_VADDA(NAME, BIT, E)                                        \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)        \
{                                                                     \
    VReg *dst = vd;                                                   \
    VReg *src_j = vj;                                                 \
    VReg *src_k = vk;                                                 \
    const int nelem = simd_oprsz(desc) / (BIT / 8);                   \
                                                                      \
    for (int n = 0; n < nelem; n++) {                                 \
        dst->E(n) = DO_VABS(src_j->E(n)) + DO_VABS(src_k->E(n));      \
    }                                                                 \
}
| 400 | |
Song Gao | 27f5485 | 2023-09-14 10:26:11 +0800 | [diff] [blame] | 401 | DO_VADDA(vadda_b, 8, B) |
| 402 | DO_VADDA(vadda_h, 16, H) |
| 403 | DO_VADDA(vadda_w, 32, W) |
| 404 | DO_VADDA(vadda_d, 64, D) |
Song Gao | 9ab2952 | 2023-05-04 20:27:39 +0800 | [diff] [blame] | 405 | |
/*
 * Define HELPER(NAME): element n of vd is DO_OP applied to element n of vj
 * and the immediate, after converting imm to the element type of accessor
 * E.  The element count is simd_oprsz(desc) / (BIT / 8).
 */
#define VMINMAXI(NAME, BIT, E, DO_OP)                                 \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)    \
{                                                                     \
    VReg *dst = vd;                                                   \
    VReg *src = vj;                                                   \
    typedef __typeof(dst->E(0)) TD;                                   \
    const int nelem = simd_oprsz(desc) / (BIT / 8);                   \
    TD bound = (TD)imm;                                               \
                                                                      \
    for (int n = 0; n < nelem; n++) {                                 \
        dst->E(n) = DO_OP(src->E(n), bound);                          \
    }                                                                 \
}
| 419 | |
| 420 | VMINMAXI(vmini_b, 8, B, DO_MIN) |
| 421 | VMINMAXI(vmini_h, 16, H, DO_MIN) |
| 422 | VMINMAXI(vmini_w, 32, W, DO_MIN) |
| 423 | VMINMAXI(vmini_d, 64, D, DO_MIN) |
| 424 | VMINMAXI(vmaxi_b, 8, B, DO_MAX) |
| 425 | VMINMAXI(vmaxi_h, 16, H, DO_MAX) |
| 426 | VMINMAXI(vmaxi_w, 32, W, DO_MAX) |
| 427 | VMINMAXI(vmaxi_d, 64, D, DO_MAX) |
| 428 | VMINMAXI(vmini_bu, 8, UB, DO_MIN) |
| 429 | VMINMAXI(vmini_hu, 16, UH, DO_MIN) |
| 430 | VMINMAXI(vmini_wu, 32, UW, DO_MIN) |
| 431 | VMINMAXI(vmini_du, 64, UD, DO_MIN) |
| 432 | VMINMAXI(vmaxi_bu, 8, UB, DO_MAX) |
| 433 | VMINMAXI(vmaxi_hu, 16, UH, DO_MAX) |
| 434 | VMINMAXI(vmaxi_wu, 32, UW, DO_MAX) |
| 435 | VMINMAXI(vmaxi_du, 64, UD, DO_MAX) |
Song Gao | cd1c49a | 2023-05-04 20:27:40 +0800 | [diff] [blame] | 436 | |
/*
 * Define HELPER(NAME): element n of vd (accessor E2) is the product of
 * element n of vj and element n of vk, computed after converting both to
 * the element type of accessor E1 and then shifted right by BIT.  The
 * element count is simd_oprsz(desc) / (BIT / 8).
 *
 * The DO_OP parameter is accepted but not used by the expansion.
 */
#define DO_VMUH(NAME, BIT, E1, E2, DO_OP)                             \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)        \
{                                                                     \
    VReg *dst = vd;                                                   \
    VReg *src_j = vj;                                                 \
    VReg *src_k = vk;                                                 \
    typedef __typeof(dst->E1(0)) T;                                   \
    const int nelem = simd_oprsz(desc) / (BIT / 8);                   \
                                                                      \
    for (int n = 0; n < nelem; n++) {                                 \
        T wide_j = src_j->E2(n);                                      \
        T wide_k = src_k->E2(n);                                      \
                                                                      \
        dst->E2(n) = (wide_j * wide_k) >> BIT;                        \
    }                                                                 \
}
| 451 | |
Song Gao | 342dc1c | 2023-09-14 10:26:13 +0800 | [diff] [blame] | 452 | void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | cd1c49a | 2023-05-04 20:27:40 +0800 | [diff] [blame] | 453 | { |
Song Gao | 342dc1c | 2023-09-14 10:26:13 +0800 | [diff] [blame] | 454 | int i; |
| 455 | uint64_t l, h; |
Song Gao | cd1c49a | 2023-05-04 20:27:40 +0800 | [diff] [blame] | 456 | VReg *Vd = (VReg *)vd; |
| 457 | VReg *Vj = (VReg *)vj; |
| 458 | VReg *Vk = (VReg *)vk; |
Song Gao | 342dc1c | 2023-09-14 10:26:13 +0800 | [diff] [blame] | 459 | int oprsz = simd_oprsz(desc); |
Song Gao | cd1c49a | 2023-05-04 20:27:40 +0800 | [diff] [blame] | 460 | |
Song Gao | 342dc1c | 2023-09-14 10:26:13 +0800 | [diff] [blame] | 461 | for (i = 0; i < oprsz / 8; i++) { |
| 462 | muls64(&l, &h, Vj->D(i), Vk->D(i)); |
| 463 | Vd->D(i) = h; |
| 464 | } |
Song Gao | cd1c49a | 2023-05-04 20:27:40 +0800 | [diff] [blame] | 465 | } |
| 466 | |
| 467 | DO_VMUH(vmuh_b, 8, H, B, DO_MUH) |
| 468 | DO_VMUH(vmuh_h, 16, W, H, DO_MUH) |
| 469 | DO_VMUH(vmuh_w, 32, D, W, DO_MUH) |
| 470 | |
Song Gao | 342dc1c | 2023-09-14 10:26:13 +0800 | [diff] [blame] | 471 | void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t desc) |
Song Gao | cd1c49a | 2023-05-04 20:27:40 +0800 | [diff] [blame] | 472 | { |
Song Gao | 342dc1c | 2023-09-14 10:26:13 +0800 | [diff] [blame] | 473 | int i; |
| 474 | uint64_t l, h; |
Song Gao | cd1c49a | 2023-05-04 20:27:40 +0800 | [diff] [blame] | 475 | VReg *Vd = (VReg *)vd; |
| 476 | VReg *Vj = (VReg *)vj; |
| 477 | VReg *Vk = (VReg *)vk; |
Song Gao | 342dc1c | 2023-09-14 10:26:13 +0800 | [diff] [blame] | 478 | int oprsz = simd_oprsz(desc); |
Song Gao | cd1c49a | 2023-05-04 20:27:40 +0800 | [diff] [blame] | 479 | |
Song Gao | 342dc1c | 2023-09-14 10:26:13 +0800 | [diff] [blame] | 480 | for (i = 0; i < oprsz / 8; i++) { |
| 481 | mulu64(&l, &h, Vj->D(i), Vk->D(i)); |
| 482 | Vd->D(i) = h; |
| 483 | } |
Song Gao | cd1c49a | 2023-05-04 20:27:40 +0800 | [diff] [blame] | 484 | } |
| 485 | |
| 486 | DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH) |
| 487 | DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH) |
| 488 | DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH) |
| 489 | |
Song Gao | cd1c49a | 2023-05-04 20:27:40 +0800 | [diff] [blame] | 490 | DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL) |
| 491 | DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL) |
| 492 | DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL) |
| 493 | |
| 494 | DO_ODD(vmulwod_h_b, 16, H, B, DO_MUL) |
| 495 | DO_ODD(vmulwod_w_h, 32, W, H, DO_MUL) |
| 496 | DO_ODD(vmulwod_d_w, 64, D, W, DO_MUL) |
| 497 | |
| 498 | DO_EVEN(vmulwev_h_bu, 16, UH, UB, DO_MUL) |
| 499 | DO_EVEN(vmulwev_w_hu, 32, UW, UH, DO_MUL) |
| 500 | DO_EVEN(vmulwev_d_wu, 64, UD, UW, DO_MUL) |
| 501 | |
| 502 | DO_ODD(vmulwod_h_bu, 16, UH, UB, DO_MUL) |
| 503 | DO_ODD(vmulwod_w_hu, 32, UW, UH, DO_MUL) |
| 504 | DO_ODD(vmulwod_d_wu, 64, UD, UW, DO_MUL) |
| 505 | |
| 506 | DO_EVEN_U_S(vmulwev_h_bu_b, 16, H, UH, B, UB, DO_MUL) |
| 507 | DO_EVEN_U_S(vmulwev_w_hu_h, 32, W, UW, H, UH, DO_MUL) |
| 508 | DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL) |
| 509 | |
| 510 | DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) |
| 511 | DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) |
| 512 | DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) |
Song Gao | d3aec65 | 2023-05-04 20:27:41 +0800 | [diff] [blame] | 513 | |
/*
 * Define HELPER(NAME): element n of vd is replaced by DO_OP applied to the
 * current element n of vd, element n of vj and element n of vk, all
 * accessed through E.  The element count is simd_oprsz(desc) / (BIT / 8).
 */
#define VMADDSUB(NAME, BIT, E, DO_OP)                                 \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)        \
{                                                                     \
    VReg *acc = vd;                                                   \
    VReg *src_j = vj;                                                 \
    VReg *src_k = vk;                                                 \
    const int nelem = simd_oprsz(desc) / (BIT / 8);                   \
                                                                      \
    for (int n = 0; n < nelem; n++) {                                 \
        acc->E(n) = DO_OP(acc->E(n), src_j->E(n), src_k->E(n));       \
    }                                                                 \
}
| 527 | |
| 528 | VMADDSUB(vmadd_b, 8, B, DO_MADD) |
| 529 | VMADDSUB(vmadd_h, 16, H, DO_MADD) |
| 530 | VMADDSUB(vmadd_w, 32, W, DO_MADD) |
| 531 | VMADDSUB(vmadd_d, 64, D, DO_MADD) |
| 532 | VMADDSUB(vmsub_b, 8, B, DO_MSUB) |
| 533 | VMADDSUB(vmsub_h, 16, H, DO_MSUB) |
| 534 | VMADDSUB(vmsub_w, 32, W, DO_MSUB) |
| 535 | VMADDSUB(vmsub_d, 64, D, DO_MSUB) |
| 536 | |
/*
 * Define HELPER(NAME): a widening accumulate on even source elements.
 *
 * The element count is simd_oprsz(desc) / (BIT / 8).  Destination element n
 * (accessor E1) is incremented by DO_OP applied to source element 2 * n of
 * vj and source element 2 * n of vk (accessor E2), each converted to the
 * destination element type before the operation.
 */
#define VMADDWEV(NAME, BIT, E1, E2, DO_OP)                            \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)        \
{                                                                     \
    VReg *acc = vd;                                                   \
    VReg *src_j = vj;                                                 \
    VReg *src_k = vk;                                                 \
    typedef __typeof(acc->E1(0)) TD;                                  \
    const int nelem = simd_oprsz(desc) / (BIT / 8);                   \
                                                                      \
    for (int n = 0; n < nelem; n++) {                                 \
        TD even_j = src_j->E2(2 * n);                                 \
        TD even_k = src_k->E2(2 * n);                                 \
                                                                      \
        acc->E1(n) += DO_OP(even_j, even_k);                          \
    }                                                                 \
}
| 551 | |
| 552 | VMADDWEV(vmaddwev_h_b, 16, H, B, DO_MUL) |
| 553 | VMADDWEV(vmaddwev_w_h, 32, W, H, DO_MUL) |
| 554 | VMADDWEV(vmaddwev_d_w, 64, D, W, DO_MUL) |
| 555 | VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL) |
| 556 | VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL) |
| 557 | VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL) |
| 558 | |
/*
 * Define HELPER(NAME): a widening accumulate on odd source elements.
 *
 * The element count is simd_oprsz(desc) / (BIT / 8).  Destination element n
 * (accessor E1) is incremented by DO_OP applied to source element
 * 2 * n + 1 of vj and source element 2 * n + 1 of vk (accessor E2), each
 * converted to the destination element type before the operation.
 */
#define VMADDWOD(NAME, BIT, E1, E2, DO_OP)                            \
void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc)        \
{                                                                     \
    VReg *acc = vd;                                                   \
    VReg *src_j = vj;                                                 \
    VReg *src_k = vk;                                                 \
    typedef __typeof(acc->E1(0)) TD;                                  \
    const int nelem = simd_oprsz(desc) / (BIT / 8);                   \
                                                                      \
    for (int n = 0; n < nelem; n++) {                                 \
        TD odd_j = src_j->E2(2 * n + 1);                              \
        TD odd_k = src_k->E2(2 * n + 1);                              \
                                                                      \
        acc->E1(n) += DO_OP(odd_j, odd_k);                            \
    }                                                                 \
}
| 574 | |
| 575 | VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL) |
| 576 | VMADDWOD(vmaddwod_w_h, 32, W, H, DO_MUL) |
| 577 | VMADDWOD(vmaddwod_d_w, 64, D, W, DO_MUL) |
| 578 | VMADDWOD(vmaddwod_h_bu, 16, UH, UB, DO_MUL) |
| 579 | VMADDWOD(vmaddwod_w_hu, 32, UW, UH, DO_MUL) |
| 580 | VMADDWOD(vmaddwod_d_wu, 64, UD, UW, DO_MUL) |
| 581 | |
Song Gao | 3f450c1 | 2023-09-14 10:26:14 +0800 | [diff] [blame] | 582 | #define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ |
| 583 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 584 | { \ |
| 585 | int i; \ |
| 586 | VReg *Vd = (VReg *)vd; \ |
| 587 | VReg *Vj = (VReg *)vj; \ |
| 588 | VReg *Vk = (VReg *)vk; \ |
| 589 | typedef __typeof(Vd->ES1(0)) TS1; \ |
| 590 | typedef __typeof(Vd->EU1(0)) TU1; \ |
| 591 | int oprsz = simd_oprsz(desc); \ |
| 592 | \ |
| 593 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
| 594 | Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \ |
| 595 | (TS1)Vk->ES2(2 * i)); \ |
| 596 | } \ |
Song Gao | d3aec65 | 2023-05-04 20:27:41 +0800 | [diff] [blame] | 597 | } |
| 598 | |
| 599 | VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL) |
| 600 | VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL) |
| 601 | VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL) |
| 602 | |
Song Gao | 3f450c1 | 2023-09-14 10:26:14 +0800 | [diff] [blame] | 603 | #define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ |
| 604 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 605 | { \ |
| 606 | int i; \ |
| 607 | VReg *Vd = (VReg *)vd; \ |
| 608 | VReg *Vj = (VReg *)vj; \ |
| 609 | VReg *Vk = (VReg *)vk; \ |
| 610 | typedef __typeof(Vd->ES1(0)) TS1; \ |
| 611 | typedef __typeof(Vd->EU1(0)) TU1; \ |
| 612 | int oprsz = simd_oprsz(desc); \ |
| 613 | \ |
| 614 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
| 615 | Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \ |
| 616 | (TS1)Vk->ES2(2 * i + 1)); \ |
| 617 | } \ |
Song Gao | d3aec65 | 2023-05-04 20:27:41 +0800 | [diff] [blame] | 618 | } |
| 619 | |
| 620 | VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) |
| 621 | VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) |
| 622 | VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) |
Song Gao | 4cc4c0f | 2023-05-04 20:27:42 +0800 | [diff] [blame] | 623 | |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 624 | #define VDIV(NAME, BIT, E, DO_OP) \ |
| 625 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 626 | { \ |
| 627 | int i; \ |
| 628 | VReg *Vd = (VReg *)vd; \ |
| 629 | VReg *Vj = (VReg *)vj; \ |
| 630 | VReg *Vk = (VReg *)vk; \ |
Song Gao | abb693d | 2023-09-14 10:26:15 +0800 | [diff] [blame] | 631 | int oprsz = simd_oprsz(desc); \ |
| 632 | \ |
| 633 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 634 | Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ |
| 635 | } \ |
Song Gao | 4cc4c0f | 2023-05-04 20:27:42 +0800 | [diff] [blame] | 636 | } |
| 637 | |
| 638 | VDIV(vdiv_b, 8, B, DO_DIV) |
| 639 | VDIV(vdiv_h, 16, H, DO_DIV) |
| 640 | VDIV(vdiv_w, 32, W, DO_DIV) |
| 641 | VDIV(vdiv_d, 64, D, DO_DIV) |
| 642 | VDIV(vdiv_bu, 8, UB, DO_DIVU) |
| 643 | VDIV(vdiv_hu, 16, UH, DO_DIVU) |
| 644 | VDIV(vdiv_wu, 32, UW, DO_DIVU) |
| 645 | VDIV(vdiv_du, 64, UD, DO_DIVU) |
| 646 | VDIV(vmod_b, 8, B, DO_REM) |
| 647 | VDIV(vmod_h, 16, H, DO_REM) |
| 648 | VDIV(vmod_w, 32, W, DO_REM) |
| 649 | VDIV(vmod_d, 64, D, DO_REM) |
| 650 | VDIV(vmod_bu, 8, UB, DO_REMU) |
| 651 | VDIV(vmod_hu, 16, UH, DO_REMU) |
| 652 | VDIV(vmod_wu, 32, UW, DO_REMU) |
| 653 | VDIV(vmod_du, 64, UD, DO_REMU) |
Song Gao | cbe4419 | 2023-05-04 20:27:43 +0800 | [diff] [blame] | 654 | |
Song Gao | e5c7f03 | 2023-09-14 10:26:16 +0800 | [diff] [blame] | 655 | #define VSAT_S(NAME, BIT, E) \ |
| 656 | void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \ |
| 657 | { \ |
| 658 | int i; \ |
| 659 | VReg *Vd = (VReg *)vd; \ |
| 660 | VReg *Vj = (VReg *)vj; \ |
| 661 | typedef __typeof(Vd->E(0)) TD; \ |
| 662 | int oprsz = simd_oprsz(desc); \ |
| 663 | \ |
| 664 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
| 665 | Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \ |
| 666 | Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i); \ |
| 667 | } \ |
Song Gao | cbe4419 | 2023-05-04 20:27:43 +0800 | [diff] [blame] | 668 | } |
| 669 | |
| 670 | VSAT_S(vsat_b, 8, B) |
| 671 | VSAT_S(vsat_h, 16, H) |
| 672 | VSAT_S(vsat_w, 32, W) |
| 673 | VSAT_S(vsat_d, 64, D) |
| 674 | |
Song Gao | e5c7f03 | 2023-09-14 10:26:16 +0800 | [diff] [blame] | 675 | #define VSAT_U(NAME, BIT, E) \ |
| 676 | void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \ |
| 677 | { \ |
| 678 | int i; \ |
| 679 | VReg *Vd = (VReg *)vd; \ |
| 680 | VReg *Vj = (VReg *)vj; \ |
| 681 | typedef __typeof(Vd->E(0)) TD; \ |
| 682 | int oprsz = simd_oprsz(desc); \ |
| 683 | \ |
| 684 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
| 685 | Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \ |
| 686 | } \ |
Song Gao | cbe4419 | 2023-05-04 20:27:43 +0800 | [diff] [blame] | 687 | } |
| 688 | |
| 689 | VSAT_U(vsat_bu, 8, UB) |
| 690 | VSAT_U(vsat_hu, 16, UH) |
| 691 | VSAT_U(vsat_wu, 32, UW) |
| 692 | VSAT_U(vsat_du, 64, UD) |
Song Gao | 3734ad9 | 2023-05-04 20:27:44 +0800 | [diff] [blame] | 693 | |
/*
 * VEXTH - widen the upper half of every 16-byte lane.
 *
 * Each lane holds LSX_LEN / BIT destination (E1) elements and twice as many
 * source (E2) elements.  Destination element n of a lane is assigned from
 * source element n of the upper half of the same lane; the conversion is
 * whatever plain assignment between the two accessor types performs.
 */
#define VEXTH(NAME, BIT, E1, E2)                                   \
void HELPER(NAME)(void *vd, void *vj, uint32_t desc)               \
{                                                                  \
    VReg *Vd = (VReg *)vd;                                         \
    VReg *Vj = (VReg *)vj;                                         \
    const int per_lane = LSX_LEN / BIT;                            \
    const int lanes = simd_oprsz(desc) / 16;                       \
    int lane, n;                                                   \
                                                                   \
    for (lane = 0; lane < lanes; lane++) {                         \
        int dst = lane * per_lane;                                 \
        int src = (2 * lane + 1) * per_lane;                       \
                                                                   \
        for (n = 0; n < per_lane; n++) {                           \
            Vd->E1(dst + n) = Vj->E2(src + n);                     \
        }                                                          \
    }                                                              \
}
| 709 | |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 710 | void HELPER(vexth_q_d)(void *vd, void *vj, uint32_t desc) |
Song Gao | 3734ad9 | 2023-05-04 20:27:44 +0800 | [diff] [blame] | 711 | { |
Song Gao | f0db0be | 2023-09-14 10:26:17 +0800 | [diff] [blame] | 712 | int i; |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 713 | VReg *Vd = (VReg *)vd; |
| 714 | VReg *Vj = (VReg *)vj; |
Song Gao | f0db0be | 2023-09-14 10:26:17 +0800 | [diff] [blame] | 715 | int oprsz = simd_oprsz(desc); |
Song Gao | 3734ad9 | 2023-05-04 20:27:44 +0800 | [diff] [blame] | 716 | |
Song Gao | f0db0be | 2023-09-14 10:26:17 +0800 | [diff] [blame] | 717 | for (i = 0; i < oprsz / 16; i++) { |
| 718 | Vd->Q(i) = int128_makes64(Vj->D(2 * i + 1)); |
| 719 | } |
Song Gao | 3734ad9 | 2023-05-04 20:27:44 +0800 | [diff] [blame] | 720 | } |
| 721 | |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 722 | void HELPER(vexth_qu_du)(void *vd, void *vj, uint32_t desc) |
Song Gao | 3734ad9 | 2023-05-04 20:27:44 +0800 | [diff] [blame] | 723 | { |
Song Gao | f0db0be | 2023-09-14 10:26:17 +0800 | [diff] [blame] | 724 | int i; |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 725 | VReg *Vd = (VReg *)vd; |
| 726 | VReg *Vj = (VReg *)vj; |
Song Gao | f0db0be | 2023-09-14 10:26:17 +0800 | [diff] [blame] | 727 | int oprsz = simd_oprsz(desc); |
Song Gao | 3734ad9 | 2023-05-04 20:27:44 +0800 | [diff] [blame] | 728 | |
Song Gao | f0db0be | 2023-09-14 10:26:17 +0800 | [diff] [blame] | 729 | for (i = 0; i < oprsz / 16; i++) { |
| 730 | Vd->Q(i) = int128_make64(Vj->UD(2 * i + 1)); |
| 731 | } |
Song Gao | 3734ad9 | 2023-05-04 20:27:44 +0800 | [diff] [blame] | 732 | } |
| 733 | |
/*
 * VEXTH instantiations: H <- B, W <- H and D <- W, each once with the plain
 * accessors and once with the U-prefixed accessors.
 */
VEXTH(vexth_h_b, 16, H, B)
VEXTH(vexth_w_h, 32, W, H)
VEXTH(vexth_d_w, 64, D, W)
VEXTH(vexth_hu_bu, 16, UH, UB)
VEXTH(vexth_wu_hu, 32, UW, UH)
VEXTH(vexth_du_wu, 64, UD, UW)
Song Gao | f0e395d | 2023-05-04 20:27:45 +0800 | [diff] [blame] | 740 | |
Song Gao | 790acb2 | 2023-09-14 10:26:18 +0800 | [diff] [blame] | 741 | #define VEXT2XV(NAME, BIT, E1, E2) \ |
| 742 | void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ |
| 743 | { \ |
| 744 | int i; \ |
| 745 | VReg temp = {}; \ |
| 746 | VReg *Vd = (VReg *)vd; \ |
| 747 | VReg *Vj = (VReg *)vj; \ |
| 748 | int oprsz = simd_oprsz(desc); \ |
| 749 | \ |
| 750 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
| 751 | temp.E1(i) = Vj->E2(i); \ |
| 752 | } \ |
| 753 | *Vd = temp; \ |
| 754 | } |
| 755 | |
| 756 | VEXT2XV(vext2xv_h_b, 16, H, B) |
| 757 | VEXT2XV(vext2xv_w_b, 32, W, B) |
| 758 | VEXT2XV(vext2xv_d_b, 64, D, B) |
| 759 | VEXT2XV(vext2xv_w_h, 32, W, H) |
| 760 | VEXT2XV(vext2xv_d_h, 64, D, H) |
| 761 | VEXT2XV(vext2xv_d_w, 64, D, W) |
| 762 | VEXT2XV(vext2xv_hu_bu, 16, UH, UB) |
| 763 | VEXT2XV(vext2xv_wu_bu, 32, UW, UB) |
| 764 | VEXT2XV(vext2xv_du_bu, 64, UD, UB) |
| 765 | VEXT2XV(vext2xv_wu_hu, 32, UW, UH) |
| 766 | VEXT2XV(vext2xv_du_hu, 64, UD, UH) |
| 767 | VEXT2XV(vext2xv_du_wu, 64, UD, UW) |
| 768 | |
Song Gao | f0e395d | 2023-05-04 20:27:45 +0800 | [diff] [blame] | 769 | DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV) |
| 770 | DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV) |
| 771 | DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV) |
| 772 | DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV) |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 773 | |
/*
 * Gather the top bit of each of the eight bytes in @val into an 8-bit mask:
 * bit k of the result is bit 7 of byte k.
 */
static uint64_t do_vmskltz_b(int64_t val)
{
    uint64_t signs = (uint64_t)val & 0x8080808080808080ULL;
    int shift;

    /* Fold with shifts of 7, 14 and 28 so all eight bits land in the top byte. */
    for (shift = 7; shift <= 28; shift *= 2) {
        signs |= signs << shift;
    }
    return signs >> 56;
}
| 783 | |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 784 | void HELPER(vmskltz_b)(void *vd, void *vj, uint32_t desc) |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 785 | { |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 786 | int i; |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 787 | uint16_t temp = 0; |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 788 | VReg *Vd = (VReg *)vd; |
| 789 | VReg *Vj = (VReg *)vj; |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 790 | int oprsz = simd_oprsz(desc); |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 791 | |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 792 | for (i = 0; i < oprsz / 16; i++) { |
| 793 | temp = 0; |
| 794 | temp = do_vmskltz_b(Vj->D(2 * i)); |
| 795 | temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8); |
| 796 | Vd->D(2 * i) = temp; |
| 797 | Vd->D(2 * i + 1) = 0; |
| 798 | } |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 799 | } |
| 800 | |
/*
 * Gather the top bit of each of the four 16-bit fields in @val into a 4-bit
 * mask: bit k of the result is bit 15 of field k.
 */
static uint64_t do_vmskltz_h(int64_t val)
{
    uint64_t signs = (uint64_t)val & 0x8000800080008000ULL;
    int shift;

    /* Fold with shifts of 15 and 30 so all four bits land in bits 60-63. */
    for (shift = 15; shift <= 30; shift *= 2) {
        signs |= signs << shift;
    }
    return signs >> 60;
}
| 809 | |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 810 | void HELPER(vmskltz_h)(void *vd, void *vj, uint32_t desc) |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 811 | { |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 812 | int i; |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 813 | uint16_t temp = 0; |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 814 | VReg *Vd = (VReg *)vd; |
| 815 | VReg *Vj = (VReg *)vj; |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 816 | int oprsz = simd_oprsz(desc); |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 817 | |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 818 | for (i = 0; i < oprsz / 16; i++) { |
| 819 | temp = 0; |
| 820 | temp = do_vmskltz_h(Vj->D(2 * i)); |
| 821 | temp |= (do_vmskltz_h(Vj->D(2 * i + 1)) << 4); |
| 822 | Vd->D(2 * i) = temp; |
| 823 | Vd->D(2 * i + 1) = 0; |
| 824 | } |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 825 | } |
| 826 | |
/*
 * Gather the top bit of each of the two 32-bit fields in @val into a 2-bit
 * mask: bit 0 is bit 31 of @val, bit 1 is bit 63 of @val.
 */
static uint64_t do_vmskltz_w(int64_t val)
{
    uint64_t signs = (uint64_t)val & 0x8000000080000000ULL;

    /* Move bit 31 next to bit 63, then bring both down to bits 0-1. */
    signs |= signs << 31;
    return signs >> 62;
}
| 834 | |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 835 | void HELPER(vmskltz_w)(void *vd, void *vj, uint32_t desc) |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 836 | { |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 837 | int i; |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 838 | uint16_t temp = 0; |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 839 | VReg *Vd = (VReg *)vd; |
| 840 | VReg *Vj = (VReg *)vj; |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 841 | int oprsz = simd_oprsz(desc); |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 842 | |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 843 | for (i = 0; i < oprsz / 16; i++) { |
| 844 | temp = 0; |
| 845 | temp = do_vmskltz_w(Vj->D(2 * i)); |
| 846 | temp |= (do_vmskltz_w(Vj->D(2 * i + 1)) << 2); |
| 847 | Vd->D(2 * i) = temp; |
| 848 | Vd->D(2 * i + 1) = 0; |
| 849 | } |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 850 | } |
| 851 | |
/* Return 1 when @val is negative (bit 63 set), otherwise 0. */
static uint64_t do_vmskltz_d(int64_t val)
{
    return val < 0 ? 1 : 0;
}
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 856 | void HELPER(vmskltz_d)(void *vd, void *vj, uint32_t desc) |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 857 | { |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 858 | int i; |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 859 | uint16_t temp = 0; |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 860 | VReg *Vd = (VReg *)vd; |
| 861 | VReg *Vj = (VReg *)vj; |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 862 | int oprsz = simd_oprsz(desc); |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 863 | |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 864 | for (i = 0; i < oprsz / 16; i++) { |
| 865 | temp = 0; |
| 866 | temp = do_vmskltz_d(Vj->D(2 * i)); |
| 867 | temp |= (do_vmskltz_d(Vj->D(2 * i + 1)) << 1); |
| 868 | Vd->D(2 * i) = temp; |
| 869 | Vd->D(2 * i + 1) = 0; |
| 870 | } |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 871 | } |
| 872 | |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 873 | void HELPER(vmskgez_b)(void *vd, void *vj, uint32_t desc) |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 874 | { |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 875 | int i; |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 876 | uint16_t temp = 0; |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 877 | VReg *Vd = (VReg *)vd; |
| 878 | VReg *Vj = (VReg *)vj; |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 879 | int oprsz = simd_oprsz(desc); |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 880 | |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 881 | for (i = 0; i < oprsz / 16; i++) { |
| 882 | temp = 0; |
| 883 | temp = do_vmskltz_b(Vj->D(2 * i)); |
| 884 | temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8); |
| 885 | Vd->D(2 * i) = (uint16_t)(~temp); |
| 886 | Vd->D(2 * i + 1) = 0; |
| 887 | } |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 888 | } |
| 889 | |
/*
 * Return an 8-bit mask of the zero bytes in @a: bit k of the result is set
 * when byte k of @a is 0.
 */
static uint64_t do_vmskez_b(uint64_t a)
{
    const uint64_t low7 = 0x7f7f7f7f7f7f7f7fULL;
    /* Bit 7 of each byte ends up set only when that byte was entirely zero. */
    uint64_t zeros = ~(((a & low7) + low7) | a | low7);
    int shift;

    /* Fold with shifts of 7, 14 and 28 so all eight flags land in the top byte. */
    for (shift = 7; shift <= 28; shift *= 2) {
        zeros |= zeros << shift;
    }
    return zeros >> 56;
}
| 899 | |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 900 | void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc) |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 901 | { |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 902 | int i; |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 903 | uint16_t temp = 0; |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 904 | VReg *Vd = (VReg *)vd; |
| 905 | VReg *Vj = (VReg *)vj; |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 906 | int oprsz = simd_oprsz(desc); |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 907 | |
Song Gao | 9707467 | 2023-09-14 10:26:20 +0800 | [diff] [blame] | 908 | for (i = 0; i < oprsz / 16; i++) { |
| 909 | temp = 0; |
| 910 | temp = do_vmskez_b(Vj->D(2 * i)); |
| 911 | temp |= (do_vmskez_b(Vj->D(2 * i + 1)) << 8); |
| 912 | Vd->D(2 * i) = (uint16_t)(~temp); |
| 913 | Vd->D(2 * i + 1) = 0; |
| 914 | } |
Song Gao | 789f4a4 | 2023-05-04 20:27:46 +0800 | [diff] [blame] | 915 | } |
Song Gao | f205a53 | 2023-05-04 20:27:47 +0800 | [diff] [blame] | 916 | |
Song Gao | 4472a45 | 2023-09-14 10:26:22 +0800 | [diff] [blame] | 917 | void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
Song Gao | f205a53 | 2023-05-04 20:27:47 +0800 | [diff] [blame] | 918 | { |
| 919 | int i; |
| 920 | VReg *Vd = (VReg *)vd; |
| 921 | VReg *Vj = (VReg *)vj; |
| 922 | |
Song Gao | 4472a45 | 2023-09-14 10:26:22 +0800 | [diff] [blame] | 923 | for (i = 0; i < simd_oprsz(desc); i++) { |
Song Gao | f205a53 | 2023-05-04 20:27:47 +0800 | [diff] [blame] | 924 | Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm); |
| 925 | } |
| 926 | } |
Song Gao | 9b21a7a | 2023-05-04 20:27:49 +0800 | [diff] [blame] | 927 | |
/*
 * VSLLWIL - widen the lower half of every 16-byte lane and shift left.
 *
 * Each lane holds LSX_LEN / BIT destination (E1) elements.  Destination
 * element n of a lane is source (E2) element n of the same lane, cast to the
 * destination element type and shifted left by imm % BIT.  The result is
 * assembled in a zero-initialised temporary and then copied over *Vd.
 */
#define VSLLWIL(NAME, BIT, E1, E2)                                 \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{                                                                  \
    VReg result = {};                                              \
    VReg *Vd = (VReg *)vd;                                         \
    VReg *Vj = (VReg *)vj;                                         \
    typedef __typeof(result.E1(0)) TD;                             \
    const int per_lane = LSX_LEN / BIT;                            \
    const int lanes = simd_oprsz(desc) / 16;                       \
    const int shift = imm % BIT;                                   \
    int lane, n;                                                   \
                                                                   \
    for (lane = 0; lane < lanes; lane++) {                         \
        for (n = 0; n < per_lane; n++) {                           \
            TD wide = (TD)Vj->E2(n + per_lane * 2 * lane);         \
                                                                   \
            result.E1(n + per_lane * lane) = wide << shift;        \
        }                                                          \
    }                                                              \
    *Vd = result;                                                  \
}
| 946 | |
Song Gao | 6567eac | 2023-09-14 10:26:24 +0800 | [diff] [blame] | 947 | |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 948 | void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc) |
Song Gao | 9b21a7a | 2023-05-04 20:27:49 +0800 | [diff] [blame] | 949 | { |
Song Gao | 6567eac | 2023-09-14 10:26:24 +0800 | [diff] [blame] | 950 | int i; |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 951 | VReg *Vd = (VReg *)vd; |
| 952 | VReg *Vj = (VReg *)vj; |
Song Gao | 6567eac | 2023-09-14 10:26:24 +0800 | [diff] [blame] | 953 | int oprsz = simd_oprsz(desc); |
Song Gao | 9b21a7a | 2023-05-04 20:27:49 +0800 | [diff] [blame] | 954 | |
Song Gao | 6567eac | 2023-09-14 10:26:24 +0800 | [diff] [blame] | 955 | for (i = 0; i < oprsz / 16; i++) { |
| 956 | Vd->Q(i) = int128_makes64(Vj->D(2 * i)); |
| 957 | } |
Song Gao | 9b21a7a | 2023-05-04 20:27:49 +0800 | [diff] [blame] | 958 | } |
| 959 | |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 960 | void HELPER(vextl_qu_du)(void *vd, void *vj, uint32_t desc) |
Song Gao | 9b21a7a | 2023-05-04 20:27:49 +0800 | [diff] [blame] | 961 | { |
Song Gao | 6567eac | 2023-09-14 10:26:24 +0800 | [diff] [blame] | 962 | int i; |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 963 | VReg *Vd = (VReg *)vd; |
| 964 | VReg *Vj = (VReg *)vj; |
Song Gao | 6567eac | 2023-09-14 10:26:24 +0800 | [diff] [blame] | 965 | int oprsz = simd_oprsz(desc); |
Song Gao | 9b21a7a | 2023-05-04 20:27:49 +0800 | [diff] [blame] | 966 | |
Song Gao | 6567eac | 2023-09-14 10:26:24 +0800 | [diff] [blame] | 967 | for (i = 0; i < oprsz / 16; i++) { |
| 968 | Vd->Q(i) = int128_make64(Vj->UD(2 * i)); |
| 969 | } |
Song Gao | 9b21a7a | 2023-05-04 20:27:49 +0800 | [diff] [blame] | 970 | } |
| 971 | |
/*
 * VSLLWIL instantiations: H <- B, W <- H and D <- W, each once with the plain
 * accessors and once with the U-prefixed accessors.
 */
VSLLWIL(vsllwil_h_b, 16, H, B)
VSLLWIL(vsllwil_w_h, 32, W, H)
VSLLWIL(vsllwil_d_w, 64, D, W)
VSLLWIL(vsllwil_hu_bu, 16, UH, UB)
VSLLWIL(vsllwil_wu_hu, 32, UW, UH)
VSLLWIL(vsllwil_du_wu, 64, UD, UW)
Song Gao | ecb9371 | 2023-05-04 20:27:50 +0800 | [diff] [blame] | 978 | |
/*
 * do_vsrlr - generate do_vsrlr_<E>(), a rounding right shift on type T.
 *
 * A shift of 0 returns the input unchanged.  Otherwise the result is
 * s1 >> sh plus the last bit shifted out (bit sh - 1 of s1).
 */
#define do_vsrlr(E, T)                                  \
static T do_vsrlr_ ##E(T s1, int sh)                    \
{                                                       \
    T round_bit;                                        \
                                                        \
    if (!sh) {                                          \
        return s1;                                      \
    }                                                   \
    round_bit = (s1 >> (sh - 1)) & 0x1;                 \
    return (s1 >> sh) + round_bit;                      \
}

do_vsrlr(B, uint8_t)
do_vsrlr(H, uint16_t)
do_vsrlr(W, uint32_t)
do_vsrlr(D, uint64_t)
| 993 | |
| 994 | #define VSRLR(NAME, BIT, T, E) \ |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 995 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
Song Gao | ecb9371 | 2023-05-04 20:27:50 +0800 | [diff] [blame] | 996 | { \ |
| 997 | int i; \ |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 998 | VReg *Vd = (VReg *)vd; \ |
| 999 | VReg *Vj = (VReg *)vj; \ |
| 1000 | VReg *Vk = (VReg *)vk; \ |
Song Gao | 8c272fe | 2023-09-14 10:26:25 +0800 | [diff] [blame] | 1001 | int oprsz = simd_oprsz(desc); \ |
Song Gao | ecb9371 | 2023-05-04 20:27:50 +0800 | [diff] [blame] | 1002 | \ |
Song Gao | 8c272fe | 2023-09-14 10:26:25 +0800 | [diff] [blame] | 1003 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
Song Gao | ecb9371 | 2023-05-04 20:27:50 +0800 | [diff] [blame] | 1004 | Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ |
| 1005 | } \ |
| 1006 | } |
| 1007 | |
| 1008 | VSRLR(vsrlr_b, 8, uint8_t, B) |
| 1009 | VSRLR(vsrlr_h, 16, uint16_t, H) |
| 1010 | VSRLR(vsrlr_w, 32, uint32_t, W) |
| 1011 | VSRLR(vsrlr_d, 64, uint64_t, D) |
| 1012 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1013 | #define VSRLRI(NAME, BIT, E) \ |
| 1014 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ |
| 1015 | { \ |
| 1016 | int i; \ |
| 1017 | VReg *Vd = (VReg *)vd; \ |
| 1018 | VReg *Vj = (VReg *)vj; \ |
Song Gao | 8c272fe | 2023-09-14 10:26:25 +0800 | [diff] [blame] | 1019 | int oprsz = simd_oprsz(desc); \ |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1020 | \ |
Song Gao | 8c272fe | 2023-09-14 10:26:25 +0800 | [diff] [blame] | 1021 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1022 | Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \ |
| 1023 | } \ |
Song Gao | ecb9371 | 2023-05-04 20:27:50 +0800 | [diff] [blame] | 1024 | } |
| 1025 | |
| 1026 | VSRLRI(vsrlri_b, 8, B) |
| 1027 | VSRLRI(vsrlri_h, 16, H) |
| 1028 | VSRLRI(vsrlri_w, 32, W) |
| 1029 | VSRLRI(vsrlri_d, 64, D) |
| 1030 | |
/*
 * do_vsrar - generate do_vsrar_<E>(), a rounding right shift on signed type T.
 *
 * A shift of 0 returns the input unchanged.  Otherwise the result is
 * s1 >> sh plus the last bit shifted out (bit sh - 1 of s1).
 */
#define do_vsrar(E, T)                                  \
static T do_vsrar_ ##E(T s1, int sh)                    \
{                                                       \
    T round_bit;                                        \
                                                        \
    if (!sh) {                                          \
        return s1;                                      \
    }                                                   \
    round_bit = (s1 >> (sh - 1)) & 0x1;                 \
    return (s1 >> sh) + round_bit;                      \
}

do_vsrar(B, int8_t)
do_vsrar(H, int16_t)
do_vsrar(W, int32_t)
do_vsrar(D, int64_t)
| 1045 | |
| 1046 | #define VSRAR(NAME, BIT, T, E) \ |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 1047 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
Song Gao | ecb9371 | 2023-05-04 20:27:50 +0800 | [diff] [blame] | 1048 | { \ |
| 1049 | int i; \ |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 1050 | VReg *Vd = (VReg *)vd; \ |
| 1051 | VReg *Vj = (VReg *)vj; \ |
| 1052 | VReg *Vk = (VReg *)vk; \ |
Song Gao | 8c272fe | 2023-09-14 10:26:25 +0800 | [diff] [blame] | 1053 | int oprsz = simd_oprsz(desc); \ |
Song Gao | ecb9371 | 2023-05-04 20:27:50 +0800 | [diff] [blame] | 1054 | \ |
Song Gao | 8c272fe | 2023-09-14 10:26:25 +0800 | [diff] [blame] | 1055 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
Song Gao | ecb9371 | 2023-05-04 20:27:50 +0800 | [diff] [blame] | 1056 | Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ |
| 1057 | } \ |
| 1058 | } |
| 1059 | |
| 1060 | VSRAR(vsrar_b, 8, uint8_t, B) |
| 1061 | VSRAR(vsrar_h, 16, uint16_t, H) |
| 1062 | VSRAR(vsrar_w, 32, uint32_t, W) |
| 1063 | VSRAR(vsrar_d, 64, uint64_t, D) |
| 1064 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1065 | #define VSRARI(NAME, BIT, E) \ |
| 1066 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ |
| 1067 | { \ |
| 1068 | int i; \ |
| 1069 | VReg *Vd = (VReg *)vd; \ |
| 1070 | VReg *Vj = (VReg *)vj; \ |
Song Gao | 8c272fe | 2023-09-14 10:26:25 +0800 | [diff] [blame] | 1071 | int oprsz = simd_oprsz(desc); \ |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1072 | \ |
Song Gao | 8c272fe | 2023-09-14 10:26:25 +0800 | [diff] [blame] | 1073 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1074 | Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \ |
| 1075 | } \ |
Song Gao | ecb9371 | 2023-05-04 20:27:50 +0800 | [diff] [blame] | 1076 | } |
| 1077 | |
| 1078 | VSRARI(vsrari_b, 8, B) |
| 1079 | VSRARI(vsrari_h, 16, H) |
| 1080 | VSRARI(vsrari_w, 32, W) |
| 1081 | VSRARI(vsrari_d, 64, D) |
Song Gao | d79fb8d | 2023-05-04 20:27:51 +0800 | [diff] [blame] | 1082 | |
Song Gao | 40c7674 | 2023-09-14 10:26:26 +0800 | [diff] [blame] | 1083 | #define VSRLN(NAME, BIT, E1, E2) \ |
| 1084 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 1085 | { \ |
| 1086 | int i, j, ofs; \ |
| 1087 | VReg *Vd = (VReg *)vd; \ |
| 1088 | VReg *Vj = (VReg *)vj; \ |
| 1089 | VReg *Vk = (VReg *)vk; \ |
| 1090 | int oprsz = simd_oprsz(desc); \ |
| 1091 | \ |
| 1092 | ofs = LSX_LEN / BIT; \ |
| 1093 | for (i = 0; i < oprsz / 16; i++) { \ |
| 1094 | for (j = 0; j < ofs; j++) { \ |
| 1095 | Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \ |
| 1096 | Vk->E2(j + ofs * i) % BIT); \ |
| 1097 | } \ |
| 1098 | Vd->D(2 * i + 1) = 0; \ |
| 1099 | } \ |
Song Gao | d79fb8d | 2023-05-04 20:27:51 +0800 | [diff] [blame] | 1100 | } |
| 1101 | |
Song Gao | 40c7674 | 2023-09-14 10:26:26 +0800 | [diff] [blame] | 1102 | VSRLN(vsrln_b_h, 16, B, UH) |
| 1103 | VSRLN(vsrln_h_w, 32, H, UW) |
| 1104 | VSRLN(vsrln_w_d, 64, W, UD) |
Song Gao | d79fb8d | 2023-05-04 20:27:51 +0800 | [diff] [blame] | 1105 | |
Song Gao | 40c7674 | 2023-09-14 10:26:26 +0800 | [diff] [blame] | 1106 | #define VSRAN(NAME, BIT, E1, E2, E3) \ |
| 1107 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 1108 | { \ |
| 1109 | int i, j, ofs; \ |
| 1110 | VReg *Vd = (VReg *)vd; \ |
| 1111 | VReg *Vj = (VReg *)vj; \ |
| 1112 | VReg *Vk = (VReg *)vk; \ |
| 1113 | int oprsz = simd_oprsz(desc); \ |
| 1114 | \ |
| 1115 | ofs = LSX_LEN / BIT; \ |
| 1116 | for (i = 0; i < oprsz / 16; i++) { \ |
| 1117 | for (j = 0; j < ofs; j++) { \ |
| 1118 | Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \ |
| 1119 | Vk->E3(j + ofs * i) % BIT); \ |
| 1120 | } \ |
| 1121 | Vd->D(2 * i + 1) = 0; \ |
| 1122 | } \ |
Song Gao | d79fb8d | 2023-05-04 20:27:51 +0800 | [diff] [blame] | 1123 | } |
| 1124 | |
Song Gao | 40c7674 | 2023-09-14 10:26:26 +0800 | [diff] [blame] | 1125 | VSRAN(vsran_b_h, 16, B, H, UH) |
| 1126 | VSRAN(vsran_h_w, 32, H, W, UW) |
| 1127 | VSRAN(vsran_w_d, 64, W, D, UD) |
Song Gao | d79fb8d | 2023-05-04 20:27:51 +0800 | [diff] [blame] | 1128 | |
/*
 * VSRLNI - right shift by an immediate with narrowing, per 16-byte lane.
 *
 * For each lane, the E2 elements of vj are shifted with R_SHIFT by imm and
 * narrowed into the lower half of the result lane, and the E2 elements of the
 * current vd are shifted the same way into the upper half.  The result is
 * built in a zero-initialised temporary because vd is both a source and the
 * destination, then copied over *Vd.
 */
#define VSRLNI(NAME, BIT, E1, E2)                                  \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{                                                                  \
    VReg result = {};                                              \
    VReg *Vd = (VReg *)vd;                                         \
    VReg *Vj = (VReg *)vj;                                         \
    const int per_lane = LSX_LEN / BIT;                            \
    const int lanes = simd_oprsz(desc) / 16;                       \
    int lane, n;                                                   \
                                                                   \
    for (lane = 0; lane < lanes; lane++) {                         \
        for (n = 0; n < per_lane; n++) {                           \
            int src = n + per_lane * lane;                         \
            int lo = n + per_lane * 2 * lane;                      \
            int hi = n + per_lane * (2 * lane + 1);                \
                                                                   \
            result.E1(lo) = R_SHIFT(Vj->E2(src), imm);             \
            result.E1(hi) = R_SHIFT(Vd->E2(src), imm);             \
        }                                                          \
    }                                                              \
    *Vd = result;                                                  \
}
| 1148 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1149 | void HELPER(vsrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
Song Gao | d79fb8d | 2023-05-04 20:27:51 +0800 | [diff] [blame] | 1150 | { |
Song Gao | 40c7674 | 2023-09-14 10:26:26 +0800 | [diff] [blame] | 1151 | int i; |
| 1152 | VReg temp = {}; |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1153 | VReg *Vd = (VReg *)vd; |
| 1154 | VReg *Vj = (VReg *)vj; |
Song Gao | d79fb8d | 2023-05-04 20:27:51 +0800 | [diff] [blame] | 1155 | |
Song Gao | 40c7674 | 2023-09-14 10:26:26 +0800 | [diff] [blame] | 1156 | for (i = 0; i < 2; i++) { |
| 1157 | temp.D(2 * i) = int128_getlo(int128_urshift(Vj->Q(i), imm % 128)); |
| 1158 | temp.D(2 * i +1) = int128_getlo(int128_urshift(Vd->Q(i), imm % 128)); |
| 1159 | } |
Song Gao | d79fb8d | 2023-05-04 20:27:51 +0800 | [diff] [blame] | 1160 | *Vd = temp; |
| 1161 | } |
| 1162 | |
/* VSRLNI instantiations: B <- UH, H <- UW and W <- UD. */
VSRLNI(vsrlni_b_h, 16, B, UH)
VSRLNI(vsrlni_h_w, 32, H, UW)
VSRLNI(vsrlni_w_d, 64, W, UD)
Song Gao | d79fb8d | 2023-05-04 20:27:51 +0800 | [diff] [blame] | 1166 | |
/*
 * VSRANI - right shift by an immediate with narrowing, per 16-byte lane.
 *
 * For each lane, the E2 elements of vj are shifted with R_SHIFT by imm and
 * narrowed into the lower half of the result lane, and the E2 elements of the
 * current vd are shifted the same way into the upper half.  The result is
 * built in a zero-initialised temporary because vd is both a source and the
 * destination, then copied over *Vd.
 */
#define VSRANI(NAME, BIT, E1, E2)                                  \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
{                                                                  \
    VReg result = {};                                              \
    VReg *Vd = (VReg *)vd;                                         \
    VReg *Vj = (VReg *)vj;                                         \
    const int per_lane = LSX_LEN / BIT;                            \
    const int lanes = simd_oprsz(desc) / 16;                       \
    int lane, n;                                                   \
                                                                   \
    for (lane = 0; lane < lanes; lane++) {                         \
        for (n = 0; n < per_lane; n++) {                           \
            int src = n + per_lane * lane;                         \
            int lo = n + per_lane * 2 * lane;                      \
            int hi = n + per_lane * (2 * lane + 1);                \
                                                                   \
            result.E1(lo) = R_SHIFT(Vj->E2(src), imm);             \
            result.E1(hi) = R_SHIFT(Vd->E2(src), imm);             \
        }                                                          \
    }                                                              \
    *Vd = result;                                                  \
}
| 1186 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1187 | void HELPER(vsrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
Song Gao | d79fb8d | 2023-05-04 20:27:51 +0800 | [diff] [blame] | 1188 | { |
Song Gao | 40c7674 | 2023-09-14 10:26:26 +0800 | [diff] [blame] | 1189 | int i; |
| 1190 | VReg temp = {}; |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1191 | VReg *Vd = (VReg *)vd; |
| 1192 | VReg *Vj = (VReg *)vj; |
Song Gao | d79fb8d | 2023-05-04 20:27:51 +0800 | [diff] [blame] | 1193 | |
Song Gao | 40c7674 | 2023-09-14 10:26:26 +0800 | [diff] [blame] | 1194 | for (i = 0; i < 2; i++) { |
| 1195 | temp.D(2 * i) = int128_getlo(int128_rshift(Vj->Q(i), imm % 128)); |
| 1196 | temp.D(2 * i + 1) = int128_getlo(int128_rshift(Vd->Q(i), imm % 128)); |
| 1197 | } |
Song Gao | d79fb8d | 2023-05-04 20:27:51 +0800 | [diff] [blame] | 1198 | *Vd = temp; |
| 1199 | } |
| 1200 | |
/* VSRANI instantiations: B <- H, H <- W and W <- D. */
VSRANI(vsrani_b_h, 16, B, H)
VSRANI(vsrani_h_w, 32, H, W)
VSRANI(vsrani_w_d, 64, W, D)
Song Gao | a5200a1 | 2023-05-04 20:27:52 +0800 | [diff] [blame] | 1204 | |
Song Gao | c50ce38 | 2023-09-14 10:26:27 +0800 | [diff] [blame] | 1205 | #define VSRLRN(NAME, BIT, E1, E2, E3) \ |
| 1206 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 1207 | { \ |
| 1208 | int i, j, ofs; \ |
| 1209 | VReg *Vd = (VReg *)vd; \ |
| 1210 | VReg *Vj = (VReg *)vj; \ |
| 1211 | VReg *Vk = (VReg *)vk; \ |
| 1212 | int oprsz = simd_oprsz(desc); \ |
| 1213 | \ |
| 1214 | ofs = LSX_LEN / BIT; \ |
| 1215 | for (i = 0; i < oprsz / 16; i++) { \ |
| 1216 | for (j = 0; j < ofs; j++) { \ |
| 1217 | Vd->E1(j + ofs * 2 * i) = do_vsrlr_ ##E2(Vj->E2(j + ofs * i), \ |
| 1218 | Vk->E3(j + ofs * i) % BIT); \ |
| 1219 | } \ |
| 1220 | Vd->D(2 * i + 1) = 0; \ |
| 1221 | } \ |
Song Gao | a5200a1 | 2023-05-04 20:27:52 +0800 | [diff] [blame] | 1222 | } |
| 1223 | |
Song Gao | c50ce38 | 2023-09-14 10:26:27 +0800 | [diff] [blame] | 1224 | VSRLRN(vsrlrn_b_h, 16, B, H, UH) |
| 1225 | VSRLRN(vsrlrn_h_w, 32, H, W, UW) |
| 1226 | VSRLRN(vsrlrn_w_d, 64, W, D, UD) |
Song Gao | a5200a1 | 2023-05-04 20:27:52 +0800 | [diff] [blame] | 1227 | |
Song Gao | c50ce38 | 2023-09-14 10:26:27 +0800 | [diff] [blame] | 1228 | #define VSRARN(NAME, BIT, E1, E2, E3) \ |
| 1229 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 1230 | { \ |
| 1231 | int i, j, ofs; \ |
| 1232 | VReg *Vd = (VReg *)vd; \ |
| 1233 | VReg *Vj = (VReg *)vj; \ |
| 1234 | VReg *Vk = (VReg *)vk; \ |
| 1235 | int oprsz = simd_oprsz(desc); \ |
| 1236 | \ |
| 1237 | ofs = LSX_LEN / BIT; \ |
| 1238 | for (i = 0; i < oprsz / 16; i++) { \ |
| 1239 | for (j = 0; j < ofs; j++) { \ |
| 1240 | Vd->E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), \ |
| 1241 | Vk->E3(j + ofs * i) % BIT); \ |
| 1242 | } \ |
| 1243 | Vd->D(2 * i + 1) = 0; \ |
| 1244 | } \ |
Song Gao | a5200a1 | 2023-05-04 20:27:52 +0800 | [diff] [blame] | 1245 | } |
| 1246 | |
Song Gao | c50ce38 | 2023-09-14 10:26:27 +0800 | [diff] [blame] | 1247 | VSRARN(vsrarn_b_h, 16, B, H, UH) |
| 1248 | VSRARN(vsrarn_h_w, 32, H, W, UW) |
| 1249 | VSRARN(vsrarn_w_d, 64, W, D, UD) |
Song Gao | a5200a1 | 2023-05-04 20:27:52 +0800 | [diff] [blame] | 1250 | |
/*
 * VSRLRNI - define HELPER(NAME)(vd, vj, imm, desc).
 *
 * Per 16-byte lane, do_vsrlr_<E2>() is applied with shift amount imm to
 * the LSX_LEN / BIT E2 elements of Vj and of Vd. The Vj results are
 * stored through E1 at index (LSX_LEN / BIT) * 2 * lane onward, the Vd
 * results at (LSX_LEN / BIT) * (2 * lane + 1) onward. Everything is
 * staged in a zero-initialised VReg and copied to Vd at the end, since
 * Vd is also an input.
 */
#define VSRLRNI(NAME, BIT, E1, E2)                                          \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)          \
{                                                                           \
    VReg *Vd = vd;                                                          \
    VReg *Vj = vj;                                                          \
    VReg result = {};                                                       \
    const int per_lane = LSX_LEN / BIT;                                     \
    const int nr_lanes = (int)simd_oprsz(desc) / 16;                        \
    int lane, n;                                                            \
                                                                            \
    for (lane = 0; lane < nr_lanes; lane++) {                               \
        const int src = per_lane * lane;                                    \
        const int dst_j = per_lane * 2 * lane;                              \
        const int dst_d = per_lane * (2 * lane + 1);                        \
                                                                            \
        for (n = 0; n < per_lane; n++) {                                    \
            result.E1(dst_j + n) = do_vsrlr_ ## E2(Vj->E2(src + n), imm);   \
            result.E1(dst_d + n) = do_vsrlr_ ## E2(Vd->E2(src + n), imm);   \
        }                                                                   \
    }                                                                       \
    *Vd = result;                                                           \
}
| 1270 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1271 | void HELPER(vsrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
Song Gao | a5200a1 | 2023-05-04 20:27:52 +0800 | [diff] [blame] | 1272 | { |
Song Gao | c50ce38 | 2023-09-14 10:26:27 +0800 | [diff] [blame] | 1273 | int i; |
| 1274 | VReg temp = {}; |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1275 | VReg *Vd = (VReg *)vd; |
| 1276 | VReg *Vj = (VReg *)vj; |
Song Gao | c50ce38 | 2023-09-14 10:26:27 +0800 | [diff] [blame] | 1277 | Int128 r[4]; |
| 1278 | int oprsz = simd_oprsz(desc); |
Song Gao | a5200a1 | 2023-05-04 20:27:52 +0800 | [diff] [blame] | 1279 | |
Song Gao | c50ce38 | 2023-09-14 10:26:27 +0800 | [diff] [blame] | 1280 | for (i = 0; i < oprsz / 16; i++) { |
| 1281 | if (imm == 0) { |
| 1282 | temp.D(2 * i) = int128_getlo(Vj->Q(i)); |
| 1283 | temp.D(2 * i + 1) = int128_getlo(Vd->Q(i)); |
| 1284 | } else { |
| 1285 | r[2 * i] = int128_and(int128_urshift(Vj->Q(i), (imm - 1)), |
| 1286 | int128_one()); |
| 1287 | r[2 * i + 1] = int128_and(int128_urshift(Vd->Q(i), (imm - 1)), |
| 1288 | int128_one()); |
| 1289 | temp.D(2 * i) = int128_getlo(int128_add(int128_urshift(Vj->Q(i), |
| 1290 | imm), r[2 * i])); |
| 1291 | temp.D(2 * i + 1) = int128_getlo(int128_add(int128_urshift(Vd->Q(i), |
| 1292 | imm), r[ 2 * i + 1])); |
| 1293 | } |
Song Gao | a5200a1 | 2023-05-04 20:27:52 +0800 | [diff] [blame] | 1294 | } |
| 1295 | *Vd = temp; |
| 1296 | } |
| 1297 | |
/* Expand VSRLRNI for the (BIT, E1, E2) triples 16/B/H, 32/H/W and 64/W/D. */
VSRLRNI(vsrlrni_b_h, 16, B, H)
VSRLRNI(vsrlrni_h_w, 32, H, W)
VSRLRNI(vsrlrni_w_d, 64, W, D)
| 1301 | |
/*
 * VSRARNI - define HELPER(NAME)(vd, vj, imm, desc).
 *
 * Per 16-byte lane, do_vsrar_<E2>() is applied with shift amount imm to
 * the LSX_LEN / BIT E2 elements of Vj and of Vd. The Vj results are
 * stored through E1 at index (LSX_LEN / BIT) * 2 * lane onward, the Vd
 * results at (LSX_LEN / BIT) * (2 * lane + 1) onward. The output is
 * built in a zero-initialised VReg and assigned to Vd last because Vd is
 * read as a source.
 */
#define VSRARNI(NAME, BIT, E1, E2)                                          \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)          \
{                                                                           \
    VReg *Vd = vd;                                                          \
    VReg *Vj = vj;                                                          \
    VReg result = {};                                                       \
    const int per_lane = LSX_LEN / BIT;                                     \
    const int nr_lanes = (int)simd_oprsz(desc) / 16;                        \
    int lane, n;                                                            \
                                                                            \
    for (lane = 0; lane < nr_lanes; lane++) {                               \
        const int src = per_lane * lane;                                    \
        const int dst_j = per_lane * 2 * lane;                              \
        const int dst_d = per_lane * (2 * lane + 1);                        \
                                                                            \
        for (n = 0; n < per_lane; n++) {                                    \
            result.E1(dst_j + n) = do_vsrar_ ## E2(Vj->E2(src + n), imm);   \
            result.E1(dst_d + n) = do_vsrar_ ## E2(Vd->E2(src + n), imm);   \
        }                                                                   \
    }                                                                       \
    *Vd = result;                                                           \
}
| 1321 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1322 | void HELPER(vsrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
Song Gao | a5200a1 | 2023-05-04 20:27:52 +0800 | [diff] [blame] | 1323 | { |
Song Gao | c50ce38 | 2023-09-14 10:26:27 +0800 | [diff] [blame] | 1324 | int i; |
| 1325 | VReg temp = {}; |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1326 | VReg *Vd = (VReg *)vd; |
| 1327 | VReg *Vj = (VReg *)vj; |
Song Gao | c50ce38 | 2023-09-14 10:26:27 +0800 | [diff] [blame] | 1328 | Int128 r[4]; |
| 1329 | int oprsz = simd_oprsz(desc); |
Song Gao | a5200a1 | 2023-05-04 20:27:52 +0800 | [diff] [blame] | 1330 | |
Song Gao | c50ce38 | 2023-09-14 10:26:27 +0800 | [diff] [blame] | 1331 | for (i = 0; i < oprsz / 16; i++) { |
| 1332 | if (imm == 0) { |
| 1333 | temp.D(2 * i) = int128_getlo(Vj->Q(i)); |
| 1334 | temp.D(2 * i + 1) = int128_getlo(Vd->Q(i)); |
| 1335 | } else { |
| 1336 | r[2 * i] = int128_and(int128_rshift(Vj->Q(i), (imm - 1)), |
| 1337 | int128_one()); |
| 1338 | r[2 * i + 1] = int128_and(int128_rshift(Vd->Q(i), (imm - 1)), |
| 1339 | int128_one()); |
| 1340 | temp.D(2 * i) = int128_getlo(int128_add(int128_rshift(Vj->Q(i), |
| 1341 | imm), r[2 * i])); |
| 1342 | temp.D(2 * i + 1) = int128_getlo(int128_add(int128_rshift(Vd->Q(i), |
| 1343 | imm), r[2 * i + 1])); |
| 1344 | } |
Song Gao | a5200a1 | 2023-05-04 20:27:52 +0800 | [diff] [blame] | 1345 | } |
| 1346 | *Vd = temp; |
| 1347 | } |
| 1348 | |
/* Expand VSRARNI for the (BIT, E1, E2) triples 16/B/H, 32/H/W and 64/W/D. */
VSRARNI(vsrarni_b_h, 16, B, H)
VSRARNI(vsrarni_h_w, 32, H, W)
VSRARNI(vsrarni_w_d, 64, W, D)
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1352 | |
/*
 * SSRLNS - define do_ssrlns_<NAME>(e2, sa, sh).
 *
 * e2 (type T2) is converted to the unsigned type T1 and shifted right by
 * sa bits. The result is clamped from above to (1 << sh) - 1, which is
 * held in a T3, and returned as T1.
 */
#define SSRLNS(NAME, T1, T2, T3)                                 \
static T1 do_ssrlns_ ## NAME(T2 e2, int sa, int sh)              \
{                                                                \
    const T3 limit = (1ull << sh) - 1;                           \
    const T1 shifted = (sa == 0) ? (T1)e2 : ((T1)e2 >> sa);      \
                                                                 \
    return (shifted > limit) ? limit : shifted;                  \
}

SSRLNS(B, uint16_t, int16_t, uint8_t)
SSRLNS(H, uint32_t, int32_t, uint16_t)
SSRLNS(W, uint64_t, int64_t, uint32_t)
| 1374 | |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1375 | #define VSSRLN(NAME, BIT, E1, E2, E3) \ |
| 1376 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 1377 | { \ |
| 1378 | int i, j, ofs; \ |
| 1379 | VReg *Vd = (VReg *)vd; \ |
| 1380 | VReg *Vj = (VReg *)vj; \ |
| 1381 | VReg *Vk = (VReg *)vk; \ |
| 1382 | int oprsz = simd_oprsz(desc); \ |
| 1383 | \ |
| 1384 | ofs = LSX_LEN / BIT; \ |
| 1385 | for (i = 0; i < oprsz / 16; i++) { \ |
| 1386 | for (j = 0; j < ofs; j++) { \ |
| 1387 | Vd->E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \ |
| 1388 | Vk->E3(j + ofs * i) % BIT, \ |
| 1389 | BIT / 2 - 1); \ |
| 1390 | } \ |
| 1391 | Vd->D(2 * i + 1) = 0; \ |
| 1392 | } \ |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1393 | } |
| 1394 | |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1395 | VSSRLN(vssrln_b_h, 16, B, H, UH) |
| 1396 | VSSRLN(vssrln_h_w, 32, H, W, UW) |
| 1397 | VSSRLN(vssrln_w_d, 64, W, D, UD) |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1398 | |
/*
 * SSRANS - define do_ssrans_<E>(e2, sa, sh).
 *
 * e2 (signed type T1) is shifted right by sa bits and the result is
 * saturated to the signed range [-(1 << sh), (1 << sh) - 1]. The upper
 * bound is kept in a T2 called mask.
 *
 * The lower bound is written as ~mask rather than -(mask + 1): both
 * denote the same value, but mask + 1 is evaluated in int and overflows
 * when T2 is int32_t and mask is INT32_MAX, which is undefined unless the
 * build forces wrapping semantics.
 */
#define SSRANS(E, T1, T2)                        \
static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \
{                                                \
    T1 shft_res;                                 \
    T2 mask;                                     \
                                                 \
    if (sa == 0) {                               \
        shft_res = e2;                           \
    } else {                                     \
        shft_res = e2 >> sa;                     \
    }                                            \
    mask = (1ll << sh) - 1;                      \
    if (shft_res > mask) {                       \
        return mask;                             \
    } else if (shft_res < ~mask) {               \
        return ~mask;                            \
    } else {                                     \
        return shft_res;                         \
    }                                            \
}

SSRANS(B, int16_t, int8_t)
SSRANS(H, int32_t, int16_t)
SSRANS(W, int64_t, int32_t)
| 1422 | |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1423 | #define VSSRAN(NAME, BIT, E1, E2, E3) \ |
| 1424 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 1425 | { \ |
| 1426 | int i, j, ofs; \ |
| 1427 | VReg *Vd = (VReg *)vd; \ |
| 1428 | VReg *Vj = (VReg *)vj; \ |
| 1429 | VReg *Vk = (VReg *)vk; \ |
| 1430 | int oprsz = simd_oprsz(desc); \ |
| 1431 | \ |
| 1432 | ofs = LSX_LEN / BIT; \ |
| 1433 | for (i = 0; i < oprsz / 16; i++) { \ |
| 1434 | for (j = 0; j < ofs; j++) { \ |
| 1435 | Vd->E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \ |
| 1436 | Vk->E3(j + ofs * i) % BIT, \ |
| 1437 | BIT / 2 - 1); \ |
| 1438 | } \ |
| 1439 | Vd->D(2 * i + 1) = 0; \ |
| 1440 | } \ |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1441 | } |
| 1442 | |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1443 | VSSRAN(vssran_b_h, 16, B, H, UH) |
| 1444 | VSSRAN(vssran_h_w, 32, H, W, UW) |
| 1445 | VSSRAN(vssran_w_d, 64, W, D, UD) |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1446 | |
/*
 * SSRLNU - define do_ssrlnu_<E>(e2, sa, sh).
 *
 * e2 (type T3) is converted to the unsigned type T1 and shifted right by
 * sa bits. The result is clamped from above to (1 << sh) - 1, which is
 * held in a T2, and returned as T1.
 */
#define SSRLNU(E, T1, T2, T3)                                    \
static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh)                 \
{                                                                \
    const T2 limit = (1ull << sh) - 1;                           \
    const T1 shifted = (sa == 0) ? (T1)e2 : ((T1)e2 >> sa);      \
                                                                 \
    return (shifted > limit) ? limit : shifted;                  \
}

SSRLNU(B, uint16_t, uint8_t, int16_t)
SSRLNU(H, uint32_t, uint16_t, int32_t)
SSRLNU(W, uint64_t, uint32_t, int64_t)
| 1468 | |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1469 | #define VSSRLNU(NAME, BIT, E1, E2, E3) \ |
| 1470 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 1471 | { \ |
| 1472 | int i, j, ofs; \ |
| 1473 | VReg *Vd = (VReg *)vd; \ |
| 1474 | VReg *Vj = (VReg *)vj; \ |
| 1475 | VReg *Vk = (VReg *)vk; \ |
| 1476 | int oprsz = simd_oprsz(desc); \ |
| 1477 | \ |
| 1478 | ofs = LSX_LEN / BIT; \ |
| 1479 | for (i = 0; i < oprsz / 16; i++) { \ |
| 1480 | for (j = 0; j < ofs; j++) { \ |
| 1481 | Vd->E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \ |
| 1482 | Vk->E3(j + ofs * i) % BIT, \ |
| 1483 | BIT / 2); \ |
| 1484 | } \ |
| 1485 | Vd->D(2 * i + 1) = 0; \ |
| 1486 | } \ |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1487 | } |
| 1488 | |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1489 | VSSRLNU(vssrln_bu_h, 16, B, H, UH) |
| 1490 | VSSRLNU(vssrln_hu_w, 32, H, W, UW) |
| 1491 | VSSRLNU(vssrln_wu_d, 64, W, D, UD) |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1492 | |
/*
 * SSRANU - define do_ssranu_<E>(e2, sa, sh).
 *
 * A negative e2 (signed type T3) yields 0. Otherwise e2 is shifted right
 * by sa bits and clamped from above to (1 << sh) - 1, which is held in a
 * T2. The result is returned as the unsigned type T1.
 */
#define SSRANU(E, T1, T2, T3)                        \
static T1 do_ssranu_ ## E(T3 e2, int sa, int sh)     \
{                                                    \
    const T2 limit = (1ull << sh) - 1;               \
    T1 shifted = 0;                                  \
                                                     \
    if (e2 >= 0) {                                   \
        shifted = (sa == 0) ? e2 : (e2 >> sa);       \
    }                                                \
    return (shifted > limit) ? limit : shifted;      \
}

SSRANU(B, uint16_t, uint8_t, int16_t)
SSRANU(H, uint32_t, uint16_t, int32_t)
SSRANU(W, uint64_t, uint32_t, int64_t)
| 1517 | |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1518 | #define VSSRANU(NAME, BIT, E1, E2, E3) \ |
| 1519 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 1520 | { \ |
| 1521 | int i, j, ofs; \ |
| 1522 | VReg *Vd = (VReg *)vd; \ |
| 1523 | VReg *Vj = (VReg *)vj; \ |
| 1524 | VReg *Vk = (VReg *)vk; \ |
| 1525 | int oprsz = simd_oprsz(desc); \ |
| 1526 | \ |
| 1527 | ofs = LSX_LEN / BIT; \ |
| 1528 | for (i = 0; i < oprsz / 16; i++) { \ |
| 1529 | for (j = 0; j < ofs; j++) { \ |
| 1530 | Vd->E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \ |
| 1531 | Vk->E3(j + ofs * i) % BIT, \ |
| 1532 | BIT / 2); \ |
| 1533 | } \ |
| 1534 | Vd->D(2 * i + 1) = 0; \ |
| 1535 | } \ |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1536 | } |
| 1537 | |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1538 | VSSRANU(vssran_bu_h, 16, B, H, UH) |
| 1539 | VSSRANU(vssran_hu_w, 32, H, W, UW) |
| 1540 | VSSRANU(vssran_wu_d, 64, W, D, UD) |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1541 | |
/*
 * VSSRLNI - define HELPER(NAME)(vd, vj, imm, desc).
 *
 * Per 16-byte lane, do_ssrlns_<E1>() is applied with shift amount imm
 * and saturation width BIT / 2 - 1 to the LSX_LEN / BIT E2 elements of
 * Vj and of Vd. Vj results are stored through E1 from index
 * (LSX_LEN / BIT) * 2 * lane, Vd results from
 * (LSX_LEN / BIT) * (2 * lane + 1). The output is assembled in a
 * zero-initialised VReg and copied to Vd at the end.
 */
#define VSSRLNI(NAME, BIT, E1, E2)                                          \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)          \
{                                                                           \
    VReg *Vd = vd;                                                          \
    VReg *Vj = vj;                                                          \
    VReg result = {};                                                       \
    const int per_lane = LSX_LEN / BIT;                                     \
    const int nr_lanes = (int)simd_oprsz(desc) / 16;                        \
    int lane, n;                                                            \
                                                                            \
    for (lane = 0; lane < nr_lanes; lane++) {                               \
        const int src = per_lane * lane;                                    \
        const int dst_j = per_lane * 2 * lane;                              \
        const int dst_d = per_lane * (2 * lane + 1);                        \
                                                                            \
        for (n = 0; n < per_lane; n++) {                                    \
            result.E1(dst_j + n) = do_ssrlns_ ## E1(Vj->E2(src + n),        \
                                                    imm, BIT / 2 - 1);      \
            result.E1(dst_d + n) = do_ssrlns_ ## E1(Vd->E2(src + n),        \
                                                    imm, BIT / 2 - 1);      \
        }                                                                   \
    }                                                                       \
    *Vd = result;                                                           \
}
| 1562 | |
| 1563 | static void do_vssrlni_q(VReg *Vd, VReg *Vj, |
| 1564 | uint64_t imm, int idx, Int128 mask) |
| 1565 | { |
| 1566 | Int128 shft_res1, shft_res2; |
| 1567 | |
| 1568 | if (imm == 0) { |
| 1569 | shft_res1 = Vj->Q(idx); |
| 1570 | shft_res2 = Vd->Q(idx); |
| 1571 | } else { |
| 1572 | shft_res1 = int128_urshift(Vj->Q(idx), imm); |
| 1573 | shft_res2 = int128_urshift(Vd->Q(idx), imm); |
| 1574 | } |
| 1575 | |
| 1576 | if (int128_ult(mask, shft_res1)) { |
| 1577 | Vd->D(idx * 2) = int128_getlo(mask); |
| 1578 | }else { |
| 1579 | Vd->D(idx * 2) = int128_getlo(shft_res1); |
| 1580 | } |
| 1581 | |
| 1582 | if (int128_ult(mask, shft_res2)) { |
| 1583 | Vd->D(idx * 2 + 1) = int128_getlo(mask); |
| 1584 | }else { |
| 1585 | Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); |
| 1586 | } |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1587 | } |
| 1588 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1589 | void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1590 | { |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1591 | int i; |
| 1592 | Int128 mask; |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1593 | VReg *Vd = (VReg *)vd; |
| 1594 | VReg *Vj = (VReg *)vj; |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1595 | int oprsz = simd_oprsz(desc); |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1596 | |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1597 | mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); |
| 1598 | |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1599 | for (i = 0; i < oprsz / 16; i++) { |
| 1600 | do_vssrlni_q(Vd, Vj, imm, i, mask); |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1601 | } |
| 1602 | } |
| 1603 | |
/* Expand VSSRLNI for the (BIT, E1, E2) triples 16/B/H, 32/H/W and 64/W/D. */
VSSRLNI(vssrlni_b_h, 16, B, H)
VSSRLNI(vssrlni_h_w, 32, H, W)
VSSRLNI(vssrlni_w_d, 64, W, D)
| 1607 | |
/*
 * VSSRANI - define HELPER(NAME)(vd, vj, imm, desc).
 *
 * Per 16-byte lane, do_ssrans_<E1>() is applied with shift amount imm
 * and saturation width BIT / 2 - 1 to the LSX_LEN / BIT E2 elements of
 * Vj and of Vd. Vj results are stored through E1 from index
 * (LSX_LEN / BIT) * 2 * lane, Vd results from
 * (LSX_LEN / BIT) * (2 * lane + 1). The output is assembled in a
 * zero-initialised VReg and copied to Vd at the end.
 */
#define VSSRANI(NAME, BIT, E1, E2)                                          \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)          \
{                                                                           \
    VReg *Vd = vd;                                                          \
    VReg *Vj = vj;                                                          \
    VReg result = {};                                                       \
    const int per_lane = LSX_LEN / BIT;                                     \
    const int nr_lanes = (int)simd_oprsz(desc) / 16;                        \
    int lane, n;                                                            \
                                                                            \
    for (lane = 0; lane < nr_lanes; lane++) {                               \
        const int src = per_lane * lane;                                    \
        const int dst_j = per_lane * 2 * lane;                              \
        const int dst_d = per_lane * (2 * lane + 1);                        \
                                                                            \
        for (n = 0; n < per_lane; n++) {                                    \
            result.E1(dst_j + n) = do_ssrans_ ## E1(Vj->E2(src + n),        \
                                                    imm, BIT / 2 - 1);      \
            result.E1(dst_d + n) = do_ssrans_ ## E1(Vd->E2(src + n),        \
                                                    imm, BIT / 2 - 1);      \
        }                                                                   \
    }                                                                       \
    *Vd = result;                                                           \
}
| 1628 | |
| 1629 | static void do_vssrani_d_q(VReg *Vd, VReg *Vj, |
| 1630 | uint64_t imm, int idx, Int128 mask, Int128 min) |
| 1631 | { |
| 1632 | Int128 shft_res1, shft_res2; |
| 1633 | |
| 1634 | if (imm == 0) { |
| 1635 | shft_res1 = Vj->Q(idx); |
| 1636 | shft_res2 = Vd->Q(idx); |
| 1637 | } else { |
| 1638 | shft_res1 = int128_rshift(Vj->Q(idx), imm); |
| 1639 | shft_res2 = int128_rshift(Vd->Q(idx), imm); |
| 1640 | } |
| 1641 | |
| 1642 | if (int128_gt(shft_res1, mask)) { |
| 1643 | Vd->D(idx * 2) = int128_getlo(mask); |
| 1644 | } else if (int128_lt(shft_res1, int128_neg(min))) { |
| 1645 | Vd->D(idx * 2) = int128_getlo(min); |
| 1646 | } else { |
| 1647 | Vd->D(idx * 2) = int128_getlo(shft_res1); |
| 1648 | } |
| 1649 | |
| 1650 | if (int128_gt(shft_res2, mask)) { |
| 1651 | Vd->D(idx * 2 + 1) = int128_getlo(mask); |
| 1652 | } else if (int128_lt(shft_res2, int128_neg(min))) { |
| 1653 | Vd->D(idx * 2 + 1) = int128_getlo(min); |
| 1654 | } else { |
| 1655 | Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); |
| 1656 | } |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1657 | } |
| 1658 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1659 | void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1660 | { |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1661 | int i; |
| 1662 | Int128 mask, min; |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1663 | VReg *Vd = (VReg *)vd; |
| 1664 | VReg *Vj = (VReg *)vj; |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1665 | int oprsz = simd_oprsz(desc); |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1666 | |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1667 | mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); |
| 1668 | min = int128_lshift(int128_one(), 63); |
| 1669 | |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1670 | for (i = 0; i < oprsz / 16; i++) { |
| 1671 | do_vssrani_d_q(Vd, Vj, imm, i, mask, min); |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1672 | } |
| 1673 | } |
| 1674 | |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1675 | |
/* Expand VSSRANI for the (BIT, E1, E2) triples 16/B/H, 32/H/W and 64/W/D. */
VSSRANI(vssrani_b_h, 16, B, H)
VSSRANI(vssrani_h_w, 32, H, W)
VSSRANI(vssrani_w_d, 64, W, D)
| 1679 | |
/*
 * VSSRLNUI - define HELPER(NAME)(vd, vj, imm, desc).
 *
 * Per 16-byte lane, do_ssrlnu_<E1>() is applied with shift amount imm
 * and saturation width BIT / 2 to the LSX_LEN / BIT E2 elements of Vj
 * and of Vd. Vj results are stored through E1 from index
 * (LSX_LEN / BIT) * 2 * lane, Vd results from
 * (LSX_LEN / BIT) * (2 * lane + 1). The output is assembled in a
 * zero-initialised VReg and copied to Vd at the end.
 */
#define VSSRLNUI(NAME, BIT, E1, E2)                                         \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)          \
{                                                                           \
    VReg *Vd = vd;                                                          \
    VReg *Vj = vj;                                                          \
    VReg result = {};                                                       \
    const int per_lane = LSX_LEN / BIT;                                     \
    const int nr_lanes = (int)simd_oprsz(desc) / 16;                        \
    int lane, n;                                                            \
                                                                            \
    for (lane = 0; lane < nr_lanes; lane++) {                               \
        const int src = per_lane * lane;                                    \
        const int dst_j = per_lane * 2 * lane;                              \
        const int dst_d = per_lane * (2 * lane + 1);                        \
                                                                            \
        for (n = 0; n < per_lane; n++) {                                    \
            result.E1(dst_j + n) = do_ssrlnu_ ## E1(Vj->E2(src + n),        \
                                                    imm, BIT / 2);          \
            result.E1(dst_d + n) = do_ssrlnu_ ## E1(Vd->E2(src + n),        \
                                                    imm, BIT / 2);          \
        }                                                                   \
    }                                                                       \
    *Vd = result;                                                           \
}
| 1700 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1701 | void HELPER(vssrlni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1702 | { |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1703 | int i; |
| 1704 | Int128 mask; |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1705 | VReg *Vd = (VReg *)vd; |
| 1706 | VReg *Vj = (VReg *)vj; |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1707 | int oprsz = simd_oprsz(desc); |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1708 | |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1709 | mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); |
| 1710 | |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1711 | for (i = 0; i < oprsz / 16; i++) { |
| 1712 | do_vssrlni_q(Vd, Vj, imm, i, mask); |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1713 | } |
| 1714 | } |
| 1715 | |
/* Expand VSSRLNUI for the (BIT, E1, E2) triples 16/B/H, 32/H/W and 64/W/D. */
VSSRLNUI(vssrlni_bu_h, 16, B, H)
VSSRLNUI(vssrlni_hu_w, 32, H, W)
VSSRLNUI(vssrlni_wu_d, 64, W, D)
| 1719 | |
/*
 * VSSRANUI - define HELPER(NAME)(vd, vj, imm, desc).
 *
 * Per 16-byte lane, do_ssranu_<E1>() is applied with shift amount imm
 * and saturation width BIT / 2 to the LSX_LEN / BIT E2 elements of Vj
 * and of Vd. Vj results are stored through E1 from index
 * (LSX_LEN / BIT) * 2 * lane, Vd results from
 * (LSX_LEN / BIT) * (2 * lane + 1). The output is assembled in a
 * zero-initialised VReg and copied to Vd at the end.
 */
#define VSSRANUI(NAME, BIT, E1, E2)                                         \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)          \
{                                                                           \
    VReg *Vd = vd;                                                          \
    VReg *Vj = vj;                                                          \
    VReg result = {};                                                       \
    const int per_lane = LSX_LEN / BIT;                                     \
    const int nr_lanes = (int)simd_oprsz(desc) / 16;                        \
    int lane, n;                                                            \
                                                                            \
    for (lane = 0; lane < nr_lanes; lane++) {                               \
        const int src = per_lane * lane;                                    \
        const int dst_j = per_lane * 2 * lane;                              \
        const int dst_d = per_lane * (2 * lane + 1);                        \
                                                                            \
        for (n = 0; n < per_lane; n++) {                                    \
            result.E1(dst_j + n) = do_ssranu_ ## E1(Vj->E2(src + n),        \
                                                    imm, BIT / 2);          \
            result.E1(dst_d + n) = do_ssranu_ ## E1(Vd->E2(src + n),        \
                                                    imm, BIT / 2);          \
        }                                                                   \
    }                                                                       \
    *Vd = result;                                                           \
}
| 1740 | |
| 1741 | static void do_vssrani_du_q(VReg *Vd, VReg *Vj, |
| 1742 | uint64_t imm, int idx, Int128 mask) |
| 1743 | { |
| 1744 | Int128 shft_res1, shft_res2; |
| 1745 | |
| 1746 | if (imm == 0) { |
| 1747 | shft_res1 = Vj->Q(idx); |
| 1748 | shft_res2 = Vd->Q(idx); |
| 1749 | } else { |
| 1750 | shft_res1 = int128_rshift(Vj->Q(idx), imm); |
| 1751 | shft_res2 = int128_rshift(Vd->Q(idx), imm); |
| 1752 | } |
| 1753 | |
| 1754 | if (int128_lt(Vj->Q(idx), int128_zero())) { |
| 1755 | shft_res1 = int128_zero(); |
| 1756 | } |
| 1757 | |
| 1758 | if (int128_lt(Vd->Q(idx), int128_zero())) { |
| 1759 | shft_res2 = int128_zero(); |
| 1760 | } |
| 1761 | if (int128_ult(mask, shft_res1)) { |
| 1762 | Vd->D(idx * 2) = int128_getlo(mask); |
| 1763 | }else { |
| 1764 | Vd->D(idx * 2) = int128_getlo(shft_res1); |
| 1765 | } |
| 1766 | |
| 1767 | if (int128_ult(mask, shft_res2)) { |
| 1768 | Vd->D(idx * 2 + 1) = int128_getlo(mask); |
| 1769 | }else { |
| 1770 | Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); |
| 1771 | } |
| 1772 | |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1773 | } |
| 1774 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1775 | void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1776 | { |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1777 | int i; |
| 1778 | Int128 mask; |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 1779 | VReg *Vd = (VReg *)vd; |
| 1780 | VReg *Vj = (VReg *)vj; |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1781 | int oprsz = simd_oprsz(desc); |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1782 | |
| 1783 | mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); |
| 1784 | |
Song Gao | 6256c8c | 2023-09-14 10:26:28 +0800 | [diff] [blame] | 1785 | for (i = 0; i < oprsz / 16; i++) { |
| 1786 | do_vssrani_du_q(Vd, Vj, imm, i, mask); |
Song Gao | 83b3815 | 2023-05-04 20:27:53 +0800 | [diff] [blame] | 1787 | } |
| 1788 | } |
| 1789 | |
/* Expand VSSRANUI for the (BIT, E1, E2) triples 16/B/H, 32/H/W and 64/W/D. */
VSSRANUI(vssrani_bu_h, 16, B, H)
VSSRANUI(vssrani_hu_w, 32, H, W)
VSSRANUI(vssrani_wu_d, 64, W, D)
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 1793 | |
/*
 * SSRLRNS - define do_ssrlrns_<E1>(e2, sa, sh).
 *
 * The value returned by do_vsrlr_<E2>(e2, sa) is stored in a T1 and
 * clamped from above to (1 << sh) - 1 (also held as T1). The T3 macro
 * parameter is not used by the generated function.
 */
#define SSRLRNS(E1, E2, T1, T2, T3)                    \
static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh)     \
{                                                      \
    const T1 rounded = do_vsrlr_ ## E2(e2, sa);        \
    const T1 limit = (1ull << sh) - 1;                 \
                                                       \
    return (rounded > limit) ? limit : rounded;        \
}

SSRLRNS(B, H, uint16_t, int16_t, uint8_t)
SSRLRNS(H, W, uint32_t, int32_t, uint16_t)
SSRLRNS(W, D, uint64_t, int64_t, uint32_t)
| 1812 | |
Song Gao | 77fca79 | 2023-09-14 10:26:29 +0800 | [diff] [blame] | 1813 | #define VSSRLRN(NAME, BIT, E1, E2, E3) \ |
| 1814 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 1815 | { \ |
| 1816 | int i, j, ofs; \ |
| 1817 | VReg *Vd = (VReg *)vd; \ |
| 1818 | VReg *Vj = (VReg *)vj; \ |
| 1819 | VReg *Vk = (VReg *)vk; \ |
| 1820 | int oprsz = simd_oprsz(desc); \ |
| 1821 | \ |
| 1822 | ofs = LSX_LEN / BIT; \ |
| 1823 | for (i = 0; i < oprsz / 16; i++) { \ |
| 1824 | for (j = 0; j < ofs; j++) { \ |
| 1825 | Vd->E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \ |
| 1826 | Vk->E3(j + ofs * i) % BIT, \ |
| 1827 | BIT / 2 - 1); \ |
| 1828 | } \ |
| 1829 | Vd->D(2 * i + 1) = 0; \ |
| 1830 | } \ |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 1831 | } |
| 1832 | |
Song Gao | 77fca79 | 2023-09-14 10:26:29 +0800 | [diff] [blame] | 1833 | VSSRLRN(vssrlrn_b_h, 16, B, H, UH) |
| 1834 | VSSRLRN(vssrlrn_h_w, 32, H, W, UW) |
| 1835 | VSSRLRN(vssrlrn_w_d, 64, W, D, UD) |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 1836 | |
/*
 * SSRARNS - define do_ssrarns_<E1>(e2, sa, sh).
 *
 * The value returned by do_vsrar_<E2>(e2, sa) is stored in the signed
 * type T1 and saturated to [-(1 << sh), (1 << sh) - 1]. The upper bound
 * is kept in a T2 called mask.
 *
 * The lower bound is written as ~mask rather than -(mask + 1): both
 * denote the same value, but mask + 1 is evaluated in int and overflows
 * when T2 is int32_t and mask is INT32_MAX, which is undefined unless the
 * build forces wrapping semantics.
 */
#define SSRARNS(E1, E2, T1, T2)                    \
static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \
{                                                  \
    T1 shft_res;                                   \
    T2 mask;                                       \
                                                   \
    shft_res = do_vsrar_ ## E2(e2, sa);            \
    mask = (1ll << sh) - 1;                        \
    if (shft_res > mask) {                         \
        return mask;                               \
    } else if (shft_res < ~mask) {                 \
        return ~mask;                              \
    } else {                                       \
        return shft_res;                           \
    }                                              \
}

SSRARNS(B, H, int16_t, int8_t)
SSRARNS(H, W, int32_t, int16_t)
SSRARNS(W, D, int64_t, int32_t)
| 1857 | |
Song Gao | 77fca79 | 2023-09-14 10:26:29 +0800 | [diff] [blame] | 1858 | #define VSSRARN(NAME, BIT, E1, E2, E3) \ |
| 1859 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 1860 | { \ |
| 1861 | int i, j, ofs; \ |
| 1862 | VReg *Vd = (VReg *)vd; \ |
| 1863 | VReg *Vj = (VReg *)vj; \ |
| 1864 | VReg *Vk = (VReg *)vk; \ |
| 1865 | int oprsz = simd_oprsz(desc); \ |
| 1866 | \ |
| 1867 | ofs = LSX_LEN / BIT; \ |
| 1868 | for (i = 0; i < oprsz / 16; i++) { \ |
| 1869 | for (j = 0; j < ofs; j++) { \ |
| 1870 | Vd->E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \ |
| 1871 | Vk->E3(j + ofs * i) % BIT, \ |
| 1872 | BIT/ 2 - 1); \ |
| 1873 | } \ |
| 1874 | Vd->D(2 * i + 1) = 0; \ |
| 1875 | } \ |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 1876 | } |
| 1877 | |
Song Gao | 77fca79 | 2023-09-14 10:26:29 +0800 | [diff] [blame] | 1878 | VSSRARN(vssrarn_b_h, 16, B, H, UH) |
| 1879 | VSSRARN(vssrarn_h_w, 32, H, W, UW) |
| 1880 | VSSRARN(vssrarn_w_d, 64, W, D, UD) |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 1881 | |
/*
 * SSRLRNU - define do_ssrlrnu_<E1>(e2, sa, sh).
 *
 * The value returned by do_vsrlr_<E2>(e2, sa) is stored in the unsigned
 * type T1 and clamped from above to (1 << sh) - 1, which is held in a
 * T2. The parameter e2 has type T3.
 */
#define SSRLRNU(E1, E2, T1, T2, T3)                    \
static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh)     \
{                                                      \
    const T1 rounded = do_vsrlr_ ## E2(e2, sa);        \
    const T2 limit = (1ull << sh) - 1;                 \
                                                       \
    return (rounded > limit) ? limit : rounded;        \
}

SSRLRNU(B, H, uint16_t, uint8_t, int16_t)
SSRLRNU(H, W, uint32_t, uint16_t, int32_t)
SSRLRNU(W, D, uint64_t, uint32_t, int64_t)
| 1901 | |
Song Gao | 77fca79 | 2023-09-14 10:26:29 +0800 | [diff] [blame] | 1902 | #define VSSRLRNU(NAME, BIT, E1, E2, E3) \ |
| 1903 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 1904 | { \ |
| 1905 | int i, j, ofs; \ |
| 1906 | VReg *Vd = (VReg *)vd; \ |
| 1907 | VReg *Vj = (VReg *)vj; \ |
| 1908 | VReg *Vk = (VReg *)vk; \ |
| 1909 | int oprsz = simd_oprsz(desc); \ |
| 1910 | \ |
| 1911 | ofs = LSX_LEN / BIT; \ |
| 1912 | for (i = 0; i < oprsz / 16; i++) { \ |
| 1913 | for (j = 0; j < ofs; j++) { \ |
| 1914 | Vd->E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \ |
| 1915 | Vk->E3(j + ofs * i) % BIT, \ |
| 1916 | BIT / 2); \ |
| 1917 | } \ |
| 1918 | Vd->D(2 * i + 1) = 0; \ |
| 1919 | } \ |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 1920 | } |
| 1921 | |
Song Gao | 77fca79 | 2023-09-14 10:26:29 +0800 | [diff] [blame] | 1922 | VSSRLRNU(vssrlrn_bu_h, 16, B, H, UH) |
| 1923 | VSSRLRNU(vssrlrn_hu_w, 32, H, W, UW) |
| 1924 | VSSRLRNU(vssrlrn_wu_d, 64, W, D, UD) |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 1925 | |
/*
 * SSRARNU - define do_ssrarnu_<E1>(e2, sa, sh).
 *
 * A negative e2 (signed type T3) yields 0 without calling the shift
 * helper. Otherwise the value returned by do_vsrar_<E2>(e2, sa) is used.
 * The result is clamped from above to (1 << sh) - 1, held in a T2, and
 * returned as the unsigned type T1.
 */
#define SSRARNU(E1, E2, T1, T2, T3)                    \
static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh)     \
{                                                      \
    T1 rounded = 0;                                    \
    T2 limit;                                          \
                                                       \
    if (e2 >= 0) {                                     \
        rounded = do_vsrar_ ## E2(e2, sa);             \
    }                                                  \
    limit = (1ull << sh) - 1;                          \
    return (rounded > limit) ? limit : rounded;        \
}

SSRARNU(B, H, uint16_t, uint8_t, int16_t)
SSRARNU(H, W, uint32_t, uint16_t, int32_t)
SSRARNU(W, D, uint64_t, uint32_t, int64_t)
| 1948 | |
Song Gao | 77fca79 | 2023-09-14 10:26:29 +0800 | [diff] [blame] | 1949 | #define VSSRARNU(NAME, BIT, E1, E2, E3) \ |
| 1950 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 1951 | { \ |
| 1952 | int i, j, ofs; \ |
| 1953 | VReg *Vd = (VReg *)vd; \ |
| 1954 | VReg *Vj = (VReg *)vj; \ |
| 1955 | VReg *Vk = (VReg *)vk; \ |
| 1956 | int oprsz = simd_oprsz(desc); \ |
| 1957 | \ |
| 1958 | ofs = LSX_LEN / BIT; \ |
| 1959 | for (i = 0; i < oprsz / 16; i++) { \ |
| 1960 | for (j = 0; j < ofs; j++) { \ |
| 1961 | Vd->E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \ |
| 1962 | Vk->E3(j + ofs * i) % BIT, \ |
| 1963 | BIT / 2); \ |
| 1964 | } \ |
| 1965 | Vd->D(2 * i + 1) = 0; \ |
| 1966 | } \ |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 1967 | } |
| 1968 | |
Song Gao | 77fca79 | 2023-09-14 10:26:29 +0800 | [diff] [blame] | 1969 | VSSRARNU(vssrarn_bu_h, 16, B, H, UH) |
| 1970 | VSSRARNU(vssrarn_hu_w, 32, H, W, UW) |
| 1971 | VSSRARNU(vssrarn_wu_d, 64, W, D, UD) |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 1972 | |
/*
 * VSSRLRNI - generate an immediate-shift narrowing helper built on
 * do_ssrlrns_<E1>().
 *
 *   NAME  helper name
 *   BIT   width in bits of one source element
 *   E1    VReg accessor for the destination elements
 *   E2    VReg accessor for the source elements
 *
 * Per 16-byte lane, the elements of Vj fill the first LSX_LEN / BIT
 * destination slots and the elements of the old Vd fill the following
 * LSX_LEN / BIT slots.  The result is built in a zero-initialised temporary
 * and copied to *Vd at the end, so every read of Vd sees its original value.
 */
#define VSSRLRNI(NAME, BIT, E1, E2)                                     \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)      \
{                                                                       \
    VReg *Vd = (VReg *)vd;                                              \
    VReg *Vj = (VReg *)vj;                                              \
    VReg result = {};                                                   \
    const int per_lane = LSX_LEN / BIT;                                 \
    const int lanes = simd_oprsz(desc) / 16;                            \
    const int sat_bits = BIT / 2 - 1;                                   \
    int lane, n;                                                        \
                                                                        \
    for (lane = 0; lane < lanes; lane++) {                              \
        const int src = per_lane * lane;                                \
        const int lo = per_lane * 2 * lane;                             \
        const int hi = per_lane * (2 * lane + 1);                       \
                                                                        \
        for (n = 0; n < per_lane; n++) {                                \
            result.E1(lo + n) = do_ssrlrns_ ## E1(Vj->E2(src + n),      \
                                                  imm, sat_bits);       \
            result.E1(hi + n) = do_ssrlrns_ ## E1(Vd->E2(src + n),      \
                                                  imm, sat_bits);       \
        }                                                               \
    }                                                                   \
    *Vd = result;                                                       \
}
| 1993 | |
Song Gao | 77fca79 | 2023-09-14 10:26:29 +0800 | [diff] [blame] | 1994 | static void do_vssrlrni_q(VReg *Vd, VReg * Vj, |
| 1995 | uint64_t imm, int idx, Int128 mask) |
| 1996 | { |
| 1997 | Int128 shft_res1, shft_res2, r1, r2; |
| 1998 | if (imm == 0) { |
| 1999 | shft_res1 = Vj->Q(idx); |
| 2000 | shft_res2 = Vd->Q(idx); |
| 2001 | } else { |
| 2002 | r1 = int128_and(int128_urshift(Vj->Q(idx), (imm - 1)), int128_one()); |
| 2003 | r2 = int128_and(int128_urshift(Vd->Q(idx), (imm - 1)), int128_one()); |
| 2004 | shft_res1 = (int128_add(int128_urshift(Vj->Q(idx), imm), r1)); |
| 2005 | shft_res2 = (int128_add(int128_urshift(Vd->Q(idx), imm), r2)); |
| 2006 | } |
| 2007 | |
| 2008 | if (int128_ult(mask, shft_res1)) { |
| 2009 | Vd->D(idx * 2) = int128_getlo(mask); |
| 2010 | }else { |
| 2011 | Vd->D(idx * 2) = int128_getlo(shft_res1); |
| 2012 | } |
| 2013 | |
| 2014 | if (int128_ult(mask, shft_res2)) { |
| 2015 | Vd->D(idx * 2 + 1) = int128_getlo(mask); |
| 2016 | }else { |
| 2017 | Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); |
| 2018 | } |
| 2019 | } |
| 2020 | |
| 2021 | void HELPER(vssrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
| 2022 | { |
| 2023 | int i; |
| 2024 | Int128 mask; |
| 2025 | VReg *Vd = (VReg *)vd; |
| 2026 | VReg *Vj = (VReg *)vj; |
| 2027 | int oprsz = simd_oprsz(desc); |
| 2028 | |
| 2029 | mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); |
| 2030 | |
| 2031 | for (i = 0; i < oprsz / 16; i++) { |
| 2032 | do_vssrlrni_q(Vd, Vj, imm, i, mask); |
| 2033 | } |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 2034 | } |
| 2035 | |
| 2036 | VSSRLRNI(vssrlrni_b_h, 16, B, H) |
| 2037 | VSSRLRNI(vssrlrni_h_w, 32, H, W) |
| 2038 | VSSRLRNI(vssrlrni_w_d, 64, W, D) |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 2039 | |
/*
 * VSSRARNI - generate an immediate-shift narrowing helper built on
 * do_ssrarns_<E1>().
 *
 *   NAME  helper name
 *   BIT   width in bits of one source element
 *   E1    VReg accessor for the destination elements
 *   E2    VReg accessor for the source elements
 *
 * In every 16-byte lane the narrowed elements of Vj are placed first and
 * the narrowed elements of the previous Vd contents follow.  A
 * zero-initialised temporary collects the result and is copied to *Vd once
 * all lanes are done.
 */
#define VSSRARNI(NAME, BIT, E1, E2)                                     \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)      \
{                                                                       \
    VReg *Vd = (VReg *)vd;                                              \
    VReg *Vj = (VReg *)vj;                                              \
    VReg out = {};                                                      \
    const int elems = LSX_LEN / BIT;                                    \
    const int lanes = simd_oprsz(desc) / 16;                            \
    const int sat_bits = BIT / 2 - 1;                                   \
    int lane, e;                                                        \
                                                                        \
    for (lane = 0; lane < lanes; lane++) {                              \
        const int rd = elems * lane;                                    \
        const int wr_j = elems * 2 * lane;                              \
        const int wr_d = elems * (2 * lane + 1);                        \
                                                                        \
        for (e = 0; e < elems; e++) {                                   \
            out.E1(wr_j + e) = do_ssrarns_ ## E1(Vj->E2(rd + e),        \
                                                 imm, sat_bits);        \
            out.E1(wr_d + e) = do_ssrarns_ ## E1(Vd->E2(rd + e),        \
                                                 imm, sat_bits);        \
        }                                                               \
    }                                                                   \
    *Vd = out;                                                          \
}
| 2060 | |
| 2061 | static void do_vssrarni_d_q(VReg *Vd, VReg *Vj, |
| 2062 | uint64_t imm, int idx, Int128 mask1, Int128 mask2) |
| 2063 | { |
| 2064 | Int128 shft_res1, shft_res2, r1, r2; |
| 2065 | |
| 2066 | if (imm == 0) { |
| 2067 | shft_res1 = Vj->Q(idx); |
| 2068 | shft_res2 = Vd->Q(idx); |
| 2069 | } else { |
| 2070 | r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one()); |
| 2071 | r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one()); |
| 2072 | shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1); |
| 2073 | shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2); |
| 2074 | } |
| 2075 | if (int128_gt(shft_res1, mask1)) { |
| 2076 | Vd->D(idx * 2) = int128_getlo(mask1); |
| 2077 | } else if (int128_lt(shft_res1, int128_neg(mask2))) { |
| 2078 | Vd->D(idx * 2) = int128_getlo(mask2); |
| 2079 | } else { |
| 2080 | Vd->D(idx * 2) = int128_getlo(shft_res1); |
| 2081 | } |
| 2082 | |
| 2083 | if (int128_gt(shft_res2, mask1)) { |
| 2084 | Vd->D(idx * 2 + 1) = int128_getlo(mask1); |
| 2085 | } else if (int128_lt(shft_res2, int128_neg(mask2))) { |
| 2086 | Vd->D(idx * 2 + 1) = int128_getlo(mask2); |
| 2087 | } else { |
| 2088 | Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); |
| 2089 | } |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 2090 | } |
| 2091 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 2092 | void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 2093 | { |
Song Gao | 77fca79 | 2023-09-14 10:26:29 +0800 | [diff] [blame] | 2094 | int i; |
| 2095 | Int128 mask1, mask2; |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 2096 | VReg *Vd = (VReg *)vd; |
| 2097 | VReg *Vj = (VReg *)vj; |
Song Gao | 77fca79 | 2023-09-14 10:26:29 +0800 | [diff] [blame] | 2098 | int oprsz = simd_oprsz(desc); |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 2099 | |
| 2100 | mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one()); |
| 2101 | mask2 = int128_lshift(int128_one(), 63); |
| 2102 | |
Song Gao | 77fca79 | 2023-09-14 10:26:29 +0800 | [diff] [blame] | 2103 | for (i = 0; i < oprsz / 16; i++) { |
| 2104 | do_vssrarni_d_q(Vd, Vj, imm, i, mask1, mask2); |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 2105 | } |
| 2106 | } |
| 2107 | |
| 2108 | VSSRARNI(vssrarni_b_h, 16, B, H) |
| 2109 | VSSRARNI(vssrarni_h_w, 32, H, W) |
| 2110 | VSSRARNI(vssrarni_w_d, 64, W, D) |
| 2111 | |
/*
 * VSSRLRNUI - generate an immediate-shift narrowing helper built on
 * do_ssrlrnu_<E1>().
 *
 *   NAME  helper name
 *   BIT   width in bits of one source element
 *   E1    VReg accessor for the destination elements
 *   E2    VReg accessor for the source elements
 *
 * Per 16-byte lane, results computed from Vj occupy the first LSX_LEN / BIT
 * destination slots and results computed from the old Vd the next
 * LSX_LEN / BIT.  Everything is written to a zero-initialised temporary
 * that replaces *Vd at the end.
 */
#define VSSRLRNUI(NAME, BIT, E1, E2)                                    \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)      \
{                                                                       \
    VReg *Vd = (VReg *)vd;                                              \
    VReg *Vj = (VReg *)vj;                                              \
    VReg result = {};                                                   \
    const int per_lane = LSX_LEN / BIT;                                 \
    const int lanes = simd_oprsz(desc) / 16;                            \
    const int sat_bits = BIT / 2;                                       \
    int lane, n;                                                        \
                                                                        \
    for (lane = 0; lane < lanes; lane++) {                              \
        const int src = per_lane * lane;                                \
        const int lo = per_lane * 2 * lane;                             \
        const int hi = per_lane * (2 * lane + 1);                       \
                                                                        \
        for (n = 0; n < per_lane; n++) {                                \
            result.E1(lo + n) = do_ssrlrnu_ ## E1(Vj->E2(src + n),      \
                                                  imm, sat_bits);       \
            result.E1(hi + n) = do_ssrlrnu_ ## E1(Vd->E2(src + n),      \
                                                  imm, sat_bits);       \
        }                                                               \
    }                                                                   \
    *Vd = result;                                                       \
}
| 2132 | |
| 2133 | void HELPER(vssrlrni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
| 2134 | { |
| 2135 | int i; |
| 2136 | Int128 mask; |
| 2137 | VReg *Vd = (VReg *)vd; |
| 2138 | VReg *Vj = (VReg *)vj; |
| 2139 | int oprsz = simd_oprsz(desc); |
| 2140 | |
| 2141 | mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); |
| 2142 | |
| 2143 | for (i = 0; i < oprsz / 16; i++) { |
| 2144 | do_vssrlrni_q(Vd, Vj, imm, i, mask); |
| 2145 | } |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 2146 | } |
| 2147 | |
| 2148 | VSSRLRNUI(vssrlrni_bu_h, 16, B, H) |
| 2149 | VSSRLRNUI(vssrlrni_hu_w, 32, H, W) |
| 2150 | VSSRLRNUI(vssrlrni_wu_d, 64, W, D) |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 2151 | |
/*
 * VSSRARNUI - generate an immediate-shift narrowing helper built on
 * do_ssrarnu_<E1>().
 *
 *   NAME  helper name
 *   BIT   width in bits of one source element
 *   E1    VReg accessor for the destination elements
 *   E2    VReg accessor for the source elements
 *
 * In every 16-byte lane the narrowed elements of Vj come first and the
 * narrowed elements of the previous Vd contents follow.  The result is
 * assembled in a zero-initialised temporary and copied to *Vd last.
 */
#define VSSRARNUI(NAME, BIT, E1, E2)                                    \
void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc)      \
{                                                                       \
    VReg *Vd = (VReg *)vd;                                              \
    VReg *Vj = (VReg *)vj;                                              \
    VReg out = {};                                                      \
    const int elems = LSX_LEN / BIT;                                    \
    const int lanes = simd_oprsz(desc) / 16;                            \
    const int sat_bits = BIT / 2;                                       \
    int lane, e;                                                        \
                                                                        \
    for (lane = 0; lane < lanes; lane++) {                              \
        const int rd = elems * lane;                                    \
        const int wr_j = elems * 2 * lane;                              \
        const int wr_d = elems * (2 * lane + 1);                        \
                                                                        \
        for (e = 0; e < elems; e++) {                                   \
            out.E1(wr_j + e) = do_ssrarnu_ ## E1(Vj->E2(rd + e),        \
                                                 imm, sat_bits);        \
            out.E1(wr_d + e) = do_ssrarnu_ ## E1(Vd->E2(rd + e),        \
                                                 imm, sat_bits);        \
        }                                                               \
    }                                                                   \
    *Vd = out;                                                          \
}
| 2172 | |
| 2173 | static void do_vssrarni_du_q(VReg *Vd, VReg *Vj, |
| 2174 | uint64_t imm, int idx, Int128 mask1, Int128 mask2) |
| 2175 | { |
| 2176 | Int128 shft_res1, shft_res2, r1, r2; |
| 2177 | |
| 2178 | if (imm == 0) { |
| 2179 | shft_res1 = Vj->Q(idx); |
| 2180 | shft_res2 = Vd->Q(idx); |
| 2181 | } else { |
| 2182 | r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one()); |
| 2183 | r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one()); |
| 2184 | shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1); |
| 2185 | shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2); |
| 2186 | } |
| 2187 | |
| 2188 | if (int128_lt(Vj->Q(idx), int128_zero())) { |
| 2189 | shft_res1 = int128_zero(); |
| 2190 | } |
| 2191 | if (int128_lt(Vd->Q(idx), int128_zero())) { |
| 2192 | shft_res2 = int128_zero(); |
| 2193 | } |
| 2194 | |
| 2195 | if (int128_gt(shft_res1, mask1)) { |
| 2196 | Vd->D(idx * 2) = int128_getlo(mask1); |
| 2197 | } else if (int128_lt(shft_res1, int128_neg(mask2))) { |
| 2198 | Vd->D(idx * 2) = int128_getlo(mask2); |
| 2199 | } else { |
| 2200 | Vd->D(idx * 2) = int128_getlo(shft_res1); |
| 2201 | } |
| 2202 | |
| 2203 | if (int128_gt(shft_res2, mask1)) { |
| 2204 | Vd->D(idx * 2 + 1) = int128_getlo(mask1); |
| 2205 | } else if (int128_lt(shft_res2, int128_neg(mask2))) { |
| 2206 | Vd->D(idx * 2 + 1) = int128_getlo(mask2); |
| 2207 | } else { |
| 2208 | Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); |
| 2209 | } |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 2210 | } |
| 2211 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 2212 | void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 2213 | { |
Song Gao | 77fca79 | 2023-09-14 10:26:29 +0800 | [diff] [blame] | 2214 | int i; |
| 2215 | Int128 mask1, mask2; |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 2216 | VReg *Vd = (VReg *)vd; |
| 2217 | VReg *Vj = (VReg *)vj; |
Song Gao | 77fca79 | 2023-09-14 10:26:29 +0800 | [diff] [blame] | 2218 | int oprsz = simd_oprsz(desc); |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 2219 | |
| 2220 | mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one()); |
| 2221 | mask2 = int128_lshift(int128_one(), 64); |
| 2222 | |
Song Gao | 77fca79 | 2023-09-14 10:26:29 +0800 | [diff] [blame] | 2223 | for (i = 0; i < oprsz / 16; i++) { |
| 2224 | do_vssrarni_du_q(Vd, Vj, imm, i, mask1, mask2); |
Song Gao | 162cd32 | 2023-05-04 20:27:54 +0800 | [diff] [blame] | 2225 | } |
| 2226 | } |
| 2227 | |
| 2228 | VSSRARNUI(vssrarni_bu_h, 16, B, H) |
| 2229 | VSSRARNUI(vssrarni_hu_w, 32, H, W) |
| 2230 | VSSRARNUI(vssrarni_wu_d, 64, W, D) |
Song Gao | 2e105e1 | 2023-05-04 20:27:55 +0800 | [diff] [blame] | 2231 | |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 2232 | #define DO_2OP(NAME, BIT, E, DO_OP) \ |
| 2233 | void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ |
| 2234 | { \ |
| 2235 | int i; \ |
| 2236 | VReg *Vd = (VReg *)vd; \ |
| 2237 | VReg *Vj = (VReg *)vj; \ |
Song Gao | 12ad133 | 2023-09-14 10:26:30 +0800 | [diff] [blame] | 2238 | int oprsz = simd_oprsz(desc); \ |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 2239 | \ |
Song Gao | 12ad133 | 2023-09-14 10:26:30 +0800 | [diff] [blame] | 2240 | for (i = 0; i < oprsz / (BIT / 8); i++) \ |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 2241 | { \ |
| 2242 | Vd->E(i) = DO_OP(Vj->E(i)); \ |
| 2243 | } \ |
Song Gao | 2e105e1 | 2023-05-04 20:27:55 +0800 | [diff] [blame] | 2244 | } |
| 2245 | |
Song Gao | 2e105e1 | 2023-05-04 20:27:55 +0800 | [diff] [blame] | 2246 | DO_2OP(vclo_b, 8, UB, DO_CLO_B) |
| 2247 | DO_2OP(vclo_h, 16, UH, DO_CLO_H) |
| 2248 | DO_2OP(vclo_w, 32, UW, DO_CLO_W) |
| 2249 | DO_2OP(vclo_d, 64, UD, DO_CLO_D) |
| 2250 | DO_2OP(vclz_b, 8, UB, DO_CLZ_B) |
| 2251 | DO_2OP(vclz_h, 16, UH, DO_CLZ_H) |
| 2252 | DO_2OP(vclz_w, 32, UW, DO_CLZ_W) |
| 2253 | DO_2OP(vclz_d, 64, UD, DO_CLZ_D) |
Song Gao | bb22ee5 | 2023-05-04 20:27:56 +0800 | [diff] [blame] | 2254 | |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 2255 | #define VPCNT(NAME, BIT, E, FN) \ |
| 2256 | void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ |
| 2257 | { \ |
| 2258 | int i; \ |
| 2259 | VReg *Vd = (VReg *)vd; \ |
| 2260 | VReg *Vj = (VReg *)vj; \ |
Song Gao | 956dec7 | 2023-09-14 10:26:31 +0800 | [diff] [blame] | 2261 | int oprsz = simd_oprsz(desc); \ |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 2262 | \ |
Song Gao | 956dec7 | 2023-09-14 10:26:31 +0800 | [diff] [blame] | 2263 | for (i = 0; i < oprsz / (BIT / 8); i++) \ |
Song Gao | ff27e33 | 2023-09-14 10:25:56 +0800 | [diff] [blame] | 2264 | { \ |
| 2265 | Vd->E(i) = FN(Vj->E(i)); \ |
| 2266 | } \ |
Song Gao | bb22ee5 | 2023-05-04 20:27:56 +0800 | [diff] [blame] | 2267 | } |
| 2268 | |
| 2269 | VPCNT(vpcnt_b, 8, UB, ctpop8) |
| 2270 | VPCNT(vpcnt_h, 16, UH, ctpop16) |
| 2271 | VPCNT(vpcnt_w, 32, UW, ctpop32) |
| 2272 | VPCNT(vpcnt_d, 64, UD, ctpop64) |
Song Gao | 0b1e670 | 2023-05-04 20:27:57 +0800 | [diff] [blame] | 2273 | |
Song Gao | 1b3e242 | 2023-09-14 10:26:32 +0800 | [diff] [blame] | 2274 | #define DO_BIT(NAME, BIT, E, DO_OP) \ |
| 2275 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 2276 | { \ |
| 2277 | int i; \ |
| 2278 | VReg *Vd = (VReg *)vd; \ |
| 2279 | VReg *Vj = (VReg *)vj; \ |
| 2280 | VReg *Vk = (VReg *)vk; \ |
| 2281 | int oprsz = simd_oprsz(desc); \ |
| 2282 | \ |
| 2283 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
| 2284 | Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT); \ |
| 2285 | } \ |
Song Gao | 0b1e670 | 2023-05-04 20:27:57 +0800 | [diff] [blame] | 2286 | } |
| 2287 | |
| 2288 | DO_BIT(vbitclr_b, 8, UB, DO_BITCLR) |
| 2289 | DO_BIT(vbitclr_h, 16, UH, DO_BITCLR) |
| 2290 | DO_BIT(vbitclr_w, 32, UW, DO_BITCLR) |
| 2291 | DO_BIT(vbitclr_d, 64, UD, DO_BITCLR) |
| 2292 | DO_BIT(vbitset_b, 8, UB, DO_BITSET) |
| 2293 | DO_BIT(vbitset_h, 16, UH, DO_BITSET) |
| 2294 | DO_BIT(vbitset_w, 32, UW, DO_BITSET) |
| 2295 | DO_BIT(vbitset_d, 64, UD, DO_BITSET) |
| 2296 | DO_BIT(vbitrev_b, 8, UB, DO_BITREV) |
| 2297 | DO_BIT(vbitrev_h, 16, UH, DO_BITREV) |
| 2298 | DO_BIT(vbitrev_w, 32, UW, DO_BITREV) |
| 2299 | DO_BIT(vbitrev_d, 64, UD, DO_BITREV) |
| 2300 | |
Song Gao | 1b3e242 | 2023-09-14 10:26:32 +0800 | [diff] [blame] | 2301 | #define DO_BITI(NAME, BIT, E, DO_OP) \ |
| 2302 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ |
| 2303 | { \ |
| 2304 | int i; \ |
| 2305 | VReg *Vd = (VReg *)vd; \ |
| 2306 | VReg *Vj = (VReg *)vj; \ |
| 2307 | int oprsz = simd_oprsz(desc); \ |
| 2308 | \ |
| 2309 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
| 2310 | Vd->E(i) = DO_OP(Vj->E(i), imm); \ |
| 2311 | } \ |
Song Gao | 0b1e670 | 2023-05-04 20:27:57 +0800 | [diff] [blame] | 2312 | } |
| 2313 | |
| 2314 | DO_BITI(vbitclri_b, 8, UB, DO_BITCLR) |
| 2315 | DO_BITI(vbitclri_h, 16, UH, DO_BITCLR) |
| 2316 | DO_BITI(vbitclri_w, 32, UW, DO_BITCLR) |
| 2317 | DO_BITI(vbitclri_d, 64, UD, DO_BITCLR) |
| 2318 | DO_BITI(vbitseti_b, 8, UB, DO_BITSET) |
| 2319 | DO_BITI(vbitseti_h, 16, UH, DO_BITSET) |
| 2320 | DO_BITI(vbitseti_w, 32, UW, DO_BITSET) |
| 2321 | DO_BITI(vbitseti_d, 64, UD, DO_BITSET) |
| 2322 | DO_BITI(vbitrevi_b, 8, UB, DO_BITREV) |
| 2323 | DO_BITI(vbitrevi_h, 16, UH, DO_BITREV) |
| 2324 | DO_BITI(vbitrevi_w, 32, UW, DO_BITREV) |
| 2325 | DO_BITI(vbitrevi_d, 64, UD, DO_BITREV) |
Song Gao | ac95a0b | 2023-05-04 20:27:58 +0800 | [diff] [blame] | 2326 | |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 2327 | #define VFRSTP(NAME, BIT, MASK, E) \ |
| 2328 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 2329 | { \ |
Song Gao | abee168 | 2023-09-14 10:26:33 +0800 | [diff] [blame] | 2330 | int i, j, m, ofs; \ |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 2331 | VReg *Vd = (VReg *)vd; \ |
| 2332 | VReg *Vj = (VReg *)vj; \ |
| 2333 | VReg *Vk = (VReg *)vk; \ |
Song Gao | abee168 | 2023-09-14 10:26:33 +0800 | [diff] [blame] | 2334 | int oprsz = simd_oprsz(desc); \ |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 2335 | \ |
Song Gao | abee168 | 2023-09-14 10:26:33 +0800 | [diff] [blame] | 2336 | ofs = LSX_LEN / BIT; \ |
| 2337 | for (i = 0; i < oprsz / 16; i++) { \ |
| 2338 | m = Vk->E(i * ofs) & MASK; \ |
| 2339 | for (j = 0; j < ofs; j++) { \ |
| 2340 | if (Vj->E(j + ofs * i) < 0) { \ |
| 2341 | break; \ |
| 2342 | } \ |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 2343 | } \ |
Song Gao | abee168 | 2023-09-14 10:26:33 +0800 | [diff] [blame] | 2344 | Vd->E(m + i * ofs) = j; \ |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 2345 | } \ |
Song Gao | ac95a0b | 2023-05-04 20:27:58 +0800 | [diff] [blame] | 2346 | } |
| 2347 | |
| 2348 | VFRSTP(vfrstp_b, 8, 0xf, B) |
| 2349 | VFRSTP(vfrstp_h, 16, 0x7, H) |
| 2350 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 2351 | #define VFRSTPI(NAME, BIT, E) \ |
| 2352 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ |
| 2353 | { \ |
Song Gao | abee168 | 2023-09-14 10:26:33 +0800 | [diff] [blame] | 2354 | int i, j, m, ofs; \ |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 2355 | VReg *Vd = (VReg *)vd; \ |
| 2356 | VReg *Vj = (VReg *)vj; \ |
Song Gao | abee168 | 2023-09-14 10:26:33 +0800 | [diff] [blame] | 2357 | int oprsz = simd_oprsz(desc); \ |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 2358 | \ |
Song Gao | abee168 | 2023-09-14 10:26:33 +0800 | [diff] [blame] | 2359 | ofs = LSX_LEN / BIT; \ |
| 2360 | m = imm % ofs; \ |
| 2361 | for (i = 0; i < oprsz / 16; i++) { \ |
| 2362 | for (j = 0; j < ofs; j++) { \ |
| 2363 | if (Vj->E(j + ofs * i) < 0) { \ |
| 2364 | break; \ |
| 2365 | } \ |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 2366 | } \ |
Song Gao | abee168 | 2023-09-14 10:26:33 +0800 | [diff] [blame] | 2367 | Vd->E(m + i * ofs) = j; \ |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 2368 | } \ |
Song Gao | ac95a0b | 2023-05-04 20:27:58 +0800 | [diff] [blame] | 2369 | } |
| 2370 | |
| 2371 | VFRSTPI(vfrstpi_b, 8, B) |
| 2372 | VFRSTPI(vfrstpi_h, 16, H) |
Song Gao | aca6747 | 2023-05-04 20:27:59 +0800 | [diff] [blame] | 2373 | |
| 2374 | static void vec_update_fcsr0_mask(CPULoongArchState *env, |
| 2375 | uintptr_t pc, int mask) |
| 2376 | { |
| 2377 | int flags = get_float_exception_flags(&env->fp_status); |
| 2378 | |
| 2379 | set_float_exception_flags(0, &env->fp_status); |
| 2380 | |
| 2381 | flags &= ~mask; |
| 2382 | |
| 2383 | if (flags) { |
| 2384 | flags = ieee_ex_to_loongarch(flags); |
| 2385 | UPDATE_FP_CAUSE(env->fcsr0, flags); |
| 2386 | } |
| 2387 | |
| 2388 | if (GET_FP_ENABLES(env->fcsr0) & flags) { |
| 2389 | do_raise_exception(env, EXCCODE_FPE, pc); |
| 2390 | } else { |
| 2391 | UPDATE_FP_FLAGS(env->fcsr0, flags); |
| 2392 | } |
| 2393 | } |
| 2394 | |
| 2395 | static void vec_update_fcsr0(CPULoongArchState *env, uintptr_t pc) |
| 2396 | { |
| 2397 | vec_update_fcsr0_mask(env, pc, 0); |
| 2398 | } |
| 2399 | |
| 2400 | static inline void vec_clear_cause(CPULoongArchState *env) |
| 2401 | { |
| 2402 | SET_FP_CAUSE(env->fcsr0, 0); |
| 2403 | } |
| 2404 | |
| 2405 | #define DO_3OP_F(NAME, BIT, E, FN) \ |
Song Gao | 3b28675 | 2023-09-14 10:25:53 +0800 | [diff] [blame] | 2406 | void HELPER(NAME)(void *vd, void *vj, void *vk, \ |
| 2407 | CPULoongArchState *env, uint32_t desc) \ |
Song Gao | aca6747 | 2023-05-04 20:27:59 +0800 | [diff] [blame] | 2408 | { \ |
| 2409 | int i; \ |
Song Gao | 3b28675 | 2023-09-14 10:25:53 +0800 | [diff] [blame] | 2410 | VReg *Vd = (VReg *)vd; \ |
| 2411 | VReg *Vj = (VReg *)vj; \ |
| 2412 | VReg *Vk = (VReg *)vk; \ |
Song Gao | c9caf15 | 2023-09-14 10:26:34 +0800 | [diff] [blame] | 2413 | int oprsz = simd_oprsz(desc); \ |
Song Gao | aca6747 | 2023-05-04 20:27:59 +0800 | [diff] [blame] | 2414 | \ |
| 2415 | vec_clear_cause(env); \ |
Song Gao | c9caf15 | 2023-09-14 10:26:34 +0800 | [diff] [blame] | 2416 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
Song Gao | aca6747 | 2023-05-04 20:27:59 +0800 | [diff] [blame] | 2417 | Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \ |
| 2418 | vec_update_fcsr0(env, GETPC()); \ |
| 2419 | } \ |
| 2420 | } |
| 2421 | |
| 2422 | DO_3OP_F(vfadd_s, 32, UW, float32_add) |
| 2423 | DO_3OP_F(vfadd_d, 64, UD, float64_add) |
| 2424 | DO_3OP_F(vfsub_s, 32, UW, float32_sub) |
| 2425 | DO_3OP_F(vfsub_d, 64, UD, float64_sub) |
| 2426 | DO_3OP_F(vfmul_s, 32, UW, float32_mul) |
| 2427 | DO_3OP_F(vfmul_d, 64, UD, float64_mul) |
| 2428 | DO_3OP_F(vfdiv_s, 32, UW, float32_div) |
| 2429 | DO_3OP_F(vfdiv_d, 64, UD, float64_div) |
| 2430 | DO_3OP_F(vfmax_s, 32, UW, float32_maxnum) |
| 2431 | DO_3OP_F(vfmax_d, 64, UD, float64_maxnum) |
| 2432 | DO_3OP_F(vfmin_s, 32, UW, float32_minnum) |
| 2433 | DO_3OP_F(vfmin_d, 64, UD, float64_minnum) |
| 2434 | DO_3OP_F(vfmaxa_s, 32, UW, float32_maxnummag) |
| 2435 | DO_3OP_F(vfmaxa_d, 64, UD, float64_maxnummag) |
| 2436 | DO_3OP_F(vfmina_s, 32, UW, float32_minnummag) |
| 2437 | DO_3OP_F(vfmina_d, 64, UD, float64_minnummag) |
| 2438 | |
| 2439 | #define DO_4OP_F(NAME, BIT, E, FN, flags) \ |
Song Gao | e2600da | 2023-09-14 10:25:51 +0800 | [diff] [blame] | 2440 | void HELPER(NAME)(void *vd, void *vj, void *vk, void *va, \ |
| 2441 | CPULoongArchState *env, uint32_t desc) \ |
Song Gao | aca6747 | 2023-05-04 20:27:59 +0800 | [diff] [blame] | 2442 | { \ |
| 2443 | int i; \ |
Song Gao | e2600da | 2023-09-14 10:25:51 +0800 | [diff] [blame] | 2444 | VReg *Vd = (VReg *)vd; \ |
| 2445 | VReg *Vj = (VReg *)vj; \ |
| 2446 | VReg *Vk = (VReg *)vk; \ |
| 2447 | VReg *Va = (VReg *)va; \ |
Song Gao | c9caf15 | 2023-09-14 10:26:34 +0800 | [diff] [blame] | 2448 | int oprsz = simd_oprsz(desc); \ |
Song Gao | aca6747 | 2023-05-04 20:27:59 +0800 | [diff] [blame] | 2449 | \ |
| 2450 | vec_clear_cause(env); \ |
Song Gao | c9caf15 | 2023-09-14 10:26:34 +0800 | [diff] [blame] | 2451 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
Song Gao | aca6747 | 2023-05-04 20:27:59 +0800 | [diff] [blame] | 2452 | Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \ |
| 2453 | vec_update_fcsr0(env, GETPC()); \ |
| 2454 | } \ |
| 2455 | } |
| 2456 | |
| 2457 | DO_4OP_F(vfmadd_s, 32, UW, float32_muladd, 0) |
| 2458 | DO_4OP_F(vfmadd_d, 64, UD, float64_muladd, 0) |
| 2459 | DO_4OP_F(vfmsub_s, 32, UW, float32_muladd, float_muladd_negate_c) |
| 2460 | DO_4OP_F(vfmsub_d, 64, UD, float64_muladd, float_muladd_negate_c) |
| 2461 | DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_result) |
| 2462 | DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_result) |
| 2463 | DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd, |
| 2464 | float_muladd_negate_c | float_muladd_negate_result) |
| 2465 | DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd, |
| 2466 | float_muladd_negate_c | float_muladd_negate_result) |
| 2467 | |
/*
 * DO_2OP_F - generate an element-wise one-source floating-point helper.
 *
 *   NAME  helper name
 *   BIT   element width in bits
 *   E     VReg accessor for the elements
 *   FN    routine called as FN(env, element)
 *
 * The FCSR0 cause field is cleared before the loop.  This macro does not
 * update FCSR0 itself after each element; any such update is up to FN.
 */
#define DO_2OP_F(NAME, BIT, E, FN)                           \
void HELPER(NAME)(void *vd, void *vj,                        \
                  CPULoongArchState *env, uint32_t desc)     \
{                                                            \
    VReg *Vd = (VReg *)vd;                                   \
    VReg *Vj = (VReg *)vj;                                   \
    const int count = simd_oprsz(desc) / (BIT / 8);          \
    int n;                                                   \
                                                             \
    vec_clear_cause(env);                                    \
    for (n = 0; n < count; n++) {                            \
        Vd->E(n) = FN(env, Vj->E(n));                        \
    }                                                        \
}
| 2482 | |
| 2483 | #define FLOGB(BIT, T) \ |
| 2484 | static T do_flogb_## BIT(CPULoongArchState *env, T fj) \ |
| 2485 | { \ |
| 2486 | T fp, fd; \ |
| 2487 | float_status *status = &env->fp_status; \ |
| 2488 | FloatRoundMode old_mode = get_float_rounding_mode(status); \ |
| 2489 | \ |
| 2490 | set_float_rounding_mode(float_round_down, status); \ |
| 2491 | fp = float ## BIT ##_log2(fj, status); \ |
| 2492 | fd = float ## BIT ##_round_to_int(fp, status); \ |
| 2493 | set_float_rounding_mode(old_mode, status); \ |
| 2494 | vec_update_fcsr0_mask(env, GETPC(), float_flag_inexact); \ |
| 2495 | return fd; \ |
| 2496 | } |
| 2497 | |
| 2498 | FLOGB(32, uint32_t) |
| 2499 | FLOGB(64, uint64_t) |
| 2500 | |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2501 | #define FCLASS(NAME, BIT, E, FN) \ |
| 2502 | void HELPER(NAME)(void *vd, void *vj, \ |
| 2503 | CPULoongArchState *env, uint32_t desc) \ |
| 2504 | { \ |
| 2505 | int i; \ |
| 2506 | VReg *Vd = (VReg *)vd; \ |
| 2507 | VReg *Vj = (VReg *)vj; \ |
Song Gao | c9caf15 | 2023-09-14 10:26:34 +0800 | [diff] [blame] | 2508 | int oprsz = simd_oprsz(desc); \ |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2509 | \ |
Song Gao | c9caf15 | 2023-09-14 10:26:34 +0800 | [diff] [blame] | 2510 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2511 | Vd->E(i) = FN(env, Vj->E(i)); \ |
| 2512 | } \ |
Song Gao | aca6747 | 2023-05-04 20:27:59 +0800 | [diff] [blame] | 2513 | } |
| 2514 | |
| 2515 | FCLASS(vfclass_s, 32, UW, helper_fclass_s) |
| 2516 | FCLASS(vfclass_d, 64, UD, helper_fclass_d) |
| 2517 | |
| 2518 | #define FSQRT(BIT, T) \ |
| 2519 | static T do_fsqrt_## BIT(CPULoongArchState *env, T fj) \ |
| 2520 | { \ |
| 2521 | T fd; \ |
| 2522 | fd = float ## BIT ##_sqrt(fj, &env->fp_status); \ |
| 2523 | vec_update_fcsr0(env, GETPC()); \ |
| 2524 | return fd; \ |
| 2525 | } |
| 2526 | |
| 2527 | FSQRT(32, uint32_t) |
| 2528 | FSQRT(64, uint64_t) |
| 2529 | |
| 2530 | #define FRECIP(BIT, T) \ |
| 2531 | static T do_frecip_## BIT(CPULoongArchState *env, T fj) \ |
| 2532 | { \ |
| 2533 | T fd; \ |
| 2534 | fd = float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \ |
| 2535 | vec_update_fcsr0(env, GETPC()); \ |
| 2536 | return fd; \ |
| 2537 | } |
| 2538 | |
| 2539 | FRECIP(32, uint32_t) |
| 2540 | FRECIP(64, uint64_t) |
| 2541 | |
| 2542 | #define FRSQRT(BIT, T) \ |
| 2543 | static T do_frsqrt_## BIT(CPULoongArchState *env, T fj) \ |
| 2544 | { \ |
| 2545 | T fd, fp; \ |
| 2546 | fp = float ## BIT ##_sqrt(fj, &env->fp_status); \ |
| 2547 | fd = float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \ |
| 2548 | vec_update_fcsr0(env, GETPC()); \ |
| 2549 | return fd; \ |
| 2550 | } |
| 2551 | |
| 2552 | FRSQRT(32, uint32_t) |
| 2553 | FRSQRT(64, uint64_t) |
| 2554 | |
| 2555 | DO_2OP_F(vflogb_s, 32, UW, do_flogb_32) |
| 2556 | DO_2OP_F(vflogb_d, 64, UD, do_flogb_64) |
| 2557 | DO_2OP_F(vfsqrt_s, 32, UW, do_fsqrt_32) |
| 2558 | DO_2OP_F(vfsqrt_d, 64, UD, do_fsqrt_64) |
| 2559 | DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32) |
| 2560 | DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64) |
| 2561 | DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32) |
| 2562 | DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64) |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2563 | |
| 2564 | static uint32_t float16_cvt_float32(uint16_t h, float_status *status) |
| 2565 | { |
| 2566 | return float16_to_float32(h, true, status); |
| 2567 | } |
| 2568 | static uint64_t float32_cvt_float64(uint32_t s, float_status *status) |
| 2569 | { |
| 2570 | return float32_to_float64(s, status); |
| 2571 | } |
| 2572 | |
| 2573 | static uint16_t float32_cvt_float16(uint32_t s, float_status *status) |
| 2574 | { |
| 2575 | return float32_to_float16(s, true, status); |
| 2576 | } |
| 2577 | static uint32_t float64_cvt_float32(uint64_t d, float_status *status) |
| 2578 | { |
| 2579 | return float64_to_float32(d, status); |
| 2580 | } |
| 2581 | |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2582 | void HELPER(vfcvtl_s_h)(void *vd, void *vj, |
| 2583 | CPULoongArchState *env, uint32_t desc) |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2584 | { |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2585 | int i, j, ofs; |
| 2586 | VReg temp = {}; |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2587 | VReg *Vd = (VReg *)vd; |
| 2588 | VReg *Vj = (VReg *)vj; |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2589 | int oprsz = simd_oprsz(desc); |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2590 | |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2591 | ofs = LSX_LEN / 32; |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2592 | vec_clear_cause(env); |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2593 | for (i = 0; i < oprsz / 16; i++) { |
| 2594 | for (j = 0; j < ofs; j++) { |
| 2595 | temp.UW(j + ofs * i) =float16_cvt_float32(Vj->UH(j + ofs * 2 * i), |
| 2596 | &env->fp_status); |
| 2597 | } |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2598 | vec_update_fcsr0(env, GETPC()); |
| 2599 | } |
| 2600 | *Vd = temp; |
| 2601 | } |
| 2602 | |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2603 | void HELPER(vfcvtl_d_s)(void *vd, void *vj, |
| 2604 | CPULoongArchState *env, uint32_t desc) |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2605 | { |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2606 | int i, j, ofs; |
| 2607 | VReg temp = {}; |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2608 | VReg *Vd = (VReg *)vd; |
| 2609 | VReg *Vj = (VReg *)vj; |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2610 | int oprsz = simd_oprsz(desc); |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2611 | |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2612 | ofs = LSX_LEN / 64; |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2613 | vec_clear_cause(env); |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2614 | for (i = 0; i < oprsz / 16; i++) { |
| 2615 | for (j = 0; j < ofs; j++) { |
| 2616 | temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * 2 * i), |
| 2617 | &env->fp_status); |
| 2618 | } |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2619 | vec_update_fcsr0(env, GETPC()); |
| 2620 | } |
| 2621 | *Vd = temp; |
| 2622 | } |
| 2623 | |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2624 | void HELPER(vfcvth_s_h)(void *vd, void *vj, |
| 2625 | CPULoongArchState *env, uint32_t desc) |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2626 | { |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2627 | int i, j, ofs; |
| 2628 | VReg temp = {}; |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2629 | VReg *Vd = (VReg *)vd; |
| 2630 | VReg *Vj = (VReg *)vj; |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2631 | int oprsz = simd_oprsz(desc); |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2632 | |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2633 | ofs = LSX_LEN / 32; |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2634 | vec_clear_cause(env); |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2635 | for (i = 0; i < oprsz / 16; i++) { |
| 2636 | for (j = 0; j < ofs; j++) { |
| 2637 | temp.UW(j + ofs * i) = float16_cvt_float32(Vj->UH(j + ofs * (2 * i + 1)), |
| 2638 | &env->fp_status); |
| 2639 | } |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2640 | vec_update_fcsr0(env, GETPC()); |
| 2641 | } |
| 2642 | *Vd = temp; |
| 2643 | } |
| 2644 | |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2645 | void HELPER(vfcvth_d_s)(void *vd, void *vj, |
| 2646 | CPULoongArchState *env, uint32_t desc) |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2647 | { |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2648 | int i, j, ofs; |
| 2649 | VReg temp = {}; |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2650 | VReg *Vd = (VReg *)vd; |
| 2651 | VReg *Vj = (VReg *)vj; |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2652 | int oprsz = simd_oprsz(desc); |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2653 | |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2654 | ofs = LSX_LEN / 64; |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2655 | vec_clear_cause(env); |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2656 | for (i = 0; i < oprsz / 16; i++) { |
| 2657 | for (j = 0; j < ofs; j++) { |
| 2658 | temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * (2 * i + 1)), |
| 2659 | &env->fp_status); |
| 2660 | } |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2661 | vec_update_fcsr0(env, GETPC()); |
| 2662 | } |
| 2663 | *Vd = temp; |
| 2664 | } |
| 2665 | |
Song Gao | 3b28675 | 2023-09-14 10:25:53 +0800 | [diff] [blame] | 2666 | void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk, |
| 2667 | CPULoongArchState *env, uint32_t desc) |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2668 | { |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2669 | int i, j, ofs; |
| 2670 | VReg temp = {}; |
Song Gao | 3b28675 | 2023-09-14 10:25:53 +0800 | [diff] [blame] | 2671 | VReg *Vd = (VReg *)vd; |
| 2672 | VReg *Vj = (VReg *)vj; |
| 2673 | VReg *Vk = (VReg *)vk; |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2674 | int oprsz = simd_oprsz(desc); |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2675 | |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2676 | ofs = LSX_LEN / 32; |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2677 | vec_clear_cause(env); |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2678 | for(i = 0; i < oprsz / 16; i++) { |
| 2679 | for (j = 0; j < ofs; j++) { |
| 2680 | temp.UH(j + ofs * (2 * i + 1)) = float32_cvt_float16(Vj->UW(j + ofs * i), |
| 2681 | &env->fp_status); |
| 2682 | temp.UH(j + ofs * 2 * i) = float32_cvt_float16(Vk->UW(j + ofs * i), |
| 2683 | &env->fp_status); |
| 2684 | } |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2685 | vec_update_fcsr0(env, GETPC()); |
| 2686 | } |
| 2687 | *Vd = temp; |
| 2688 | } |
| 2689 | |
Song Gao | 3b28675 | 2023-09-14 10:25:53 +0800 | [diff] [blame] | 2690 | void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk, |
| 2691 | CPULoongArchState *env, uint32_t desc) |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2692 | { |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2693 | int i, j, ofs; |
| 2694 | VReg temp = {}; |
Song Gao | 3b28675 | 2023-09-14 10:25:53 +0800 | [diff] [blame] | 2695 | VReg *Vd = (VReg *)vd; |
| 2696 | VReg *Vj = (VReg *)vj; |
| 2697 | VReg *Vk = (VReg *)vk; |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2698 | int oprsz = simd_oprsz(desc); |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2699 | |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2700 | ofs = LSX_LEN / 64; |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2701 | vec_clear_cause(env); |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2702 | for(i = 0; i < oprsz / 16; i++) { |
| 2703 | for (j = 0; j < ofs; j++) { |
| 2704 | temp.UW(j + ofs * (2 * i + 1)) = float64_cvt_float32(Vj->UD(j + ofs * i), |
| 2705 | &env->fp_status); |
| 2706 | temp.UW(j + ofs * 2 * i) = float64_cvt_float32(Vk->UD(j + ofs * i), |
| 2707 | &env->fp_status); |
| 2708 | } |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2709 | vec_update_fcsr0(env, GETPC()); |
| 2710 | } |
| 2711 | *Vd = temp; |
| 2712 | } |
| 2713 | |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2714 | void HELPER(vfrint_s)(void *vd, void *vj, |
| 2715 | CPULoongArchState *env, uint32_t desc) |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2716 | { |
| 2717 | int i; |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2718 | VReg *Vd = (VReg *)vd; |
| 2719 | VReg *Vj = (VReg *)vj; |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2720 | int oprsz = simd_oprsz(desc); |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2721 | |
| 2722 | vec_clear_cause(env); |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2723 | for (i = 0; i < oprsz / 4; i++) { |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2724 | Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status); |
| 2725 | vec_update_fcsr0(env, GETPC()); |
| 2726 | } |
| 2727 | } |
| 2728 | |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2729 | void HELPER(vfrint_d)(void *vd, void *vj, |
| 2730 | CPULoongArchState *env, uint32_t desc) |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2731 | { |
| 2732 | int i; |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2733 | VReg *Vd = (VReg *)vd; |
| 2734 | VReg *Vj = (VReg *)vj; |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2735 | int oprsz = simd_oprsz(desc); |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2736 | |
| 2737 | vec_clear_cause(env); |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2738 | for (i = 0; i < oprsz / 8; i++) { |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2739 | Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status); |
| 2740 | vec_update_fcsr0(env, GETPC()); |
| 2741 | } |
| 2742 | } |
| 2743 | |
| 2744 | #define FCVT_2OP(NAME, BIT, E, MODE) \ |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2745 | void HELPER(NAME)(void *vd, void *vj, \ |
| 2746 | CPULoongArchState *env, uint32_t desc) \ |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2747 | { \ |
| 2748 | int i; \ |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2749 | VReg *Vd = (VReg *)vd; \ |
| 2750 | VReg *Vj = (VReg *)vj; \ |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2751 | int oprsz = simd_oprsz(desc); \ |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2752 | \ |
| 2753 | vec_clear_cause(env); \ |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2754 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2755 | FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \ |
| 2756 | set_float_rounding_mode(MODE, &env->fp_status); \ |
| 2757 | Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \ |
| 2758 | set_float_rounding_mode(old_mode, &env->fp_status); \ |
| 2759 | vec_update_fcsr0(env, GETPC()); \ |
| 2760 | } \ |
| 2761 | } |
| 2762 | |
| 2763 | FCVT_2OP(vfrintrne_s, 32, UW, float_round_nearest_even) |
| 2764 | FCVT_2OP(vfrintrne_d, 64, UD, float_round_nearest_even) |
| 2765 | FCVT_2OP(vfrintrz_s, 32, UW, float_round_to_zero) |
| 2766 | FCVT_2OP(vfrintrz_d, 64, UD, float_round_to_zero) |
| 2767 | FCVT_2OP(vfrintrp_s, 32, UW, float_round_up) |
| 2768 | FCVT_2OP(vfrintrp_d, 64, UD, float_round_up) |
| 2769 | FCVT_2OP(vfrintrm_s, 32, UW, float_round_down) |
| 2770 | FCVT_2OP(vfrintrm_d, 64, UD, float_round_down) |
| 2771 | |
/*
 * FTINT(NAME, FMT1, FMT2, T1, T2, MODE): define the static function
 * do_ftint<NAME>() which runs do_<FMT1>_to_<FMT2>() on a single value
 * with the rounding mode temporarily switched to MODE. The rounding mode
 * previously held in env->fp_status is restored before returning.
 */
#define FTINT(NAME, FMT1, FMT2, T1, T2, MODE)                               \
static T2 do_ftint ## NAME(CPULoongArchState *env, T1 fj)                   \
{                                                                           \
    const FloatRoundMode saved = get_float_rounding_mode(&env->fp_status);  \
    T2 converted;                                                           \
                                                                            \
    set_float_rounding_mode(MODE, &env->fp_status);                         \
    converted = do_## FMT1 ##_to_## FMT2(env, fj);                          \
    set_float_rounding_mode(saved, &env->fp_status);                        \
                                                                            \
    return converted;                                                       \
}
| 2783 | |
| 2784 | #define DO_FTINT(FMT1, FMT2, T1, T2) \ |
| 2785 | static T2 do_## FMT1 ##_to_## FMT2(CPULoongArchState *env, T1 fj) \ |
| 2786 | { \ |
| 2787 | T2 fd; \ |
| 2788 | \ |
| 2789 | fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \ |
| 2790 | if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { \ |
| 2791 | if (FMT1 ##_is_any_nan(fj)) { \ |
| 2792 | fd = 0; \ |
| 2793 | } \ |
| 2794 | } \ |
| 2795 | vec_update_fcsr0(env, GETPC()); \ |
| 2796 | return fd; \ |
| 2797 | } |
| 2798 | |
| 2799 | DO_FTINT(float32, int32, uint32_t, uint32_t) |
| 2800 | DO_FTINT(float64, int64, uint64_t, uint64_t) |
| 2801 | DO_FTINT(float32, uint32, uint32_t, uint32_t) |
| 2802 | DO_FTINT(float64, uint64, uint64_t, uint64_t) |
| 2803 | DO_FTINT(float64, int32, uint64_t, uint32_t) |
| 2804 | DO_FTINT(float32, int64, uint32_t, uint64_t) |
| 2805 | |
/*
 * Scalar float -> signed integer converters of equal width, one per fixed
 * rounding mode: rne = float_round_nearest_even, rp = float_round_up,
 * rz = float_round_to_zero, rm = float_round_down.
 */
FTINT(rne_w_s, float32, int32, uint32_t, uint32_t, float_round_nearest_even)
FTINT(rne_l_d, float64, int64, uint64_t, uint64_t, float_round_nearest_even)
FTINT(rp_w_s, float32, int32, uint32_t, uint32_t, float_round_up)
FTINT(rp_l_d, float64, int64, uint64_t, uint64_t, float_round_up)
FTINT(rz_w_s, float32, int32, uint32_t, uint32_t, float_round_to_zero)
FTINT(rz_l_d, float64, int64, uint64_t, uint64_t, float_round_to_zero)
FTINT(rm_w_s, float32, int32, uint32_t, uint32_t, float_round_down)
FTINT(rm_l_d, float64, int64, uint64_t, uint64_t, float_round_down)

/*
 * Vector helpers generated from the converters above. DO_2OP_F is defined
 * outside this part of the file; presumably it applies the given function
 * to each element - confirm against its definition. The last two entries
 * use the plain DO_FTINT converters, i.e. no rounding-mode override.
 */
DO_2OP_F(vftintrne_w_s, 32, UW, do_ftintrne_w_s)
DO_2OP_F(vftintrne_l_d, 64, UD, do_ftintrne_l_d)
DO_2OP_F(vftintrp_w_s, 32, UW, do_ftintrp_w_s)
DO_2OP_F(vftintrp_l_d, 64, UD, do_ftintrp_l_d)
DO_2OP_F(vftintrz_w_s, 32, UW, do_ftintrz_w_s)
DO_2OP_F(vftintrz_l_d, 64, UD, do_ftintrz_l_d)
DO_2OP_F(vftintrm_w_s, 32, UW, do_ftintrm_w_s)
DO_2OP_F(vftintrm_l_d, 64, UD, do_ftintrm_l_d)
DO_2OP_F(vftint_w_s, 32, UW, do_float32_to_int32)
DO_2OP_F(vftint_l_d, 64, UD, do_float64_to_int64)

/* Float -> unsigned integer converters, round-to-zero variants. */
FTINT(rz_wu_s, float32, uint32, uint32_t, uint32_t, float_round_to_zero)
FTINT(rz_lu_d, float64, uint64, uint64_t, uint64_t, float_round_to_zero)

/* Vector helpers for the unsigned conversions. */
DO_2OP_F(vftintrz_wu_s, 32, UW, do_ftintrz_wu_s)
DO_2OP_F(vftintrz_lu_d, 64, UD, do_ftintrz_lu_d)
DO_2OP_F(vftint_wu_s, 32, UW, do_float32_to_uint32)
DO_2OP_F(vftint_lu_d, 64, UD, do_float64_to_uint64)

/* Narrowing float64 -> int32 converters, consumed by FTINT_W_D below. */
FTINT(rm_w_d, float64, int32, uint64_t, uint32_t, float_round_down)
FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up)
FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero)
FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even)
| 2838 | |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2839 | #define FTINT_W_D(NAME, FN) \ |
| 2840 | void HELPER(NAME)(void *vd, void *vj, void *vk, \ |
| 2841 | CPULoongArchState *env, uint32_t desc) \ |
| 2842 | { \ |
| 2843 | int i, j, ofs; \ |
| 2844 | VReg temp = {}; \ |
| 2845 | VReg *Vd = (VReg *)vd; \ |
| 2846 | VReg *Vj = (VReg *)vj; \ |
| 2847 | VReg *Vk = (VReg *)vk; \ |
| 2848 | int oprsz = simd_oprsz(desc); \ |
| 2849 | \ |
| 2850 | ofs = LSX_LEN / 64; \ |
| 2851 | vec_clear_cause(env); \ |
| 2852 | for (i = 0; i < oprsz / 16; i++) { \ |
| 2853 | for (j = 0; j < ofs; j++) { \ |
| 2854 | temp.W(j + ofs * (2 * i + 1)) = FN(env, Vj->UD(j + ofs * i)); \ |
| 2855 | temp.W(j + ofs * 2 * i) = FN(env, Vk->UD(j + ofs * i)); \ |
| 2856 | } \ |
| 2857 | } \ |
| 2858 | *Vd = temp; \ |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2859 | } |
| 2860 | |
| 2861 | FTINT_W_D(vftint_w_d, do_float64_to_int32) |
| 2862 | FTINT_W_D(vftintrm_w_d, do_ftintrm_w_d) |
| 2863 | FTINT_W_D(vftintrp_w_d, do_ftintrp_w_d) |
| 2864 | FTINT_W_D(vftintrz_w_d, do_ftintrz_w_d) |
| 2865 | FTINT_W_D(vftintrne_w_d, do_ftintrne_w_d) |
| 2866 | |
/*
 * Widening float32 -> int64 converters, one per fixed rounding mode
 * (rm = float_round_down, rp = float_round_up, rz = float_round_to_zero,
 * rne = float_round_nearest_even). The "l" and "h" variants expand to
 * identical bodies; they only differ in name so that the FTINTL_L_S and
 * FTINTH_L_S helpers below each have their own function to reference.
 */
FTINT(rml_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
FTINT(rpl_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
FTINT(rzl_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
FTINT(rnel_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
FTINT(rmh_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
| 2875 | |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2876 | #define FTINTL_L_S(NAME, FN) \ |
| 2877 | void HELPER(NAME)(void *vd, void *vj, \ |
| 2878 | CPULoongArchState *env, uint32_t desc) \ |
| 2879 | { \ |
| 2880 | int i, j, ofs; \ |
| 2881 | VReg temp; \ |
| 2882 | VReg *Vd = (VReg *)vd; \ |
| 2883 | VReg *Vj = (VReg *)vj; \ |
| 2884 | int oprsz = simd_oprsz(desc); \ |
| 2885 | \ |
| 2886 | ofs = LSX_LEN / 64; \ |
| 2887 | vec_clear_cause(env); \ |
| 2888 | for (i = 0; i < oprsz / 16; i++) { \ |
| 2889 | for (j = 0; j < ofs; j++) { \ |
| 2890 | temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * 2 * i)); \ |
| 2891 | } \ |
| 2892 | } \ |
| 2893 | *Vd = temp; \ |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2894 | } |
| 2895 | |
| 2896 | FTINTL_L_S(vftintl_l_s, do_float32_to_int64) |
| 2897 | FTINTL_L_S(vftintrml_l_s, do_ftintrml_l_s) |
| 2898 | FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s) |
| 2899 | FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s) |
| 2900 | FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s) |
| 2901 | |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2902 | #define FTINTH_L_S(NAME, FN) \ |
| 2903 | void HELPER(NAME)(void *vd, void *vj, \ |
| 2904 | CPULoongArchState *env, uint32_t desc) \ |
| 2905 | { \ |
| 2906 | int i, j, ofs; \ |
| 2907 | VReg temp = {}; \ |
| 2908 | VReg *Vd = (VReg *)vd; \ |
| 2909 | VReg *Vj = (VReg *)vj; \ |
| 2910 | int oprsz = simd_oprsz(desc); \ |
| 2911 | \ |
| 2912 | ofs = LSX_LEN / 64; \ |
| 2913 | vec_clear_cause(env); \ |
| 2914 | for (i = 0; i < oprsz / 16; i++) { \ |
| 2915 | for (j = 0; j < ofs; j++) { \ |
| 2916 | temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * (2 * i + 1))); \ |
| 2917 | } \ |
| 2918 | } \ |
| 2919 | *Vd = temp; \ |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2920 | } |
| 2921 | |
| 2922 | FTINTH_L_S(vftinth_l_s, do_float32_to_int64) |
| 2923 | FTINTH_L_S(vftintrmh_l_s, do_ftintrmh_l_s) |
| 2924 | FTINTH_L_S(vftintrph_l_s, do_ftintrph_l_s) |
| 2925 | FTINTH_L_S(vftintrzh_l_s, do_ftintrzh_l_s) |
| 2926 | FTINTH_L_S(vftintrneh_l_s, do_ftintrneh_l_s) |
| 2927 | |
| 2928 | #define FFINT(NAME, FMT1, FMT2, T1, T2) \ |
| 2929 | static T2 do_ffint_ ## NAME(CPULoongArchState *env, T1 fj) \ |
| 2930 | { \ |
| 2931 | T2 fd; \ |
| 2932 | \ |
| 2933 | fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \ |
| 2934 | vec_update_fcsr0(env, GETPC()); \ |
| 2935 | return fd; \ |
| 2936 | } |
| 2937 | |
| 2938 | FFINT(s_w, int32, float32, int32_t, uint32_t) |
| 2939 | FFINT(d_l, int64, float64, int64_t, uint64_t) |
| 2940 | FFINT(s_wu, uint32, float32, uint32_t, uint32_t) |
| 2941 | FFINT(d_lu, uint64, float64, uint64_t, uint64_t) |
| 2942 | |
| 2943 | DO_2OP_F(vffint_s_w, 32, W, do_ffint_s_w) |
| 2944 | DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l) |
| 2945 | DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu) |
| 2946 | DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu) |
| 2947 | |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2948 | void HELPER(vffintl_d_w)(void *vd, void *vj, |
| 2949 | CPULoongArchState *env, uint32_t desc) |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2950 | { |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2951 | int i, j, ofs; |
| 2952 | VReg temp = {}; |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2953 | VReg *Vd = (VReg *)vd; |
| 2954 | VReg *Vj = (VReg *)vj; |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2955 | int oprsz = simd_oprsz(desc); |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2956 | |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2957 | ofs = LSX_LEN / 64; |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2958 | vec_clear_cause(env); |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2959 | for (i = 0; i < oprsz / 16; i++) { |
| 2960 | for (j = 0; j < ofs; j++) { |
| 2961 | temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * 2 * i), |
| 2962 | &env->fp_status); |
| 2963 | } |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2964 | vec_update_fcsr0(env, GETPC()); |
| 2965 | } |
| 2966 | *Vd = temp; |
| 2967 | } |
| 2968 | |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2969 | void HELPER(vffinth_d_w)(void *vd, void *vj, |
| 2970 | CPULoongArchState *env, uint32_t desc) |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2971 | { |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2972 | int i, j, ofs; |
| 2973 | VReg temp = {}; |
Song Gao | 226bf88 | 2023-09-14 10:25:55 +0800 | [diff] [blame] | 2974 | VReg *Vd = (VReg *)vd; |
| 2975 | VReg *Vj = (VReg *)vj; |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2976 | int oprsz = simd_oprsz(desc); |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2977 | |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2978 | ofs = LSX_LEN / 64; |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2979 | vec_clear_cause(env); |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2980 | for (i = 0; i < oprsz /16; i++) { |
| 2981 | for (j = 0; j < ofs; j++) { |
| 2982 | temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * (2 * i + 1)), |
| 2983 | &env->fp_status); |
| 2984 | } |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2985 | vec_update_fcsr0(env, GETPC()); |
| 2986 | } |
| 2987 | *Vd = temp; |
| 2988 | } |
| 2989 | |
Song Gao | 3b28675 | 2023-09-14 10:25:53 +0800 | [diff] [blame] | 2990 | void HELPER(vffint_s_l)(void *vd, void *vj, void *vk, |
| 2991 | CPULoongArchState *env, uint32_t desc) |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2992 | { |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2993 | int i, j, ofs; |
| 2994 | VReg temp = {}; |
Song Gao | 3b28675 | 2023-09-14 10:25:53 +0800 | [diff] [blame] | 2995 | VReg *Vd = (VReg *)vd; |
| 2996 | VReg *Vj = (VReg *)vj; |
| 2997 | VReg *Vk = (VReg *)vk; |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 2998 | int oprsz = simd_oprsz(desc); |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 2999 | |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 3000 | ofs = LSX_LEN / 64; |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 3001 | vec_clear_cause(env); |
Song Gao | 60df31a | 2023-09-14 10:26:35 +0800 | [diff] [blame] | 3002 | for (i = 0; i < oprsz / 16; i++) { |
| 3003 | for (j = 0; j < ofs; j++) { |
| 3004 | temp.W(j + ofs * (2 * i + 1)) = int64_to_float32(Vj->D(j + ofs * i), |
| 3005 | &env->fp_status); |
| 3006 | temp.W(j + ofs * 2 * i) = int64_to_float32(Vk->D(j + ofs * i), |
| 3007 | &env->fp_status); |
| 3008 | } |
Song Gao | 399665d | 2023-05-04 20:28:00 +0800 | [diff] [blame] | 3009 | vec_update_fcsr0(env, GETPC()); |
| 3010 | } |
| 3011 | *Vd = temp; |
| 3012 | } |
Song Gao | f435e1e | 2023-05-04 20:28:01 +0800 | [diff] [blame] | 3013 | |
Song Gao | 4da72d4 | 2023-09-14 10:26:36 +0800 | [diff] [blame] | 3014 | #define VCMPI(NAME, BIT, E, DO_OP) \ |
| 3015 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ |
| 3016 | { \ |
| 3017 | int i; \ |
| 3018 | VReg *Vd = (VReg *)vd; \ |
| 3019 | VReg *Vj = (VReg *)vj; \ |
| 3020 | typedef __typeof(Vd->E(0)) TD; \ |
| 3021 | int oprsz = simd_oprsz(desc); \ |
| 3022 | \ |
| 3023 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
| 3024 | Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ |
| 3025 | } \ |
Song Gao | f435e1e | 2023-05-04 20:28:01 +0800 | [diff] [blame] | 3026 | } |
| 3027 | |
| 3028 | VCMPI(vseqi_b, 8, B, VSEQ) |
| 3029 | VCMPI(vseqi_h, 16, H, VSEQ) |
| 3030 | VCMPI(vseqi_w, 32, W, VSEQ) |
| 3031 | VCMPI(vseqi_d, 64, D, VSEQ) |
| 3032 | VCMPI(vslei_b, 8, B, VSLE) |
| 3033 | VCMPI(vslei_h, 16, H, VSLE) |
| 3034 | VCMPI(vslei_w, 32, W, VSLE) |
| 3035 | VCMPI(vslei_d, 64, D, VSLE) |
| 3036 | VCMPI(vslei_bu, 8, UB, VSLE) |
| 3037 | VCMPI(vslei_hu, 16, UH, VSLE) |
| 3038 | VCMPI(vslei_wu, 32, UW, VSLE) |
| 3039 | VCMPI(vslei_du, 64, UD, VSLE) |
| 3040 | VCMPI(vslti_b, 8, B, VSLT) |
| 3041 | VCMPI(vslti_h, 16, H, VSLT) |
| 3042 | VCMPI(vslti_w, 32, W, VSLT) |
| 3043 | VCMPI(vslti_d, 64, D, VSLT) |
| 3044 | VCMPI(vslti_bu, 8, UB, VSLT) |
| 3045 | VCMPI(vslti_hu, 16, UH, VSLT) |
| 3046 | VCMPI(vslti_wu, 32, UW, VSLT) |
| 3047 | VCMPI(vslti_du, 64, UD, VSLT) |
Song Gao | 386c4e8 | 2023-05-04 20:28:02 +0800 | [diff] [blame] | 3048 | |
| 3049 | static uint64_t vfcmp_common(CPULoongArchState *env, |
| 3050 | FloatRelation cmp, uint32_t flags) |
| 3051 | { |
| 3052 | uint64_t ret = 0; |
| 3053 | |
| 3054 | switch (cmp) { |
| 3055 | case float_relation_less: |
| 3056 | ret = (flags & FCMP_LT); |
| 3057 | break; |
| 3058 | case float_relation_equal: |
| 3059 | ret = (flags & FCMP_EQ); |
| 3060 | break; |
| 3061 | case float_relation_greater: |
| 3062 | ret = (flags & FCMP_GT); |
| 3063 | break; |
| 3064 | case float_relation_unordered: |
| 3065 | ret = (flags & FCMP_UN); |
| 3066 | break; |
| 3067 | default: |
| 3068 | g_assert_not_reached(); |
| 3069 | } |
| 3070 | |
| 3071 | if (ret) { |
| 3072 | ret = -1; |
| 3073 | } |
| 3074 | |
| 3075 | return ret; |
| 3076 | } |
| 3077 | |
| 3078 | #define VFCMP(NAME, BIT, E, FN) \ |
Song Gao | 3eeda5f | 2023-09-14 10:26:37 +0800 | [diff] [blame] | 3079 | void HELPER(NAME)(CPULoongArchState *env, uint32_t oprsz, \ |
Song Gao | 386c4e8 | 2023-05-04 20:28:02 +0800 | [diff] [blame] | 3080 | uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \ |
| 3081 | { \ |
| 3082 | int i; \ |
| 3083 | VReg t; \ |
| 3084 | VReg *Vd = &(env->fpr[vd].vreg); \ |
| 3085 | VReg *Vj = &(env->fpr[vj].vreg); \ |
| 3086 | VReg *Vk = &(env->fpr[vk].vreg); \ |
| 3087 | \ |
| 3088 | vec_clear_cause(env); \ |
Song Gao | 3eeda5f | 2023-09-14 10:26:37 +0800 | [diff] [blame] | 3089 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
Song Gao | 386c4e8 | 2023-05-04 20:28:02 +0800 | [diff] [blame] | 3090 | FloatRelation cmp; \ |
| 3091 | cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status); \ |
| 3092 | t.E(i) = vfcmp_common(env, cmp, flags); \ |
| 3093 | vec_update_fcsr0(env, GETPC()); \ |
| 3094 | } \ |
| 3095 | *Vd = t; \ |
| 3096 | } |
| 3097 | |
| 3098 | VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet) |
| 3099 | VFCMP(vfcmp_s_s, 32, UW, float32_compare) |
| 3100 | VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet) |
| 3101 | VFCMP(vfcmp_s_d, 64, UD, float64_compare) |
Song Gao | d0dfa19 | 2023-05-04 20:28:03 +0800 | [diff] [blame] | 3102 | |
Song Gao | f3dfcc8 | 2023-09-14 10:26:38 +0800 | [diff] [blame] | 3103 | void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
Song Gao | d0dfa19 | 2023-05-04 20:28:03 +0800 | [diff] [blame] | 3104 | { |
| 3105 | int i; |
| 3106 | VReg *Vd = (VReg *)vd; |
| 3107 | VReg *Vj = (VReg *)vj; |
| 3108 | |
Song Gao | f3dfcc8 | 2023-09-14 10:26:38 +0800 | [diff] [blame] | 3109 | for (i = 0; i < simd_oprsz(desc); i++) { |
Song Gao | d0dfa19 | 2023-05-04 20:28:03 +0800 | [diff] [blame] | 3110 | Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm); |
| 3111 | } |
| 3112 | } |
| 3113 | |
| 3114 | /* Copy from target/arm/tcg/sve_helper.c */ |
| 3115 | static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz) |
| 3116 | { |
Song Gao | f3dfcc8 | 2023-09-14 10:26:38 +0800 | [diff] [blame] | 3117 | int bits = 8 << esz; |
Song Gao | d0dfa19 | 2023-05-04 20:28:03 +0800 | [diff] [blame] | 3118 | uint64_t ones = dup_const(esz, 1); |
| 3119 | uint64_t signs = ones << (bits - 1); |
| 3120 | uint64_t cmp0, cmp1; |
| 3121 | |
| 3122 | cmp1 = dup_const(esz, n); |
| 3123 | cmp0 = cmp1 ^ m0; |
| 3124 | cmp1 = cmp1 ^ m1; |
| 3125 | cmp0 = (cmp0 - ones) & ~cmp0; |
| 3126 | cmp1 = (cmp1 - ones) & ~cmp1; |
| 3127 | return (cmp0 | cmp1) & signs; |
| 3128 | } |
| 3129 | |
Song Gao | f3dfcc8 | 2023-09-14 10:26:38 +0800 | [diff] [blame] | 3130 | #define SETANYEQZ(NAME, MO) \ |
| 3131 | void HELPER(NAME)(CPULoongArchState *env, \ |
| 3132 | uint32_t oprsz, uint32_t cd, uint32_t vj) \ |
| 3133 | { \ |
| 3134 | VReg *Vj = &(env->fpr[vj].vreg); \ |
| 3135 | \ |
| 3136 | env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \ |
| 3137 | if (oprsz == 32) { \ |
| 3138 | env->cf[cd & 0x7] = env->cf[cd & 0x7] || \ |
| 3139 | do_match2(0, Vj->D(2), Vj->D(3), MO); \ |
| 3140 | } \ |
Song Gao | d0dfa19 | 2023-05-04 20:28:03 +0800 | [diff] [blame] | 3141 | } |
Song Gao | f3dfcc8 | 2023-09-14 10:26:38 +0800 | [diff] [blame] | 3142 | |
Song Gao | d0dfa19 | 2023-05-04 20:28:03 +0800 | [diff] [blame] | 3143 | SETANYEQZ(vsetanyeqz_b, MO_8) |
| 3144 | SETANYEQZ(vsetanyeqz_h, MO_16) |
| 3145 | SETANYEQZ(vsetanyeqz_w, MO_32) |
| 3146 | SETANYEQZ(vsetanyeqz_d, MO_64) |
| 3147 | |
Song Gao | f3dfcc8 | 2023-09-14 10:26:38 +0800 | [diff] [blame] | 3148 | #define SETALLNEZ(NAME, MO) \ |
| 3149 | void HELPER(NAME)(CPULoongArchState *env, \ |
| 3150 | uint32_t oprsz, uint32_t cd, uint32_t vj) \ |
| 3151 | { \ |
| 3152 | VReg *Vj = &(env->fpr[vj].vreg); \ |
| 3153 | \ |
| 3154 | env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO); \ |
| 3155 | if (oprsz == 32) { \ |
| 3156 | env->cf[cd & 0x7] = env->cf[cd & 0x7] && \ |
| 3157 | !do_match2(0, Vj->D(2), Vj->D(3), MO); \ |
| 3158 | } \ |
Song Gao | d0dfa19 | 2023-05-04 20:28:03 +0800 | [diff] [blame] | 3159 | } |
Song Gao | f3dfcc8 | 2023-09-14 10:26:38 +0800 | [diff] [blame] | 3160 | |
Song Gao | d0dfa19 | 2023-05-04 20:28:03 +0800 | [diff] [blame] | 3161 | SETALLNEZ(vsetallnez_b, MO_8) |
| 3162 | SETALLNEZ(vsetallnez_h, MO_16) |
| 3163 | SETALLNEZ(vsetallnez_w, MO_32) |
| 3164 | SETALLNEZ(vsetallnez_d, MO_64) |
Song Gao | d5e5563 | 2023-05-04 20:28:05 +0800 | [diff] [blame] | 3165 | |
Song Gao | df97f33 | 2023-09-14 10:26:40 +0800 | [diff] [blame] | 3166 | #define XVINSVE0(NAME, E, MASK) \ |
| 3167 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ |
| 3168 | { \ |
| 3169 | VReg *Vd = (VReg *)vd; \ |
| 3170 | VReg *Vj = (VReg *)vj; \ |
| 3171 | Vd->E(imm & MASK) = Vj->E(0); \ |
| 3172 | } |
| 3173 | |
| 3174 | XVINSVE0(xvinsve0_w, W, 0x7) |
| 3175 | XVINSVE0(xvinsve0_d, D, 0x3) |
| 3176 | |
| 3177 | #define XVPICKVE(NAME, E, BIT, MASK) \ |
| 3178 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ |
| 3179 | { \ |
| 3180 | int i; \ |
| 3181 | VReg *Vd = (VReg *)vd; \ |
| 3182 | VReg *Vj = (VReg *)vj; \ |
| 3183 | int oprsz = simd_oprsz(desc); \ |
| 3184 | \ |
| 3185 | Vd->E(0) = Vj->E(imm & MASK); \ |
| 3186 | for (i = 1; i < oprsz / (BIT / 8); i++) { \ |
| 3187 | Vd->E(i) = 0; \ |
| 3188 | } \ |
| 3189 | } |
| 3190 | |
| 3191 | XVPICKVE(xvpickve_w, W, 32, 0x7) |
| 3192 | XVPICKVE(xvpickve_d, D, 64, 0x3) |
| 3193 | |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 3194 | #define VPACKEV(NAME, BIT, E) \ |
| 3195 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 3196 | { \ |
| 3197 | int i; \ |
Song Gao | ad29214 | 2023-09-14 10:26:41 +0800 | [diff] [blame] | 3198 | VReg temp = {}; \ |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 3199 | VReg *Vd = (VReg *)vd; \ |
| 3200 | VReg *Vj = (VReg *)vj; \ |
| 3201 | VReg *Vk = (VReg *)vk; \ |
Song Gao | ad29214 | 2023-09-14 10:26:41 +0800 | [diff] [blame] | 3202 | int oprsz = simd_oprsz(desc); \ |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 3203 | \ |
Song Gao | ad29214 | 2023-09-14 10:26:41 +0800 | [diff] [blame] | 3204 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 3205 | temp.E(2 * i + 1) = Vj->E(2 * i); \ |
| 3206 | temp.E(2 *i) = Vk->E(2 * i); \ |
| 3207 | } \ |
| 3208 | *Vd = temp; \ |
Song Gao | d5e5563 | 2023-05-04 20:28:05 +0800 | [diff] [blame] | 3209 | } |
| 3210 | |
| 3211 | VPACKEV(vpackev_b, 16, B) |
| 3212 | VPACKEV(vpackev_h, 32, H) |
| 3213 | VPACKEV(vpackev_w, 64, W) |
| 3214 | VPACKEV(vpackev_d, 128, D) |
| 3215 | |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 3216 | #define VPACKOD(NAME, BIT, E) \ |
| 3217 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 3218 | { \ |
| 3219 | int i; \ |
Song Gao | ad29214 | 2023-09-14 10:26:41 +0800 | [diff] [blame] | 3220 | VReg temp = {}; \ |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 3221 | VReg *Vd = (VReg *)vd; \ |
| 3222 | VReg *Vj = (VReg *)vj; \ |
| 3223 | VReg *Vk = (VReg *)vk; \ |
Song Gao | ad29214 | 2023-09-14 10:26:41 +0800 | [diff] [blame] | 3224 | int oprsz = simd_oprsz(desc); \ |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 3225 | \ |
Song Gao | ad29214 | 2023-09-14 10:26:41 +0800 | [diff] [blame] | 3226 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
Song Gao | 04711da | 2023-09-14 10:25:54 +0800 | [diff] [blame] | 3227 | temp.E(2 * i + 1) = Vj->E(2 * i + 1); \ |
| 3228 | temp.E(2 * i) = Vk->E(2 * i + 1); \ |
| 3229 | } \ |
| 3230 | *Vd = temp; \ |
Song Gao | d5e5563 | 2023-05-04 20:28:05 +0800 | [diff] [blame] | 3231 | } |
| 3232 | |
| 3233 | VPACKOD(vpackod_b, 16, B) |
| 3234 | VPACKOD(vpackod_h, 32, H) |
| 3235 | VPACKOD(vpackod_w, 64, W) |
| 3236 | VPACKOD(vpackod_d, 128, D) |
| 3237 | |
Song Gao | ad29214 | 2023-09-14 10:26:41 +0800 | [diff] [blame] | 3238 | #define VPICKEV(NAME, BIT, E) \ |
| 3239 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 3240 | { \ |
| 3241 | int i, j, ofs; \ |
| 3242 | VReg temp = {}; \ |
| 3243 | VReg *Vd = (VReg *)vd; \ |
| 3244 | VReg *Vj = (VReg *)vj; \ |
| 3245 | VReg *Vk = (VReg *)vk; \ |
| 3246 | int oprsz = simd_oprsz(desc); \ |
| 3247 | \ |
| 3248 | ofs = LSX_LEN / BIT; \ |
| 3249 | for (i = 0; i < oprsz / 16; i++) { \ |
| 3250 | for (j = 0; j < ofs; j++) { \ |
| 3251 | temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i)); \ |
| 3252 | temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i)); \ |
| 3253 | } \ |
| 3254 | } \ |
| 3255 | *Vd = temp; \ |
Song Gao | d5e5563 | 2023-05-04 20:28:05 +0800 | [diff] [blame] | 3256 | } |
| 3257 | |
| 3258 | VPICKEV(vpickev_b, 16, B) |
| 3259 | VPICKEV(vpickev_h, 32, H) |
| 3260 | VPICKEV(vpickev_w, 64, W) |
| 3261 | VPICKEV(vpickev_d, 128, D) |
| 3262 | |
Song Gao | ad29214 | 2023-09-14 10:26:41 +0800 | [diff] [blame] | 3263 | #define VPICKOD(NAME, BIT, E) \ |
| 3264 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 3265 | { \ |
| 3266 | int i, j, ofs; \ |
| 3267 | VReg temp = {}; \ |
| 3268 | VReg *Vd = (VReg *)vd; \ |
| 3269 | VReg *Vj = (VReg *)vj; \ |
| 3270 | VReg *Vk = (VReg *)vk; \ |
| 3271 | int oprsz = simd_oprsz(desc); \ |
| 3272 | \ |
| 3273 | ofs = LSX_LEN / BIT; \ |
| 3274 | for (i = 0; i < oprsz / 16; i++) { \ |
| 3275 | for (j = 0; j < ofs; j++) { \ |
| 3276 | temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i) + 1); \ |
| 3277 | temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i) + 1); \ |
| 3278 | } \ |
| 3279 | } \ |
| 3280 | *Vd = temp; \ |
Song Gao | d5e5563 | 2023-05-04 20:28:05 +0800 | [diff] [blame] | 3281 | } |
| 3282 | |
| 3283 | VPICKOD(vpickod_b, 16, B) |
| 3284 | VPICKOD(vpickod_h, 32, H) |
| 3285 | VPICKOD(vpickod_w, 64, W) |
| 3286 | VPICKOD(vpickod_d, 128, D) |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3287 | |
Song Gao | ad29214 | 2023-09-14 10:26:41 +0800 | [diff] [blame] | 3288 | #define VILVL(NAME, BIT, E) \ |
| 3289 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 3290 | { \ |
| 3291 | int i, j, ofs; \ |
| 3292 | VReg temp = {}; \ |
| 3293 | VReg *Vd = (VReg *)vd; \ |
| 3294 | VReg *Vj = (VReg *)vj; \ |
| 3295 | VReg *Vk = (VReg *)vk; \ |
| 3296 | int oprsz = simd_oprsz(desc); \ |
| 3297 | \ |
| 3298 | ofs = LSX_LEN / BIT; \ |
| 3299 | for (i = 0; i < oprsz / 16; i++) { \ |
| 3300 | for (j = 0; j < ofs; j++) { \ |
| 3301 | temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * 2 * i); \ |
| 3302 | temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * 2 * i); \ |
| 3303 | } \ |
| 3304 | } \ |
| 3305 | *Vd = temp; \ |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3306 | } |
| 3307 | |
| 3308 | VILVL(vilvl_b, 16, B) |
| 3309 | VILVL(vilvl_h, 32, H) |
| 3310 | VILVL(vilvl_w, 64, W) |
| 3311 | VILVL(vilvl_d, 128, D) |
| 3312 | |
Song Gao | ad29214 | 2023-09-14 10:26:41 +0800 | [diff] [blame] | 3313 | #define VILVH(NAME, BIT, E) \ |
| 3314 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 3315 | { \ |
| 3316 | int i, j, ofs; \ |
| 3317 | VReg temp = {}; \ |
| 3318 | VReg *Vd = (VReg *)vd; \ |
| 3319 | VReg *Vj = (VReg *)vj; \ |
| 3320 | VReg *Vk = (VReg *)vk; \ |
| 3321 | int oprsz = simd_oprsz(desc); \ |
| 3322 | \ |
| 3323 | ofs = LSX_LEN / BIT; \ |
| 3324 | for (i = 0; i < oprsz / 16; i++) { \ |
| 3325 | for (j = 0; j < ofs; j++) { \ |
| 3326 | temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * (2 * i + 1)); \ |
| 3327 | temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * (2 * i + 1)); \ |
| 3328 | } \ |
| 3329 | } \ |
| 3330 | *Vd = temp; \ |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3331 | } |
| 3332 | |
| 3333 | VILVH(vilvh_b, 16, B) |
| 3334 | VILVH(vilvh_h, 32, H) |
| 3335 | VILVH(vilvh_w, 64, W) |
| 3336 | VILVH(vilvh_d, 128, D) |
| 3337 | |
Song Gao | eb48ab2 | 2023-09-14 10:25:52 +0800 | [diff] [blame] | 3338 | void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc) |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3339 | { |
Song Gao | 513e88a | 2023-09-14 10:26:42 +0800 | [diff] [blame] | 3340 | int i, j, m; |
| 3341 | VReg temp = {}; |
Song Gao | eb48ab2 | 2023-09-14 10:25:52 +0800 | [diff] [blame] | 3342 | VReg *Vd = (VReg *)vd; |
| 3343 | VReg *Vj = (VReg *)vj; |
| 3344 | VReg *Vk = (VReg *)vk; |
| 3345 | VReg *Va = (VReg *)va; |
Song Gao | 513e88a | 2023-09-14 10:26:42 +0800 | [diff] [blame] | 3346 | int oprsz = simd_oprsz(desc); |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3347 | |
Song Gao | 513e88a | 2023-09-14 10:26:42 +0800 | [diff] [blame] | 3348 | m = LSX_LEN / 8; |
| 3349 | for (i = 0; i < (oprsz / 16) * m; i++) { |
| 3350 | j = i < m ? 0 : 1; |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3351 | uint64_t k = (uint8_t)Va->B(i) % (2 * m); |
Song Gao | 513e88a | 2023-09-14 10:26:42 +0800 | [diff] [blame] | 3352 | temp.B(i) = k < m ? Vk->B(k + j * m): Vj->B(k + (j - 1) * m); |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3353 | } |
| 3354 | *Vd = temp; |
| 3355 | } |
| 3356 | |
Song Gao | 513e88a | 2023-09-14 10:26:42 +0800 | [diff] [blame] | 3357 | #define VSHUF(NAME, BIT, E) \ |
| 3358 | void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ |
| 3359 | { \ |
| 3360 | int i, j, m; \ |
| 3361 | VReg temp = {}; \ |
| 3362 | VReg *Vd = (VReg *)vd; \ |
| 3363 | VReg *Vj = (VReg *)vj; \ |
| 3364 | VReg *Vk = (VReg *)vk; \ |
| 3365 | int oprsz = simd_oprsz(desc); \ |
| 3366 | \ |
| 3367 | m = LSX_LEN / BIT; \ |
| 3368 | for (i = 0; i < (oprsz / 16) * m; i++) { \ |
| 3369 | j = i < m ? 0 : 1; \ |
| 3370 | uint64_t k = ((uint8_t)Vd->E(i)) % (2 * m); \ |
| 3371 | temp.E(i) = k < m ? Vk->E(k + j * m) : Vj->E(k + (j - 1) * m); \ |
| 3372 | } \ |
| 3373 | *Vd = temp; \ |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3374 | } |
| 3375 | |
| 3376 | VSHUF(vshuf_h, 16, H) |
| 3377 | VSHUF(vshuf_w, 32, W) |
| 3378 | VSHUF(vshuf_d, 64, D) |
| 3379 | |
Song Gao | 513e88a | 2023-09-14 10:26:42 +0800 | [diff] [blame] | 3380 | #define VSHUF4I(NAME, BIT, E) \ |
| 3381 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ |
| 3382 | { \ |
| 3383 | int i, j, max; \ |
| 3384 | VReg temp = {}; \ |
| 3385 | VReg *Vd = (VReg *)vd; \ |
| 3386 | VReg *Vj = (VReg *)vj; \ |
| 3387 | int oprsz = simd_oprsz(desc); \ |
| 3388 | \ |
| 3389 | max = LSX_LEN / BIT; \ |
| 3390 | for (i = 0; i < oprsz / (BIT / 8); i++) { \ |
| 3391 | j = i < max ? 1 : 2; \ |
| 3392 | temp.E(i) = Vj->E(SHF_POS(i - ((j -1)* max), imm) + (j - 1) * max); \ |
| 3393 | } \ |
| 3394 | *Vd = temp; \ |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3395 | } |
| 3396 | |
| 3397 | VSHUF4I(vshuf4i_b, 8, B) |
| 3398 | VSHUF4I(vshuf4i_h, 16, H) |
| 3399 | VSHUF4I(vshuf4i_w, 32, W) |
| 3400 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 3401 | void HELPER(vshuf4i_d)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3402 | { |
Song Gao | 513e88a | 2023-09-14 10:26:42 +0800 | [diff] [blame] | 3403 | int i; |
| 3404 | VReg temp = {}; |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 3405 | VReg *Vd = (VReg *)vd; |
| 3406 | VReg *Vj = (VReg *)vj; |
Song Gao | 513e88a | 2023-09-14 10:26:42 +0800 | [diff] [blame] | 3407 | int oprsz = simd_oprsz(desc); |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3408 | |
Song Gao | 513e88a | 2023-09-14 10:26:42 +0800 | [diff] [blame] | 3409 | for (i = 0; i < oprsz / 16; i++) { |
| 3410 | temp.D(2 * i) = (imm & 2 ? Vj : Vd)->D((imm & 1) + 2 * i); |
| 3411 | temp.D(2 * i + 1) = (imm & 8 ? Vj : Vd)->D(((imm >> 2) & 1) + 2 * i); |
| 3412 | } |
| 3413 | *Vd = temp; |
| 3414 | } |
| 3415 | |
| 3416 | void HELPER(vperm_w)(void *vd, void *vj, void *vk, uint32_t desc) |
| 3417 | { |
| 3418 | int i, m; |
| 3419 | VReg temp = {}; |
| 3420 | VReg *Vd = (VReg *)vd; |
| 3421 | VReg *Vj = (VReg *)vj; |
| 3422 | VReg *Vk = (VReg *)vk; |
| 3423 | |
| 3424 | m = LASX_LEN / 32; |
| 3425 | for (i = 0; i < m ; i++) { |
| 3426 | uint64_t k = (uint8_t)Vk->W(i) % 8; |
| 3427 | temp.W(i) = Vj->W(k); |
| 3428 | } |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3429 | *Vd = temp; |
| 3430 | } |
| 3431 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 3432 | void HELPER(vpermi_w)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3433 | { |
Song Gao | 513e88a | 2023-09-14 10:26:42 +0800 | [diff] [blame] | 3434 | int i; |
| 3435 | VReg temp = {}; |
| 3436 | VReg *Vd = (VReg *)vd; |
| 3437 | VReg *Vj = (VReg *)vj; |
| 3438 | int oprsz = simd_oprsz(desc); |
| 3439 | |
| 3440 | for (i = 0; i < oprsz / 16; i++) { |
| 3441 | temp.W(4 * i) = Vj->W((imm & 0x3) + 4 * i); |
| 3442 | temp.W(4 * i + 1) = Vj->W(((imm >> 2) & 0x3) + 4 * i); |
| 3443 | temp.W(4 * i + 2) = Vd->W(((imm >> 4) & 0x3) + 4 * i); |
| 3444 | temp.W(4 * i + 3) = Vd->W(((imm >> 6) & 0x3) + 4 * i); |
| 3445 | } |
| 3446 | *Vd = temp; |
| 3447 | } |
| 3448 | |
| 3449 | void HELPER(vpermi_d)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
| 3450 | { |
| 3451 | VReg temp = {}; |
| 3452 | VReg *Vd = (VReg *)vd; |
| 3453 | VReg *Vj = (VReg *)vj; |
| 3454 | |
| 3455 | temp.D(0) = Vj->D(imm & 0x3); |
| 3456 | temp.D(1) = Vj->D((imm >> 2) & 0x3); |
| 3457 | temp.D(2) = Vj->D((imm >> 4) & 0x3); |
| 3458 | temp.D(3) = Vj->D((imm >> 6) & 0x3); |
| 3459 | *Vd = temp; |
| 3460 | } |
| 3461 | |
| 3462 | void HELPER(vpermi_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) |
| 3463 | { |
| 3464 | int i; |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3465 | VReg temp; |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 3466 | VReg *Vd = (VReg *)vd; |
| 3467 | VReg *Vj = (VReg *)vj; |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3468 | |
Song Gao | 513e88a | 2023-09-14 10:26:42 +0800 | [diff] [blame] | 3469 | for (i = 0; i < 2; i++, imm >>= 4) { |
| 3470 | temp.Q(i) = (imm & 2 ? Vd: Vj)->Q(imm & 1); |
| 3471 | } |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3472 | *Vd = temp; |
| 3473 | } |
| 3474 | |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 3475 | #define VEXTRINS(NAME, BIT, E, MASK) \ |
| 3476 | void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ |
| 3477 | { \ |
Song Gao | 513e88a | 2023-09-14 10:26:42 +0800 | [diff] [blame] | 3478 | int i, ins, extr, max; \ |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 3479 | VReg *Vd = (VReg *)vd; \ |
| 3480 | VReg *Vj = (VReg *)vj; \ |
Song Gao | 513e88a | 2023-09-14 10:26:42 +0800 | [diff] [blame] | 3481 | int oprsz = simd_oprsz(desc); \ |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 3482 | \ |
Song Gao | 513e88a | 2023-09-14 10:26:42 +0800 | [diff] [blame] | 3483 | max = LSX_LEN / BIT; \ |
Song Gao | 329517d | 2023-09-14 10:25:57 +0800 | [diff] [blame] | 3484 | ins = (imm >> 4) & MASK; \ |
| 3485 | extr = imm & MASK; \ |
Song Gao | 513e88a | 2023-09-14 10:26:42 +0800 | [diff] [blame] | 3486 | for (i = 0; i < oprsz / 16; i++) { \ |
| 3487 | Vd->E(ins + i * max) = Vj->E(extr + i * max); \ |
| 3488 | } \ |
Song Gao | e93dd43 | 2023-05-04 20:28:06 +0800 | [diff] [blame] | 3489 | } |
| 3490 | |
| 3491 | VEXTRINS(vextrins_b, 8, B, 0xf) |
| 3492 | VEXTRINS(vextrins_h, 16, H, 0x7) |
| 3493 | VEXTRINS(vextrins_w, 32, W, 0x3) |
| 3494 | VEXTRINS(vextrins_d, 64, D, 0x1) |