| /* |
| * Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved. |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2 of the License, or |
| * (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, see <http://www.gnu.org/licenses/>. |
| */ |
| |
| #include <stdio.h> |
| #include <stdint.h> |
| #include <stdbool.h> |
| #include <string.h> |
| #include <limits.h> |
| |
| int err; /* Failure indicator: main() prints "FAIL" and returns 1 when this is non-zero. Defined ahead of the hvx_misc.h include; presumably updated by that header's check helpers -- TODO confirm. */ |
| |
| #include "hvx_misc.h" |
| |
/*
 * Add the rounding term for a right shift by SHAMT: 1 << (SHAMT - 1) when
 * SHAMT is positive, nothing otherwise.  The rounding term has type
 * long long, so the addition is carried out in at least 64 bits.
 */
#define fVROUND(VAL, SHAMT) \
    ((VAL) + (((SHAMT) <= 0) ? 0LL : (1LL << ((SHAMT) - 1))))

/*
 * Unsigned saturation to the range [0, MAX], where MAX is an all-ones mask.
 * A value that already fits in the mask is passed through unchanged.
 * Otherwise the value clamps to 0 when its low 32 bits are negative when
 * viewed as int32_t, and to MAX in every other case.
 */
#define fVSATU(VAL, MAX) \
    ((((VAL) & (long long)(MAX)) == (VAL)) ? \
        (VAL) : \
        ((((int32_t)(VAL)) < 0) ? 0 : (MAX)))

/* Saturate to an unsigned byte (0 .. 0xff) */
#define fVSATUB(VAL) fVSATU(VAL, 0xff)

/* Saturate to an unsigned halfword (0 .. 0xffff) */
#define fVSATUH(VAL) fVSATU(VAL, 0xffff)
| |
| static void test_vasrvuhubrndsat(void) |
| { |
| void *p0 = buffer0; |
| void *p1 = buffer1; |
| void *pout = output; |
| |
| memset(expect, 0xaa, sizeof(expect)); |
| memset(output, 0xbb, sizeof(output)); |
| |
| for (int i = 0; i < BUFSIZE / 2; i++) { |
| asm("v4 = vmem(%0 + #0)\n\t" |
| "v5 = vmem(%0 + #1)\n\t" |
| "v6 = vmem(%1 + #0)\n\t" |
| "v5.ub = vasr(v5:4.uh, v6.ub):rnd:sat\n\t" |
| "vmem(%2) = v5\n\t" |
| : : "r"(p0), "r"(p1), "r"(pout) |
| : "v4", "v5", "v6", "memory"); |
| p0 += sizeof(MMVector) * 2; |
| p1 += sizeof(MMVector); |
| pout += sizeof(MMVector); |
| |
| for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) { |
| int shamt; |
| uint8_t byte0; |
| uint8_t byte1; |
| |
| shamt = buffer1[i].ub[2 * j + 0] & 0x7; |
| byte0 = fVSATUB(fVROUND(buffer0[2 * i + 0].uh[j], shamt) >> shamt); |
| shamt = buffer1[i].ub[2 * j + 1] & 0x7; |
| byte1 = fVSATUB(fVROUND(buffer0[2 * i + 1].uh[j], shamt) >> shamt); |
| expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff); |
| } |
| } |
| |
| check_output_h(__LINE__, BUFSIZE / 2); |
| } |
| |
| static void test_vasrvuhubsat(void) |
| { |
| void *p0 = buffer0; |
| void *p1 = buffer1; |
| void *pout = output; |
| |
| memset(expect, 0xaa, sizeof(expect)); |
| memset(output, 0xbb, sizeof(output)); |
| |
| for (int i = 0; i < BUFSIZE / 2; i++) { |
| asm("v4 = vmem(%0 + #0)\n\t" |
| "v5 = vmem(%0 + #1)\n\t" |
| "v6 = vmem(%1 + #0)\n\t" |
| "v5.ub = vasr(v5:4.uh, v6.ub):sat\n\t" |
| "vmem(%2) = v5\n\t" |
| : : "r"(p0), "r"(p1), "r"(pout) |
| : "v4", "v5", "v6", "memory"); |
| p0 += sizeof(MMVector) * 2; |
| p1 += sizeof(MMVector); |
| pout += sizeof(MMVector); |
| |
| for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) { |
| int shamt; |
| uint8_t byte0; |
| uint8_t byte1; |
| |
| shamt = buffer1[i].ub[2 * j + 0] & 0x7; |
| byte0 = fVSATUB(buffer0[2 * i + 0].uh[j] >> shamt); |
| shamt = buffer1[i].ub[2 * j + 1] & 0x7; |
| byte1 = fVSATUB(buffer0[2 * i + 1].uh[j] >> shamt); |
| expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff); |
| } |
| } |
| |
| check_output_h(__LINE__, BUFSIZE / 2); |
| } |
| |
| static void test_vasrvwuhrndsat(void) |
| { |
| void *p0 = buffer0; |
| void *p1 = buffer1; |
| void *pout = output; |
| |
| memset(expect, 0xaa, sizeof(expect)); |
| memset(output, 0xbb, sizeof(output)); |
| |
| for (int i = 0; i < BUFSIZE / 2; i++) { |
| asm("v4 = vmem(%0 + #0)\n\t" |
| "v5 = vmem(%0 + #1)\n\t" |
| "v6 = vmem(%1 + #0)\n\t" |
| "v5.uh = vasr(v5:4.w, v6.uh):rnd:sat\n\t" |
| "vmem(%2) = v5\n\t" |
| : : "r"(p0), "r"(p1), "r"(pout) |
| : "v4", "v5", "v6", "memory"); |
| p0 += sizeof(MMVector) * 2; |
| p1 += sizeof(MMVector); |
| pout += sizeof(MMVector); |
| |
| for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { |
| int shamt; |
| uint16_t half0; |
| uint16_t half1; |
| |
| shamt = buffer1[i].uh[2 * j + 0] & 0xf; |
| half0 = fVSATUH(fVROUND(buffer0[2 * i + 0].w[j], shamt) >> shamt); |
| shamt = buffer1[i].uh[2 * j + 1] & 0xf; |
| half1 = fVSATUH(fVROUND(buffer0[2 * i + 1].w[j], shamt) >> shamt); |
| expect[i].w[j] = (half1 << 16) | (half0 & 0xffff); |
| } |
| } |
| |
| check_output_w(__LINE__, BUFSIZE / 2); |
| } |
| |
| static void test_vasrvwuhsat(void) |
| { |
| void *p0 = buffer0; |
| void *p1 = buffer1; |
| void *pout = output; |
| |
| memset(expect, 0xaa, sizeof(expect)); |
| memset(output, 0xbb, sizeof(output)); |
| |
| for (int i = 0; i < BUFSIZE / 2; i++) { |
| asm("v4 = vmem(%0 + #0)\n\t" |
| "v5 = vmem(%0 + #1)\n\t" |
| "v6 = vmem(%1 + #0)\n\t" |
| "v5.uh = vasr(v5:4.w, v6.uh):sat\n\t" |
| "vmem(%2) = v5\n\t" |
| : : "r"(p0), "r"(p1), "r"(pout) |
| : "v4", "v5", "v6", "memory"); |
| p0 += sizeof(MMVector) * 2; |
| p1 += sizeof(MMVector); |
| pout += sizeof(MMVector); |
| |
| for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { |
| int shamt; |
| uint16_t half0; |
| uint16_t half1; |
| |
| shamt = buffer1[i].uh[2 * j + 0] & 0xf; |
| half0 = fVSATUH(buffer0[2 * i + 0].w[j] >> shamt); |
| shamt = buffer1[i].uh[2 * j + 1] & 0xf; |
| half1 = fVSATUH(buffer0[2 * i + 1].w[j] >> shamt); |
| expect[i].w[j] = (half1 << 16) | (half0 & 0xffff); |
| } |
| } |
| |
| check_output_w(__LINE__, BUFSIZE / 2); |
| } |
| |
| static void test_vassign_tmp(void) |
| { |
| void *p0 = buffer0; |
| void *pout = output; |
| |
| memset(expect, 0xaa, sizeof(expect)); |
| memset(output, 0xbb, sizeof(output)); |
| |
| for (int i = 0; i < BUFSIZE; i++) { |
| /* |
| * Assign into v12 as .tmp, then use it in the next packet |
| * Should get the new value within the same packet and |
| * the old value in the next packet |
| */ |
| asm("v3 = vmem(%0 + #0)\n\t" |
| "r1 = #1\n\t" |
| "v12 = vsplat(r1)\n\t" |
| "r1 = #2\n\t" |
| "v13 = vsplat(r1)\n\t" |
| "{\n\t" |
| " v12.tmp = v13\n\t" |
| " v4.w = vadd(v12.w, v3.w)\n\t" |
| "}\n\t" |
| "v4.w = vadd(v4.w, v12.w)\n\t" |
| "vmem(%1 + #0) = v4\n\t" |
| : : "r"(p0), "r"(pout) |
| : "r1", "v3", "v4", "v12", "v13", "memory"); |
| p0 += sizeof(MMVector); |
| pout += sizeof(MMVector); |
| |
| for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { |
| expect[i].w[j] = buffer0[i].w[j] + 3; |
| } |
| } |
| |
| check_output_w(__LINE__, BUFSIZE); |
| } |
| |
| static void test_vcombine_tmp(void) |
| { |
| void *p0 = buffer0; |
| void *p1 = buffer1; |
| void *pout = output; |
| |
| memset(expect, 0xaa, sizeof(expect)); |
| memset(output, 0xbb, sizeof(output)); |
| |
| for (int i = 0; i < BUFSIZE; i++) { |
| /* |
| * Combine into v13:12 as .tmp, then use it in the next packet |
| * Should get the new value within the same packet and |
| * the old value in the next packet |
| */ |
| asm("v3 = vmem(%0 + #0)\n\t" |
| "r1 = #1\n\t" |
| "v12 = vsplat(r1)\n\t" |
| "r1 = #2\n\t" |
| "v13 = vsplat(r1)\n\t" |
| "r1 = #3\n\t" |
| "v14 = vsplat(r1)\n\t" |
| "r1 = #4\n\t" |
| "v15 = vsplat(r1)\n\t" |
| "{\n\t" |
| " v13:12.tmp = vcombine(v15, v14)\n\t" |
| " v4.w = vadd(v12.w, v3.w)\n\t" |
| " v16 = v13\n\t" |
| "}\n\t" |
| "v4.w = vadd(v4.w, v12.w)\n\t" |
| "v4.w = vadd(v4.w, v13.w)\n\t" |
| "v4.w = vadd(v4.w, v16.w)\n\t" |
| "vmem(%2 + #0) = v4\n\t" |
| : : "r"(p0), "r"(p1), "r"(pout) |
| : "r1", "v3", "v4", "v12", "v13", "v14", "v15", "v16", "memory"); |
| p0 += sizeof(MMVector); |
| p1 += sizeof(MMVector); |
| pout += sizeof(MMVector); |
| |
| for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { |
| expect[i].w[j] = buffer0[i].w[j] + 10; |
| } |
| } |
| |
| check_output_w(__LINE__, BUFSIZE); |
| } |
| |
| static void test_vmpyuhvs(void) |
| { |
| void *p0 = buffer0; |
| void *p1 = buffer1; |
| void *pout = output; |
| |
| memset(expect, 0xaa, sizeof(expect)); |
| memset(output, 0xbb, sizeof(output)); |
| |
| for (int i = 0; i < BUFSIZE; i++) { |
| asm("v4 = vmem(%0 + #0)\n\t" |
| "v5 = vmem(%1 + #0)\n\t" |
| "v4.uh = vmpy(V4.uh, v5.uh):>>16\n\t" |
| "vmem(%2) = v4\n\t" |
| : : "r"(p0), "r"(p1), "r"(pout) |
| : "v4", "v5", "memory"); |
| p0 += sizeof(MMVector); |
| p1 += sizeof(MMVector); |
| pout += sizeof(MMVector); |
| |
| for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) { |
| expect[i].uh[j] = (buffer0[i].uh[j] * buffer1[i].uh[j]) >> 16; |
| } |
| } |
| |
| check_output_h(__LINE__, BUFSIZE); |
| } |
| |
| int main() |
| { |
| init_buffers(); |
| |
| test_vasrvuhubrndsat(); |
| test_vasrvuhubsat(); |
| test_vasrvwuhrndsat(); |
| test_vasrvwuhsat(); |
| |
| test_vassign_tmp(); |
| test_vcombine_tmp(); |
| |
| test_vmpyuhvs(); |
| |
| puts(err ? "FAIL" : "PASS"); |
| return err ? 1 : 0; |
| } |