| ;------------------------------------------------------------------------------ | |
| ; | |
| ; Copyright (c) 2022, Intel Corporation. All rights reserved.<BR> | |
| ; SPDX-License-Identifier: BSD-2-Clause-Patent | |
| ; | |
| ; Abstract: | |
| ; | |
| ; Provide macros for register save/restore using SSE (XMM) and AVX (YMM) registers | |
| ; | |
| ;------------------------------------------------------------------------------ | |
| ; | |
| ; Define SSE and AVX instruction set | |
| ; | |
| ; | |
| ; Define SSE macros using SSE 4.1 instructions | |
| ; | |
| ; SXMMN: insert a 64-bit general purpose register into one qword lane of an | |
| ; XMM register (PINSRQ). The other qword lane is left unchanged. | |
| ; args 1:XMM, 2:IDX (0 - bits [0:63], 1 - bits [64:127]), 3:REG | |
| ; An XMM register holds only two qword lanes, so IDX is masked to bit 0. | |
| ; | |
| %macro SXMMN 3 | |
|     pinsrq  %1, %3, (%2 & 1) | |
| %endmacro | |
| ; | |
| ; LXMMN: extract one qword lane of an XMM register into a 64-bit general | |
| ; purpose register (PEXTRQ). The XMM register is not modified. | |
| ; args 1:XMM, 2:REG, 3:IDX (0 - bits [0:63], 1 - bits [64:127]) | |
| ; An XMM register holds only two qword lanes, so IDX is masked to bit 0. | |
| ; | |
| %macro LXMMN 3 | |
|     pextrq  %2, %1, (%3 & 1) | |
| %endmacro | |
| ; | |
| ; Define AVX macros using AVX instructions | |
| ; | |
| ; SYMMN: copy an XMM register into one 128-bit half of a YMM register. | |
| ; The YMM register is used as both source and destination, so its other | |
| ; half keeps its current contents. | |
| ; args 1:YMM, 2:IDX (0 - lower 128bits, 1 - upper 128bits), 3:XMM | |
| ; | |
| %macro SYMMN 3 | |
|     vinsertf128 %1, %1, %3, %2      ; %1[half %2] = %3 | |
| %endmacro | |
| ; | |
| ; LYMMN: copy one 128-bit half of a YMM register into an XMM register. | |
| ; The YMM source is not modified. | |
| ; args 1:YMM, 2:XMM, 3:IDX (0 - lower 128bits, 1 - upper 128bits) | |
| ; | |
| %macro LYMMN 3 | |
|     vextractf128 %2, %1, %3         ; %2 = %1[half %3] | |
| %endmacro | |
| ; | |
| ; SAVE_REGS: stash RBP, RBX, RSI, RDI and RSP in YMM registers. | |
| ;   YMM7[128:191] = RBP, YMM7[192:255] = RBX | |
| ;   YMM8[128:191] = RSI, YMM8[192:255] = RDI | |
| ;   RSP is stored by SAVE_RSP (YMM6[128:191]) | |
| ; The lower halves of YMM7 and YMM8 are not touched. | |
| ; Modified: XMM5, YMM6, YMM7 and YMM8 | |
| ; | |
| %macro SAVE_REGS 0 | |
|     ; Pack {RBX:RBP} into XMM5, then park it in the upper half of YMM7 | |
|     SXMMN   xmm5, 1, rbx | |
|     SXMMN   xmm5, 0, rbp | |
|     SYMMN   ymm7, 1, xmm5 | |
|     ; Pack {RDI:RSI} into XMM5, then park it in the upper half of YMM8 | |
|     SXMMN   xmm5, 1, rdi | |
|     SXMMN   xmm5, 0, rsi | |
|     SYMMN   ymm8, 1, xmm5 | |
|     ; RSP goes last; SAVE_RSP reuses XMM5 as scratch | |
|     SAVE_RSP | |
| %endmacro | |
| ; | |
| ; LOAD_REGS: reload RBP, RBX, RSI, RDI and RSP from the YMM slots written | |
| ; by SAVE_REGS. | |
| ;   RBP = YMM7[128:191], RBX = YMM7[192:255] | |
| ;   RSI = YMM8[128:191], RDI = YMM8[192:255] | |
| ;   RSP is reloaded by LOAD_RSP (YMM6[128:191]) | |
| ; Modified: XMM5, RBP, RBX, RSI, RDI and RSP | |
| ; | |
| %macro LOAD_REGS 0 | |
|     ; Upper half of YMM7 -> XMM5 -> {RBX:RBP} | |
|     LYMMN   ymm7, xmm5, 1 | |
|     LXMMN   xmm5, rbx, 1 | |
|     LXMMN   xmm5, rbp, 0 | |
|     ; Upper half of YMM8 -> XMM5 -> {RDI:RSI} | |
|     LYMMN   ymm8, xmm5, 1 | |
|     LXMMN   xmm5, rdi, 1 | |
|     LXMMN   xmm5, rsi, 0 | |
|     ; RSP is switched last | |
|     LOAD_RSP | |
| %endmacro | |
| ; | |
| ; LOAD_RBP: reload RBP from YMM7[128:191] | |
| ; Modified: XMM5 and RBP | |
| ; | |
| %macro LOAD_RBP 0 | |
|     LYMMN   ymm7, xmm5, 1           ; xmm5 = YMM7[128:255] | |
|     movq    rbp, xmm5               ; rbp  = low qword of xmm5 | |
| %endmacro | |
| ; | |
| ; LOAD_RBX: reload RBX from YMM7[192:255] | |
| ; Modified: XMM5 and RBX | |
| ; | |
| %macro LOAD_RBX 0 | |
|     LYMMN   ymm7, xmm5, 1           ; xmm5 = YMM7[128:255] | |
|     LXMMN   xmm5, rbx, 1            ; rbx  = high qword of xmm5 | |
| %endmacro | |
| ; | |
| ; Upper half of YMM6 to save/restore Time Stamp, RSP | |
| ; | |
| ; | |
| ; SAVE_TS: store a time stamp in YMM6[192:255] | |
| ; arg 1:general purpose register which holds time stamp | |
| ; The upper half of YMM6 is read into XMM5, only its high qword is replaced, | |
| ; and the result is written back, so YMM6[128:191] keeps its value. | |
| ; Modified: XMM5 and YMM6 | |
| ; | |
| %macro SAVE_TS 1 | |
|     LYMMN   ymm6, xmm5, 1           ; xmm5 = YMM6[128:255] | |
|     SXMMN   xmm5, 1, %1             ; high qword of xmm5 = time stamp | |
|     SYMMN   ymm6, 1, xmm5           ; YMM6[128:255] = xmm5 | |
| %endmacro | |
| ; | |
| ; LOAD_TS: read the time stamp back from YMM6[192:255] | |
| ; arg 1:general purpose register that receives the time stamp | |
| ; Modified: XMM5 and %1 | |
| ; | |
| %macro LOAD_TS 1 | |
|     LYMMN   ymm6, xmm5, 1           ; xmm5 = YMM6[128:255] | |
|     LXMMN   xmm5, %1, 1             ; %1   = high qword of xmm5 | |
| %endmacro | |
| ; | |
| ; SAVE_RSP: store RSP in YMM6[128:191] | |
| ; The upper half of YMM6 is read into XMM5, only its low qword is replaced, | |
| ; and the result is written back, so YMM6[192:255] keeps its value. | |
| ; Modified: XMM5 and YMM6 | |
| ; | |
| %macro SAVE_RSP 0 | |
|     LYMMN   ymm6, xmm5, 1           ; xmm5 = YMM6[128:255] | |
|     SXMMN   xmm5, 0, rsp            ; low qword of xmm5 = rsp | |
|     SYMMN   ymm6, 1, xmm5           ; YMM6[128:255] = xmm5 | |
| %endmacro | |
| ; | |
| ; LOAD_RSP: reload RSP from YMM6[128:191] | |
| ; Modified: XMM5 and RSP | |
| ; | |
| %macro LOAD_RSP 0 | |
|     LYMMN   ymm6, xmm5, 1           ; xmm5 = YMM6[128:255] | |
|     movq    rsp, xmm5               ; rsp  = low qword of xmm5 | |
| %endmacro | |
| ; | |
| ; Upper half of YMM9 to save/restore UCODE status, BFV address | |
| ; | |
| ; | |
| ; Save uCode status to YMM9[128:191] | |
| ; (lane 0 of the upper half; this is the lane LOAD_UCODE_STATUS reads) | |
| ; arg 1:general purpose register which holds uCode status | |
| ; The upper half of YMM9 is read into XMM5, only its low qword is replaced, | |
| ; and the result is written back, so YMM9[192:255] keeps its value. | |
| ; Modified: XMM5 and YMM9 | |
| ; | |
| %macro SAVE_UCODE_STATUS 1 | |
|     LYMMN   ymm9, xmm5, 1           ; xmm5 = YMM9[128:255] | |
|     SXMMN   xmm5, 0, %1             ; low qword of xmm5 = uCode status | |
|     SYMMN   ymm9, 1, xmm5           ; YMM9[128:255] = xmm5 | |
| %endmacro | |
| ; | |
| ; Restore uCode status from YMM9[128:191] | |
| ; (lane 0 of the upper half; this is the lane SAVE_UCODE_STATUS writes) | |
| ; arg 1:general purpose register where to save uCode status | |
| ; Modified: XMM5 and %1 | |
| ; | |
| %macro LOAD_UCODE_STATUS 1 | |
|     LYMMN   ymm9, xmm5, 1           ; xmm5 = YMM9[128:255] | |
|     movq    %1, xmm5                ; %1   = low qword of xmm5 | |
| %endmacro | |
| ; | |
| ; Save BFV address to YMM9[192:255] | |
| ; (lane 1 of the upper half; this is the lane LOAD_BFV reads) | |
| ; arg 1:general purpose register which holds BFV address | |
| ; The upper half of YMM9 is read into XMM5, only its high qword is replaced, | |
| ; and the result is written back, so YMM9[128:191] keeps its value. | |
| ; Modified: XMM5 and YMM9 | |
| ; | |
| %macro SAVE_BFV 1 | |
|     LYMMN   ymm9, xmm5, 1           ; xmm5 = YMM9[128:255] | |
|     SXMMN   xmm5, 1, %1             ; high qword of xmm5 = BFV address | |
|     SYMMN   ymm9, 1, xmm5           ; YMM9[128:255] = xmm5 | |
| %endmacro | |
| ; | |
| ; Restore BFV address from YMM9[192:255] | |
| ; (lane 1 of the upper half; this is the lane SAVE_BFV writes) | |
| ; arg 1:general purpose register where to save BFV address | |
| ; Modified: XMM5 and %1 | |
| ; | |
| %macro LOAD_BFV 1 | |
|     LYMMN   ymm9, xmm5, 1           ; xmm5 = YMM9[128:255] | |
|     LXMMN   xmm5, %1, 1             ; %1   = high qword of xmm5 | |
| %endmacro | |
| ; | |
| ; Upper half of YMM10 to save/restore RCX | |
| ; | |
| ; | |
| ; SAVE_RCX: store RCX in YMM10[128:191] | |
| ; The upper half of YMM10 is read into XMM5, only its low qword is replaced, | |
| ; and the result is written back, so YMM10[192:255] keeps its value. | |
| ; Modified: XMM5 and YMM10 | |
| ; | |
| %macro SAVE_RCX 0 | |
|     LYMMN   ymm10, xmm5, 1          ; xmm5 = YMM10[128:255] | |
|     SXMMN   xmm5, 0, rcx            ; low qword of xmm5 = rcx | |
|     SYMMN   ymm10, 1, xmm5          ; YMM10[128:255] = xmm5 | |
| %endmacro | |
| ; | |
| ; LOAD_RCX: reload RCX from YMM10[128:191] | |
| ; Modified: XMM5 and RCX | |
| ; | |
| %macro LOAD_RCX 0 | |
|     LYMMN   ymm10, xmm5, 1          ; xmm5 = YMM10[128:255] | |
|     movq    rcx, xmm5               ; rcx  = low qword of xmm5 | |
| %endmacro | |
| ; | |
| ; CALL_YMM: stack-less call. The address of the instruction following this | |
| ; macro is stored in YMM7[128:191], then control jumps to Entry through RSI. | |
| ; The callee comes back with RET_YMM. | |
| ; arg 1:Entry | |
| ; Modified: RSI, XMM5, YMM7 | |
| ; NOTE(review): SAVE_REGS keeps RBP in this same lane (YMM7[128:191]), so | |
| ; CALL_YMM overwrites that saved value - confirm callers expect this. | |
| ; | |
| %macro CALL_YMM 1 | |
|     mov     rsi, %%ReturnAddress    ; rsi = address to resume at | |
|     LYMMN   ymm7, xmm5, 1           ; xmm5 = YMM7[128:255] | |
|     SXMMN   xmm5, 0, rsi            ; low qword of xmm5 = return address | |
|     SYMMN   ymm7, 1, xmm5           ; YMM7[128:255] = xmm5 | |
|     mov     rsi, %1                 ; rsi = Entry | |
|     jmp     rsi | |
| %%ReturnAddress: | |
| %endmacro | |
| ; | |
| ; RET_YMM: return from a routine entered with CALL_YMM by jumping to the | |
| ; address held in YMM7[128:191]. | |
| ; Modified: RSI, XMM5 | |
| ; | |
| %macro RET_YMM 0 | |
|     LYMMN   ymm7, xmm5, 1           ; xmm5 = YMM7[128:255] | |
|     movq    rsi, xmm5               ; rsi  = saved return address | |
|     jmp     rsi | |
| %endmacro | |
| ; | |
| ; ENABLE_SSE: initialize the x87 FPU, check CPUID for SSE and SSE4.1, | |
| ; set CR4.OSFXSR and CR4.OSXMMEXCPT, then load MXCSR. | |
| ; Spins forever at the error label when CPUID reports no SSE or no SSE4.1. | |
| ; RBX and RCX are preserved across CPUID through R11 and R10. | |
| ; Modified: RAX, RDX, R10, R11, flags, x87 state, CR4 and MXCSR | |
| ; All labels are macro-local (%%), so the macro may be expanded more than | |
| ; once per file and does not change the caller's local-label scope. | |
| ; | |
| %macro ENABLE_SSE 0 | |
|     ; | |
|     ; Initialize floating point units | |
|     ; (jump over the control-word constants embedded in the code stream) | |
|     ; | |
|     jmp     %%NextAddress | |
|     align   4 | |
|     ; | |
|     ; Float control word initial value: | |
|     ; all exceptions masked, double-precision, round-to-nearest | |
|     ; | |
| %%FpuControlWord: DW 027Fh | |
|     ; | |
|     ; MXCSR initial value: | |
|     ; all exceptions masked, round-to-nearest (flush-to-zero, bit 15, is clear). | |
|     ; LDMXCSR reads only the low 32 bits of this quadword. | |
|     ; | |
| %%MmxControlWord: DQ 01F80h | |
| %%SseError: | |
|     ; | |
|     ; Processor has to support SSE | |
|     ; | |
|     jmp     %%SseError | |
| %%NextAddress: | |
|     finit | |
|     lea     rax, [rel %%FpuControlWord] ; RIP-relative: no absolute address needed | |
|     fldcw   [rax] | |
|     ; | |
|     ; Use CpuId instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test | |
|     ; whether the processor supports SSE instruction. | |
|     ; | |
|     ; Save RBX to R11 | |
|     ; Save RCX to R10 | |
|     ; | |
|     mov     r11, rbx | |
|     mov     r10, rcx | |
|     mov     eax, 1                  ; CPUID leaf 1 | |
|     cpuid | |
|     bt      edx, 25 | |
|     jnc     %%SseError | |
|     ; | |
|     ; SSE 4.1 support (CPUID.01H:ECX[bit 19]) | |
|     ; | |
|     bt      ecx, 19 | |
|     jnc     %%SseError | |
|     ; | |
|     ; Restore RBX from R11 | |
|     ; Restore RCX from R10 | |
|     ; | |
|     mov     rbx, r11 | |
|     mov     rcx, r10 | |
|     ; | |
|     ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10) | |
|     ; | |
|     mov     rax, cr4 | |
|     or      rax, 00000600h | |
|     mov     cr4, rax | |
|     ; | |
|     ; The processor should support SSE instruction and we can use | |
|     ; ldmxcsr instruction | |
|     ; | |
|     lea     rax, [rel %%MmxControlWord] | |
|     ldmxcsr [rax] | |
| %endmacro | |
| ; | |
| ; ENABLE_AVX: check CPUID for AVX, set CR4.OSXSAVE, then set XCR0 bits 1 | |
| ; and 2 so SSE and AVX state are enabled. | |
| ; Spins forever at the error label when CPUID does not report AVX. | |
| ; RBX and RCX are preserved through R11 and R10. | |
| ; Modified: RAX, RDX, R10, R11, flags, CR4 and XCR0 | |
| ; All labels are macro-local (%%), so the macro may be expanded more than | |
| ; once per file and does not change the caller's local-label scope. | |
| ; | |
| %macro ENABLE_AVX 0 | |
|     ; | |
|     ; Save RBX to R11 | |
|     ; Save RCX to R10 | |
|     ; | |
|     mov     r11, rbx | |
|     mov     r10, rcx | |
|     mov     eax, 1                  ; CPUID leaf 1 | |
|     cpuid | |
|     bt      ecx, 28                 ; check AVX feature flag (CPUID.01H:ECX[bit 28]) | |
|     jc      %%EnableAvx | |
| %%AvxError: | |
|     ; | |
|     ; Processor has to support AVX | |
|     ; | |
|     jmp     %%AvxError | |
| %%EnableAvx: | |
|     ; | |
|     ; Set OSXSAVE bit (bit #18) to enable xgetbv/xsetbv instruction | |
|     ; | |
|     mov     rax, cr4 | |
|     or      rax, 00040000h | |
|     mov     cr4, rax | |
|     xor     ecx, ecx                ; index 0 (XCR0) | |
|     xgetbv                          ; result in edx:eax | |
|     or      eax, 00000006h          ; Set XCR0 bit #1 and bit #2 to enable SSE state and AVX state | |
|     xsetbv | |
|     ; | |
|     ; Restore RBX from R11 | |
|     ; Restore RCX from R10 | |
|     ; | |
|     mov     rbx, r11 | |
|     mov     rcx, r10 | |
| %endmacro | |