;------------------------------------------------------------------------------ | |
; | |
; Copyright (c) 2022, Intel Corporation. All rights reserved.<BR> | |
; SPDX-License-Identifier: BSD-2-Clause-Patent | |
; | |
; Abstract: | |
; | |
; Provide macros for register save/restore using SSE and AVX (XMM/YMM) registers
; | |
;------------------------------------------------------------------------------ | |
; | |
; Define SSE and AVX helper macros
; | |
; | |
; Define SSE macros using SSE 4.1 instructions
;
; SXMMN - insert a 64-bit general purpose register into one qword lane of an
;         XMM register (PINSRQ). The other qword lane is left unchanged.
; args 1:XMM, 2:IDX (0 - bits [0:63], 1 - bits [64:127]), 3:REG
; An XMM register holds only two qwords and PINSRQ selects the lane with
; bit 0 of its immediate, so IDX is reduced to that single bit.
;
%macro SXMMN 3
        pinsrq  %1, %3, (%2 & 1)
%endmacro
; | |
; LXMMN - extract one qword lane of an XMM register into a 64-bit general
;         purpose register (PEXTRQ). The XMM register is not modified.
; args 1:XMM, 2:REG, 3:IDX (0 - bits [0:63], 1 - bits [64:127])
; An XMM register holds only two qwords and PEXTRQ selects the lane with
; bit 0 of its immediate, so IDX is reduced to that single bit.
;
%macro LXMMN 3
        pextrq  %2, %1, (%3 & 1)
%endmacro
; | |
; Define AVX macros using AVX instructions
;
; SYMMN - copy an XMM register into one 128-bit half of a YMM register.
;         The YMM register is both source and destination of VINSERTF128,
;         so its other half is carried over unchanged.
; args 1:YMM, 2:IDX (0 - lower 128bits, 1 - upper 128bits), 3:XMM
;
%macro SYMMN 3
        vinsertf128     %1, %1, %3, %2
%endmacro
; | |
; LYMMN - copy one 128-bit half of a YMM register into an XMM register
;         (VEXTRACTF128). The YMM register is not modified.
; args 1:YMM, 2:XMM, 3:IDX (0 - lower 128bits, 1 - upper 128bits)
;
%macro LYMMN 3
        vextractf128    %2, %1, %3
%endmacro
; | |
; SAVE_REGS - stash RBP, RBX, RSI, RDI and RSP in the upper halves of YMM
;             registers, using XMM5 as the staging register.
;   YMM7[128:191] = RBP    YMM7[192:255] = RBX
;   YMM8[128:191] = RSI    YMM8[192:255] = RDI
;   RSP is stored by SAVE_RSP (upper half of YMM6).
; Modified: XMM5, YMM6, YMM7 and YMM8
;
%macro SAVE_REGS 0
        ; RBP:RBX pair -> upper half of YMM7
        SXMMN   xmm5, 0, rbp
        SXMMN   xmm5, 1, rbx
        SYMMN   ymm7, 1, xmm5
        ; RSI:RDI pair -> upper half of YMM8
        SXMMN   xmm5, 0, rsi
        SXMMN   xmm5, 1, rdi
        SYMMN   ymm8, 1, xmm5
        ; stack pointer
        SAVE_RSP
%endmacro
; | |
; LOAD_REGS - counterpart of SAVE_REGS: reload RBP, RBX, RSI, RDI and RSP
;             from the upper halves of YMM7, YMM8 and YMM6.
;   RBP = YMM7[128:191]    RBX = YMM7[192:255]
;   RSI = YMM8[128:191]    RDI = YMM8[192:255]
;   RSP is reloaded by LOAD_RSP.
; Modified: XMM5, RBP, RBX, RSI, RDI and RSP
;
%macro LOAD_REGS 0
        ; upper half of YMM7 -> RBP:RBX
        LYMMN   ymm7, xmm5, 1
        LXMMN   xmm5, rbp, 0
        LXMMN   xmm5, rbx, 1
        ; upper half of YMM8 -> RSI:RDI
        LYMMN   ymm8, xmm5, 1
        LXMMN   xmm5, rsi, 0
        LXMMN   xmm5, rdi, 1
        ; stack pointer
        LOAD_RSP
%endmacro
; | |
; LOAD_RBP - reload RBP from YMM7[128:191]
; Modified: XMM5 and RBP
;
%macro LOAD_RBP 0
        LYMMN   ymm7, xmm5, 1           ; XMM5 = YMM7[128:255]
        movq    rbp, xmm5               ; low qword of XMM5 = YMM7[128:191]
%endmacro
; | |
; LOAD_RBX - reload RBX from YMM7[192:255]
; Modified: XMM5 and RBX
;
%macro LOAD_RBX 0
        LYMMN   ymm7, xmm5, 1           ; XMM5 = YMM7[128:255]
        LXMMN   xmm5, rbx, 1            ; high qword of XMM5 = YMM7[192:255]
%endmacro
; | |
; Upper half of YMM6 to save/restore Time Stamp, RSP | |
; | |
; | |
; SAVE_TS - store a time stamp in YMM6[192:255]
; arg 1:general purpose register which holds time stamp
; The upper half of YMM6 is read, patched and written back so that the
; other qword, YMM6[128:191], keeps its value.
; Modified: XMM5 and YMM6
;
%macro SAVE_TS 1
        LYMMN   ymm6, xmm5, 1           ; XMM5 = YMM6[128:255]
        SXMMN   xmm5, 1, %1             ; replace the high qword only
        SYMMN   ymm6, 1, xmm5           ; write the pair back
%endmacro
; | |
; LOAD_TS - fetch the time stamp held in YMM6[192:255]
; arg 1:general purpose register where to save time stamp
; Modified: XMM5 and %1
;
%macro LOAD_TS 1
        LYMMN   ymm6, xmm5, 1           ; XMM5 = YMM6[128:255]
        LXMMN   xmm5, %1, 1             ; high qword of XMM5 = YMM6[192:255]
%endmacro
; | |
; SAVE_RSP - store RSP in YMM6[128:191]
; The upper half of YMM6 is read, patched and written back so that the
; other qword, YMM6[192:255], keeps its value.
; Modified: XMM5 and YMM6
;
%macro SAVE_RSP 0
        LYMMN   ymm6, xmm5, 1           ; XMM5 = YMM6[128:255]
        SXMMN   xmm5, 0, rsp            ; replace the low qword only
        SYMMN   ymm6, 1, xmm5           ; write the pair back
%endmacro
; | |
; LOAD_RSP - reload RSP from YMM6[128:191]
; Modified: XMM5 and RSP
;
%macro LOAD_RSP 0
        LYMMN   ymm6, xmm5, 1           ; XMM5 = YMM6[128:255]
        movq    rsp, xmm5               ; low qword of XMM5 = YMM6[128:191]
%endmacro
; | |
; Upper half of YMM9 to save/restore UCODE status, BFV address | |
; | |
; | |
; Save uCode status to YMM9[128:191]
; (qword lane 0 of the upper half of YMM9; LOAD_UCODE_STATUS reads the
; same lane)
; arg 1:general purpose register which holds uCode status
; The upper half of YMM9 is read, patched and written back so that the
; other qword, YMM9[192:255], keeps its value.
; Modified: XMM5 and YMM9
;
%macro SAVE_UCODE_STATUS 1
        LYMMN   ymm9, xmm5, 1           ; XMM5 = YMM9[128:255]
        SXMMN   xmm5, 0, %1             ; replace the low qword only
        SYMMN   ymm9, 1, xmm5           ; write the pair back
%endmacro
; | |
; Restore uCode status from YMM9[128:191]
; (qword lane 0 of the upper half of YMM9; SAVE_UCODE_STATUS writes the
; same lane)
; arg 1:general purpose register where to save uCode status
; Modified: XMM5 and %1
;
%macro LOAD_UCODE_STATUS 1
        LYMMN   ymm9, xmm5, 1           ; XMM5 = YMM9[128:255]
        movq    %1, xmm5                ; low qword of XMM5 = YMM9[128:191]
%endmacro
; | |
; Save BFV address to YMM9[192:255]
; (qword lane 1 of the upper half of YMM9; LOAD_BFV reads the same lane)
; arg 1:general purpose register which holds BFV address
; The upper half of YMM9 is read, patched and written back so that the
; other qword, YMM9[128:191], keeps its value.
; Modified: XMM5 and YMM9
;
%macro SAVE_BFV 1
        LYMMN   ymm9, xmm5, 1           ; XMM5 = YMM9[128:255]
        SXMMN   xmm5, 1, %1             ; replace the high qword only
        SYMMN   ymm9, 1, xmm5           ; write the pair back
%endmacro
; | |
; Restore BFV address from YMM9[192:255]
; (qword lane 1 of the upper half of YMM9; SAVE_BFV writes the same lane)
; arg 1:general purpose register where to save BFV address
; Modified: XMM5 and %1
;
%macro LOAD_BFV 1
        LYMMN   ymm9, xmm5, 1           ; XMM5 = YMM9[128:255]
        LXMMN   xmm5, %1, 1             ; high qword of XMM5 = YMM9[192:255]
%endmacro
; | |
; Upper half of YMM10 to save/restore RCX | |
; | |
; | |
; SAVE_RCX - store RCX in YMM10[128:191]
; The upper half of YMM10 is read, patched and written back so that the
; other qword, YMM10[192:255], keeps its value.
; Modified: XMM5 and YMM10
;
%macro SAVE_RCX 0
        LYMMN   ymm10, xmm5, 1          ; XMM5 = YMM10[128:255]
        SXMMN   xmm5, 0, rcx            ; replace the low qword only
        SYMMN   ymm10, 1, xmm5          ; write the pair back
%endmacro
; | |
; LOAD_RCX - reload RCX from YMM10[128:191]
; Modified: XMM5 and RCX
;
%macro LOAD_RCX 0
        LYMMN   ymm10, xmm5, 1          ; XMM5 = YMM10[128:255]
        movq    rcx, xmm5               ; low qword of XMM5 = YMM10[128:191]
%endmacro
; | |
; CALL_YMM - call without a stack: the address of the instruction following
;            the macro is kept in YMM7[128:191] and control is transferred
;            to Entry with an indirect jump. RET_YMM jumps back to the
;            address held in that lane.
; arg 1:Entry
; Modified: RSI, XMM5, YMM7
; NOTE(review): SAVE_REGS keeps RBP in the same YMM7[128:191] lane, so a
; CALL_YMM issued after SAVE_REGS replaces the saved RBP - confirm callers
; never need both values at once.
;
%macro CALL_YMM 1
        mov     rsi, %%Resume           ; return address
        LYMMN   ymm7, xmm5, 1           ; XMM5 = YMM7[128:255]
        SXMMN   xmm5, 0, rsi            ; replace the low qword only
        SYMMN   ymm7, 1, xmm5           ; write the pair back
        mov     rsi, %1
        jmp     rsi
%%Resume:
%endmacro
; | |
; RET_YMM - return without a stack: jump to the address held in
;           YMM7[128:191] (the lane CALL_YMM stores its return address in).
; Modified: RSI, XMM5
;
%macro RET_YMM 0
        LYMMN   ymm7, xmm5, 1           ; XMM5 = YMM7[128:255]
        movq    rsi, xmm5               ; low qword of XMM5 = return address
        jmp     rsi
%endmacro
;
; ENABLE_SSE - initialize the x87 FPU, check that the processor reports SSE
;              and SSE 4.1, then enable SSE: set CR4.OSFXSR and
;              CR4.OSXMMEXCPT and load MXCSR.
; The macro spins forever if CPUID does not report SSE or SSE 4.1.
; RBX and RCX are preserved across CPUID through R11 and R10.
; Modified: RAX, RDX, R10, R11, flags, CR4, x87 FPU state and MXCSR
; All labels are macro-local (%%), so the macro neither exports symbols nor
; changes the local-label scope of the code that expands it. The two
; constants are addressed RIP-relative.
;
%macro ENABLE_SSE 0
        ;
        ; Initialize floating point units
        ;
        jmp     %%NextAddress           ; skip the inline constants
        align   4
        ;
        ; Float control word initial value:
        ; all exceptions masked, double-precision, round-to-nearest
        ;
%%FpuControlWord:       DW      027Fh
        ;
        ; MXCSR initial value:
        ; all exceptions masked (bits 7-12), round-to-nearest;
        ; flush-to-zero (bit 15) is left clear
        ;
%%MmxControlWord:       DQ      01F80h
%%SseError:
        ;
        ; Processor has to support SSE
        ;
        jmp     %%SseError
%%NextAddress:
        finit
        lea     rax, [rel %%FpuControlWord]
        fldcw   [rax]
        ;
        ; Use CpuId instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test
        ; whether the processor supports SSE instruction.
        ;
        ; Save RBX to R11
        ; Save RCX to R10
        ;
        mov     r11, rbx
        mov     r10, rcx
        mov     eax, 1
        cpuid
        bt      edx, 25
        jnc     %%SseError
        ;
        ; SSE 4.1 support (CPUID.01H:ECX[bit 19] = 1)
        ;
        bt      ecx, 19
        jnc     %%SseError
        ;
        ; Restore RBX from R11
        ; Restore RCX from R10
        ;
        mov     rbx, r11
        mov     rcx, r10
        ;
        ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
        ;
        mov     rax, cr4
        or      rax, 00000600h
        mov     cr4, rax
        ;
        ; The processor supports SSE instructions, so ldmxcsr can be used
        ;
        lea     rax, [rel %%MmxControlWord]
        ldmxcsr [rax]
%endmacro
;
; ENABLE_AVX - check that the processor reports AVX, then enable it: set
;              CR4.OSXSAVE and set the SSE and AVX state bits in XCR0.
; The macro spins forever if CPUID does not report AVX.
; RBX and RCX are preserved across CPUID/XGETBV through R11 and R10.
; Modified: RAX, RDX, R10, R11, flags, CR4 and XCR0
; All labels are macro-local (%%), so the macro neither exports symbols nor
; changes the local-label scope of the code that expands it.
; NOTE(review): CR4.OSXSAVE is set without first testing
; CPUID.01H:ECX.XSAVE[bit 26] - confirm every supported processor that
; reports AVX also reports XSAVE.
;
%macro ENABLE_AVX 0
        ;
        ; Save RBX to R11
        ; Save RCX to R10
        ;
        mov     r11, rbx
        mov     r10, rcx
        mov     eax, 1
        cpuid
        and     ecx, 10000000h
        cmp     ecx, 10000000h          ; check AVX feature flag
        je      %%EnableAvx
%%AvxError:
        ;
        ; Processor has to support AVX
        ;
        jmp     %%AvxError
%%EnableAvx:
        ;
        ; Set OSXSAVE bit (bit #18) to enable xgetbv/xsetbv instruction
        ;
        mov     rax, cr4
        or      rax, 00040000h
        mov     cr4, rax
        xor     ecx, ecx                ; index 0 (XCR0)
        xgetbv                          ; result in edx:eax
        or      eax, 00000006h          ; Set XCR0 bit #1 and bit #2 to enable SSE state and AVX state
        xsetbv
        ;
        ; Restore RBX from R11
        ; Restore RCX from R10
        ;
        mov     rbx, r11
        mov     rcx, r10
%endmacro