;------------------------------------------------------------------------------
;
; Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
;
; Module Name:
;
;   CopyMem.nasm
;
; Abstract:
;
;   CopyMem function
;
; Notes:
;
;------------------------------------------------------------------------------
SECTION .text

;------------------------------------------------------------------------------
;  VOID *
;  InternalMemCopyMem (
;    IN VOID   *Destination,
;    IN VOID   *Source,
;    IN UINTN  Count
;    );
;
;  Copies Count bytes from Source to Destination and returns Destination.
;
;  Direction:
;    - Source >= Destination, or the last Source byte lies below Destination:
;      forward copy (byte prologue until edi is 16-byte aligned, then 16-byte
;      non-temporal stores, then a byte tail of fewer than 16 bytes).
;    - Otherwise the buffers overlap with Source below Destination, so the
;      copy runs byte-by-byte from the last byte down to the first.
;
;  In   : arguments are read from the stack; after the two pushes below they
;         sit at [esp + 12] Destination, [esp + 16] Source, [esp + 20] Count.
;  Out  : eax = Destination
;  Saved: esi, edi (push/pop); xmm0 (spilled to the stack around the SSE2 loop)
;  Used : eax, ecx, edx, flags
;  DF   : no cld is executed before the forward copy, so that path relies on
;         the direction flag being clear on entry; DF is cleared before return.
;------------------------------------------------------------------------------
global ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
    push    esi
    push    edi
    mov     esi, [esp + 16]             ; esi <- Source
    mov     edi, [esp + 12]             ; edi <- Destination
    mov     edx, [esp + 20]             ; edx <- Count
    lea     eax, [esi + edx - 1]        ; eax <- address of last Source byte
    cmp     esi, edi
    jae     .forward                    ; Source >= Destination: forward is safe
    cmp     eax, edi                    ; does the Source tail reach Destination?
    jae     .backward                   ; yes: overlapped, copy from the end

.forward:
    mov     ecx, edi
    neg     ecx
    and     ecx, 15                     ; ecx <- bytes until edi is 16-byte aligned
    jz      .aligned
    cmp     ecx, edx
    cmova   ecx, edx                    ; never copy more than Count
    sub     edx, ecx                    ; edx <- remaining bytes to copy
    rep     movsb

.aligned:
    mov     ecx, edx
    and     edx, 15                     ; edx <- tail bytes left after the DQwords
    shr     ecx, 4                      ; ecx <- # of DQwords; sets ZF when zero
    jz      .tail                       ; tests ZF from the shr above
    sub     esp, 16
    movdqu  [esp], xmm0                 ; save xmm0

.dqword_loop:                           ; invariant: ecx > 0 on entry
    movdqu  xmm0, [esi]                 ; esi may not be 16-byte aligned
    movntdq [edi], xmm0                 ; edi is 16-byte aligned here
    add     esi, 16
    add     edi, 16
    dec     ecx                         ; dec/jnz instead of the legacy loop insn
    jnz     .dqword_loop
    mfence                              ; order the non-temporal stores
    movdqu  xmm0, [esp]                 ; restore xmm0
    add     esp, 16                     ; stack cleanup
    jmp     .tail

.backward:
    mov     esi, eax                    ; esi <- last byte in Source
    lea     edi, [edi + edx - 1]        ; edi <- last byte in Destination
    std                                 ; rep movsb now walks downwards

.tail:
    mov     ecx, edx                    ; forward: < 16 tail bytes; backward: Count
    rep     movsb
    cld                                 ; leave DF clear for the caller
    mov     eax, [esp + 12]             ; eax <- Destination as return value
    pop     edi
    pop     esi
    ret