| #/*++ |
| # |
| #Copyright (c) 2006, Intel Corporation |
| #All rights reserved. This program and the accompanying materials |
| #are licensed and made available under the terms and conditions of the BSD License |
| #which accompanies this distribution. The full text of the license may be found at |
| #http://opensource.org/licenses/bsd-license.php |
| # |
| #THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, |
| #WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. |
| # |
| #Module Name: |
| # |
| # EfiCopyMem.c |
| # |
| #Abstract: |
| # |
| # This is the code that supports IA32-optimized CopyMem service |
| # |
| #--*/ |
| #include "EfiBind.h" |
| #--------------------------------------------------------------------------- |
| .686: |
| #.MODEL flat,C |
| .mmx: |
| .code: |
| |
| #--------------------------------------------------------------------------- |
| |
.globl ASM_PFX(EfiCommonLibCopyMem)

#VOID
#EfiCommonLibCopyMem (
#  IN VOID   *Destination,
#  IN VOID   *Source,
#  IN UINTN  Count
#  )
#/*++
#
#Routine Description:
#
#  Copy Count bytes from Source to Destination. The two buffers may
#  overlap; the copy direction is chosen so that every source byte is
#  read before it can be overwritten.
#
#Arguments:
#
#  Destination - Target of copy              (stack argument at  8(%ebp))
#
#  Source      - Place to copy from          (stack argument at 12(%ebp))
#
#  Count       - Number of bytes to copy     (stack argument at 16(%ebp))
#
#Returns:
#
#  None
#
#Register usage:
#
#  esi = running Source pointer, edi = running Destination pointer,
#  ecx = bytes still to copy, eax = scratch / 64-byte block counter.
#  ebp, esi and edi are saved and restored. eax, ecx and the flags are
#  modified. mm0 is used for the 64-byte block copy; its previous value
#  is saved in the 8-byte stack local MmxSave and restored afterwards,
#  and emms is executed before leaving the MMX section.
#
#--*/
ASM_PFX(EfiCommonLibCopyMem):

    pushl   %ebp
    movl    %esp, %ebp
    subl    $8, %esp                # Local UINT64 MmxSave at -8(%ebp)
    pushl   %esi
    pushl   %edi

    movl    0x10(%ebp), %ecx        # ecx = Count
    movl    0xC(%ebp), %esi         # esi = Source
    movl    8(%ebp), %edi           # edi = Destination

    testl   %ecx, %ecx
    jz      _CopyMemDone            # Count == 0, nothing to do

    # Choose the copy direction with one UNSIGNED test. Pointers are
    # unsigned quantities, so signed jumps (jl/jle) pick the wrong path
    # when the buffers straddle 0x80000000.
    #   Destination == Source                     => nothing to do
    #   (Destination - Source) mod 2^32 < Count   => Source < Destination
    #       < Source + Count: a forward copy would overwrite source bytes
    #       not yet read, so copy backward
    #   otherwise                                 => forward copy is safe
    # Working on the pointer difference also avoids computing
    # Source + Count or Destination + Count, which wrap to zero for a
    # buffer that ends at the top of the address space.
    movl    %edi, %eax
    subl    %esi, %eax              # eax = Destination - Source
    jz      _CopyMemDone            # same buffer
    cmpl    %ecx, %eax
    jb      _CopyOverlapped         # Destination lies inside the source range

    # Forward copy. Move single bytes until the destination pointer is
    # 4-byte aligned. Invariant: ecx > 0 at the top of this loop.
_StartByteCopy:
    testl   $3, %edi                # low two address bits clear?
    jz      _CopyBlocks             # yes, destination is aligned
    movb    (%esi), %al             # get byte from Source
    movb    %al, (%edi)             # write byte to Destination
    incl    %esi
    incl    %edi
    decl    %ecx
    jnz     _StartByteCopy
    jmp     _CopyMemDone            # Count exhausted while aligning

_CopyBlocks:
    # Split the remaining count: eax = number of whole 64-byte blocks,
    # ecx = bytes left over after those blocks (0..63).
    movl    %ecx, %eax
    andl    $63, %ecx
    shrl    $6, %eax                # ZF set when there is no whole block
    jz      _CopyRemainingDWords

    # Save mm0 to UINT64 MmxSave
    movq    %mm0, -8(%ebp)

copymmx:
    # One 64-byte block per iteration, as eight 8-byte load/store pairs.
    movq    (%esi), %mm0
    movq    %mm0, (%edi)
    movq    8(%esi), %mm0
    movq    %mm0, 8(%edi)
    movq    16(%esi), %mm0
    movq    %mm0, 16(%edi)
    movq    24(%esi), %mm0
    movq    %mm0, 24(%edi)
    movq    32(%esi), %mm0
    movq    %mm0, 32(%edi)
    movq    40(%esi), %mm0
    movq    %mm0, 40(%edi)
    movq    48(%esi), %mm0
    movq    %mm0, 48(%edi)
    movq    56(%esi), %mm0
    movq    %mm0, 56(%edi)

    addl    $64, %esi
    addl    $64, %edi
    decl    %eax
    jnz     copymmx

    # Restore mm0 from MmxSave
    movq    -8(%ebp), %mm0
    emms                            # Exit MMX Instruction

    # Copy as many DWORDs as possible from what is left
_CopyRemainingDWords:
    cmpl    $4, %ecx
    jb      _CopyRemainingBytes     # fewer than 4 bytes left
    movl    (%esi), %eax            # get dword from Source
    movl    %eax, (%edi)            # write dword to Destination
    addl    $4, %esi
    addl    $4, %edi
    subl    $4, %ecx
    jmp     _CopyRemainingDWords

    # Copy the final 0..3 bytes
_CopyRemainingBytes:
    testl   %ecx, %ecx
    jz      _CopyMemDone
_CopyRemainingBytesLoop:
    movb    (%esi), %al             # get byte from Source
    movb    %al, (%edi)             # write byte to Destination
    incl    %esi
    incl    %edi
    decl    %ecx
    jnz     _CopyRemainingBytesLoop
    jmp     _CopyMemDone

    #
    # Backward copy, used when Destination lies inside the source range.
    # Start at the last byte of each buffer and work down to the first.
    # Since this is the atypical case, this code has not been optimized
    # and simply copies bytes. Invariant: ecx > 0 on entry, because the
    # Count == 0 case has already returned.
    #
_CopyOverlapped:
    leal    -1(%esi,%ecx), %esi     # esi = Source + Count - 1
    leal    -1(%edi,%ecx), %edi     # edi = Destination + Count - 1

_CopyOverlappedLoop:
    movb    (%esi), %al             # get byte from Source
    movb    %al, (%edi)             # write byte to Destination
    decl    %esi
    decl    %edi
    decl    %ecx
    jnz     _CopyOverlappedLoop

_CopyMemDone:

    popl    %edi
    popl    %esi
    leave                           # releases MmxSave and restores ebp
    ret
#EfiCommonLibCopyMem ENDP
| |