#
# ConvertAsm.py: Automatically generated from CopyMem.asm
#
#------------------------------------------------------------------------------
#
# Copyright (c) 2006 - 2009, Intel Corporation. All rights reserved.<BR>
# This program and the accompanying materials
# are licensed and made available under the terms and conditions of the BSD License
# which accompanies this distribution. The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php.
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
#
# Module Name:
#
#   CopyMem.S
#
# Abstract:
#
#   CopyMem function
#
# Notes:
#
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
# VOID *
# EFIAPI
# InternalMemCopyMem (
#   IN VOID   *Destination,
#   IN VOID   *Source,
#   IN UINTN  Count
#   )
#------------------------------------------------------------------------------
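#
# EDK II X64 code follows the Microsoft x64 calling convention, so on entry
# rcx holds Destination, rdx holds Source, and r8 holds Count.  rsi and rdi
# are non-volatile in this ABI, so they are saved before being used as the
# string-instruction source and destination pointers.
#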
ASM_GLOBAL ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
    pushq   %rsi                        # rsi is callee-saved in the MS x64 ABI
    pushq   %rdi                        # rdi is callee-saved in the MS x64 ABI
    movq    %rdx, %rsi                  # rsi <- Source
    movq    %rcx, %rdi                  # rdi <- Destination
    leaq    -1(%rsi,%r8), %r9           # r9 <- Last byte of Source
    cmpq    %rdi, %rsi
    movq    %rdi, %rax                  # rax <- Destination as return value
    jae     L0                          # Copy forward if Source >= Destination
    cmpq    %rdi, %r9                   # Overlapped?
    jae     L_CopyBackward              # Copy backward if overlapped
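#
# A forward copy is safe whenever Source >= Destination, even if the buffers
# overlap, because each source byte is read before it can be overwritten.
# When Source < Destination, the copy must run backward only if the last
# source byte lies at or beyond Destination; otherwise the ranges are
# disjoint and execution falls through to the forward path.
#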
L0:
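#
# Copy 0-15 head bytes with rep movsb so that rdi becomes 16-byte aligned
# for the movntdq stores in the main loop.  The head length is (-rdi) mod 16,
# clamped to Count in case the whole copy is shorter than the head.
#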
    xorq    %rcx, %rcx
    subq    %rdi, %rcx                  # rcx <- -rdi
    andq    $15, %rcx                   # rcx + rdi should be 16 bytes aligned
    jz      L1                          # skip if rcx == 0
    cmpq    %r8, %rcx
    cmova   %r8, %rcx                   # head length cannot exceed Count
    subq    %rcx, %r8                   # r8 <- bytes remaining after the head
    rep     movsb                       # copy the unaligned head bytes
L1:
    movq    %r8, %rcx
    andq    $15, %r8                    # r8 <- bytes that don't fill a DQword
    shrq    $4, %rcx                    # rcx <- # of DQwords to copy
    jz      L_CopyBytes
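#
# Main loop: copy 16 bytes per iteration with movntdq.  Non-temporal stores
# bypass the cache, avoiding cache pollution on large copies, but they are
# weakly ordered, so an mfence is required once the loop finishes.  xmm0 is
# preserved in the 32-byte shadow space the MS x64 caller allocates above
# the return address, which sits at 0x18(%rsp) after the two pushes.
#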
    movdqu  %xmm0, 0x18(%rsp)           # save xmm0 on stack
L2:
    movdqu  (%rsi), %xmm0               # rsi may not be 16-byte aligned
    movntdq %xmm0, (%rdi)               # rdi should be 16-byte aligned
    addq    $16, %rsi
    addq    $16, %rdi
    loop    L2
    mfence
    movdqa  0x18(%rsp), %xmm0           # restore xmm0
    jmp     L_CopyBytes                 # copy remaining bytes
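#
# Backward copy: start from the last byte of each buffer and copy byte by
# byte with the direction flag set, so overlapping destination bytes are
# written only after the corresponding source bytes have been read.
#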
L_CopyBackward:
    movq    %r9, %rsi                   # rsi <- Last byte of Source
    leaq    -1(%rdi,%r8), %rdi          # rdi <- Last byte of Destination
    std                                 # DF=1: rep movsb decrements rsi/rdi
L_CopyBytes:
    movq    %r8, %rcx                   # rcx <- remaining byte count
    rep     movsb                       # copy the remaining bytes
    cld                                 # restore DF=0 after a backward copy
    popq    %rdi
    popq    %rsi
    ret