/*
* Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*
* You can also choose to distribute this program under the terms of
* the Unmodified Binary Distribution Licence (as given in the file
* COPYING.UBDL), provided that you have satisfied its requirements.
 */

/** @file
 *
 * Optimised string operations
 *
 */

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

#include <string.h>
#include <config/defaults.h>

/* Use generic_memcpy_reverse() if we cannot safely set the direction flag */
#ifdef UNSAFE_STD
#define USE_GENERIC_MEMCPY_REVERSE 1
#else
#define USE_GENERIC_MEMCPY_REVERSE 0
#endif

/**
* Copy memory area
*
* @v dest Destination address
* @v src Source address
* @v len Length
* @ret dest Destination address
*/
void * __attribute__ (( noinline )) __memcpy ( void *dest, const void *src,
size_t len ) {
void *edi = dest;
const void *esi = src;
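	/* Dummy output: "rep" counts ECX down to zero, so naming it
	 * as an output operand tells the compiler that its original
	 * value is destroyed.
	 */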
int discard_ecx;
/* We often do large dword-aligned and dword-length block
* moves. Using movsl rather than movsb speeds these up by
* around 32%.
*/
__asm__ __volatile__ ( "rep movsl"
: "=&D" ( edi ), "=&S" ( esi ),
"=&c" ( discard_ecx )
: "0" ( edi ), "1" ( esi ), "2" ( len >> 2 )
: "memory" );
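	/* Copy the remaining ( len & 3 ) trailing bytes */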
__asm__ __volatile__ ( "rep movsb"
: "=&D" ( edi ), "=&S" ( esi ),
"=&c" ( discard_ecx )
: "0" ( edi ), "1" ( esi ), "2" ( len & 3 )
: "memory" );
return dest;
}
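
/* Usage sketch (illustrative only, not part of the original file):
 * copying ten bytes executes two "rep movsl" iterations (eight bytes)
 * followed by two "rep movsb" iterations (the remaining two bytes),
 * since len >> 2 == 2 and len & 3 == 2:
 *
 *	char src[10] = "123456789";
 *	char dst[10];
 *
 *	__memcpy ( dst, src, sizeof ( src ) );
 */
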
/**
* Copy memory area backwards
*
* @v dest Destination address
* @v src Source address
* @v len Length
* @ret dest Destination address
*/
void * __attribute__ (( noinline )) __memcpy_reverse ( void *dest,
const void *src,
size_t len ) {
void *edi = ( dest + len - 1 );
const void *esi = ( src + len - 1 );
int discard_ecx;
/* Use unoptimised version if we are not permitted to modify
* the direction flag.
*/
if ( USE_GENERIC_MEMCPY_REVERSE )
return generic_memcpy_reverse ( dest, src, len );
/* Assume memmove() is not performance-critical, and perform a
* bytewise copy for simplicity.
*/
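	/* "std" makes movsb decrement %esi/%edi so that the copy runs
	 * from the highest byte downwards; "cld" then restores the
	 * cleared direction flag that the rest of the code assumes.
	 */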
__asm__ __volatile__ ( "std\n\t"
"rep movsb\n\t"
"cld\n\t"
: "=&D" ( edi ), "=&S" ( esi ),
"=&c" ( discard_ecx )
: "0" ( edi ), "1" ( esi ),
"2" ( len )
: "memory" );
return dest;
}

/**
* Copy (possibly overlapping) memory area
*
* @v dest Destination address
* @v src Source address
* @v len Length
* @ret dest Destination address
*/
void * __memmove ( void *dest, const void *src, size_t len ) {
if ( dest <= src ) {
return __memcpy ( dest, src, len );
} else {
return __memcpy_reverse ( dest, src, len );
}
}
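
/* Usage sketch (illustrative only, not part of the original file):
 * shifting a region up by one byte makes destination and source
 * overlap with dest > src, so the copy must run backwards to avoid
 * overwriting source bytes before they have been read:
 *
 *	char buf[8] = "abcdefg";
 *
 *	__memmove ( buf + 1, buf, 7 );
 */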