blob: ed323d5d0ad853852af65c327b91d78513fe5a70 [file] [log] [blame]
/*
* Copyright (C) 2012 Michael Brown <mbrown@fensystems.co.uk>.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*
* You can also choose to distribute this program under the terms of
* the Unmodified Binary Distribution Licence (as given in the file
* COPYING.UBDL), provided that you have satisfied its requirements.
*/
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
/** @file
*
* TCP/IP checksum
*
*/
#include <limits.h>
#include <ipxe/tcpip.h>
/* End of the unrolled checksum loop.  This label is defined inside the
 * inline assembly of tcpip_continue_chksum() and is used there to
 * compute the entry point into the unrolled loop.
 */
extern char x86_tcpip_loop_end[];

/**
 * Calculate continued TCP/IP checksum
 *
 * @v partial		Checksum of already-summed data, in network byte order
 * @v data		Data buffer
 * @v len		Length of data buffer
 * @ret cksum		Updated checksum, in network byte order
 *
 * The buffer is summed in memory order using add-with-carry: first
 * as 16-bit words until the main loop is aligned, then as native
 * machine words, then as any remaining 16-bit words, and finally as
 * a single trailing byte (added into the low byte of the sum) if the
 * length is odd.  The result is folded down to 16 bits and
 * complemented.
 */
uint16_t tcpip_continue_chksum ( uint16_t partial, const void *data,
				 size_t len ) {
	/* Running sum, seeded from the complement of the partial checksum */
	unsigned long sum = ( ( ~partial ) & 0xffff );
	unsigned long initial_word_count;
	unsigned long loop_count;
	unsigned long loop_partial_count;
	unsigned long final_word_count;
	unsigned long final_byte;
	unsigned long discard_S;
	unsigned long discard_c;
	unsigned long discard_a;
	unsigned long discard_r1;
	unsigned long discard_r2;

	/* Calculate number of initial 16-bit words required to bring
	 * the main loop into alignment.  (We don't care about the
	 * speed for data aligned to less than 16 bits, since this
	 * situation won't occur in practice.)
	 *
	 * Buffers shorter than one native word skip this step, which
	 * also guarantees that the subtraction below cannot underflow.
	 */
	if ( len >= sizeof ( sum ) ) {
		initial_word_count = ( ( -( ( intptr_t ) data ) &
					 ( sizeof ( sum ) - 1 ) ) >> 1 );
	} else {
		initial_word_count = 0;
	}
	len -= ( initial_word_count * 2 );

	/* Calculate number of iterations of the main loop.  This loop
	 * processes native machine words (32-bit or 64-bit), and is
	 * unrolled 16 times.  We calculate an overall iteration
	 * count, and a starting point for the first iteration.
	 */
	loop_count = ( len / ( sizeof ( sum ) * 16 ) );
	loop_partial_count =
		( ( len % ( sizeof ( sum ) * 16 ) ) / sizeof ( sum ) );

	/* Calculate number of 16-bit words remaining after the main
	 * loop completes.
	 */
	final_word_count = ( ( len % sizeof ( sum ) ) / 2 );

	/* Calculate whether or not a final byte remains at the end */
	final_byte = ( len & 1 );

	/* Calculate the checksum
	 *
	 * Operands: %0 is the running sum, %1 the data pointer, %2
	 * the accumulator loaded by "lods", %3 the loop counter, %4
	 * the computed entry point into the unrolled loop, %5 the
	 * address of x86_tcpip_loop_end, and %12/%13/%14 are the
	 * main loop count, final word count and final byte flag.
	 *
	 * Every counted loop is entered at its "loop" instruction,
	 * which decrements the counter before testing it; the counts
	 * are therefore passed in as ( count + 1 ).
	 *
	 * The carry flag is live across the whole sequence: only
	 * instructions that leave it untouched (lods, loop, mov, jmp,
	 * push, pop) appear between successive "adc" instructions.
	 *
	 * The loop labels are ordinary assembler symbols, so this
	 * statement must be emitted exactly once per object file.
	 *
	 * NOTE(review): the final fold uses push/pop, which writes
	 * just below the stack pointer as seen by the compiler;
	 * presumably 64-bit builds disable the red zone - confirm.
	 */
	__asm__ ( /* Calculate position at which to jump into the
		   * unrolled loop: step back one "lods;adc" pair
		   * from the end of the loop for each native word in
		   * the partial first iteration.
		   */
		  "imul $( -x86_tcpip_loop_step_size ), %4\n\t"
		  "add %5, %4\n\t"

		  /* Clear carry flag before starting checksumming */
		  "clc\n\t"

		  /* Checksum initial words */
		  "jmp 2f\n\t"
		  "\n1:\n\t"
		  "lodsw\n\t"
		  "adcw %w2, %w0\n\t"
		  "\n2:\n\t"
		  "loop 1b\n\t"

		  /* Main "lods;adc" loop, unrolled x16 */
		  "mov %12, %3\n\t"
		  "jmp *%4\n\t"
		  "\nx86_tcpip_loop_start:\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "lods%z2\n\tadc %2, %0\n\t"
		  "\nx86_tcpip_loop_end:\n\t"
		  "loop x86_tcpip_loop_start\n\t"
		  ".equ x86_tcpip_loop_step_size, "
		  "  ( ( x86_tcpip_loop_end - x86_tcpip_loop_start ) >> 4 )\n\t"

		  /* Checksum remaining whole words */
		  "mov %13, %3\n\t"
		  "jmp 2f\n\t"
		  "\n1:\n\t"
		  "lodsw\n\t"
		  "adcw %w2, %w0\n\t"
		  "\n2:\n\t"
		  "loop 1b\n\t"

		  /* Checksum final byte if applicable.  With a count
		   * of one, "loop" decrements the counter to zero
		   * and falls through to add the byte; with a count
		   * of zero, the counter wraps to a non-zero value
		   * and the branch skips the byte.
		   */
		  "mov %14, %3\n\t"
		  "loop 1f\n\t"
		  "adcb (%1), %b0\n\t"
		  "adcb $0, %h0\n\t"
		  "\n1:\n\t"

		  /* Fold down to a uint16_t: push the native word
		   * and pop it back as 16-bit pieces, adding each
		   * piece (with carry) into the low 16 bits.
		   */
		  "push %0\n\t"
		  "popw %w0\n\t"
		  "popw %w2\n\t"
		  "adcw %w2, %w0\n\t"
#if ULONG_MAX > 0xffffffffUL /* 64-bit only */
		  "popw %w2\n\t"
		  "adcw %w2, %w0\n\t"
		  "popw %w2\n\t"
		  "adcw %w2, %w0\n\t"
#endif /* 64-bit only */

		  /* Consume CF (the first addition may itself carry) */
		  "adcw $0, %w0\n\t"
		  "adcw $0, %w0\n\t"

		  : "=&Q" ( sum ), "=&S" ( discard_S ), "=&a" ( discard_a ),
		    "=&c" ( discard_c ), "=&r" ( discard_r1 ),
		    "=&r" ( discard_r2 )
		  : "0" ( sum ), "1" ( data ), "2" ( 0 ),
		    "3" ( initial_word_count + 1 ), "4" ( loop_partial_count ),
		    "5" ( x86_tcpip_loop_end ), "g" ( loop_count + 1 ),
		    "g" ( final_word_count + 1 ), "g" ( final_byte )
		  /* The assembly reads the data buffer through %1
		   * without listing it as a memory operand, so the
		   * compiler must be told that memory is accessed;
		   * otherwise it may defer or discard earlier stores
		   * to the buffer.
		   */
		  : "memory" );

	return ( ~sum & 0xffff );
}