src/arch/arm64/core/arm64_tcpip.c - ipxe - Git at Google

 /*
  * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation; either version 2 of the
  * License, or (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  * 02110-1301, USA.
  *
  * You can also choose to distribute this program under the terms of
  * the Unmodified Binary Distribution Licence (as given in the file
  * COPYING.UBDL), provided that you have satisfied its requirements.
  */

 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

 /** @file
  *
  * TCP/IP checksum
  *
  */

 #include <strings.h>
 #include <ipxe/tcpip.h>

 /** Alignment used by main checksumming loop
  *
  * Each step of the main loop loads one pair of doublewords.  The
  * value must be a power of two, since ( TCPIP_CHKSUM_ALIGN - 1 ) is
  * used as a bitmask when calculating the pre- and post-alignment
  * lengths.
  */
 #define TCPIP_CHKSUM_ALIGN 16

 /** Number of steps in each iteration of the unrolled main checksumming loop
  *
  * The steps themselves are written out by hand in the inline
  * assembly, so this value must match the number of load-and-add
  * steps found there.  It must also be a power of two, since
  * ( TCPIP_CHKSUM_UNROLL - 1 ) is used as a bitmask.
  */
 #define TCPIP_CHKSUM_UNROLL 4

 /**
  * Calculate continued TCP/IP checksum
  *
  * @v sum		Checksum of already-summed data, in network byte order
  * @v data		Data buffer
  * @v len		Length of data buffer
  * @ret sum		Updated checksum, in network byte order
  *
  * The input checksum is returned unchanged if the length is zero.
  * Otherwise the buffer is split around a maximally-aligned midpoint
  * into a pre-alignment portion (at most 15 bytes), a main portion
  * that is summed 16 bytes per step by an unrolled loop, and a
  * post-alignment portion (at most 15 bytes).  Every addition is an
  * add-with-carry into a 64-bit accumulator, which is folded down to
  * 16 bits at the end.
  */
 uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
 				 size_t len ) {
 	/* Buffer start, end and maximally-aligned midpoint addresses */
 	intptr_t start;
 	intptr_t end;
 	intptr_t mid;
 	/* Byte counts before and after the main loop (each 0-15) */
 	unsigned int pre;
 	unsigned int post;
 	/* Number of unrolled steps executed on the first loop pass */
 	unsigned int first;
 	/* Scratch registers for the assembly; values are discarded */
 	uint64_t discard_low;
 	uint64_t discard_high;

 	/* Avoid potentially undefined shift operation
 	 *
 	 * With a zero length, start and end are equal and so
 	 * ( start ^ end ) is zero; flsl() presumably returns zero in
 	 * that case, which would give a shift count of 64 below.
 	 */
 	if ( len == 0 )
 		return sum;

 	/* Find maximally-aligned midpoint.  For short blocks of data,
 	 * this may be aligned to fewer than 16 bytes.
 	 *
 	 * Assuming flsl() returns the one-based position of the most
 	 * significant set bit, the shifted constant is a mask of all
 	 * bits below the highest bit in which start and end differ.
 	 * Clearing those bits in the end address gives the midpoint.
 	 */
 	start = ( ( intptr_t ) data );
 	end = ( start + len );
 	mid = ( end &
 		~( ( ~( 1UL << 63 ) ) >> ( 64 - flsl ( start ^ end ) ) ) );

 	/* Calculate pre- and post-alignment lengths */
 	pre = ( ( mid - start ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
 	post = ( ( end - mid ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );

 	/* Calculate number of steps in first iteration of unrolled loop
 	 *
 	 * This is the number of 16-byte steps in the main portion,
 	 * modulo the unroll factor.  The assembly jumps part-way into
 	 * the unrolled loop so that only this many steps run on the
 	 * first pass.
 	 */
 	first = ( ( ( len - pre - post ) / TCPIP_CHKSUM_ALIGN ) &
 		  ( TCPIP_CHKSUM_UNROLL - 1 ) );

 	/* Calculate checksum
 	 *
 	 * Operand map:
 	 *   %0 = sum (accumulator)
 	 *   %1 = data (read pointer, post-incremented by every load)
 	 *   %2 = discard_low (scratch)
 	 *   %3 = discard_high (scratch)
 	 *   %4 = pre
 	 *   %5 = first
 	 *   %6 = end - post (address at which the main loop stops)
 	 *   %7 = post
 	 *
 	 * The carry flag is chained through the whole sequence from
 	 * one adcs to the next: the only other instructions used in
 	 * between are loads, branches (tbz, cbnz, br) and the
 	 * non-flag-setting forms of sub, adr and rev16.
 	 *
 	 * The label "1" is reused as a local label throughout; "1f"
 	 * refers to the next such label and "1b" to the previous one.
 	 *
 	 * NOTE(review): the assembly reads the buffer through %1 but
 	 * the operand list has neither a memory input nor a "memory"
 	 * clobber - confirm that the compiler cannot move earlier
 	 * stores to the buffer past this statement.
 	 *
 	 * NOTE(review): sum (uint16_t) and first (unsigned int) are
 	 * used via the full-width %0 and %5 register names, which
 	 * presumably relies on the register bits above the declared
 	 * operand width being zero - TODO confirm.
 	 */
 	__asm__ ( /* Invert sum (flip the low 16 bits) */
 		  "eor %w0, %w0, #0xffff\n\t"
 		  /* Clear carry flag (adds zero to zero, setting flags) */
 		  "cmn xzr, xzr\n\t"
 		  /* Byteswap and sum pre-alignment byte, if applicable.
 		   * Both the running sum and the loaded byte are
 		   * byteswapped; the swap is undone after the final
 		   * fold, which is valid because the Internet checksum
 		   * is independent of byte order (see RFC 1071).
 		   */
 		  "tbz %w4, #0, 1f\n\t"
 		  "ldrb %w2, [%1], #1\n\t"
 		  "rev16 %w0, %w0\n\t"
 		  "rev16 %w2, %w2\n\t"
 		  "adcs %0, %0, %2\n\t"
 		  "\n1:\n\t"
 		  /* Sum pre-alignment halfword, if applicable */
 		  "tbz %w4, #1, 1f\n\t"
 		  "ldrh %w2, [%1], #2\n\t"
 		  "adcs %0, %0, %2\n\t"
 		  "\n1:\n\t"
 		  /* Sum pre-alignment word, if applicable */
 		  "tbz %w4, #2, 1f\n\t"
 		  "ldr %w2, [%1], #4\n\t"
 		  "adcs %0, %0, %2\n\t"
 		  "\n1:\n\t"
 		  /* Sum pre-alignment doubleword, if applicable */
 		  "tbz %w4, #3, 1f\n\t"
 		  "ldr %2, [%1], #8\n\t"
 		  "adcs %0, %0, %2\n\t"
 		  "\n1:\n\t"
 		  /* Jump into unrolled (x4) main loop.  The target is
 		   * the end-of-loop label "2" minus ( first * 12 )
 		   * bytes, computed as ( first * 8 ) + ( first * 4 ):
 		   * each unrolled step below is three 4-byte
 		   * instructions.  With first == 0 this lands on the
 		   * loop termination test itself.
 		   */
 		  "adr %2, 2f\n\t"
 		  "sub %2, %2, %5, lsl #3\n\t"
 		  "sub %2, %2, %5, lsl #2\n\t"
 		  "br %2\n\t"
 		  "\n1:\n\t"
 		  /* Four identical steps: load 16 bytes, add both halves */
 		  "ldp %2, %3, [%1], #16\n\t"
 		  "adcs %0, %0, %2\n\t"
 		  "adcs %0, %0, %3\n\t"
 		  "ldp %2, %3, [%1], #16\n\t"
 		  "adcs %0, %0, %2\n\t"
 		  "adcs %0, %0, %3\n\t"
 		  "ldp %2, %3, [%1], #16\n\t"
 		  "adcs %0, %0, %2\n\t"
 		  "adcs %0, %0, %3\n\t"
 		  "ldp %2, %3, [%1], #16\n\t"
 		  "adcs %0, %0, %2\n\t"
 		  "adcs %0, %0, %3\n\t"
 		  "\n2:\n\t"
 		  /* Loop until the read pointer reaches ( end - post ).
 		   * A plain sub plus cbnz is used for the comparison
 		   * so that the carry flag is left untouched.
 		   */
 		  "sub %2, %1, %6\n\t"
 		  "cbnz %2, 1b\n\t"
 		  /* Sum post-alignment doubleword, if applicable */
 		  "tbz %w7, #3, 1f\n\t"
 		  "ldr %2, [%1], #8\n\t"
 		  "adcs %0, %0, %2\n\t"
 		  "\n1:\n\t"
 		  /* Sum post-alignment word, if applicable */
 		  "tbz %w7, #2, 1f\n\t"
 		  "ldr %w2, [%1], #4\n\t"
 		  "adcs %0, %0, %2\n\t"
 		  "\n1:\n\t"
 		  /* Sum post-alignment halfword, if applicable */
 		  "tbz %w7, #1, 1f\n\t"
 		  "ldrh %w2, [%1], #2\n\t"
 		  "adcs %0, %0, %2\n\t"
 		  "\n1:\n\t"
 		  /* Sum post-alignment byte, if applicable */
 		  "tbz %w7, #0, 1f\n\t"
 		  "ldrb %w2, [%1], #1\n\t"
 		  "adcs %0, %0, %2\n\t"
 		  "\n1:\n\t"
 		  /* Fold down to a uint32_t plus carry flag: add the
 		   * upper 32 bits to the lower 32 bits, including
 		   * the carry left over from the final adcs above.
 		   */
 		  "lsr %2, %0, #32\n\t"
 		  "adcs %w0, %w0, %w2\n\t"
 		  /* Fold down to a uint16_t plus carry in bit 16: add
 		   * bits 0-15 to bits 16-31, including the carry.
 		   */
 		  "ubfm %2, %0, #0, #15\n\t"
 		  "ubfm %3, %0, #16, #31\n\t"
 		  "adc %w0, %w2, %w3\n\t"
 		  /* Fold down to a uint16_t.  Subtracting 0xffff when
 		   * bit 16 is set is the same as clearing bit 16 and
 		   * adding one (the end-around carry).  The result of
 		   * that may itself be 0x10000, hence the second test.
 		   */
 		  "tbz %w0, #16, 1f\n\t"
 		  "mov %w2, #0xffff\n\t"
 		  "sub %w0, %w0, %w2\n\t"
 		  "tbz %w0, #16, 1f\n\t"
 		  "sub %w0, %w0, %w2\n\t"
 		  "\n1:\n\t"
 		  /* Byteswap back, if applicable (undoes the swap made
 		   * when a pre-alignment byte was summed)
 		   */
 		  "tbz %w4, #0, 1f\n\t"
 		  "rev16 %w0, %w0\n\t"
 		  "\n1:\n\t"
 		  /* Invert sum */
 		  "eor %w0, %w0, #0xffff\n\t"
 		  : "+r" ( sum ), "+r" ( data ), "=&r" ( discard_low ),
 		    "=&r" ( discard_high )
 		  : "r" ( pre ), "r" ( first ), "r" ( end - post ),
 		    "r" ( post )
 		  : "cc" );

 	return sum;
 }
	/*
	* Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
	*
	* This program is free software; you can redistribute it and/or
	* modify it under the terms of the GNU General Public License as
	* published by the Free Software Foundation; either version 2 of the
	* License, or (at your option) any later version.
	*
	* This program is distributed in the hope that it will be useful, but
	* WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* General Public License for more details.
	*
	* You should have received a copy of the GNU General Public License
	* along with this program; if not, write to the Free Software
	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
	* 02110-1301, USA.
	*
	* You can also choose to distribute this program under the terms of
	* the Unmodified Binary Distribution Licence (as given in the file
	* COPYING.UBDL), provided that you have satisfied its requirements.
	*/

	FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

	/** @file
	*
	* TCP/IP checksum
	*
	*/

	#include <strings.h>
	#include <ipxe/tcpip.h>

	/** Alignment used by main checksumming loop
	*
	* Each step of the main loop loads one pair of doublewords. The
	* value must be a power of two, since ( TCPIP_CHKSUM_ALIGN - 1 ) is
	* used as a bitmask when calculating the pre- and post-alignment
	* lengths.
	*/
	#define TCPIP_CHKSUM_ALIGN 16

	/** Number of steps in each iteration of the unrolled main checksumming loop
	*
	* The steps themselves are written out by hand in the inline
	* assembly, so this value must match the number of load-and-add
	* steps found there. It must also be a power of two, since
	* ( TCPIP_CHKSUM_UNROLL - 1 ) is used as a bitmask.
	*/
	#define TCPIP_CHKSUM_UNROLL 4

	/**
	* Calculate continued TCP/IP checksum
	*
	* @v sum Checksum of already-summed data, in network byte order
	* @v data Data buffer
	* @v len Length of data buffer
	* @ret sum Updated checksum, in network byte order
	*
	* The input checksum is returned unchanged if the length is zero.
	* Otherwise the buffer is split around a maximally-aligned midpoint
	* into a pre-alignment portion (at most 15 bytes), a main portion
	* that is summed 16 bytes per step by an unrolled loop, and a
	* post-alignment portion (at most 15 bytes). Every addition is an
	* add-with-carry into a 64-bit accumulator, which is folded down to
	* 16 bits at the end.
	*/
	uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
	size_t len ) {
	/* Buffer start, end and maximally-aligned midpoint addresses */
	intptr_t start;
	intptr_t end;
	intptr_t mid;
	/* Byte counts before and after the main loop (each 0-15) */
	unsigned int pre;
	unsigned int post;
	/* Number of unrolled steps executed on the first loop pass */
	unsigned int first;
	/* Scratch registers for the assembly; values are discarded */
	uint64_t discard_low;
	uint64_t discard_high;

	/* Avoid potentially undefined shift operation
	*
	* With a zero length, start and end are equal and so
	* ( start ^ end ) is zero; flsl() presumably returns zero in
	* that case, which would give a shift count of 64 below.
	*/
	if ( len == 0 )
	return sum;

	/* Find maximally-aligned midpoint. For short blocks of data,
	* this may be aligned to fewer than 16 bytes.
	*
	* Assuming flsl() returns the one-based position of the most
	* significant set bit, the shifted constant is a mask of all
	* bits below the highest bit in which start and end differ.
	* Clearing those bits in the end address gives the midpoint.
	*/
	start = ( ( intptr_t ) data );
	end = ( start + len );
	mid = ( end &
	~( ( ~( 1UL << 63 ) ) >> ( 64 - flsl ( start ^ end ) ) ) );

	/* Calculate pre- and post-alignment lengths */
	pre = ( ( mid - start ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
	post = ( ( end - mid ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );

	/* Calculate number of steps in first iteration of unrolled loop
	*
	* This is the number of 16-byte steps in the main portion,
	* modulo the unroll factor. The assembly jumps part-way into
	* the unrolled loop so that only this many steps run on the
	* first pass.
	*/
	first = ( ( ( len - pre - post ) / TCPIP_CHKSUM_ALIGN ) &
	( TCPIP_CHKSUM_UNROLL - 1 ) );

	/* Calculate checksum
	*
	* Operand map:
	*   %0 = sum (accumulator)
	*   %1 = data (read pointer, post-incremented by every load)
	*   %2 = discard_low (scratch)
	*   %3 = discard_high (scratch)
	*   %4 = pre
	*   %5 = first
	*   %6 = end - post (address at which the main loop stops)
	*   %7 = post
	*
	* The carry flag is chained through the whole sequence from
	* one adcs to the next: the only other instructions used in
	* between are loads, branches (tbz, cbnz, br) and the
	* non-flag-setting forms of sub, adr and rev16.
	*
	* The label "1" is reused as a local label throughout; "1f"
	* refers to the next such label and "1b" to the previous one.
	*
	* NOTE(review): the assembly reads the buffer through %1 but
	* the operand list has neither a memory input nor a "memory"
	* clobber - confirm that the compiler cannot move earlier
	* stores to the buffer past this statement.
	*
	* NOTE(review): sum (uint16_t) and first (unsigned int) are
	* used via the full-width %0 and %5 register names, which
	* presumably relies on the register bits above the declared
	* operand width being zero - TODO confirm.
	*/
	__asm__ ( /* Invert sum (flip the low 16 bits) */
	"eor %w0, %w0, #0xffff\n\t"
	/* Clear carry flag (adds zero to zero, setting flags) */
	"cmn xzr, xzr\n\t"
	/* Byteswap and sum pre-alignment byte, if applicable.
	* Both the running sum and the loaded byte are
	* byteswapped; the swap is undone after the final
	* fold, which is valid because the Internet checksum
	* is independent of byte order (see RFC 1071).
	*/
	"tbz %w4, #0, 1f\n\t"
	"ldrb %w2, [%1], #1\n\t"
	"rev16 %w0, %w0\n\t"
	"rev16 %w2, %w2\n\t"
	"adcs %0, %0, %2\n\t"
	"\n1:\n\t"
	/* Sum pre-alignment halfword, if applicable */
	"tbz %w4, #1, 1f\n\t"
	"ldrh %w2, [%1], #2\n\t"
	"adcs %0, %0, %2\n\t"
	"\n1:\n\t"
	/* Sum pre-alignment word, if applicable */
	"tbz %w4, #2, 1f\n\t"
	"ldr %w2, [%1], #4\n\t"
	"adcs %0, %0, %2\n\t"
	"\n1:\n\t"
	/* Sum pre-alignment doubleword, if applicable */
	"tbz %w4, #3, 1f\n\t"
	"ldr %2, [%1], #8\n\t"
	"adcs %0, %0, %2\n\t"
	"\n1:\n\t"
	/* Jump into unrolled (x4) main loop. The target is
	* the end-of-loop label "2" minus ( first * 12 )
	* bytes, computed as ( first * 8 ) + ( first * 4 ):
	* each unrolled step below is three 4-byte
	* instructions. With first == 0 this lands on the
	* loop termination test itself.
	*/
	"adr %2, 2f\n\t"
	"sub %2, %2, %5, lsl #3\n\t"
	"sub %2, %2, %5, lsl #2\n\t"
	"br %2\n\t"
	"\n1:\n\t"
	/* Four identical steps: load 16 bytes, add both halves */
	"ldp %2, %3, [%1], #16\n\t"
	"adcs %0, %0, %2\n\t"
	"adcs %0, %0, %3\n\t"
	"ldp %2, %3, [%1], #16\n\t"
	"adcs %0, %0, %2\n\t"
	"adcs %0, %0, %3\n\t"
	"ldp %2, %3, [%1], #16\n\t"
	"adcs %0, %0, %2\n\t"
	"adcs %0, %0, %3\n\t"
	"ldp %2, %3, [%1], #16\n\t"
	"adcs %0, %0, %2\n\t"
	"adcs %0, %0, %3\n\t"
	"\n2:\n\t"
	/* Loop until the read pointer reaches ( end - post ).
	* A plain sub plus cbnz is used for the comparison
	* so that the carry flag is left untouched.
	*/
	"sub %2, %1, %6\n\t"
	"cbnz %2, 1b\n\t"
	/* Sum post-alignment doubleword, if applicable */
	"tbz %w7, #3, 1f\n\t"
	"ldr %2, [%1], #8\n\t"
	"adcs %0, %0, %2\n\t"
	"\n1:\n\t"
	/* Sum post-alignment word, if applicable */
	"tbz %w7, #2, 1f\n\t"
	"ldr %w2, [%1], #4\n\t"
	"adcs %0, %0, %2\n\t"
	"\n1:\n\t"
	/* Sum post-alignment halfword, if applicable */
	"tbz %w7, #1, 1f\n\t"
	"ldrh %w2, [%1], #2\n\t"
	"adcs %0, %0, %2\n\t"
	"\n1:\n\t"
	/* Sum post-alignment byte, if applicable */
	"tbz %w7, #0, 1f\n\t"
	"ldrb %w2, [%1], #1\n\t"
	"adcs %0, %0, %2\n\t"
	"\n1:\n\t"
	/* Fold down to a uint32_t plus carry flag: add the
	* upper 32 bits to the lower 32 bits, including
	* the carry left over from the final adcs above.
	*/
	"lsr %2, %0, #32\n\t"
	"adcs %w0, %w0, %w2\n\t"
	/* Fold down to a uint16_t plus carry in bit 16: add
	* bits 0-15 to bits 16-31, including the carry.
	*/
	"ubfm %2, %0, #0, #15\n\t"
	"ubfm %3, %0, #16, #31\n\t"
	"adc %w0, %w2, %w3\n\t"
	/* Fold down to a uint16_t. Subtracting 0xffff when
	* bit 16 is set is the same as clearing bit 16 and
	* adding one (the end-around carry). The result of
	* that may itself be 0x10000, hence the second test.
	*/
	"tbz %w0, #16, 1f\n\t"
	"mov %w2, #0xffff\n\t"
	"sub %w0, %w0, %w2\n\t"
	"tbz %w0, #16, 1f\n\t"
	"sub %w0, %w0, %w2\n\t"
	"\n1:\n\t"
	/* Byteswap back, if applicable (undoes the swap made
	* when a pre-alignment byte was summed)
	*/
	"tbz %w4, #0, 1f\n\t"
	"rev16 %w0, %w0\n\t"
	"\n1:\n\t"
	/* Invert sum */
	"eor %w0, %w0, #0xffff\n\t"
	: "+r" ( sum ), "+r" ( data ), "=&r" ( discard_low ),
	"=&r" ( discard_high )
	: "r" ( pre ), "r" ( first ), "r" ( end - post ),
	"r" ( post )
	: "cc" );

	return sum;
	}