src/tests/utf8_test.c - ipxe - Git at Google

 /*
  * Copyright (C) 2022 Michael Brown <mbrown@fensystems.co.uk>.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation; either version 2 of the
  * License, or any later version.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  * 02110-1301, USA.
  *
  * You can also choose to distribute this program under the terms of
  * the Unmodified Binary Distribution Licence (as given in the file
  * COPYING.UBDL), provided that you have satisfied its requirements.
  */

 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

 /** @file
  *
  * UTF-8 Unicode encoding tests
  *
  */

 /* Forcibly enable assertions */
 #undef NDEBUG

 #include <string.h>
 #include <ipxe/utf8.h>
 #include <ipxe/test.h>

 /**
  * A UTF-8 accumulation test
  *
  * Pairs a sequence of input bytes with the value expected back from
  * utf8_accumulate() after each individual byte.
  */
 struct utf8_accumulate_test {
 	/** UTF-8 byte string, fed to the accumulator one byte at a time */
 	const char *bytes;
 	/** Expected accumulator result for each corresponding input byte */
 	const unsigned int *expected;
 	/** Number of expected results (and of input bytes consumed) */
 	size_t len;
 };

 /**
  * Define inline data
  *
  * Wraps a comma-separated list in braces so that it can be passed as
  * a single macro argument and used as an array initialiser.
  */
 #define DATA(...) { __VA_ARGS__ }

 /**
  * Define a UTF-8 accumulation test
  *
  * @v name		Test name
  * @v BYTES		Input bytes (string literal or DATA() list)
  * @v EXPECTED		Expected results, one per input byte (DATA() list)
  *
  * Defines the static arrays name_bytes and name_expected along with
  * a struct utf8_accumulate_test called name.  The test length is
  * the number of elements in EXPECTED.
  *
  * The expansion omits the final semicolon: it is supplied by the
  * invocation, so that no empty declaration is left behind at file
  * scope.
  */
 #define UTF8_ACCUMULATE( name, BYTES, EXPECTED )			\
 	static const char name ## _bytes[] = BYTES;			\
 	static const unsigned int name ## _expected[] = EXPECTED;	\
 	static struct utf8_accumulate_test name = {			\
 		.bytes = name ## _bytes,				\
 		.expected = name ## _expected,				\
 		.len = ( sizeof ( name ## _expected ) /			\
 			 sizeof ( name ## _expected[0] ) ),		\
 	}

 /**
  * Basic ASCII test
  *
  * Every input byte is below 0x80, and each one is expected to be
  * returned immediately as its own character value.
  */
 UTF8_ACCUMULATE ( ascii, "Hello world!",
 		  DATA ( 'H', 'e', 'l', 'l', 'o', ' ',
 			 'w', 'o', 'r', 'l', 'd', '!' ) );

 /**
  * Multi-byte character test
  *
  * The expected list has one entry per input byte: zero for every
  * byte that does not yet complete a character, then the decoded
  * code point on the final byte of each sequence.
  *
  * NOTE(review): the byte count only lines up with the expected list
  * if the string literal is encoded as UTF-8 when this file is
  * compiled - confirm against the build settings.
  */
 UTF8_ACCUMULATE ( multibyte, "Héllô wörld 🥳",
 		  DATA ( 'H', 0, L'é', 'l', 'l', 0, L'ô', ' ',
 			 'w', 0, L'ö', 'r', 'l', 'd', ' ',
 			 0, 0, 0, 0x1f973 ) );

 /**
  * Stray continuation byte test
  *
  * The continuation bytes 0x81 and 0x83 arrive when no multi-byte
  * sequence is in progress, and each is expected to produce 0xfffd
  * (U+FFFD, the Unicode replacement character).  The well-formed
  * pair 0xc3 0x82 in between is expected to decode to 0xc2.
  */
 UTF8_ACCUMULATE ( stray_continuation,
 		  DATA ( 'a', 0x81, 'b', 0xc3, 0x82, 0x83, 'c' ),
 		  DATA ( 'a', 0xfffd, 'b', 0, 0xc2, 0xfffd, 'c' ) );

 /**
  * Missing continuation byte test
  *
  * The sequences started by lead bytes 0xc3 and 0xe1 are each cut
  * short by a byte that is not a continuation byte.  The abandoned
  * sequence is expected to produce no character (zero for each of
  * its bytes), and the interrupting byte is expected to be handled
  * as the start of a new character: 'b' is returned as-is, and
  * 0xc3 0x89 decodes to 0xc9.
  */
 UTF8_ACCUMULATE ( missing_continuation,
 		  DATA ( 'a', 0xc3, 'b', 0xe1, 0x86, 0xc3, 0x89, 'c' ),
 		  DATA ( 'a', 0, 'b', 0, 0, 0, 0xc9, 'c' ) );

 /**
  * Illegal two-byte sequence test
  *
  * 0xc2 0x80 is the smallest legal two-byte encoding and is expected
  * to decode to 0x80.  0xc1 0xbf and 0xc0 0x80 encode values that
  * fit in a single byte (0x7f and 0x00), and are expected to be
  * rejected with 0xfffd on their final byte.
  */
 UTF8_ACCUMULATE ( illegal_two,
 		  DATA ( 'a', 0xc2, 0x80, 'b', 0xc1, 0xbf, 'c', 0xc0, 0x80,
 			 'd' ),
 		  DATA ( 'a', 0, 0x80, 'b', 0, 0xfffd, 'c', 0, 0xfffd, 'd' ) );

 /**
  * Illegal three-byte sequence test
  *
  * 0xe0 0xa0 0x80 is the smallest legal three-byte encoding and is
  * expected to decode to 0x800.  0xe0 0x9f 0xbf and 0xe0 0x80 0x80
  * encode values that fit in fewer bytes (0x7ff and 0x00), and are
  * expected to be rejected with 0xfffd on their final byte.
  */
 UTF8_ACCUMULATE ( illegal_three,
 		  DATA ( 'a', 0xe0, 0xa0, 0x80, 'b', 0xe0, 0x9f, 0xbf, 'c',
 			 0xe0, 0x80, 0x80, 'd' ),
 		  DATA ( 'a', 0, 0, 0x800, 'b', 0, 0, 0xfffd, 'c',
 			 0, 0, 0xfffd, 'd' ) );

 /**
  * Illegal four-byte sequence test
  *
  * 0xf0 0x90 0x80 0x80 is the smallest legal four-byte encoding and
  * is expected to decode to 0x10000.  0xf0 0x8f 0xbf 0xbf and
  * 0xf0 0x80 0x80 0x80 encode values that fit in fewer bytes (0xffff
  * and 0x00), and are expected to be rejected with 0xfffd on their
  * final byte.
  */
 UTF8_ACCUMULATE ( illegal_four,
 		  DATA ( 'a', 0xf0, 0x90, 0x80, 0x80, 'b', 0xf0, 0x8f, 0xbf,
 			 0xbf, 'c', 0xf0, 0x80, 0x80, 0x80, 'd' ),
 		  DATA ( 'a', 0, 0, 0, 0x10000, 'b', 0, 0, 0, 0xfffd, 'c',
 			 0, 0, 0, 0xfffd, 'd' ) );

 /**
  * Illegal overlength sequence test
  *
  * The bytes 0xf8, 0xfc, 0xfe and 0xff are followed by four, five,
  * six and seven continuation bytes respectively.  None of these
  * forms is accepted: every one of these bytes, lead and
  * continuation alike, is expected to produce 0xfffd.
  */
 UTF8_ACCUMULATE ( illegal_length,
 		  DATA ( 'a', 0xf8, 0xbf, 0xbf, 0xbf, 0xbf, 'b', 0xfc, 0xbf,
 			 0xbf, 0xbf, 0xbf, 0xbf, 'c', 0xfe, 0xbf, 0xbf, 0xbf,
 			 0xbf, 0xbf, 0xbf, 'd', 0xff, 0xbf, 0xbf, 0xbf, 0xbf,
 			 0xbf, 0xbf, 0xbf, 'e' ),
 		  DATA ( 'a', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'b',
 			 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'c',
 			 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
 			 0xfffd, 'd', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
 			 0xfffd, 0xfffd, 0xfffd, 'e' ) );

 /**
  * Report UTF-8 accumulation test result
  *
  * @v test		UTF-8 accumulation test
  * @v file		Test code file
  * @v line		Test code line
  *
  * Feeds the test bytes one at a time into a freshly zeroed
  * accumulator and checks the value returned for each byte against
  * the corresponding expected entry.
  */
 static void utf8_accumulate_okx ( struct utf8_accumulate_test *test,
 				  const char *file, unsigned int line ) {
 	struct utf8_accumulator utf8;
 	unsigned int character;
 	unsigned int i;

 	/* Initialise accumulator */
 	memset ( &utf8, 0, sizeof ( utf8 ) );

 	/* Test each byte in turn */
 	for ( i = 0 ; i < test->len ; i++ ) {
 		character = utf8_accumulate ( &utf8, test->bytes[i] );
 		/* Plain char may be signed: show the raw byte value
 		 * rather than a sign-extended negative integer.
 		 */
 		DBGC ( test, "UTF8 byte %02x character %02x\n",
 		       ( ( unsigned char ) test->bytes[i] ), character );
 		okx ( character == test->expected[i], file, line );
 	}
 }

 /**
  * Report UTF-8 accumulation test result
  *
  * @v test		UTF-8 accumulation test
  *
  * Passes the invoking file and line on to utf8_accumulate_okx().
  */
 #define utf8_accumulate_ok( test ) \
 	utf8_accumulate_okx ( test, __FILE__, __LINE__ )

 /**
  * Perform UTF-8 self-test
  *
  * Runs the accumulation tests defined above.  Each test is a
  * separate utf8_accumulate_ok() invocation, so the __LINE__ passed
  * on to utf8_accumulate_okx() differs for every test vector.
  */
 static void utf8_test_exec ( void ) {

 	/* Accumulation tests */
 	utf8_accumulate_ok ( &ascii );
 	utf8_accumulate_ok ( &multibyte );
 	utf8_accumulate_ok ( &stray_continuation );
 	utf8_accumulate_ok ( &missing_continuation );
 	utf8_accumulate_ok ( &illegal_two );
 	utf8_accumulate_ok ( &illegal_three );
 	utf8_accumulate_ok ( &illegal_four );
 	utf8_accumulate_ok ( &illegal_length );
 }

 /**
  * UTF-8 self-test
  *
  * Named "utf8"; its exec handler is utf8_test_exec().
  */
 struct self_test utf8_test __self_test = {
 	.name = "utf8",
 	.exec = utf8_test_exec,
 };
	/*
	* Copyright (C) 2022 Michael Brown <mbrown@fensystems.co.uk>.
	*
	* This program is free software; you can redistribute it and/or
	* modify it under the terms of the GNU General Public License as
	* published by the Free Software Foundation; either version 2 of the
	* License, or any later version.
	*
	* This program is distributed in the hope that it will be useful, but
	* WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* General Public License for more details.
	*
	* You should have received a copy of the GNU General Public License
	* along with this program; if not, write to the Free Software
	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
	* 02110-1301, USA.
	*
	* You can also choose to distribute this program under the terms of
	* the Unmodified Binary Distribution Licence (as given in the file
	* COPYING.UBDL), provided that you have satisfied its requirements.
	*/

	FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

	/** @file
	*
	* UTF-8 Unicode encoding tests
	*
	*/

	/* Forcibly enable assertions */
	#undef NDEBUG

	#include <string.h>
	#include <ipxe/utf8.h>
	#include <ipxe/test.h>

	/**
	* A UTF-8 accumulation test
	*
	* Pairs a sequence of input bytes with the value expected back from
	* utf8_accumulate() after each individual byte.
	*/
	struct utf8_accumulate_test {
	/** UTF-8 byte string, fed to the accumulator one byte at a time */
	const char *bytes;
	/** Expected accumulator result for each corresponding input byte */
	const unsigned int *expected;
	/** Number of expected results (and of input bytes consumed) */
	size_t len;
	};

	/**
	* Define inline data
	*
	* Wraps a comma-separated list in braces so that it can be passed as
	* a single macro argument and used as an array initialiser.
	*/
	#define DATA(...) { __VA_ARGS__ }

	/**
	 * Define a UTF-8 accumulation test
	 *
	 * @v name		Test name
	 * @v BYTES		Input bytes (string literal or DATA() list)
	 * @v EXPECTED		Expected results, one per input byte (DATA() list)
	 *
	 * Defines the static arrays name_bytes and name_expected along with
	 * a struct utf8_accumulate_test called name.  The test length is
	 * the number of elements in EXPECTED.
	 *
	 * The expansion omits the final semicolon: it is supplied by the
	 * invocation, so that no empty declaration is left behind at file
	 * scope.
	 */
	#define UTF8_ACCUMULATE( name, BYTES, EXPECTED )			\
		static const char name ## _bytes[] = BYTES;			\
		static const unsigned int name ## _expected[] = EXPECTED;	\
		static struct utf8_accumulate_test name = {			\
			.bytes = name ## _bytes,				\
			.expected = name ## _expected,				\
			.len = ( sizeof ( name ## _expected ) /			\
				 sizeof ( name ## _expected[0] ) ),		\
		}

	/**
	* Basic ASCII test
	*
	* Every input byte is below 0x80, and each one is expected to be
	* returned immediately as its own character value.
	*/
	UTF8_ACCUMULATE ( ascii, "Hello world!",
	DATA ( 'H', 'e', 'l', 'l', 'o', ' ',
	'w', 'o', 'r', 'l', 'd', '!' ) );

	/**
	* Multi-byte character test
	*
	* The expected list has one entry per input byte: zero for every
	* byte that does not yet complete a character, then the decoded
	* code point on the final byte of each sequence.
	*
	* NOTE(review): the byte count only lines up with the expected list
	* if the string literal is encoded as UTF-8 when this file is
	* compiled - confirm against the build settings.
	*/
	UTF8_ACCUMULATE ( multibyte, "Héllô wörld 🥳",
	DATA ( 'H', 0, L'é', 'l', 'l', 0, L'ô', ' ',
	'w', 0, L'ö', 'r', 'l', 'd', ' ',
	0, 0, 0, 0x1f973 ) );

	/**
	* Stray continuation byte test
	*
	* The continuation bytes 0x81 and 0x83 arrive when no multi-byte
	* sequence is in progress, and each is expected to produce 0xfffd
	* (U+FFFD, the Unicode replacement character).  The well-formed
	* pair 0xc3 0x82 in between is expected to decode to 0xc2.
	*/
	UTF8_ACCUMULATE ( stray_continuation,
	DATA ( 'a', 0x81, 'b', 0xc3, 0x82, 0x83, 'c' ),
	DATA ( 'a', 0xfffd, 'b', 0, 0xc2, 0xfffd, 'c' ) );

	/**
	* Missing continuation byte test
	*
	* The sequences started by lead bytes 0xc3 and 0xe1 are each cut
	* short by a byte that is not a continuation byte.  The abandoned
	* sequence is expected to produce no character (zero for each of
	* its bytes), and the interrupting byte is expected to be handled
	* as the start of a new character: 'b' is returned as-is, and
	* 0xc3 0x89 decodes to 0xc9.
	*/
	UTF8_ACCUMULATE ( missing_continuation,
	DATA ( 'a', 0xc3, 'b', 0xe1, 0x86, 0xc3, 0x89, 'c' ),
	DATA ( 'a', 0, 'b', 0, 0, 0, 0xc9, 'c' ) );

	/**
	* Illegal two-byte sequence test
	*
	* 0xc2 0x80 is the smallest legal two-byte encoding and is expected
	* to decode to 0x80.  0xc1 0xbf and 0xc0 0x80 encode values that
	* fit in a single byte (0x7f and 0x00), and are expected to be
	* rejected with 0xfffd on their final byte.
	*/
	UTF8_ACCUMULATE ( illegal_two,
	DATA ( 'a', 0xc2, 0x80, 'b', 0xc1, 0xbf, 'c', 0xc0, 0x80,
	'd' ),
	DATA ( 'a', 0, 0x80, 'b', 0, 0xfffd, 'c', 0, 0xfffd, 'd' ) );

	/**
	* Illegal three-byte sequence test
	*
	* 0xe0 0xa0 0x80 is the smallest legal three-byte encoding and is
	* expected to decode to 0x800.  0xe0 0x9f 0xbf and 0xe0 0x80 0x80
	* encode values that fit in fewer bytes (0x7ff and 0x00), and are
	* expected to be rejected with 0xfffd on their final byte.
	*/
	UTF8_ACCUMULATE ( illegal_three,
	DATA ( 'a', 0xe0, 0xa0, 0x80, 'b', 0xe0, 0x9f, 0xbf, 'c',
	0xe0, 0x80, 0x80, 'd' ),
	DATA ( 'a', 0, 0, 0x800, 'b', 0, 0, 0xfffd, 'c',
	0, 0, 0xfffd, 'd' ) );

	/**
	* Illegal four-byte sequence test
	*
	* 0xf0 0x90 0x80 0x80 is the smallest legal four-byte encoding and
	* is expected to decode to 0x10000.  0xf0 0x8f 0xbf 0xbf and
	* 0xf0 0x80 0x80 0x80 encode values that fit in fewer bytes (0xffff
	* and 0x00), and are expected to be rejected with 0xfffd on their
	* final byte.
	*/
	UTF8_ACCUMULATE ( illegal_four,
	DATA ( 'a', 0xf0, 0x90, 0x80, 0x80, 'b', 0xf0, 0x8f, 0xbf,
	0xbf, 'c', 0xf0, 0x80, 0x80, 0x80, 'd' ),
	DATA ( 'a', 0, 0, 0, 0x10000, 'b', 0, 0, 0, 0xfffd, 'c',
	0, 0, 0, 0xfffd, 'd' ) );

	/**
	* Illegal overlength sequence test
	*
	* The bytes 0xf8, 0xfc, 0xfe and 0xff are followed by four, five,
	* six and seven continuation bytes respectively.  None of these
	* forms is accepted: every one of these bytes, lead and
	* continuation alike, is expected to produce 0xfffd.
	*/
	UTF8_ACCUMULATE ( illegal_length,
	DATA ( 'a', 0xf8, 0xbf, 0xbf, 0xbf, 0xbf, 'b', 0xfc, 0xbf,
	0xbf, 0xbf, 0xbf, 0xbf, 'c', 0xfe, 0xbf, 0xbf, 0xbf,
	0xbf, 0xbf, 0xbf, 'd', 0xff, 0xbf, 0xbf, 0xbf, 0xbf,
	0xbf, 0xbf, 0xbf, 'e' ),
	DATA ( 'a', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'b',
	0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'c',
	0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
	0xfffd, 'd', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
	0xfffd, 0xfffd, 0xfffd, 'e' ) );

	/**
	 * Report UTF-8 accumulation test result
	 *
	 * @v test		UTF-8 accumulation test
	 * @v file		Test code file
	 * @v line		Test code line
	 *
	 * Feeds the test bytes one at a time into a freshly zeroed
	 * accumulator and checks the value returned for each byte against
	 * the corresponding expected entry.
	 */
	static void utf8_accumulate_okx ( struct utf8_accumulate_test *test,
					  const char *file, unsigned int line ) {
		struct utf8_accumulator utf8;
		unsigned int character;
		unsigned int i;

		/* Initialise accumulator */
		memset ( &utf8, 0, sizeof ( utf8 ) );

		/* Test each byte in turn */
		for ( i = 0 ; i < test->len ; i++ ) {
			character = utf8_accumulate ( &utf8, test->bytes[i] );
			/* Plain char may be signed: show the raw byte value
			 * rather than a sign-extended negative integer.
			 */
			DBGC ( test, "UTF8 byte %02x character %02x\n",
			       ( ( unsigned char ) test->bytes[i] ), character );
			okx ( character == test->expected[i], file, line );
		}
	}

	/**
	 * Report UTF-8 accumulation test result
	 *
	 * @v test		UTF-8 accumulation test
	 *
	 * Passes the invoking file and line on to utf8_accumulate_okx().
	 */
	#define utf8_accumulate_ok( test ) \
		utf8_accumulate_okx ( test, __FILE__, __LINE__ )

	/**
	* Perform UTF-8 self-test
	*
	* Runs the accumulation tests defined above.  Each test is a
	* separate utf8_accumulate_ok() invocation, so the __LINE__ passed
	* on to utf8_accumulate_okx() differs for every test vector.
	*/
	static void utf8_test_exec ( void ) {

	/* Accumulation tests */
	utf8_accumulate_ok ( &ascii );
	utf8_accumulate_ok ( &multibyte );
	utf8_accumulate_ok ( &stray_continuation );
	utf8_accumulate_ok ( &missing_continuation );
	utf8_accumulate_ok ( &illegal_two );
	utf8_accumulate_ok ( &illegal_three );
	utf8_accumulate_ok ( &illegal_four );
	utf8_accumulate_ok ( &illegal_length );
	}

	/**
	* UTF-8 self-test
	*
	* Named "utf8"; its exec handler is utf8_test_exec().
	*/
	struct self_test utf8_test __self_test = {
	.name = "utf8",
	.exec = utf8_test_exec,
	};