BaseTools/Tests/CheckUnicodeSourceFiles.py - edk2 - Git at Google

 ## @file
 #  Unit tests for AutoGen.UniClassObject
 #
 #  Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
 #
 #  SPDX-License-Identifier: BSD-2-Clause-Patent
 #

 ##
 # Import Modules
 #
 import os
 import unittest

 import codecs

 import TestTools

 from Common.Misc import PathClass
 import AutoGen.UniClassObject as BtUni

 # Import the BaseTools logger and call its unit-test initializer at import
 # time, i.e. before the test class below is defined or any test is run.
 from Common import EdkLogger
 EdkLogger.InitializeForUnitTest()

 class Tests(TestTools.BaseToolsTest):
     #
     # Checks which encodings and code points AutoGen.UniClassObject
     # accepts in .uni source files.
     #
     # Each test writes a small .uni file through the BaseToolsTest
     # helpers and hands it to BtUni.UniFileClassObject, expecting the
     # file either to be accepted or to be rejected with UnicodeError or
     # EdkLogger.FatalError.
     #
     # The test methods are documented with comments rather than
     # docstrings because unittest uses a test method's docstring as its
     # description in the runner output.
     #

     # Minimal .uni content used when a test supplies no data of its own.
     SampleData = u'''
         #langdef en-US "English"
         #string STR_A #language en-US "STR_A for en-US"
     '''

     def EncodeToFile(self, encoding, string=None):
         #
         # Write test data to 'input.uni' with self.WriteTmpFile and
         # return a PathClass built from self.GetTmpFilePath.
         #
         # encoding: codec name used to encode 'string', or None when
         #           'string' must be written exactly as given.
         # string:   data to write; self.SampleData when None.
         #
         if string is None:
             string = self.SampleData
         if encoding is not None:
             data = codecs.encode(string, encoding)
         else:
             data = string
         path = 'input.uni'
         self.WriteTmpFile(path, data)
         return PathClass(self.GetTmpFilePath(path))

     def ErrorFailure(self, error, encoding, shouldPass):
         #
         # Fail the current test with a message saying whether 'error'
         # should or should not have been generated for 'encoding' data.
         #
         # error:      name of the error, used as the start of the message.
         # encoding:   encoding label inserted into the message.
         # shouldPass: True when the file was expected to be accepted, in
         #             which case the error "should not" be generated.
         #
         expectation = 'should not' if shouldPass else 'should'
         self.fail('%s %s be generated for %s data in a .uni file'
                   % (error, expectation, encoding))

     def UnicodeErrorFailure(self, encoding, shouldPass):
         # Report a wrong outcome that involves UnicodeError.
         self.ErrorFailure('UnicodeError', encoding, shouldPass)

     def EdkErrorFailure(self, encoding, shouldPass):
         # Report a wrong outcome that involves EdkLogger.FatalError.
         self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)

     def CheckFile(self, encoding, shouldPass, string=None):
         #
         # Write the test data to a .uni file, parse it with
         # BtUni.UniFileClassObject and compare the outcome with the
         # expectation.
         #
         # encoding:   passed to EncodeToFile (None means raw data).
         # shouldPass: True if the file must be accepted, False if it must
         #             be rejected with UnicodeError or
         #             EdkLogger.FatalError.
         # string:     data to write; self.SampleData when None.
         #
         # Fails the test (AssertionError from self.fail) on any mismatch.
         #
         path = self.EncodeToFile(encoding, string)
         try:
             BtUni.UniFileClassObject([path])
         except UnicodeError:
             if shouldPass:
                 self.UnicodeErrorFailure(encoding, shouldPass)
             return
         except EdkLogger.FatalError:
             if shouldPass:
                 self.EdkErrorFailure(encoding, shouldPass)
             return
         except Exception as e:
             # Neither an acceptance nor one of the two recognised
             # rejections: report what was actually raised instead of
             # hiding it behind a message about EdkLogger.FatalError.
             self.fail('Unexpected %s raised for %s data in a .uni file: %s'
                       % (type(e).__name__, encoding, e))

         if not shouldPass:
             # The file was accepted although it had to be rejected.
             self.EdkErrorFailure(encoding, shouldPass)

     def testUtf16InUniFile(self):
         self.CheckFile('utf_16', shouldPass=True)

     def testSupplementaryPlaneUnicodeCharInUtf16File(self):
         #
         # Supplementary Plane characters can exist in UTF-16 files,
         # but they are not valid UCS-2 characters.
         #
         # This test makes sure that BaseTools rejects these characters
         # if seen in a .uni file.
         #
         data = u'''
             #langdef en-US "English"
             #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
         '''

         self.CheckFile('utf_16', shouldPass=False, string=data)

     def testSurrogatePairUnicodeCharInUtf16File(self):
         #
         # Surrogate Pair code points are used in UTF-16 files to
         # encode the Supplementary Plane characters. But a surrogate
         # code point which is not followed by a second surrogate code
         # point might be interpreted as a single code point.
         #
         # The data below is a UTF-16 LE BOM followed by raw bytes that
         # contain the lone surrogate code unit 0xD801.
         #
         # This test makes sure that BaseTools rejects these characters
         # if seen in a .uni file.
         #
         data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '

         self.CheckFile(encoding=None, shouldPass=False, string=data)

     def testValidUtf8File(self):
         self.CheckFile(encoding='utf_8', shouldPass=True)

     def testValidUtf8FileWithBom(self):
         #
         # Same test as testValidUtf8File, but add the UTF-8 BOM
         #
         data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')

         self.CheckFile(encoding=None, shouldPass=True, string=data)

     def test32bitUnicodeCharInUtf16File(self):
         #
         # A code point above 0xFFFF must be rejected in UTF-16 data.
         #
         # Named after the 'utf_16' codec it uses so that it is not
         # replaced by the UTF-8 test of the otherwise identical name
         # defined right after it.
         #
         data = u'''
             #langdef en-US "English"
             #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
         '''

         self.CheckFile('utf_16', shouldPass=False, string=data)

     def test32bitUnicodeCharInUtf8File(self):
         #
         # A code point above 0xFFFF must be rejected in UTF-8 data.
         #
         data = u'''
             #langdef en-US "English"
             #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
         '''

         self.CheckFile('utf_8', shouldPass=False, string=data)

     def test32bitUnicodeCharInUtf8Comment(self):
         data = u'''
             // Even in comments, we reject non-UCS-2 chars: \U00010300
             #langdef en-US "English"
             #string STR_A #language en-US "A"
         '''

         self.CheckFile('utf_8', shouldPass=False, string=data)

     def testSurrogatePairUnicodeCharInUtf8File(self):
         #
         # Surrogate Pair code points are used in UTF-16 files to
         # encode the Supplementary Plane characters. In UTF-8, it is
         # trivial to encode these code points, but they are not valid
         # code points for characters, since they are reserved for the
         # UTF-16 Surrogate Pairs.
         #
         # The bytes below are the UTF-8 style encoding of 0xD801.
         #
         # This test makes sure that BaseTools rejects these characters
         # if seen in a .uni file.
         #
         data = b'\xed\xa0\x81'

         self.CheckFile(encoding=None, shouldPass=False, string=data)

     def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
         #
         # Same test as testSurrogatePairUnicodeCharInUtf8File, but add
         # the UTF-8 BOM
         #
         data = codecs.BOM_UTF8 + b'\xed\xa0\x81'

         self.CheckFile(encoding=None, shouldPass=False, string=data)
 # Callable returned by TestTools.MakeTheTestSuite for the names defined in
 # this module; calling it yields the suite that is run below.
 TheTestSuite = TestTools.MakeTheTestSuite(locals())

 if __name__ == '__main__':
     # Executed as a script: build the suite and run it with the text runner.
     suite = TheTestSuite()
     runner = unittest.TextTestRunner()
     runner.run(suite)
	## @file
	# Unit tests for AutoGen.UniClassObject
	#
	# Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
	#
	# SPDX-License-Identifier: BSD-2-Clause-Patent
	#

	##
	# Import Modules
	#
	import os
	import unittest

	import codecs

	import TestTools

	from Common.Misc import PathClass
	import AutoGen.UniClassObject as BtUni

	# Import the BaseTools logger and call its unit-test initializer at import
	# time, i.e. before the test class below is defined or any test is run.
	from Common import EdkLogger
	EdkLogger.InitializeForUnitTest()

	class Tests(TestTools.BaseToolsTest):
	    #
	    # Checks which encodings and code points AutoGen.UniClassObject
	    # accepts in .uni source files.
	    #
	    # Each test writes a small .uni file through the BaseToolsTest
	    # helpers and hands it to BtUni.UniFileClassObject, expecting the
	    # file either to be accepted or to be rejected with UnicodeError or
	    # EdkLogger.FatalError.
	    #
	    # The test methods are documented with comments rather than
	    # docstrings because unittest uses a test method's docstring as its
	    # description in the runner output.
	    #

	    # Minimal .uni content used when a test supplies no data of its own.
	    SampleData = u'''
	#langdef en-US "English"
	#string STR_A #language en-US "STR_A for en-US"
	'''

	    def EncodeToFile(self, encoding, string=None):
	        #
	        # Write test data to 'input.uni' with self.WriteTmpFile and
	        # return a PathClass built from self.GetTmpFilePath.
	        #
	        # encoding: codec name used to encode 'string', or None when
	        #           'string' must be written exactly as given.
	        # string:   data to write; self.SampleData when None.
	        #
	        if string is None:
	            string = self.SampleData
	        if encoding is not None:
	            data = codecs.encode(string, encoding)
	        else:
	            data = string
	        path = 'input.uni'
	        self.WriteTmpFile(path, data)
	        return PathClass(self.GetTmpFilePath(path))

	    def ErrorFailure(self, error, encoding, shouldPass):
	        #
	        # Fail the current test with a message saying whether 'error'
	        # should or should not have been generated for 'encoding' data.
	        #
	        # error:      name of the error, used as the start of the message.
	        # encoding:   encoding label inserted into the message.
	        # shouldPass: True when the file was expected to be accepted, in
	        #             which case the error "should not" be generated.
	        #
	        expectation = 'should not' if shouldPass else 'should'
	        self.fail('%s %s be generated for %s data in a .uni file'
	                  % (error, expectation, encoding))

	    def UnicodeErrorFailure(self, encoding, shouldPass):
	        # Report a wrong outcome that involves UnicodeError.
	        self.ErrorFailure('UnicodeError', encoding, shouldPass)

	    def EdkErrorFailure(self, encoding, shouldPass):
	        # Report a wrong outcome that involves EdkLogger.FatalError.
	        self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)

	    def CheckFile(self, encoding, shouldPass, string=None):
	        #
	        # Write the test data to a .uni file, parse it with
	        # BtUni.UniFileClassObject and compare the outcome with the
	        # expectation.
	        #
	        # encoding:   passed to EncodeToFile (None means raw data).
	        # shouldPass: True if the file must be accepted, False if it must
	        #             be rejected with UnicodeError or
	        #             EdkLogger.FatalError.
	        # string:     data to write; self.SampleData when None.
	        #
	        # Fails the test (AssertionError from self.fail) on any mismatch.
	        #
	        path = self.EncodeToFile(encoding, string)
	        try:
	            BtUni.UniFileClassObject([path])
	        except UnicodeError:
	            if shouldPass:
	                self.UnicodeErrorFailure(encoding, shouldPass)
	            return
	        except EdkLogger.FatalError:
	            if shouldPass:
	                self.EdkErrorFailure(encoding, shouldPass)
	            return
	        except Exception as e:
	            # Neither an acceptance nor one of the two recognised
	            # rejections: report what was actually raised instead of
	            # hiding it behind a message about EdkLogger.FatalError.
	            self.fail('Unexpected %s raised for %s data in a .uni file: %s'
	                      % (type(e).__name__, encoding, e))

	        if not shouldPass:
	            # The file was accepted although it had to be rejected.
	            self.EdkErrorFailure(encoding, shouldPass)

	    def testUtf16InUniFile(self):
	        self.CheckFile('utf_16', shouldPass=True)

	    def testSupplementaryPlaneUnicodeCharInUtf16File(self):
	        #
	        # Supplementary Plane characters can exist in UTF-16 files,
	        # but they are not valid UCS-2 characters.
	        #
	        # This test makes sure that BaseTools rejects these characters
	        # if seen in a .uni file.
	        #
	        data = u'''
	#langdef en-US "English"
	#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
	'''

	        self.CheckFile('utf_16', shouldPass=False, string=data)

	    def testSurrogatePairUnicodeCharInUtf16File(self):
	        #
	        # Surrogate Pair code points are used in UTF-16 files to
	        # encode the Supplementary Plane characters. But a surrogate
	        # code point which is not followed by a second surrogate code
	        # point might be interpreted as a single code point.
	        #
	        # The data below is a UTF-16 LE BOM followed by raw bytes that
	        # contain the lone surrogate code unit 0xD801.
	        #
	        # This test makes sure that BaseTools rejects these characters
	        # if seen in a .uni file.
	        #
	        data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '

	        self.CheckFile(encoding=None, shouldPass=False, string=data)

	    def testValidUtf8File(self):
	        self.CheckFile(encoding='utf_8', shouldPass=True)

	    def testValidUtf8FileWithBom(self):
	        #
	        # Same test as testValidUtf8File, but add the UTF-8 BOM
	        #
	        data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')

	        self.CheckFile(encoding=None, shouldPass=True, string=data)

	    def test32bitUnicodeCharInUtf16File(self):
	        #
	        # A code point above 0xFFFF must be rejected in UTF-16 data.
	        #
	        # Named after the 'utf_16' codec it uses so that it is not
	        # replaced by the UTF-8 test of the otherwise identical name
	        # defined right after it.
	        #
	        data = u'''
	#langdef en-US "English"
	#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
	'''

	        self.CheckFile('utf_16', shouldPass=False, string=data)

	    def test32bitUnicodeCharInUtf8File(self):
	        #
	        # A code point above 0xFFFF must be rejected in UTF-8 data.
	        #
	        data = u'''
	#langdef en-US "English"
	#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
	'''

	        self.CheckFile('utf_8', shouldPass=False, string=data)

	    def test32bitUnicodeCharInUtf8Comment(self):
	        data = u'''
	// Even in comments, we reject non-UCS-2 chars: \U00010300
	#langdef en-US "English"
	#string STR_A #language en-US "A"
	'''

	        self.CheckFile('utf_8', shouldPass=False, string=data)

	    def testSurrogatePairUnicodeCharInUtf8File(self):
	        #
	        # Surrogate Pair code points are used in UTF-16 files to
	        # encode the Supplementary Plane characters. In UTF-8, it is
	        # trivial to encode these code points, but they are not valid
	        # code points for characters, since they are reserved for the
	        # UTF-16 Surrogate Pairs.
	        #
	        # The bytes below are the UTF-8 style encoding of 0xD801.
	        #
	        # This test makes sure that BaseTools rejects these characters
	        # if seen in a .uni file.
	        #
	        data = b'\xed\xa0\x81'

	        self.CheckFile(encoding=None, shouldPass=False, string=data)

	    def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
	        #
	        # Same test as testSurrogatePairUnicodeCharInUtf8File, but add
	        # the UTF-8 BOM
	        #
	        data = codecs.BOM_UTF8 + b'\xed\xa0\x81'

	        self.CheckFile(encoding=None, shouldPass=False, string=data)

	# Callable returned by TestTools.MakeTheTestSuite for the names defined in
	# this module; calling it yields the suite that is run below.
	TheTestSuite = TestTools.MakeTheTestSuite(locals())

	if __name__ == '__main__':
	    # Executed as a script: build the suite and run it with the text runner.
	    suite = TheTestSuite()
	    runner = unittest.TextTestRunner()
	    runner.run(suite)