| from test.test_support import run_unittest, open_urlresource | |
| import unittest | |
| from httplib import HTTPException | |
| import sys | |
| import os | |
| from unicodedata import normalize, unidata_version | |
# Name of the normalization data file used by the tests below.
TESTDATAFILE = "NormalizationTest.txt"
# Download location of that file for the Unicode database version this
# interpreter's unicodedata module reports.
TESTDATAURL = "".join(
    ["http://www.unicode.org/Public/", unidata_version, "/ucd/", TESTDATAFILE]
)
def check_version(testfile):
    """Return whether the first line of *testfile* mentions unidata_version.

    Exactly one line is read (and therefore consumed) from *testfile*.
    """
    return unidata_version in testfile.readline()
class RangeError(Exception):
    """Signals a code point greater than sys.maxunicode."""
def NFC(str):
    """Return *str* normalized to form "NFC" by unicodedata.normalize()."""
    # The parameter name shadows the builtin; it is kept so that existing
    # callers are unaffected.
    return normalize("NFC", str)
def NFKC(str):
    """Return *str* normalized to form "NFKC" by unicodedata.normalize()."""
    # The parameter name shadows the builtin; it is kept so that existing
    # callers are unaffected.
    return normalize("NFKC", str)
def NFD(str):
    """Return *str* normalized to form "NFD" by unicodedata.normalize()."""
    # The parameter name shadows the builtin; it is kept so that existing
    # callers are unaffected.
    return normalize("NFD", str)
def NFKD(str):
    """Return *str* normalized to form "NFKD" by unicodedata.normalize()."""
    # The parameter name shadows the builtin; it is kept so that existing
    # callers are unaffected.
    return normalize("NFKD", str)
def unistr(data):
    """Build a unicode string from space-separated hexadecimal code points.

    Raises RangeError if any of the code points is greater than
    sys.maxunicode; nothing is built in that case.
    """
    codepoints = [int(field, 16) for field in data.split(" ")]
    # Reject the whole sequence up front rather than part-way through.
    if any(cp > sys.maxunicode for cp in codepoints):
        raise RangeError
    return u"".join(map(unichr, codepoints))
class NormalizationTest(unittest.TestCase):
    """Check unicodedata.normalize() against the data at TESTDATAURL."""

    def _check_line(self, c1, c2, c3, c4, c5, line):
        """Assert the normalization relations between the five columns.

        *line* is the stripped source line, used as the failure message.
        """
        self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
        self.assertTrue(c4 == NFC(c4) == NFC(c5), line)
        self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
        self.assertTrue(c5 == NFD(c4) == NFD(c5), line)
        self.assertTrue(c4 == NFKC(c1) == NFKC(c2) ==
                        NFKC(c3) == NFKC(c4) == NFKC(c5),
                        line)
        self.assertTrue(c5 == NFKD(c1) == NFKD(c2) ==
                        NFKD(c3) == NFKD(c4) == NFKD(c5),
                        line)

    def test_main(self):
        """Check every data line, then every code point not seen in @Part1.

        The test is skipped when the data file cannot be retrieved.
        """
        part = None
        # First-column strings recorded while in @Part1.  Only membership
        # is ever tested, so a set is enough.
        part1_data = set()
        # Hit the exception early
        try:
            testdata = open_urlresource(TESTDATAURL, check_version)
        except (IOError, HTTPException):
            self.skipTest("Could not retrieve " + TESTDATAURL)
        # Release the data file however the test ends (pass, fail or error).
        self.addCleanup(testdata.close)

        for line in testdata:
            # Drop any trailing comment and the surrounding whitespace.
            line = line.split('#', 1)[0].strip()
            if not line:
                continue
            if line.startswith("@Part"):
                part = line.split()[0]
                continue
            fields = line.split(';')
            try:
                # Whatever follows the last ';' is not one of the columns.
                c1, c2, c3, c4, c5 = [unistr(x) for x in fields[:-1]]
            except RangeError:
                # Skip unsupported characters;
                # try at least adding c1 if we are in part1
                if part == "@Part1":
                    try:
                        c1 = unistr(fields[0])
                    except RangeError:
                        # c1 itself is not representable on this build;
                        # there is nothing to record for this line.
                        pass
                    else:
                        part1_data.add(c1)
                continue

            # Perform tests
            self._check_line(c1, c2, c3, c4, c5, line)

            # Record part 1 data
            if part == "@Part1":
                part1_data.add(c1)

        # Perform tests for all other data: every code point that was not
        # recorded from @Part1 must be unchanged by all four forms.
        for c in range(sys.maxunicode+1):
            X = unichr(c)
            if X in part1_data:
                continue
            self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)

    def test_bug_834676(self):
        """Regression check for bug 834676: this call must not raise."""
        # Check for bug 834676
        normalize('NFC', u'\ud55c\uae00')
def test_main():
    """Run the NormalizationTest cases through run_unittest()."""
    run_unittest(NormalizationTest)


# Allow this file to be executed directly as a script.
if __name__ == "__main__":
    test_main()