diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2002-11-23 22:08:15 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2002-11-23 22:08:15 (GMT) |
commit | 677bde2dd14ac2c8f170779adcc732f991db8bd6 (patch) | |
tree | daaeacd804a9e45a96c7819ece9d78d73a690439 /Lib/test | |
parent | 74a530d42dcd0d33587aed66d600a6687ce30cbd (diff) | |
download | cpython-677bde2dd14ac2c8f170779adcc732f991db8bd6.zip cpython-677bde2dd14ac2c8f170779adcc732f991db8bd6.tar.gz cpython-677bde2dd14ac2c8f170779adcc732f991db8bd6.tar.bz2 |
Patch #626485: Support Unicode normalization.
Diffstat (limited to 'Lib/test')
-rw-r--r-- | Lib/test/test_normalization.py | 68 |
1 files changed, 68 insertions, 0 deletions
# Lib/test/test_normalization.py -- new file added by this commit
# (Python 2 source, reformatted from the collapsed diff).
"""Check unicodedata.normalize() against the Unicode conformance data.

Reads NormalizationTest.txt from the current directory.  Each data line
holds five ';'-separated columns (c1..c5) of space-separated hexadecimal
code points; the NFC, NFD, NFKC and NFKD forms of those columns are
checked against each other.  Afterwards every code point that did not
appear as c1 in @Part1 is required to be unchanged by all four forms.
"""
from test.test_support import verbose, TestFailed, TestSkipped, verify
import sys
from unicodedata import normalize

try:
    testfile = open("NormalizationTest.txt", "r")
    try:
        data = testfile.readlines()
    finally:
        # Close the handle explicitly instead of relying on garbage
        # collection to release it.
        testfile.close()
except IOError:
    raise TestSkipped("NormalizationTest.txt not found, download from http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt")


class RangeError(Exception):
    """Raised by unistr() for a code point above sys.maxunicode."""
    pass


# NOTE: the parameter name ``str`` shadows the builtin; it is kept so that
# the signatures of these helpers stay unchanged.
def NFC(str):
    """Return the NFC normalization of the given unicode string."""
    return normalize("NFC", str)


def NFKC(str):
    """Return the NFKC normalization of the given unicode string."""
    return normalize("NFKC", str)


def NFD(str):
    """Return the NFD normalization of the given unicode string."""
    return normalize("NFD", str)


def NFKD(str):
    """Return the NFKD normalization of the given unicode string."""
    return normalize("NFKD", str)


def unistr(data):
    """Convert space-separated hexadecimal code points to a unicode string.

    Raises RangeError if any code point is larger than sys.maxunicode,
    i.e. cannot be produced by unichr() on this build.
    """
    data = [int(x, 16) for x in data.split(" ")]
    for x in data:
        if x > sys.maxunicode:
            raise RangeError
    return u"".join([unichr(x) for x in data])


part1_data = {}
# No "@Part" header has been seen yet; initialising here avoids a NameError
# if a data line precedes the first header.
part = None
for line in data:
    # Drop any trailing '#' comment and surrounding whitespace.
    line = line.split('#')[0].strip()
    if not line:
        continue
    if line.startswith("@Part"):
        part = line
        continue
    try:
        # Each line ends with ';', so the last split field is discarded.
        c1, c2, c3, c4, c5 = [unistr(x) for x in line.split(';')[:-1]]
    except RangeError:
        # Skip unsupported characters
        continue

    if verbose:
        print(line)

    # Perform tests
    verify(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
    verify(c4 == NFC(c4) == NFC(c5), line)
    verify(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
    verify(c5 == NFD(c4) == NFD(c5), line)
    verify(c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5), line)
    verify(c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5), line)

    # Record part 1 data
    if part == "@Part1":
        part1_data[c1] = 1

# Perform tests for all other data
for c in range(sys.maxunicode+1):
    X = unichr(c)
    if X in part1_data:
        continue
    # Use verify() rather than a bare assert so the check still runs
    # under -O and fails the same way as the checks above.
    verify(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), "U+%04X" % c)