summaryrefslogtreecommitdiffstats
path: root/Lib/test
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2002-11-23 22:08:15 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2002-11-23 22:08:15 (GMT)
commit677bde2dd14ac2c8f170779adcc732f991db8bd6 (patch)
treedaaeacd804a9e45a96c7819ece9d78d73a690439 /Lib/test
parent74a530d42dcd0d33587aed66d600a6687ce30cbd (diff)
downloadcpython-677bde2dd14ac2c8f170779adcc732f991db8bd6.zip
cpython-677bde2dd14ac2c8f170779adcc732f991db8bd6.tar.gz
cpython-677bde2dd14ac2c8f170779adcc732f991db8bd6.tar.bz2
Patch #626485: Support Unicode normalization.
Diffstat (limited to 'Lib/test')
-rw-r--r--Lib/test/test_normalization.py68
1 files changed, 68 insertions, 0 deletions
diff --git a/Lib/test/test_normalization.py b/Lib/test/test_normalization.py
new file mode 100644
index 0000000..a263fc5
--- /dev/null
+++ b/Lib/test/test_normalization.py
@@ -0,0 +1,68 @@
+from test.test_support import verbose, TestFailed, TestSkipped, verify
+import sys
+from unicodedata import normalize
+# Read every line of the conformance data file into ``data``.  Any IOError
+# (from opening or from reading) is reported as TestSkipped, exactly as
+# before; the inner try/finally makes sure the file object is closed
+# explicitly instead of being left for garbage collection.
+try:
+    datafile = open("NormalizationTest.txt","r")
+    try:
+        data = datafile.readlines()
+    finally:
+        datafile.close()
+except IOError:
+    raise TestSkipped("NormalizationTest.txt not found, download from http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt")
+
+class RangeError(Exception):
+    """Raised by unistr() when a code point is larger than sys.maxunicode."""
+    pass
+
+def NFC(str):
+    """Return *str* converted to Unicode normalization form NFC."""
+    form = "NFC"
+    return normalize(form, str)
+
+def NFKC(str):
+    """Return *str* converted to Unicode normalization form NFKC."""
+    form = "NFKC"
+    return normalize(form, str)
+
+def NFD(str):
+    """Return *str* converted to Unicode normalization form NFD."""
+    form = "NFD"
+    return normalize(form, str)
+
+def NFKD(str):
+    """Return *str* converted to Unicode normalization form NFKD."""
+    form = "NFKD"
+    return normalize(form, str)
+
+def unistr(data):
+    """Build a unicode string from space-separated hexadecimal code points.
+
+    *data* is split on single spaces and each piece is parsed as a base-16
+    integer.  Raises RangeError if any resulting code point is greater than
+    sys.maxunicode; otherwise returns the characters joined together.
+    """
+    # Parse every field first, so a malformed field is reported before any
+    # range check takes place.
+    codepoints = []
+    for field in data.split(" "):
+        codepoints.append(int(field, 16))
+    # split() always yields at least one field, so the list is never empty.
+    if max(codepoints) > sys.maxunicode:
+        raise RangeError
+    chars = [unichr(cp) for cp in codepoints]
+    return u"".join(chars)
+
+# Column-1 strings seen while inside the "@Part1" section.  The sweep over
+# all code points at the end of the file skips anything recorded here.
+part1_data = {}
+# Most recent "@Part..." header line.  Bound up front so that a data line
+# appearing before any header cannot raise NameError in the check below.
+part = None
+for line in data:
+    # Drop a trailing "# ..." comment, then surrounding whitespace.
+    if '#' in line:
+        line = line.split('#')[0]
+    line = line.strip()
+    if not line:
+        continue
+    if line.startswith("@Part"):
+        part = line
+        continue
+    try:
+        # Fields are separated by ';'.  The last split element is discarded
+        # (presumably empty because each record ends with ';' -- confirm
+        # against the data file) and the remaining five become c1..c5.
+        c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
+    except RangeError:
+        # Skip unsupported characters
+        continue
+
+    if verbose:
+        print line
+
+    # Perform tests
+    # As checked below: c2 is the expected NFC form and c3 the expected NFD
+    # form of c1..c3; c4 is the expected NFKC form and c5 the expected NFKD
+    # form of all five columns.
+    verify(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
+    verify(c4 == NFC(c4) == NFC(c5), line)
+    verify(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
+    verify(c5 == NFD(c4) == NFD(c5), line)
+    verify(c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5), line)
+    verify(c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5), line)
+
+    # Record part 1 data
+    if part == "@Part1":
+        part1_data[c1] = 1
+
+# Perform tests for all other data
+# Every code point up to sys.maxunicode whose character was not recorded
+# in part1_data must be left unchanged by all four normalization forms.
+for c in range(sys.maxunicode+1):
+    X = unichr(c)
+    if X in part1_data:
+        continue
+    # Use verify() like the checks above rather than a bare assert: assert
+    # statements are removed under "python -O", which would silently turn
+    # this whole sweep into a no-op.  The code point is passed as the reason.
+    verify(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)
+