summaryrefslogtreecommitdiffstats
path: root/Tools/unicode
diff options
context:
space:
mode:
author Marc-André Lemburg <mal@egenix.com> 2005-10-21 13:47:03 (GMT)
committer Marc-André Lemburg <mal@egenix.com> 2005-10-21 13:47:03 (GMT)
commit 92b201debcb7c796a68df372f3660e02b2c85d40 (patch)
tree 491c56f6a6cf99810eee6c4935fc25dded20b3b4 /Tools/unicode
parent c5694c8bf4bf2008b42e0107fb245415df4147fd (diff)
download cpython-92b201debcb7c796a68df372f3660e02b2c85d40.zip
cpython-92b201debcb7c796a68df372f3660e02b2c85d40.tar.gz
cpython-92b201debcb7c796a68df372f3660e02b2c85d40.tar.bz2
Add two new tools: one to compare two codecs and show their
differences, and one to list all installed codecs.
Diffstat (limited to 'Tools/unicode')
-rw-r--r-- Tools/unicode/comparecodecs.py 53
-rw-r--r-- Tools/unicode/listcodecs.py 41
2 files changed, 94 insertions, 0 deletions
diff --git a/Tools/unicode/comparecodecs.py b/Tools/unicode/comparecodecs.py
new file mode 100644
index 0000000..cd417a4
--- /dev/null
+++ b/Tools/unicode/comparecodecs.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+""" Compare the output of two codecs.
+
+(c) Copyright 2005, Marc-Andre Lemburg (mal@lemburg.com).
+
+ Licensed to PSF under a Contributor Agreement.
+
+"""
+import sys
+
def compare_codecs(encoding1, encoding2):
    """Print every input on which two codecs disagree, then a summary.

    Two passes are made:

    * encoding: each code point from 0 through sys.maxunicode
      (inclusive) is encoded with both codecs and the results compared;
    * decoding: each single byte 0x00-0xFF is decoded with both codecs
      and the results compared.

    A codec that raises UnicodeError for an input is treated as mapping
    it to the marker '<undefined>', so two codecs that both reject the
    same input count as agreeing on it.  Exceptions other than
    UnicodeError are not caught.

    encoding1, encoding2 -- codec names as accepted by .encode()/.decode().

    All results are written to stdout; nothing is returned.
    """
    # Python 2 spells "code point -> text" as unichr(); Python 3 only
    # has chr(), which already returns text.
    try:
        to_text = unichr
    except NameError:
        to_text = chr

    print('Comparing encoding/decoding of %r and %r' % (encoding1, encoding2))
    mismatch = 0

    # Check encoding.  sys.maxunicode is itself a valid code point, so
    # the upper bound has to be inclusive.
    for i in range(sys.maxunicode + 1):
        u = to_text(i)
        try:
            c1 = u.encode(encoding1)
        except UnicodeError:
            c1 = '<undefined>'
        try:
            c2 = u.encode(encoding2)
        except UnicodeError:
            c2 = '<undefined>'
        if c1 != c2:
            print(' * encoding mismatch for 0x%04X: %-14r != %r' %
                  (i, c1, c2))
            mismatch += 1

    # Check decoding.  bytes(bytearray(...)) builds a one-byte string on
    # both Python 2 (where it is a str) and Python 3 (where it is bytes).
    for i in range(256):
        c = bytes(bytearray((i,)))
        try:
            u1 = c.decode(encoding1)
        except UnicodeError:
            u1 = u'<undefined>'
        try:
            u2 = c.decode(encoding2)
        except UnicodeError:
            u2 = u'<undefined>'
        if u1 != u2:
            print(' * decoding mismatch for 0x%04X: %-14r != %r' %
                  (i, u1, u2))
            mismatch += 1

    if mismatch:
        # print('') emits a single blank line on both Python 2 and 3.
        print('')
        print('Found %i mismatches' % mismatch)
    else:
        print('-> Codecs are identical.')
+
if __name__ == '__main__':
    # Both codec names are required; report usage rather than letting a
    # missing argument surface as an IndexError traceback.
    if len(sys.argv) < 3:
        sys.stderr.write('usage: %s encoding1 encoding2\n' % sys.argv[0])
        sys.exit(2)
    compare_codecs(sys.argv[1], sys.argv[2])
diff --git a/Tools/unicode/listcodecs.py b/Tools/unicode/listcodecs.py
new file mode 100644
index 0000000..19d21e1
--- /dev/null
+++ b/Tools/unicode/listcodecs.py
@@ -0,0 +1,41 @@
+""" List all available codec modules.
+
+(c) Copyright 2005, Marc-Andre Lemburg (mal@lemburg.com).
+
+ Licensed to PSF under a Contributor Agreement.
+
+"""
+
+import os, codecs, encodings
+
# Set to a true value to report codecs whose lookup raised an unexpected
# error (see listcodecs).
_debug = 0

def listcodecs(dir):
    """Return the names of the codec modules found in directory *dir*.

    Every file in *dir* whose name ends in '.py' is a candidate; its
    name without the extension is passed to codecs.lookup():

    * LookupError means there is no such codec, and the name is skipped;
    * any other exception is taken to be a failure while importing the
      codec module.  The name is still included, and the problem is
      printed when the module-level _debug flag is true.

    The names are returned as a list in os.listdir() order (not sorted).
    """
    names = []
    for filename in os.listdir(dir):
        if not filename.endswith('.py'):
            continue
        name = filename[:-3]
        # Check whether we've found a true codec
        try:
            codecs.lookup(name)
        except LookupError:
            # Codec not found
            continue
        except Exception as reason:
            # Probably an error from importing the codec; still it's
            # a valid codec name
            if _debug:
                print('* problem importing codec %r: %s' % (name, reason))
        names.append(name)
    return names
+
+
if __name__ == '__main__':
    # Emit the codecs shipped in the encodings package as a Python list
    # literal, one sorted name per line.
    names = sorted(listcodecs(encodings.__path__[0]))
    print('all_codecs = [')
    for name in names:
        print(' %r,' % name)
    print(']')