diff options
author | Marc-André Lemburg <mal@egenix.com> | 2005-10-21 13:47:03 (GMT) |
---|---|---|
committer | Marc-André Lemburg <mal@egenix.com> | 2005-10-21 13:47:03 (GMT) |
commit | 92b201debcb7c796a68df372f3660e02b2c85d40 (patch) | |
tree | 491c56f6a6cf99810eee6c4935fc25dded20b3b4 /Tools/unicode | |
parent | c5694c8bf4bf2008b42e0107fb245415df4147fd (diff) | |
download | cpython-92b201debcb7c796a68df372f3660e02b2c85d40.zip cpython-92b201debcb7c796a68df372f3660e02b2c85d40.tar.gz cpython-92b201debcb7c796a68df372f3660e02b2c85d40.tar.bz2 |
Add two new tools to compare codecs and show differences and to
list all installed codecs.
Diffstat (limited to 'Tools/unicode')
-rw-r--r-- | Tools/unicode/comparecodecs.py | 53 | ||||
-rw-r--r-- | Tools/unicode/listcodecs.py | 41 |
2 files changed, 94 insertions, 0 deletions
diff --git a/Tools/unicode/comparecodecs.py b/Tools/unicode/comparecodecs.py
new file mode 100644
index 0000000..cd417a4
--- /dev/null
+++ b/Tools/unicode/comparecodecs.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+""" Compare the output of two codecs.
+
+(c) Copyright 2005, Marc-Andre Lemburg (mal@lemburg.com).
+
+    Licensed to PSF under a Contributor Agreement.
+
+"""
+import sys
+
+def compare_codecs(encoding1, encoding2):
+    """ Print all characters which the codecs named encoding1 and
+        encoding2 encode or decode differently, then a summary line.
+    """
+    print 'Comparing encoding/decoding of %r and %r' % (encoding1, encoding2)
+    mismatch = 0
+    # Check encoding of all code points; +1 to include sys.maxunicode
+    for i in xrange(sys.maxunicode + 1):
+        u = unichr(i)
+        try:
+            c1 = u.encode(encoding1)
+        except UnicodeError:
+            c1 = '<undefined>'  # stand-in so two failures compare equal
+        try:
+            c2 = u.encode(encoding2)
+        except UnicodeError:
+            c2 = '<undefined>'
+        if c1 != c2:
+            print ' * encoding mismatch for 0x%04X: %-14r != %r' % (i, c1, c2)
+            mismatch += 1
+    # Check decoding of all single byte values
+    for i in range(256):
+        c = chr(i)
+        try:
+            u1 = c.decode(encoding1)
+        except UnicodeError:
+            u1 = u'<undefined>'
+        try:
+            u2 = c.decode(encoding2)
+        except UnicodeError:
+            u2 = u'<undefined>'
+        if u1 != u2:
+            print ' * decoding mismatch for 0x%04X: %-14r != %r' % (i, u1, u2)
+            mismatch += 1
+    if mismatch:
+        print
+        print 'Found %i mismatches' % mismatch
+    else:
+        print '-> Codecs are identical.'
+
+if __name__ == '__main__':
+    compare_codecs(sys.argv[1], sys.argv[2])
diff --git a/Tools/unicode/listcodecs.py b/Tools/unicode/listcodecs.py
new file mode 100644
index 0000000..19d21e1
--- /dev/null
+++ b/Tools/unicode/listcodecs.py
@@ -0,0 +1,41 @@
+""" List all available codec modules.
+
+(c) Copyright 2005, Marc-Andre Lemburg (mal@lemburg.com).
+
+    Licensed to PSF under a Contributor Agreement.
+
+"""
+
+import os, codecs, encodings
+
+_debug = 0
+
+def listcodecs(dir):
+    """ Return the names of all codec modules (*.py files) found in dir. """
+    found = []
+    for entry in os.listdir(dir):
+        if entry.endswith('.py'):
+            codec = entry[:-3]
+            try:
+                # Only names known to the codec registry are true codecs
+                codecs.lookup(codec)
+            except LookupError:
+                # Not a codec module
+                continue
+            except Exception, reason:
+                # Most likely importing the codec failed; the name
+                # itself is still a valid codec name, so keep it
+                if _debug:
+                    print '* problem importing codec %r: %s' % \
+                          (codec, reason)
+            found.append(codec)
+    return found
+
+
+if __name__ == '__main__':
+    # Collect first, so that any debug output precedes the listing
+    codec_names = sorted(listcodecs(encodings.__path__[0]))
+    print 'all_codecs = ['
+    for codec_name in codec_names:
+        print '    %r,' % codec_name
+    print ']'