diff options
Diffstat (limited to 'Lib')
81 files changed, 1196 insertions, 12612 deletions
diff --git a/Lib/bsddb/__init__.py b/Lib/bsddb/__init__.py index c004c08..90ed362 100644 --- a/Lib/bsddb/__init__.py +++ b/Lib/bsddb/__init__.py @@ -343,6 +343,7 @@ def _openDBEnv(cachesize): e.set_cachesize(0, cachesize) else: raise error, "cachesize must be >= 20480" + e.set_lk_detect(db.DB_LOCK_DEFAULT) e.open('.', db.DB_PRIVATE | db.DB_CREATE | db.DB_THREAD | db.DB_INIT_LOCK | db.DB_INIT_MPOOL) return e diff --git a/Lib/bsddb/dbobj.py b/Lib/bsddb/dbobj.py index 4a75dd2..961b9e9 100644 --- a/Lib/bsddb/dbobj.py +++ b/Lib/bsddb/dbobj.py @@ -91,9 +91,14 @@ class DBEnv: return self._cobj.lock_stat(*args, **kwargs) def log_archive(self, *args, **kwargs): return self._cobj.log_archive(*args, **kwargs) + def set_get_returns_none(self, *args, **kwargs): return self._cobj.set_get_returns_none(*args, **kwargs) + if db.version() >= (4,0): + def log_stat(self, *args, **kwargs): + return apply(self._cobj.log_stat, args, kwargs) + if db.version() >= (4,1): def dbremove(self, *args, **kwargs): return self._cobj.dbremove(*args, **kwargs) @@ -102,6 +107,10 @@ class DBEnv: def set_encrypt(self, *args, **kwargs): return self._cobj.set_encrypt(*args, **kwargs) + if db.version() >= (4,4): + def lsn_reset(self, *args, **kwargs): + return apply(self._cobj.lsn_reset, args, kwargs) + class DB(DictMixin): def __init__(self, dbenv, *args, **kwargs): @@ -208,3 +217,38 @@ class DB(DictMixin): if db.version() >= (4,1): def set_encrypt(self, *args, **kwargs): return self._cobj.set_encrypt(*args, **kwargs) + + +class DBSequence: + def __init__(self, *args, **kwargs): + self._cobj = apply(db.DBSequence, args, kwargs) + + def close(self, *args, **kwargs): + return apply(self._cobj.close, args, kwargs) + def get(self, *args, **kwargs): + return apply(self._cobj.get, args, kwargs) + def get_dbp(self, *args, **kwargs): + return apply(self._cobj.get_dbp, args, kwargs) + def get_key(self, *args, **kwargs): + return apply(self._cobj.get_key, args, kwargs) + def init_value(self, *args, **kwargs): + return apply(self._cobj.init_value, args, kwargs) + def open(self, *args, **kwargs): + return apply(self._cobj.open, args, kwargs) + def remove(self, *args, **kwargs): + return apply(self._cobj.remove, args, kwargs) + def stat(self, *args, **kwargs): + return apply(self._cobj.stat, args, kwargs) + def set_cachesize(self, *args, **kwargs): + return apply(self._cobj.set_cachesize, args, kwargs) + def set_flags(self, *args, **kwargs): + return apply(self._cobj.set_flags, args, kwargs) + def set_range(self, *args, **kwargs): + return apply(self._cobj.set_range, args, kwargs) + def get_cachesize(self, *args, **kwargs): + return apply(self._cobj.get_cachesize, args, kwargs) + def get_flags(self, *args, **kwargs): + return apply(self._cobj.get_flags, args, kwargs) + def get_range(self, *args, **kwargs): + return apply(self._cobj.get_range, args, kwargs) +>>>>>>> .merge-right.r46752 diff --git a/Lib/bsddb/dbtables.py b/Lib/bsddb/dbtables.py index fd33b6e..369db43 100644 --- a/Lib/bsddb/dbtables.py +++ b/Lib/bsddb/dbtables.py @@ -131,7 +131,8 @@ def contains_metastrings(s) : class bsdTableDB : def __init__(self, filename, dbhome, create=0, truncate=0, mode=0600, recover=0, dbflags=0): - """bsdTableDB.open(filename, dbhome, create=0, truncate=0, mode=0600) + """bsdTableDB(filename, dbhome, create=0, truncate=0, mode=0600) + Open database name in the dbhome BerkeleyDB directory. Use keyword arguments when calling this constructor. """ @@ -218,7 +219,8 @@ class bsdTableDB : def CreateTable(self, table, columns): - """CreateTable(table, columns) - Create a new table in the database + """CreateTable(table, columns) - Create a new table in the database. + raises TableDBError if it already exists or for other DB errors. """ assert isinstance(columns, ListType) @@ -286,7 +288,8 @@ class bsdTableDB : def CreateOrExtendTable(self, table, columns): """CreateOrExtendTable(table, columns) - - Create a new table in the database. + Create a new table in the database. + If a table of this name already exists, extend it to have any additional columns present in the given list as well as all of its current columns. @@ -411,14 +414,15 @@ class bsdTableDB : def Modify(self, table, conditions={}, mappings={}): - """Modify(table, conditions) - Modify in rows matching 'conditions' - using mapping functions in 'mappings' - * conditions is a dictionary keyed on column names - containing condition functions expecting the data string as an - argument and returning a boolean. - * mappings is a dictionary keyed on column names containint condition - functions expecting the data string as an argument and returning the - new string for that column. + """Modify(table, conditions={}, mappings={}) - Modify items in rows matching 'conditions' using mapping functions in 'mappings' + + * table - the table name + * conditions - a dictionary keyed on column names containing + a condition callable expecting the data string as an + argument and returning a boolean. + * mappings - a dictionary keyed on column names containing a + condition callable expecting the data string as an argument and + returning the new string for that column. """ try: matching_rowids = self.__Select(table, [], conditions) @@ -450,7 +454,8 @@ class bsdTableDB : txn.commit() txn = None - except DBError, dberror: + # catch all exceptions here since we call unknown callables + except: if txn: txn.abort() raise @@ -461,9 +466,10 @@ class bsdTableDB : def Delete(self, table, conditions={}): """Delete(table, conditions) - Delete items matching the given conditions from the table. - * conditions is a dictionary keyed on column names - containing condition functions expecting the data string as an - argument and returning a boolean. + + * conditions - a dictionary keyed on column names containing + condition functions expecting the data string as an + argument and returning a boolean. """ try: matching_rowids = self.__Select(table, [], conditions) @@ -499,11 +505,12 @@ class bsdTableDB : def Select(self, table, columns, conditions={}): - """Select(table, conditions) - retrieve specific row data + """Select(table, columns, conditions) - retrieve specific row data Returns a list of row column->value mapping dictionaries. - * columns is a list of which column data to return. If + + * columns - a list of which column data to return. If columns is None, all columns will be returned. - * conditions is a dictionary keyed on column names + * conditions - a dictionary keyed on column names containing callable conditions expecting the data string as an argument and returning a boolean. """ diff --git a/Lib/bsddb/test/test_all.py b/Lib/bsddb/test/test_all.py index abfaf47..ad8b1e9 100644 --- a/Lib/bsddb/test/test_all.py +++ b/Lib/bsddb/test/test_all.py @@ -4,6 +4,12 @@ import sys import os import unittest +try: + # For Pythons w/distutils pybsddb + from bsddb3 import db +except ImportError: + # For Python 2.3 + from bsddb import db verbose = 0 if 'verbose' in sys.argv: @@ -16,12 +22,6 @@ if 'silent' in sys.argv: # take care of old flag, just in case def print_versions(): - try: - # For Pythons w/distutils pybsddb - from bsddb3 import db - except ImportError: - # For Python 2.3 - from bsddb import db print print '-=' * 38 print db.DB_VERSION_STRING @@ -69,6 +69,8 @@ def suite(): 'test_queue', 'test_recno', 'test_thread', + 'test_sequence', + 'test_cursor_pget_bug', ] alltests = unittest.TestSuite() diff --git a/Lib/bsddb/test/test_basics.py b/Lib/bsddb/test/test_basics.py index 7e8f835..bec5da3 100644 --- a/Lib/bsddb/test/test_basics.py +++ b/Lib/bsddb/test/test_basics.py @@ -659,12 +659,22 @@ class BasicTransactionTestCase(BasicTestCase): except db.DBIncompleteError: pass + if db.version() >= (4,0): + statDict = self.env.log_stat(0); + assert statDict.has_key('magic') + assert statDict.has_key('version') + assert statDict.has_key('cur_file') + assert statDict.has_key('region_nowait') + # must have at least one log file present: logs = self.env.log_archive(db.DB_ARCH_ABS | db.DB_ARCH_LOG) assert logs != None for log in logs: if verbose: print 'log file: ' + log + if db.version >= (4,2): + logs = self.env.log_archive(db.DB_ARCH_REMOVE) + assert not logs self.txn = self.env.txn_begin() diff --git a/Lib/bsddb/test/test_cursor_pget_bug.py b/Lib/bsddb/test/test_cursor_pget_bug.py new file mode 100644 index 0000000..de47e6d --- /dev/null +++ b/Lib/bsddb/test/test_cursor_pget_bug.py @@ -0,0 +1,65 @@ +import unittest +import sys, os, glob + +try: + # For Pythons w/distutils pybsddb + from bsddb3 import db +except ImportError: + # For Python 2.3 + from bsddb import db + + +#---------------------------------------------------------------------- + +class pget_bugTestCase(unittest.TestCase): + """Verify that cursor.pget works properly""" + db_name = 'test-cursor_pget.db' + + def setUp(self): + self.homeDir = os.path.join(os.path.dirname(sys.argv[0]), 'db_home') + try: + os.mkdir(self.homeDir) + except os.error: + pass + self.env = db.DBEnv() + self.env.open(self.homeDir, db.DB_CREATE | db.DB_INIT_MPOOL) + self.primary_db = db.DB(self.env) + self.primary_db.open(self.db_name, 'primary', db.DB_BTREE, db.DB_CREATE) + self.secondary_db = db.DB(self.env) + self.secondary_db.set_flags(db.DB_DUP) + self.secondary_db.open(self.db_name, 'secondary', db.DB_BTREE, db.DB_CREATE) + self.primary_db.associate(self.secondary_db, lambda key, data: data) + self.primary_db.put('salad', 'eggs') + self.primary_db.put('spam', 'ham') + self.primary_db.put('omelet', 'eggs') + + + def tearDown(self): + self.secondary_db.close() + self.primary_db.close() + self.env.close() + del self.secondary_db + del self.primary_db + del self.env + for file in glob.glob(os.path.join(self.homeDir, '*')): + os.remove(file) + os.removedirs(self.homeDir) + + def test_pget(self): + cursor = self.secondary_db.cursor() + + self.assertEquals(('eggs', 'salad', 'eggs'), cursor.pget(key='eggs', flags=db.DB_SET)) + self.assertEquals(('eggs', 'omelet', 'eggs'), cursor.pget(db.DB_NEXT_DUP)) + self.assertEquals(None, cursor.pget(db.DB_NEXT_DUP)) + + self.assertEquals(('ham', 'spam', 'ham'), cursor.pget('ham', 'spam', flags=db.DB_SET)) + self.assertEquals(None, cursor.pget(db.DB_NEXT_DUP)) + + cursor.close() + + +def test_suite(): + return unittest.makeSuite(pget_bugTestCase) + +if __name__ == '__main__': + unittest.main(defaultTest='test_suite') diff --git a/Lib/bsddb/test/test_dbtables.py b/Lib/bsddb/test/test_dbtables.py index 1128a5a..26e3d36 100644 --- a/Lib/bsddb/test/test_dbtables.py +++ b/Lib/bsddb/test/test_dbtables.py @@ -339,6 +339,16 @@ class TableDBTestCase(unittest.TestCase): conditions={'Name': dbtables.LikeCond('%')}, mappings={'Access': increment_access}) + try: + self.tdb.Modify(tabname, + conditions={'Name': dbtables.LikeCond('%')}, + mappings={'Access': 'What is your quest?'}) + except TypeError: + # success, the string value in mappings isn't callable + pass + else: + raise RuntimeError, "why was TypeError not raised for bad callable?" + # Delete key in select conditions values = self.tdb.Select( tabname, None, diff --git a/Lib/bsddb/test/test_sequence.py b/Lib/bsddb/test/test_sequence.py new file mode 100644 index 0000000..979f858 --- /dev/null +++ b/Lib/bsddb/test/test_sequence.py @@ -0,0 +1,112 @@ +import unittest +import os +import sys +import tempfile +import glob + +try: + # For Pythons w/distutils pybsddb + from bsddb3 import db +except ImportError: + from bsddb import db + +from test_all import verbose + + +class DBSequenceTest(unittest.TestCase): + def setUp(self): + self.int_32_max = 0x100000000 + self.homeDir = os.path.join(os.path.dirname(sys.argv[0]), 'db_home') + try: + os.mkdir(self.homeDir) + except os.error: + pass + tempfile.tempdir = self.homeDir + self.filename = os.path.split(tempfile.mktemp())[1] + tempfile.tempdir = None + + self.dbenv = db.DBEnv() + self.dbenv.open(self.homeDir, db.DB_CREATE | db.DB_INIT_MPOOL, 0666) + self.d = db.DB(self.dbenv) + self.d.open(self.filename, db.DB_BTREE, db.DB_CREATE, 0666) + + def tearDown(self): + if hasattr(self, 'seq'): + self.seq.close() + del self.seq + if hasattr(self, 'd'): + self.d.close() + del self.d + if hasattr(self, 'dbenv'): + self.dbenv.close() + del self.dbenv + + files = glob.glob(os.path.join(self.homeDir, '*')) + for file in files: + os.remove(file) + + def test_get(self): + self.seq = db.DBSequence(self.d, flags=0) + start_value = 10 * self.int_32_max + self.assertEqual(0xA00000000, start_value) + self.assertEquals(None, self.seq.init_value(start_value)) + self.assertEquals(None, self.seq.open(key='id', txn=None, flags=db.DB_CREATE)) + self.assertEquals(start_value, self.seq.get(5)) + self.assertEquals(start_value + 5, self.seq.get()) + + def test_remove(self): + self.seq = db.DBSequence(self.d, flags=0) + self.assertEquals(None, self.seq.open(key='foo', txn=None, flags=db.DB_CREATE)) + self.assertEquals(None, self.seq.remove(txn=None, flags=0)) + del self.seq + + def test_get_key(self): + self.seq = db.DBSequence(self.d, flags=0) + key = 'foo' + self.assertEquals(None, self.seq.open(key=key, txn=None, flags=db.DB_CREATE)) + self.assertEquals(key, self.seq.get_key()) + + def test_get_dbp(self): + self.seq = db.DBSequence(self.d, flags=0) + self.assertEquals(None, self.seq.open(key='foo', txn=None, flags=db.DB_CREATE)) + self.assertEquals(self.d, self.seq.get_dbp()) + + def test_cachesize(self): + self.seq = db.DBSequence(self.d, flags=0) + cashe_size = 10 + self.assertEquals(None, self.seq.set_cachesize(cashe_size)) + self.assertEquals(None, self.seq.open(key='foo', txn=None, flags=db.DB_CREATE)) + self.assertEquals(cashe_size, self.seq.get_cachesize()) + + def test_flags(self): + self.seq = db.DBSequence(self.d, flags=0) + flag = db.DB_SEQ_WRAP; + self.assertEquals(None, self.seq.set_flags(flag)) + self.assertEquals(None, self.seq.open(key='foo', txn=None, flags=db.DB_CREATE)) + self.assertEquals(flag, self.seq.get_flags() & flag) + + def test_range(self): + self.seq = db.DBSequence(self.d, flags=0) + seq_range = (10 * self.int_32_max, 11 * self.int_32_max - 1) + self.assertEquals(None, self.seq.set_range(seq_range)) + self.seq.init_value(seq_range[0]) + self.assertEquals(None, self.seq.open(key='foo', txn=None, flags=db.DB_CREATE)) + self.assertEquals(seq_range, self.seq.get_range()) + + def test_stat(self): + self.seq = db.DBSequence(self.d, flags=0) + self.assertEquals(None, self.seq.open(key='foo', txn=None, flags=db.DB_CREATE)) + stat = self.seq.stat() + for param in ('nowait', 'min', 'max', 'value', 'current', + 'flags', 'cache_size', 'last_value', 'wait'): + self.assertTrue(param in stat, "parameter %s isn't in stat info" % param) + +def test_suite(): + suite = unittest.TestSuite() + if db.version() >= (4,3): + suite.addTest(unittest.makeSuite(DBSequenceTest)) + return suite + + +if __name__ == '__main__': + unittest.main(defaultTest='test_suite') diff --git a/Lib/ctypes/test/test_cfuncs.py b/Lib/ctypes/test/test_cfuncs.py index 9d8db1f..fa858a6 100644 --- a/Lib/ctypes/test/test_cfuncs.py +++ b/Lib/ctypes/test/test_cfuncs.py @@ -40,41 +40,49 @@ class CFunctions(unittest.TestCase): def test_short(self): self._dll.tf_h.restype = c_short + self._dll.tf_h.argtypes = (c_short,) self.failUnlessEqual(self._dll.tf_h(-32766), -10922) self.failUnlessEqual(self.S(), -32766) def test_short_plus(self): self._dll.tf_bh.restype = c_short + self._dll.tf_bh.argtypes = (c_byte, c_short) self.failUnlessEqual(self._dll.tf_bh(0, -32766), -10922) self.failUnlessEqual(self.S(), -32766) def test_ushort(self): self._dll.tf_H.restype = c_ushort + self._dll.tf_H.argtypes = (c_ushort,) self.failUnlessEqual(self._dll.tf_H(65535), 21845) self.failUnlessEqual(self.U(), 65535) def test_ushort_plus(self): self._dll.tf_bH.restype = c_ushort + self._dll.tf_bH.argtypes = (c_byte, c_ushort) self.failUnlessEqual(self._dll.tf_bH(0, 65535), 21845) self.failUnlessEqual(self.U(), 65535) def test_int(self): self._dll.tf_i.restype = c_int + self._dll.tf_i.argtypes = (c_int,) self.failUnlessEqual(self._dll.tf_i(-2147483646), -715827882) self.failUnlessEqual(self.S(), -2147483646) def test_int_plus(self): self._dll.tf_bi.restype = c_int + self._dll.tf_bi.argtypes = (c_byte, c_int) self.failUnlessEqual(self._dll.tf_bi(0, -2147483646), -715827882) self.failUnlessEqual(self.S(), -2147483646) def test_uint(self): self._dll.tf_I.restype = c_uint + self._dll.tf_I.argtypes = (c_uint,) self.failUnlessEqual(self._dll.tf_I(4294967295), 1431655765) self.failUnlessEqual(self.U(), 4294967295) def test_uint_plus(self): self._dll.tf_bI.restype = c_uint + self._dll.tf_bI.argtypes = (c_byte, c_uint) self.failUnlessEqual(self._dll.tf_bI(0, 4294967295), 1431655765) self.failUnlessEqual(self.U(), 4294967295) diff --git a/Lib/ctypes/test/test_pointers.py b/Lib/ctypes/test/test_pointers.py index 600bb75..a7a2802 100644 --- a/Lib/ctypes/test/test_pointers.py +++ b/Lib/ctypes/test/test_pointers.py @@ -133,7 +133,7 @@ class PointersTestCase(unittest.TestCase): self.failUnlessEqual(p[0], 42) self.failUnlessEqual(p.contents.value, 42) - def test_charpp( self ): + def test_charpp(self): """Test that a character pointer-to-pointer is correctly passed""" dll = CDLL(_ctypes_test.__file__) func = dll._testfunc_c_p_p diff --git a/Lib/ctypes/test/test_structures.py b/Lib/ctypes/test/test_structures.py index 5340f79..49f064b 100644 --- a/Lib/ctypes/test/test_structures.py +++ b/Lib/ctypes/test/test_structures.py @@ -138,8 +138,8 @@ class StructureTestCase(unittest.TestCase): self.failUnlessEqual(X.y.size, sizeof(c_char)) # readonly - self.assertRaises(TypeError, setattr, X.x, "offset", 92) - self.assertRaises(TypeError, setattr, X.x, "size", 92) + self.assertRaises(AttributeError, setattr, X.x, "offset", 92) + self.assertRaises(AttributeError, setattr, X.x, "size", 92) class X(Union): _fields_ = [("x", c_int), @@ -152,8 +152,8 @@ class StructureTestCase(unittest.TestCase): self.failUnlessEqual(X.y.size, sizeof(c_char)) # readonly - self.assertRaises(TypeError, setattr, X.x, "offset", 92) - self.assertRaises(TypeError, setattr, X.x, "size", 92) + self.assertRaises(AttributeError, setattr, X.x, "offset", 92) + self.assertRaises(AttributeError, setattr, X.x, "size", 92) # XXX Should we check nested data types also? # offset is always relative to the class... diff --git a/Lib/doctest.py b/Lib/doctest.py index d549163..47b3aae 100644 --- a/Lib/doctest.py +++ b/Lib/doctest.py @@ -63,7 +63,6 @@ __all__ = [ 'REPORT_ONLY_FIRST_FAILURE', 'REPORTING_FLAGS', # 1. Utility Functions - 'is_private', # 2. Example & DocTest 'Example', 'DocTest', @@ -101,11 +100,6 @@ import unittest, difflib, pdb, tempfile import warnings from StringIO import StringIO -# Don't whine about the deprecated is_private function in this -# module's tests. -warnings.filterwarnings("ignore", "is_private", DeprecationWarning, - __name__, 0) - # There are 4 basic classes: # - Example: a <source, want> pair, plus an intra-docstring line number. # - DocTest: a collection of examples, parsed from a docstring, plus @@ -178,35 +172,6 @@ ELLIPSIS_MARKER = '...' ## 1. Utility Functions ###################################################################### -def is_private(prefix, base): - """prefix, base -> true iff name prefix + "." + base is "private". - - Prefix may be an empty string, and base does not contain a period. - Prefix is ignored (although functions you write conforming to this - protocol may make use of it). - Return true iff base begins with an (at least one) underscore, but - does not both begin and end with (at least) two underscores. - - >>> is_private("a.b", "my_func") - False - >>> is_private("____", "_my_func") - True - >>> is_private("someclass", "__init__") - False - >>> is_private("sometypo", "__init_") - True - >>> is_private("x.y.z", "_") - True - >>> is_private("_x.y.z", "__") - False - >>> is_private("", "") # senseless but consistent - False - """ - warnings.warn("is_private is deprecated; it wasn't useful; " - "examine DocTestFinder.find() lists instead", - DeprecationWarning, stacklevel=2) - return base[:1] == "_" and not base[:2] == "__" == base[-2:] - def _extract_future_flags(globs): """ Return the compiler-flags associated with the future features that @@ -759,7 +724,7 @@ class DocTestFinder: """ def __init__(self, verbose=False, parser=DocTestParser(), - recurse=True, _namefilter=None, exclude_empty=True): + recurse=True, exclude_empty=True): """ Create a new doctest finder. @@ -779,12 +744,8 @@ class DocTestFinder: self._verbose = verbose self._recurse = recurse self._exclude_empty = exclude_empty - # _namefilter is undocumented, and exists only for temporary backward- - # compatibility support of testmod's deprecated isprivate mess. - self._namefilter = _namefilter - def find(self, obj, name=None, module=None, globs=None, - extraglobs=None): + def find(self, obj, name=None, module=None, globs=None, extraglobs=None): """ Return a list of the DocTests that are defined by the given object's docstring, or by any of its contained objects' @@ -862,13 +823,6 @@ class DocTestFinder: self._find(tests, obj, name, module, source_lines, globs, {}) return tests - def _filter(self, obj, prefix, base): - """ - Return true if the given object should not be examined. - """ - return (self._namefilter is not None and - self._namefilter(prefix, base)) - def _from_module(self, module, object): """ Return true if the given object is defined in the given @@ -910,9 +864,6 @@ class DocTestFinder: # Look for tests in a module's contained objects. if inspect.ismodule(obj) and self._recurse: for valname, val in obj.__dict__.items(): - # Check if this contained object should be ignored. - if self._filter(val, name, valname): - continue valname = '%s.%s' % (name, valname) # Recurse to functions & classes. if ((inspect.isfunction(val) or inspect.isclass(val)) and @@ -941,9 +892,6 @@ class DocTestFinder: # Look for tests in a class's contained objects. if inspect.isclass(obj) and self._recurse: for valname, val in obj.__dict__.items(): - # Check if this contained object should be ignored. - if self._filter(val, name, valname): - continue # Special handling for staticmethod/classmethod. if isinstance(val, staticmethod): val = getattr(obj, valname) @@ -1751,17 +1699,16 @@ class DebugRunner(DocTestRunner): # class, updated by testmod. master = None -def testmod(m=None, name=None, globs=None, verbose=None, isprivate=None, +def testmod(m=None, name=None, globs=None, verbose=None, report=True, optionflags=0, extraglobs=None, raise_on_error=False, exclude_empty=False): - """m=None, name=None, globs=None, verbose=None, isprivate=None, - report=True, optionflags=0, extraglobs=None, raise_on_error=False, + """m=None, name=None, globs=None, verbose=None, report=True, + optionflags=0, extraglobs=None, raise_on_error=False, exclude_empty=False Test examples in docstrings in functions and classes reachable from module m (or the current module if m is not supplied), starting - with m.__doc__. Unless isprivate is specified, private names - are not skipped. + with m.__doc__. Also test examples reachable from dict m.__test__ if it exists and is not None. m.__test__ maps names to functions, classes and strings; @@ -1810,13 +1757,6 @@ def testmod(m=None, name=None, globs=None, verbose=None, isprivate=None, first unexpected exception or failure. This allows failures to be post-mortem debugged. - Deprecated in Python 2.4: - Optional keyword arg "isprivate" specifies a function used to - determine whether a name is private. The default function is - treat all functions as public. Optionally, "isprivate" can be - set to doctest.is_private to skip over functions marked as private - using the underscore naming convention; see its docs for details. - Advanced tomfoolery: testmod runs methods of a local instance of class doctest.Tester, then merges the results into (or creates) global Tester instance doctest.master. Methods of doctest.master @@ -1827,11 +1767,6 @@ def testmod(m=None, name=None, globs=None, verbose=None, isprivate=None, """ global master - if isprivate is not None: - warnings.warn("the isprivate argument is deprecated; " - "examine DocTestFinder.find() lists instead", - DeprecationWarning) - # If no module was given, then use __main__. if m is None: # DWA - m will still be None if this wasn't invoked from the command @@ -1848,7 +1783,7 @@ def testmod(m=None, name=None, globs=None, verbose=None, isprivate=None, name = m.__name__ # Find, parse, and run all tests in the given module. - finder = DocTestFinder(_namefilter=isprivate, exclude_empty=exclude_empty) + finder = DocTestFinder(exclude_empty=exclude_empty) if raise_on_error: runner = DebugRunner(verbose=verbose, optionflags=optionflags) @@ -2021,8 +1956,7 @@ def run_docstring_examples(f, globs, verbose=False, name="NoName", # actually used in any way. class Tester: - def __init__(self, mod=None, globs=None, verbose=None, - isprivate=None, optionflags=0): + def __init__(self, mod=None, globs=None, verbose=None, optionflags=0): warnings.warn("class Tester is deprecated; " "use class doctest.DocTestRunner instead", @@ -2037,9 +1971,8 @@ class Tester: self.globs = globs self.verbose = verbose - self.isprivate = isprivate self.optionflags = optionflags - self.testfinder = DocTestFinder(_namefilter=isprivate) + self.testfinder = DocTestFinder() self.testrunner = DocTestRunner(verbose=verbose, optionflags=optionflags) diff --git a/Lib/encodings/cp037.py b/Lib/encodings/cp037.py index 7e589a9..c802b89 100644 --- a/Lib/encodings/cp037.py +++ b/Lib/encodings/cp037.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\x9f' # 0xFF -> CONTROL ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2D, # ENQUIRY - 0x0006: 0x2E, # ACKNOWLEDGE - 0x0007: 0x2F, # BELL - 0x0008: 0x16, # BACKSPACE - 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000A: 0x25, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3C, # DEVICE CONTROL FOUR - 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x32, # SYNCHRONOUS IDLE - 0x0017: 0x26, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x3F, # SUBSTITUTE - 0x001B: 0x27, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x40, # SPACE - 0x0021: 0x5A, # EXCLAMATION MARK - 0x0022: 0x7F, # QUOTATION MARK - 0x0023: 0x7B, # NUMBER SIGN - 0x0024: 0x5B, # DOLLAR SIGN - 0x0025: 0x6C, # PERCENT SIGN - 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7D, # APOSTROPHE - 0x0028: 0x4D, # LEFT PARENTHESIS - 0x0029: 0x5D, # RIGHT PARENTHESIS - 0x002A: 0x5C, # ASTERISK - 0x002B: 0x4E, # PLUS SIGN - 0x002C: 0x6B, # COMMA - 0x002D: 0x60, # HYPHEN-MINUS - 0x002E: 0x4B, # FULL STOP - 0x002F: 0x61, # SOLIDUS - 0x0030: 0xF0, # DIGIT ZERO - 0x0031: 0xF1, # DIGIT ONE - 0x0032: 0xF2, # DIGIT TWO - 0x0033: 0xF3, # DIGIT THREE - 0x0034: 0xF4, # DIGIT FOUR - 0x0035: 0xF5, # DIGIT FIVE - 0x0036: 0xF6, # DIGIT SIX - 0x0037: 0xF7, # DIGIT SEVEN - 0x0038: 0xF8, # DIGIT EIGHT - 0x0039: 0xF9, # DIGIT NINE - 0x003A: 0x7A, # COLON - 0x003B: 0x5E, # SEMICOLON - 0x003C: 0x4C, # LESS-THAN SIGN - 0x003D: 0x7E, # EQUALS SIGN - 0x003E: 0x6E, # GREATER-THAN SIGN - 0x003F: 0x6F, # QUESTION MARK - 0x0040: 0x7C, # COMMERCIAL AT - 0x0041: 0xC1, # LATIN CAPITAL LETTER A - 0x0042: 0xC2, # LATIN CAPITAL LETTER B - 0x0043: 0xC3, # LATIN CAPITAL LETTER C - 0x0044: 0xC4, # LATIN CAPITAL LETTER D - 0x0045: 0xC5, # LATIN CAPITAL LETTER E - 0x0046: 0xC6, # LATIN CAPITAL LETTER F - 0x0047: 0xC7, # LATIN CAPITAL LETTER G - 0x0048: 0xC8, # LATIN CAPITAL LETTER H - 0x0049: 0xC9, # LATIN CAPITAL LETTER I - 0x004A: 0xD1, # LATIN CAPITAL LETTER J - 0x004B: 0xD2, # LATIN CAPITAL LETTER K - 0x004C: 0xD3, # LATIN CAPITAL LETTER L - 0x004D: 0xD4, # LATIN CAPITAL LETTER M - 0x004E: 0xD5, # LATIN CAPITAL LETTER N - 0x004F: 0xD6, # LATIN CAPITAL LETTER O - 0x0050: 0xD7, # LATIN CAPITAL LETTER P - 0x0051: 0xD8, # LATIN CAPITAL LETTER Q - 0x0052: 0xD9, # LATIN CAPITAL LETTER R - 0x0053: 0xE2, # LATIN CAPITAL LETTER S - 0x0054: 0xE3, # LATIN CAPITAL LETTER T - 0x0055: 0xE4, # LATIN CAPITAL LETTER U - 0x0056: 0xE5, # LATIN CAPITAL LETTER V - 0x0057: 0xE6, # LATIN CAPITAL LETTER W - 0x0058: 0xE7, # LATIN CAPITAL LETTER X - 0x0059: 0xE8, # LATIN CAPITAL LETTER Y - 0x005A: 0xE9, # LATIN CAPITAL LETTER Z - 0x005B: 0xBA, # LEFT SQUARE BRACKET - 0x005C: 0xE0, # REVERSE SOLIDUS - 0x005D: 0xBB, # RIGHT SQUARE BRACKET - 0x005E: 0xB0, # CIRCUMFLEX ACCENT - 0x005F: 0x6D, # LOW LINE - 0x0060: 0x79, # GRAVE ACCENT - 0x0061: 0x81, # LATIN SMALL LETTER A - 0x0062: 0x82, # LATIN SMALL LETTER B - 0x0063: 0x83, # LATIN SMALL LETTER C - 0x0064: 0x84, # LATIN SMALL LETTER D - 0x0065: 0x85, # LATIN SMALL LETTER E - 0x0066: 0x86, # LATIN SMALL LETTER F - 0x0067: 0x87, # LATIN SMALL LETTER G - 0x0068: 0x88, # LATIN SMALL LETTER H - 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006A: 0x91, # LATIN SMALL LETTER J - 0x006B: 0x92, # LATIN SMALL LETTER K - 0x006C: 0x93, # LATIN SMALL LETTER L - 0x006D: 0x94, # LATIN SMALL LETTER M - 0x006E: 0x95, # LATIN SMALL LETTER N - 0x006F: 0x96, # LATIN SMALL LETTER O - 0x0070: 0x97, # LATIN SMALL LETTER P - 0x0071: 0x98, # LATIN SMALL LETTER Q - 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 0xA2, # LATIN SMALL LETTER S - 0x0074: 0xA3, # LATIN SMALL LETTER T - 0x0075: 0xA4, # LATIN SMALL LETTER U - 0x0076: 0xA5, # LATIN SMALL LETTER V - 0x0077: 0xA6, # LATIN SMALL LETTER W - 0x0078: 0xA7, # LATIN SMALL LETTER X - 0x0079: 0xA8, # LATIN SMALL LETTER Y - 0x007A: 0xA9, # LATIN SMALL LETTER Z - 0x007B: 0xC0, # LEFT CURLY BRACKET - 0x007C: 0x4F, # VERTICAL LINE - 0x007D: 0xD0, # RIGHT CURLY BRACKET - 0x007E: 0xA1, # TILDE - 0x007F: 0x07, # DELETE - 0x0080: 0x20, # CONTROL - 0x0081: 0x21, # CONTROL - 0x0082: 0x22, # CONTROL - 0x0083: 0x23, # CONTROL - 0x0084: 0x24, # CONTROL - 0x0085: 0x15, # CONTROL - 0x0086: 0x06, # CONTROL - 0x0087: 0x17, # CONTROL - 0x0088: 0x28, # CONTROL - 0x0089: 0x29, # CONTROL - 0x008A: 0x2A, # CONTROL - 0x008B: 0x2B, # CONTROL - 0x008C: 0x2C, # CONTROL - 0x008D: 0x09, # CONTROL - 0x008E: 0x0A, # CONTROL - 0x008F: 0x1B, # CONTROL - 0x0090: 0x30, # CONTROL - 0x0091: 0x31, # CONTROL - 0x0092: 0x1A, # CONTROL - 0x0093: 0x33, # CONTROL - 0x0094: 0x34, # CONTROL - 0x0095: 0x35, # CONTROL - 0x0096: 0x36, # CONTROL - 0x0097: 0x08, # CONTROL - 0x0098: 0x38, # CONTROL - 0x0099: 0x39, # CONTROL - 0x009A: 0x3A, # CONTROL - 0x009B: 0x3B, # CONTROL - 0x009C: 0x04, # CONTROL - 0x009D: 0x14, # CONTROL - 0x009E: 0x3E, # CONTROL - 0x009F: 0xFF, # CONTROL - 0x00A0: 0x41, # NO-BREAK SPACE - 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK - 0x00A2: 0x4A, # CENT SIGN - 0x00A3: 0xB1, # POUND SIGN - 0x00A4: 0x9F, # CURRENCY SIGN - 0x00A5: 0xB2, # YEN SIGN - 0x00A6: 0x6A, # BROKEN BAR - 0x00A7: 0xB5, # SECTION SIGN - 0x00A8: 0xBD, # DIAERESIS - 0x00A9: 0xB4, # COPYRIGHT SIGN - 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR - 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0x5F, # NOT SIGN - 0x00AD: 0xCA, # SOFT HYPHEN - 0x00AE: 0xAF, # REGISTERED SIGN - 0x00AF: 0xBC, # MACRON - 0x00B0: 0x90, # DEGREE SIGN - 0x00B1: 0x8F, # PLUS-MINUS SIGN - 0x00B2: 0xEA, # SUPERSCRIPT TWO - 0x00B3: 0xFA, # SUPERSCRIPT THREE - 0x00B4: 0xBE, # ACUTE ACCENT - 0x00B5: 0xA0, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB3, # MIDDLE DOT - 0x00B8: 0x9D, # CEDILLA - 0x00B9: 0xDA, # SUPERSCRIPT ONE - 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF - 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xAB, # INVERTED QUESTION MARK - 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE - 0x00C7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xAC, # LATIN CAPITAL LETTER ETH (ICELANDIC) - 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xEC, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xBF, # MULTIPLICATION SIGN - 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xFC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xAD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DE: 0xAE, # LATIN CAPITAL LETTER THORN (ICELANDIC) - 0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE - 0x00E7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0x8C, # LATIN SMALL LETTER ETH (ICELANDIC) - 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xCC, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xE1, # DIVISION SIGN - 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xDC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0x8D, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0x8E, # LATIN SMALL LETTER THORN (ICELANDIC) - 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp1006.py b/Lib/encodings/cp1006.py index 7829969..e21e804 100644 --- a/Lib/encodings/cp1006.py +++ b/Lib/encodings/cp1006.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,262 +303,5 @@ decoding_table = ( u'\ufe7d' # 0xFF -> ARABIC SHADDA MEDIAL FORM ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00AD: 0xAD, # SOFT HYPHEN - 0x060C: 0xAB, # ARABIC COMMA - 0x061B: 0xAC, # ARABIC SEMICOLON - 0x061F: 0xAE, # ARABIC QUESTION MARK - 0x06F0: 0xA1, # EXTENDED ARABIC-INDIC DIGIT ZERO - 0x06F1: 0xA2, # EXTENDED ARABIC-INDIC DIGIT ONE - 0x06F2: 0xA3, # EXTENDED ARABIC-INDIC DIGIT TWO - 0x06F3: 0xA4, # EXTENDED ARABIC-INDIC DIGIT THREE - 0x06F4: 0xA5, # EXTENDED ARABIC-INDIC DIGIT FOUR - 0x06F5: 0xA6, # EXTENDED ARABIC-INDIC DIGIT FIVE - 0x06F6: 0xA7, # EXTENDED ARABIC-INDIC DIGIT SIX - 0x06F7: 0xA8, # EXTENDED ARABIC-INDIC DIGIT SEVEN - 0x06F8: 0xA9, # EXTENDED ARABIC-INDIC DIGIT EIGHT - 0x06F9: 0xAA, # EXTENDED ARABIC-INDIC DIGIT NINE - 0xFB56: 0xB5, # ARABIC LETTER PEH ISOLATED FORM - 0xFB58: 0xB6, # ARABIC LETTER PEH INITIAL FORM - 0xFB66: 0xBA, # ARABIC LETTER TTEH ISOLATED FORM - 0xFB68: 0xBB, # ARABIC LETTER TTEH INITIAL FORM - 0xFB7A: 0xC0, # ARABIC LETTER TCHEH ISOLATED FORM - 0xFB7C: 0xC1, # ARABIC LETTER TCHEH INITIAL FORM - 0xFB84: 0xC7, # ARABIC LETTER DAHAL ISOLATED FORMN - 0xFB8A: 0xCC, # ARABIC LETTER JEH ISOLATED FORM - 0xFB8C: 0xCA, # ARABIC LETTER RREH ISOLATED FORM - 0xFB92: 0xE5, # ARABIC LETTER GAF ISOLATED FORM - 0xFB94: 0xE6, # ARABIC LETTER GAF INITIAL FORM - 0xFB9E: 0xEC, # ARABIC LETTER NOON GHUNNA ISOLATED FORM - 0xFBA6: 0xF1, # ARABIC LETTER HEH GOAL ISOLATED FORM - 0xFBA8: 0xF2, # ARABIC LETTER HEH GOAL INITIAL FORM - 0xFBA9: 0xF3, # ARABIC LETTER HEH GOAL MEDIAL FORM - 0xFBAA: 0xF4, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM - 0xFBAE: 0xFD, # ARABIC LETTER YEH BARREE ISOLATED FORM - 0xFBB0: 0xFC, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM - 0xFE7C: 0xFE, # ARABIC SHADDA ISOLATED FORM - 0xFE7D: 0xFF, # ARABIC SHADDA MEDIAL FORM - 0xFE80: 0xF5, # ARABIC LETTER HAMZA ISOLATED FORM - 0xFE81: 0xAF, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - 0xFE85: 0xEF, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - 0xFE89: 0xF6, # ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM - 0xFE8A: 0xF7, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM - 0xFE8B: 0xF8, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - 0xFE8D: 0xB0, # ARABIC LETTER ALEF ISOLATED FORM - 0xFE8E: None, # ARABIC LETTER ALEF FINAL FORM - 0xFE8F: 0xB3, # ARABIC LETTER BEH ISOLATED FORM - 0xFE91: 0xB4, # ARABIC LETTER BEH INITIAL FORM - 0xFE93: 0xB7, # ARABIC LETTER TEH MARBUTA ISOLATED FORM - 0xFE95: 0xB8, # ARABIC LETTER TEH ISOLATED FORM - 0xFE97: 0xB9, # ARABIC LETTER TEH INITIAL FORM - 0xFE99: 0xBC, # ARABIC LETTER THEH ISOLATED FORM - 0xFE9B: 0xBD, # ARABIC LETTER THEH INITIAL FORM - 0xFE9D: 0xBE, # ARABIC LETTER JEEM ISOLATED FORM - 0xFE9F: 0xBF, # ARABIC LETTER JEEM INITIAL FORM - 0xFEA1: 0xC2, # ARABIC LETTER HAH ISOLATED FORM - 0xFEA3: 0xC3, # ARABIC LETTER HAH INITIAL FORM - 0xFEA5: 0xC4, # ARABIC LETTER KHAH ISOLATED FORM - 0xFEA7: 0xC5, # ARABIC LETTER KHAH INITIAL FORM - 0xFEA9: 0xC6, # ARABIC LETTER DAL ISOLATED FORM - 0xFEAB: 0xC8, # ARABIC LETTER THAL ISOLATED FORM - 0xFEAD: 0xC9, # ARABIC LETTER REH ISOLATED FORM - 0xFEAF: 0xCB, # ARABIC LETTER ZAIN ISOLATED FORM - 0xFEB1: 0xCD, # ARABIC LETTER SEEN ISOLATED FORM - 0xFEB3: 0xCE, # ARABIC LETTER SEEN INITIAL FORM - 0xFEB5: 0xCF, # ARABIC LETTER SHEEN ISOLATED FORM - 0xFEB7: 0xD0, # ARABIC LETTER SHEEN INITIAL FORM - 0xFEB9: 0xD1, # ARABIC LETTER SAD ISOLATED FORM - 0xFEBB: 0xD2, # ARABIC LETTER SAD INITIAL FORM - 0xFEBD: 0xD3, # ARABIC LETTER DAD ISOLATED FORM - 0xFEBF: 0xD4, # ARABIC LETTER DAD INITIAL FORM - 0xFEC1: 0xD5, # ARABIC LETTER TAH ISOLATED FORM - 0xFEC5: 0xD6, # ARABIC LETTER ZAH ISOLATED FORM - 0xFEC9: 0xD7, # ARABIC LETTER AIN ISOLATED FORM - 0xFECA: 0xD8, # ARABIC LETTER AIN FINAL FORM - 0xFECB: 0xD9, # ARABIC LETTER AIN INITIAL FORM - 0xFECC: 0xDA, # ARABIC LETTER AIN MEDIAL FORM - 0xFECD: 0xDB, # ARABIC LETTER GHAIN ISOLATED FORM - 0xFECE: 0xDC, # ARABIC LETTER GHAIN FINAL FORM - 0xFECF: 0xDD, # ARABIC LETTER GHAIN INITIAL FORM - 0xFED0: 0xDE, # ARABIC LETTER GHAIN MEDIAL FORM - 0xFED1: 0xDF, # ARABIC LETTER FEH ISOLATED FORM - 0xFED3: 0xE0, # ARABIC LETTER FEH INITIAL FORM - 0xFED5: 0xE1, # ARABIC LETTER QAF ISOLATED FORM - 0xFED7: 0xE2, # ARABIC LETTER QAF INITIAL FORM - 0xFED9: 0xE3, # ARABIC LETTER KAF ISOLATED FORM - 0xFEDB: 0xE4, # ARABIC LETTER KAF INITIAL FORM - 0xFEDD: 0xE7, # ARABIC LETTER LAM ISOLATED FORM - 0xFEDF: 0xE8, # ARABIC LETTER LAM INITIAL FORM - 0xFEE0: 0xE9, # ARABIC LETTER LAM MEDIAL FORM - 0xFEE1: 0xEA, # ARABIC LETTER MEEM ISOLATED FORM - 0xFEE3: 0xEB, # ARABIC LETTER MEEM INITIAL FORM - 0xFEE5: 0xED, # ARABIC LETTER NOON ISOLATED FORM - 0xFEE7: 0xEE, # ARABIC LETTER NOON INITIAL FORM - 0xFEED: 0xF0, # ARABIC LETTER WAW ISOLATED FORM - 0xFEF1: 0xF9, # ARABIC LETTER YEH ISOLATED FORM - 0xFEF2: 0xFA, # ARABIC LETTER YEH FINAL FORM - 0xFEF3: 0xFB, # ARABIC LETTER YEH INITIAL FORM -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp1026.py b/Lib/encodings/cp1026.py index 01c8804..45bbe62 100644 --- a/Lib/encodings/cp1026.py +++ b/Lib/encodings/cp1026.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\x9f' # 0xFF -> CONTROL ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2D, # ENQUIRY - 0x0006: 0x2E, # ACKNOWLEDGE - 0x0007: 0x2F, # BELL - 0x0008: 0x16, # BACKSPACE - 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000A: 0x25, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3C, # DEVICE CONTROL FOUR - 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x32, # SYNCHRONOUS IDLE - 0x0017: 0x26, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x3F, # SUBSTITUTE - 0x001B: 0x27, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x40, # SPACE - 0x0021: 0x4F, # EXCLAMATION MARK - 0x0022: 0xFC, # QUOTATION MARK - 0x0023: 0xEC, # NUMBER SIGN - 0x0024: 0xAD, # DOLLAR SIGN - 0x0025: 0x6C, # PERCENT SIGN - 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7D, # APOSTROPHE - 0x0028: 0x4D, # LEFT PARENTHESIS - 0x0029: 0x5D, # RIGHT PARENTHESIS - 0x002A: 0x5C, # ASTERISK - 0x002B: 0x4E, # PLUS SIGN - 0x002C: 0x6B, # COMMA - 0x002D: 0x60, # HYPHEN-MINUS - 0x002E: 0x4B, # FULL STOP - 0x002F: 0x61, # SOLIDUS - 0x0030: 0xF0, # DIGIT ZERO - 0x0031: 0xF1, # DIGIT ONE - 0x0032: 0xF2, # DIGIT TWO - 0x0033: 0xF3, # DIGIT THREE - 0x0034: 0xF4, # DIGIT FOUR - 0x0035: 0xF5, # DIGIT FIVE - 0x0036: 0xF6, # DIGIT SIX - 0x0037: 0xF7, # DIGIT SEVEN - 0x0038: 0xF8, # DIGIT EIGHT - 0x0039: 0xF9, # DIGIT NINE - 0x003A: 0x7A, # COLON - 0x003B: 0x5E, # SEMICOLON - 0x003C: 0x4C, # LESS-THAN SIGN - 0x003D: 0x7E, # EQUALS SIGN - 0x003E: 0x6E, # GREATER-THAN SIGN - 0x003F: 0x6F, # QUESTION MARK - 0x0040: 0xAE, # COMMERCIAL AT - 0x0041: 0xC1, # LATIN CAPITAL LETTER A - 0x0042: 0xC2, # LATIN CAPITAL LETTER B - 0x0043: 0xC3, # LATIN CAPITAL LETTER C - 0x0044: 0xC4, # LATIN CAPITAL LETTER D - 0x0045: 0xC5, # LATIN CAPITAL LETTER E - 0x0046: 0xC6, # LATIN CAPITAL LETTER F - 0x0047: 0xC7, # LATIN CAPITAL LETTER G - 0x0048: 0xC8, # LATIN CAPITAL LETTER H - 0x0049: 0xC9, # LATIN CAPITAL LETTER I - 0x004A: 0xD1, # LATIN CAPITAL LETTER J - 0x004B: 0xD2, # LATIN CAPITAL LETTER K - 0x004C: 0xD3, # LATIN CAPITAL LETTER L - 0x004D: 0xD4, # LATIN CAPITAL LETTER M - 0x004E: 0xD5, # LATIN CAPITAL LETTER N - 0x004F: 0xD6, # LATIN CAPITAL LETTER O - 0x0050: 0xD7, # LATIN CAPITAL LETTER P - 0x0051: 0xD8, # LATIN CAPITAL LETTER Q - 0x0052: 0xD9, # LATIN CAPITAL LETTER R - 0x0053: 0xE2, # LATIN CAPITAL LETTER S - 0x0054: 0xE3, # LATIN CAPITAL LETTER T - 0x0055: 0xE4, # LATIN CAPITAL LETTER U - 0x0056: 0xE5, # LATIN CAPITAL LETTER V - 0x0057: 0xE6, # LATIN CAPITAL LETTER W - 0x0058: 0xE7, # LATIN CAPITAL LETTER X - 0x0059: 0xE8, # LATIN CAPITAL LETTER Y - 0x005A: 0xE9, # LATIN CAPITAL LETTER Z - 0x005B: 0x68, # LEFT SQUARE BRACKET - 0x005C: 0xDC, # REVERSE SOLIDUS - 0x005D: 0xAC, # RIGHT SQUARE BRACKET - 0x005E: 0x5F, # CIRCUMFLEX ACCENT - 0x005F: 0x6D, # LOW LINE - 0x0060: 0x8D, # GRAVE ACCENT - 0x0061: 0x81, # LATIN SMALL LETTER A - 0x0062: 0x82, # LATIN SMALL LETTER B - 0x0063: 0x83, # LATIN SMALL LETTER C - 0x0064: 0x84, # LATIN SMALL LETTER D - 0x0065: 0x85, # LATIN SMALL LETTER E - 0x0066: 0x86, # LATIN SMALL LETTER F - 0x0067: 0x87, # LATIN SMALL LETTER G - 0x0068: 0x88, # LATIN SMALL LETTER H - 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006A: 0x91, # LATIN SMALL LETTER J - 0x006B: 0x92, # LATIN SMALL LETTER K - 0x006C: 0x93, # LATIN SMALL LETTER L - 0x006D: 0x94, # LATIN SMALL LETTER M - 0x006E: 0x95, # LATIN SMALL LETTER N - 0x006F: 0x96, # LATIN SMALL LETTER O - 0x0070: 0x97, # LATIN SMALL LETTER P - 0x0071: 0x98, # LATIN SMALL LETTER Q - 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 0xA2, # LATIN SMALL LETTER S - 0x0074: 0xA3, # LATIN SMALL LETTER T - 0x0075: 0xA4, # LATIN SMALL LETTER U - 0x0076: 0xA5, # LATIN SMALL LETTER V - 0x0077: 0xA6, # LATIN SMALL LETTER W - 0x0078: 0xA7, # LATIN SMALL LETTER X - 0x0079: 0xA8, # LATIN SMALL LETTER Y - 0x007A: 0xA9, # LATIN SMALL LETTER Z - 0x007B: 0x48, # LEFT CURLY BRACKET - 0x007C: 0xBB, # VERTICAL LINE - 0x007D: 0x8C, # RIGHT CURLY BRACKET - 0x007E: 0xCC, # TILDE - 0x007F: 0x07, # DELETE - 0x0080: 0x20, # CONTROL - 0x0081: 0x21, # CONTROL - 0x0082: 0x22, # CONTROL - 0x0083: 0x23, # CONTROL - 0x0084: 0x24, # CONTROL - 0x0085: 0x15, # CONTROL - 0x0086: 0x06, # CONTROL - 0x0087: 0x17, # CONTROL - 0x0088: 0x28, # CONTROL - 0x0089: 0x29, # CONTROL - 0x008A: 0x2A, # CONTROL - 0x008B: 0x2B, # CONTROL - 0x008C: 0x2C, # CONTROL - 0x008D: 0x09, # CONTROL - 0x008E: 0x0A, # CONTROL - 0x008F: 0x1B, # CONTROL - 0x0090: 0x30, # CONTROL - 0x0091: 0x31, # CONTROL - 0x0092: 0x1A, # CONTROL - 0x0093: 0x33, # CONTROL - 0x0094: 0x34, # CONTROL - 0x0095: 0x35, # CONTROL - 0x0096: 0x36, # CONTROL - 0x0097: 0x08, # CONTROL - 0x0098: 0x38, # CONTROL - 0x0099: 0x39, # CONTROL - 0x009A: 0x3A, # CONTROL - 0x009B: 0x3B, # CONTROL - 0x009C: 0x04, # CONTROL - 0x009D: 0x14, # CONTROL - 0x009E: 0x3E, # CONTROL - 0x009F: 0xFF, # CONTROL - 0x00A0: 0x41, # NO-BREAK SPACE - 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK - 0x00A2: 0xB0, # CENT SIGN - 0x00A3: 0xB1, # POUND SIGN - 0x00A4: 0x9F, # CURRENCY SIGN - 0x00A5: 0xB2, # YEN SIGN - 0x00A6: 0x8E, # BROKEN BAR - 0x00A7: 0xB5, # SECTION SIGN - 0x00A8: 0xBD, # DIAERESIS - 0x00A9: 0xB4, # COPYRIGHT SIGN - 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR - 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xBA, # NOT SIGN - 0x00AD: 0xCA, # SOFT HYPHEN - 0x00AE: 0xAF, # REGISTERED SIGN - 0x00AF: 0xBC, # MACRON - 0x00B0: 0x90, # DEGREE SIGN - 0x00B1: 0x8F, # PLUS-MINUS SIGN - 0x00B2: 0xEA, # SUPERSCRIPT TWO - 0x00B3: 0xFA, # SUPERSCRIPT THREE - 0x00B4: 0xBE, # ACUTE ACCENT - 0x00B5: 0xA0, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB3, # MIDDLE DOT - 0x00B8: 0x9D, # CEDILLA - 0x00B9: 0xDA, # SUPERSCRIPT ONE - 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF - 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xAB, # INVERTED QUESTION MARK - 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE - 0x00C7: 0x4A, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0x7B, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xBF, # MULTIPLICATION SIGN - 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0x7F, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE - 0x00E7: 0xC0, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xA1, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xE1, # DIVISION SIGN - 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xE0, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x011E: 0x5A, # LATIN CAPITAL LETTER G WITH BREVE - 0x011F: 0xD0, # LATIN SMALL LETTER G WITH BREVE - 0x0130: 0x5B, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0x79, # LATIN SMALL LETTER DOTLESS I - 0x015E: 0x7C, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015F: 0x6A, # LATIN SMALL LETTER S WITH CEDILLA -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp1140.py b/Lib/encodings/cp1140.py index ac8d41b..7e507fd 100644 --- a/Lib/encodings/cp1140.py +++ b/Lib/encodings/cp1140.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\x9f' # 0xFF -> CONTROL ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2D, # ENQUIRY - 0x0006: 0x2E, # ACKNOWLEDGE - 0x0007: 0x2F, # BELL - 0x0008: 0x16, # BACKSPACE - 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000A: 0x25, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3C, # DEVICE CONTROL FOUR - 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x32, # SYNCHRONOUS IDLE - 0x0017: 0x26, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x3F, # SUBSTITUTE - 0x001B: 0x27, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x40, # SPACE - 0x0021: 0x5A, # EXCLAMATION MARK - 0x0022: 0x7F, # QUOTATION MARK - 0x0023: 0x7B, # NUMBER SIGN - 0x0024: 0x5B, # DOLLAR SIGN - 0x0025: 0x6C, # PERCENT SIGN - 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7D, # APOSTROPHE - 0x0028: 0x4D, # LEFT PARENTHESIS - 0x0029: 0x5D, # RIGHT PARENTHESIS - 0x002A: 0x5C, # ASTERISK - 0x002B: 0x4E, # PLUS SIGN - 0x002C: 0x6B, # COMMA - 0x002D: 0x60, # HYPHEN-MINUS - 0x002E: 0x4B, # FULL STOP - 0x002F: 0x61, # SOLIDUS - 0x0030: 0xF0, # DIGIT ZERO - 0x0031: 0xF1, # DIGIT ONE - 0x0032: 0xF2, # DIGIT TWO - 0x0033: 0xF3, # DIGIT THREE - 0x0034: 0xF4, # DIGIT FOUR - 0x0035: 0xF5, # DIGIT FIVE - 0x0036: 0xF6, # DIGIT SIX - 0x0037: 0xF7, # DIGIT SEVEN - 0x0038: 0xF8, # DIGIT EIGHT - 0x0039: 0xF9, # DIGIT NINE - 0x003A: 0x7A, # COLON - 0x003B: 0x5E, # SEMICOLON - 0x003C: 0x4C, # LESS-THAN SIGN - 0x003D: 0x7E, # EQUALS SIGN - 0x003E: 0x6E, # GREATER-THAN SIGN - 0x003F: 0x6F, # QUESTION MARK - 0x0040: 0x7C, # COMMERCIAL AT - 0x0041: 0xC1, # LATIN CAPITAL LETTER A - 0x0042: 0xC2, # LATIN CAPITAL LETTER B - 0x0043: 0xC3, # LATIN CAPITAL LETTER C - 0x0044: 0xC4, # LATIN CAPITAL LETTER D - 0x0045: 0xC5, # LATIN CAPITAL LETTER E - 0x0046: 0xC6, # LATIN CAPITAL LETTER F - 0x0047: 0xC7, # LATIN CAPITAL LETTER G - 0x0048: 0xC8, # LATIN CAPITAL LETTER H - 0x0049: 0xC9, # LATIN CAPITAL LETTER I - 0x004A: 0xD1, # LATIN CAPITAL LETTER J - 0x004B: 0xD2, # LATIN CAPITAL LETTER K - 0x004C: 0xD3, # LATIN CAPITAL LETTER L - 0x004D: 0xD4, # LATIN CAPITAL LETTER M - 0x004E: 0xD5, # LATIN CAPITAL LETTER N - 0x004F: 0xD6, # LATIN CAPITAL LETTER O - 0x0050: 0xD7, # LATIN CAPITAL LETTER P - 0x0051: 0xD8, # LATIN CAPITAL LETTER Q - 0x0052: 0xD9, # LATIN CAPITAL LETTER R - 0x0053: 0xE2, # LATIN CAPITAL LETTER S - 0x0054: 0xE3, # LATIN CAPITAL LETTER T - 0x0055: 0xE4, # LATIN CAPITAL LETTER U - 0x0056: 0xE5, # LATIN CAPITAL LETTER V - 0x0057: 0xE6, # LATIN CAPITAL LETTER W - 0x0058: 0xE7, # LATIN CAPITAL LETTER X - 0x0059: 0xE8, # LATIN CAPITAL LETTER Y - 0x005A: 0xE9, # LATIN CAPITAL LETTER Z - 0x005B: 0xBA, # LEFT SQUARE BRACKET - 0x005C: 0xE0, # REVERSE SOLIDUS - 0x005D: 0xBB, # RIGHT SQUARE BRACKET - 0x005E: 0xB0, # CIRCUMFLEX ACCENT - 0x005F: 0x6D, # LOW LINE - 0x0060: 0x79, # GRAVE ACCENT - 0x0061: 0x81, # LATIN SMALL LETTER A - 0x0062: 0x82, # LATIN SMALL LETTER B - 0x0063: 0x83, # LATIN SMALL LETTER C - 0x0064: 0x84, # LATIN SMALL LETTER D - 0x0065: 0x85, # LATIN SMALL LETTER E - 0x0066: 0x86, # LATIN SMALL LETTER F - 0x0067: 0x87, # LATIN SMALL LETTER G - 0x0068: 0x88, # LATIN SMALL LETTER H - 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006A: 0x91, # LATIN SMALL LETTER J - 0x006B: 0x92, # LATIN SMALL LETTER K - 0x006C: 0x93, # LATIN SMALL LETTER L - 0x006D: 0x94, # LATIN SMALL LETTER M - 0x006E: 0x95, # LATIN SMALL LETTER N - 0x006F: 0x96, # LATIN SMALL LETTER O - 0x0070: 0x97, # LATIN SMALL LETTER P - 0x0071: 0x98, # LATIN SMALL LETTER Q - 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 0xA2, # LATIN SMALL LETTER S - 0x0074: 0xA3, # LATIN SMALL LETTER T - 0x0075: 0xA4, # LATIN SMALL LETTER U - 0x0076: 0xA5, # LATIN SMALL LETTER V - 0x0077: 0xA6, # LATIN SMALL LETTER W - 0x0078: 0xA7, # LATIN SMALL LETTER X - 0x0079: 0xA8, # LATIN SMALL LETTER Y - 0x007A: 0xA9, # LATIN SMALL LETTER Z - 0x007B: 0xC0, # LEFT CURLY BRACKET - 0x007C: 0x4F, # VERTICAL LINE - 0x007D: 0xD0, # RIGHT CURLY BRACKET - 0x007E: 0xA1, # TILDE - 0x007F: 0x07, # DELETE - 0x0080: 0x20, # CONTROL - 0x0081: 0x21, # CONTROL - 0x0082: 0x22, # CONTROL - 0x0083: 0x23, # CONTROL - 0x0084: 0x24, # CONTROL - 0x0085: 0x15, # CONTROL - 0x0086: 0x06, # CONTROL - 0x0087: 0x17, # CONTROL - 0x0088: 0x28, # CONTROL - 0x0089: 0x29, # CONTROL - 0x008A: 0x2A, # CONTROL - 0x008B: 0x2B, # CONTROL - 0x008C: 0x2C, # CONTROL - 0x008D: 0x09, # CONTROL - 0x008E: 0x0A, # CONTROL - 0x008F: 0x1B, # CONTROL - 0x0090: 0x30, # CONTROL - 0x0091: 0x31, # CONTROL - 0x0092: 0x1A, # CONTROL - 0x0093: 0x33, # CONTROL - 0x0094: 0x34, # CONTROL - 0x0095: 0x35, # CONTROL - 0x0096: 0x36, # CONTROL - 0x0097: 0x08, # CONTROL - 0x0098: 0x38, # CONTROL - 0x0099: 0x39, # CONTROL - 0x009A: 0x3A, # CONTROL - 0x009B: 0x3B, # CONTROL - 0x009C: 0x04, # CONTROL - 0x009D: 0x14, # CONTROL - 0x009E: 0x3E, # CONTROL - 0x009F: 0xFF, # CONTROL - 0x00A0: 0x41, # NO-BREAK SPACE - 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK - 0x00A2: 0x4A, # CENT SIGN - 0x00A3: 0xB1, # POUND SIGN - 0x00A5: 0xB2, # YEN SIGN - 0x00A6: 0x6A, # BROKEN BAR - 0x00A7: 0xB5, # SECTION SIGN - 0x00A8: 0xBD, # DIAERESIS - 0x00A9: 0xB4, # COPYRIGHT SIGN - 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR - 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0x5F, # NOT SIGN - 0x00AD: 0xCA, # SOFT HYPHEN - 0x00AE: 0xAF, # REGISTERED SIGN - 0x00AF: 0xBC, # MACRON - 0x00B0: 0x90, # DEGREE SIGN - 0x00B1: 0x8F, # PLUS-MINUS SIGN - 0x00B2: 0xEA, # SUPERSCRIPT TWO - 0x00B3: 0xFA, # SUPERSCRIPT THREE - 0x00B4: 0xBE, # ACUTE ACCENT - 0x00B5: 0xA0, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB3, # MIDDLE DOT - 0x00B8: 0x9D, # CEDILLA - 0x00B9: 0xDA, # SUPERSCRIPT ONE - 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF - 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xAB, # INVERTED QUESTION MARK - 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE - 0x00C7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xAC, # LATIN CAPITAL LETTER ETH (ICELANDIC) - 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xEC, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xBF, # MULTIPLICATION SIGN - 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xFC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xAD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DE: 0xAE, # LATIN CAPITAL LETTER THORN (ICELANDIC) - 0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE - 0x00E7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0x8C, # LATIN SMALL LETTER ETH (ICELANDIC) - 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xCC, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xE1, # DIVISION SIGN - 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xDC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0x8D, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0x8E, # LATIN SMALL LETTER THORN (ICELANDIC) - 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x20AC: 0x9F, # EURO SIGN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp1250.py b/Lib/encodings/cp1250.py index 6e6f57c..d620b89 100644 --- a/Lib/encodings/cp1250.py +++ b/Lib/encodings/cp1250.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,258 +303,5 @@ decoding_table = ( u'\u02d9' # 0xFF -> DOT ABOVE ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE - 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE - 0x0104: 0xA5, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xB9, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0xC6, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xE6, # LATIN SMALL LETTER C WITH ACUTE - 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON - 0x010E: 0xCF, # LATIN CAPITAL LETTER D WITH CARON - 0x010F: 0xEF, # LATIN SMALL LETTER D WITH CARON - 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE - 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK - 0x011A: 0xCC, # LATIN CAPITAL LETTER E WITH CARON - 0x011B: 0xEC, # LATIN SMALL LETTER E WITH CARON - 0x0139: 0xC5, # LATIN CAPITAL LETTER L WITH ACUTE - 0x013A: 0xE5, # LATIN SMALL LETTER L WITH ACUTE - 0x013D: 0xBC, # LATIN CAPITAL LETTER L WITH CARON - 0x013E: 0xBE, # LATIN SMALL LETTER L WITH CARON - 0x0141: 0xA3, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xB3, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE - 0x0147: 0xD2, # LATIN CAPITAL LETTER N WITH CARON - 0x0148: 0xF2, # LATIN SMALL LETTER N WITH CARON - 0x0150: 0xD5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x0151: 0xF5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x0154: 0xC0, # LATIN CAPITAL LETTER R WITH ACUTE - 0x0155: 0xE0, # LATIN SMALL LETTER R WITH ACUTE - 0x0158: 0xD8, # LATIN CAPITAL LETTER R WITH CARON - 0x0159: 0xF8, # LATIN SMALL LETTER R WITH CARON - 0x015A: 0x8C, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015B: 0x9C, # LATIN SMALL LETTER S WITH ACUTE - 0x015E: 0xAA, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015F: 0xBA, # LATIN SMALL LETTER S WITH CEDILLA - 0x0160: 0x8A, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0x9A, # LATIN SMALL LETTER S WITH CARON - 0x0162: 0xDE, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x0163: 0xFE, # LATIN SMALL LETTER T WITH CEDILLA - 0x0164: 0x8D, # LATIN CAPITAL LETTER T WITH CARON - 0x0165: 0x9D, # LATIN SMALL LETTER T WITH CARON - 0x016E: 0xD9, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x016F: 0xF9, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0170: 0xDB, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x0171: 0xFB, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x0179: 0x8F, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017A: 0x9F, # LATIN SMALL LETTER Z WITH ACUTE - 0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017D: 0x8E, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0x9E, # LATIN SMALL LETTER Z WITH CARON - 0x02C7: 0xA1, # CARON - 0x02D8: 0xA2, # BREVE - 0x02D9: 0xFF, # DOT ABOVE - 0x02DB: 0xB2, # OGONEK - 0x02DD: 0xBD, # DOUBLE ACUTE ACCENT - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp1251.py b/Lib/encodings/cp1251.py index ed835fe..216771f 100644 --- a/Lib/encodings/cp1251.py +++ b/Lib/encodings/cp1251.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,262 +303,5 @@ decoding_table = ( u'\u044f' # 0xFF -> CYRILLIC SMALL LETTER YA ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x0401: 0xA8, # CYRILLIC CAPITAL LETTER IO - 0x0402: 0x80, # CYRILLIC CAPITAL LETTER DJE - 0x0403: 0x81, # CYRILLIC CAPITAL LETTER GJE - 0x0404: 0xAA, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0405: 0xBD, # CYRILLIC CAPITAL LETTER DZE - 0x0406: 0xB2, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0407: 0xAF, # CYRILLIC CAPITAL LETTER YI - 0x0408: 0xA3, # CYRILLIC CAPITAL LETTER JE - 0x0409: 0x8A, # CYRILLIC CAPITAL LETTER LJE - 0x040A: 0x8C, # CYRILLIC CAPITAL LETTER NJE - 0x040B: 0x8E, # CYRILLIC CAPITAL LETTER TSHE - 0x040C: 0x8D, # CYRILLIC CAPITAL LETTER KJE - 0x040E: 0xA1, # CYRILLIC CAPITAL LETTER SHORT U - 0x040F: 0x8F, # CYRILLIC CAPITAL LETTER DZHE - 0x0410: 0xC0, # CYRILLIC CAPITAL LETTER A - 0x0411: 0xC1, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0xC2, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0xC3, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0xC4, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0xC5, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0xC6, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0xC7, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0xC8, # CYRILLIC CAPITAL LETTER I - 0x0419: 0xC9, # CYRILLIC CAPITAL LETTER SHORT I - 0x041A: 0xCA, # CYRILLIC CAPITAL LETTER KA - 0x041B: 0xCB, # CYRILLIC CAPITAL LETTER EL - 0x041C: 0xCC, # CYRILLIC CAPITAL LETTER EM - 0x041D: 0xCD, # CYRILLIC CAPITAL LETTER EN - 0x041E: 0xCE, # CYRILLIC CAPITAL LETTER O - 0x041F: 0xCF, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0xD0, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0xD1, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0xD2, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0xD3, # CYRILLIC CAPITAL LETTER U - 0x0424: 0xD4, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0xD5, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0xD6, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0xD7, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0xD8, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0xD9, # CYRILLIC CAPITAL LETTER SHCHA - 0x042A: 0xDA, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042B: 0xDB, # CYRILLIC CAPITAL LETTER YERU - 0x042C: 0xDC, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042D: 0xDD, # CYRILLIC CAPITAL LETTER E - 0x042E: 0xDE, # CYRILLIC CAPITAL LETTER YU - 0x042F: 0xDF, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0xE0, # CYRILLIC SMALL LETTER A - 0x0431: 0xE1, # CYRILLIC SMALL LETTER BE - 0x0432: 0xE2, # CYRILLIC SMALL LETTER VE - 0x0433: 0xE3, # CYRILLIC SMALL LETTER GHE - 0x0434: 0xE4, # CYRILLIC SMALL LETTER DE - 0x0435: 0xE5, # CYRILLIC SMALL LETTER IE - 0x0436: 0xE6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0xE7, # CYRILLIC SMALL LETTER ZE - 0x0438: 0xE8, # CYRILLIC SMALL LETTER I - 0x0439: 0xE9, # CYRILLIC SMALL LETTER SHORT I - 0x043A: 0xEA, # CYRILLIC SMALL LETTER KA - 0x043B: 0xEB, # CYRILLIC SMALL LETTER EL - 0x043C: 0xEC, # CYRILLIC SMALL LETTER EM - 0x043D: 0xED, # CYRILLIC SMALL LETTER EN - 0x043E: 0xEE, # CYRILLIC SMALL LETTER O - 0x043F: 0xEF, # CYRILLIC SMALL LETTER PE - 0x0440: 0xF0, # CYRILLIC SMALL LETTER ER - 0x0441: 0xF1, # CYRILLIC SMALL LETTER ES - 0x0442: 0xF2, # CYRILLIC SMALL LETTER TE - 0x0443: 0xF3, # CYRILLIC SMALL LETTER U - 0x0444: 0xF4, # CYRILLIC SMALL LETTER EF - 0x0445: 0xF5, # CYRILLIC SMALL LETTER HA - 0x0446: 0xF6, # CYRILLIC SMALL LETTER TSE - 0x0447: 0xF7, # CYRILLIC SMALL LETTER CHE - 0x0448: 0xF8, # CYRILLIC SMALL LETTER SHA - 0x0449: 0xF9, # CYRILLIC SMALL LETTER SHCHA - 0x044A: 0xFA, # CYRILLIC SMALL LETTER HARD SIGN - 0x044B: 0xFB, # CYRILLIC SMALL LETTER YERU - 0x044C: 0xFC, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044D: 0xFD, # CYRILLIC SMALL LETTER E - 0x044E: 0xFE, # CYRILLIC SMALL LETTER YU - 0x044F: 0xFF, # CYRILLIC SMALL LETTER YA - 0x0451: 0xB8, # CYRILLIC SMALL LETTER IO - 0x0452: 0x90, # CYRILLIC SMALL LETTER DJE - 0x0453: 0x83, # CYRILLIC SMALL LETTER GJE - 0x0454: 0xBA, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0455: 0xBE, # CYRILLIC SMALL LETTER DZE - 0x0456: 0xB3, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0457: 0xBF, # CYRILLIC SMALL LETTER YI - 0x0458: 0xBC, # CYRILLIC SMALL LETTER JE - 0x0459: 0x9A, # CYRILLIC SMALL LETTER LJE - 0x045A: 0x9C, # CYRILLIC SMALL LETTER NJE - 0x045B: 0x9E, # CYRILLIC SMALL LETTER TSHE - 0x045C: 0x9D, # CYRILLIC SMALL LETTER KJE - 0x045E: 0xA2, # CYRILLIC SMALL LETTER SHORT U - 0x045F: 0x9F, # CYRILLIC SMALL LETTER DZHE - 0x0490: 0xA5, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN - 0x0491: 0xB4, # CYRILLIC SMALL LETTER GHE WITH UPTURN - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AC: 0x88, # EURO SIGN - 0x2116: 0xB9, # NUMERO SIGN - 0x2122: 0x99, # TRADE MARK SIGN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp1252.py b/Lib/encodings/cp1252.py index e5b6905..e60a328 100644 --- a/Lib/encodings/cp1252.py +++ b/Lib/encodings/cp1252.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,258 +303,5 @@ decoding_table = ( u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xBF, # INVERTED QUESTION MARK - 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0xF0, # LATIN SMALL LETTER ETH - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0xFE, # LATIN SMALL LETTER THORN - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE - 0x0153: 0x9C, # LATIN SMALL LIGATURE OE - 0x0160: 0x8A, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0x9A, # LATIN SMALL LETTER S WITH CARON - 0x0178: 0x9F, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x017D: 0x8E, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0x9E, # LATIN SMALL LETTER Z WITH CARON - 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02DC: 0x98, # SMALL TILDE - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp1253.py b/Lib/encodings/cp1253.py index 3ce70b25..49f6ccc 100644 --- a/Lib/encodings/cp1253.py +++ b/Lib/encodings/cp1253.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,246 +303,5 @@ decoding_table = ( u'\ufffe' # 0xFF -> UNDEFINED ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x0384: 0xB4, # GREEK TONOS - 0x0385: 0xA1, # GREEK DIALYTIKA TONOS - 0x0386: 0xA2, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0388: 0xB8, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0xB9, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038A: 0xBA, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038C: 0xBC, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038E: 0xBE, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038F: 0xBF, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0390: 0xC0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x0391: 0xC1, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0xC2, # GREEK CAPITAL LETTER BETA - 0x0393: 0xC3, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0xC4, # GREEK CAPITAL LETTER DELTA - 0x0395: 0xC5, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0xC6, # GREEK CAPITAL LETTER ZETA - 0x0397: 0xC7, # GREEK CAPITAL LETTER ETA - 0x0398: 0xC8, # GREEK CAPITAL LETTER THETA - 0x0399: 0xC9, # GREEK CAPITAL LETTER IOTA - 0x039A: 0xCA, # GREEK CAPITAL LETTER KAPPA - 0x039B: 0xCB, # GREEK CAPITAL LETTER LAMDA - 0x039C: 0xCC, # GREEK CAPITAL LETTER MU - 0x039D: 0xCD, # GREEK CAPITAL LETTER NU - 0x039E: 0xCE, # GREEK CAPITAL LETTER XI - 0x039F: 0xCF, # GREEK CAPITAL LETTER OMICRON - 0x03A0: 0xD0, # GREEK CAPITAL LETTER PI - 0x03A1: 0xD1, # GREEK CAPITAL LETTER RHO - 0x03A3: 0xD3, # GREEK CAPITAL LETTER SIGMA - 0x03A4: 0xD4, # GREEK CAPITAL LETTER TAU - 0x03A5: 0xD5, # GREEK CAPITAL LETTER UPSILON - 0x03A6: 0xD6, # GREEK CAPITAL LETTER PHI - 0x03A7: 0xD7, # GREEK CAPITAL LETTER CHI - 0x03A8: 0xD8, # GREEK CAPITAL LETTER PSI - 0x03A9: 0xD9, # GREEK CAPITAL LETTER OMEGA - 0x03AA: 0xDA, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03AB: 0xDB, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03AC: 0xDC, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03AD: 0xDD, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03AE: 0xDE, # GREEK SMALL LETTER ETA WITH TONOS - 0x03AF: 0xDF, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03B0: 0xE0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x03B1: 0xE1, # GREEK SMALL LETTER ALPHA - 0x03B2: 0xE2, # GREEK SMALL LETTER BETA - 0x03B3: 0xE3, # GREEK SMALL LETTER GAMMA - 0x03B4: 0xE4, # GREEK SMALL LETTER DELTA - 0x03B5: 0xE5, # GREEK SMALL LETTER EPSILON - 0x03B6: 0xE6, # GREEK SMALL LETTER ZETA - 0x03B7: 0xE7, # GREEK SMALL LETTER ETA - 0x03B8: 0xE8, # GREEK SMALL LETTER THETA - 0x03B9: 0xE9, # GREEK SMALL LETTER IOTA - 0x03BA: 0xEA, # GREEK SMALL LETTER KAPPA - 0x03BB: 0xEB, # GREEK SMALL LETTER LAMDA - 0x03BC: 0xEC, # GREEK SMALL LETTER MU - 0x03BD: 0xED, # GREEK SMALL LETTER NU - 0x03BE: 0xEE, # GREEK SMALL LETTER XI - 0x03BF: 0xEF, # GREEK SMALL LETTER OMICRON - 0x03C0: 0xF0, # GREEK SMALL LETTER PI - 0x03C1: 0xF1, # GREEK SMALL LETTER RHO - 0x03C2: 0xF2, # GREEK SMALL LETTER FINAL SIGMA - 0x03C3: 0xF3, # GREEK SMALL LETTER SIGMA - 0x03C4: 0xF4, # GREEK SMALL LETTER TAU - 0x03C5: 0xF5, # GREEK SMALL LETTER UPSILON - 0x03C6: 0xF6, # GREEK SMALL LETTER PHI - 0x03C7: 0xF7, # GREEK SMALL LETTER CHI - 0x03C8: 0xF8, # GREEK SMALL LETTER PSI - 0x03C9: 0xF9, # GREEK SMALL LETTER OMEGA - 0x03CA: 0xFA, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03CB: 0xFB, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03CC: 0xFC, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03CD: 0xFD, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03CE: 0xFE, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2015: 0xAF, # HORIZONTAL BAR - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp1254.py b/Lib/encodings/cp1254.py index 31cd48c..65530ab 100644 --- a/Lib/encodings/cp1254.py +++ b/Lib/encodings/cp1254.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,256 +303,5 @@ decoding_table = ( u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xBF, # INVERTED QUESTION MARK - 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x011E: 0xD0, # LATIN CAPITAL LETTER G WITH BREVE - 0x011F: 0xF0, # LATIN SMALL LETTER G WITH BREVE - 0x0130: 0xDD, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0xFD, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE - 0x0153: 0x9C, # LATIN SMALL LIGATURE OE - 0x015E: 0xDE, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015F: 0xFE, # LATIN SMALL LETTER S WITH CEDILLA - 0x0160: 0x8A, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0x9A, # LATIN SMALL LETTER S WITH CARON - 0x0178: 0x9F, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02DC: 0x98, # SMALL TILDE - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp1255.py b/Lib/encodings/cp1255.py index 47c43ce..fd1456fa 100644 --- a/Lib/encodings/cp1255.py +++ b/Lib/encodings/cp1255.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,240 +303,5 @@ decoding_table = ( u'\ufffe' # 0xFF -> UNDEFINED ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xBF, # INVERTED QUESTION MARK - 0x00D7: 0xAA, # MULTIPLICATION SIGN - 0x00F7: 0xBA, # DIVISION SIGN - 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02DC: 0x98, # SMALL TILDE - 0x05B0: 0xC0, # HEBREW POINT SHEVA - 0x05B1: 0xC1, # HEBREW POINT HATAF SEGOL - 0x05B2: 0xC2, # HEBREW POINT HATAF PATAH - 0x05B3: 0xC3, # HEBREW POINT HATAF QAMATS - 0x05B4: 0xC4, # HEBREW POINT HIRIQ - 0x05B5: 0xC5, # HEBREW POINT TSERE - 0x05B6: 0xC6, # HEBREW POINT SEGOL - 0x05B7: 0xC7, # HEBREW POINT PATAH - 0x05B8: 0xC8, # HEBREW POINT QAMATS - 0x05B9: 0xC9, # HEBREW POINT HOLAM - 0x05BB: 0xCB, # HEBREW POINT QUBUTS - 0x05BC: 0xCC, # HEBREW POINT DAGESH OR MAPIQ - 0x05BD: 0xCD, # HEBREW POINT METEG - 0x05BE: 0xCE, # HEBREW PUNCTUATION MAQAF - 0x05BF: 0xCF, # HEBREW POINT RAFE - 0x05C0: 0xD0, # HEBREW PUNCTUATION PASEQ - 0x05C1: 0xD1, # HEBREW POINT SHIN DOT - 0x05C2: 0xD2, # HEBREW POINT SIN DOT - 0x05C3: 0xD3, # HEBREW PUNCTUATION SOF PASUQ - 0x05D0: 0xE0, # HEBREW LETTER ALEF - 0x05D1: 0xE1, # HEBREW LETTER BET - 0x05D2: 0xE2, # HEBREW LETTER GIMEL - 0x05D3: 0xE3, # HEBREW LETTER DALET - 0x05D4: 0xE4, # HEBREW LETTER HE - 0x05D5: 0xE5, # HEBREW LETTER VAV - 0x05D6: 0xE6, # HEBREW LETTER ZAYIN - 0x05D7: 0xE7, # HEBREW LETTER HET - 0x05D8: 0xE8, # HEBREW LETTER TET - 0x05D9: 0xE9, # HEBREW LETTER YOD - 0x05DA: 0xEA, # HEBREW LETTER FINAL KAF - 0x05DB: 0xEB, # HEBREW LETTER KAF - 0x05DC: 0xEC, # HEBREW LETTER LAMED - 0x05DD: 0xED, # HEBREW LETTER FINAL MEM - 0x05DE: 0xEE, # HEBREW LETTER MEM - 0x05DF: 0xEF, # HEBREW LETTER FINAL NUN - 0x05E0: 0xF0, # HEBREW LETTER NUN - 0x05E1: 0xF1, # HEBREW LETTER SAMEKH - 0x05E2: 0xF2, # HEBREW LETTER AYIN - 0x05E3: 0xF3, # HEBREW LETTER FINAL PE - 0x05E4: 0xF4, # HEBREW LETTER PE - 0x05E5: 0xF5, # HEBREW LETTER FINAL TSADI - 0x05E6: 0xF6, # HEBREW LETTER TSADI - 0x05E7: 0xF7, # HEBREW LETTER QOF - 0x05E8: 0xF8, # HEBREW LETTER RESH - 0x05E9: 0xF9, # HEBREW LETTER SHIN - 0x05EA: 0xFA, # HEBREW LETTER TAV - 0x05F0: 0xD4, # HEBREW LIGATURE YIDDISH DOUBLE VAV - 0x05F1: 0xD5, # HEBREW LIGATURE YIDDISH VAV YOD - 0x05F2: 0xD6, # HEBREW LIGATURE YIDDISH DOUBLE YOD - 0x05F3: 0xD7, # HEBREW PUNCTUATION GERESH - 0x05F4: 0xD8, # HEBREW PUNCTUATION GERSHAYIM - 0x200E: 0xFD, # LEFT-TO-RIGHT MARK - 0x200F: 0xFE, # RIGHT-TO-LEFT MARK - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AA: 0xA4, # NEW SHEQEL SIGN - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp1256.py b/Lib/encodings/cp1256.py index e90393b..302b5fa 100644 --- a/Lib/encodings/cp1256.py +++ b/Lib/encodings/cp1256.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE - 0x0153: 0x9C, # LATIN SMALL LIGATURE OE - 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x060C: 0xA1, # ARABIC COMMA - 0x061B: 0xBA, # ARABIC SEMICOLON - 0x061F: 0xBF, # ARABIC QUESTION MARK - 0x0621: 0xC1, # ARABIC LETTER HAMZA - 0x0622: 0xC2, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x0623: 0xC3, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x0624: 0xC4, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x0625: 0xC5, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x0626: 0xC6, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x0627: 0xC7, # ARABIC LETTER ALEF - 0x0628: 0xC8, # ARABIC LETTER BEH - 0x0629: 0xC9, # ARABIC LETTER TEH MARBUTA - 0x062A: 0xCA, # ARABIC LETTER TEH - 0x062B: 0xCB, # ARABIC LETTER THEH - 0x062C: 0xCC, # ARABIC LETTER JEEM - 0x062D: 0xCD, # ARABIC LETTER HAH - 0x062E: 0xCE, # ARABIC LETTER KHAH - 0x062F: 0xCF, # ARABIC LETTER DAL - 0x0630: 0xD0, # ARABIC LETTER THAL - 0x0631: 0xD1, # ARABIC LETTER REH - 0x0632: 0xD2, # ARABIC LETTER ZAIN - 0x0633: 0xD3, # ARABIC LETTER SEEN - 0x0634: 0xD4, # ARABIC LETTER SHEEN - 0x0635: 0xD5, # ARABIC LETTER SAD - 0x0636: 0xD6, # ARABIC LETTER DAD - 0x0637: 0xD8, # ARABIC LETTER TAH - 0x0638: 0xD9, # ARABIC LETTER ZAH - 0x0639: 0xDA, # ARABIC LETTER AIN - 0x063A: 0xDB, # ARABIC LETTER GHAIN - 0x0640: 0xDC, # ARABIC TATWEEL - 0x0641: 0xDD, # ARABIC LETTER FEH - 0x0642: 0xDE, # ARABIC LETTER QAF - 0x0643: 0xDF, # ARABIC LETTER KAF - 0x0644: 0xE1, # ARABIC LETTER LAM - 0x0645: 0xE3, # ARABIC LETTER MEEM - 0x0646: 0xE4, # ARABIC LETTER NOON - 0x0647: 0xE5, # ARABIC LETTER HEH - 0x0648: 0xE6, # ARABIC LETTER WAW - 0x0649: 0xEC, # ARABIC LETTER ALEF MAKSURA - 0x064A: 0xED, # ARABIC LETTER YEH - 0x064B: 0xF0, # ARABIC FATHATAN - 0x064C: 0xF1, # ARABIC DAMMATAN - 0x064D: 0xF2, # ARABIC KASRATAN - 0x064E: 0xF3, # ARABIC FATHA - 0x064F: 0xF5, # ARABIC DAMMA - 0x0650: 0xF6, # ARABIC KASRA - 0x0651: 0xF8, # ARABIC SHADDA - 0x0652: 0xFA, # ARABIC SUKUN - 0x0679: 0x8A, # ARABIC LETTER TTEH - 0x067E: 0x81, # ARABIC LETTER PEH - 0x0686: 0x8D, # ARABIC LETTER TCHEH - 0x0688: 0x8F, # ARABIC LETTER DDAL - 0x0691: 0x9A, # ARABIC LETTER RREH - 0x0698: 0x8E, # ARABIC LETTER JEH - 0x06A9: 0x98, # ARABIC LETTER KEHEH - 0x06AF: 0x90, # ARABIC LETTER GAF - 0x06BA: 0x9F, # ARABIC LETTER NOON GHUNNA - 0x06BE: 0xAA, # ARABIC LETTER HEH DOACHASHMEE - 0x06C1: 0xC0, # ARABIC LETTER HEH GOAL - 0x06D2: 0xFF, # ARABIC LETTER YEH BARREE - 0x200C: 0x9D, # ZERO WIDTH NON-JOINER - 0x200D: 0x9E, # ZERO WIDTH JOINER - 0x200E: 0xFD, # LEFT-TO-RIGHT MARK - 0x200F: 0xFE, # RIGHT-TO-LEFT MARK - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp1257.py b/Lib/encodings/cp1257.py index dcc81c0..53a6b29 100644 --- a/Lib/encodings/cp1257.py +++ b/Lib/encodings/cp1257.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,251 +303,5 @@ decoding_table = ( u'\u02d9' # 0xFF -> DOT ABOVE ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0x8D, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0x9D, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0x8F, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xAF, # LATIN CAPITAL LETTER AE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xA8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xBF, # LATIN SMALL LETTER AE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xB8, # LATIN SMALL LETTER O WITH STROKE - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0100: 0xC2, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0xE2, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0xC0, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xE0, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0xC3, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xE3, # LATIN SMALL LETTER C WITH ACUTE - 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON - 0x0112: 0xC7, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0xE7, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0xCB, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0xEB, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0xC6, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xE6, # LATIN SMALL LETTER E WITH OGONEK - 0x0122: 0xCC, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0xEC, # LATIN SMALL LETTER G WITH CEDILLA - 0x012A: 0xCE, # LATIN CAPITAL LETTER I WITH MACRON - 0x012B: 0xEE, # LATIN SMALL LETTER I WITH MACRON - 0x012E: 0xC1, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012F: 0xE1, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0xCD, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0xED, # LATIN SMALL LETTER K WITH CEDILLA - 0x013B: 0xCF, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013C: 0xEF, # LATIN SMALL LETTER L WITH CEDILLA - 0x0141: 0xD9, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xF9, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE - 0x0145: 0xD2, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0xF2, # LATIN SMALL LETTER N WITH CEDILLA - 0x014C: 0xD4, # LATIN CAPITAL LETTER O WITH MACRON - 0x014D: 0xF4, # LATIN SMALL LETTER O WITH MACRON - 0x0156: 0xAA, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x0157: 0xBA, # LATIN SMALL LETTER R WITH CEDILLA - 0x015A: 0xDA, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015B: 0xFA, # LATIN SMALL LETTER S WITH ACUTE - 0x0160: 0xD0, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xF0, # LATIN SMALL LETTER S WITH CARON - 0x016A: 0xDB, # LATIN CAPITAL LETTER U WITH MACRON - 0x016B: 0xFB, # LATIN SMALL LETTER U WITH MACRON - 0x0172: 0xD8, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0xF8, # LATIN SMALL LETTER U WITH OGONEK - 0x0179: 0xCA, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017A: 0xEA, # LATIN SMALL LETTER Z WITH ACUTE - 0x017B: 0xDD, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017C: 0xFD, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017D: 0xDE, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xFE, # LATIN SMALL LETTER Z WITH CARON - 0x02C7: 0x8E, # CARON - 0x02D9: 0xFF, # DOT ABOVE - 0x02DB: 0x9E, # OGONEK - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp1258.py b/Lib/encodings/cp1258.py index d4d2271..4b25d8e 100644 --- a/Lib/encodings/cp1258.py +++ b/Lib/encodings/cp1258.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,254 +303,5 @@ decoding_table = ( u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xBF, # INVERTED QUESTION MARK - 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE - 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE - 0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE - 0x0153: 0x9C, # LATIN SMALL LIGATURE OE - 0x0178: 0x9F, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK - 0x01A0: 0xD5, # LATIN CAPITAL LETTER O WITH HORN - 0x01A1: 0xF5, # LATIN SMALL LETTER O WITH HORN - 0x01AF: 0xDD, # LATIN CAPITAL LETTER U WITH HORN - 0x01B0: 0xFD, # LATIN SMALL LETTER U WITH HORN - 0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02DC: 0x98, # SMALL TILDE - 0x0300: 0xCC, # COMBINING GRAVE ACCENT - 0x0301: 0xEC, # COMBINING ACUTE ACCENT - 0x0303: 0xDE, # COMBINING TILDE - 0x0309: 0xD2, # COMBINING HOOK ABOVE - 0x0323: 0xF2, # COMBINING DOT BELOW - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0x86, # DAGGER - 0x2021: 0x87, # DOUBLE DAGGER - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x2030: 0x89, # PER MILLE SIGN - 0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x20AB: 0xFE, # DONG SIGN - 0x20AC: 0x80, # EURO SIGN - 0x2122: 0x99, # TRADE MARK SIGN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp424.py b/Lib/encodings/cp424.py index 966aecb..d3ade22 100644 --- a/Lib/encodings/cp424.py +++ b/Lib/encodings/cp424.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,225 +303,5 @@ decoding_table = ( u'\x9f' # 0xFF -> EIGHT ONES ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2D, # ENQUIRY - 0x0006: 0x2E, # ACKNOWLEDGE - 0x0007: 0x2F, # BELL - 0x0008: 0x16, # BACKSPACE - 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000A: 0x25, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3C, # DEVICE CONTROL FOUR - 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x32, # SYNCHRONOUS IDLE - 0x0017: 0x26, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x3F, # SUBSTITUTE - 0x001B: 0x27, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x40, # SPACE - 0x0021: 0x5A, # EXCLAMATION MARK - 0x0022: 0x7F, # QUOTATION MARK - 0x0023: 0x7B, # NUMBER SIGN - 0x0024: 0x5B, # DOLLAR SIGN - 0x0025: 0x6C, # PERCENT SIGN - 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7D, # APOSTROPHE - 0x0028: 0x4D, # LEFT PARENTHESIS - 0x0029: 0x5D, # RIGHT PARENTHESIS - 0x002A: 0x5C, # ASTERISK - 0x002B: 0x4E, # PLUS SIGN - 0x002C: 0x6B, # COMMA - 0x002D: 0x60, # HYPHEN-MINUS - 0x002E: 0x4B, # FULL STOP - 0x002F: 0x61, # SOLIDUS - 0x0030: 0xF0, # DIGIT ZERO - 0x0031: 0xF1, # DIGIT ONE - 0x0032: 0xF2, # DIGIT TWO - 0x0033: 0xF3, # DIGIT THREE - 0x0034: 0xF4, # DIGIT FOUR - 0x0035: 0xF5, # DIGIT FIVE - 0x0036: 0xF6, # DIGIT SIX - 0x0037: 0xF7, # DIGIT SEVEN - 0x0038: 0xF8, # DIGIT EIGHT - 0x0039: 0xF9, # DIGIT NINE - 0x003A: 0x7A, # COLON - 0x003B: 0x5E, # SEMICOLON - 0x003C: 0x4C, # LESS-THAN SIGN - 0x003D: 0x7E, # EQUALS SIGN - 0x003E: 0x6E, # GREATER-THAN SIGN - 0x003F: 0x6F, # QUESTION MARK - 0x0040: 0x7C, # COMMERCIAL AT - 0x0041: 0xC1, # LATIN CAPITAL LETTER A - 0x0042: 0xC2, # LATIN CAPITAL LETTER B - 0x0043: 0xC3, # LATIN CAPITAL LETTER C - 0x0044: 0xC4, # LATIN CAPITAL LETTER D - 0x0045: 0xC5, # LATIN CAPITAL LETTER E - 0x0046: 0xC6, # LATIN CAPITAL LETTER F - 0x0047: 0xC7, # LATIN CAPITAL LETTER G - 0x0048: 0xC8, # LATIN CAPITAL LETTER H - 0x0049: 0xC9, # LATIN CAPITAL LETTER I - 0x004A: 0xD1, # LATIN CAPITAL LETTER J - 0x004B: 0xD2, # LATIN CAPITAL LETTER K - 0x004C: 0xD3, # LATIN CAPITAL LETTER L - 0x004D: 0xD4, # LATIN CAPITAL LETTER M - 0x004E: 0xD5, # LATIN CAPITAL LETTER N - 0x004F: 0xD6, # LATIN CAPITAL LETTER O - 0x0050: 0xD7, # LATIN CAPITAL LETTER P - 0x0051: 0xD8, # LATIN CAPITAL LETTER Q - 0x0052: 0xD9, # LATIN CAPITAL LETTER R - 0x0053: 0xE2, # LATIN CAPITAL LETTER S - 0x0054: 0xE3, # LATIN CAPITAL LETTER T - 0x0055: 0xE4, # LATIN CAPITAL LETTER U - 0x0056: 0xE5, # LATIN CAPITAL LETTER V - 0x0057: 0xE6, # LATIN CAPITAL LETTER W - 0x0058: 0xE7, # LATIN CAPITAL LETTER X - 0x0059: 0xE8, # LATIN CAPITAL LETTER Y - 0x005A: 0xE9, # LATIN CAPITAL LETTER Z - 0x005B: 0xBA, # LEFT SQUARE BRACKET - 0x005C: 0xE0, # REVERSE SOLIDUS - 0x005D: 0xBB, # RIGHT SQUARE BRACKET - 0x005E: 0xB0, # CIRCUMFLEX ACCENT - 0x005F: 0x6D, # LOW LINE - 0x0060: 0x79, # GRAVE ACCENT - 0x0061: 0x81, # LATIN SMALL LETTER A - 0x0062: 0x82, # LATIN SMALL LETTER B - 0x0063: 0x83, # LATIN SMALL LETTER C - 0x0064: 0x84, # LATIN SMALL LETTER D - 0x0065: 0x85, # LATIN SMALL LETTER E - 0x0066: 0x86, # LATIN SMALL LETTER F - 0x0067: 0x87, # LATIN SMALL LETTER G - 0x0068: 0x88, # LATIN SMALL LETTER H - 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006A: 0x91, # LATIN SMALL LETTER J - 0x006B: 0x92, # LATIN SMALL LETTER K - 0x006C: 0x93, # LATIN SMALL LETTER L - 0x006D: 0x94, # LATIN SMALL LETTER M - 0x006E: 0x95, # LATIN SMALL LETTER N - 0x006F: 0x96, # LATIN SMALL LETTER O - 0x0070: 0x97, # LATIN SMALL LETTER P - 0x0071: 0x98, # LATIN SMALL LETTER Q - 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 0xA2, # LATIN SMALL LETTER S - 0x0074: 0xA3, # LATIN SMALL LETTER T - 0x0075: 0xA4, # LATIN SMALL LETTER U - 0x0076: 0xA5, # LATIN SMALL LETTER V - 0x0077: 0xA6, # LATIN SMALL LETTER W - 0x0078: 0xA7, # LATIN SMALL LETTER X - 0x0079: 0xA8, # LATIN SMALL LETTER Y - 0x007A: 0xA9, # LATIN SMALL LETTER Z - 0x007B: 0xC0, # LEFT CURLY BRACKET - 0x007C: 0x4F, # VERTICAL LINE - 0x007D: 0xD0, # RIGHT CURLY BRACKET - 0x007E: 0xA1, # TILDE - 0x007F: 0x07, # DELETE - 0x0080: 0x20, # DIGIT SELECT - 0x0081: 0x21, # START OF SIGNIFICANCE - 0x0082: 0x22, # FIELD SEPARATOR - 0x0083: 0x23, # WORD UNDERSCORE - 0x0084: 0x24, # BYPASS OR INHIBIT PRESENTATION - 0x0085: 0x15, # NEW LINE - 0x0086: 0x06, # REQUIRED NEW LINE - 0x0087: 0x17, # PROGRAM OPERATOR COMMUNICATION - 0x0088: 0x28, # SET ATTRIBUTE - 0x0089: 0x29, # START FIELD EXTENDED - 0x008A: 0x2A, # SET MODE OR SWITCH - 0x008B: 0x2B, # CONTROL SEQUENCE PREFIX - 0x008C: 0x2C, # MODIFY FIELD ATTRIBUTE - 0x008D: 0x09, # SUPERSCRIPT - 0x008E: 0x0A, # REPEAT - 0x008F: 0x1B, # CUSTOMER USE ONE - 0x0090: 0x30, # <reserved> - 0x0091: 0x31, # <reserved> - 0x0092: 0x1A, # UNIT BACK SPACE - 0x0093: 0x33, # INDEX RETURN - 0x0094: 0x34, # PRESENTATION POSITION - 0x0095: 0x35, # TRANSPARENT - 0x0096: 0x36, # NUMERIC BACKSPACE - 0x0097: 0x08, # GRAPHIC ESCAPE - 0x0098: 0x38, # SUBSCRIPT - 0x0099: 0x39, # INDENT TABULATION - 0x009A: 0x3A, # REVERSE FORM FEED - 0x009B: 0x3B, # CUSTOMER USE THREE - 0x009C: 0x04, # SELECT - 0x009D: 0x14, # RESTORE/ENABLE PRESENTATION - 0x009E: 0x3E, # <reserved> - 0x009F: 0xFF, # EIGHT ONES - 0x00A0: 0x74, # NO-BREAK SPACE - 0x00A2: 0x4A, # CENT SIGN - 0x00A3: 0xB1, # POUND SIGN - 0x00A4: 0x9F, # CURRENCY SIGN - 0x00A5: 0xB2, # YEN SIGN - 0x00A6: 0x6A, # BROKEN BAR - 0x00A7: 0xB5, # SECTION SIGN - 0x00A8: 0xBD, # DIAERESIS - 0x00A9: 0xB4, # COPYRIGHT SIGN - 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0x5F, # NOT SIGN - 0x00AD: 0xCA, # SOFT HYPHEN - 0x00AE: 0xAF, # REGISTERED SIGN - 0x00AF: 0xBC, # MACRON - 0x00B0: 0x90, # DEGREE SIGN - 0x00B1: 0x8F, # PLUS-MINUS SIGN - 0x00B2: 0xEA, # SUPERSCRIPT TWO - 0x00B3: 0xFA, # SUPERSCRIPT THREE - 0x00B4: 0xBE, # ACUTE ACCENT - 0x00B5: 0xA0, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB3, # MIDDLE DOT - 0x00B8: 0x9D, # CEDILLA - 0x00B9: 0xDA, # SUPERSCRIPT ONE - 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF - 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS - 0x00D7: 0xBF, # MULTIPLICATION SIGN - 0x00F7: 0xE1, # DIVISION SIGN - 0x05D0: 0x41, # HEBREW LETTER ALEF - 0x05D1: 0x42, # HEBREW LETTER BET - 0x05D2: 0x43, # HEBREW LETTER GIMEL - 0x05D3: 0x44, # HEBREW LETTER DALET - 0x05D4: 0x45, # HEBREW LETTER HE - 0x05D5: 0x46, # HEBREW LETTER VAV - 0x05D6: 0x47, # HEBREW LETTER ZAYIN - 0x05D7: 0x48, # HEBREW LETTER HET - 0x05D8: 0x49, # HEBREW LETTER TET - 0x05D9: 0x51, # HEBREW LETTER YOD - 0x05DA: 0x52, # HEBREW LETTER FINAL KAF - 0x05DB: 0x53, # HEBREW LETTER KAF - 0x05DC: 0x54, # HEBREW LETTER LAMED - 0x05DD: 0x55, # HEBREW LETTER FINAL MEM - 0x05DE: 0x56, # HEBREW LETTER MEM - 0x05DF: 0x57, # HEBREW LETTER FINAL NUN - 0x05E0: 0x58, # HEBREW LETTER NUN - 0x05E1: 0x59, # HEBREW LETTER SAMEKH - 0x05E2: 0x62, # HEBREW LETTER AYIN - 0x05E3: 0x63, # HEBREW LETTER FINAL PE - 0x05E4: 0x64, # HEBREW LETTER PE - 0x05E5: 0x65, # HEBREW LETTER FINAL TSADI - 0x05E6: 0x66, # HEBREW LETTER TSADI - 0x05E7: 0x67, # HEBREW LETTER QOF - 0x05E8: 0x68, # HEBREW LETTER RESH - 0x05E9: 0x69, # HEBREW LETTER SHIN - 0x05EA: 0x71, # HEBREW LETTER TAV - 0x2017: 0x78, # DOUBLE LOW LINE -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp500.py b/Lib/encodings/cp500.py index 83af090..60766c0 100644 --- a/Lib/encodings/cp500.py +++ b/Lib/encodings/cp500.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\x9f' # 0xFF -> CONTROL ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2D, # ENQUIRY - 0x0006: 0x2E, # ACKNOWLEDGE - 0x0007: 0x2F, # BELL - 0x0008: 0x16, # BACKSPACE - 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000A: 0x25, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3C, # DEVICE CONTROL FOUR - 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x32, # SYNCHRONOUS IDLE - 0x0017: 0x26, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x3F, # SUBSTITUTE - 0x001B: 0x27, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x40, # SPACE - 0x0021: 0x4F, # EXCLAMATION MARK - 0x0022: 0x7F, # QUOTATION MARK - 0x0023: 0x7B, # NUMBER SIGN - 0x0024: 0x5B, # DOLLAR SIGN - 0x0025: 0x6C, # PERCENT SIGN - 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7D, # APOSTROPHE - 0x0028: 0x4D, # LEFT PARENTHESIS - 0x0029: 0x5D, # RIGHT PARENTHESIS - 0x002A: 0x5C, # ASTERISK - 0x002B: 0x4E, # PLUS SIGN - 0x002C: 0x6B, # COMMA - 0x002D: 0x60, # HYPHEN-MINUS - 0x002E: 0x4B, # FULL STOP - 0x002F: 0x61, # SOLIDUS - 0x0030: 0xF0, # DIGIT ZERO - 0x0031: 0xF1, # DIGIT ONE - 0x0032: 0xF2, # DIGIT TWO - 0x0033: 0xF3, # DIGIT THREE - 0x0034: 0xF4, # DIGIT FOUR - 0x0035: 0xF5, # DIGIT FIVE - 0x0036: 0xF6, # DIGIT SIX - 0x0037: 0xF7, # DIGIT SEVEN - 0x0038: 0xF8, # DIGIT EIGHT - 0x0039: 0xF9, # DIGIT NINE - 0x003A: 0x7A, # COLON - 0x003B: 0x5E, # SEMICOLON - 0x003C: 0x4C, # LESS-THAN SIGN - 0x003D: 0x7E, # EQUALS SIGN - 0x003E: 0x6E, # GREATER-THAN SIGN - 0x003F: 0x6F, # QUESTION MARK - 0x0040: 0x7C, # COMMERCIAL AT - 0x0041: 0xC1, # LATIN CAPITAL LETTER A - 0x0042: 0xC2, # LATIN CAPITAL LETTER B - 0x0043: 0xC3, # LATIN CAPITAL LETTER C - 0x0044: 0xC4, # LATIN CAPITAL LETTER D - 0x0045: 0xC5, # LATIN CAPITAL LETTER E - 0x0046: 0xC6, # LATIN CAPITAL LETTER F - 0x0047: 0xC7, # LATIN CAPITAL LETTER G - 0x0048: 0xC8, # LATIN CAPITAL LETTER H - 0x0049: 0xC9, # LATIN CAPITAL LETTER I - 0x004A: 0xD1, # LATIN CAPITAL LETTER J - 0x004B: 0xD2, # LATIN CAPITAL LETTER K - 0x004C: 0xD3, # LATIN CAPITAL LETTER L - 0x004D: 0xD4, # LATIN CAPITAL LETTER M - 0x004E: 0xD5, # LATIN CAPITAL LETTER N - 0x004F: 0xD6, # LATIN CAPITAL LETTER O - 0x0050: 0xD7, # LATIN CAPITAL LETTER P - 0x0051: 0xD8, # LATIN CAPITAL LETTER Q - 0x0052: 0xD9, # LATIN CAPITAL LETTER R - 0x0053: 0xE2, # LATIN CAPITAL LETTER S - 0x0054: 0xE3, # LATIN CAPITAL LETTER T - 0x0055: 0xE4, # LATIN CAPITAL LETTER U - 0x0056: 0xE5, # LATIN CAPITAL LETTER V - 0x0057: 0xE6, # LATIN CAPITAL LETTER W - 0x0058: 0xE7, # LATIN CAPITAL LETTER X - 0x0059: 0xE8, # LATIN CAPITAL LETTER Y - 0x005A: 0xE9, # LATIN CAPITAL LETTER Z - 0x005B: 0x4A, # LEFT SQUARE BRACKET - 0x005C: 0xE0, # REVERSE SOLIDUS - 0x005D: 0x5A, # RIGHT SQUARE BRACKET - 0x005E: 0x5F, # CIRCUMFLEX ACCENT - 0x005F: 0x6D, # LOW LINE - 0x0060: 0x79, # GRAVE ACCENT - 0x0061: 0x81, # LATIN SMALL LETTER A - 0x0062: 0x82, # LATIN SMALL LETTER B - 0x0063: 0x83, # LATIN SMALL LETTER C - 0x0064: 0x84, # LATIN SMALL LETTER D - 0x0065: 0x85, # LATIN SMALL LETTER E - 0x0066: 0x86, # LATIN SMALL LETTER F - 0x0067: 0x87, # LATIN SMALL LETTER G - 0x0068: 0x88, # LATIN SMALL LETTER H - 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006A: 0x91, # LATIN SMALL LETTER J - 0x006B: 0x92, # LATIN SMALL LETTER K - 0x006C: 0x93, # LATIN SMALL LETTER L - 0x006D: 0x94, # LATIN SMALL LETTER M - 0x006E: 0x95, # LATIN SMALL LETTER N - 0x006F: 0x96, # LATIN SMALL LETTER O - 0x0070: 0x97, # LATIN SMALL LETTER P - 0x0071: 0x98, # LATIN SMALL LETTER Q - 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 0xA2, # LATIN SMALL LETTER S - 0x0074: 0xA3, # LATIN SMALL LETTER T - 0x0075: 0xA4, # LATIN SMALL LETTER U - 0x0076: 0xA5, # LATIN SMALL LETTER V - 0x0077: 0xA6, # LATIN SMALL LETTER W - 0x0078: 0xA7, # LATIN SMALL LETTER X - 0x0079: 0xA8, # LATIN SMALL LETTER Y - 0x007A: 0xA9, # LATIN SMALL LETTER Z - 0x007B: 0xC0, # LEFT CURLY BRACKET - 0x007C: 0xBB, # VERTICAL LINE - 0x007D: 0xD0, # RIGHT CURLY BRACKET - 0x007E: 0xA1, # TILDE - 0x007F: 0x07, # DELETE - 0x0080: 0x20, # CONTROL - 0x0081: 0x21, # CONTROL - 0x0082: 0x22, # CONTROL - 0x0083: 0x23, # CONTROL - 0x0084: 0x24, # CONTROL - 0x0085: 0x15, # CONTROL - 0x0086: 0x06, # CONTROL - 0x0087: 0x17, # CONTROL - 0x0088: 0x28, # CONTROL - 0x0089: 0x29, # CONTROL - 0x008A: 0x2A, # CONTROL - 0x008B: 0x2B, # CONTROL - 0x008C: 0x2C, # CONTROL - 0x008D: 0x09, # CONTROL - 0x008E: 0x0A, # CONTROL - 0x008F: 0x1B, # CONTROL - 0x0090: 0x30, # CONTROL - 0x0091: 0x31, # CONTROL - 0x0092: 0x1A, # CONTROL - 0x0093: 0x33, # CONTROL - 0x0094: 0x34, # CONTROL - 0x0095: 0x35, # CONTROL - 0x0096: 0x36, # CONTROL - 0x0097: 0x08, # CONTROL - 0x0098: 0x38, # CONTROL - 0x0099: 0x39, # CONTROL - 0x009A: 0x3A, # CONTROL - 0x009B: 0x3B, # CONTROL - 0x009C: 0x04, # CONTROL - 0x009D: 0x14, # CONTROL - 0x009E: 0x3E, # CONTROL - 0x009F: 0xFF, # CONTROL - 0x00A0: 0x41, # NO-BREAK SPACE - 0x00A1: 0xAA, # INVERTED EXCLAMATION MARK - 0x00A2: 0xB0, # CENT SIGN - 0x00A3: 0xB1, # POUND SIGN - 0x00A4: 0x9F, # CURRENCY SIGN - 0x00A5: 0xB2, # YEN SIGN - 0x00A6: 0x6A, # BROKEN BAR - 0x00A7: 0xB5, # SECTION SIGN - 0x00A8: 0xBD, # DIAERESIS - 0x00A9: 0xB4, # COPYRIGHT SIGN - 0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR - 0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xBA, # NOT SIGN - 0x00AD: 0xCA, # SOFT HYPHEN - 0x00AE: 0xAF, # REGISTERED SIGN - 0x00AF: 0xBC, # MACRON - 0x00B0: 0x90, # DEGREE SIGN - 0x00B1: 0x8F, # PLUS-MINUS SIGN - 0x00B2: 0xEA, # SUPERSCRIPT TWO - 0x00B3: 0xFA, # SUPERSCRIPT THREE - 0x00B4: 0xBE, # ACUTE ACCENT - 0x00B5: 0xA0, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB3, # MIDDLE DOT - 0x00B8: 0x9D, # CEDILLA - 0x00B9: 0xDA, # SUPERSCRIPT ONE - 0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xB8, # VULGAR FRACTION ONE HALF - 0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xAB, # INVERTED QUESTION MARK - 0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE - 0x00C7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xAC, # LATIN CAPITAL LETTER ETH (ICELANDIC) - 0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xEC, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xBF, # MULTIPLICATION SIGN - 0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xFC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xAD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DE: 0xAE, # LATIN CAPITAL LETTER THORN (ICELANDIC) - 0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0x9C, # LATIN SMALL LIGATURE AE - 0x00E7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0x8C, # LATIN SMALL LETTER ETH (ICELANDIC) - 0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xCC, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xE1, # DIVISION SIGN - 0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xDC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0x8D, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0x8E, # LATIN SMALL LETTER THORN (ICELANDIC) - 0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp856.py b/Lib/encodings/cp856.py index c72fcad..203c2c4 100644 --- a/Lib/encodings/cp856.py +++ b/Lib/encodings/cp856.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,222 +303,5 @@ decoding_table = ( u'\xa0' # 0xFF -> NO-BREAK SPACE ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xFF, # NO-BREAK SPACE - 0x00A2: 0xBD, # CENT SIGN - 0x00A3: 0x9C, # POUND SIGN - 0x00A4: 0xCF, # CURRENCY SIGN - 0x00A5: 0xBE, # YEN SIGN - 0x00A6: 0xDD, # BROKEN BAR - 0x00A7: 0xF5, # SECTION SIGN - 0x00A8: 0xF9, # DIAERESIS - 0x00A9: 0xB8, # COPYRIGHT SIGN - 0x00AB: 0xAE, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAA, # NOT SIGN - 0x00AD: 0xF0, # SOFT HYPHEN - 0x00AE: 0xA9, # REGISTERED SIGN - 0x00AF: 0xEE, # MACRON - 0x00B0: 0xF8, # DEGREE SIGN - 0x00B1: 0xF1, # PLUS-MINUS SIGN - 0x00B2: 0xFD, # SUPERSCRIPT TWO - 0x00B3: 0xFC, # SUPERSCRIPT THREE - 0x00B4: 0xEF, # ACUTE ACCENT - 0x00B5: 0xE6, # MICRO SIGN - 0x00B6: 0xF4, # PILCROW SIGN - 0x00B7: 0xFA, # MIDDLE DOT - 0x00B8: 0xF7, # CEDILLA - 0x00B9: 0xFB, # SUPERSCRIPT ONE - 0x00BB: 0xAF, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xAC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xAB, # VULGAR FRACTION ONE HALF - 0x00BE: 0xF3, # VULGAR FRACTION THREE QUARTERS - 0x00D7: 0x9E, # MULTIPLICATION SIGN - 0x00F7: 0xF6, # DIVISION SIGN - 0x05D0: 0x80, # HEBREW LETTER ALEF - 0x05D1: 0x81, # HEBREW LETTER BET - 0x05D2: 0x82, # HEBREW LETTER GIMEL - 0x05D3: 0x83, # HEBREW LETTER DALET - 0x05D4: 0x84, # HEBREW LETTER HE - 0x05D5: 0x85, # HEBREW LETTER VAV - 0x05D6: 0x86, # HEBREW LETTER ZAYIN - 0x05D7: 0x87, # HEBREW LETTER HET - 0x05D8: 0x88, # HEBREW LETTER TET - 0x05D9: 0x89, # HEBREW LETTER YOD - 0x05DA: 0x8A, # HEBREW LETTER FINAL KAF - 0x05DB: 0x8B, # HEBREW LETTER KAF - 0x05DC: 0x8C, # HEBREW LETTER LAMED - 0x05DD: 0x8D, # HEBREW LETTER FINAL MEM - 0x05DE: 0x8E, # HEBREW LETTER MEM - 0x05DF: 0x8F, # HEBREW LETTER FINAL NUN - 0x05E0: 0x90, # HEBREW LETTER NUN - 0x05E1: 0x91, # HEBREW LETTER SAMEKH - 0x05E2: 0x92, # HEBREW LETTER AYIN - 0x05E3: 0x93, # HEBREW LETTER FINAL PE - 0x05E4: 0x94, # HEBREW LETTER PE - 0x05E5: 0x95, # HEBREW LETTER FINAL TSADI - 0x05E6: 0x96, # HEBREW LETTER TSADI - 0x05E7: 0x97, # HEBREW LETTER QOF - 0x05E8: 0x98, # HEBREW LETTER RESH - 0x05E9: 0x99, # HEBREW LETTER SHIN - 0x05EA: 0x9A, # HEBREW LETTER TAV - 0x2017: 0xF2, # DOUBLE LOW LINE - 0x2500: 0xC4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0xB3, # BOX DRAWINGS LIGHT VERTICAL - 0x250C: 0xDA, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0xBF, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0xC0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0xD9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251C: 0xC3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0xB4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252C: 0xC2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0xC1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253C: 0xC5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0xCD, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0xBA, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0xC9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0xBB, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255A: 0xC8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255D: 0xBC, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0xCC, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0xB9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0xCB, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0xCA, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256C: 0xCE, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0xDF, # UPPER HALF BLOCK - 0x2584: 0xDC, # LOWER HALF BLOCK - 0x2588: 0xDB, # FULL BLOCK - 0x2591: 0xB0, # LIGHT SHADE - 0x2592: 0xB1, # MEDIUM SHADE - 0x2593: 0xB2, # DARK SHADE - 0x25A0: 0xFE, # BLACK SQUARE -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp874.py b/Lib/encodings/cp874.py index 591e8aa..6110f46 100644 --- a/Lib/encodings/cp874.py +++ b/Lib/encodings/cp874.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,232 +303,5 @@ decoding_table = ( u'\ufffe' # 0xFF -> UNDEFINED ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x0E01: 0xA1, # THAI CHARACTER KO KAI - 0x0E02: 0xA2, # THAI CHARACTER KHO KHAI - 0x0E03: 0xA3, # THAI CHARACTER KHO KHUAT - 0x0E04: 0xA4, # THAI CHARACTER KHO KHWAI - 0x0E05: 0xA5, # THAI CHARACTER KHO KHON - 0x0E06: 0xA6, # THAI CHARACTER KHO RAKHANG - 0x0E07: 0xA7, # THAI CHARACTER NGO NGU - 0x0E08: 0xA8, # THAI CHARACTER CHO CHAN - 0x0E09: 0xA9, # THAI CHARACTER CHO CHING - 0x0E0A: 0xAA, # THAI CHARACTER CHO CHANG - 0x0E0B: 0xAB, # THAI CHARACTER SO SO - 0x0E0C: 0xAC, # THAI CHARACTER CHO CHOE - 0x0E0D: 0xAD, # THAI CHARACTER YO YING - 0x0E0E: 0xAE, # THAI CHARACTER DO CHADA - 0x0E0F: 0xAF, # THAI CHARACTER TO PATAK - 0x0E10: 0xB0, # THAI CHARACTER THO THAN - 0x0E11: 0xB1, # THAI CHARACTER THO NANGMONTHO - 0x0E12: 0xB2, # THAI CHARACTER THO PHUTHAO - 0x0E13: 0xB3, # THAI CHARACTER NO NEN - 0x0E14: 0xB4, # THAI CHARACTER DO DEK - 0x0E15: 0xB5, # THAI CHARACTER TO TAO - 0x0E16: 0xB6, # THAI CHARACTER THO THUNG - 0x0E17: 0xB7, # THAI CHARACTER THO THAHAN - 0x0E18: 0xB8, # THAI CHARACTER THO THONG - 0x0E19: 0xB9, # THAI CHARACTER NO NU - 0x0E1A: 0xBA, # THAI CHARACTER BO BAIMAI - 0x0E1B: 0xBB, # THAI CHARACTER PO PLA - 0x0E1C: 0xBC, # THAI CHARACTER PHO PHUNG - 0x0E1D: 0xBD, # THAI CHARACTER FO FA - 0x0E1E: 0xBE, # THAI CHARACTER PHO PHAN - 0x0E1F: 0xBF, # THAI CHARACTER FO FAN - 0x0E20: 0xC0, # THAI CHARACTER PHO SAMPHAO - 0x0E21: 0xC1, # THAI CHARACTER MO MA - 0x0E22: 0xC2, # THAI CHARACTER YO YAK - 0x0E23: 0xC3, # THAI CHARACTER RO RUA - 0x0E24: 0xC4, # THAI CHARACTER RU - 0x0E25: 0xC5, # THAI CHARACTER LO LING - 0x0E26: 0xC6, # THAI CHARACTER LU - 0x0E27: 0xC7, # THAI CHARACTER WO WAEN - 0x0E28: 0xC8, # THAI CHARACTER SO SALA - 0x0E29: 0xC9, # THAI CHARACTER SO RUSI - 0x0E2A: 0xCA, # THAI CHARACTER SO SUA - 0x0E2B: 0xCB, # THAI CHARACTER HO HIP - 0x0E2C: 0xCC, # THAI CHARACTER LO CHULA - 0x0E2D: 0xCD, # THAI CHARACTER O ANG - 0x0E2E: 0xCE, # THAI CHARACTER HO NOKHUK - 0x0E2F: 0xCF, # THAI CHARACTER PAIYANNOI - 0x0E30: 0xD0, # THAI CHARACTER SARA A - 0x0E31: 0xD1, # THAI CHARACTER MAI HAN-AKAT - 0x0E32: 0xD2, # THAI CHARACTER SARA AA - 0x0E33: 0xD3, # THAI CHARACTER SARA AM - 0x0E34: 0xD4, # THAI CHARACTER SARA I - 0x0E35: 0xD5, # THAI CHARACTER SARA II - 0x0E36: 0xD6, # THAI CHARACTER SARA UE - 0x0E37: 0xD7, # THAI CHARACTER SARA UEE - 0x0E38: 0xD8, # THAI CHARACTER SARA U - 0x0E39: 0xD9, # THAI CHARACTER SARA UU - 0x0E3A: 0xDA, # THAI CHARACTER PHINTHU - 0x0E3F: 0xDF, # THAI CURRENCY SYMBOL BAHT - 0x0E40: 0xE0, # THAI CHARACTER SARA E - 0x0E41: 0xE1, # THAI CHARACTER SARA AE - 0x0E42: 0xE2, # THAI CHARACTER SARA O - 0x0E43: 0xE3, # THAI CHARACTER SARA AI MAIMUAN - 0x0E44: 0xE4, # THAI CHARACTER SARA AI MAIMALAI - 0x0E45: 0xE5, # THAI CHARACTER LAKKHANGYAO - 0x0E46: 0xE6, # THAI CHARACTER MAIYAMOK - 0x0E47: 0xE7, # THAI CHARACTER MAITAIKHU - 0x0E48: 0xE8, # THAI CHARACTER MAI EK - 0x0E49: 0xE9, # THAI CHARACTER MAI THO - 0x0E4A: 0xEA, # THAI CHARACTER MAI TRI - 0x0E4B: 0xEB, # THAI CHARACTER MAI CHATTAWA - 0x0E4C: 0xEC, # THAI CHARACTER THANTHAKHAT - 0x0E4D: 0xED, # THAI CHARACTER NIKHAHIT - 0x0E4E: 0xEE, # THAI CHARACTER YAMAKKAN - 0x0E4F: 0xEF, # THAI CHARACTER FONGMAN - 0x0E50: 0xF0, # THAI DIGIT ZERO - 0x0E51: 0xF1, # THAI DIGIT ONE - 0x0E52: 0xF2, # THAI DIGIT TWO - 0x0E53: 0xF3, # THAI DIGIT THREE - 0x0E54: 0xF4, # THAI DIGIT FOUR - 0x0E55: 0xF5, # THAI DIGIT FIVE - 0x0E56: 0xF6, # THAI DIGIT SIX - 0x0E57: 0xF7, # THAI DIGIT SEVEN - 0x0E58: 0xF8, # THAI DIGIT EIGHT - 0x0E59: 0xF9, # THAI DIGIT NINE - 0x0E5A: 0xFA, # THAI CHARACTER ANGKHANKHU - 0x0E5B: 0xFB, # THAI CHARACTER KHOMUT - 0x2013: 0x96, # EN DASH - 0x2014: 0x97, # EM DASH - 0x2018: 0x91, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK - 0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK - 0x2022: 0x95, # BULLET - 0x2026: 0x85, # HORIZONTAL ELLIPSIS - 0x20AC: 0x80, # EURO SIGN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/cp875.py b/Lib/encodings/cp875.py index 879d5a4..72b160b 100644 --- a/Lib/encodings/cp875.py +++ b/Lib/encodings/cp875.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,257 +303,5 @@ decoding_table = ( u'\x9f' # 0xFF -> CONTROL ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x37, # END OF TRANSMISSION - 0x0005: 0x2D, # ENQUIRY - 0x0006: 0x2E, # ACKNOWLEDGE - 0x0007: 0x2F, # BELL - 0x0008: 0x16, # BACKSPACE - 0x0009: 0x05, # HORIZONTAL TABULATION - 0x000A: 0x25, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x3C, # DEVICE CONTROL FOUR - 0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x32, # SYNCHRONOUS IDLE - 0x0017: 0x26, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: None, # SUBSTITUTE - 0x001B: 0x27, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x40, # SPACE - 0x0021: 0x4F, # EXCLAMATION MARK - 0x0022: 0x7F, # QUOTATION MARK - 0x0023: 0x7B, # NUMBER SIGN - 0x0024: 0x5B, # DOLLAR SIGN - 0x0025: 0x6C, # PERCENT SIGN - 0x0026: 0x50, # AMPERSAND - 0x0027: 0x7D, # APOSTROPHE - 0x0028: 0x4D, # LEFT PARENTHESIS - 0x0029: 0x5D, # RIGHT PARENTHESIS - 0x002A: 0x5C, # ASTERISK - 0x002B: 0x4E, # PLUS SIGN - 0x002C: 0x6B, # COMMA - 0x002D: 0x60, # HYPHEN-MINUS - 0x002E: 0x4B, # FULL STOP - 0x002F: 0x61, # SOLIDUS - 0x0030: 0xF0, # DIGIT ZERO - 0x0031: 0xF1, # DIGIT ONE - 0x0032: 0xF2, # DIGIT TWO - 0x0033: 0xF3, # DIGIT THREE - 0x0034: 0xF4, # DIGIT FOUR - 0x0035: 0xF5, # DIGIT FIVE - 0x0036: 0xF6, # DIGIT SIX - 0x0037: 0xF7, # DIGIT SEVEN - 0x0038: 0xF8, # DIGIT EIGHT - 0x0039: 0xF9, # DIGIT NINE - 0x003A: 0x7A, # COLON - 0x003B: 0x5E, # SEMICOLON - 0x003C: 0x4C, # LESS-THAN SIGN - 0x003D: 0x7E, # EQUALS SIGN - 0x003E: 0x6E, # GREATER-THAN SIGN - 0x003F: 0x6F, # QUESTION MARK - 0x0040: 0x7C, # COMMERCIAL AT - 0x0041: 0xC1, # LATIN CAPITAL LETTER A - 0x0042: 0xC2, # LATIN CAPITAL LETTER B - 0x0043: 0xC3, # LATIN CAPITAL LETTER C - 0x0044: 0xC4, # LATIN CAPITAL LETTER D - 0x0045: 0xC5, # LATIN CAPITAL LETTER E - 0x0046: 0xC6, # LATIN CAPITAL LETTER F - 0x0047: 0xC7, # LATIN CAPITAL LETTER G - 0x0048: 0xC8, # LATIN CAPITAL LETTER H - 0x0049: 0xC9, # LATIN CAPITAL LETTER I - 0x004A: 0xD1, # LATIN CAPITAL LETTER J - 0x004B: 0xD2, # LATIN CAPITAL LETTER K - 0x004C: 0xD3, # LATIN CAPITAL LETTER L - 0x004D: 0xD4, # LATIN CAPITAL LETTER M - 0x004E: 0xD5, # LATIN CAPITAL LETTER N - 0x004F: 0xD6, # LATIN CAPITAL LETTER O - 0x0050: 0xD7, # LATIN CAPITAL LETTER P - 0x0051: 0xD8, # LATIN CAPITAL LETTER Q - 0x0052: 0xD9, # LATIN CAPITAL LETTER R - 0x0053: 0xE2, # LATIN CAPITAL LETTER S - 0x0054: 0xE3, # LATIN CAPITAL LETTER T - 0x0055: 0xE4, # LATIN CAPITAL LETTER U - 0x0056: 0xE5, # LATIN CAPITAL LETTER V - 0x0057: 0xE6, # LATIN CAPITAL LETTER W - 0x0058: 0xE7, # LATIN CAPITAL LETTER X - 0x0059: 0xE8, # LATIN CAPITAL LETTER Y - 0x005A: 0xE9, # LATIN CAPITAL LETTER Z - 0x005B: 0x4A, # LEFT SQUARE BRACKET - 0x005C: 0xE0, # REVERSE SOLIDUS - 0x005D: 0x5A, # RIGHT SQUARE BRACKET - 0x005E: 0x5F, # CIRCUMFLEX ACCENT - 0x005F: 0x6D, # LOW LINE - 0x0060: 0x79, # GRAVE ACCENT - 0x0061: 0x81, # LATIN SMALL LETTER A - 0x0062: 0x82, # LATIN SMALL LETTER B - 0x0063: 0x83, # LATIN SMALL LETTER C - 0x0064: 0x84, # LATIN SMALL LETTER D - 0x0065: 0x85, # LATIN SMALL LETTER E - 0x0066: 0x86, # LATIN SMALL LETTER F - 0x0067: 0x87, # LATIN SMALL LETTER G - 0x0068: 0x88, # LATIN SMALL LETTER H - 0x0069: 0x89, # LATIN SMALL LETTER I - 0x006A: 0x91, # LATIN SMALL LETTER J - 0x006B: 0x92, # LATIN SMALL LETTER K - 0x006C: 0x93, # LATIN SMALL LETTER L - 0x006D: 0x94, # LATIN SMALL LETTER M - 0x006E: 0x95, # LATIN SMALL LETTER N - 0x006F: 0x96, # LATIN SMALL LETTER O - 0x0070: 0x97, # LATIN SMALL LETTER P - 0x0071: 0x98, # LATIN SMALL LETTER Q - 0x0072: 0x99, # LATIN SMALL LETTER R - 0x0073: 0xA2, # LATIN SMALL LETTER S - 0x0074: 0xA3, # LATIN SMALL LETTER T - 0x0075: 0xA4, # LATIN SMALL LETTER U - 0x0076: 0xA5, # LATIN SMALL LETTER V - 0x0077: 0xA6, # LATIN SMALL LETTER W - 0x0078: 0xA7, # LATIN SMALL LETTER X - 0x0079: 0xA8, # LATIN SMALL LETTER Y - 0x007A: 0xA9, # LATIN SMALL LETTER Z - 0x007B: 0xC0, # LEFT CURLY BRACKET - 0x007C: 0x6A, # VERTICAL LINE - 0x007D: 0xD0, # RIGHT CURLY BRACKET - 0x007E: 0xA1, # TILDE - 0x007F: 0x07, # DELETE - 0x0080: 0x20, # CONTROL - 0x0081: 0x21, # CONTROL - 0x0082: 0x22, # CONTROL - 0x0083: 0x23, # CONTROL - 0x0084: 0x24, # CONTROL - 0x0085: 0x15, # CONTROL - 0x0086: 0x06, # CONTROL - 0x0087: 0x17, # CONTROL - 0x0088: 0x28, # CONTROL - 0x0089: 0x29, # CONTROL - 0x008A: 0x2A, # CONTROL - 0x008B: 0x2B, # CONTROL - 0x008C: 0x2C, # CONTROL - 0x008D: 0x09, # CONTROL - 0x008E: 0x0A, # CONTROL - 0x008F: 0x1B, # CONTROL - 0x0090: 0x30, # CONTROL - 0x0091: 0x31, # CONTROL - 0x0092: 0x1A, # CONTROL - 0x0093: 0x33, # CONTROL - 0x0094: 0x34, # CONTROL - 0x0095: 0x35, # CONTROL - 0x0096: 0x36, # CONTROL - 0x0097: 0x08, # CONTROL - 0x0098: 0x38, # CONTROL - 0x0099: 0x39, # CONTROL - 0x009A: 0x3A, # CONTROL - 0x009B: 0x3B, # CONTROL - 0x009C: 0x04, # CONTROL - 0x009D: 0x14, # CONTROL - 0x009E: 0x3E, # CONTROL - 0x009F: 0xFF, # CONTROL - 0x00A0: 0x74, # NO-BREAK SPACE - 0x00A3: 0xB0, # POUND SIGN - 0x00A6: 0xDF, # BROKEN BAR - 0x00A7: 0xEB, # SECTION SIGN - 0x00A8: 0x70, # DIAERESIS - 0x00A9: 0xFB, # COPYRIGHT SIGN - 0x00AB: 0xEE, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xEF, # NOT SIGN - 0x00AD: 0xCA, # SOFT HYPHEN - 0x00B0: 0x90, # DEGREE SIGN - 0x00B1: 0xDA, # PLUS-MINUS SIGN - 0x00B2: 0xEA, # SUPERSCRIPT TWO - 0x00B3: 0xFA, # SUPERSCRIPT THREE - 0x00B4: 0xA0, # ACUTE ACCENT - 0x00BB: 0xFE, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BD: 0xDB, # VULGAR FRACTION ONE HALF - 0x0385: 0x80, # GREEK DIALYTIKA TONOS - 0x0386: 0x71, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0387: 0xDD, # GREEK ANO TELEIA - 0x0388: 0x72, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0x73, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038A: 0x75, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038C: 0x76, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038E: 0x77, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038F: 0x78, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0390: 0xCC, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x0391: 0x41, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0x42, # GREEK CAPITAL LETTER BETA - 0x0393: 0x43, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0x44, # GREEK CAPITAL LETTER DELTA - 0x0395: 0x45, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0x46, # GREEK CAPITAL LETTER ZETA - 0x0397: 0x47, # GREEK CAPITAL LETTER ETA - 0x0398: 0x48, # GREEK CAPITAL LETTER THETA - 0x0399: 0x49, # GREEK CAPITAL LETTER IOTA - 0x039A: 0x51, # GREEK CAPITAL LETTER KAPPA - 0x039B: 0x52, # GREEK CAPITAL LETTER LAMDA - 0x039C: 0x53, # GREEK CAPITAL LETTER MU - 0x039D: 0x54, # GREEK CAPITAL LETTER NU - 0x039E: 0x55, # GREEK CAPITAL LETTER XI - 0x039F: 0x56, # GREEK CAPITAL LETTER OMICRON - 0x03A0: 0x57, # GREEK CAPITAL LETTER PI - 0x03A1: 0x58, # GREEK CAPITAL LETTER RHO - 0x03A3: 0x59, # GREEK CAPITAL LETTER SIGMA - 0x03A4: 0x62, # GREEK CAPITAL LETTER TAU - 0x03A5: 0x63, # GREEK CAPITAL LETTER UPSILON - 0x03A6: 0x64, # GREEK CAPITAL LETTER PHI - 0x03A7: 0x65, # GREEK CAPITAL LETTER CHI - 0x03A8: 0x66, # GREEK CAPITAL LETTER PSI - 0x03A9: 0x67, # GREEK CAPITAL LETTER OMEGA - 0x03AA: 0x68, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03AB: 0x69, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03AC: 0xB1, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03AD: 0xB2, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03AE: 0xB3, # GREEK SMALL LETTER ETA WITH TONOS - 0x03AF: 0xB5, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03B0: 0xCD, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x03B1: 0x8A, # GREEK SMALL LETTER ALPHA - 0x03B2: 0x8B, # GREEK SMALL LETTER BETA - 0x03B3: 0x8C, # GREEK SMALL LETTER GAMMA - 0x03B4: 0x8D, # GREEK SMALL LETTER DELTA - 0x03B5: 0x8E, # GREEK SMALL LETTER EPSILON - 0x03B6: 0x8F, # GREEK SMALL LETTER ZETA - 0x03B7: 0x9A, # GREEK SMALL LETTER ETA - 0x03B8: 0x9B, # GREEK SMALL LETTER THETA - 0x03B9: 0x9C, # GREEK SMALL LETTER IOTA - 0x03BA: 0x9D, # GREEK SMALL LETTER KAPPA - 0x03BB: 0x9E, # GREEK SMALL LETTER LAMDA - 0x03BC: 0x9F, # GREEK SMALL LETTER MU - 0x03BD: 0xAA, # GREEK SMALL LETTER NU - 0x03BE: 0xAB, # GREEK SMALL LETTER XI - 0x03BF: 0xAC, # GREEK SMALL LETTER OMICRON - 0x03C0: 0xAD, # GREEK SMALL LETTER PI - 0x03C1: 0xAE, # GREEK SMALL LETTER RHO - 0x03C2: 0xBA, # GREEK SMALL LETTER FINAL SIGMA - 0x03C3: 0xAF, # GREEK SMALL LETTER SIGMA - 0x03C4: 0xBB, # GREEK SMALL LETTER TAU - 0x03C5: 0xBC, # GREEK SMALL LETTER UPSILON - 0x03C6: 0xBD, # GREEK SMALL LETTER PHI - 0x03C7: 0xBE, # GREEK SMALL LETTER CHI - 0x03C8: 0xBF, # GREEK SMALL LETTER PSI - 0x03C9: 0xCB, # GREEK SMALL LETTER OMEGA - 0x03CA: 0xB4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03CB: 0xB8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03CC: 0xB6, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03CD: 0xB7, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03CE: 0xB9, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x2015: 0xCF, # HORIZONTAL BAR - 0x2018: 0xCE, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xDE, # RIGHT SINGLE QUOTATION MARK -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/iso8859_1.py b/Lib/encodings/iso8859_1.py index b985585..71bc13f 100644 --- a/Lib/encodings/iso8859_1.py +++ b/Lib/encodings/iso8859_1.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xBF, # INVERTED QUESTION MARK - 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH (Icelandic) - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN (Icelandic) - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S (German) - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0xF0, # LATIN SMALL LETTER ETH (Icelandic) - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0xFE, # LATIN SMALL LETTER THORN (Icelandic) - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/iso8859_10.py b/Lib/encodings/iso8859_10.py index 8588430..757e5c5 100644 --- a/Lib/encodings/iso8859_10.py +++ b/Lib/encodings/iso8859_10.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u0138' # 0xFF -> LATIN SMALL LETTER KRA ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A7: 0xA7, # SECTION SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH (Icelandic) - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN (Icelandic) - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S (German) - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0xF0, # LATIN SMALL LETTER ETH (Icelandic) - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0xFE, # LATIN SMALL LETTER THORN (Icelandic) - 0x0100: 0xC0, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0xE0, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xB1, # LATIN SMALL LETTER A WITH OGONEK - 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON - 0x0110: 0xA9, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xB9, # LATIN SMALL LETTER D WITH STROKE - 0x0112: 0xA2, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0xB2, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0xCC, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0xEC, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK - 0x0122: 0xA3, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0xB3, # LATIN SMALL LETTER G WITH CEDILLA - 0x0128: 0xA5, # LATIN CAPITAL LETTER I WITH TILDE - 0x0129: 0xB5, # LATIN SMALL LETTER I WITH TILDE - 0x012A: 0xA4, # LATIN CAPITAL LETTER I WITH MACRON - 0x012B: 0xB4, # LATIN SMALL LETTER I WITH MACRON - 0x012E: 0xC7, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012F: 0xE7, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0xA6, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0xB6, # LATIN SMALL LETTER K WITH CEDILLA - 0x0138: 0xFF, # LATIN SMALL LETTER KRA - 0x013B: 0xA8, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013C: 0xB8, # LATIN SMALL LETTER L WITH CEDILLA - 0x0145: 0xD1, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0xF1, # LATIN SMALL LETTER N WITH CEDILLA - 0x014A: 0xAF, # LATIN CAPITAL LETTER ENG - 0x014B: 0xBF, # LATIN SMALL LETTER ENG - 0x014C: 0xD2, # LATIN CAPITAL LETTER O WITH MACRON - 0x014D: 0xF2, # LATIN SMALL LETTER O WITH MACRON - 0x0160: 0xAA, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xBA, # LATIN SMALL LETTER S WITH CARON - 0x0166: 0xAB, # LATIN CAPITAL LETTER T WITH STROKE - 0x0167: 0xBB, # LATIN SMALL LETTER T WITH STROKE - 0x0168: 0xD7, # LATIN CAPITAL LETTER U WITH TILDE - 0x0169: 0xF7, # LATIN SMALL LETTER U WITH TILDE - 0x016A: 0xAE, # LATIN CAPITAL LETTER U WITH MACRON - 0x016B: 0xBE, # LATIN SMALL LETTER U WITH MACRON - 0x0172: 0xD9, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0xF9, # LATIN SMALL LETTER U WITH OGONEK - 0x017D: 0xAC, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xBC, # LATIN SMALL LETTER Z WITH CARON - 0x2015: 0xBD, # HORIZONTAL BAR -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/iso8859_11.py b/Lib/encodings/iso8859_11.py index fffe692..27ece8d 100644 --- a/Lib/encodings/iso8859_11.py +++ b/Lib/encodings/iso8859_11.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,255 +303,5 @@ decoding_table = ( u'\ufffe' ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x0E01: 0xA1, # THAI CHARACTER KO KAI - 0x0E02: 0xA2, # THAI CHARACTER KHO KHAI - 0x0E03: 0xA3, # THAI CHARACTER KHO KHUAT - 0x0E04: 0xA4, # THAI CHARACTER KHO KHWAI - 0x0E05: 0xA5, # THAI CHARACTER KHO KHON - 0x0E06: 0xA6, # THAI CHARACTER KHO RAKHANG - 0x0E07: 0xA7, # THAI CHARACTER NGO NGU - 0x0E08: 0xA8, # THAI CHARACTER CHO CHAN - 0x0E09: 0xA9, # THAI CHARACTER CHO CHING - 0x0E0A: 0xAA, # THAI CHARACTER CHO CHANG - 0x0E0B: 0xAB, # THAI CHARACTER SO SO - 0x0E0C: 0xAC, # THAI CHARACTER CHO CHOE - 0x0E0D: 0xAD, # THAI CHARACTER YO YING - 0x0E0E: 0xAE, # THAI CHARACTER DO CHADA - 0x0E0F: 0xAF, # THAI CHARACTER TO PATAK - 0x0E10: 0xB0, # THAI CHARACTER THO THAN - 0x0E11: 0xB1, # THAI CHARACTER THO NANGMONTHO - 0x0E12: 0xB2, # THAI CHARACTER THO PHUTHAO - 0x0E13: 0xB3, # THAI CHARACTER NO NEN - 0x0E14: 0xB4, # THAI CHARACTER DO DEK - 0x0E15: 0xB5, # THAI CHARACTER TO TAO - 0x0E16: 0xB6, # THAI CHARACTER THO THUNG - 0x0E17: 0xB7, # THAI CHARACTER THO THAHAN - 0x0E18: 0xB8, # THAI CHARACTER THO THONG - 0x0E19: 0xB9, # THAI CHARACTER NO NU - 0x0E1A: 0xBA, # THAI CHARACTER BO BAIMAI - 0x0E1B: 0xBB, # THAI CHARACTER PO PLA - 0x0E1C: 0xBC, # THAI CHARACTER PHO PHUNG - 0x0E1D: 0xBD, # THAI CHARACTER FO FA - 0x0E1E: 0xBE, # THAI CHARACTER PHO PHAN - 0x0E1F: 0xBF, # THAI CHARACTER FO FAN - 0x0E20: 0xC0, # THAI CHARACTER PHO SAMPHAO - 0x0E21: 0xC1, # THAI CHARACTER MO MA - 0x0E22: 0xC2, # THAI CHARACTER YO YAK - 0x0E23: 0xC3, # THAI CHARACTER RO RUA - 0x0E24: 0xC4, # THAI CHARACTER RU - 0x0E25: 0xC5, # THAI CHARACTER LO LING - 0x0E26: 0xC6, # THAI CHARACTER LU - 0x0E27: 0xC7, # THAI CHARACTER WO WAEN - 0x0E28: 0xC8, # THAI CHARACTER SO SALA - 0x0E29: 0xC9, # THAI CHARACTER SO RUSI - 0x0E2A: 0xCA, # THAI CHARACTER SO SUA - 0x0E2B: 0xCB, # THAI CHARACTER HO HIP - 0x0E2C: 0xCC, # THAI CHARACTER LO CHULA - 0x0E2D: 0xCD, # THAI CHARACTER O ANG - 0x0E2E: 0xCE, # THAI CHARACTER HO NOKHUK - 0x0E2F: 0xCF, # THAI CHARACTER PAIYANNOI - 0x0E30: 0xD0, # THAI CHARACTER SARA A - 0x0E31: 0xD1, # THAI CHARACTER MAI HAN-AKAT - 0x0E32: 0xD2, # THAI CHARACTER SARA AA - 0x0E33: 0xD3, # THAI CHARACTER SARA AM - 0x0E34: 0xD4, # THAI CHARACTER SARA I - 0x0E35: 0xD5, # THAI CHARACTER SARA II - 0x0E36: 0xD6, # THAI CHARACTER SARA UE - 0x0E37: 0xD7, # THAI CHARACTER SARA UEE - 0x0E38: 0xD8, # THAI CHARACTER SARA U - 0x0E39: 0xD9, # THAI CHARACTER SARA UU - 0x0E3A: 0xDA, # THAI CHARACTER PHINTHU - 0x0E3F: 0xDF, # THAI CURRENCY SYMBOL BAHT - 0x0E40: 0xE0, # THAI CHARACTER SARA E - 0x0E41: 0xE1, # THAI CHARACTER SARA AE - 0x0E42: 0xE2, # THAI CHARACTER SARA O - 0x0E43: 0xE3, # THAI CHARACTER SARA AI MAIMUAN - 0x0E44: 0xE4, # THAI CHARACTER SARA AI MAIMALAI - 0x0E45: 0xE5, # THAI CHARACTER LAKKHANGYAO - 0x0E46: 0xE6, # THAI CHARACTER MAIYAMOK - 0x0E47: 0xE7, # THAI CHARACTER MAITAIKHU - 0x0E48: 0xE8, # THAI CHARACTER MAI EK - 0x0E49: 0xE9, # THAI CHARACTER MAI THO - 0x0E4A: 0xEA, # THAI CHARACTER MAI TRI - 0x0E4B: 0xEB, # THAI CHARACTER MAI CHATTAWA - 0x0E4C: 0xEC, # THAI CHARACTER THANTHAKHAT - 0x0E4D: 0xED, # THAI CHARACTER NIKHAHIT - 0x0E4E: 0xEE, # THAI CHARACTER YAMAKKAN - 0x0E4F: 0xEF, # THAI CHARACTER FONGMAN - 0x0E50: 0xF0, # THAI DIGIT ZERO - 0x0E51: 0xF1, # THAI DIGIT ONE - 0x0E52: 0xF2, # THAI DIGIT TWO - 0x0E53: 0xF3, # THAI DIGIT THREE - 0x0E54: 0xF4, # THAI DIGIT FOUR - 0x0E55: 0xF5, # THAI DIGIT FIVE - 0x0E56: 0xF6, # THAI DIGIT SIX - 0x0E57: 0xF7, # THAI DIGIT SEVEN - 0x0E58: 0xF8, # THAI DIGIT EIGHT - 0x0E59: 0xF9, # THAI DIGIT NINE - 0x0E5A: 0xFA, # THAI CHARACTER ANGKHANKHU - 0x0E5B: 0xFB, # THAI CHARACTER KHOMUT -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/iso8859_13.py b/Lib/encodings/iso8859_13.py index a890580..71adb5c 100644 --- a/Lib/encodings/iso8859_13.py +++ b/Lib/encodings/iso8859_13.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u2019' # 0xFF -> RIGHT SINGLE QUOTATION MARK ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xAF, # LATIN CAPITAL LETTER AE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xA8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S (German) - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xBF, # LATIN SMALL LETTER AE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xB8, # LATIN SMALL LETTER O WITH STROKE - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0100: 0xC2, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0xE2, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0xC0, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xE0, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0xC3, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xE3, # LATIN SMALL LETTER C WITH ACUTE - 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON - 0x0112: 0xC7, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0xE7, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0xCB, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0xEB, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0xC6, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xE6, # LATIN SMALL LETTER E WITH OGONEK - 0x0122: 0xCC, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0xEC, # LATIN SMALL LETTER G WITH CEDILLA - 0x012A: 0xCE, # LATIN CAPITAL LETTER I WITH MACRON - 0x012B: 0xEE, # LATIN SMALL LETTER I WITH MACRON - 0x012E: 0xC1, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012F: 0xE1, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0xCD, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0xED, # LATIN SMALL LETTER K WITH CEDILLA - 0x013B: 0xCF, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013C: 0xEF, # LATIN SMALL LETTER L WITH CEDILLA - 0x0141: 0xD9, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xF9, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE - 0x0145: 0xD2, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0xF2, # LATIN SMALL LETTER N WITH CEDILLA - 0x014C: 0xD4, # LATIN CAPITAL LETTER O WITH MACRON - 0x014D: 0xF4, # LATIN SMALL LETTER O WITH MACRON - 0x0156: 0xAA, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x0157: 0xBA, # LATIN SMALL LETTER R WITH CEDILLA - 0x015A: 0xDA, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015B: 0xFA, # LATIN SMALL LETTER S WITH ACUTE - 0x0160: 0xD0, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xF0, # LATIN SMALL LETTER S WITH CARON - 0x016A: 0xDB, # LATIN CAPITAL LETTER U WITH MACRON - 0x016B: 0xFB, # LATIN SMALL LETTER U WITH MACRON - 0x0172: 0xD8, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0xF8, # LATIN SMALL LETTER U WITH OGONEK - 0x0179: 0xCA, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017A: 0xEA, # LATIN SMALL LETTER Z WITH ACUTE - 0x017B: 0xDD, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017C: 0xFD, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017D: 0xDE, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xFE, # LATIN SMALL LETTER Z WITH CARON - 0x2019: 0xFF, # RIGHT SINGLE QUOTATION MARK - 0x201C: 0xB4, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xA1, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xA5, # DOUBLE LOW-9 QUOTATION MARK -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/iso8859_14.py b/Lib/encodings/iso8859_14.py index afa458c..56843d5 100644 --- a/Lib/encodings/iso8859_14.py +++ b/Lib/encodings/iso8859_14.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A3: 0xA3, # POUND SIGN - 0x00A7: 0xA7, # SECTION SIGN - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x010A: 0xA4, # LATIN CAPITAL LETTER C WITH DOT ABOVE - 0x010B: 0xA5, # LATIN SMALL LETTER C WITH DOT ABOVE - 0x0120: 0xB2, # LATIN CAPITAL LETTER G WITH DOT ABOVE - 0x0121: 0xB3, # LATIN SMALL LETTER G WITH DOT ABOVE - 0x0174: 0xD0, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX - 0x0175: 0xF0, # LATIN SMALL LETTER W WITH CIRCUMFLEX - 0x0176: 0xDE, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX - 0x0177: 0xFE, # LATIN SMALL LETTER Y WITH CIRCUMFLEX - 0x0178: 0xAF, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x1E02: 0xA1, # LATIN CAPITAL LETTER B WITH DOT ABOVE - 0x1E03: 0xA2, # LATIN SMALL LETTER B WITH DOT ABOVE - 0x1E0A: 0xA6, # LATIN CAPITAL LETTER D WITH DOT ABOVE - 0x1E0B: 0xAB, # LATIN SMALL LETTER D WITH DOT ABOVE - 0x1E1E: 0xB0, # LATIN CAPITAL LETTER F WITH DOT ABOVE - 0x1E1F: 0xB1, # LATIN SMALL LETTER F WITH DOT ABOVE - 0x1E40: 0xB4, # LATIN CAPITAL LETTER M WITH DOT ABOVE - 0x1E41: 0xB5, # LATIN SMALL LETTER M WITH DOT ABOVE - 0x1E56: 0xB7, # LATIN CAPITAL LETTER P WITH DOT ABOVE - 0x1E57: 0xB9, # LATIN SMALL LETTER P WITH DOT ABOVE - 0x1E60: 0xBB, # LATIN CAPITAL LETTER S WITH DOT ABOVE - 0x1E61: 0xBF, # LATIN SMALL LETTER S WITH DOT ABOVE - 0x1E6A: 0xD7, # LATIN CAPITAL LETTER T WITH DOT ABOVE - 0x1E6B: 0xF7, # LATIN SMALL LETTER T WITH DOT ABOVE - 0x1E80: 0xA8, # LATIN CAPITAL LETTER W WITH GRAVE - 0x1E81: 0xB8, # LATIN SMALL LETTER W WITH GRAVE - 0x1E82: 0xAA, # LATIN CAPITAL LETTER W WITH ACUTE - 0x1E83: 0xBA, # LATIN SMALL LETTER W WITH ACUTE - 0x1E84: 0xBD, # LATIN CAPITAL LETTER W WITH DIAERESIS - 0x1E85: 0xBE, # LATIN SMALL LETTER W WITH DIAERESIS - 0x1EF2: 0xAC, # LATIN CAPITAL LETTER Y WITH GRAVE - 0x1EF3: 0xBC, # LATIN SMALL LETTER Y WITH GRAVE -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/iso8859_15.py b/Lib/encodings/iso8859_15.py index 4a8334e..13b140c 100644 --- a/Lib/encodings/iso8859_15.py +++ b/Lib/encodings/iso8859_15.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A7: 0xA7, # SECTION SIGN - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BF: 0xBF, # INVERTED QUESTION MARK - 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0xF0, # LATIN SMALL LETTER ETH - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0xFE, # LATIN SMALL LETTER THORN - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0152: 0xBC, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xBD, # LATIN SMALL LIGATURE OE - 0x0160: 0xA6, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xA8, # LATIN SMALL LETTER S WITH CARON - 0x0178: 0xBE, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x017D: 0xB4, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xB8, # LATIN SMALL LETTER Z WITH CARON - 0x20AC: 0xA4, # EURO SIGN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/iso8859_16.py b/Lib/encodings/iso8859_16.py index aeebfb6..00b9ac8 100644 --- a/Lib/encodings/iso8859_16.py +++ b/Lib/encodings/iso8859_16.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A7: 0xA7, # SECTION SIGN - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE - 0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xA2, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0xC5, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xE5, # LATIN SMALL LETTER C WITH ACUTE - 0x010C: 0xB2, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xB9, # LATIN SMALL LETTER C WITH CARON - 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE - 0x0118: 0xDD, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xFD, # LATIN SMALL LETTER E WITH OGONEK - 0x0141: 0xA3, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xB3, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE - 0x0150: 0xD5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x0151: 0xF5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x0152: 0xBC, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xBD, # LATIN SMALL LIGATURE OE - 0x015A: 0xD7, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015B: 0xF7, # LATIN SMALL LETTER S WITH ACUTE - 0x0160: 0xA6, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xA8, # LATIN SMALL LETTER S WITH CARON - 0x0170: 0xD8, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x0171: 0xF8, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x0178: 0xBE, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0179: 0xAC, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017A: 0xAE, # LATIN SMALL LETTER Z WITH ACUTE - 0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017D: 0xB4, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xB8, # LATIN SMALL LETTER Z WITH CARON - 0x0218: 0xAA, # LATIN CAPITAL LETTER S WITH COMMA BELOW - 0x0219: 0xBA, # LATIN SMALL LETTER S WITH COMMA BELOW - 0x021A: 0xDE, # LATIN CAPITAL LETTER T WITH COMMA BELOW - 0x021B: 0xFE, # LATIN SMALL LETTER T WITH COMMA BELOW - 0x201D: 0xB5, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xA5, # DOUBLE LOW-9 QUOTATION MARK - 0x20AC: 0xA4, # EURO SIGN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/iso8859_2.py b/Lib/encodings/iso8859_2.py index 845f322..38e91d8 100644 --- a/Lib/encodings/iso8859_2.py +++ b/Lib/encodings/iso8859_2.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u02d9' # 0xFF -> DOT ABOVE ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B8: 0xB8, # CEDILLA - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE - 0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE - 0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xB1, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0xC6, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xE6, # LATIN SMALL LETTER C WITH ACUTE - 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON - 0x010E: 0xCF, # LATIN CAPITAL LETTER D WITH CARON - 0x010F: 0xEF, # LATIN SMALL LETTER D WITH CARON - 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE - 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK - 0x011A: 0xCC, # LATIN CAPITAL LETTER E WITH CARON - 0x011B: 0xEC, # LATIN SMALL LETTER E WITH CARON - 0x0139: 0xC5, # LATIN CAPITAL LETTER L WITH ACUTE - 0x013A: 0xE5, # LATIN SMALL LETTER L WITH ACUTE - 0x013D: 0xA5, # LATIN CAPITAL LETTER L WITH CARON - 0x013E: 0xB5, # LATIN SMALL LETTER L WITH CARON - 0x0141: 0xA3, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xB3, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE - 0x0147: 0xD2, # LATIN CAPITAL LETTER N WITH CARON - 0x0148: 0xF2, # LATIN SMALL LETTER N WITH CARON - 0x0150: 0xD5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x0151: 0xF5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x0154: 0xC0, # LATIN CAPITAL LETTER R WITH ACUTE - 0x0155: 0xE0, # LATIN SMALL LETTER R WITH ACUTE - 0x0158: 0xD8, # LATIN CAPITAL LETTER R WITH CARON - 0x0159: 0xF8, # LATIN SMALL LETTER R WITH CARON - 0x015A: 0xA6, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015B: 0xB6, # LATIN SMALL LETTER S WITH ACUTE - 0x015E: 0xAA, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015F: 0xBA, # LATIN SMALL LETTER S WITH CEDILLA - 0x0160: 0xA9, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xB9, # LATIN SMALL LETTER S WITH CARON - 0x0162: 0xDE, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x0163: 0xFE, # LATIN SMALL LETTER T WITH CEDILLA - 0x0164: 0xAB, # LATIN CAPITAL LETTER T WITH CARON - 0x0165: 0xBB, # LATIN SMALL LETTER T WITH CARON - 0x016E: 0xD9, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x016F: 0xF9, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0170: 0xDB, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x0171: 0xFB, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x0179: 0xAC, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017A: 0xBC, # LATIN SMALL LETTER Z WITH ACUTE - 0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017D: 0xAE, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xBE, # LATIN SMALL LETTER Z WITH CARON - 0x02C7: 0xB7, # CARON - 0x02D8: 0xA2, # BREVE - 0x02D9: 0xFF, # DOT ABOVE - 0x02DB: 0xB2, # OGONEK - 0x02DD: 0xBD, # DOUBLE ACUTE ACCENT -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/iso8859_3.py b/Lib/encodings/iso8859_3.py index fbc8775..23daafd 100644 --- a/Lib/encodings/iso8859_3.py +++ b/Lib/encodings/iso8859_3.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,256 +303,5 @@ decoding_table = ( u'\u02d9' # 0xFF -> DOT ABOVE ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0108: 0xC6, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX - 0x0109: 0xE6, # LATIN SMALL LETTER C WITH CIRCUMFLEX - 0x010A: 0xC5, # LATIN CAPITAL LETTER C WITH DOT ABOVE - 0x010B: 0xE5, # LATIN SMALL LETTER C WITH DOT ABOVE - 0x011C: 0xD8, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX - 0x011D: 0xF8, # LATIN SMALL LETTER G WITH CIRCUMFLEX - 0x011E: 0xAB, # LATIN CAPITAL LETTER G WITH BREVE - 0x011F: 0xBB, # LATIN SMALL LETTER G WITH BREVE - 0x0120: 0xD5, # LATIN CAPITAL LETTER G WITH DOT ABOVE - 0x0121: 0xF5, # LATIN SMALL LETTER G WITH DOT ABOVE - 0x0124: 0xA6, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX - 0x0125: 0xB6, # LATIN SMALL LETTER H WITH CIRCUMFLEX - 0x0126: 0xA1, # LATIN CAPITAL LETTER H WITH STROKE - 0x0127: 0xB1, # LATIN SMALL LETTER H WITH STROKE - 0x0130: 0xA9, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0xB9, # LATIN SMALL LETTER DOTLESS I - 0x0134: 0xAC, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX - 0x0135: 0xBC, # LATIN SMALL LETTER J WITH CIRCUMFLEX - 0x015C: 0xDE, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX - 0x015D: 0xFE, # LATIN SMALL LETTER S WITH CIRCUMFLEX - 0x015E: 0xAA, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015F: 0xBA, # LATIN SMALL LETTER S WITH CEDILLA - 0x016C: 0xDD, # LATIN CAPITAL LETTER U WITH BREVE - 0x016D: 0xFD, # LATIN SMALL LETTER U WITH BREVE - 0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x02D8: 0xA2, # BREVE - 0x02D9: 0xFF, # DOT ABOVE -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/iso8859_4.py b/Lib/encodings/iso8859_4.py index e705954..c8e03b5 100644 --- a/Lib/encodings/iso8859_4.py +++ b/Lib/encodings/iso8859_4.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u02d9' # 0xFF -> DOT ABOVE ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B8: 0xB8, # CEDILLA - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0100: 0xC0, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0xE0, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0xB1, # LATIN SMALL LETTER A WITH OGONEK - 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON - 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE - 0x0112: 0xAA, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0xBA, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0xCC, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0xEC, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK - 0x0122: 0xAB, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0xBB, # LATIN SMALL LETTER G WITH CEDILLA - 0x0128: 0xA5, # LATIN CAPITAL LETTER I WITH TILDE - 0x0129: 0xB5, # LATIN SMALL LETTER I WITH TILDE - 0x012A: 0xCF, # LATIN CAPITAL LETTER I WITH MACRON - 0x012B: 0xEF, # LATIN SMALL LETTER I WITH MACRON - 0x012E: 0xC7, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012F: 0xE7, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0xD3, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0xF3, # LATIN SMALL LETTER K WITH CEDILLA - 0x0138: 0xA2, # LATIN SMALL LETTER KRA - 0x013B: 0xA6, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013C: 0xB6, # LATIN SMALL LETTER L WITH CEDILLA - 0x0145: 0xD1, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0xF1, # LATIN SMALL LETTER N WITH CEDILLA - 0x014A: 0xBD, # LATIN CAPITAL LETTER ENG - 0x014B: 0xBF, # LATIN SMALL LETTER ENG - 0x014C: 0xD2, # LATIN CAPITAL LETTER O WITH MACRON - 0x014D: 0xF2, # LATIN SMALL LETTER O WITH MACRON - 0x0156: 0xA3, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x0157: 0xB3, # LATIN SMALL LETTER R WITH CEDILLA - 0x0160: 0xA9, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xB9, # LATIN SMALL LETTER S WITH CARON - 0x0166: 0xAC, # LATIN CAPITAL LETTER T WITH STROKE - 0x0167: 0xBC, # LATIN SMALL LETTER T WITH STROKE - 0x0168: 0xDD, # LATIN CAPITAL LETTER U WITH TILDE - 0x0169: 0xFD, # LATIN SMALL LETTER U WITH TILDE - 0x016A: 0xDE, # LATIN CAPITAL LETTER U WITH MACRON - 0x016B: 0xFE, # LATIN SMALL LETTER U WITH MACRON - 0x0172: 0xD9, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0xF9, # LATIN SMALL LETTER U WITH OGONEK - 0x017D: 0xAE, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xBE, # LATIN SMALL LETTER Z WITH CARON - 0x02C7: 0xB7, # CARON - 0x02D9: 0xFF, # DOT ABOVE - 0x02DB: 0xB2, # OGONEK -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/iso8859_5.py b/Lib/encodings/iso8859_5.py index 93a4e90..c01cd1c 100644 --- a/Lib/encodings/iso8859_5.py +++ b/Lib/encodings/iso8859_5.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u045f' # 0xFF -> CYRILLIC SMALL LETTER DZHE ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A7: 0xFD, # SECTION SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x0401: 0xA1, # CYRILLIC CAPITAL LETTER IO - 0x0402: 0xA2, # CYRILLIC CAPITAL LETTER DJE - 0x0403: 0xA3, # CYRILLIC CAPITAL LETTER GJE - 0x0404: 0xA4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0405: 0xA5, # CYRILLIC CAPITAL LETTER DZE - 0x0406: 0xA6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0407: 0xA7, # CYRILLIC CAPITAL LETTER YI - 0x0408: 0xA8, # CYRILLIC CAPITAL LETTER JE - 0x0409: 0xA9, # CYRILLIC CAPITAL LETTER LJE - 0x040A: 0xAA, # CYRILLIC CAPITAL LETTER NJE - 0x040B: 0xAB, # CYRILLIC CAPITAL LETTER TSHE - 0x040C: 0xAC, # CYRILLIC CAPITAL LETTER KJE - 0x040E: 0xAE, # CYRILLIC CAPITAL LETTER SHORT U - 0x040F: 0xAF, # CYRILLIC CAPITAL LETTER DZHE - 0x0410: 0xB0, # CYRILLIC CAPITAL LETTER A - 0x0411: 0xB1, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0xB2, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0xB3, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0xB4, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0xB5, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0xB6, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0xB7, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0xB8, # CYRILLIC CAPITAL LETTER I - 0x0419: 0xB9, # CYRILLIC CAPITAL LETTER SHORT I - 0x041A: 0xBA, # CYRILLIC CAPITAL LETTER KA - 0x041B: 0xBB, # CYRILLIC CAPITAL LETTER EL - 0x041C: 0xBC, # CYRILLIC CAPITAL LETTER EM - 0x041D: 0xBD, # CYRILLIC CAPITAL LETTER EN - 0x041E: 0xBE, # CYRILLIC CAPITAL LETTER O - 0x041F: 0xBF, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0xC0, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0xC1, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0xC2, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0xC3, # CYRILLIC CAPITAL LETTER U - 0x0424: 0xC4, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0xC5, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0xC6, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0xC7, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0xC8, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0xC9, # CYRILLIC CAPITAL LETTER SHCHA - 0x042A: 0xCA, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042B: 0xCB, # CYRILLIC CAPITAL LETTER YERU - 0x042C: 0xCC, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042D: 0xCD, # CYRILLIC CAPITAL LETTER E - 0x042E: 0xCE, # CYRILLIC CAPITAL LETTER YU - 0x042F: 0xCF, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0xD0, # CYRILLIC SMALL LETTER A - 0x0431: 0xD1, # CYRILLIC SMALL LETTER BE - 0x0432: 0xD2, # CYRILLIC SMALL LETTER VE - 0x0433: 0xD3, # CYRILLIC SMALL LETTER GHE - 0x0434: 0xD4, # CYRILLIC SMALL LETTER DE - 0x0435: 0xD5, # CYRILLIC SMALL LETTER IE - 0x0436: 0xD6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0xD7, # CYRILLIC SMALL LETTER ZE - 0x0438: 0xD8, # CYRILLIC SMALL LETTER I - 0x0439: 0xD9, # CYRILLIC SMALL LETTER SHORT I - 0x043A: 0xDA, # CYRILLIC SMALL LETTER KA - 0x043B: 0xDB, # CYRILLIC SMALL LETTER EL - 0x043C: 0xDC, # CYRILLIC SMALL LETTER EM - 0x043D: 0xDD, # CYRILLIC SMALL LETTER EN - 0x043E: 0xDE, # CYRILLIC SMALL LETTER O - 0x043F: 0xDF, # CYRILLIC SMALL LETTER PE - 0x0440: 0xE0, # CYRILLIC SMALL LETTER ER - 0x0441: 0xE1, # CYRILLIC SMALL LETTER ES - 0x0442: 0xE2, # CYRILLIC SMALL LETTER TE - 0x0443: 0xE3, # CYRILLIC SMALL LETTER U - 0x0444: 0xE4, # CYRILLIC SMALL LETTER EF - 0x0445: 0xE5, # CYRILLIC SMALL LETTER HA - 0x0446: 0xE6, # CYRILLIC SMALL LETTER TSE - 0x0447: 0xE7, # CYRILLIC SMALL LETTER CHE - 0x0448: 0xE8, # CYRILLIC SMALL LETTER SHA - 0x0449: 0xE9, # CYRILLIC SMALL LETTER SHCHA - 0x044A: 0xEA, # CYRILLIC SMALL LETTER HARD SIGN - 0x044B: 0xEB, # CYRILLIC SMALL LETTER YERU - 0x044C: 0xEC, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044D: 0xED, # CYRILLIC SMALL LETTER E - 0x044E: 0xEE, # CYRILLIC SMALL LETTER YU - 0x044F: 0xEF, # CYRILLIC SMALL LETTER YA - 0x0451: 0xF1, # CYRILLIC SMALL LETTER IO - 0x0452: 0xF2, # CYRILLIC SMALL LETTER DJE - 0x0453: 0xF3, # CYRILLIC SMALL LETTER GJE - 0x0454: 0xF4, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0455: 0xF5, # CYRILLIC SMALL LETTER DZE - 0x0456: 0xF6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0457: 0xF7, # CYRILLIC SMALL LETTER YI - 0x0458: 0xF8, # CYRILLIC SMALL LETTER JE - 0x0459: 0xF9, # CYRILLIC SMALL LETTER LJE - 0x045A: 0xFA, # CYRILLIC SMALL LETTER NJE - 0x045B: 0xFB, # CYRILLIC SMALL LETTER TSHE - 0x045C: 0xFC, # CYRILLIC SMALL LETTER KJE - 0x045E: 0xFE, # CYRILLIC SMALL LETTER SHORT U - 0x045F: 0xFF, # CYRILLIC SMALL LETTER DZHE - 0x2116: 0xF0, # NUMERO SIGN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/iso8859_6.py b/Lib/encodings/iso8859_6.py index f911cc4..16c34a3 100644 --- a/Lib/encodings/iso8859_6.py +++ b/Lib/encodings/iso8859_6.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,218 +303,5 @@ decoding_table = ( u'\ufffe' ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x060C: 0xAC, # ARABIC COMMA - 0x061B: 0xBB, # ARABIC SEMICOLON - 0x061F: 0xBF, # ARABIC QUESTION MARK - 0x0621: 0xC1, # ARABIC LETTER HAMZA - 0x0622: 0xC2, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x0623: 0xC3, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x0624: 0xC4, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x0625: 0xC5, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x0626: 0xC6, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x0627: 0xC7, # ARABIC LETTER ALEF - 0x0628: 0xC8, # ARABIC LETTER BEH - 0x0629: 0xC9, # ARABIC LETTER TEH MARBUTA - 0x062A: 0xCA, # ARABIC LETTER TEH - 0x062B: 0xCB, # ARABIC LETTER THEH - 0x062C: 0xCC, # ARABIC LETTER JEEM - 0x062D: 0xCD, # ARABIC LETTER HAH - 0x062E: 0xCE, # ARABIC LETTER KHAH - 0x062F: 0xCF, # ARABIC LETTER DAL - 0x0630: 0xD0, # ARABIC LETTER THAL - 0x0631: 0xD1, # ARABIC LETTER REH - 0x0632: 0xD2, # ARABIC LETTER ZAIN - 0x0633: 0xD3, # ARABIC LETTER SEEN - 0x0634: 0xD4, # ARABIC LETTER SHEEN - 0x0635: 0xD5, # ARABIC LETTER SAD - 0x0636: 0xD6, # ARABIC LETTER DAD - 0x0637: 0xD7, # ARABIC LETTER TAH - 0x0638: 0xD8, # ARABIC LETTER ZAH - 0x0639: 0xD9, # ARABIC LETTER AIN - 0x063A: 0xDA, # ARABIC LETTER GHAIN - 0x0640: 0xE0, # ARABIC TATWEEL - 0x0641: 0xE1, # ARABIC LETTER FEH - 0x0642: 0xE2, # ARABIC LETTER QAF - 0x0643: 0xE3, # ARABIC LETTER KAF - 0x0644: 0xE4, # ARABIC LETTER LAM - 0x0645: 0xE5, # ARABIC LETTER MEEM - 0x0646: 0xE6, # ARABIC LETTER NOON - 0x0647: 0xE7, # ARABIC LETTER HEH - 0x0648: 0xE8, # ARABIC LETTER WAW - 0x0649: 0xE9, # ARABIC LETTER ALEF MAKSURA - 0x064A: 0xEA, # ARABIC LETTER YEH - 0x064B: 0xEB, # ARABIC FATHATAN - 0x064C: 0xEC, # ARABIC DAMMATAN - 0x064D: 0xED, # ARABIC KASRATAN - 0x064E: 0xEE, # ARABIC FATHA - 0x064F: 0xEF, # ARABIC DAMMA - 0x0650: 0xF0, # ARABIC KASRA - 0x0651: 0xF1, # ARABIC SHADDA - 0x0652: 0xF2, # ARABIC SUKUN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/iso8859_7.py b/Lib/encodings/iso8859_7.py index 4cce6e2..a560023 100644 --- a/Lib/encodings/iso8859_7.py +++ b/Lib/encodings/iso8859_7.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,260 +303,5 @@ decoding_table = ( u'\ufffe' ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A3: 0xA3, # POUND SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B7: 0xB7, # MIDDLE DOT - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x037A: 0xAA, # GREEK YPOGEGRAMMENI - 0x0384: 0xB4, # GREEK TONOS - 0x0385: 0xB5, # GREEK DIALYTIKA TONOS - 0x0386: 0xB6, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0388: 0xB8, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0xB9, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038A: 0xBA, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038C: 0xBC, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038E: 0xBE, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038F: 0xBF, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0390: 0xC0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x0391: 0xC1, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0xC2, # GREEK CAPITAL LETTER BETA - 0x0393: 0xC3, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0xC4, # GREEK CAPITAL LETTER DELTA - 0x0395: 0xC5, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0xC6, # GREEK CAPITAL LETTER ZETA - 0x0397: 0xC7, # GREEK CAPITAL LETTER ETA - 0x0398: 0xC8, # GREEK CAPITAL LETTER THETA - 0x0399: 0xC9, # GREEK CAPITAL LETTER IOTA - 0x039A: 0xCA, # GREEK CAPITAL LETTER KAPPA - 0x039B: 0xCB, # GREEK CAPITAL LETTER LAMDA - 0x039C: 0xCC, # GREEK CAPITAL LETTER MU - 0x039D: 0xCD, # GREEK CAPITAL LETTER NU - 0x039E: 0xCE, # GREEK CAPITAL LETTER XI - 0x039F: 0xCF, # GREEK CAPITAL LETTER OMICRON - 0x03A0: 0xD0, # GREEK CAPITAL LETTER PI - 0x03A1: 0xD1, # GREEK CAPITAL LETTER RHO - 0x03A3: 0xD3, # GREEK CAPITAL LETTER SIGMA - 0x03A4: 0xD4, # GREEK CAPITAL LETTER TAU - 0x03A5: 0xD5, # GREEK CAPITAL LETTER UPSILON - 0x03A6: 0xD6, # GREEK CAPITAL LETTER PHI - 0x03A7: 0xD7, # GREEK CAPITAL LETTER CHI - 0x03A8: 0xD8, # GREEK CAPITAL LETTER PSI - 0x03A9: 0xD9, # GREEK CAPITAL LETTER OMEGA - 0x03AA: 0xDA, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03AB: 0xDB, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03AC: 0xDC, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03AD: 0xDD, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03AE: 0xDE, # GREEK SMALL LETTER ETA WITH TONOS - 0x03AF: 0xDF, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03B0: 0xE0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x03B1: 0xE1, # GREEK SMALL LETTER ALPHA - 0x03B2: 0xE2, # GREEK SMALL LETTER BETA - 0x03B3: 0xE3, # GREEK SMALL LETTER GAMMA - 0x03B4: 0xE4, # GREEK SMALL LETTER DELTA - 0x03B5: 0xE5, # GREEK SMALL LETTER EPSILON - 0x03B6: 0xE6, # GREEK SMALL LETTER ZETA - 0x03B7: 0xE7, # GREEK SMALL LETTER ETA - 0x03B8: 0xE8, # GREEK SMALL LETTER THETA - 0x03B9: 0xE9, # GREEK SMALL LETTER IOTA - 0x03BA: 0xEA, # GREEK SMALL LETTER KAPPA - 0x03BB: 0xEB, # GREEK SMALL LETTER LAMDA - 0x03BC: 0xEC, # GREEK SMALL LETTER MU - 0x03BD: 0xED, # GREEK SMALL LETTER NU - 0x03BE: 0xEE, # GREEK SMALL LETTER XI - 0x03BF: 0xEF, # GREEK SMALL LETTER OMICRON - 0x03C0: 0xF0, # GREEK SMALL LETTER PI - 0x03C1: 0xF1, # GREEK SMALL LETTER RHO - 0x03C2: 0xF2, # GREEK SMALL LETTER FINAL SIGMA - 0x03C3: 0xF3, # GREEK SMALL LETTER SIGMA - 0x03C4: 0xF4, # GREEK SMALL LETTER TAU - 0x03C5: 0xF5, # GREEK SMALL LETTER UPSILON - 0x03C6: 0xF6, # GREEK SMALL LETTER PHI - 0x03C7: 0xF7, # GREEK SMALL LETTER CHI - 0x03C8: 0xF8, # GREEK SMALL LETTER PSI - 0x03C9: 0xF9, # GREEK SMALL LETTER OMEGA - 0x03CA: 0xFA, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03CB: 0xFB, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03CC: 0xFC, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03CD: 0xFD, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03CE: 0xFE, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x2015: 0xAF, # HORIZONTAL BAR - 0x2018: 0xA1, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xA2, # RIGHT SINGLE QUOTATION MARK - 0x20AC: 0xA4, # EURO SIGN - 0x20AF: 0xA5, # DRACHMA SIGN -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/iso8859_8.py b/Lib/encodings/iso8859_8.py index 8c29a87..43cf213 100644 --- a/Lib/encodings/iso8859_8.py +++ b/Lib/encodings/iso8859_8.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,227 +303,5 @@ decoding_table = ( u'\ufffe' ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00D7: 0xAA, # MULTIPLICATION SIGN - 0x00F7: 0xBA, # DIVISION SIGN - 0x05D0: 0xE0, # HEBREW LETTER ALEF - 0x05D1: 0xE1, # HEBREW LETTER BET - 0x05D2: 0xE2, # HEBREW LETTER GIMEL - 0x05D3: 0xE3, # HEBREW LETTER DALET - 0x05D4: 0xE4, # HEBREW LETTER HE - 0x05D5: 0xE5, # HEBREW LETTER VAV - 0x05D6: 0xE6, # HEBREW LETTER ZAYIN - 0x05D7: 0xE7, # HEBREW LETTER HET - 0x05D8: 0xE8, # HEBREW LETTER TET - 0x05D9: 0xE9, # HEBREW LETTER YOD - 0x05DA: 0xEA, # HEBREW LETTER FINAL KAF - 0x05DB: 0xEB, # HEBREW LETTER KAF - 0x05DC: 0xEC, # HEBREW LETTER LAMED - 0x05DD: 0xED, # HEBREW LETTER FINAL MEM - 0x05DE: 0xEE, # HEBREW LETTER MEM - 0x05DF: 0xEF, # HEBREW LETTER FINAL NUN - 0x05E0: 0xF0, # HEBREW LETTER NUN - 0x05E1: 0xF1, # HEBREW LETTER SAMEKH - 0x05E2: 0xF2, # HEBREW LETTER AYIN - 0x05E3: 0xF3, # HEBREW LETTER FINAL PE - 0x05E4: 0xF4, # HEBREW LETTER PE - 0x05E5: 0xF5, # HEBREW LETTER FINAL TSADI - 0x05E6: 0xF6, # HEBREW LETTER TSADI - 0x05E7: 0xF7, # HEBREW LETTER QOF - 0x05E8: 0xF8, # HEBREW LETTER RESH - 0x05E9: 0xF9, # HEBREW LETTER SHIN - 0x05EA: 0xFA, # HEBREW LETTER TAV - 0x200E: 0xFD, # LEFT-TO-RIGHT MARK - 0x200F: 0xFE, # RIGHT-TO-LEFT MARK - 0x2017: 0xDF, # DOUBLE LOW LINE -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/iso8859_9.py b/Lib/encodings/iso8859_9.py index 9648e9f..b802938 100644 --- a/Lib/encodings/iso8859_9.py +++ b/Lib/encodings/iso8859_9.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x00A0: 0xA0, # NO-BREAK SPACE - 0x00A1: 0xA1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A4: 0xA4, # CURRENCY SIGN - 0x00A5: 0xA5, # YEN SIGN - 0x00A6: 0xA6, # BROKEN BAR - 0x00A7: 0xA7, # SECTION SIGN - 0x00A8: 0xA8, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xAC, # NOT SIGN - 0x00AD: 0xAD, # SOFT HYPHEN - 0x00AE: 0xAE, # REGISTERED SIGN - 0x00AF: 0xAF, # MACRON - 0x00B0: 0xB0, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0xB2, # SUPERSCRIPT TWO - 0x00B3: 0xB3, # SUPERSCRIPT THREE - 0x00B4: 0xB4, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xB6, # PILCROW SIGN - 0x00B7: 0xB7, # MIDDLE DOT - 0x00B8: 0xB8, # CEDILLA - 0x00B9: 0xB9, # SUPERSCRIPT ONE - 0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER - 0x00BD: 0xBD, # VULGAR FRACTION ONE HALF - 0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS - 0x00BF: 0xBF, # INVERTED QUESTION MARK - 0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xC6, # LATIN CAPITAL LETTER AE - 0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D7: 0xD7, # MULTIPLICATION SIGN - 0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S - 0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xE6, # LATIN SMALL LETTER AE - 0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xF7, # DIVISION SIGN - 0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x011E: 0xD0, # LATIN CAPITAL LETTER G WITH BREVE - 0x011F: 0xF0, # LATIN SMALL LETTER G WITH BREVE - 0x0130: 0xDD, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0xFD, # LATIN SMALL LETTER DOTLESS I - 0x015E: 0xDE, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015F: 0xFE, # LATIN SMALL LETTER S WITH CEDILLA -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/koi8_r.py b/Lib/encodings/koi8_r.py index 3efeb56..f9eb82c 100644 --- a/Lib/encodings/koi8_r.py +++ b/Lib/encodings/koi8_r.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0x9A, # NO-BREAK SPACE - 0x00A9: 0xBF, # COPYRIGHT SIGN - 0x00B0: 0x9C, # DEGREE SIGN - 0x00B2: 0x9D, # SUPERSCRIPT TWO - 0x00B7: 0x9E, # MIDDLE DOT - 0x00F7: 0x9F, # DIVISION SIGN - 0x0401: 0xB3, # CYRILLIC CAPITAL LETTER IO - 0x0410: 0xE1, # CYRILLIC CAPITAL LETTER A - 0x0411: 0xE2, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0xF7, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0xE7, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0xE4, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0xE5, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0xF6, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0xFA, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0xE9, # CYRILLIC CAPITAL LETTER I - 0x0419: 0xEA, # CYRILLIC CAPITAL LETTER SHORT I - 0x041A: 0xEB, # CYRILLIC CAPITAL LETTER KA - 0x041B: 0xEC, # CYRILLIC CAPITAL LETTER EL - 0x041C: 0xED, # CYRILLIC CAPITAL LETTER EM - 0x041D: 0xEE, # CYRILLIC CAPITAL LETTER EN - 0x041E: 0xEF, # CYRILLIC CAPITAL LETTER O - 0x041F: 0xF0, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0xF2, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0xF3, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0xF4, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0xF5, # CYRILLIC CAPITAL LETTER U - 0x0424: 0xE6, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0xE8, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0xE3, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0xFE, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0xFB, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0xFD, # CYRILLIC CAPITAL LETTER SHCHA - 0x042A: 0xFF, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042B: 0xF9, # CYRILLIC CAPITAL LETTER YERU - 0x042C: 0xF8, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042D: 0xFC, # CYRILLIC CAPITAL LETTER E - 0x042E: 0xE0, # CYRILLIC CAPITAL LETTER YU - 0x042F: 0xF1, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0xC1, # CYRILLIC SMALL LETTER A - 0x0431: 0xC2, # CYRILLIC SMALL LETTER BE - 0x0432: 0xD7, # CYRILLIC SMALL LETTER VE - 0x0433: 0xC7, # CYRILLIC SMALL LETTER GHE - 0x0434: 0xC4, # CYRILLIC SMALL LETTER DE - 0x0435: 0xC5, # CYRILLIC SMALL LETTER IE - 0x0436: 0xD6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0xDA, # CYRILLIC SMALL LETTER ZE - 0x0438: 0xC9, # CYRILLIC SMALL LETTER I - 0x0439: 0xCA, # CYRILLIC SMALL LETTER SHORT I - 0x043A: 0xCB, # CYRILLIC SMALL LETTER KA - 0x043B: 0xCC, # CYRILLIC SMALL LETTER EL - 0x043C: 0xCD, # CYRILLIC SMALL LETTER EM - 0x043D: 0xCE, # CYRILLIC SMALL LETTER EN - 0x043E: 0xCF, # CYRILLIC SMALL LETTER O - 0x043F: 0xD0, # CYRILLIC SMALL LETTER PE - 0x0440: 0xD2, # CYRILLIC SMALL LETTER ER - 0x0441: 0xD3, # CYRILLIC SMALL LETTER ES - 0x0442: 0xD4, # CYRILLIC SMALL LETTER TE - 0x0443: 0xD5, # CYRILLIC SMALL LETTER U - 0x0444: 0xC6, # CYRILLIC SMALL LETTER EF - 0x0445: 0xC8, # CYRILLIC SMALL LETTER HA - 0x0446: 0xC3, # CYRILLIC SMALL LETTER TSE - 0x0447: 0xDE, # CYRILLIC SMALL LETTER CHE - 0x0448: 0xDB, # CYRILLIC SMALL LETTER SHA - 0x0449: 0xDD, # CYRILLIC SMALL LETTER SHCHA - 0x044A: 0xDF, # CYRILLIC SMALL LETTER HARD SIGN - 0x044B: 0xD9, # CYRILLIC SMALL LETTER YERU - 0x044C: 0xD8, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044D: 0xDC, # CYRILLIC SMALL LETTER E - 0x044E: 0xC0, # CYRILLIC SMALL LETTER YU - 0x044F: 0xD1, # CYRILLIC SMALL LETTER YA - 0x0451: 0xA3, # CYRILLIC SMALL LETTER IO - 0x2219: 0x95, # BULLET OPERATOR - 0x221A: 0x96, # SQUARE ROOT - 0x2248: 0x97, # ALMOST EQUAL TO - 0x2264: 0x98, # LESS-THAN OR EQUAL TO - 0x2265: 0x99, # GREATER-THAN OR EQUAL TO - 0x2320: 0x93, # TOP HALF INTEGRAL - 0x2321: 0x9B, # BOTTOM HALF INTEGRAL - 0x2500: 0x80, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x81, # BOX DRAWINGS LIGHT VERTICAL - 0x250C: 0x82, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x83, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x84, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x85, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251C: 0x86, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x87, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252C: 0x88, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x89, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253C: 0x8A, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0xA0, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0xA1, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0xA2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0xA4, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0xA5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0xA6, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0xA7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0xA8, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0xA9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0xAA, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255A: 0xAB, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255B: 0xAC, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255C: 0xAD, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255D: 0xAE, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255E: 0xAF, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255F: 0xB0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0xB1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0xB2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0xB4, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0xB5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0xB6, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0xB7, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0xB8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0xB9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0xBA, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0xBB, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256A: 0xBC, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256B: 0xBD, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256C: 0xBE, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x8B, # UPPER HALF BLOCK - 0x2584: 0x8C, # LOWER HALF BLOCK - 0x2588: 0x8D, # FULL BLOCK - 0x258C: 0x8E, # LEFT HALF BLOCK - 0x2590: 0x8F, # RIGHT HALF BLOCK - 0x2591: 0x90, # LIGHT SHADE - 0x2592: 0x91, # MEDIUM SHADE - 0x2593: 0x92, # DARK SHADE - 0x25A0: 0x94, # BLACK SQUARE -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/koi8_u.py b/Lib/encodings/koi8_u.py index 5f46db1..a9317b1 100644 --- a/Lib/encodings/koi8_u.py +++ b/Lib/encodings/koi8_u.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x00A0: 0x9A, # NO-BREAK SPACE - 0x00A9: 0xBF, # COPYRIGHT SIGN - 0x00B0: 0x9C, # DEGREE SIGN - 0x00B2: 0x9D, # SUPERSCRIPT TWO - 0x00B7: 0x9E, # MIDDLE DOT - 0x00F7: 0x9F, # DIVISION SIGN - 0x0401: 0xB3, # CYRILLIC CAPITAL LETTER IO - 0x0404: 0xB4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0406: 0xB6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0407: 0xB7, # CYRILLIC CAPITAL LETTER YI (UKRAINIAN) - 0x0410: 0xE1, # CYRILLIC CAPITAL LETTER A - 0x0411: 0xE2, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0xF7, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0xE7, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0xE4, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0xE5, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0xF6, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0xFA, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0xE9, # CYRILLIC CAPITAL LETTER I - 0x0419: 0xEA, # CYRILLIC CAPITAL LETTER SHORT I - 0x041A: 0xEB, # CYRILLIC CAPITAL LETTER KA - 0x041B: 0xEC, # CYRILLIC CAPITAL LETTER EL - 0x041C: 0xED, # CYRILLIC CAPITAL LETTER EM - 0x041D: 0xEE, # CYRILLIC CAPITAL LETTER EN - 0x041E: 0xEF, # CYRILLIC CAPITAL LETTER O - 0x041F: 0xF0, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0xF2, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0xF3, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0xF4, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0xF5, # CYRILLIC CAPITAL LETTER U - 0x0424: 0xE6, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0xE8, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0xE3, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0xFE, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0xFB, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0xFD, # CYRILLIC CAPITAL LETTER SHCHA - 0x042A: 0xFF, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042B: 0xF9, # CYRILLIC CAPITAL LETTER YERU - 0x042C: 0xF8, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042D: 0xFC, # CYRILLIC CAPITAL LETTER E - 0x042E: 0xE0, # CYRILLIC CAPITAL LETTER YU - 0x042F: 0xF1, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0xC1, # CYRILLIC SMALL LETTER A - 0x0431: 0xC2, # CYRILLIC SMALL LETTER BE - 0x0432: 0xD7, # CYRILLIC SMALL LETTER VE - 0x0433: 0xC7, # CYRILLIC SMALL LETTER GHE - 0x0434: 0xC4, # CYRILLIC SMALL LETTER DE - 0x0435: 0xC5, # CYRILLIC SMALL LETTER IE - 0x0436: 0xD6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0xDA, # CYRILLIC SMALL LETTER ZE - 0x0438: 0xC9, # CYRILLIC SMALL LETTER I - 0x0439: 0xCA, # CYRILLIC SMALL LETTER SHORT I - 0x043A: 0xCB, # CYRILLIC SMALL LETTER KA - 0x043B: 0xCC, # CYRILLIC SMALL LETTER EL - 0x043C: 0xCD, # CYRILLIC SMALL LETTER EM - 0x043D: 0xCE, # CYRILLIC SMALL LETTER EN - 0x043E: 0xCF, # CYRILLIC SMALL LETTER O - 0x043F: 0xD0, # CYRILLIC SMALL LETTER PE - 0x0440: 0xD2, # CYRILLIC SMALL LETTER ER - 0x0441: 0xD3, # CYRILLIC SMALL LETTER ES - 0x0442: 0xD4, # CYRILLIC SMALL LETTER TE - 0x0443: 0xD5, # CYRILLIC SMALL LETTER U - 0x0444: 0xC6, # CYRILLIC SMALL LETTER EF - 0x0445: 0xC8, # CYRILLIC SMALL LETTER HA - 0x0446: 0xC3, # CYRILLIC SMALL LETTER TSE - 0x0447: 0xDE, # CYRILLIC SMALL LETTER CHE - 0x0448: 0xDB, # CYRILLIC SMALL LETTER SHA - 0x0449: 0xDD, # CYRILLIC SMALL LETTER SHCHA - 0x044A: 0xDF, # CYRILLIC SMALL LETTER HARD SIGN - 0x044B: 0xD9, # CYRILLIC SMALL LETTER YERU - 0x044C: 0xD8, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044D: 0xDC, # CYRILLIC SMALL LETTER E - 0x044E: 0xC0, # CYRILLIC SMALL LETTER YU - 0x044F: 0xD1, # CYRILLIC SMALL LETTER YA - 0x0451: 0xA3, # CYRILLIC SMALL LETTER IO - 0x0454: 0xA4, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0456: 0xA6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0457: 0xA7, # CYRILLIC SMALL LETTER YI (UKRAINIAN) - 0x0490: 0xBD, # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN - 0x0491: 0xAD, # CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN - 0x2219: 0x95, # BULLET OPERATOR - 0x221A: 0x96, # SQUARE ROOT - 0x2248: 0x97, # ALMOST EQUAL TO - 0x2264: 0x98, # LESS-THAN OR EQUAL TO - 0x2265: 0x99, # GREATER-THAN OR EQUAL TO - 0x2320: 0x93, # TOP HALF INTEGRAL - 0x2321: 0x9B, # BOTTOM HALF INTEGRAL - 0x2500: 0x80, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x81, # BOX DRAWINGS LIGHT VERTICAL - 0x250C: 0x82, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x83, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x84, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x85, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251C: 0x86, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x87, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252C: 0x88, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x89, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253C: 0x8A, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0xA0, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0xA1, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0xA2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2554: 0xA5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0xA8, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0xA9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0xAA, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255A: 0xAB, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255B: 0xAC, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255D: 0xAE, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255E: 0xAF, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255F: 0xB0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0xB1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0xB2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2563: 0xB5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0xB8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0xB9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0xBA, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0xBB, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256A: 0xBC, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256C: 0xBE, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x8B, # UPPER HALF BLOCK - 0x2584: 0x8C, # LOWER HALF BLOCK - 0x2588: 0x8D, # FULL BLOCK - 0x258C: 0x8E, # LEFT HALF BLOCK - 0x2590: 0x8F, # RIGHT HALF BLOCK - 0x2591: 0x90, # LIGHT SHADE - 0x2592: 0x91, # MEDIUM SHADE - 0x2593: 0x92, # DARK SHADE - 0x25A0: 0x94, # BLACK SQUARE -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/mac_centeuro.py b/Lib/encodings/mac_centeuro.py index 54a1510..483c821 100644 --- a/Lib/encodings/mac_centeuro.py +++ b/Lib/encodings/mac_centeuro.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u02c7' # 0xFF -> CARON ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A3: 0xA3, # POUND SIGN - 0x00A7: 0xA4, # SECTION SIGN - 0x00A8: 0xAC, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00B0: 0xA1, # DEGREE SIGN - 0x00B6: 0xA6, # PILCROW SIGN - 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xF8, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S - 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xD6, # DIVISION SIGN - 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xF9, # LATIN SMALL LETTER Y WITH ACUTE - 0x0100: 0x81, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0x82, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0x84, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0x88, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0x8C, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0x8D, # LATIN SMALL LETTER C WITH ACUTE - 0x010C: 0x89, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0x8B, # LATIN SMALL LETTER C WITH CARON - 0x010E: 0x91, # LATIN CAPITAL LETTER D WITH CARON - 0x010F: 0x93, # LATIN SMALL LETTER D WITH CARON - 0x0112: 0x94, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0x95, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0x96, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0x98, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0xA2, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0xAB, # LATIN SMALL LETTER E WITH OGONEK - 0x011A: 0x9D, # LATIN CAPITAL LETTER E WITH CARON - 0x011B: 0x9E, # LATIN SMALL LETTER E WITH CARON - 0x0122: 0xFE, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0xAE, # LATIN SMALL LETTER G WITH CEDILLA - 0x012A: 0xB1, # LATIN CAPITAL LETTER I WITH MACRON - 0x012B: 0xB4, # LATIN SMALL LETTER I WITH MACRON - 0x012E: 0xAF, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012F: 0xB0, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0xB5, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0xFA, # LATIN SMALL LETTER K WITH CEDILLA - 0x0139: 0xBD, # LATIN CAPITAL LETTER L WITH ACUTE - 0x013A: 0xBE, # LATIN SMALL LETTER L WITH ACUTE - 0x013B: 0xB9, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013C: 0xBA, # LATIN SMALL LETTER L WITH CEDILLA - 0x013D: 0xBB, # LATIN CAPITAL LETTER L WITH CARON - 0x013E: 0xBC, # LATIN SMALL LETTER L WITH CARON - 0x0141: 0xFC, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0xB8, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0xC1, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0xC4, # LATIN SMALL LETTER N WITH ACUTE - 0x0145: 0xBF, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0xC0, # LATIN SMALL LETTER N WITH CEDILLA - 0x0147: 0xC5, # LATIN CAPITAL LETTER N WITH CARON - 0x0148: 0xCB, # LATIN SMALL LETTER N WITH CARON - 0x014C: 0xCF, # LATIN CAPITAL LETTER O WITH MACRON - 0x014D: 0xD8, # LATIN SMALL LETTER O WITH MACRON - 0x0150: 0xCC, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x0151: 0xCE, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x0154: 0xD9, # LATIN CAPITAL LETTER R WITH ACUTE - 0x0155: 0xDA, # LATIN SMALL LETTER R WITH ACUTE - 0x0156: 0xDF, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x0157: 0xE0, # LATIN SMALL LETTER R WITH CEDILLA - 0x0158: 0xDB, # LATIN CAPITAL LETTER R WITH CARON - 0x0159: 0xDE, # LATIN SMALL LETTER R WITH CARON - 0x015A: 0xE5, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015B: 0xE6, # LATIN SMALL LETTER S WITH ACUTE - 0x0160: 0xE1, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xE4, # LATIN SMALL LETTER S WITH CARON - 0x0164: 0xE8, # LATIN CAPITAL LETTER T WITH CARON - 0x0165: 0xE9, # LATIN SMALL LETTER T WITH CARON - 0x016A: 0xED, # LATIN CAPITAL LETTER U WITH MACRON - 0x016B: 0xF0, # LATIN SMALL LETTER U WITH MACRON - 0x016E: 0xF1, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x016F: 0xF3, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0170: 0xF4, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x0171: 0xF5, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x0172: 0xF6, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0xF7, # LATIN SMALL LETTER U WITH OGONEK - 0x0179: 0x8F, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017A: 0x90, # LATIN SMALL LETTER Z WITH ACUTE - 0x017B: 0xFB, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017C: 0xFD, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017D: 0xEB, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xEC, # LATIN SMALL LETTER Z WITH CARON - 0x02C7: 0xFF, # CARON - 0x2013: 0xD0, # EN DASH - 0x2014: 0xD1, # EM DASH - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xA0, # DAGGER - 0x2022: 0xA5, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x2122: 0xAA, # TRADE MARK SIGN - 0x2202: 0xB6, # PARTIAL DIFFERENTIAL - 0x2206: 0xC6, # INCREMENT - 0x2211: 0xB7, # N-ARY SUMMATION - 0x221A: 0xC3, # SQUARE ROOT - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO - 0x25CA: 0xD7, # LOZENGE -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/mac_croatian.py b/Lib/encodings/mac_croatian.py index 9e93cdd..f57f7b4 100644 --- a/Lib/encodings/mac_croatian.py +++ b/Lib/encodings/mac_croatian.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u02c7' # 0xFF -> CARON ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A7: 0xA4, # SECTION SIGN - 0x00A8: 0xAC, # DIAERESIS - 0x00A9: 0xD9, # COPYRIGHT SIGN - 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00AF: 0xF8, # MACRON - 0x00B0: 0xA1, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B4: 0xAB, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xA6, # PILCROW SIGN - 0x00B7: 0xE1, # MIDDLE DOT - 0x00B8: 0xFC, # CEDILLA - 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xDF, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BF: 0xC0, # INVERTED QUESTION MARK - 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xDE, # LATIN CAPITAL LETTER AE - 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xFD, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xFA, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S - 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xFE, # LATIN SMALL LETTER AE - 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xD6, # DIVISION SIGN - 0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0106: 0xC6, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0xE6, # LATIN SMALL LETTER C WITH ACUTE - 0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON - 0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON - 0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE - 0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xCF, # LATIN SMALL LIGATURE OE - 0x0160: 0xA9, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0xB9, # LATIN SMALL LETTER S WITH CARON - 0x017D: 0xAE, # LATIN CAPITAL LETTER Z WITH CARON - 0x017E: 0xBE, # LATIN SMALL LETTER Z WITH CARON - 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02C7: 0xFF, # CARON - 0x02DA: 0xFB, # RING ABOVE - 0x02DC: 0xF7, # SMALL TILDE - 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA - 0x03C0: 0xF9, # GREEK SMALL LETTER PI - 0x2013: 0xE0, # EN DASH - 0x2014: 0xD1, # EM DASH - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xA0, # DAGGER - 0x2022: 0xA5, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x2030: 0xE4, # PER MILLE SIGN - 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x2044: 0xDA, # FRACTION SLASH - 0x20AC: 0xDB, # EURO SIGN - 0x2122: 0xAA, # TRADE MARK SIGN - 0x2202: 0xB6, # PARTIAL DIFFERENTIAL - 0x2206: 0xB4, # INCREMENT - 0x220F: 0xB8, # N-ARY PRODUCT - 0x2211: 0xB7, # N-ARY SUMMATION - 0x221A: 0xC3, # SQUARE ROOT - 0x221E: 0xB0, # INFINITY - 0x222B: 0xBA, # INTEGRAL - 0x2248: 0xC5, # ALMOST EQUAL TO - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO - 0x25CA: 0xD7, # LOZENGE - 0xF8FF: 0xD8, # Apple logo -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/mac_cyrillic.py b/Lib/encodings/mac_cyrillic.py index 8ffd715..63324a1 100644 --- a/Lib/encodings/mac_cyrillic.py +++ b/Lib/encodings/mac_cyrillic.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u20ac' # 0xFF -> EURO SIGN ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A3: 0xA3, # POUND SIGN - 0x00A7: 0xA4, # SECTION SIGN - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00B0: 0xA1, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xA6, # PILCROW SIGN - 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00F7: 0xD6, # DIVISION SIGN - 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK - 0x0401: 0xDD, # CYRILLIC CAPITAL LETTER IO - 0x0402: 0xAB, # CYRILLIC CAPITAL LETTER DJE - 0x0403: 0xAE, # CYRILLIC CAPITAL LETTER GJE - 0x0404: 0xB8, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0405: 0xC1, # CYRILLIC CAPITAL LETTER DZE - 0x0406: 0xA7, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0407: 0xBA, # CYRILLIC CAPITAL LETTER YI - 0x0408: 0xB7, # CYRILLIC CAPITAL LETTER JE - 0x0409: 0xBC, # CYRILLIC CAPITAL LETTER LJE - 0x040A: 0xBE, # CYRILLIC CAPITAL LETTER NJE - 0x040B: 0xCB, # CYRILLIC CAPITAL LETTER TSHE - 0x040C: 0xCD, # CYRILLIC CAPITAL LETTER KJE - 0x040E: 0xD8, # CYRILLIC CAPITAL LETTER SHORT U - 0x040F: 0xDA, # CYRILLIC CAPITAL LETTER DZHE - 0x0410: 0x80, # CYRILLIC CAPITAL LETTER A - 0x0411: 0x81, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0x82, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0x83, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0x84, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0x85, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0x86, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0x87, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0x88, # CYRILLIC CAPITAL LETTER I - 0x0419: 0x89, # CYRILLIC CAPITAL LETTER SHORT I - 0x041A: 0x8A, # CYRILLIC CAPITAL LETTER KA - 0x041B: 0x8B, # CYRILLIC CAPITAL LETTER EL - 0x041C: 0x8C, # CYRILLIC CAPITAL LETTER EM - 0x041D: 0x8D, # CYRILLIC CAPITAL LETTER EN - 0x041E: 0x8E, # CYRILLIC CAPITAL LETTER O - 0x041F: 0x8F, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0x90, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0x91, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0x92, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0x93, # CYRILLIC CAPITAL LETTER U - 0x0424: 0x94, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0x95, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0x96, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0x97, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0x98, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0x99, # CYRILLIC CAPITAL LETTER SHCHA - 0x042A: 0x9A, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042B: 0x9B, # CYRILLIC CAPITAL LETTER YERU - 0x042C: 0x9C, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042D: 0x9D, # CYRILLIC CAPITAL LETTER E - 0x042E: 0x9E, # CYRILLIC CAPITAL LETTER YU - 0x042F: 0x9F, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0xE0, # CYRILLIC SMALL LETTER A - 0x0431: 0xE1, # CYRILLIC SMALL LETTER BE - 0x0432: 0xE2, # CYRILLIC SMALL LETTER VE - 0x0433: 0xE3, # CYRILLIC SMALL LETTER GHE - 0x0434: 0xE4, # CYRILLIC SMALL LETTER DE - 0x0435: 0xE5, # CYRILLIC SMALL LETTER IE - 0x0436: 0xE6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0xE7, # CYRILLIC SMALL LETTER ZE - 0x0438: 0xE8, # CYRILLIC SMALL LETTER I - 0x0439: 0xE9, # CYRILLIC SMALL LETTER SHORT I - 0x043A: 0xEA, # CYRILLIC SMALL LETTER KA - 0x043B: 0xEB, # CYRILLIC SMALL LETTER EL - 0x043C: 0xEC, # CYRILLIC SMALL LETTER EM - 0x043D: 0xED, # CYRILLIC SMALL LETTER EN - 0x043E: 0xEE, # CYRILLIC SMALL LETTER O - 0x043F: 0xEF, # CYRILLIC SMALL LETTER PE - 0x0440: 0xF0, # CYRILLIC SMALL LETTER ER - 0x0441: 0xF1, # CYRILLIC SMALL LETTER ES - 0x0442: 0xF2, # CYRILLIC SMALL LETTER TE - 0x0443: 0xF3, # CYRILLIC SMALL LETTER U - 0x0444: 0xF4, # CYRILLIC SMALL LETTER EF - 0x0445: 0xF5, # CYRILLIC SMALL LETTER HA - 0x0446: 0xF6, # CYRILLIC SMALL LETTER TSE - 0x0447: 0xF7, # CYRILLIC SMALL LETTER CHE - 0x0448: 0xF8, # CYRILLIC SMALL LETTER SHA - 0x0449: 0xF9, # CYRILLIC SMALL LETTER SHCHA - 0x044A: 0xFA, # CYRILLIC SMALL LETTER HARD SIGN - 0x044B: 0xFB, # CYRILLIC SMALL LETTER YERU - 0x044C: 0xFC, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044D: 0xFD, # CYRILLIC SMALL LETTER E - 0x044E: 0xFE, # CYRILLIC SMALL LETTER YU - 0x044F: 0xDF, # CYRILLIC SMALL LETTER YA - 0x0451: 0xDE, # CYRILLIC SMALL LETTER IO - 0x0452: 0xAC, # CYRILLIC SMALL LETTER DJE - 0x0453: 0xAF, # CYRILLIC SMALL LETTER GJE - 0x0454: 0xB9, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0455: 0xCF, # CYRILLIC SMALL LETTER DZE - 0x0456: 0xB4, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0457: 0xBB, # CYRILLIC SMALL LETTER YI - 0x0458: 0xC0, # CYRILLIC SMALL LETTER JE - 0x0459: 0xBD, # CYRILLIC SMALL LETTER LJE - 0x045A: 0xBF, # CYRILLIC SMALL LETTER NJE - 0x045B: 0xCC, # CYRILLIC SMALL LETTER TSHE - 0x045C: 0xCE, # CYRILLIC SMALL LETTER KJE - 0x045E: 0xD9, # CYRILLIC SMALL LETTER SHORT U - 0x045F: 0xDB, # CYRILLIC SMALL LETTER DZHE - 0x0490: 0xA2, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN - 0x0491: 0xB6, # CYRILLIC SMALL LETTER GHE WITH UPTURN - 0x2013: 0xD0, # EN DASH - 0x2014: 0xD1, # EM DASH - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xD7, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xA0, # DAGGER - 0x2022: 0xA5, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x20AC: 0xFF, # EURO SIGN - 0x2116: 0xDC, # NUMERO SIGN - 0x2122: 0xAA, # TRADE MARK SIGN - 0x2206: 0xC6, # INCREMENT - 0x221A: 0xC3, # SQUARE ROOT - 0x221E: 0xB0, # INFINITY - 0x2248: 0xC5, # ALMOST EQUAL TO - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/mac_farsi.py b/Lib/encodings/mac_farsi.py index 6d26a42..9dbd76a 100644 --- a/Lib/encodings/mac_farsi.py +++ b/Lib/encodings/mac_farsi.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE, left-right - 0x0020: 0xA0, # SPACE, right-left - 0x0021: 0x21, # EXCLAMATION MARK, left-right - 0x0021: 0xA1, # EXCLAMATION MARK, right-left - 0x0022: 0x22, # QUOTATION MARK, left-right - 0x0022: 0xA2, # QUOTATION MARK, right-left - 0x0023: 0x23, # NUMBER SIGN, left-right - 0x0023: 0xA3, # NUMBER SIGN, right-left - 0x0024: 0x24, # DOLLAR SIGN, left-right - 0x0024: 0xA4, # DOLLAR SIGN, right-left - 0x0025: 0x25, # PERCENT SIGN, left-right - 0x0026: 0x26, # AMPERSAND, left-right - 0x0026: 0xA6, # AMPERSAND, right-left - 0x0027: 0x27, # APOSTROPHE, left-right - 0x0027: 0xA7, # APOSTROPHE, right-left - 0x0028: 0x28, # LEFT PARENTHESIS, left-right - 0x0028: 0xA8, # LEFT PARENTHESIS, right-left - 0x0029: 0x29, # RIGHT PARENTHESIS, left-right - 0x0029: 0xA9, # RIGHT PARENTHESIS, right-left - 0x002A: 0x2A, # ASTERISK, left-right - 0x002A: 0xAA, # ASTERISK, right-left - 0x002B: 0x2B, # PLUS SIGN, left-right - 0x002B: 0xAB, # PLUS SIGN, right-left - 0x002C: 0x2C, # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR - 0x002D: 0x2D, # HYPHEN-MINUS, left-right - 0x002D: 0xAD, # HYPHEN-MINUS, right-left - 0x002E: 0x2E, # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR - 0x002E: 0xAE, # FULL STOP, right-left - 0x002F: 0x2F, # SOLIDUS, left-right - 0x002F: 0xAF, # SOLIDUS, right-left - 0x0030: 0x30, # DIGIT ZERO; in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE; in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE - 0x0032: 0x32, # DIGIT TWO; in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO - 0x0033: 0x33, # DIGIT THREE; in Arabic-script context, displayed as 0x06F3 EXTENDED ARABIC-INDIC DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR; in Arabic-script context, displayed as 0x06F4 EXTENDED ARABIC-INDIC DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE; in Arabic-script context, displayed as 0x06F5 EXTENDED ARABIC-INDIC DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX; in Arabic-script context, displayed as 0x06F6 EXTENDED ARABIC-INDIC DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE; in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE - 0x003A: 0x3A, # COLON, left-right - 0x003A: 0xBA, # COLON, right-left - 0x003B: 0x3B, # SEMICOLON, left-right - 0x003C: 0x3C, # LESS-THAN SIGN, left-right - 0x003C: 0xBC, # LESS-THAN SIGN, right-left - 0x003D: 0x3D, # EQUALS SIGN, left-right - 0x003D: 0xBD, # EQUALS SIGN, right-left - 0x003E: 0x3E, # GREATER-THAN SIGN, left-right - 0x003E: 0xBE, # GREATER-THAN SIGN, right-left - 0x003F: 0x3F, # QUESTION MARK, left-right - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET, left-right - 0x005B: 0xDB, # LEFT SQUARE BRACKET, right-left - 0x005C: 0x5C, # REVERSE SOLIDUS, left-right - 0x005C: 0xDC, # REVERSE SOLIDUS, right-left - 0x005D: 0x5D, # RIGHT SQUARE BRACKET, left-right - 0x005D: 0xDD, # RIGHT SQUARE BRACKET, right-left - 0x005E: 0x5E, # CIRCUMFLEX ACCENT, left-right - 0x005E: 0xDE, # CIRCUMFLEX ACCENT, right-left - 0x005F: 0x5F, # LOW LINE, left-right - 0x005F: 0xDF, # LOW LINE, right-left - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET, left-right - 0x007B: 0xFB, # LEFT CURLY BRACKET, right-left - 0x007C: 0x7C, # VERTICAL LINE, left-right - 0x007C: 0xFC, # VERTICAL LINE, right-left - 0x007D: 0x7D, # RIGHT CURLY BRACKET, left-right - 0x007D: 0xFD, # RIGHT CURLY BRACKET, right-left - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0x81, # NO-BREAK SPACE, right-left - 0x00AB: 0x8C, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x00BB: 0x98, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0x9B, # DIVISION SIGN, right-left - 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x060C: 0xAC, # ARABIC COMMA - 0x061B: 0xBB, # ARABIC SEMICOLON - 0x061F: 0xBF, # ARABIC QUESTION MARK - 0x0621: 0xC1, # ARABIC LETTER HAMZA - 0x0622: 0xC2, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x0623: 0xC3, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x0624: 0xC4, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x0625: 0xC5, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x0626: 0xC6, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x0627: 0xC7, # ARABIC LETTER ALEF - 0x0628: 0xC8, # ARABIC LETTER BEH - 0x0629: 0xC9, # ARABIC LETTER TEH MARBUTA - 0x062A: 0xCA, # ARABIC LETTER TEH - 0x062B: 0xCB, # ARABIC LETTER THEH - 0x062C: 0xCC, # ARABIC LETTER JEEM - 0x062D: 0xCD, # ARABIC LETTER HAH - 0x062E: 0xCE, # ARABIC LETTER KHAH - 0x062F: 0xCF, # ARABIC LETTER DAL - 0x0630: 0xD0, # ARABIC LETTER THAL - 0x0631: 0xD1, # ARABIC LETTER REH - 0x0632: 0xD2, # ARABIC LETTER ZAIN - 0x0633: 0xD3, # ARABIC LETTER SEEN - 0x0634: 0xD4, # ARABIC LETTER SHEEN - 0x0635: 0xD5, # ARABIC LETTER SAD - 0x0636: 0xD6, # ARABIC LETTER DAD - 0x0637: 0xD7, # ARABIC LETTER TAH - 0x0638: 0xD8, # ARABIC LETTER ZAH - 0x0639: 0xD9, # ARABIC LETTER AIN - 0x063A: 0xDA, # ARABIC LETTER GHAIN - 0x0640: 0xE0, # ARABIC TATWEEL - 0x0641: 0xE1, # ARABIC LETTER FEH - 0x0642: 0xE2, # ARABIC LETTER QAF - 0x0643: 0xE3, # ARABIC LETTER KAF - 0x0644: 0xE4, # ARABIC LETTER LAM - 0x0645: 0xE5, # ARABIC LETTER MEEM - 0x0646: 0xE6, # ARABIC LETTER NOON - 0x0647: 0xE7, # ARABIC LETTER HEH - 0x0648: 0xE8, # ARABIC LETTER WAW - 0x0649: 0xE9, # ARABIC LETTER ALEF MAKSURA - 0x064A: 0xEA, # ARABIC LETTER YEH - 0x064B: 0xEB, # ARABIC FATHATAN - 0x064C: 0xEC, # ARABIC DAMMATAN - 0x064D: 0xED, # ARABIC KASRATAN - 0x064E: 0xEE, # ARABIC FATHA - 0x064F: 0xEF, # ARABIC DAMMA - 0x0650: 0xF0, # ARABIC KASRA - 0x0651: 0xF1, # ARABIC SHADDA - 0x0652: 0xF2, # ARABIC SUKUN - 0x066A: 0xA5, # ARABIC PERCENT SIGN - 0x0679: 0xF4, # ARABIC LETTER TTEH - 0x067E: 0xF3, # ARABIC LETTER PEH - 0x0686: 0xF5, # ARABIC LETTER TCHEH - 0x0688: 0xF9, # ARABIC LETTER DDAL - 0x0691: 0xFA, # ARABIC LETTER RREH - 0x0698: 0xFE, # ARABIC LETTER JEH - 0x06A4: 0xF7, # ARABIC LETTER VEH - 0x06AF: 0xF8, # ARABIC LETTER GAF - 0x06BA: 0x8B, # ARABIC LETTER NOON GHUNNA - 0x06D2: 0xFF, # ARABIC LETTER YEH BARREE - 0x06D5: 0xF6, # ARABIC LETTER AE - 0x06F0: 0xB0, # EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) - 0x06F1: 0xB1, # EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) - 0x06F2: 0xB2, # EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) - 0x06F3: 0xB3, # EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) - 0x06F4: 0xB4, # EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) - 0x06F5: 0xB5, # EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) - 0x06F6: 0xB6, # EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) - 0x06F7: 0xB7, # EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) - 0x06F8: 0xB8, # EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) - 0x06F9: 0xB9, # EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) - 0x2026: 0x93, # HORIZONTAL ELLIPSIS, right-left - 0x274A: 0xC0, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/mac_greek.py b/Lib/encodings/mac_greek.py index 7264f9a..68f4fff 100644 --- a/Lib/encodings/mac_greek.py +++ b/Lib/encodings/mac_greek.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\xad' # 0xFF -> SOFT HYPHEN # before Mac OS 9.2.2, was undefined ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A3: 0x92, # POUND SIGN - 0x00A5: 0xB4, # YEN SIGN - 0x00A6: 0x9B, # BROKEN BAR - 0x00A7: 0xAC, # SECTION SIGN - 0x00A8: 0x8C, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AD: 0xFF, # SOFT HYPHEN # before Mac OS 9.2.2, was undefined - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00B0: 0xAE, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B2: 0x82, # SUPERSCRIPT TWO - 0x00B3: 0x84, # SUPERSCRIPT THREE - 0x00B7: 0xAF, # MIDDLE DOT - 0x00B9: 0x81, # SUPERSCRIPT ONE - 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BD: 0x97, # VULGAR FRACTION ONE HALF - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S - 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xD6, # DIVISION SIGN - 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE - 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0153: 0xCF, # LATIN SMALL LIGATURE OE - 0x0384: 0x8B, # GREEK TONOS - 0x0385: 0x87, # GREEK DIALYTIKA TONOS - 0x0386: 0xCD, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0388: 0xCE, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0xD7, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038A: 0xD8, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038C: 0xD9, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038E: 0xDA, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038F: 0xDF, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0390: 0xFD, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x0391: 0xB0, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0xB5, # GREEK CAPITAL LETTER BETA - 0x0393: 0xA1, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0xA2, # GREEK CAPITAL LETTER DELTA - 0x0395: 0xB6, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0xB7, # GREEK CAPITAL LETTER ZETA - 0x0397: 0xB8, # GREEK CAPITAL LETTER ETA - 0x0398: 0xA3, # GREEK CAPITAL LETTER THETA - 0x0399: 0xB9, # GREEK CAPITAL LETTER IOTA - 0x039A: 0xBA, # GREEK CAPITAL LETTER KAPPA - 0x039B: 0xA4, # GREEK CAPITAL LETTER LAMDA - 0x039C: 0xBB, # GREEK CAPITAL LETTER MU - 0x039D: 0xC1, # GREEK CAPITAL LETTER NU - 0x039E: 0xA5, # GREEK CAPITAL LETTER XI - 0x039F: 0xC3, # GREEK CAPITAL LETTER OMICRON - 0x03A0: 0xA6, # GREEK CAPITAL LETTER PI - 0x03A1: 0xC4, # GREEK CAPITAL LETTER RHO - 0x03A3: 0xAA, # GREEK CAPITAL LETTER SIGMA - 0x03A4: 0xC6, # GREEK CAPITAL LETTER TAU - 0x03A5: 0xCB, # GREEK CAPITAL LETTER UPSILON - 0x03A6: 0xBC, # GREEK CAPITAL LETTER PHI - 0x03A7: 0xCC, # GREEK CAPITAL LETTER CHI - 0x03A8: 0xBE, # GREEK CAPITAL LETTER PSI - 0x03A9: 0xBF, # GREEK CAPITAL LETTER OMEGA - 0x03AA: 0xAB, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03AB: 0xBD, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03AC: 0xC0, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03AD: 0xDB, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03AE: 0xDC, # GREEK SMALL LETTER ETA WITH TONOS - 0x03AF: 0xDD, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03B0: 0xFE, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x03B1: 0xE1, # GREEK SMALL LETTER ALPHA - 0x03B2: 0xE2, # GREEK SMALL LETTER BETA - 0x03B3: 0xE7, # GREEK SMALL LETTER GAMMA - 0x03B4: 0xE4, # GREEK SMALL LETTER DELTA - 0x03B5: 0xE5, # GREEK SMALL LETTER EPSILON - 0x03B6: 0xFA, # GREEK SMALL LETTER ZETA - 0x03B7: 0xE8, # GREEK SMALL LETTER ETA - 0x03B8: 0xF5, # GREEK SMALL LETTER THETA - 0x03B9: 0xE9, # GREEK SMALL LETTER IOTA - 0x03BA: 0xEB, # GREEK SMALL LETTER KAPPA - 0x03BB: 0xEC, # GREEK SMALL LETTER LAMDA - 0x03BC: 0xED, # GREEK SMALL LETTER MU - 0x03BD: 0xEE, # GREEK SMALL LETTER NU - 0x03BE: 0xEA, # GREEK SMALL LETTER XI - 0x03BF: 0xEF, # GREEK SMALL LETTER OMICRON - 0x03C0: 0xF0, # GREEK SMALL LETTER PI - 0x03C1: 0xF2, # GREEK SMALL LETTER RHO - 0x03C2: 0xF7, # GREEK SMALL LETTER FINAL SIGMA - 0x03C3: 0xF3, # GREEK SMALL LETTER SIGMA - 0x03C4: 0xF4, # GREEK SMALL LETTER TAU - 0x03C5: 0xF9, # GREEK SMALL LETTER UPSILON - 0x03C6: 0xE6, # GREEK SMALL LETTER PHI - 0x03C7: 0xF8, # GREEK SMALL LETTER CHI - 0x03C8: 0xE3, # GREEK SMALL LETTER PSI - 0x03C9: 0xF6, # GREEK SMALL LETTER OMEGA - 0x03CA: 0xFB, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03CB: 0xFC, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03CC: 0xDE, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03CD: 0xE0, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03CE: 0xF1, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x2013: 0xD0, # EN DASH - 0x2015: 0xD1, # HORIZONTAL BAR - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x2020: 0xA0, # DAGGER - 0x2022: 0x96, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x2030: 0x98, # PER MILLE SIGN - 0x20AC: 0x9C, # EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN - 0x2122: 0x93, # TRADE MARK SIGN - 0x2248: 0xC5, # ALMOST EQUAL TO - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/mac_iceland.py b/Lib/encodings/mac_iceland.py index 5d8d9ad..c24add2 100644 --- a/Lib/encodings/mac_iceland.py +++ b/Lib/encodings/mac_iceland.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u02c7' # 0xFF -> CARON ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A5: 0xB4, # YEN SIGN - 0x00A7: 0xA4, # SECTION SIGN - 0x00A8: 0xAC, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00AF: 0xF8, # MACRON - 0x00B0: 0xA1, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B4: 0xAB, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xA6, # PILCROW SIGN - 0x00B7: 0xE1, # MIDDLE DOT - 0x00B8: 0xFC, # CEDILLA - 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BF: 0xC0, # INVERTED QUESTION MARK - 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xAE, # LATIN CAPITAL LETTER AE - 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D0: 0xDC, # LATIN CAPITAL LETTER ETH - 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DD: 0xA0, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN - 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S - 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xBE, # LATIN SMALL LETTER AE - 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F0: 0xDD, # LATIN SMALL LETTER ETH - 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xD6, # DIVISION SIGN - 0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FD: 0xE0, # LATIN SMALL LETTER Y WITH ACUTE - 0x00FE: 0xDF, # LATIN SMALL LETTER THORN - 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xCF, # LATIN SMALL LIGATURE OE - 0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02C7: 0xFF, # CARON - 0x02D8: 0xF9, # BREVE - 0x02D9: 0xFA, # DOT ABOVE - 0x02DA: 0xFB, # RING ABOVE - 0x02DB: 0xFE, # OGONEK - 0x02DC: 0xF7, # SMALL TILDE - 0x02DD: 0xFD, # DOUBLE ACUTE ACCENT - 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA - 0x03C0: 0xB9, # GREEK SMALL LETTER PI - 0x2013: 0xD0, # EN DASH - 0x2014: 0xD1, # EM DASH - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK - 0x2022: 0xA5, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x2030: 0xE4, # PER MILLE SIGN - 0x2044: 0xDA, # FRACTION SLASH - 0x20AC: 0xDB, # EURO SIGN - 0x2122: 0xAA, # TRADE MARK SIGN - 0x2202: 0xB6, # PARTIAL DIFFERENTIAL - 0x2206: 0xC6, # INCREMENT - 0x220F: 0xB8, # N-ARY PRODUCT - 0x2211: 0xB7, # N-ARY SUMMATION - 0x221A: 0xC3, # SQUARE ROOT - 0x221E: 0xB0, # INFINITY - 0x222B: 0xBA, # INTEGRAL - 0x2248: 0xC5, # ALMOST EQUAL TO - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO - 0x25CA: 0xD7, # LOZENGE - 0xF8FF: 0xF0, # Apple logo -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/mac_roman.py b/Lib/encodings/mac_roman.py index 9552e53..62605ec 100644 --- a/Lib/encodings/mac_roman.py +++ b/Lib/encodings/mac_roman.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u02c7' # 0xFF -> CARON ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A5: 0xB4, # YEN SIGN - 0x00A7: 0xA4, # SECTION SIGN - 0x00A8: 0xAC, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00AF: 0xF8, # MACRON - 0x00B0: 0xA1, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B4: 0xAB, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xA6, # PILCROW SIGN - 0x00B7: 0xE1, # MIDDLE DOT - 0x00B8: 0xFC, # CEDILLA - 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BF: 0xC0, # INVERTED QUESTION MARK - 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xAE, # LATIN CAPITAL LETTER AE - 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S - 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xBE, # LATIN SMALL LETTER AE - 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xD6, # DIVISION SIGN - 0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xCF, # LATIN SMALL LIGATURE OE - 0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02C7: 0xFF, # CARON - 0x02D8: 0xF9, # BREVE - 0x02D9: 0xFA, # DOT ABOVE - 0x02DA: 0xFB, # RING ABOVE - 0x02DB: 0xFE, # OGONEK - 0x02DC: 0xF7, # SMALL TILDE - 0x02DD: 0xFD, # DOUBLE ACUTE ACCENT - 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA - 0x03C0: 0xB9, # GREEK SMALL LETTER PI - 0x2013: 0xD0, # EN DASH - 0x2014: 0xD1, # EM DASH - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xA0, # DAGGER - 0x2021: 0xE0, # DOUBLE DAGGER - 0x2022: 0xA5, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x2030: 0xE4, # PER MILLE SIGN - 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x2044: 0xDA, # FRACTION SLASH - 0x20AC: 0xDB, # EURO SIGN - 0x2122: 0xAA, # TRADE MARK SIGN - 0x2202: 0xB6, # PARTIAL DIFFERENTIAL - 0x2206: 0xC6, # INCREMENT - 0x220F: 0xB8, # N-ARY PRODUCT - 0x2211: 0xB7, # N-ARY SUMMATION - 0x221A: 0xC3, # SQUARE ROOT - 0x221E: 0xB0, # INFINITY - 0x222B: 0xBA, # INTEGRAL - 0x2248: 0xC5, # ALMOST EQUAL TO - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO - 0x25CA: 0xD7, # LOZENGE - 0xF8FF: 0xF0, # Apple logo - 0xFB01: 0xDE, # LATIN SMALL LIGATURE FI - 0xFB02: 0xDF, # LATIN SMALL LIGATURE FL -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/mac_romanian.py b/Lib/encodings/mac_romanian.py index 51282c3..5bd5ae8 100644 --- a/Lib/encodings/mac_romanian.py +++ b/Lib/encodings/mac_romanian.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u02c7' # 0xFF -> CARON ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A5: 0xB4, # YEN SIGN - 0x00A7: 0xA4, # SECTION SIGN - 0x00A8: 0xAC, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00AF: 0xF8, # MACRON - 0x00B0: 0xA1, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B4: 0xAB, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xA6, # PILCROW SIGN - 0x00B7: 0xE1, # MIDDLE DOT - 0x00B8: 0xFC, # CEDILLA - 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BF: 0xC0, # INVERTED QUESTION MARK - 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S - 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xD6, # DIVISION SIGN - 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0102: 0xAE, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0xBE, # LATIN SMALL LETTER A WITH BREVE - 0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xCF, # LATIN SMALL LIGATURE OE - 0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK - 0x0218: 0xAF, # LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later - 0x0219: 0xBF, # LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later - 0x021A: 0xDE, # LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later - 0x021B: 0xDF, # LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later - 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02C7: 0xFF, # CARON - 0x02D8: 0xF9, # BREVE - 0x02D9: 0xFA, # DOT ABOVE - 0x02DA: 0xFB, # RING ABOVE - 0x02DB: 0xFE, # OGONEK - 0x02DC: 0xF7, # SMALL TILDE - 0x02DD: 0xFD, # DOUBLE ACUTE ACCENT - 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA - 0x03C0: 0xB9, # GREEK SMALL LETTER PI - 0x2013: 0xD0, # EN DASH - 0x2014: 0xD1, # EM DASH - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xA0, # DAGGER - 0x2021: 0xE0, # DOUBLE DAGGER - 0x2022: 0xA5, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x2030: 0xE4, # PER MILLE SIGN - 0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x2044: 0xDA, # FRACTION SLASH - 0x20AC: 0xDB, # EURO SIGN - 0x2122: 0xAA, # TRADE MARK SIGN - 0x2202: 0xB6, # PARTIAL DIFFERENTIAL - 0x2206: 0xC6, # INCREMENT - 0x220F: 0xB8, # N-ARY PRODUCT - 0x2211: 0xB7, # N-ARY SUMMATION - 0x221A: 0xC3, # SQUARE ROOT - 0x221E: 0xB0, # INFINITY - 0x222B: 0xBA, # INTEGRAL - 0x2248: 0xC5, # ALMOST EQUAL TO - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO - 0x25CA: 0xD7, # LOZENGE - 0xF8FF: 0xF0, # Apple logo -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/mac_turkish.py b/Lib/encodings/mac_turkish.py index 4e5641f..0787f49 100644 --- a/Lib/encodings/mac_turkish.py +++ b/Lib/encodings/mac_turkish.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,263 +303,5 @@ decoding_table = ( u'\u02c7' # 0xFF -> CARON ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # CONTROL CHARACTER - 0x0001: 0x01, # CONTROL CHARACTER - 0x0002: 0x02, # CONTROL CHARACTER - 0x0003: 0x03, # CONTROL CHARACTER - 0x0004: 0x04, # CONTROL CHARACTER - 0x0005: 0x05, # CONTROL CHARACTER - 0x0006: 0x06, # CONTROL CHARACTER - 0x0007: 0x07, # CONTROL CHARACTER - 0x0008: 0x08, # CONTROL CHARACTER - 0x0009: 0x09, # CONTROL CHARACTER - 0x000A: 0x0A, # CONTROL CHARACTER - 0x000B: 0x0B, # CONTROL CHARACTER - 0x000C: 0x0C, # CONTROL CHARACTER - 0x000D: 0x0D, # CONTROL CHARACTER - 0x000E: 0x0E, # CONTROL CHARACTER - 0x000F: 0x0F, # CONTROL CHARACTER - 0x0010: 0x10, # CONTROL CHARACTER - 0x0011: 0x11, # CONTROL CHARACTER - 0x0012: 0x12, # CONTROL CHARACTER - 0x0013: 0x13, # CONTROL CHARACTER - 0x0014: 0x14, # CONTROL CHARACTER - 0x0015: 0x15, # CONTROL CHARACTER - 0x0016: 0x16, # CONTROL CHARACTER - 0x0017: 0x17, # CONTROL CHARACTER - 0x0018: 0x18, # CONTROL CHARACTER - 0x0019: 0x19, # CONTROL CHARACTER - 0x001A: 0x1A, # CONTROL CHARACTER - 0x001B: 0x1B, # CONTROL CHARACTER - 0x001C: 0x1C, # CONTROL CHARACTER - 0x001D: 0x1D, # CONTROL CHARACTER - 0x001E: 0x1E, # CONTROL CHARACTER - 0x001F: 0x1F, # CONTROL CHARACTER - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # CONTROL CHARACTER - 0x00A0: 0xCA, # NO-BREAK SPACE - 0x00A1: 0xC1, # INVERTED EXCLAMATION MARK - 0x00A2: 0xA2, # CENT SIGN - 0x00A3: 0xA3, # POUND SIGN - 0x00A5: 0xB4, # YEN SIGN - 0x00A7: 0xA4, # SECTION SIGN - 0x00A8: 0xAC, # DIAERESIS - 0x00A9: 0xA9, # COPYRIGHT SIGN - 0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR - 0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00AC: 0xC2, # NOT SIGN - 0x00AE: 0xA8, # REGISTERED SIGN - 0x00AF: 0xF8, # MACRON - 0x00B0: 0xA1, # DEGREE SIGN - 0x00B1: 0xB1, # PLUS-MINUS SIGN - 0x00B4: 0xAB, # ACUTE ACCENT - 0x00B5: 0xB5, # MICRO SIGN - 0x00B6: 0xA6, # PILCROW SIGN - 0x00B7: 0xE1, # MIDDLE DOT - 0x00B8: 0xFC, # CEDILLA - 0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR - 0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00BF: 0xC0, # INVERTED QUESTION MARK - 0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE - 0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00C6: 0xAE, # LATIN CAPITAL LETTER AE - 0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE - 0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE - 0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE - 0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S - 0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE - 0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE - 0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE - 0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00E6: 0xBE, # LATIN SMALL LETTER AE - 0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA - 0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE - 0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE - 0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE - 0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE - 0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE - 0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE - 0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE - 0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE - 0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00F7: 0xD6, # DIVISION SIGN - 0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE - 0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE - 0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE - 0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x011E: 0xDA, # LATIN CAPITAL LETTER G WITH BREVE - 0x011F: 0xDB, # LATIN SMALL LETTER G WITH BREVE - 0x0130: 0xDC, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0xDD, # LATIN SMALL LETTER DOTLESS I - 0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE - 0x0153: 0xCF, # LATIN SMALL LIGATURE OE - 0x015E: 0xDE, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015F: 0xDF, # LATIN SMALL LETTER S WITH CEDILLA - 0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK - 0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x02C7: 0xFF, # CARON - 0x02D8: 0xF9, # BREVE - 0x02D9: 0xFA, # DOT ABOVE - 0x02DA: 0xFB, # RING ABOVE - 0x02DB: 0xFE, # OGONEK - 0x02DC: 0xF7, # SMALL TILDE - 0x02DD: 0xFD, # DOUBLE ACUTE ACCENT - 0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA - 0x03C0: 0xB9, # GREEK SMALL LETTER PI - 0x2013: 0xD0, # EN DASH - 0x2014: 0xD1, # EM DASH - 0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK - 0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK - 0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK - 0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK - 0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK - 0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK - 0x2020: 0xA0, # DAGGER - 0x2021: 0xE0, # DOUBLE DAGGER - 0x2022: 0xA5, # BULLET - 0x2026: 0xC9, # HORIZONTAL ELLIPSIS - 0x2030: 0xE4, # PER MILLE SIGN - 0x2122: 0xAA, # TRADE MARK SIGN - 0x2202: 0xB6, # PARTIAL DIFFERENTIAL - 0x2206: 0xC6, # INCREMENT - 0x220F: 0xB8, # N-ARY PRODUCT - 0x2211: 0xB7, # N-ARY SUMMATION - 0x221A: 0xC3, # SQUARE ROOT - 0x221E: 0xB0, # INFINITY - 0x222B: 0xBA, # INTEGRAL - 0x2248: 0xC5, # ALMOST EQUAL TO - 0x2260: 0xAD, # NOT EQUAL TO - 0x2264: 0xB2, # LESS-THAN OR EQUAL TO - 0x2265: 0xB3, # GREATER-THAN OR EQUAL TO - 0x25CA: 0xD7, # LOZENGE - 0xF8A0: 0xF5, # undefined1 - 0xF8FF: 0xF0, # Apple logo -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/encodings/tis_620.py b/Lib/encodings/tis_620.py index 166d932..b2cd22b 100644 --- a/Lib/encodings/tis_620.py +++ b/Lib/encodings/tis_620.py @@ -9,14 +9,14 @@ import codecs class Codec(codecs.Codec): def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) + return codecs.charmap_encode(input,errors,encoding_table) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_table) class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] + return codecs.charmap_encode(input,self.errors,encoding_table)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): @@ -303,254 +303,5 @@ decoding_table = ( u'\ufffe' ) -### Encoding Map - -encoding_map = { - 0x0000: 0x00, # NULL - 0x0001: 0x01, # START OF HEADING - 0x0002: 0x02, # START OF TEXT - 0x0003: 0x03, # END OF TEXT - 0x0004: 0x04, # END OF TRANSMISSION - 0x0005: 0x05, # ENQUIRY - 0x0006: 0x06, # ACKNOWLEDGE - 0x0007: 0x07, # BELL - 0x0008: 0x08, # BACKSPACE - 0x0009: 0x09, # HORIZONTAL TABULATION - 0x000A: 0x0A, # LINE FEED - 0x000B: 0x0B, # VERTICAL TABULATION - 0x000C: 0x0C, # FORM FEED - 0x000D: 0x0D, # CARRIAGE RETURN - 0x000E: 0x0E, # SHIFT OUT - 0x000F: 0x0F, # SHIFT IN - 0x0010: 0x10, # DATA LINK ESCAPE - 0x0011: 0x11, # DEVICE CONTROL ONE - 0x0012: 0x12, # DEVICE CONTROL TWO - 0x0013: 0x13, # DEVICE CONTROL THREE - 0x0014: 0x14, # DEVICE CONTROL FOUR - 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x16, # SYNCHRONOUS IDLE - 0x0017: 0x17, # END OF TRANSMISSION BLOCK - 0x0018: 0x18, # CANCEL - 0x0019: 0x19, # END OF MEDIUM - 0x001A: 0x1A, # SUBSTITUTE - 0x001B: 0x1B, # ESCAPE - 0x001C: 0x1C, # FILE SEPARATOR - 0x001D: 0x1D, # GROUP SEPARATOR - 0x001E: 0x1E, # RECORD SEPARATOR - 0x001F: 0x1F, # UNIT SEPARATOR - 0x0020: 0x20, # SPACE - 0x0021: 0x21, # EXCLAMATION MARK - 0x0022: 0x22, # QUOTATION MARK - 0x0023: 0x23, # NUMBER SIGN - 0x0024: 0x24, # DOLLAR SIGN - 0x0025: 0x25, # PERCENT SIGN - 0x0026: 0x26, # AMPERSAND - 0x0027: 0x27, # APOSTROPHE - 0x0028: 0x28, # LEFT PARENTHESIS - 0x0029: 0x29, # RIGHT PARENTHESIS - 0x002A: 0x2A, # ASTERISK - 0x002B: 0x2B, # PLUS SIGN - 0x002C: 0x2C, # COMMA - 0x002D: 0x2D, # HYPHEN-MINUS - 0x002E: 0x2E, # FULL STOP - 0x002F: 0x2F, # SOLIDUS - 0x0030: 0x30, # DIGIT ZERO - 0x0031: 0x31, # DIGIT ONE - 0x0032: 0x32, # DIGIT TWO - 0x0033: 0x33, # DIGIT THREE - 0x0034: 0x34, # DIGIT FOUR - 0x0035: 0x35, # DIGIT FIVE - 0x0036: 0x36, # DIGIT SIX - 0x0037: 0x37, # DIGIT SEVEN - 0x0038: 0x38, # DIGIT EIGHT - 0x0039: 0x39, # DIGIT NINE - 0x003A: 0x3A, # COLON - 0x003B: 0x3B, # SEMICOLON - 0x003C: 0x3C, # LESS-THAN SIGN - 0x003D: 0x3D, # EQUALS SIGN - 0x003E: 0x3E, # GREATER-THAN SIGN - 0x003F: 0x3F, # QUESTION MARK - 0x0040: 0x40, # COMMERCIAL AT - 0x0041: 0x41, # LATIN CAPITAL LETTER A - 0x0042: 0x42, # LATIN CAPITAL LETTER B - 0x0043: 0x43, # LATIN CAPITAL LETTER C - 0x0044: 0x44, # LATIN CAPITAL LETTER D - 0x0045: 0x45, # LATIN CAPITAL LETTER E - 0x0046: 0x46, # LATIN CAPITAL LETTER F - 0x0047: 0x47, # LATIN CAPITAL LETTER G - 0x0048: 0x48, # LATIN CAPITAL LETTER H - 0x0049: 0x49, # LATIN CAPITAL LETTER I - 0x004A: 0x4A, # LATIN CAPITAL LETTER J - 0x004B: 0x4B, # LATIN CAPITAL LETTER K - 0x004C: 0x4C, # LATIN CAPITAL LETTER L - 0x004D: 0x4D, # LATIN CAPITAL LETTER M - 0x004E: 0x4E, # LATIN CAPITAL LETTER N - 0x004F: 0x4F, # LATIN CAPITAL LETTER O - 0x0050: 0x50, # LATIN CAPITAL LETTER P - 0x0051: 0x51, # LATIN CAPITAL LETTER Q - 0x0052: 0x52, # LATIN CAPITAL LETTER R - 0x0053: 0x53, # LATIN CAPITAL LETTER S - 0x0054: 0x54, # LATIN CAPITAL LETTER T - 0x0055: 0x55, # LATIN CAPITAL LETTER U - 0x0056: 0x56, # LATIN CAPITAL LETTER V - 0x0057: 0x57, # LATIN CAPITAL LETTER W - 0x0058: 0x58, # LATIN CAPITAL LETTER X - 0x0059: 0x59, # LATIN CAPITAL LETTER Y - 0x005A: 0x5A, # LATIN CAPITAL LETTER Z - 0x005B: 0x5B, # LEFT SQUARE BRACKET - 0x005C: 0x5C, # REVERSE SOLIDUS - 0x005D: 0x5D, # RIGHT SQUARE BRACKET - 0x005E: 0x5E, # CIRCUMFLEX ACCENT - 0x005F: 0x5F, # LOW LINE - 0x0060: 0x60, # GRAVE ACCENT - 0x0061: 0x61, # LATIN SMALL LETTER A - 0x0062: 0x62, # LATIN SMALL LETTER B - 0x0063: 0x63, # LATIN SMALL LETTER C - 0x0064: 0x64, # LATIN SMALL LETTER D - 0x0065: 0x65, # LATIN SMALL LETTER E - 0x0066: 0x66, # LATIN SMALL LETTER F - 0x0067: 0x67, # LATIN SMALL LETTER G - 0x0068: 0x68, # LATIN SMALL LETTER H - 0x0069: 0x69, # LATIN SMALL LETTER I - 0x006A: 0x6A, # LATIN SMALL LETTER J - 0x006B: 0x6B, # LATIN SMALL LETTER K - 0x006C: 0x6C, # LATIN SMALL LETTER L - 0x006D: 0x6D, # LATIN SMALL LETTER M - 0x006E: 0x6E, # LATIN SMALL LETTER N - 0x006F: 0x6F, # LATIN SMALL LETTER O - 0x0070: 0x70, # LATIN SMALL LETTER P - 0x0071: 0x71, # LATIN SMALL LETTER Q - 0x0072: 0x72, # LATIN SMALL LETTER R - 0x0073: 0x73, # LATIN SMALL LETTER S - 0x0074: 0x74, # LATIN SMALL LETTER T - 0x0075: 0x75, # LATIN SMALL LETTER U - 0x0076: 0x76, # LATIN SMALL LETTER V - 0x0077: 0x77, # LATIN SMALL LETTER W - 0x0078: 0x78, # LATIN SMALL LETTER X - 0x0079: 0x79, # LATIN SMALL LETTER Y - 0x007A: 0x7A, # LATIN SMALL LETTER Z - 0x007B: 0x7B, # LEFT CURLY BRACKET - 0x007C: 0x7C, # VERTICAL LINE - 0x007D: 0x7D, # RIGHT CURLY BRACKET - 0x007E: 0x7E, # TILDE - 0x007F: 0x7F, # DELETE - 0x0080: 0x80, # <control> - 0x0081: 0x81, # <control> - 0x0082: 0x82, # <control> - 0x0083: 0x83, # <control> - 0x0084: 0x84, # <control> - 0x0085: 0x85, # <control> - 0x0086: 0x86, # <control> - 0x0087: 0x87, # <control> - 0x0088: 0x88, # <control> - 0x0089: 0x89, # <control> - 0x008A: 0x8A, # <control> - 0x008B: 0x8B, # <control> - 0x008C: 0x8C, # <control> - 0x008D: 0x8D, # <control> - 0x008E: 0x8E, # <control> - 0x008F: 0x8F, # <control> - 0x0090: 0x90, # <control> - 0x0091: 0x91, # <control> - 0x0092: 0x92, # <control> - 0x0093: 0x93, # <control> - 0x0094: 0x94, # <control> - 0x0095: 0x95, # <control> - 0x0096: 0x96, # <control> - 0x0097: 0x97, # <control> - 0x0098: 0x98, # <control> - 0x0099: 0x99, # <control> - 0x009A: 0x9A, # <control> - 0x009B: 0x9B, # <control> - 0x009C: 0x9C, # <control> - 0x009D: 0x9D, # <control> - 0x009E: 0x9E, # <control> - 0x009F: 0x9F, # <control> - 0x0E01: 0xA1, # THAI CHARACTER KO KAI - 0x0E02: 0xA2, # THAI CHARACTER KHO KHAI - 0x0E03: 0xA3, # THAI CHARACTER KHO KHUAT - 0x0E04: 0xA4, # THAI CHARACTER KHO KHWAI - 0x0E05: 0xA5, # THAI CHARACTER KHO KHON - 0x0E06: 0xA6, # THAI CHARACTER KHO RAKHANG - 0x0E07: 0xA7, # THAI CHARACTER NGO NGU - 0x0E08: 0xA8, # THAI CHARACTER CHO CHAN - 0x0E09: 0xA9, # THAI CHARACTER CHO CHING - 0x0E0A: 0xAA, # THAI CHARACTER CHO CHANG - 0x0E0B: 0xAB, # THAI CHARACTER SO SO - 0x0E0C: 0xAC, # THAI CHARACTER CHO CHOE - 0x0E0D: 0xAD, # THAI CHARACTER YO YING - 0x0E0E: 0xAE, # THAI CHARACTER DO CHADA - 0x0E0F: 0xAF, # THAI CHARACTER TO PATAK - 0x0E10: 0xB0, # THAI CHARACTER THO THAN - 0x0E11: 0xB1, # THAI CHARACTER THO NANGMONTHO - 0x0E12: 0xB2, # THAI CHARACTER THO PHUTHAO - 0x0E13: 0xB3, # THAI CHARACTER NO NEN - 0x0E14: 0xB4, # THAI CHARACTER DO DEK - 0x0E15: 0xB5, # THAI CHARACTER TO TAO - 0x0E16: 0xB6, # THAI CHARACTER THO THUNG - 0x0E17: 0xB7, # THAI CHARACTER THO THAHAN - 0x0E18: 0xB8, # THAI CHARACTER THO THONG - 0x0E19: 0xB9, # THAI CHARACTER NO NU - 0x0E1A: 0xBA, # THAI CHARACTER BO BAIMAI - 0x0E1B: 0xBB, # THAI CHARACTER PO PLA - 0x0E1C: 0xBC, # THAI CHARACTER PHO PHUNG - 0x0E1D: 0xBD, # THAI CHARACTER FO FA - 0x0E1E: 0xBE, # THAI CHARACTER PHO PHAN - 0x0E1F: 0xBF, # THAI CHARACTER FO FAN - 0x0E20: 0xC0, # THAI CHARACTER PHO SAMPHAO - 0x0E21: 0xC1, # THAI CHARACTER MO MA - 0x0E22: 0xC2, # THAI CHARACTER YO YAK - 0x0E23: 0xC3, # THAI CHARACTER RO RUA - 0x0E24: 0xC4, # THAI CHARACTER RU - 0x0E25: 0xC5, # THAI CHARACTER LO LING - 0x0E26: 0xC6, # THAI CHARACTER LU - 0x0E27: 0xC7, # THAI CHARACTER WO WAEN - 0x0E28: 0xC8, # THAI CHARACTER SO SALA - 0x0E29: 0xC9, # THAI CHARACTER SO RUSI - 0x0E2A: 0xCA, # THAI CHARACTER SO SUA - 0x0E2B: 0xCB, # THAI CHARACTER HO HIP - 0x0E2C: 0xCC, # THAI CHARACTER LO CHULA - 0x0E2D: 0xCD, # THAI CHARACTER O ANG - 0x0E2E: 0xCE, # THAI CHARACTER HO NOKHUK - 0x0E2F: 0xCF, # THAI CHARACTER PAIYANNOI - 0x0E30: 0xD0, # THAI CHARACTER SARA A - 0x0E31: 0xD1, # THAI CHARACTER MAI HAN-AKAT - 0x0E32: 0xD2, # THAI CHARACTER SARA AA - 0x0E33: 0xD3, # THAI CHARACTER SARA AM - 0x0E34: 0xD4, # THAI CHARACTER SARA I - 0x0E35: 0xD5, # THAI CHARACTER SARA II - 0x0E36: 0xD6, # THAI CHARACTER SARA UE - 0x0E37: 0xD7, # THAI CHARACTER SARA UEE - 0x0E38: 0xD8, # THAI CHARACTER SARA U - 0x0E39: 0xD9, # THAI CHARACTER SARA UU - 0x0E3A: 0xDA, # THAI CHARACTER PHINTHU - 0x0E3F: 0xDF, # THAI CURRENCY SYMBOL BAHT - 0x0E40: 0xE0, # THAI CHARACTER SARA E - 0x0E41: 0xE1, # THAI CHARACTER SARA AE - 0x0E42: 0xE2, # THAI CHARACTER SARA O - 0x0E43: 0xE3, # THAI CHARACTER SARA AI MAIMUAN - 0x0E44: 0xE4, # THAI CHARACTER SARA AI MAIMALAI - 0x0E45: 0xE5, # THAI CHARACTER LAKKHANGYAO - 0x0E46: 0xE6, # THAI CHARACTER MAIYAMOK - 0x0E47: 0xE7, # THAI CHARACTER MAITAIKHU - 0x0E48: 0xE8, # THAI CHARACTER MAI EK - 0x0E49: 0xE9, # THAI CHARACTER MAI THO - 0x0E4A: 0xEA, # THAI CHARACTER MAI TRI - 0x0E4B: 0xEB, # THAI CHARACTER MAI CHATTAWA - 0x0E4C: 0xEC, # THAI CHARACTER THANTHAKHAT - 0x0E4D: 0xED, # THAI CHARACTER NIKHAHIT - 0x0E4E: 0xEE, # THAI CHARACTER YAMAKKAN - 0x0E4F: 0xEF, # THAI CHARACTER FONGMAN - 0x0E50: 0xF0, # THAI DIGIT ZERO - 0x0E51: 0xF1, # THAI DIGIT ONE - 0x0E52: 0xF2, # THAI DIGIT TWO - 0x0E53: 0xF3, # THAI DIGIT THREE - 0x0E54: 0xF4, # THAI DIGIT FOUR - 0x0E55: 0xF5, # THAI DIGIT FIVE - 0x0E56: 0xF6, # THAI DIGIT SIX - 0x0E57: 0xF7, # THAI DIGIT SEVEN - 0x0E58: 0xF8, # THAI DIGIT EIGHT - 0x0E59: 0xF9, # THAI DIGIT NINE - 0x0E5A: 0xFA, # THAI CHARACTER ANGKHANKHU - 0x0E5B: 0xFB, # THAI CHARACTER KHOMUT -} +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) diff --git a/Lib/functools.py b/Lib/functools.py index 4935c9f..8783f08 100644 --- a/Lib/functools.py +++ b/Lib/functools.py @@ -1,26 +1,51 @@ -"""functools.py - Tools for working with functions +"""functools.py - Tools for working with functions and callable objects """ # Python module wrapper for _functools C module # to allow utilities written in Python to be added # to the functools module. # Written by Nick Coghlan <ncoghlan at gmail.com> -# Copyright (c) 2006 Python Software Foundation. +# Copyright (C) 2006 Python Software Foundation. +# See C source code for _functools credits/copyright from _functools import partial -__all__ = [ - "partial", -] -# Still to come here (need to write tests and docs): -# update_wrapper - utility function to transfer basic function -# metadata to wrapper functions -# WRAPPER_ASSIGNMENTS & WRAPPER_UPDATES - defaults args to above -# (update_wrapper has been approved by BDFL) -# wraps - decorator factory equivalent to: -# def wraps(f): -# return partial(update_wrapper, wrapped=f) -# -# The wraps function makes it easy to avoid the bug that afflicts the -# decorator example in the python-dev email proposing the -# update_wrapper function: -# http://mail.python.org/pipermail/python-dev/2006-May/064775.html +# update_wrapper() and wraps() are tools to help write +# wrapper functions that can handle naive introspection + +WRAPPER_ASSIGNMENTS = ('__module__', '__name__', '__doc__') +WRAPPER_UPDATES = ('__dict__',) +def update_wrapper(wrapper, + wrapped, + assigned = WRAPPER_ASSIGNMENTS, + updated = WRAPPER_UPDATES): + """Update a wrapper function to look like the wrapped function + + wrapper is the function to be updated + wrapped is the original function + assigned is a tuple naming the attributes assigned directly + from the wrapped function to the wrapper function (defaults to + functools.WRAPPER_ASSIGNMENTS) + updated is a tuple naming the attributes off the wrapper that + are updated with the corresponding attribute from the wrapped + function (defaults to functools.WRAPPER_UPDATES) + """ + for attr in assigned: + setattr(wrapper, attr, getattr(wrapped, attr)) + for attr in updated: + getattr(wrapper, attr).update(getattr(wrapped, attr)) + # Return the wrapper so this can be used as a decorator via partial() + return wrapper + +def wraps(wrapped, + assigned = WRAPPER_ASSIGNMENTS, + updated = WRAPPER_UPDATES): + """Decorator factory to apply update_wrapper() to a wrapper function + + Returns a decorator that invokes update_wrapper() with the decorated + function as the wrapper argument and the arguments to wraps() as the + remaining arguments. Default arguments are as for update_wrapper(). + This is a convenience function to simplify applying partial() to + update_wrapper(). + """ + return partial(update_wrapper, wrapped=wrapped, + assigned=assigned, updated=updated) diff --git a/Lib/lib-tk/Tix.py b/Lib/lib-tk/Tix.py index 14c3c24..33ac519 100755 --- a/Lib/lib-tk/Tix.py +++ b/Lib/lib-tk/Tix.py @@ -468,7 +468,7 @@ class DisplayStyle: """DisplayStyle - handle configuration options shared by (multiple) Display Items""" - def __init__(self, itemtype, cnf={}, **kw ): + def __init__(self, itemtype, cnf={}, **kw): master = _default_root # global from Tkinter if not master and cnf.has_key('refwindow'): master=cnf['refwindow'] elif not master and kw.has_key('refwindow'): master= kw['refwindow'] @@ -480,7 +480,7 @@ class DisplayStyle: def __str__(self): return self.stylename - def _options(self, cnf, kw ): + def _options(self, cnf, kw): if kw and cnf: cnf = _cnfmerge((cnf, kw)) elif kw: diff --git a/Lib/markupbase.py b/Lib/markupbase.py index 85b07a2..24808d1 100644 --- a/Lib/markupbase.py +++ b/Lib/markupbase.py @@ -140,7 +140,7 @@ class ParserBase: # Internal -- parse a marked section # Override this to handle MS-word extension syntax <![if word]>content<![endif]> - def parse_marked_section( self, i, report=1 ): + def parse_marked_section(self, i, report=1): rawdata= self.rawdata assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()" sectName, j = self._scan_name( i+3, i ) diff --git a/Lib/plat-mac/Carbon/WASTEconst.py b/Lib/plat-mac/Carbon/WASTEconst.py deleted file mode 100644 index dc73905..0000000 --- a/Lib/plat-mac/Carbon/WASTEconst.py +++ /dev/null @@ -1,207 +0,0 @@ -# Generated from 'WASTE.h' - -kPascalStackBased = None # workaround for header parsing -def FOUR_CHAR_CODE(x): return x -weCantUndoErr = -10015 -weEmptySelectionErr = -10013 -weUnknownObjectTypeErr = -9478 -weObjectNotFoundErr = -9477 -weReadOnlyErr = -9476 -weTextNotFoundErr = -9474 -weInvalidTextEncodingErr = -9473 -weDuplicateAttributeErr = -9472 -weInvalidAttributeSizeErr = -9471 -weReadOnlyAttributeErr = -9470 -weOddByteCountErr = -9469 -weHandlerNotFoundErr = -1717 -weNotHandledErr = -1708 -weNewerVersionErr = -1706 -weCorruptDataErr = -1702 -weProtocolErr = -603 -weUndefinedSelectorErr = -50 -weFlushLeft = -2 -weFlushRight = -1 -weFlushDefault = 0 -weCenter = 1 -weJustify = 2 -weDirDefault = 1 -weDirRightToLeft = -1 -weDirLeftToRight = 0 -weDoFont = 0x0001 -weDoFace = 0x0002 -weDoSize = 0x0004 -weDoColor = 0x0008 -weDoAll = weDoFont | weDoFace | weDoSize | weDoColor -weDoAddSize = 0x0010 -weDoToggleFace = 0x0020 -weDoReplaceFace = 0x0040 -weDoPreserveScript = 0x0080 -weDoExtractSubscript = 0x0100 -weDoFaceMask = 0x0200 -weDoDirection = 0x00000001 -weDoAlignment = 0x00000002 -weDoLeftIndent = 0x00000004 -weDoRightIndent = 0x00000008 -weDoFirstLineIndent = 0x00000010 -weDoLineSpacing = 0x00000020 -weDoSpaceBefore = 0x00000040 -weDoSpaceAfter = 0x00000080 -weDoBottomBorderStyle = 0x00000400 -kLeadingEdge = -1 -kTrailingEdge = 0 -kObjectEdge = 2 -weFAutoScroll = 0 -weFOutlineHilite = 2 -weFReadOnly = 5 -weFUndo = 6 -weFIntCutAndPaste = 7 -weFDragAndDrop = 8 -weFInhibitRecal = 9 -weFUseTempMem = 10 -weFDrawOffscreen = 11 -weFInhibitRedraw = 12 -weFMonoStyled = 13 -weFMultipleUndo = 14 -weFNoKeyboardSync = 29 -weFInhibitICSupport = 30 -weFInhibitColor = 31 -# weDoAutoScroll = 1UL << weFAutoScroll -# weDoOutlineHilite = 1UL << weFOutlineHilite -# weDoReadOnly = 1UL << weFReadOnly -# weDoUndo = 1UL << weFUndo -# weDoIntCutAndPaste = 1UL << weFIntCutAndPaste -# weDoDragAndDrop = 1UL << weFDragAndDrop -# weDoInhibitRecal = 1UL << weFInhibitRecal -# weDoUseTempMem = 1UL << weFUseTempMem -# weDoDrawOffscreen = 1UL << weFDrawOffscreen -# weDoInhibitRedraw = 1UL << weFInhibitRedraw -# weDoMonoStyled = 1UL << weFMonoStyled -# weDoMultipleUndo = 1UL << weFMultipleUndo -# weDoNoKeyboardSync = 1UL << weFNoKeyboardSync -# weDoInhibitICSupport = 1UL << weFInhibitICSupport -# weDoInhibitColor = 1UL << weFInhibitColor -weBitToggle = -2 -weBitTest = -1 -weBitClear = 0 -weBitSet = 1 -weLowerCase = 0 -weUpperCase = 1 -weFindWholeWords = 0x00000001 -weFindCaseInsensitive = 0x00000002 -weFindDiacriticalInsensitive = 0x00000004 -wePutIntCutAndPaste = 0x00000001 -wePutAddToTypingSequence = 0x00000002 -wePutDetectUnicodeBOM = 0x00000200 -weStreamDestinationKindMask = 0x000000FF -weStreamIncludeObjects = 0x00000100 -weGetAddUnicodeBOM = 0x00000200 -weGetLittleEndian = 0x00000400 -weTagFontFamily = FOUR_CHAR_CODE('font') -weTagFontSize = FOUR_CHAR_CODE('ptsz') -weTagPlain = FOUR_CHAR_CODE('plan') -weTagBold = FOUR_CHAR_CODE('bold') -weTagItalic = FOUR_CHAR_CODE('ital') -weTagUnderline = FOUR_CHAR_CODE('undl') -weTagOutline = FOUR_CHAR_CODE('outl') -weTagShadow = FOUR_CHAR_CODE('shad') -weTagCondensed = FOUR_CHAR_CODE('cond') -weTagExtended = FOUR_CHAR_CODE('pexp') -weTagStrikethrough = FOUR_CHAR_CODE('strk') -weTagTextColor = FOUR_CHAR_CODE('colr') -weTagBackgroundColor = FOUR_CHAR_CODE('pbcl') -weTagTransferMode = FOUR_CHAR_CODE('pptm') -weTagVerticalShift = FOUR_CHAR_CODE('xshf') -weTagAlignment = FOUR_CHAR_CODE('pjst') -weTagDirection = FOUR_CHAR_CODE('LDIR') -weTagLineSpacing = FOUR_CHAR_CODE('ledg') -weTagLeftIndent = FOUR_CHAR_CODE('lein') -weTagRightIndent = FOUR_CHAR_CODE('riin') -weTagFirstLineIndent = FOUR_CHAR_CODE('fidt') -weTagSpaceBefore = FOUR_CHAR_CODE('spbe') -weTagSpaceAfter = FOUR_CHAR_CODE('spaf') -weTagBottomBorderStyle = FOUR_CHAR_CODE('BBRD') -weTagForceFontFamily = FOUR_CHAR_CODE('ffnt') -weTagAddFontSize = FOUR_CHAR_CODE('+siz') -weTagAddVerticalShift = FOUR_CHAR_CODE('+shf') -weTagTextEncoding = FOUR_CHAR_CODE('ptxe') -weTagQDStyles = FOUR_CHAR_CODE('qdst') -weTagTETextStyle = FOUR_CHAR_CODE('tets') -weTagAlignmentDefault = FOUR_CHAR_CODE('deft') -weTagAlignmentLeft = FOUR_CHAR_CODE('left') -weTagAlignmentCenter = FOUR_CHAR_CODE('cent') -weTagAlignmentRight = FOUR_CHAR_CODE('rght') -weTagAlignmentFull = FOUR_CHAR_CODE('full') -weTagDirectionDefault = FOUR_CHAR_CODE('deft') -weTagDirectionLeftToRight = FOUR_CHAR_CODE('L->R') -weTagDirectionRightToLeft = FOUR_CHAR_CODE('R->L') -weTagBorderStyleNone = FOUR_CHAR_CODE('NONE') -weTagBorderStyleThin = FOUR_CHAR_CODE('SLDL') -weTagBorderStyleDotted = FOUR_CHAR_CODE('DTDL') -weTagBorderStyleThick = FOUR_CHAR_CODE('THKL') -weLineSpacingSingle = 0x00000000 -weLineSpacingOneAndHalf = 0x00008000 -weLineSpacingDouble = 0x00010000 -weCharByteHook = FOUR_CHAR_CODE('cbyt') -weCharToPixelHook = FOUR_CHAR_CODE('c2p ') -weCharTypeHook = FOUR_CHAR_CODE('ctyp') -weClickLoop = FOUR_CHAR_CODE('clik') -weCurrentDrag = FOUR_CHAR_CODE('drag') -weDrawTextHook = FOUR_CHAR_CODE('draw') -weDrawTSMHiliteHook = FOUR_CHAR_CODE('dtsm') -weEraseHook = FOUR_CHAR_CODE('eras') -weFontFamilyToNameHook = FOUR_CHAR_CODE('ff2n') -weFontNameToFamilyHook = FOUR_CHAR_CODE('fn2f') -weFluxProc = FOUR_CHAR_CODE('flux') -weHiliteDropAreaHook = FOUR_CHAR_CODE('hidr') -weLineBreakHook = FOUR_CHAR_CODE('lbrk') -wePixelToCharHook = FOUR_CHAR_CODE('p2c ') -wePort = FOUR_CHAR_CODE('port') -wePreTrackDragHook = FOUR_CHAR_CODE('ptrk') -weRefCon = FOUR_CHAR_CODE('refc') -weScrollProc = FOUR_CHAR_CODE('scrl') -weText = FOUR_CHAR_CODE('text') -weTranslateDragHook = FOUR_CHAR_CODE('xdrg') -weTranslucencyThreshold = FOUR_CHAR_CODE('tluc') -weTSMDocumentID = FOUR_CHAR_CODE('tsmd') -weTSMPreUpdate = FOUR_CHAR_CODE('pre ') -weTSMPostUpdate = FOUR_CHAR_CODE('post') -weURLHint = FOUR_CHAR_CODE('urlh') -weWordBreakHook = FOUR_CHAR_CODE('wbrk') -weNewHandler = FOUR_CHAR_CODE('new ') -weDisposeHandler = FOUR_CHAR_CODE('free') -weDrawHandler = FOUR_CHAR_CODE('draw') -weClickHandler = FOUR_CHAR_CODE('clik') -weStreamHandler = FOUR_CHAR_CODE('strm') -weHoverHandler = FOUR_CHAR_CODE('hovr') -kTypeText = FOUR_CHAR_CODE('TEXT') -kTypeStyles = FOUR_CHAR_CODE('styl') -kTypeSoup = FOUR_CHAR_CODE('SOUP') -kTypeFontTable = FOUR_CHAR_CODE('FISH') -kTypeParaFormat = FOUR_CHAR_CODE('WEpf') -kTypeRulerScrap = FOUR_CHAR_CODE('WEru') -kTypeCharFormat = FOUR_CHAR_CODE('WEcf') -kTypeStyleScrap = FOUR_CHAR_CODE('WEst') -kTypeUnicodeText = FOUR_CHAR_CODE('utxt') -kTypeUTF8Text = FOUR_CHAR_CODE('UTF8') -kTypeStyledText = FOUR_CHAR_CODE('STXT') -weAKNone = 0 -weAKUnspecified = 1 -weAKTyping = 2 -weAKCut = 3 -weAKPaste = 4 -weAKClear = 5 -weAKDrag = 6 -weAKSetStyle = 7 -weAKSetRuler = 8 -weAKBackspace = 9 -weAKFwdDelete = 10 -weAKCaseChange = 11 -weAKObjectChange = 12 -weToScrap = 0 -weToDrag = 1 -weToSoup = 2 -weMouseEnter = 0 -weMouseWithin = 1 -weMouseLeave = 2 -kCurrentSelection = -1 -kNullStyle = -2 diff --git a/Lib/plat-mac/EasyDialogs.py b/Lib/plat-mac/EasyDialogs.py index c622d30..b33d1be 100644 --- a/Lib/plat-mac/EasyDialogs.py +++ b/Lib/plat-mac/EasyDialogs.py @@ -262,7 +262,7 @@ class ProgressBar: self.w.ShowWindow() self.d.DrawDialog() - def __del__( self ): + def __del__(self): if self.w: self.w.BringToFront() self.w.HideWindow() @@ -274,7 +274,7 @@ class ProgressBar: self.w.BringToFront() self.w.SetWTitle(newstr) - def label( self, *newstr ): + def label(self, *newstr): """label(text) - Set text in progress box""" self.w.BringToFront() if newstr: diff --git a/Lib/plat-mac/WASTEconst.py b/Lib/plat-mac/WASTEconst.py deleted file mode 100644 index f453338..0000000 --- a/Lib/plat-mac/WASTEconst.py +++ /dev/null @@ -1,207 +0,0 @@ -# Generated from 'WASTE.h' - -kPascalStackBased = None # workaround for header parsing -def FOUR_CHAR_CODE(x): return x -weCantUndoErr = -10015 -weEmptySelectionErr = -10013 -weUnknownObjectTypeErr = -9478 -weObjectNotFoundErr = -9477 -weReadOnlyErr = -9476 -weTextNotFoundErr = -9474 -weInvalidTextEncodingErr = -9473 -weDuplicateAttributeErr = -9472 -weInvalidAttributeSizeErr = -9471 -weReadOnlyAttributeErr = -9470 -weOddByteCountErr = -9469 -weHandlerNotFoundErr = -1717 -weNotHandledErr = -1708 -weNewerVersionErr = -1706 -weCorruptDataErr = -1702 -weProtocolErr = -603 -weUndefinedSelectorErr = -50 -weFlushLeft = -2 -weFlushRight = -1 -weFlushDefault = 0 -weCenter = 1 -weJustify = 2 -weDirDefault = 1 -weDirRightToLeft = -1 -weDirLeftToRight = 0 -weDoFont = 0x0001 -weDoFace = 0x0002 -weDoSize = 0x0004 -weDoColor = 0x0008 -weDoAll = weDoFont | weDoFace | weDoSize | weDoColor -weDoAddSize = 0x0010 -weDoToggleFace = 0x0020 -weDoReplaceFace = 0x0040 -weDoPreserveScript = 0x0080 -weDoExtractSubscript = 0x0100 -weDoFaceMask = 0x0200 -weDoDirection = 0x00000001 -weDoAlignment = 0x00000002 -weDoLeftIndent = 0x00000004 -weDoRightIndent = 0x00000008 -weDoFirstLineIndent = 0x00000010 -weDoLineSpacing = 0x00000020 -weDoSpaceBefore = 0x00000040 -weDoSpaceAfter = 0x00000080 -weDoBottomBorderStyle = 0x00000400 -kLeadingEdge = -1 -kTrailingEdge = 0 -kObjectEdge = 2 -weFAutoScroll = 0 -weFOutlineHilite = 2 -weFReadOnly = 5 -weFUndo = 6 -weFIntCutAndPaste = 7 -weFDragAndDrop = 8 -weFInhibitRecal = 9 -weFUseTempMem = 10 -weFDrawOffscreen = 11 -weFInhibitRedraw = 12 -weFMonoStyled = 13 -weFMultipleUndo = 14 -weFNoKeyboardSync = 29 -weFInhibitICSupport = 30 -weFInhibitColor = 31 -weDoAutoScroll = 1 << weFAutoScroll -weDoOutlineHilite = 1 << weFOutlineHilite -weDoReadOnly = 1 << weFReadOnly -weDoUndo = 1 << weFUndo -weDoIntCutAndPaste = 1 << weFIntCutAndPaste -weDoDragAndDrop = 1 << weFDragAndDrop -weDoInhibitRecal = 1 << weFInhibitRecal -weDoUseTempMem = 1 << weFUseTempMem -weDoDrawOffscreen = 1 << weFDrawOffscreen -weDoInhibitRedraw = 1 << weFInhibitRedraw -weDoMonoStyled = 1 << weFMonoStyled -weDoMultipleUndo = 1 << weFMultipleUndo -weDoNoKeyboardSync = 1 << weFNoKeyboardSync -weDoInhibitICSupport = 1 << weFInhibitICSupport -# weDoInhibitColor = 1 << weFInhibitColor -weBitToggle = -2 -weBitTest = -1 -weBitClear = 0 -weBitSet = 1 -weLowerCase = 0 -weUpperCase = 1 -weFindWholeWords = 0x00000001 -weFindCaseInsensitive = 0x00000002 -weFindDiacriticalInsensitive = 0x00000004 -wePutIntCutAndPaste = 0x00000001 -wePutAddToTypingSequence = 0x00000002 -wePutDetectUnicodeBOM = 0x00000200 -weStreamDestinationKindMask = 0x000000FF -weStreamIncludeObjects = 0x00000100 -weGetAddUnicodeBOM = 0x00000200 -weGetLittleEndian = 0x00000400 -weTagFontFamily = FOUR_CHAR_CODE('font') -weTagFontSize = FOUR_CHAR_CODE('ptsz') -weTagPlain = FOUR_CHAR_CODE('plan') -weTagBold = FOUR_CHAR_CODE('bold') -weTagItalic = FOUR_CHAR_CODE('ital') -weTagUnderline = FOUR_CHAR_CODE('undl') -weTagOutline = FOUR_CHAR_CODE('outl') -weTagShadow = FOUR_CHAR_CODE('shad') -weTagCondensed = FOUR_CHAR_CODE('cond') -weTagExtended = FOUR_CHAR_CODE('pexp') -weTagStrikethrough = FOUR_CHAR_CODE('strk') -weTagTextColor = FOUR_CHAR_CODE('colr') -weTagBackgroundColor = FOUR_CHAR_CODE('pbcl') -weTagTransferMode = FOUR_CHAR_CODE('pptm') -weTagVerticalShift = FOUR_CHAR_CODE('xshf') -weTagAlignment = FOUR_CHAR_CODE('pjst') -weTagDirection = FOUR_CHAR_CODE('LDIR') -weTagLineSpacing = FOUR_CHAR_CODE('ledg') -weTagLeftIndent = FOUR_CHAR_CODE('lein') -weTagRightIndent = FOUR_CHAR_CODE('riin') -weTagFirstLineIndent = FOUR_CHAR_CODE('fidt') -weTagSpaceBefore = FOUR_CHAR_CODE('spbe') -weTagSpaceAfter = FOUR_CHAR_CODE('spaf') -weTagBottomBorderStyle = FOUR_CHAR_CODE('BBRD') -weTagForceFontFamily = FOUR_CHAR_CODE('ffnt') -weTagAddFontSize = FOUR_CHAR_CODE('+siz') -weTagAddVerticalShift = FOUR_CHAR_CODE('+shf') -weTagTextEncoding = FOUR_CHAR_CODE('ptxe') -weTagQDStyles = FOUR_CHAR_CODE('qdst') -weTagTETextStyle = FOUR_CHAR_CODE('tets') -weTagAlignmentDefault = FOUR_CHAR_CODE('deft') -weTagAlignmentLeft = FOUR_CHAR_CODE('left') -weTagAlignmentCenter = FOUR_CHAR_CODE('cent') -weTagAlignmentRight = FOUR_CHAR_CODE('rght') -weTagAlignmentFull = FOUR_CHAR_CODE('full') -weTagDirectionDefault = FOUR_CHAR_CODE('deft') -weTagDirectionLeftToRight = FOUR_CHAR_CODE('L->R') -weTagDirectionRightToLeft = FOUR_CHAR_CODE('R->L') -weTagBorderStyleNone = FOUR_CHAR_CODE('NONE') -weTagBorderStyleThin = FOUR_CHAR_CODE('SLDL') -weTagBorderStyleDotted = FOUR_CHAR_CODE('DTDL') -weTagBorderStyleThick = FOUR_CHAR_CODE('THKL') -weLineSpacingSingle = 0x00000000 -weLineSpacingOneAndHalf = 0x00008000 -weLineSpacingDouble = 0x00010000 -weCharByteHook = FOUR_CHAR_CODE('cbyt') -weCharToPixelHook = FOUR_CHAR_CODE('c2p ') -weCharTypeHook = FOUR_CHAR_CODE('ctyp') -weClickLoop = FOUR_CHAR_CODE('clik') -weCurrentDrag = FOUR_CHAR_CODE('drag') -weDrawTextHook = FOUR_CHAR_CODE('draw') -weDrawTSMHiliteHook = FOUR_CHAR_CODE('dtsm') -weEraseHook = FOUR_CHAR_CODE('eras') -weFontFamilyToNameHook = FOUR_CHAR_CODE('ff2n') -weFontNameToFamilyHook = FOUR_CHAR_CODE('fn2f') -weFluxProc = FOUR_CHAR_CODE('flux') -weHiliteDropAreaHook = FOUR_CHAR_CODE('hidr') -weLineBreakHook = FOUR_CHAR_CODE('lbrk') -wePixelToCharHook = FOUR_CHAR_CODE('p2c ') -wePort = FOUR_CHAR_CODE('port') -wePreTrackDragHook = FOUR_CHAR_CODE('ptrk') -weRefCon = FOUR_CHAR_CODE('refc') -weScrollProc = FOUR_CHAR_CODE('scrl') -weText = FOUR_CHAR_CODE('text') -weTranslateDragHook = FOUR_CHAR_CODE('xdrg') -weTranslucencyThreshold = FOUR_CHAR_CODE('tluc') -weTSMDocumentID = FOUR_CHAR_CODE('tsmd') -weTSMPreUpdate = FOUR_CHAR_CODE('pre ') -weTSMPostUpdate = FOUR_CHAR_CODE('post') -weURLHint = FOUR_CHAR_CODE('urlh') -weWordBreakHook = FOUR_CHAR_CODE('wbrk') -weNewHandler = FOUR_CHAR_CODE('new ') -weDisposeHandler = FOUR_CHAR_CODE('free') -weDrawHandler = FOUR_CHAR_CODE('draw') -weClickHandler = FOUR_CHAR_CODE('clik') -weStreamHandler = FOUR_CHAR_CODE('strm') -weHoverHandler = FOUR_CHAR_CODE('hovr') -kTypeText = FOUR_CHAR_CODE('TEXT') -kTypeStyles = FOUR_CHAR_CODE('styl') -kTypeSoup = FOUR_CHAR_CODE('SOUP') -kTypeFontTable = FOUR_CHAR_CODE('FISH') -kTypeParaFormat = FOUR_CHAR_CODE('WEpf') -kTypeRulerScrap = FOUR_CHAR_CODE('WEru') -kTypeCharFormat = FOUR_CHAR_CODE('WEcf') -kTypeStyleScrap = FOUR_CHAR_CODE('WEst') -kTypeUnicodeText = FOUR_CHAR_CODE('utxt') -kTypeUTF8Text = FOUR_CHAR_CODE('UTF8') -kTypeStyledText = FOUR_CHAR_CODE('STXT') -weAKNone = 0 -weAKUnspecified = 1 -weAKTyping = 2 -weAKCut = 3 -weAKPaste = 4 -weAKClear = 5 -weAKDrag = 6 -weAKSetStyle = 7 -weAKSetRuler = 8 -weAKBackspace = 9 -weAKFwdDelete = 10 -weAKCaseChange = 11 -weAKObjectChange = 12 -weToScrap = 0 -weToDrag = 1 -weToSoup = 2 -weMouseEnter = 0 -weMouseWithin = 1 -weMouseLeave = 2 -kCurrentSelection = -1 -kNullStyle = -2 diff --git a/Lib/plat-mac/argvemulator.py b/Lib/plat-mac/argvemulator.py index 6103a8a..2d66f1c 100644 --- a/Lib/plat-mac/argvemulator.py +++ b/Lib/plat-mac/argvemulator.py @@ -7,6 +7,7 @@ import traceback from Carbon import AE from Carbon.AppleEvents import * from Carbon import Evt +from Carbon import File from Carbon.Events import * import aetools @@ -16,36 +17,36 @@ class ArgvCollector: def __init__(self): self.quitting = 0 - self.ae_handlers = {} # Remove the funny -psn_xxx_xxx argument if len(sys.argv) > 1 and sys.argv[1][:4] == '-psn': del sys.argv[1] - self.installaehandler('aevt', 'oapp', self.open_app) - self.installaehandler('aevt', 'odoc', self.open_file) - def installaehandler(self, classe, type, callback): - AE.AEInstallEventHandler(classe, type, self.callback_wrapper) - self.ae_handlers[(classe, type)] = callback + AE.AEInstallEventHandler(kCoreEventClass, kAEOpenApplication, self.__runapp) + AE.AEInstallEventHandler(kCoreEventClass, kAEOpenDocuments, self.__openfiles) def close(self): - for classe, type in self.ae_handlers.keys(): - AE.AERemoveEventHandler(classe, type) + AE.AERemoveEventHandler(kCoreEventClass, kAEOpenApplication) + AE.AERemoveEventHandler(kCoreEventClass, kAEOpenDocuments) def mainloop(self, mask = highLevelEventMask, timeout = 1*60): + # Note: this is not the right way to run an event loop in OSX or even + # "recent" versions of MacOS9. This is however code that has proven + # itself. stoptime = Evt.TickCount() + timeout while not self.quitting and Evt.TickCount() < stoptime: - self.dooneevent(mask, timeout) - self.close() + self._dooneevent(mask, timeout) - def _quit(self): - self.quitting = 1 + if not self.quitting: + print "argvemulator: timeout waiting for arguments" - def dooneevent(self, mask = highLevelEventMask, timeout = 1*60): + self.close() + + def _dooneevent(self, mask = highLevelEventMask, timeout = 1*60): got, event = Evt.WaitNextEvent(mask, timeout) if got: - self.lowlevelhandler(event) + self._lowlevelhandler(event) - def lowlevelhandler(self, event): + def _lowlevelhandler(self, event): what, message, when, where, modifiers = event h, v = where if what == kHighLevelEvent: @@ -60,53 +61,28 @@ class ArgvCollector: else: print "Unhandled event:", event - def callback_wrapper(self, _request, _reply): - _parameters, _attributes = aetools.unpackevent(_request) - _class = _attributes['evcl'].type - _type = _attributes['evid'].type - - if self.ae_handlers.has_key((_class, _type)): - _function = self.ae_handlers[(_class, _type)] - elif self.ae_handlers.has_key((_class, '****')): - _function = self.ae_handlers[(_class, '****')] - elif self.ae_handlers.has_key(('****', '****')): - _function = self.ae_handlers[('****', '****')] - else: - raise 'Cannot happen: AE callback without handler', (_class, _type) - - # XXXX Do key-to-name mapping here - - _parameters['_attributes'] = _attributes - _parameters['_class'] = _class - _parameters['_type'] = _type - if _parameters.has_key('----'): - _object = _parameters['----'] - del _parameters['----'] - # The try/except that used to be here can mask programmer errors. - # Let the program crash, the programmer can always add a **args - # to the formal parameter list. - rv = _function(_object, **_parameters) - else: - #Same try/except comment as above - rv = _function(**_parameters) - if rv == None: - aetools.packevent(_reply, {}) - else: - aetools.packevent(_reply, {'----':rv}) + def _quit(self): + self.quitting = 1 - def open_app(self, **args): + def __runapp(self, requestevent, replyevent): self._quit() - def open_file(self, _object=None, **args): - for alias in _object: - fsr = alias.FSResolveAlias(None)[0] - pathname = fsr.as_pathname() - sys.argv.append(pathname) - self._quit() + def __openfiles(self, requestevent, replyevent): + try: + listdesc = requestevent.AEGetParamDesc(keyDirectObject, typeAEList) + for i in range(listdesc.AECountItems()): + aliasdesc = listdesc.AEGetNthDesc(i+1, typeAlias)[1] + alias = File.Alias(rawdata=aliasdesc.data) + fsref = alias.FSResolveAlias(None)[0] + pathname = fsref.as_pathname() + sys.argv.append(pathname) + except Exception, e: + print "argvemulator.py warning: can't unpack an open document event" + import traceback + traceback.print_exc() - def other(self, _object=None, _class=None, _type=None, **args): - print 'Ignore AppleEvent', (_class, _type), 'for', _object, 'Other args:', args + self._quit() if __name__ == '__main__': ArgvCollector().mainloop() diff --git a/Lib/smtplib.py b/Lib/smtplib.py index 07916cc..9c8c4fa 100755 --- a/Lib/smtplib.py +++ b/Lib/smtplib.py @@ -150,7 +150,7 @@ class SSLFakeFile: It only supports what is needed in smtplib. """ - def __init__( self, sslobj): + def __init__(self, sslobj): self.sslobj = sslobj def readline(self): diff --git a/Lib/socket.py b/Lib/socket.py index cc5e65e..fa0e663 100644 --- a/Lib/socket.py +++ b/Lib/socket.py @@ -141,7 +141,7 @@ class _socketobject(object): __doc__ = _realsocket.__doc__ __slots__ = ["_sock", - "recv", "recv_buf", "recvfrom_buf", + "recv", "recv_into", "recvfrom_into", "send", "sendto", "recvfrom", "__weakref__"] @@ -151,10 +151,10 @@ class _socketobject(object): self._sock = _sock self.send = self._sock.send self.recv = self._sock.recv - self.recv_buf = self._sock.recv_buf + self.recv_into = self._sock.recv_into self.sendto = self._sock.sendto self.recvfrom = self._sock.recvfrom - self.recvfrom_buf = self._sock.recvfrom_buf + self.recvfrom_into = self._sock.recvfrom_into def close(self): self._sock = _closedsocket() diff --git a/Lib/struct.py b/Lib/struct.py index 51ee29a..9113e71 100644 --- a/Lib/struct.py +++ b/Lib/struct.py @@ -62,7 +62,7 @@ def pack(fmt, *args): o = _compile(fmt) return o.pack(*args) -def pack_to(fmt, buf, offset, *args): +def pack_into(fmt, buf, offset, *args): """ Pack the values v2, v2, ... according to fmt, write the packed bytes into the writable buffer buf starting at offset. @@ -72,7 +72,7 @@ def pack_to(fmt, buf, offset, *args): o = _cache[fmt] except KeyError: o = _compile(fmt) - return o.pack_to(buf, offset, *args) + return o.pack_into(buf, offset, *args) def unpack(fmt, s): """ diff --git a/Lib/subprocess.py b/Lib/subprocess.py index 19fe847..a6af7e7 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -388,6 +388,7 @@ if mswindows: hStdInput = None hStdOutput = None hStdError = None + wShowWindow = 0 class pywintypes: error = IOError else: @@ -744,18 +745,17 @@ class Popen(object): args = list2cmdline(args) # Process startup details - default_startupinfo = STARTUPINFO() if startupinfo is None: - startupinfo = default_startupinfo - if not None in (p2cread, c2pwrite, errwrite): + startupinfo = STARTUPINFO() + if None not in (p2cread, c2pwrite, errwrite): startupinfo.dwFlags |= STARTF_USESTDHANDLES startupinfo.hStdInput = p2cread startupinfo.hStdOutput = c2pwrite startupinfo.hStdError = errwrite if shell: - default_startupinfo.dwFlags |= STARTF_USESHOWWINDOW - default_startupinfo.wShowWindow = SW_HIDE + startupinfo.dwFlags |= STARTF_USESHOWWINDOW + startupinfo.wShowWindow = SW_HIDE comspec = os.environ.get("COMSPEC", "cmd.exe") args = comspec + " /c " + args if (GetVersion() >= 0x80000000L or diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py index 314e7e1..ca4a3b5 100755 --- a/Lib/test/regrtest.py +++ b/Lib/test/regrtest.py @@ -503,6 +503,7 @@ def runtest(test, generate, verbose, quiet, testdir=None, huntrleaks=False): quiet -- if true, don't print 'skipped' messages (probably redundant) testdir -- test directory """ + test_support.unload(test) if not testdir: testdir = findtestdir() @@ -512,11 +513,7 @@ def runtest(test, generate, verbose, quiet, testdir=None, huntrleaks=False): cfp = None else: cfp = cStringIO.StringIO() - if huntrleaks: - if not hasattr(sys, 'gettotalrefcount'): - raise Exception("Tracking reference leaks requires a debug build " - "of Python") - refrep = open(huntrleaks[2], "a") + try: save_stdout = sys.stdout try: @@ -538,60 +535,7 @@ def runtest(test, generate, verbose, quiet, testdir=None, huntrleaks=False): if indirect_test is not None: indirect_test() if huntrleaks: - # This code *is* hackish and inelegant, yes. - # But it seems to do the job. - import copy_reg - fs = warnings.filters[:] - ps = copy_reg.dispatch_table.copy() - pic = sys.path_importer_cache.copy() - import gc - def cleanup(): - import _strptime, linecache, warnings, dircache - import urlparse, urllib, urllib2, mimetypes, doctest - import struct - from distutils.dir_util import _path_created - _path_created.clear() - warnings.filters[:] = fs - gc.collect() - re.purge() - _strptime._regex_cache.clear() - urlparse.clear_cache() - urllib.urlcleanup() - urllib2.install_opener(None) - copy_reg.dispatch_table.clear() - copy_reg.dispatch_table.update(ps) - sys.path_importer_cache.clear() - sys.path_importer_cache.update(pic) - dircache.reset() - linecache.clearcache() - mimetypes._default_mime_types() - struct._cache.clear() - doctest.master = None - if indirect_test: - def run_the_test(): - indirect_test() - else: - def run_the_test(): - reload(the_module) - deltas = [] - repcount = huntrleaks[0] + huntrleaks[1] - print >> sys.stderr, "beginning", repcount, "repetitions" - print >> sys.stderr, \ - ("1234567890"*(repcount//10 + 1))[:repcount] - cleanup() - for i in range(repcount): - rc = sys.gettotalrefcount() - run_the_test() - sys.stderr.write('.') - cleanup() - deltas.append(sys.gettotalrefcount() - rc - 2) - print >>sys.stderr - if max(map(abs, deltas[-huntrleaks[1]:])) > 0: - print >>sys.stderr, test, 'leaked', \ - deltas[-huntrleaks[1]:], 'references' - print >>refrep, test, 'leaked', \ - deltas[-huntrleaks[1]:], 'references' - # The end of the huntrleaks hackishness. + dash_R(the_module, test, indirect_test, huntrleaks) finally: sys.stdout = save_stdout except test_support.ResourceDenied, msg: @@ -651,6 +595,77 @@ def runtest(test, generate, verbose, quiet, testdir=None, huntrleaks=False): sys.stdout.flush() return 0 +def dash_R(the_module, test, indirect_test, huntrleaks): + # This code is hackish and inelegant, but it seems to do the job. + import copy_reg + + if not hasattr(sys, 'gettotalrefcount'): + raise Exception("Tracking reference leaks requires a debug build " + "of Python") + + # Save current values for dash_R_cleanup() to restore. + fs = warnings.filters[:] + ps = copy_reg.dispatch_table.copy() + pic = sys.path_importer_cache.copy() + + if indirect_test: + def run_the_test(): + indirect_test() + else: + def run_the_test(): + reload(the_module) + + deltas = [] + nwarmup, ntracked, fname = huntrleaks + repcount = nwarmup + ntracked + print >> sys.stderr, "beginning", repcount, "repetitions" + print >> sys.stderr, ("1234567890"*(repcount//10 + 1))[:repcount] + dash_R_cleanup(fs, ps, pic) + for i in range(repcount): + rc = sys.gettotalrefcount() + run_the_test() + sys.stderr.write('.') + dash_R_cleanup(fs, ps, pic) + if i >= nwarmup: + deltas.append(sys.gettotalrefcount() - rc - 2) + print >> sys.stderr + if any(deltas): + print >> sys.stderr, test, 'leaked', deltas, 'references' + refrep = open(fname, "a") + print >> refrep, test, 'leaked', deltas, 'references' + refrep.close() + +def dash_R_cleanup(fs, ps, pic): + import gc, copy_reg + import _strptime, linecache, warnings, dircache + import urlparse, urllib, urllib2, mimetypes, doctest + import struct, filecmp + from distutils.dir_util import _path_created + + # Restore some original values. + warnings.filters[:] = fs + copy_reg.dispatch_table.clear() + copy_reg.dispatch_table.update(ps) + sys.path_importer_cache.clear() + sys.path_importer_cache.update(pic) + + # Clear assorted module caches. + _path_created.clear() + re.purge() + _strptime._regex_cache.clear() + urlparse.clear_cache() + urllib.urlcleanup() + urllib2.install_opener(None) + dircache.reset() + linecache.clearcache() + mimetypes._default_mime_types() + struct._cache.clear() + filecmp._cache.clear() + doctest.master = None + + # Collect cyclic trash. + gc.collect() + def reportdiff(expected, output): import difflib print "*" * 70 diff --git a/Lib/test/test_bsddb3.py b/Lib/test/test_bsddb3.py index 2d1bff7..8b0c50c 100644 --- a/Lib/test/test_bsddb3.py +++ b/Lib/test/test_bsddb3.py @@ -44,6 +44,8 @@ def suite(): 'test_queue', 'test_recno', 'test_thread', + 'test_sequence', + 'test_cursor_pget_bug', ] alltests = unittest.TestSuite() diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 71e2b0a..e6e4440 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -336,7 +336,7 @@ class BuiltinTest(unittest.TestCase): _cells = {} def __setitem__(self, key, formula): self._cells[key] = formula - def __getitem__(self, key ): + def __getitem__(self, key): return eval(self._cells[key], globals(), self) ss = SpreadSheet() diff --git a/Lib/test/test_class.py b/Lib/test/test_class.py index 601b8b4..d872357 100644 --- a/Lib/test/test_class.py +++ b/Lib/test/test_class.py @@ -363,3 +363,37 @@ except AttributeError, x: pass else: print "attribute error for I.__init__ got masked" + + +# Test comparison and hash of methods +class A: + def __init__(self, x): + self.x = x + def f(self): + pass + def g(self): + pass + def __eq__(self, other): + return self.x == other.x + def __hash__(self): + return self.x +class B(A): + pass + +a1 = A(1) +a2 = A(2) +assert a1.f == a1.f +assert a1.f != a2.f +assert a1.f != a1.g +assert a1.f == A(1).f +assert hash(a1.f) == hash(a1.f) +assert hash(a1.f) == hash(A(1).f) + +assert A.f != a1.f +assert A.f != A.g +assert B.f == A.f +assert hash(B.f) == hash(A.f) + +# the following triggers a SystemError in 2.4 +a = A(hash(A.f.im_func)^(-1)) +hash(a.f) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 89cebb0..8ee431b 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -3866,11 +3866,24 @@ def methodwrapper(): l = [] vereq(l.__add__, l.__add__) - verify(l.__add__ != [].__add__) + vereq(l.__add__, [].__add__) + verify(l.__add__ != [5].__add__) + verify(l.__add__ != l.__mul__) verify(l.__add__.__name__ == '__add__') verify(l.__add__.__self__ is l) verify(l.__add__.__objclass__ is list) vereq(l.__add__.__doc__, list.__add__.__doc__) + try: + hash(l.__add__) + except TypeError: + pass + else: + raise TestFailed("no TypeError from hash([].__add__)") + + t = () + t += (7,) + vereq(t.__add__, (7,).__add__) + vereq(hash(t.__add__), hash((7,).__add__)) def notimplemented(): # all binary methods should be able to return a NotImplemented diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py index 92d2d74..01f7acd 100644 --- a/Lib/test/test_doctest.py +++ b/Lib/test/test_doctest.py @@ -512,15 +512,11 @@ will only be generated for it once: >>> tests[1].name.split('.')[-1] in ['f', 'g'] True -Filter Functions -~~~~~~~~~~~~~~~~ -A filter function can be used to restrict which objects get examined, -but this is temporary, undocumented internal support for testmod's -deprecated isprivate gimmick. - - >>> def namefilter(prefix, base): - ... return base.startswith('a_') - >>> tests = doctest.DocTestFinder(_namefilter=namefilter).find(SampleClass) +Empty Tests +~~~~~~~~~~~ +By default, an object with no doctests doesn't create any tests: + + >>> tests = doctest.DocTestFinder().find(SampleClass) >>> tests.sort() >>> for t in tests: ... print '%2s %s' % (len(t.examples), t.name) @@ -528,6 +524,9 @@ deprecated isprivate gimmick. 3 SampleClass.NestedClass 1 SampleClass.NestedClass.__init__ 1 SampleClass.__init__ + 2 SampleClass.a_classmethod + 1 SampleClass.a_property + 1 SampleClass.a_staticmethod 1 SampleClass.double 1 SampleClass.get @@ -536,8 +535,7 @@ tells it to include (empty) tests for objects with no doctests. This feature is really to support backward compatibility in what doctest.master.summarize() displays. - >>> tests = doctest.DocTestFinder(_namefilter=namefilter, - ... exclude_empty=False).find(SampleClass) + >>> tests = doctest.DocTestFinder(exclude_empty=False).find(SampleClass) >>> tests.sort() >>> for t in tests: ... print '%2s %s' % (len(t.examples), t.name) @@ -547,35 +545,12 @@ displays. 0 SampleClass.NestedClass.get 0 SampleClass.NestedClass.square 1 SampleClass.__init__ - 1 SampleClass.double - 1 SampleClass.get - -If a given object is filtered out, then none of the objects that it -contains will be added either: - - >>> def namefilter(prefix, base): - ... return base == 'NestedClass' - >>> tests = doctest.DocTestFinder(_namefilter=namefilter).find(SampleClass) - >>> tests.sort() - >>> for t in tests: - ... print '%2s %s' % (len(t.examples), t.name) - 3 SampleClass - 1 SampleClass.__init__ 2 SampleClass.a_classmethod 1 SampleClass.a_property 1 SampleClass.a_staticmethod 1 SampleClass.double 1 SampleClass.get -The filter function apply to contained objects, and *not* to the -object explicitly passed to DocTestFinder: - - >>> def namefilter(prefix, base): - ... return base == 'SampleClass' - >>> tests = doctest.DocTestFinder(_namefilter=namefilter).find(SampleClass) - >>> len(tests) - 9 - Turning off Recursion ~~~~~~~~~~~~~~~~~~~~~ DocTestFinder can be told not to look for tests in contained objects @@ -1913,20 +1888,6 @@ def test_DocTestSuite(): modified the test globals, which are a copy of the sample_doctest module dictionary. The test globals are automatically cleared for us after a test. - - Finally, you can provide an alternate test finder. Here we'll - use a custom test_finder to to run just the test named bar. - However, the test in the module docstring, and the two tests - in the module __test__ dict, aren't filtered, so we actually - run three tests besides bar's. The filtering mechanisms are - poorly conceived, and will go away someday. - - >>> finder = doctest.DocTestFinder( - ... _namefilter=lambda prefix, base: base!='bar') - >>> suite = doctest.DocTestSuite('test.sample_doctest', - ... test_finder=finder) - >>> suite.run(unittest.TestResult()) - <unittest.TestResult run=4 errors=0 failures=1> """ def test_DocFileSuite(): diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index ebab913..ebe60c1 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1,9 +1,12 @@ # Python test set -- part 5, built-in exceptions -from test.test_support import TESTFN, unlink, run_unittest -import warnings -import sys, traceback, os +import os +import sys import unittest +import warnings +import pickle, cPickle + +from test.test_support import TESTFN, unlink, run_unittest # XXX This is not really enough, each *operation* should be tested! @@ -191,11 +194,15 @@ class ExceptionTests(unittest.TestCase): def testAttributes(self): # test that exception attributes are happy - try: str(u'Hello \u00E1') - except Exception, e: sampleUnicodeEncodeError = e + try: + str(u'Hello \u00E1') + except Exception, e: + sampleUnicodeEncodeError = e - try: unicode('\xff') - except Exception, e: sampleUnicodeDecodeError = e + try: + unicode('\xff') + except Exception, e: + sampleUnicodeDecodeError = e exceptionList = [ (BaseException, (), {'message' : '', 'args' : ()}), @@ -260,19 +267,20 @@ class ExceptionTests(unittest.TestCase): 'strerror' : 'strErrorStr', 'winerror' : 1, 'errno' : 22, 'filename' : 'filenameStr'}) ) - except NameError: pass - - import pickle, random + except NameError: + pass for args in exceptionList: expected = args[-1] try: exc = args[0] - if len(args) == 2: raise exc - else: raise exc(*args[1]) + if len(args) == 2: + raise exc + else: + raise exc(*args[1]) except BaseException, e: if (e is not exc and # needed for sampleUnicode errors - type(e) is not exc): + type(e) is not exc): raise # Verify no ref leaks in Exc_str() s = str(e) @@ -283,12 +291,15 @@ class ExceptionTests(unittest.TestCase): (repr(e), checkArgName)) # test for pickling support - new = pickle.loads(pickle.dumps(e, random.randint(0, 2))) - for checkArgName in expected: - self.assertEquals(repr(getattr(e, checkArgName)), - repr(expected[checkArgName]), - 'pickled exception "%s", attribute "%s' % - (repr(e), checkArgName)) + for p in pickle, cPickle: + for protocol in range(p.HIGHEST_PROTOCOL + 1): + new = p.loads(p.dumps(e, protocol)) + for checkArgName in expected: + got = repr(getattr(new, checkArgName)) + want = repr(expected[checkArgName]) + self.assertEquals(got, want, + 'pickled "%r", attribute "%s' % + (e, checkArgName)) def testKeywordArgs(self): # test that builtin exception don't take keyword args, diff --git a/Lib/test/test_file.py b/Lib/test/test_file.py index ca1c6ba..dcfa265 100644 --- a/Lib/test/test_file.py +++ b/Lib/test/test_file.py @@ -1,356 +1,325 @@ import sys import os +import unittest from array import array from weakref import proxy -from test.test_support import verify, TESTFN, TestFailed, findfile +from test.test_support import TESTFN, findfile, run_unittest from UserList import UserList -# verify weak references -f = file(TESTFN, 'w') -p = proxy(f) -p.write('teststring') -verify(f.tell(), p.tell()) -f.close() -f = None -try: - p.tell() -except ReferenceError: - pass -else: - raise TestFailed('file proxy still exists when the file is gone') - -# verify expected attributes exist -f = file(TESTFN, 'w') -softspace = f.softspace -f.name # merely shouldn't blow up -f.mode # ditto -f.closed # ditto - -# verify softspace is writable -f.softspace = softspace # merely shouldn't blow up - -# verify the others aren't -for attr in 'name', 'mode', 'closed': - try: - setattr(f, attr, 'oops') - except (AttributeError, TypeError): - pass - else: - raise TestFailed('expected exception setting file attr %r' % attr) -f.close() - -# check invalid mode strings -for mode in ("", "aU", "wU+"): - try: - f = file(TESTFN, mode) - except ValueError: - pass - else: - f.close() - raise TestFailed('%r is an invalid file mode' % mode) - -# verify writelines with instance sequence -l = UserList(['1', '2']) -f = open(TESTFN, 'wb') -f.writelines(l) -f.close() -f = open(TESTFN, 'rb') -buf = f.read() -f.close() -verify(buf == '12') - -# verify readinto -a = array('c', 'x'*10) -f = open(TESTFN, 'rb') -n = f.readinto(a) -f.close() -verify(buf == a.tostring()[:n]) - -# verify readinto refuses text files -a = array('c', 'x'*10) -f = open(TESTFN, 'r') -try: - f.readinto(a) - raise TestFailed("readinto shouldn't work in text mode") -except TypeError: - pass -finally: - f.close() - -# verify writelines with integers -f = open(TESTFN, 'wb') -try: - f.writelines([1, 2, 3]) -except TypeError: - pass -else: - print "writelines accepted sequence of integers" -f.close() - -# verify writelines with integers in UserList -f = open(TESTFN, 'wb') -l = UserList([1,2,3]) -try: - f.writelines(l) -except TypeError: - pass -else: - print "writelines accepted sequence of integers" -f.close() - -# verify writelines with non-string object -class NonString: pass - -f = open(TESTFN, 'wb') -try: - f.writelines([NonString(), NonString()]) -except TypeError: - pass -else: - print "writelines accepted sequence of non-string objects" -f.close() - -# This causes the interpreter to exit on OSF1 v5.1. -if sys.platform != 'osf1V5': - try: - sys.stdin.seek(-1) - except IOError: - pass - else: - print "should not be able to seek on sys.stdin" -else: - print >>sys.__stdout__, ( - ' Skipping sys.stdin.seek(-1), it may crash the interpreter.' - ' Test manually.') - -try: - sys.stdin.truncate() -except IOError: - pass -else: - print "should not be able to truncate on sys.stdin" - -# verify repr works -f = open(TESTFN) -if not repr(f).startswith("<open file '" + TESTFN): - print "repr(file) failed" -f.close() - -# verify repr works for unicode too -f = open(unicode(TESTFN)) -if not repr(f).startswith("<open file u'" + TESTFN): - print "repr(file with unicode name) failed" -f.close() - -# verify that we get a sensible error message for bad mode argument -bad_mode = "qwerty" -try: - open(TESTFN, bad_mode) -except ValueError, msg: - if msg[0] != 0: - s = str(msg) - if s.find(TESTFN) != -1 or s.find(bad_mode) == -1: - print "bad error message for invalid mode: %s" % s - # if msg[0] == 0, we're probably on Windows where there may be - # no obvious way to discover why open() failed. -else: - print "no error for invalid mode: %s" % bad_mode - -f = open(TESTFN) -if f.name != TESTFN: - raise TestFailed, 'file.name should be "%s"' % TESTFN -if f.isatty(): - raise TestFailed, 'file.isatty() should be false' - -if f.closed: - raise TestFailed, 'file.closed should be false' - -try: - f.readinto("") -except TypeError: - pass -else: - raise TestFailed, 'file.readinto("") should raise a TypeError' - -f.close() -if not f.closed: - raise TestFailed, 'file.closed should be true' - -# make sure that explicitly setting the buffer size doesn't cause -# misbehaviour especially with repeated close() calls -for s in (-1, 0, 1, 512): - try: - f = open(TESTFN, 'w', s) - f.write(str(s)) - f.close() - f.close() - f = open(TESTFN, 'r', s) - d = int(f.read()) - f.close() - f.close() - except IOError, msg: - raise TestFailed, 'error setting buffer size %d: %s' % (s, str(msg)) - if d != s: - raise TestFailed, 'readback failure using buffer size %d' - -methods = ['fileno', 'flush', 'isatty', 'next', 'read', 'readinto', - 'readline', 'readlines', 'seek', 'tell', 'truncate', 'write', - '__iter__'] -if sys.platform.startswith('atheos'): - methods.remove('truncate') - -for methodname in methods: - method = getattr(f, methodname) - try: - method() - except ValueError: - pass - else: - raise TestFailed, 'file.%s() on a closed file should raise a ValueError' % methodname - -try: - f.writelines([]) -except ValueError: - pass -else: - raise TestFailed, 'file.writelines([]) on a closed file should raise a ValueError' - -os.unlink(TESTFN) - -def bug801631(): - # SF bug <http://www.python.org/sf/801631> - # "file.truncate fault on windows" - f = file(TESTFN, 'wb') - f.write('12345678901') # 11 bytes - f.close() - - f = file(TESTFN,'rb+') - data = f.read(5) - if data != '12345': - raise TestFailed("Read on file opened for update failed %r" % data) - if f.tell() != 5: - raise TestFailed("File pos after read wrong %d" % f.tell()) - - f.truncate() - if f.tell() != 5: - raise TestFailed("File pos after ftruncate wrong %d" % f.tell()) - - f.close() - size = os.path.getsize(TESTFN) - if size != 5: - raise TestFailed("File size after ftruncate wrong %d" % size) - -try: - bug801631() -finally: - os.unlink(TESTFN) - -# Test the complex interaction when mixing file-iteration and the various -# read* methods. Ostensibly, the mixture could just be tested to work -# when it should work according to the Python language, instead of fail -# when it should fail according to the current CPython implementation. -# People don't always program Python the way they should, though, and the -# implemenation might change in subtle ways, so we explicitly test for -# errors, too; the test will just have to be updated when the -# implementation changes. -dataoffset = 16384 -filler = "ham\n" -assert not dataoffset % len(filler), \ - "dataoffset must be multiple of len(filler)" -nchunks = dataoffset // len(filler) -testlines = [ - "spam, spam and eggs\n", - "eggs, spam, ham and spam\n", - "saussages, spam, spam and eggs\n", - "spam, ham, spam and eggs\n", - "spam, spam, spam, spam, spam, ham, spam\n", - "wonderful spaaaaaam.\n" -] -methods = [("readline", ()), ("read", ()), ("readlines", ()), - ("readinto", (array("c", " "*100),))] - -try: - # Prepare the testfile - bag = open(TESTFN, "wb") - bag.write(filler * nchunks) - bag.writelines(testlines) - bag.close() - # Test for appropriate errors mixing read* and iteration - for methodname, args in methods: - f = open(TESTFN, 'rb') - if f.next() != filler: - raise TestFailed, "Broken testfile" - meth = getattr(f, methodname) +class AutoFileTests(unittest.TestCase): + # file tests for which a test file is automatically set up + + def setUp(self): + self.f = file(TESTFN, 'wb') + + def tearDown(self): try: - meth(*args) - except ValueError: + if self.f: + self.f.close() + except IOError: pass + + def testWeakRefs(self): + # verify weak references + p = proxy(self.f) + p.write('teststring') + self.assertEquals(self.f.tell(), p.tell()) + self.f.close() + self.f = None + self.assertRaises(ReferenceError, getattr, p, 'tell') + + def testAttributes(self): + # verify expected attributes exist + f = self.f + softspace = f.softspace + f.name # merely shouldn't blow up + f.mode # ditto + f.closed # ditto + + # verify softspace is writable + f.softspace = softspace # merely shouldn't blow up + + # verify the others aren't + for attr in 'name', 'mode', 'closed': + self.assertRaises((AttributeError, TypeError), setattr, f, attr, 'oops') + + def testReadinto(self): + # verify readinto + self.f.write('12') + self.f.close() + a = array('c', 'x'*10) + self.f = open(TESTFN, 'rb') + n = self.f.readinto(a) + self.assertEquals('12', a.tostring()[:n]) + + def testReadinto_text(self): + # verify readinto refuses text files + a = array('c', 'x'*10) + self.f.close() + self.f = open(TESTFN, 'r') + self.assertRaises(TypeError, self.f.readinto, a) + + def testWritelinesUserList(self): + # verify writelines with instance sequence + l = UserList(['1', '2']) + self.f.writelines(l) + self.f.close() + self.f = open(TESTFN, 'rb') + buf = self.f.read() + self.assertEquals(buf, '12') + + def testWritelinesIntegers(self): + # verify writelines with integers + self.assertRaises(TypeError, self.f.writelines, [1, 2, 3]) + + def testWritelinesIntegersUserList(self): + # verify writelines with integers in UserList + l = UserList([1,2,3]) + self.assertRaises(TypeError, self.f.writelines, l) + + def testWritelinesNonString(self): + # verify writelines with non-string object + class NonString: pass + + self.assertRaises(TypeError, self.f.writelines, [NonString(), NonString()]) + + def testRepr(self): + # verify repr works + self.assert_(repr(self.f).startswith("<open file '" + TESTFN)) + + def testErrors(self): + f = self.f + self.assertEquals(f.name, TESTFN) + self.assert_(not f.isatty()) + self.assert_(not f.closed) + + self.assertRaises(TypeError, f.readinto, "") + f.close() + self.assert_(f.closed) + + def testMethods(self): + methods = ['fileno', 'flush', 'isatty', 'next', 'read', 'readinto', + 'readline', 'readlines', 'seek', 'tell', 'truncate', 'write', + '__iter__'] + if sys.platform.startswith('atheos'): + methods.remove('truncate') + + self.f.close() + + for methodname in methods: + method = getattr(self.f, methodname) + # should raise on closed file + self.assertRaises(ValueError, method) + self.assertRaises(ValueError, self.f.writelines, []) + + +class OtherFileTests(unittest.TestCase): + + def testModeStrings(self): + # check invalid mode strings + for mode in ("", "aU", "wU+"): + try: + f = file(TESTFN, mode) + except ValueError: + pass + else: + f.close() + self.fail('%r is an invalid file mode' % mode) + + def testStdin(self): + # This causes the interpreter to exit on OSF1 v5.1. + if sys.platform != 'osf1V5': + self.assertRaises(IOError, sys.stdin.seek, -1) else: - raise TestFailed("%s%r after next() didn't raise ValueError" % - (methodname, args)) + print >>sys.__stdout__, ( + ' Skipping sys.stdin.seek(-1), it may crash the interpreter.' + ' Test manually.') + self.assertRaises(IOError, sys.stdin.truncate) + + def testUnicodeOpen(self): + # verify repr works for unicode too + f = open(unicode(TESTFN), "w") + self.assert_(repr(f).startswith("<open file u'" + TESTFN)) f.close() - # Test to see if harmless (by accident) mixing of read* and iteration - # still works. This depends on the size of the internal iteration - # buffer (currently 8192,) but we can test it in a flexible manner. - # Each line in the bag o' ham is 4 bytes ("h", "a", "m", "\n"), so - # 4096 lines of that should get us exactly on the buffer boundary for - # any power-of-2 buffersize between 4 and 16384 (inclusive). - f = open(TESTFN, 'rb') - for i in range(nchunks): - f.next() - testline = testlines.pop(0) - try: - line = f.readline() - except ValueError: - raise TestFailed("readline() after next() with supposedly empty " - "iteration-buffer failed anyway") - if line != testline: - raise TestFailed("readline() after next() with empty buffer " - "failed. Got %r, expected %r" % (line, testline)) - testline = testlines.pop(0) - buf = array("c", "\x00" * len(testline)) - try: - f.readinto(buf) - except ValueError: - raise TestFailed("readinto() after next() with supposedly empty " - "iteration-buffer failed anyway") - line = buf.tostring() - if line != testline: - raise TestFailed("readinto() after next() with empty buffer " - "failed. Got %r, expected %r" % (line, testline)) - - testline = testlines.pop(0) - try: - line = f.read(len(testline)) - except ValueError: - raise TestFailed("read() after next() with supposedly empty " - "iteration-buffer failed anyway") - if line != testline: - raise TestFailed("read() after next() with empty buffer " - "failed. Got %r, expected %r" % (line, testline)) - try: - lines = f.readlines() - except ValueError: - raise TestFailed("readlines() after next() with supposedly empty " - "iteration-buffer failed anyway") - if lines != testlines: - raise TestFailed("readlines() after next() with empty buffer " - "failed. Got %r, expected %r" % (line, testline)) - # Reading after iteration hit EOF shouldn't hurt either - f = open(TESTFN, 'rb') - try: - for line in f: - pass + def testBadModeArgument(self): + # verify that we get a sensible error message for bad mode argument + bad_mode = "qwerty" try: - f.readline() - f.readinto(buf) - f.read() - f.readlines() - except ValueError: - raise TestFailed("read* failed after next() consumed file") - finally: - f.close() -finally: - os.unlink(TESTFN) + f = open(TESTFN, bad_mode) + except ValueError, msg: + if msg[0] != 0: + s = str(msg) + if s.find(TESTFN) != -1 or s.find(bad_mode) == -1: + self.fail("bad error message for invalid mode: %s" % s) + # if msg[0] == 0, we're probably on Windows where there may be + # no obvious way to discover why open() failed. + else: + f.close() + self.fail("no error for invalid mode: %s" % bad_mode) + + def testSetBufferSize(self): + # make sure that explicitly setting the buffer size doesn't cause + # misbehaviour especially with repeated close() calls + for s in (-1, 0, 1, 512): + try: + f = open(TESTFN, 'w', s) + f.write(str(s)) + f.close() + f.close() + f = open(TESTFN, 'r', s) + d = int(f.read()) + f.close() + f.close() + except IOError, msg: + self.fail('error setting buffer size %d: %s' % (s, str(msg))) + self.assertEquals(d, s) + + def testTruncateOnWindows(self): + os.unlink(TESTFN) + + def bug801631(): + # SF bug <http://www.python.org/sf/801631> + # "file.truncate fault on windows" + f = file(TESTFN, 'wb') + f.write('12345678901') # 11 bytes + f.close() + + f = file(TESTFN,'rb+') + data = f.read(5) + if data != '12345': + self.fail("Read on file opened for update failed %r" % data) + if f.tell() != 5: + self.fail("File pos after read wrong %d" % f.tell()) + + f.truncate() + if f.tell() != 5: + self.fail("File pos after ftruncate wrong %d" % f.tell()) + + f.close() + size = os.path.getsize(TESTFN) + if size != 5: + self.fail("File size after ftruncate wrong %d" % size) + + try: + bug801631() + finally: + os.unlink(TESTFN) + + def testIteration(self): + # Test the complex interaction when mixing file-iteration and the various + # read* methods. Ostensibly, the mixture could just be tested to work + # when it should work according to the Python language, instead of fail + # when it should fail according to the current CPython implementation. + # People don't always program Python the way they should, though, and the + # implemenation might change in subtle ways, so we explicitly test for + # errors, too; the test will just have to be updated when the + # implementation changes. + dataoffset = 16384 + filler = "ham\n" + assert not dataoffset % len(filler), \ + "dataoffset must be multiple of len(filler)" + nchunks = dataoffset // len(filler) + testlines = [ + "spam, spam and eggs\n", + "eggs, spam, ham and spam\n", + "saussages, spam, spam and eggs\n", + "spam, ham, spam and eggs\n", + "spam, spam, spam, spam, spam, ham, spam\n", + "wonderful spaaaaaam.\n" + ] + methods = [("readline", ()), ("read", ()), ("readlines", ()), + ("readinto", (array("c", " "*100),))] + + try: + # Prepare the testfile + bag = open(TESTFN, "wb") + bag.write(filler * nchunks) + bag.writelines(testlines) + bag.close() + # Test for appropriate errors mixing read* and iteration + for methodname, args in methods: + f = open(TESTFN, 'rb') + if f.next() != filler: + self.fail, "Broken testfile" + meth = getattr(f, methodname) + try: + meth(*args) + except ValueError: + pass + else: + self.fail("%s%r after next() didn't raise ValueError" % + (methodname, args)) + f.close() + + # Test to see if harmless (by accident) mixing of read* and iteration + # still works. This depends on the size of the internal iteration + # buffer (currently 8192,) but we can test it in a flexible manner. + # Each line in the bag o' ham is 4 bytes ("h", "a", "m", "\n"), so + # 4096 lines of that should get us exactly on the buffer boundary for + # any power-of-2 buffersize between 4 and 16384 (inclusive). + f = open(TESTFN, 'rb') + for i in range(nchunks): + f.next() + testline = testlines.pop(0) + try: + line = f.readline() + except ValueError: + self.fail("readline() after next() with supposedly empty " + "iteration-buffer failed anyway") + if line != testline: + self.fail("readline() after next() with empty buffer " + "failed. Got %r, expected %r" % (line, testline)) + testline = testlines.pop(0) + buf = array("c", "\x00" * len(testline)) + try: + f.readinto(buf) + except ValueError: + self.fail("readinto() after next() with supposedly empty " + "iteration-buffer failed anyway") + line = buf.tostring() + if line != testline: + self.fail("readinto() after next() with empty buffer " + "failed. Got %r, expected %r" % (line, testline)) + + testline = testlines.pop(0) + try: + line = f.read(len(testline)) + except ValueError: + self.fail("read() after next() with supposedly empty " + "iteration-buffer failed anyway") + if line != testline: + self.fail("read() after next() with empty buffer " + "failed. Got %r, expected %r" % (line, testline)) + try: + lines = f.readlines() + except ValueError: + self.fail("readlines() after next() with supposedly empty " + "iteration-buffer failed anyway") + if lines != testlines: + self.fail("readlines() after next() with empty buffer " + "failed. Got %r, expected %r" % (line, testline)) + # Reading after iteration hit EOF shouldn't hurt either + f = open(TESTFN, 'rb') + try: + for line in f: + pass + try: + f.readline() + f.readinto(buf) + f.read() + f.readlines() + except ValueError: + self.fail("read* failed after next() consumed file") + finally: + f.close() + finally: + os.unlink(TESTFN) + + +def test_main(): + run_unittest(AutoFileTests, OtherFileTests) + +if __name__ == '__main__': + test_main() diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py index 609e8f4..8dc185b 100644 --- a/Lib/test/test_functools.py +++ b/Lib/test/test_functools.py @@ -152,6 +152,113 @@ class TestPythonPartial(TestPartial): thetype = PythonPartial +class TestUpdateWrapper(unittest.TestCase): + + def check_wrapper(self, wrapper, wrapped, + assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES): + # Check attributes were assigned + for name in assigned: + self.failUnless(getattr(wrapper, name) is getattr(wrapped, name)) + # Check attributes were updated + for name in updated: + wrapper_attr = getattr(wrapper, name) + wrapped_attr = getattr(wrapped, name) + for key in wrapped_attr: + self.failUnless(wrapped_attr[key] is wrapper_attr[key]) + + def test_default_update(self): + def f(): + """This is a test""" + pass + f.attr = 'This is also a test' + def wrapper(): + pass + functools.update_wrapper(wrapper, f) + self.check_wrapper(wrapper, f) + self.assertEqual(wrapper.__name__, 'f') + self.assertEqual(wrapper.__doc__, 'This is a test') + self.assertEqual(wrapper.attr, 'This is also a test') + + def test_no_update(self): + def f(): + """This is a test""" + pass + f.attr = 'This is also a test' + def wrapper(): + pass + functools.update_wrapper(wrapper, f, (), ()) + self.check_wrapper(wrapper, f, (), ()) + self.assertEqual(wrapper.__name__, 'wrapper') + self.assertEqual(wrapper.__doc__, None) + self.failIf(hasattr(wrapper, 'attr')) + + def test_selective_update(self): + def f(): + pass + f.attr = 'This is a different test' + f.dict_attr = dict(a=1, b=2, c=3) + def wrapper(): + pass + wrapper.dict_attr = {} + assign = ('attr',) + update = ('dict_attr',) + functools.update_wrapper(wrapper, f, assign, update) + self.check_wrapper(wrapper, f, assign, update) + self.assertEqual(wrapper.__name__, 'wrapper') + self.assertEqual(wrapper.__doc__, None) + self.assertEqual(wrapper.attr, 'This is a different test') + self.assertEqual(wrapper.dict_attr, f.dict_attr) + + +class TestWraps(TestUpdateWrapper): + + def test_default_update(self): + def f(): + """This is a test""" + pass + f.attr = 'This is also a test' + @functools.wraps(f) + def wrapper(): + pass + self.check_wrapper(wrapper, f) + self.assertEqual(wrapper.__name__, 'f') + self.assertEqual(wrapper.__doc__, 'This is a test') + self.assertEqual(wrapper.attr, 'This is also a test') + + def test_no_update(self): + def f(): + """This is a test""" + pass + f.attr = 'This is also a test' + @functools.wraps(f, (), ()) + def wrapper(): + pass + self.check_wrapper(wrapper, f, (), ()) + self.assertEqual(wrapper.__name__, 'wrapper') + self.assertEqual(wrapper.__doc__, None) + self.failIf(hasattr(wrapper, 'attr')) + + def test_selective_update(self): + def f(): + pass + f.attr = 'This is a different test' + f.dict_attr = dict(a=1, b=2, c=3) + def add_dict_attr(f): + f.dict_attr = {} + return f + assign = ('attr',) + update = ('dict_attr',) + @functools.wraps(f, assign, update) + @add_dict_attr + def wrapper(): + pass + self.check_wrapper(wrapper, f, assign, update) + self.assertEqual(wrapper.__name__, 'wrapper') + self.assertEqual(wrapper.__doc__, None) + self.assertEqual(wrapper.attr, 'This is a different test') + self.assertEqual(wrapper.dict_attr, f.dict_attr) + def test_main(verbose=None): @@ -160,6 +267,8 @@ def test_main(verbose=None): TestPartial, TestPartialSubclass, TestPythonPartial, + TestUpdateWrapper, + TestWraps ) test_support.run_unittest(*test_classes) diff --git a/Lib/test/test_generators.py b/Lib/test/test_generators.py index a60a768..a184a8b 100644 --- a/Lib/test/test_generators.py +++ b/Lib/test/test_generators.py @@ -733,7 +733,7 @@ syntax_tests = """ ... yield 1 Traceback (most recent call last): .. -SyntaxError: 'return' with argument inside generator (<doctest test.test_generators.__test__.syntax[0]>, line 2) +SyntaxError: 'return' with argument inside generator (<doctest test.test_generators.__test__.syntax[0]>, line 3) >>> def f(): ... yield 1 @@ -876,9 +876,9 @@ These are fine: ... if 0: ... return 3 # but *this* sucks (line 8) ... if 0: -... yield 2 # because it's a generator +... yield 2 # because it's a generator (line 10) Traceback (most recent call last): -SyntaxError: 'return' with argument inside generator (<doctest test.test_generators.__test__.syntax[24]>, line 8) +SyntaxError: 'return' with argument inside generator (<doctest test.test_generators.__test__.syntax[24]>, line 10) This one caused a crash (see SF bug 567538): diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py index 2246fb6..01b9b5b 100644 --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -860,25 +860,25 @@ class BufferIOTest(SocketConnectedTest): def __init__(self, methodName='runTest'): SocketConnectedTest.__init__(self, methodName=methodName) - def testRecvBuf(self): + def testRecvInto(self): buf = array.array('c', ' '*1024) - nbytes = self.cli_conn.recv_buf(buf) + nbytes = self.cli_conn.recv_into(buf) self.assertEqual(nbytes, len(MSG)) msg = buf.tostring()[:len(MSG)] self.assertEqual(msg, MSG) - def _testRecvBuf(self): + def _testRecvInto(self): buf = buffer(MSG) self.serv_conn.send(buf) - def testRecvFromBuf(self): + def testRecvFromInto(self): buf = array.array('c', ' '*1024) - nbytes, addr = self.cli_conn.recvfrom_buf(buf) + nbytes, addr = self.cli_conn.recvfrom_into(buf) self.assertEqual(nbytes, len(MSG)) msg = buf.tostring()[:len(MSG)] self.assertEqual(msg, MSG) - def _testRecvFromBuf(self): + def _testRecvFromInto(self): buf = buffer(MSG) self.serv_conn.send(buf) diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index af835f7..aa458e6 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -1,4 +1,4 @@ -from test.test_support import TestFailed, verbose, verify +from test.test_support import TestFailed, verbose, verify, vereq import test.test_support import struct import array @@ -16,13 +16,11 @@ except ImportError: PY_STRUCT_RANGE_CHECKING = 0 PY_STRUCT_OVERFLOW_MASKING = 1 else: - PY_STRUCT_RANGE_CHECKING = getattr(_struct, '_PY_STRUCT_RANGE_CHECKING', 0) - PY_STRUCT_OVERFLOW_MASKING = getattr(_struct, '_PY_STRUCT_OVERFLOW_MASKING', 0) + PY_STRUCT_RANGE_CHECKING = _struct._PY_STRUCT_RANGE_CHECKING + PY_STRUCT_OVERFLOW_MASKING = _struct._PY_STRUCT_OVERFLOW_MASKING def string_reverse(s): - chars = list(s) - chars.reverse() - return "".join(chars) + return "".join(reversed(s)) def bigendian_to_native(value): if ISBIGENDIAN: @@ -504,7 +502,7 @@ def assertRaises(excClass, callableObj, *args, **kwargs): except excClass: return else: - raise RuntimeError("%s not raised." % excClass) + raise TestFailed("%s not raised." % excClass) def test_unpack_from(): test_string = 'abcd01234' @@ -512,68 +510,67 @@ def test_unpack_from(): s = struct.Struct(fmt) for cls in (str, buffer): data = cls(test_string) - assert s.unpack_from(data) == ('abcd',) - assert s.unpack_from(data, 2) == ('cd01',) - assert s.unpack_from(data, 4) == ('0123',) + vereq(s.unpack_from(data), ('abcd',)) + vereq(s.unpack_from(data, 2), ('cd01',)) + vereq(s.unpack_from(data, 4), ('0123',)) for i in xrange(6): - assert s.unpack_from(data, i) == (data[i:i+4],) + vereq(s.unpack_from(data, i), (data[i:i+4],)) for i in xrange(6, len(test_string) + 1): simple_err(s.unpack_from, data, i) for cls in (str, buffer): data = cls(test_string) - assert struct.unpack_from(fmt, data) == ('abcd',) - assert struct.unpack_from(fmt, data, 2) == ('cd01',) - assert struct.unpack_from(fmt, data, 4) == ('0123',) + vereq(struct.unpack_from(fmt, data), ('abcd',)) + vereq(struct.unpack_from(fmt, data, 2), ('cd01',)) + vereq(struct.unpack_from(fmt, data, 4), ('0123',)) for i in xrange(6): - assert (struct.unpack_from(fmt, data, i) == (data[i:i+4],)) + vereq(struct.unpack_from(fmt, data, i), (data[i:i+4],)) for i in xrange(6, len(test_string) + 1): simple_err(struct.unpack_from, fmt, data, i) -def test_pack_to(): +def test_pack_into(): test_string = 'Reykjavik rocks, eow!' writable_buf = array.array('c', ' '*100) fmt = '21s' s = struct.Struct(fmt) # Test without offset - s.pack_to(writable_buf, 0, test_string) + s.pack_into(writable_buf, 0, test_string) from_buf = writable_buf.tostring()[:len(test_string)] - assert from_buf == test_string + vereq(from_buf, test_string) # Test with offset. - s.pack_to(writable_buf, 10, test_string) + s.pack_into(writable_buf, 10, test_string) from_buf = writable_buf.tostring()[:len(test_string)+10] - assert from_buf == (test_string[:10] + test_string) + vereq(from_buf, test_string[:10] + test_string) # Go beyond boundaries. small_buf = array.array('c', ' '*10) - assertRaises(struct.error, s.pack_to, small_buf, 0, test_string) - assertRaises(struct.error, s.pack_to, small_buf, 2, test_string) + assertRaises(struct.error, s.pack_into, small_buf, 0, test_string) + assertRaises(struct.error, s.pack_into, small_buf, 2, test_string) -def test_pack_to_fn(): +def test_pack_into_fn(): test_string = 'Reykjavik rocks, eow!' writable_buf = array.array('c', ' '*100) fmt = '21s' - pack_to = lambda *args: struct.pack_to(fmt, *args) + pack_into = lambda *args: struct.pack_into(fmt, *args) - # Test without offset - pack_to(writable_buf, 0, test_string) + # Test without offset. + pack_into(writable_buf, 0, test_string) from_buf = writable_buf.tostring()[:len(test_string)] - assert from_buf == test_string + vereq(from_buf, test_string) # Test with offset. - pack_to(writable_buf, 10, test_string) + pack_into(writable_buf, 10, test_string) from_buf = writable_buf.tostring()[:len(test_string)+10] - assert from_buf == (test_string[:10] + test_string) + vereq(from_buf, test_string[:10] + test_string) # Go beyond boundaries. small_buf = array.array('c', ' '*10) - assertRaises(struct.error, pack_to, small_buf, 0, test_string) - assertRaises(struct.error, pack_to, small_buf, 2, test_string) + assertRaises(struct.error, pack_into, small_buf, 0, test_string) + assertRaises(struct.error, pack_into, small_buf, 2, test_string) + - # Test methods to pack and unpack from buffers rather than strings. test_unpack_from() -test_pack_to() -test_pack_to_fn() - +test_pack_into() +test_pack_into_fn() diff --git a/Lib/test/test_tempfile.py b/Lib/test/test_tempfile.py index e274c5b..aeaa77e 100644 --- a/Lib/test/test_tempfile.py +++ b/Lib/test/test_tempfile.py @@ -390,7 +390,7 @@ test_classes.append(test_gettempdir) class test_mkstemp(TC): """Test mkstemp().""" - def do_create(self, dir=None, pre="", suf="", ): + def do_create(self, dir=None, pre="", suf=""): if dir is None: dir = tempfile.gettempdir() try: |